Merge branch 'drm-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6

* 'drm-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6: (50 commits)
  drm: include kernel list header file in hashtab header
  drm: Export hash table functionality.
  drm: Split out the mm declarations in a separate header. Add atomic operations.
  drm/radeon: add support for RV790.
  drm/radeon: add rv740 drm support.
  drm_calloc_large: check right size, check integer overflow, use GFP_ZERO
  drm: Eliminate magic I2C frobbing when reading EDID
  drm/i915: duplicate desired mode for use by fbcon.
  drm/via: vfree() no need checking before calling it
  drm: Replace DRM_DEBUG with DRM_DEBUG_DRIVER in i915 driver
  drm: Replace DRM_DEBUG with DRM_DEBUG_MODE in drm_mode
  drm/i915: Replace DRM_DEBUG with DRM_DEBUG_KMS in intel_sdvo
  drm/i915: replace DRM_DEBUG with DRM_DEBUG_KMS in intel_lvds
  drm: add separate drm debugging levels
  radeon: remove _DRM_DRIVER from the preadded sarea map
  drm: don't associate _DRM_DRIVER maps with a master
  drm: simplify kcalloc() call to kzalloc().
  intelfb: fix spelling of "CLOCK"
  drm: fix LOCK_TEST_WITH_RETURN macro
  drm/i915: Hook connector to encoder during load detection (fixes tv/vga detect)
  ...
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 44f52a4..cbbd3e0 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -60,3 +60,62 @@
 		Indicates whether the block layer should automatically
 		generate checksums for write requests bound for
 		devices that support receiving integrity metadata.
+
+What:		/sys/block/<disk>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the device is
+		offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/<partition>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the partition
+		is offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/queue/logical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can
+		address.  It is typically 512 bytes.
+
+What:		/sys/block/<disk>/queue/physical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can write
+		without resorting to read-modify-write operation.  It is
+		usually the same as the logical block size but may be
+		bigger.  One example is SATA drives with 4KB sectors
+		that expose a 512-byte logical block size to the
+		operating system.
+
+What:		/sys/block/<disk>/queue/minimum_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a preferred minimum I/O size,
+		which is the smallest request the device can perform
+		without incurring a read-modify-write penalty.  For disk
+		drives this is often the physical block size.  For RAID
+		arrays it is often the stripe chunk size.
+
+What:		/sys/block/<disk>/queue/optimal_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report an optimal I/O size, which is
+		the device's preferred unit of receiving I/O.  This is
+		rarely reported for disk drives.  For RAID devices it is
+		usually the stripe width or the internal block size.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
new file mode 100644
index 0000000..0a92a7c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -0,0 +1,33 @@
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/model
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 model for logical drive
+		Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 revision for logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 83 serial number for logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 vendor for logical drive
+		Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	A symbolic link to /sys/block/cciss!cXdY
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644
index 0000000..175bb4f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable
@@ -0,0 +1,18 @@
+What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date:      August 2008
+KernelVersion:	2.6.27
+Contact:	mark.langsdorf@amd.com
+Description:	These files exist in every cpu's cache index directories.
+		There are currently 2 cache_disable_# files in each
+		directory.  Reading from these files on a supported
+		processor will return that cache disable index value
+		for that processor and node.  Writing to one of these
+		files will cause the specificed cache index to be disabled.
+
+		Currently, only AMD Family 10h Processors support cache index
+		disable, and only for their L3 caches.  See the BIOS and
+		Kernel Developer's Guide at
+		http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
+		for formatting information and other details on the
+		cache index disable.
+Users:    joachim.deguara@amd.com
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d9aa43d..25fb8bc 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -704,12 +704,24 @@
 				The current number of free dma_debug_entries
 				in the allocator.
 
+	dma-api/driver-filter
+				You can write a name of a driver into this file
+				to limit the debug output to requests from that
+				particular driver. Write an empty string to
+				that file to disable the filter and see
+				all errors again.
+
 If you have this code compiled into your kernel it will be enabled by default.
 If you want to boot without the bookkeeping anyway you can provide
 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
 Notice that you can not enable it again at runtime. You have to reboot to do
 so.
 
+If you want to see debug messages only for a special device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
 When the code disables itself at runtime this is most likely because it ran
 out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index b1eb661..9632444 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
 	    mac80211.xml debugobjects.xml sh.xml regulator.xml \
-	    alsa-driver-api.xml writing-an-alsa-driver.xml
+	    alsa-driver-api.xml writing-an-alsa-driver.xml \
+	    tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 0000000..b0756d0
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile, debug,
+     and understand kernel behavior. There are a number of tools that provide a
+     framework for using 'probes'. These tools include Systemtap, ftrace, and
+     LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros. Thus,
+     the purpose of this document is to provide a clear accounting of the available
+     tracepoints. The intention is to understand not only what tracepoints are
+     available but also to understand where future tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are the
+     tracepoints callbacks that are found throughout the code. Registering and
+     unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
+</book>
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 0688482..02cced1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@
 o	"qp" indicates that RCU still expects a quiescent state from
 	this CPU.
 
-o	"rpfq" is the number of rcu_pending() calls on this CPU required
-	to induce this CPU to invoke force_quiescent_state().
-
-o	"rp" is low-order four hex digits of the count of how many times
-	rcu_pending() has been invoked on this CPU.
-
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
 	scheduler or by irq.  The number after the "/" is the interrupt
@@ -305,6 +300,9 @@
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-variable spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@
 		For example, the first entry at the lowest level shows
 		"^0", indicating that it corresponds to bit zero in
 		the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o	"np" is the number of times that __rcu_pending() has been invoked
+	for the corresponding flavor of RCU.
+
+o	"qsp" is the number of times that the RCU was waiting for a
+	quiescent state from this CPU.
+
+o	"cbr" is the number of times that this CPU had RCU callbacks
+	that had passed through a grace period, and were thus ready
+	to be invoked.
+
+o	"cng" is the number of times that this CPU needed another
+	grace period while RCU was idle.
+
+o	"gpc" is the number of times that an old grace period had
+	completed, but this CPU was not yet aware of it.
+
+o	"gps" is the number of times that a new grace period had started,
+	but this CPU was not yet aware of it.
+
+o	"nf" is the number of times that this CPU suspected that the
+	current grace period had run for too long, and thus needed to
+	be forced.
+
+	Please note that "forcing" consists of sending resched IPIs
+	to holdout CPUs.  If that CPU really still is in an old RCU
+	read-side critical section, then we really do have to wait for it.
+	The assumption behing "forcing" is that the CPU is not still in
+	an old RCU read-side critical section, but has not yet responded
+	for some other reason.
+
+o	"nn" is the number of times that this CPU needed nothing.  Alert
+	readers will note that the rcu "nn" number for a given CPU very
+	closely matches the rcu_bh "np" number for that same CPU.  This
+	is due to short-circuit evaluation in rcu_pending().
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
index 629c92e..34614b4 100644
--- a/Documentation/Smack.txt
+++ b/Documentation/Smack.txt
@@ -184,8 +184,9 @@
 other than a letter or digit, are reserved for use by the Smack development
 team. Smack labels are unstructured, case sensitive, and the only operation
 ever performed on them is comparison for equality. Smack labels cannot
-contain unprintable characters or the "/" (slash) character. Smack labels
-cannot begin with a '-', which is reserved for special options.
+contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
+(quote) and '"' (double-quote) characters.
+Smack labels cannot begin with a '-', which is reserved for special options.
 
 There are some predefined labels:
 
@@ -523,3 +524,18 @@
 
 These mount options apply to all file system types.
 
+Smack auditing
+
+If you want Smack auditing of security events, you need to set CONFIG_AUDIT
+in your kernel configuration.
+By default, all denied events will be audited. You can change this behavior by
+writing a single character to the /smack/logging file :
+0 : no logging
+1 : log denied (default)
+2 : log accepted
+3 : log denied & accepted
+
+Events are logged as 'key=value' pairs, for each event you at least will get
+the subjet, the object, the rights requested, the action, the kernel function
+that triggered the event, plus other pairs depending on the type of event
+audited.
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97e..8d2158a 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@
 do not have a corresponding kernel virtual address space mapping) and
 low-memory pages.
 
-Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
+Note: Please refer to Documentation/DMA-mapping.txt for a discussion
 on PCI high mem DMA aspects and mapping of scatter gather lists, and support
 for 64 bit PCI.
 
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 4dae9a3..0494f78 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -60,7 +60,7 @@
 go_unlock        | Called on the final local unlock of a lock
 go_dump          | Called to print content of object for debugfs file, or on
                  | error to dump glock to the log.
-go_type;         | The type of the glock, LM_TYPE_.....
+go_type          | The type of the glock, LM_TYPE_.....
 go_min_hold_time | The minimum hold time
 
 The minimum hold time for each lock is the time after a remote lock
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
index 593004b..5e3ab8f 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.txt
@@ -11,18 +11,15 @@
 features of GFS is perfect consistency -- changes made to the file system
 on one machine show up immediately on all other machines in the cluster.
 
-GFS uses interchangable inter-node locking mechanisms.  Different lock
-modules can plug into GFS and each file system selects the appropriate
-lock module at mount time.  Lock modules include:
+GFS uses interchangable inter-node locking mechanisms, the currently
+supported mechanisms are:
 
   lock_nolock -- allows gfs to be used as a local file system
 
   lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
   The dlm is found at linux/fs/dlm/
 
-In addition to interfacing with an external locking manager, a gfs lock
-module is responsible for interacting with external cluster management
-systems.  Lock_dlm depends on user space cluster management systems found
+Lock_dlm depends on user space cluster management systems found
 at the URL above.
 
 To use gfs as a local file system, no external clustering systems are
@@ -31,13 +28,19 @@
   $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
   $ mount -t gfs2 /dev/block_device /dir
 
-GFS2 is not on-disk compatible with previous versions of GFS.
+If you are using Fedora, you need to install the gfs2-utils package
+and, for lock_dlm, you will also need to install the cman package
+and write a cluster.conf as per the documentation.
+
+GFS2 is not on-disk compatible with previous versions of GFS, but it
+is pretty close.
 
 The following man pages can be found at the URL above:
-  gfs2_fsck	to repair a filesystem
+  fsck.gfs2	to repair a filesystem
   gfs2_grow	to expand a filesystem online
   gfs2_jadd	to add journals to a filesystem online
   gfs2_tool	to manipulate, examine and tune a filesystem
   gfs2_quota	to examine and change quota values in a filesystem
+  gfs2_convert	to convert a gfs filesystem to gfs2 in-place
   mount.gfs2	to help mount(8) mount a filesystem
   mkfs.gfs2	to make a filesystem
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 0000000..9dc1ff4
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
+Futex Requeue PI
+----------------
+
+Requeueing of tasks from a non-PI futex to a PI futex requires
+special handling in order to ensure the underlying rt_mutex is never
+left without an owner if it has waiters; doing so would break the PI
+boosting logic [see rt-mutex-desgin.txt] For the purposes of
+brevity, this action will be referred to as "requeue_pi" throughout
+this document.  Priority inheritance is abbreviated throughout as
+"PI".
+
+Motivation
+----------
+
+Without requeue_pi, the glibc implementation of
+pthread_cond_broadcast() must resort to waking all the tasks waiting
+on a pthread_condvar and letting them try to sort out which task
+gets to run first in classic thundering-herd formation.  An ideal
+implementation would wake the highest-priority waiter, and leave the
+rest to the natural wakeup inherent in unlocking the mutex
+associated with the condvar.
+
+Consider the simplified glibc calls:
+
+/* caller must lock mutex */
+pthread_cond_wait(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+	   unlock(cond->__data.__lock);
+	   futex_wait(cond->__data.__futex);
+	   lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+	lock(mutex);
+}
+
+pthread_cond_broadcast(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue(cond->data.__futex, cond->mutex);
+}
+
+Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
+has waiters. Note that pthread_cond_wait() attempts to lock the
+mutex only after it has returned to user space.  This will leave the
+underlying rt_mutex with waiters, and no owner, breaking the
+previously mentioned PI-boosting algorithms.
+
+In order to support PI-aware pthread_condvar's, the kernel needs to
+be able to requeue tasks to PI futexes.  This support implies that
+upon a successful futex_wait system call, the caller would return to
+user space already holding the PI futex.  The glibc implementation
+would be modified as follows:
+
+
+/* caller must lock mutex */
+pthread_cond_wait_pi(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+	   unlock(cond->__data.__lock);
+	   futex_wait_requeue_pi(cond->__data.__futex);
+	   lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+        /* the kernel acquired the the mutex for us */
+}
+
+pthread_cond_broadcast_pi(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue_pi(cond->data.__futex, cond->mutex);
+}
+
+The actual glibc implementation will likely test for PI and make the
+necessary changes inside the existing calls rather than creating new
+calls for the PI cases.  Similar changes are needed for
+pthread_cond_timedwait() and pthread_cond_signal().
+
+Implementation
+--------------
+
+In order to ensure the rt_mutex has an owner if it has waiters, it
+is necessary for both the requeue code, as well as the waiting code,
+to be able to acquire the rt_mutex before returning to user space.
+The requeue code cannot simply wake the waiter and leave it to
+acquire the rt_mutex as it would open a race window between the
+requeue call returning to user space and the waiter waking and
+starting to run.  This is especially true in the uncontended case.
+
+The solution involves two new rt_mutex helper routines,
+rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
+allow the requeue code to acquire an uncontended rt_mutex on behalf
+of the waiter and to enqueue the waiter on a contended rt_mutex.
+Two new system calls provide the kernel<->user interface to
+requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
+
+FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
+and pthread_cond_timedwait()) to block on the initial futex and wait
+to be requeued to a PI-aware futex.  The implementation is the
+result of a high-speed collision between futex_wait() and
+futex_lock_pi(), with some extra logic to check for the additional
+wake-up scenarios.
+
+FUTEX_REQUEUE_CMP_PI is called by the waker
+(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
+possibly wake the waiting tasks. Internally, this system call is
+still handled by futex_requeue (by passing requeue_pi=1).  Before
+requeueing, futex_requeue() attempts to acquire the requeue target
+PI futex on behalf of the top waiter.  If it can, this waiter is
+woken.  futex_requeue() then proceeds to requeue the remaining
+nr_wake+nr_requeue tasks to the PI futex, calling
+rt_mutex_start_proxy_lock() prior to each requeue to prepare the
+task as a waiter on the underlying rt_mutex.  It is possible that
+the lock can be acquired at this stage as well, if so, the next
+waiter is woken to finish the acquisition of the lock.
+
+FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
+their sum is all that really matters.  futex_requeue() will wake or
+requeue up to nr_wake + nr_requeue tasks.  It will wake only as many
+tasks as it can acquire the lock for, which in the majority of cases
+should be 0 as good programming practice dictates that the caller of
+either pthread_cond_broadcast() or pthread_cond_signal() acquire the
+mutex prior to making the call. FUTEX_REQUEUE_PI requires that
+nr_wake=1.  nr_requeue should be INT_MAX for broadcast and 0 for
+signal.
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 0c78f4b..e77bebf 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -216,6 +216,8 @@
 
 * "noflush=[interface_number.device_number]" to disable flush requests
 
+* "nohpa=[interface_number.device_number]" to disable Host Protected Area
+
 * "noprobe=[interface_number.device_number]" to skip probing
 
 * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fd5cac0..0bf8a88 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,7 +56,6 @@
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
-	KMEMTRACE kmemtrace is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -329,11 +328,6 @@
 				    flushed before they will be reused, which
 				    is a lot of faster
 
-	amd_iommu_size= [HW,X86-64]
-			Define the size of the aperture for the AMD IOMMU
-			driver. Possible values are:
-			'32M', '64M' (default), '128M', '256M', '512M', '1G'
-
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
@@ -646,6 +640,13 @@
 			DMA-API debugging code disables itself because the
 			architectural default is too low.
 
+	dma_debug_driver=<driver_name>
+			With this option the DMA-API debugging driver
+			filter feature can be enabled at boot time. Just
+			pass the driver to filter for as the parameter.
+			The filter can be disabled or changed to another
+			driver later using sysfs.
+
 	dscc4.setup=	[NET]
 
 	dtc3181e=	[HW,SCSI]
@@ -752,12 +753,25 @@
 			ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
 	ftrace=[tracer]
-			[ftrace] will set and start the specified tracer
+			[FTRACE] will set and start the specified tracer
 			as early as possible in order to facilitate early
 			boot debugging.
 
 	ftrace_dump_on_oops
-			[ftrace] will dump the trace buffers on oops.
+			[FTRACE] will dump the trace buffers on oops.
+
+	ftrace_filter=[function-list]
+			[FTRACE] Limit the functions traced by the function
+			tracer at boot up. function-list is a comma separated
+			list of functions. This list can be changed at run
+			time by the set_ftrace_filter file in the debugfs
+			tracing directory. 
+
+	ftrace_notrace=[function-list]
+			[FTRACE] Do not trace the functions specified in
+			function-list. This list can be changed at run time
+			by the set_ftrace_notrace file in the debugfs
+			tracing directory.
 
 	gamecon.map[2|3]=
 			[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -873,11 +887,8 @@
 
 	ide-core.nodma=	[HW] (E)IDE subsystem
 			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
-			.vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom
-			.chs .ignore_cable are additional options
-			See Documentation/ide/ide.txt.
-
-	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
+			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
+			.cdrom .chs .ignore_cable are additional options
 			See Documentation/ide/ide.txt.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
@@ -914,6 +925,12 @@
 			Formt: { "sha1" | "md5" }
 			default: "sha1"
 
+	ima_tcb		[IMA]
+			Load a policy which meets the needs of the Trusted
+			Computing Base.  This means IMA will measure all
+			programs exec'd, files mmap'd for exec, and all files
+			opened for read by uid=0.
+
 	in2000=		[HW,SCSI]
 			See header of drivers/scsi/in2000.c.
 
@@ -1054,15 +1071,6 @@
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
-	kmemtrace.enable=	[KNL,KMEMTRACE] Format: { yes | no }
-				Controls whether kmemtrace is enabled
-				at boot-time.
-
-	kmemtrace.subbufs=n	[KNL,KMEMTRACE] Overrides the number of
-			subbufs kmemtrace's relay channel has. Set this
-			higher than default (KMEMTRACE_N_SUBBUFS in code) if
-			you experience buffer overruns.
-
 	kgdboc=		[HW] kgdb over consoles.
 			Requires a tty driver that supports console polling.
 			(only serial suported for now)
@@ -1072,6 +1080,10 @@
 			Configure the RouterBoard 532 series on-chip
 			Ethernet adapter MAC address.
 
+	kmemleak=	[KNL] Boot-time kmemleak enable/disable
+			Valid arguments: on, off
+			Default: on
+
 	kstack=N	[X86] Print N words from the kernel stack
 			in oops dumps.
 
@@ -1575,6 +1587,9 @@
 	noinitrd	[RAM] Tells the kernel not to load any configured
 			initial RAM disk.
 
+	nointremap	[X86-64, Intel-IOMMU] Do not enable interrupt
+			remapping.
+
 	nointroute	[IA-64]
 
 	nojitter	[IA64] Disables jitter checking for ITC timers.
@@ -1660,6 +1675,14 @@
 	oprofile.timer=	[HW]
 			Use timer interrupt instead of performance counters
 
+	oprofile.cpu_type=	Force an oprofile cpu type
+			This might be useful if you have an older oprofile
+			userland or if you want common events.
+			Format: { archperfmon }
+			archperfmon: [X86] Force use of architectural
+				perfmon on Intel CPUs instead of the
+				CPU specific event set.
+
 	osst=		[HW,SCSI] SCSI Tape Driver
 			Format: <buffer_size>,<write_threshold>
 			See also Documentation/scsi/st.txt.
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 0000000..0112da3
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,142 @@
+Kernel Memory Leak Detector
+===========================
+
+Introduction
+------------
+
+Kmemleak provides a way of detecting possible kernel memory leaks in a
+way similar to a tracing garbage collector
+(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+with the difference that the orphan objects are not freed but only
+reported via /sys/kernel/debug/kmemleak. A similar method is used by the
+Valgrind tool (memcheck --leak-check) to detect the memory leaks in
+user-space applications.
+
+Usage
+-----
+
+CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
+thread scans the memory every 10 minutes (by default) and prints any new
+unreferenced objects found. To trigger an intermediate scan and display
+all the possible memory leaks:
+
+  # mount -t debugfs nodev /sys/kernel/debug/
+  # cat /sys/kernel/debug/kmemleak
+
+Note that the orphan objects are listed in the order they were allocated
+and one object at the beginning of the list may cause other subsequent
+objects to be reported as orphan.
+
+Memory scanning parameters can be modified at run-time by writing to the
+/sys/kernel/debug/kmemleak file. The following parameters are supported:
+
+  off		- disable kmemleak (irreversible)
+  stack=on	- enable the task stacks scanning
+  stack=off	- disable the tasks stacks scanning
+  scan=on	- start the automatic memory scanning thread
+  scan=off	- stop the automatic memory scanning thread
+  scan=<secs>	- set the automatic memory scanning period in seconds (0
+		  to disable it)
+
+Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
+the kernel command line.
+
+Basic Algorithm
+---------------
+
+The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
+friends are traced and the pointers, together with additional
+information like size and stack trace, are stored in a prio search tree.
+The corresponding freeing function calls are tracked and the pointers
+removed from the kmemleak data structures.
+
+An allocated block of memory is considered orphan if no pointer to its
+start address or to any location inside the block can be found by
+scanning the memory (including saved registers). This means that there
+might be no way for the kernel to pass the address of the allocated
+block to a freeing function and therefore the block is considered a
+memory leak.
+
+The scanning algorithm steps:
+
+  1. mark all objects as white (remaining white objects will later be
+     considered orphan)
+  2. scan the memory starting with the data section and stacks, checking
+     the values against the addresses stored in the prio search tree. If
+     a pointer to a white object is found, the object is added to the
+     gray list
+  3. scan the gray objects for matching addresses (some white objects
+     can become gray and added at the end of the gray list) until the
+     gray set is finished
+  4. the remaining white objects are considered orphan and reported via
+     /sys/kernel/debug/kmemleak
+
+Some allocated memory blocks have pointers stored in the kernel's
+internal data structures and they cannot be detected as orphans. To
+avoid this, kmemleak can also store the number of values pointing to an
+address inside the block address range that need to be found so that the
+block is not considered a leak. One example is __vmalloc().
+
+Kmemleak API
+------------
+
+See the include/linux/kmemleak.h header for the functions prototype.
+
+kmemleak_init		 - initialize kmemleak
+kmemleak_alloc		 - notify of a memory block allocation
+kmemleak_free		 - notify of a memory block freeing
+kmemleak_not_leak	 - mark an object as not a leak
+kmemleak_ignore		 - do not scan or report an object as leak
+kmemleak_scan_area	 - add scan areas inside a memory block
+kmemleak_no_scan	 - do not scan a memory block
+kmemleak_erase		 - erase an old value in a pointer variable
+kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
+kmemleak_free_recursive	 - as kmemleak_free but checks the recursiveness
+
+Dealing with false positives/negatives
+--------------------------------------
+
+The false negatives are real memory leaks (orphan objects) but not
+reported by kmemleak because values found during the memory scanning
+point to such objects. To reduce the number of false negatives, kmemleak
+provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and
+kmemleak_erase functions (see above). The task stacks also increase the
+amount of false negatives and their scanning is not enabled by default.
+
+The false positives are objects wrongly reported as being memory leaks
+(orphan). For objects known not to be leaks, kmemleak provides the
+kmemleak_not_leak function. The kmemleak_ignore could also be used if
+the memory block is known not to contain other pointers and it will no
+longer be scanned.
+
+Some of the reported leaks are only transient, especially on SMP
+systems, because of pointers temporarily stored in CPU registers or
+stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing
+the minimum age of an object to be reported as a memory leak.
+
+Limitations and Drawbacks
+-------------------------
+
+The main drawback is the reduced performance of memory allocation and
+freeing. To avoid other penalties, the memory scanning is only performed
+when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is
+intended for debugging purposes where the performance might not be the
+most important requirement.
+
+To keep the algorithm simple, kmemleak scans for values pointing to any
+address inside a block's address range. This may lead to an increased
+number of false negatives. However, it is likely that a real memory leak
+will eventually become visible.
+
+Another source of false negatives is the data stored in non-pointer
+values. In a future version, kmemleak could only scan the pointer
+members in the allocated structures. This feature would solve many of
+the false negative cases described above.
+
+The tool can report false positives. These are cases where an allocated
+block doesn't need to be freed (some cases in the init_call functions),
+the pointer is calculated by other methods than the usual container_of
+macro or the pointer is stored in a location not scanned by kmemleak.
+
+Page allocations and ioremap are not tracked. Only the ARM and x86
+architectures are currently supported.
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 1f4f9e8..28c8cdf 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,6 +1,5 @@
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
-LDLIBS:=-lz
+CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
 
 all: lguest
 
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index d36fcc0..9ebcd6e 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -16,6 +16,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <sys/eventfd.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <errno.h>
@@ -59,7 +60,6 @@
 /*:*/
 
 #define PAGE_PRESENT 0x7 	/* Present, RW, Execute */
-#define NET_PEERNUM 1
 #define BRIDGE_PFX "bridge:"
 #ifndef SIOCBRADDIF
 #define SIOCBRADDIF	0x89a2		/* add interface to bridge      */
@@ -76,19 +76,12 @@
 	do { if (verbose) printf(args); } while(0)
 /*:*/
 
-/* File descriptors for the Waker. */
-struct {
-	int pipe[2];
-	int lguest_fd;
-} waker_fds;
-
 /* The pointer to the start of guest memory. */
 static void *guest_base;
 /* The maximum guest physical address allowed, and maximum possible. */
 static unsigned long guest_limit, guest_max;
-/* The pipe for signal hander to write to. */
-static int timeoutpipe[2];
-static unsigned int timeout_usec = 500;
+/* The /dev/lguest file descriptor. */
+static int lguest_fd;
 
 /* a per-cpu variable indicating whose vcpu is currently running */
 static unsigned int __thread cpu_id;
@@ -96,11 +89,6 @@
 /* This is our list of devices. */
 struct device_list
 {
-	/* Summary information about the devices in our list: ready to pass to
-	 * select() to ask which need servicing.*/
-	fd_set infds;
-	int max_infd;
-
 	/* Counter to assign interrupt numbers. */
 	unsigned int next_irq;
 
@@ -126,22 +114,21 @@
 	/* The linked-list pointer. */
 	struct device *next;
 
-	/* The this device's descriptor, as mapped into the Guest. */
+	/* The device's descriptor, as mapped into the Guest. */
 	struct lguest_device_desc *desc;
 
+	/* We can't trust desc values once Guest has booted: we use these. */
+	unsigned int feature_len;
+	unsigned int num_vq;
+
 	/* The name of this device, for --verbose. */
 	const char *name;
 
-	/* If handle_input is set, it wants to be called when this file
-	 * descriptor is ready. */
-	int fd;
-	bool (*handle_input)(int fd, struct device *me);
-
 	/* Any queues attached to this device */
 	struct virtqueue *vq;
 
-	/* Handle status being finalized (ie. feature bits stable). */
-	void (*ready)(struct device *me);
+	/* Is it operational */
+	bool running;
 
 	/* Device-specific data. */
 	void *priv;
@@ -164,22 +151,28 @@
 	/* Last available index we saw. */
 	u16 last_avail_idx;
 
-	/* The routine to call when the Guest pings us, or timeout. */
-	void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
+	/* How many are used since we sent last irq? */
+	unsigned int pending_used;
 
-	/* Outstanding buffers */
-	unsigned int inflight;
+	/* Eventfd where Guest notifications arrive. */
+	int eventfd;
 
-	/* Is this blocked awaiting a timer? */
-	bool blocked;
+	/* Function for the thread which is servicing this virtqueue. */
+	void (*service)(struct virtqueue *vq);
+	pid_t thread;
 };
 
 /* Remember the arguments to the program so we can "reboot" */
 static char **main_args;
 
-/* Since guest is UP and we don't run at the same time, we don't need barriers.
- * But I include them in the code in case others copy it. */
-#define wmb()
+/* The original tty settings to restore on exit. */
+static struct termios orig_term;
+
+/* We have to be careful with barriers: our devices are all run in separate
+ * threads and so we need to make sure that changes visible to the Guest happen
+ * in precise order. */
+#define wmb() __asm__ __volatile__("" : : : "memory")
+#define mb() __asm__ __volatile__("" : : : "memory")
 
 /* Convert an iovec element to the given type.
  *
@@ -245,7 +238,7 @@
 static u8 *get_feature_bits(struct device *dev)
 {
 	return (u8 *)(dev->desc + 1)
-		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
+		+ dev->num_vq * sizeof(struct lguest_vqconfig);
 }
 
 /*L:100 The Launcher code itself takes us out into userspace, that scary place
@@ -505,99 +498,19 @@
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
  * the base of Guest "physical" memory, the top physical page to allow and the
  * entry point for the Guest. */
-static int tell_kernel(unsigned long start)
+static void tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
 				 (unsigned long)guest_base,
 				 guest_limit / getpagesize(), start };
-	int fd;
-
 	verbose("Guest: %p - %p (%#lx)\n",
 		guest_base, guest_base + guest_limit, guest_limit);
-	fd = open_or_die("/dev/lguest", O_RDWR);
-	if (write(fd, args, sizeof(args)) < 0)
+	lguest_fd = open_or_die("/dev/lguest", O_RDWR);
+	if (write(lguest_fd, args, sizeof(args)) < 0)
 		err(1, "Writing to /dev/lguest");
-
-	/* We return the /dev/lguest file descriptor to control this Guest */
-	return fd;
 }
 /*:*/
 
-static void add_device_fd(int fd)
-{
-	FD_SET(fd, &devices.infds);
-	if (fd > devices.max_infd)
-		devices.max_infd = fd;
-}
-
-/*L:200
- * The Waker.
- *
- * With console, block and network devices, we can have lots of input which we
- * need to process.  We could try to tell the kernel what file descriptors to
- * watch, but handing a file descriptor mask through to the kernel is fairly
- * icky.
- *
- * Instead, we clone off a thread which watches the file descriptors and writes
- * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
- * stop running the Guest.  This causes the Launcher to return from the
- * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
- * the LHREQ_BREAK and wake us up again.
- *
- * This, of course, is merely a different *kind* of icky.
- *
- * Given my well-known antipathy to threads, I'd prefer to use processes.  But
- * it's easier to share Guest memory with threads, and trivial to share the
- * devices.infds as the Launcher changes it.
- */
-static int waker(void *unused)
-{
-	/* Close the write end of the pipe: only the Launcher has it open. */
-	close(waker_fds.pipe[1]);
-
-	for (;;) {
-		fd_set rfds = devices.infds;
-		unsigned long args[] = { LHREQ_BREAK, 1 };
-		unsigned int maxfd = devices.max_infd;
-
-		/* We also listen to the pipe from the Launcher. */
-		FD_SET(waker_fds.pipe[0], &rfds);
-		if (waker_fds.pipe[0] > maxfd)
-			maxfd = waker_fds.pipe[0];
-
-		/* Wait until input is ready from one of the devices. */
-		select(maxfd+1, &rfds, NULL, NULL, NULL);
-
-		/* Message from Launcher? */
-		if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
-			char c;
-			/* If this fails, then assume Launcher has exited.
-			 * Don't do anything on exit: we're just a thread! */
-			if (read(waker_fds.pipe[0], &c, 1) != 1)
-				_exit(0);
-			continue;
-		}
-
-		/* Send LHREQ_BREAK command to snap the Launcher out of it. */
-		pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
-	}
-	return 0;
-}
-
-/* This routine just sets up a pipe to the Waker process. */
-static void setup_waker(int lguest_fd)
-{
-	/* This pipe is closed when Launcher dies, telling Waker. */
-	if (pipe(waker_fds.pipe) != 0)
-		err(1, "Creating pipe for Waker");
-
-	/* Waker also needs to know the lguest fd */
-	waker_fds.lguest_fd = lguest_fd;
-
-	if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
-		err(1, "Creating Waker");
-}
-
 /*
  * Device Handling.
  *
@@ -623,49 +536,90 @@
 /* Each buffer in the virtqueues is actually a chain of descriptors.  This
  * function returns the next descriptor in the chain, or vq->vring.num if we're
  * at the end. */
-static unsigned next_desc(struct virtqueue *vq, unsigned int i)
+static unsigned next_desc(struct vring_desc *desc,
+			  unsigned int i, unsigned int max)
 {
 	unsigned int next;
 
 	/* If this descriptor says it doesn't chain, we're done. */
-	if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
-		return vq->vring.num;
+	if (!(desc[i].flags & VRING_DESC_F_NEXT))
+		return max;
 
 	/* Check they're not leading us off end of descriptors. */
-	next = vq->vring.desc[i].next;
+	next = desc[i].next;
 	/* Make sure compiler knows to grab that: we don't want it changing! */
 	wmb();
 
-	if (next >= vq->vring.num)
+	if (next >= max)
 		errx(1, "Desc next is %u", next);
 
 	return next;
 }
 
+/* This actually sends the interrupt for this virtqueue */
+static void trigger_irq(struct virtqueue *vq)
+{
+	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
+
+	/* Don't inform them if nothing used. */
+	if (!vq->pending_used)
+		return;
+	vq->pending_used = 0;
+
+	/* If they don't want an interrupt, don't send one, unless empty. */
+	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+	    && lg_last_avail(vq) != vq->vring.avail->idx)
+		return;
+
+	/* Send the Guest an interrupt tell them we used something up. */
+	if (write(lguest_fd, buf, sizeof(buf)) != 0)
+		err(1, "Triggering irq %i", vq->config.irq);
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found, or vq->vring.num (which
- * is never a valid descriptor number) if none was found. */
-static unsigned get_vq_desc(struct virtqueue *vq,
-			    struct iovec iov[],
-			    unsigned int *out_num, unsigned int *in_num)
+ * This function returns the descriptor number found. */
+static unsigned wait_for_vq_desc(struct virtqueue *vq,
+				 struct iovec iov[],
+				 unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned int i, head;
-	u16 last_avail;
+	unsigned int i, head, max;
+	struct vring_desc *desc;
+	u16 last_avail = lg_last_avail(vq);
+
+	while (last_avail == vq->vring.avail->idx) {
+		u64 event;
+
+		/* OK, tell Guest about progress up to now. */
+		trigger_irq(vq);
+
+		/* OK, now we need to know about added descriptors. */
+		vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+
+		/* They could have slipped one in as we were doing that: make
+		 * sure it's written, then check again. */
+		mb();
+		if (last_avail != vq->vring.avail->idx) {
+			vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+			break;
+		}
+
+		/* Nothing new?  Wait for eventfd to tell us they refilled. */
+		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
+			errx(1, "Event read failed?");
+
+		/* We don't need to be notified again. */
+		vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+	}
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
-	last_avail = lg_last_avail(vq);
 	if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
 		errx(1, "Guest moved used index from %u to %u",
 		     last_avail, vq->vring.avail->idx);
 
-	/* If there's nothing new since last we looked, return invalid. */
-	if (vq->vring.avail->idx == last_avail)
-		return vq->vring.num;
-
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
 	head = vq->vring.avail->ring[last_avail % vq->vring.num];
@@ -678,15 +632,28 @@
 	/* When we start there are none of either input nor output. */
 	*out_num = *in_num = 0;
 
+	max = vq->vring.num;
+	desc = vq->vring.desc;
 	i = head;
+
+	/* If this is an indirect entry, then this buffer contains a descriptor
+	 * table which we handle as if it's any normal descriptor chain. */
+	if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+		if (desc[i].len % sizeof(struct vring_desc))
+			errx(1, "Invalid size for indirect buffer table");
+
+		max = desc[i].len / sizeof(struct vring_desc);
+		desc = check_pointer(desc[i].addr, desc[i].len);
+		i = 0;
+	}
+
 	do {
 		/* Grab the first descriptor, and check it's OK. */
-		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
+		iov[*out_num + *in_num].iov_len = desc[i].len;
 		iov[*out_num + *in_num].iov_base
-			= check_pointer(vq->vring.desc[i].addr,
-					vq->vring.desc[i].len);
+			= check_pointer(desc[i].addr, desc[i].len);
 		/* If this is an input descriptor, increment that count. */
-		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+		if (desc[i].flags & VRING_DESC_F_WRITE)
 			(*in_num)++;
 		else {
 			/* If it's an output descriptor, they're all supposed
@@ -697,11 +664,10 @@
 		}
 
 		/* If we've got too many, that implies a descriptor loop. */
-		if (*out_num + *in_num > vq->vring.num)
+		if (*out_num + *in_num > max)
 			errx(1, "Looped descriptor");
-	} while ((i = next_desc(vq, i)) != vq->vring.num);
+	} while ((i = next_desc(desc, i, max)) != max);
 
-	vq->inflight++;
 	return head;
 }
 
@@ -719,44 +685,20 @@
 	/* Make sure buffer is written before we update index. */
 	wmb();
 	vq->vring.used->idx++;
-	vq->inflight--;
-}
-
-/* This actually sends the interrupt for this virtqueue */
-static void trigger_irq(int fd, struct virtqueue *vq)
-{
-	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
-
-	/* If they don't want an interrupt, don't send one, unless empty. */
-	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-	    && vq->inflight)
-		return;
-
-	/* Send the Guest an interrupt tell them we used something up. */
-	if (write(fd, buf, sizeof(buf)) != 0)
-		err(1, "Triggering irq %i", vq->config.irq);
+	vq->pending_used++;
 }
 
 /* And here's the combo meal deal.  Supersize me! */
-static void add_used_and_trigger(int fd, struct virtqueue *vq,
-				 unsigned int head, int len)
+static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
 {
 	add_used(vq, head, len);
-	trigger_irq(fd, vq);
+	trigger_irq(vq);
 }
 
 /*
  * The Console
  *
- * Here is the input terminal setting we save, and the routine to restore them
- * on exit so the user gets their terminal back. */
-static struct termios orig_term;
-static void restore_term(void)
-{
-	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
-}
-
-/* We associate some data with the console for our exit hack. */
+ * We associate some data with the console for our exit hack. */
 struct console_abort
 {
 	/* How many times have they hit ^C? */
@@ -766,276 +708,275 @@
 };
 
 /* This is the routine which handles console input (ie. stdin). */
-static bool handle_console_input(int fd, struct device *dev)
+static void console_input(struct virtqueue *vq)
 {
 	int len;
 	unsigned int head, in_num, out_num;
-	struct iovec iov[dev->vq->vring.num];
-	struct console_abort *abort = dev->priv;
+	struct console_abort *abort = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
 
-	/* First we need a console buffer from the Guests's input virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-
-	/* If they're not ready for input, stop listening to this file
-	 * descriptor.  We'll start again once they add an input buffer. */
-	if (head == dev->vq->vring.num)
-		return false;
-
+	/* Make sure there's a descriptor waiting. */
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 	if (out_num)
 		errx(1, "Output buffers in console in queue?");
 
-	/* This is why we convert to iovecs: the readv() call uses them, and so
-	 * it reads straight into the Guest's buffer. */
-	len = readv(dev->fd, iov, in_num);
+	/* Read it in. */
+	len = readv(STDIN_FILENO, iov, in_num);
 	if (len <= 0) {
-		/* This implies that the console is closed, is /dev/null, or
-		 * something went terribly wrong. */
+		/* Ran out of input? */
 		warnx("Failed to get console input, ignoring console.");
-		/* Put the input terminal back. */
-		restore_term();
-		/* Remove callback from input vq, so it doesn't restart us. */
-		dev->vq->handle_output = NULL;
-		/* Stop listening to this fd: don't call us again. */
-		return false;
+		/* For simplicity, dying threads kill the whole Launcher.  So
+		 * just nap here. */
+		for (;;)
+			pause();
 	}
 
-	/* Tell the Guest about the new input. */
-	add_used_and_trigger(fd, dev->vq, head, len);
+	add_used_and_trigger(vq, head, len);
 
 	/* Three ^C within one second?  Exit.
 	 *
-	 * This is such a hack, but works surprisingly well.  Each ^C has to be
-	 * in a buffer by itself, so they can't be too fast.  But we check that
-	 * we get three within about a second, so they can't be too slow. */
-	if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
-		if (!abort->count++)
-			gettimeofday(&abort->start, NULL);
-		else if (abort->count == 3) {
-			struct timeval now;
-			gettimeofday(&now, NULL);
-			if (now.tv_sec <= abort->start.tv_sec+1) {
-				unsigned long args[] = { LHREQ_BREAK, 0 };
-				/* Close the fd so Waker will know it has to
-				 * exit. */
-				close(waker_fds.pipe[1]);
-				/* Just in case Waker is blocked in BREAK, send
-				 * unbreak now. */
-				write(fd, args, sizeof(args));
-				exit(2);
-			}
-			abort->count = 0;
-		}
-	} else
-		/* Any other key resets the abort counter. */
+	 * This is such a hack, but works surprisingly well.  Each ^C has to
+	 * be in a buffer by itself, so they can't be too fast.  But we check
+	 * that we get three within about a second, so they can't be too
+	 * slow. */
+	if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
 		abort->count = 0;
+		return;
+	}
 
-	/* Everything went OK! */
-	return true;
-}
-
-/* Handling output for console is simple: we just get all the output buffers
- * and write them to stdout. */
-static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
-{
-	unsigned int head, out, in;
-	int len;
-	struct iovec iov[vq->vring.num];
-
-	/* Keep getting output buffers from the Guest until we run out. */
-	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
-		if (in)
-			errx(1, "Input buffers in output queue?");
-		len = writev(STDOUT_FILENO, iov, out);
-		add_used_and_trigger(fd, vq, head, len);
+	abort->count++;
+	if (abort->count == 1)
+		gettimeofday(&abort->start, NULL);
+	else if (abort->count == 3) {
+		struct timeval now;
+		gettimeofday(&now, NULL);
+		/* Kill all Launcher processes with SIGINT, like normal ^C */
+		if (now.tv_sec <= abort->start.tv_sec+1)
+			kill(0, SIGINT);
+		abort->count = 0;
 	}
 }
 
-/* This is called when we no longer want to hear about Guest changes to a
- * virtqueue.  This is more efficient in high-traffic cases, but it means we
- * have to set a timer to check if any more changes have occurred. */
-static void block_vq(struct virtqueue *vq)
+/* This is the routine which handles console output (ie. stdout). */
+static void console_output(struct virtqueue *vq)
 {
-	struct itimerval itm;
+	unsigned int head, out, in;
+	struct iovec iov[vq->vring.num];
 
-	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	vq->blocked = true;
-
-	itm.it_interval.tv_sec = 0;
-	itm.it_interval.tv_usec = 0;
-	itm.it_value.tv_sec = 0;
-	itm.it_value.tv_usec = timeout_usec;
-
-	setitimer(ITIMER_REAL, &itm, NULL);
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (in)
+		errx(1, "Input buffers in console output queue?");
+	while (!iov_empty(iov, out)) {
+		int len = writev(STDOUT_FILENO, iov, out);
+		if (len <= 0)
+			err(1, "Write to stdout gave %i", len);
+		iov_consume(iov, out, len);
+	}
+	add_used(vq, head, 0);
 }
 
 /*
  * The Network
  *
  * Handling output for network is also simple: we get all the output buffers
- * and write them (ignoring the first element) to this device's file descriptor
- * (/dev/net/tun).
+ * and write them to /dev/net/tun.
  */
-static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
+struct net_info {
+	int tunfd;
+};
+
+static void net_output(struct virtqueue *vq)
 {
-	unsigned int head, out, in, num = 0;
-	int len;
+	struct net_info *net_info = vq->dev->priv;
+	unsigned int head, out, in;
 	struct iovec iov[vq->vring.num];
-	static int last_timeout_num;
 
-	/* Keep getting output buffers from the Guest until we run out. */
-	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
-		if (in)
-			errx(1, "Input buffers in output queue?");
-		len = writev(vq->dev->fd, iov, out);
-		if (len < 0)
-			err(1, "Writing network packet to tun");
-		add_used_and_trigger(fd, vq, head, len);
-		num++;
-	}
-
-	/* Block further kicks and set up a timer if we saw anything. */
-	if (!timeout && num)
-		block_vq(vq);
-
-	/* We never quite know how long should we wait before we check the
-	 * queue again for more packets.  We start at 500 microseconds, and if
-	 * we get fewer packets than last time, we assume we made the timeout
-	 * too small and increase it by 10 microseconds.  Otherwise, we drop it
-	 * by one microsecond every time.  It seems to work well enough. */
-	if (timeout) {
-		if (num < last_timeout_num)
-			timeout_usec += 10;
-		else if (timeout_usec > 1)
-			timeout_usec--;
-		last_timeout_num = num;
-	}
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (in)
+		errx(1, "Input buffers in net output queue?");
+	if (writev(net_info->tunfd, iov, out) < 0)
+		errx(1, "Write to tun failed?");
+	add_used(vq, head, 0);
 }
 
-/* This is where we handle a packet coming in from the tun device to our
+/* Will reading from this file descriptor block? */
+static bool will_block(int fd)
+{
+	fd_set fdset;
+	struct timeval zero = { 0, 0 };
+	FD_ZERO(&fdset);
+	FD_SET(fd, &fdset);
+	return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
+}
+
+/* This is where we handle packets coming in from the tun device to our
  * Guest. */
-static bool handle_tun_input(int fd, struct device *dev)
+static void net_input(struct virtqueue *vq)
 {
-	unsigned int head, in_num, out_num;
 	int len;
-	struct iovec iov[dev->vq->vring.num];
+	unsigned int head, out, in;
+	struct iovec iov[vq->vring.num];
+	struct net_info *net_info = vq->dev->priv;
 
-	/* First we need a network buffer from the Guests's recv virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-	if (head == dev->vq->vring.num) {
-		/* Now, it's expected that if we try to send a packet too
-		 * early, the Guest won't be ready yet.  Wait until the device
-		 * status says it's ready. */
-		/* FIXME: Actually want DRIVER_ACTIVE here. */
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (out)
+		errx(1, "Output buffers in net input queue?");
 
-		/* Now tell it we want to know if new things appear. */
-		dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-		wmb();
+	/* Deliver interrupt now, since we're about to sleep. */
+	if (vq->pending_used && will_block(net_info->tunfd))
+		trigger_irq(vq);
 
-		/* We'll turn this back on if input buffers are registered. */
-		return false;
-	} else if (out_num)
-		errx(1, "Output buffers in network recv queue?");
-
-	/* Read the packet from the device directly into the Guest's buffer. */
-	len = readv(dev->fd, iov, in_num);
+	len = readv(net_info->tunfd, iov, in);
 	if (len <= 0)
-		err(1, "reading network");
-
-	/* Tell the Guest about the new packet. */
-	add_used_and_trigger(fd, dev->vq, head, len);
-
-	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
-		((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1],
-		head != dev->vq->vring.num ? "sent" : "discarded");
-
-	/* All good. */
-	return true;
+		err(1, "Failed to read from tun.");
+	add_used(vq, head, len);
 }
 
-/*L:215 This is the callback attached to the network and console input
- * virtqueues: it ensures we try again, in case we stopped console or net
- * delivery because Guest didn't have any buffers. */
-static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
+/* This is the helper to create threads. */
+static int do_thread(void *_vq)
 {
-	add_device_fd(vq->dev->fd);
-	/* Snap the Waker out of its select loop. */
-	write(waker_fds.pipe[1], "", 1);
+	struct virtqueue *vq = _vq;
+
+	for (;;)
+		vq->service(vq);
+	return 0;
 }
 
-static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
+/* When a child dies, we kill our entire process group with SIGTERM.  This
+ * also has the side effect that the shell restores the console for us! */
+static void kill_launcher(int signal)
 {
-	/* We don't need to know again when Guest refills receive buffer. */
-	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	enable_fd(fd, vq, timeout);
+	kill(0, SIGTERM);
+}
+
+static void reset_device(struct device *dev)
+{
+	struct virtqueue *vq;
+
+	verbose("Resetting device %s\n", dev->name);
+
+	/* Clear any features they've acked. */
+	memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
+
+	/* We're going to be explicitly killing threads, so ignore them. */
+	signal(SIGCHLD, SIG_IGN);
+
+	/* Zero out the virtqueues, get rid of their threads */
+	for (vq = dev->vq; vq; vq = vq->next) {
+		if (vq->thread != (pid_t)-1) {
+			kill(vq->thread, SIGTERM);
+			waitpid(vq->thread, NULL, 0);
+			vq->thread = (pid_t)-1;
+		}
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
+		lg_last_avail(vq) = 0;
+	}
+	dev->running = false;
+
+	/* Now we care if threads die. */
+	signal(SIGCHLD, (void *)kill_launcher);
+}
+
+static void create_thread(struct virtqueue *vq)
+{
+	/* Create stack for thread and run it.  Since stack grows
+	 * upwards, we point the stack pointer to the end of this
+	 * region. */
+	char *stack = malloc(32768);
+	unsigned long args[] = { LHREQ_EVENTFD,
+				 vq->config.pfn*getpagesize(), 0 };
+
+	/* Create a zero-initialized eventfd. */
+	vq->eventfd = eventfd(0, 0);
+	if (vq->eventfd < 0)
+		err(1, "Creating eventfd");
+	args[2] = vq->eventfd;
+
+	/* Attach an eventfd to this virtqueue: it will go off
+	 * when the Guest does an LHCALL_NOTIFY for this vq. */
+	if (write(lguest_fd, &args, sizeof(args)) != 0)
+		err(1, "Attaching eventfd");
+
+	/* CLONE_VM: because it has to access the Guest memory, and
+	 * SIGCHLD so we get a signal if it dies. */
+	vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
+	if (vq->thread == (pid_t)-1)
+		err(1, "Creating clone");
+	/* We close our local copy, now the child has it. */
+	close(vq->eventfd);
+}
+
+static void start_device(struct device *dev)
+{
+	unsigned int i;
+	struct virtqueue *vq;
+
+	verbose("Device %s OK: offered", dev->name);
+	for (i = 0; i < dev->feature_len; i++)
+		verbose(" %02x", get_feature_bits(dev)[i]);
+	verbose(", accepted");
+	for (i = 0; i < dev->feature_len; i++)
+		verbose(" %02x", get_feature_bits(dev)
+			[dev->feature_len+i]);
+
+	for (vq = dev->vq; vq; vq = vq->next) {
+		if (vq->service)
+			create_thread(vq);
+	}
+	dev->running = true;
+}
+
+static void cleanup_devices(void)
+{
+	struct device *dev;
+
+	for (dev = devices.dev; dev; dev = dev->next)
+		reset_device(dev);
+
+	/* If we saved off the original terminal settings, restore them now. */
+	if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
+		tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
 }
 
 /* When the Guest tells us they updated the status field, we handle it. */
 static void update_device_status(struct device *dev)
 {
-	struct virtqueue *vq;
-
-	/* This is a reset. */
-	if (dev->desc->status == 0) {
-		verbose("Resetting device %s\n", dev->name);
-
-		/* Clear any features they've acked. */
-		memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
-		       dev->desc->feature_len);
-
-		/* Zero out the virtqueues. */
-		for (vq = dev->vq; vq; vq = vq->next) {
-			memset(vq->vring.desc, 0,
-			       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
-			lg_last_avail(vq) = 0;
-		}
-	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+	/* A zero status is a reset, otherwise it's a set of flags. */
+	if (dev->desc->status == 0)
+		reset_device(dev);
+	else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
 		warnx("Device %s configuration FAILED", dev->name);
+		if (dev->running)
+			reset_device(dev);
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-		unsigned int i;
-
-		verbose("Device %s OK: offered", dev->name);
-		for (i = 0; i < dev->desc->feature_len; i++)
-			verbose(" %02x", get_feature_bits(dev)[i]);
-		verbose(", accepted");
-		for (i = 0; i < dev->desc->feature_len; i++)
-			verbose(" %02x", get_feature_bits(dev)
-				[dev->desc->feature_len+i]);
-
-		if (dev->ready)
-			dev->ready(dev);
+		if (!dev->running)
+			start_device(dev);
 	}
 }
 
 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
-static void handle_output(int fd, unsigned long addr)
+static void handle_output(unsigned long addr)
 {
 	struct device *i;
-	struct virtqueue *vq;
 
-	/* Check each device and virtqueue. */
+	/* Check each device. */
 	for (i = devices.dev; i; i = i->next) {
+		struct virtqueue *vq;
+
 		/* Notifications to device descriptors update device status. */
 		if (from_guest_phys(addr) == i->desc) {
 			update_device_status(i);
 			return;
 		}
 
-		/* Notifications to virtqueues mean output has occurred. */
+		/* Devices *can* be used before status is set to DRIVER_OK. */
 		for (vq = i->vq; vq; vq = vq->next) {
-			if (vq->config.pfn != addr/getpagesize())
+			if (addr != vq->config.pfn*getpagesize())
 				continue;
-
-			/* Guest should acknowledge (and set features!)  before
-			 * using the device. */
-			if (i->desc->status == 0) {
-				warnx("%s gave early output", i->name);
-				return;
-			}
-
-			if (strcmp(vq->dev->name, "console") != 0)
-				verbose("Output to %s\n", vq->dev->name);
-			if (vq->handle_output)
-				vq->handle_output(fd, vq, false);
+			if (i->running)
+				errx(1, "Notification on running %s", i->name);
+			start_device(i);
 			return;
 		}
 	}
@@ -1049,71 +990,6 @@
 	      strnlen(from_guest_phys(addr), guest_limit - addr));
 }
 
-static void handle_timeout(int fd)
-{
-	char buf[32];
-	struct device *i;
-	struct virtqueue *vq;
-
-	/* Clear the pipe */
-	read(timeoutpipe[0], buf, sizeof(buf));
-
-	/* Check each device and virtqueue: flush blocked ones. */
-	for (i = devices.dev; i; i = i->next) {
-		for (vq = i->vq; vq; vq = vq->next) {
-			if (!vq->blocked)
-				continue;
-
-			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-			vq->blocked = false;
-			if (vq->handle_output)
-				vq->handle_output(fd, vq, true);
-		}
-	}
-}
-
-/* This is called when the Waker wakes us up: check for incoming file
- * descriptors. */
-static void handle_input(int fd)
-{
-	/* select() wants a zeroed timeval to mean "don't wait". */
-	struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
-
-	for (;;) {
-		struct device *i;
-		fd_set fds = devices.infds;
-		int num;
-
-		num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
-		/* Could get interrupted */
-		if (num < 0)
-			continue;
-		/* If nothing is ready, we're done. */
-		if (num == 0)
-			break;
-
-		/* Otherwise, call the device(s) which have readable file
-		 * descriptors and a method of handling them.  */
-		for (i = devices.dev; i; i = i->next) {
-			if (i->handle_input && FD_ISSET(i->fd, &fds)) {
-				if (i->handle_input(fd, i))
-					continue;
-
-				/* If handle_input() returns false, it means we
-				 * should no longer service it.  Networking and
-				 * console do this when there's no input
-				 * buffers to deliver into.  Console also uses
-				 * it when it discovers that stdin is closed. */
-				FD_CLR(i->fd, &devices.infds);
-			}
-		}
-
-		/* Is this the timeout fd? */
-		if (FD_ISSET(timeoutpipe[0], &fds))
-			handle_timeout(fd);
-	}
-}
-
 /*L:190
  * Device Setup
  *
@@ -1129,8 +1005,8 @@
 static u8 *device_config(const struct device *dev)
 {
 	return (void *)(dev->desc + 1)
-		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig)
-		+ dev->desc->feature_len * 2;
+		+ dev->num_vq * sizeof(struct lguest_vqconfig)
+		+ dev->feature_len * 2;
 }
 
 /* This routine allocates a new "struct lguest_device_desc" from descriptor
@@ -1159,7 +1035,7 @@
 /* Each device descriptor is followed by the description of its virtqueues.  We
  * specify how many descriptors the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
-			  void (*handle_output)(int, struct virtqueue *, bool))
+			  void (*service)(struct virtqueue *))
 {
 	unsigned int pages;
 	struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1174,8 +1050,8 @@
 	vq->next = NULL;
 	vq->last_avail_idx = 0;
 	vq->dev = dev;
-	vq->inflight = 0;
-	vq->blocked = false;
+	vq->service = service;
+	vq->thread = (pid_t)-1;
 
 	/* Initialize the configuration. */
 	vq->config.num = num_descs;
@@ -1191,6 +1067,7 @@
 	 * yet, otherwise we'd be overwriting them. */
 	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
 	memcpy(device_config(dev), &vq->config, sizeof(vq->config));
+	dev->num_vq++;
 	dev->desc->num_vq++;
 
 	verbose("Virtqueue page %#lx\n", to_guest_phys(p));
@@ -1199,15 +1076,6 @@
 	 * second.  */
 	for (i = &dev->vq; *i; i = &(*i)->next);
 	*i = vq;
-
-	/* Set the routine to call when the Guest does something to this
-	 * virtqueue. */
-	vq->handle_output = handle_output;
-
-	/* As an optimization, set the advisory "Don't Notify Me" flag if we
-	 * don't have a handler */
-	if (!handle_output)
-		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
 }
 
 /* The first half of the feature bitmask is for us to advertise features.  The
@@ -1219,7 +1087,7 @@
 	/* We can't extend the feature bits once we've added config bytes */
 	if (dev->desc->feature_len <= bit / CHAR_BIT) {
 		assert(dev->desc->config_len == 0);
-		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
+		dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
 	}
 
 	features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
@@ -1243,22 +1111,17 @@
  * calling new_dev_desc() to allocate the descriptor and device memory.
  *
  * See what I mean about userspace being boring? */
-static struct device *new_device(const char *name, u16 type, int fd,
-				 bool (*handle_input)(int, struct device *))
+static struct device *new_device(const char *name, u16 type)
 {
 	struct device *dev = malloc(sizeof(*dev));
 
 	/* Now we populate the fields one at a time. */
-	dev->fd = fd;
-	/* If we have an input handler for this file descriptor, then we add it
-	 * to the device_list's fdset and maxfd. */
-	if (handle_input)
-		add_device_fd(dev->fd);
 	dev->desc = new_dev_desc(type);
-	dev->handle_input = handle_input;
 	dev->name = name;
 	dev->vq = NULL;
-	dev->ready = NULL;
+	dev->feature_len = 0;
+	dev->num_vq = 0;
+	dev->running = false;
 
 	/* Append to device list.  Prepending to a single-linked list is
 	 * easier, but the user expects the devices to be arranged on the bus
@@ -1286,13 +1149,10 @@
 		 * raw input stream to the Guest. */
 		term.c_lflag &= ~(ISIG|ICANON|ECHO);
 		tcsetattr(STDIN_FILENO, TCSANOW, &term);
-		/* If we exit gracefully, the original settings will be
-		 * restored so the user can see what they're typing. */
-		atexit(restore_term);
 	}
 
-	dev = new_device("console", VIRTIO_ID_CONSOLE,
-			 STDIN_FILENO, handle_console_input);
+	dev = new_device("console", VIRTIO_ID_CONSOLE);
+
 	/* We store the console state in dev->priv, and initialize it. */
 	dev->priv = malloc(sizeof(struct console_abort));
 	((struct console_abort *)dev->priv)->count = 0;
@@ -1301,31 +1161,13 @@
 	 * they put something the input queue, we make sure we're listening to
 	 * stdin.  When they put something in the output queue, we write it to
 	 * stdout. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
 
-	verbose("device %u: console\n", devices.device_num++);
+	verbose("device %u: console\n", ++devices.device_num);
 }
 /*:*/
 
-static void timeout_alarm(int sig)
-{
-	write(timeoutpipe[1], "", 1);
-}
-
-static void setup_timeout(void)
-{
-	if (pipe(timeoutpipe) != 0)
-		err(1, "Creating timeout pipe");
-
-	if (fcntl(timeoutpipe[1], F_SETFL,
-		  fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
-		err(1, "Making timeout pipe nonblocking");
-
-	add_device_fd(timeoutpipe[0]);
-	signal(SIGALRM, timeout_alarm);
-}
-
 /*M:010 Inter-guest networking is an interesting area.  Simplest is to have a
  * --sharenet=<name> option which opens or creates a named pipe.  This can be
  * used to send packets to another guest in a 1:1 manner.
@@ -1447,21 +1289,23 @@
 static void setup_tun_net(char *arg)
 {
 	struct device *dev;
-	int netfd, ipfd;
+	struct net_info *net_info = malloc(sizeof(*net_info));
+	int ipfd;
 	u32 ip = INADDR_ANY;
 	bool bridging = false;
 	char tapif[IFNAMSIZ], *p;
 	struct virtio_net_config conf;
 
-	netfd = get_tun_device(tapif);
+	net_info->tunfd = get_tun_device(tapif);
 
 	/* First we create a new network device. */
-	dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input);
+	dev = new_device("net", VIRTIO_ID_NET);
+	dev->priv = net_info;
 
 	/* Network devices need a receive and a send queue, just like
 	 * console. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd);
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
 
 	/* We need a socket to perform the magic network ioctls to bring up the
 	 * tap interface, connect to the bridge etc.  Any socket will do! */
@@ -1502,6 +1346,8 @@
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
 	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
+	/* We handle indirect ring entries */
+	add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
 	set_config(dev, sizeof(conf), &conf);
 
 	/* We don't need the socket any more; setup is done. */
@@ -1550,20 +1396,18 @@
  * Remember that the block device is handled by a separate I/O thread.  We head
  * straight into the core of that thread here:
  */
-static bool service_io(struct device *dev)
+static void blk_request(struct virtqueue *vq)
 {
-	struct vblk_info *vblk = dev->priv;
+	struct vblk_info *vblk = vq->dev->priv;
 	unsigned int head, out_num, in_num, wlen;
 	int ret;
 	u8 *in;
 	struct virtio_blk_outhdr *out;
-	struct iovec iov[dev->vq->vring.num];
+	struct iovec iov[vq->vring.num];
 	off64_t off;
 
-	/* See if there's a request waiting.  If not, nothing to do. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-	if (head == dev->vq->vring.num)
-		return false;
+	/* Get the next request. */
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 
 	/* Every block request should contain at least one output buffer
 	 * (detailing the location on disk and the type of request) and one
@@ -1637,83 +1481,21 @@
 	if (out->type & VIRTIO_BLK_T_BARRIER)
 		fdatasync(vblk->fd);
 
-	/* We can't trigger an IRQ, because we're not the Launcher.  It does
-	 * that when we tell it we're done. */
-	add_used(dev->vq, head, wlen);
-	return true;
-}
-
-/* This is the thread which actually services the I/O. */
-static int io_thread(void *_dev)
-{
-	struct device *dev = _dev;
-	struct vblk_info *vblk = dev->priv;
-	char c;
-
-	/* Close other side of workpipe so we get 0 read when main dies. */
-	close(vblk->workpipe[1]);
-	/* Close the other side of the done_fd pipe. */
-	close(dev->fd);
-
-	/* When this read fails, it means Launcher died, so we follow. */
-	while (read(vblk->workpipe[0], &c, 1) == 1) {
-		/* We acknowledge each request immediately to reduce latency,
-		 * rather than waiting until we've done them all.  I haven't
-		 * measured to see if it makes any difference.
-		 *
-		 * That would be an interesting test, wouldn't it?  You could
-		 * also try having more than one I/O thread. */
-		while (service_io(dev))
-			write(vblk->done_fd, &c, 1);
-	}
-	return 0;
-}
-
-/* Now we've seen the I/O thread, we return to the Launcher to see what happens
- * when that thread tells us it's completed some I/O. */
-static bool handle_io_finish(int fd, struct device *dev)
-{
-	char c;
-
-	/* If the I/O thread died, presumably it printed the error, so we
-	 * simply exit. */
-	if (read(dev->fd, &c, 1) != 1)
-		exit(1);
-
-	/* It did some work, so trigger the irq. */
-	trigger_irq(fd, dev->vq);
-	return true;
-}
-
-/* When the Guest submits some I/O, we just need to wake the I/O thread. */
-static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
-{
-	struct vblk_info *vblk = vq->dev->priv;
-	char c = 0;
-
-	/* Wake up I/O thread and tell it to go to work! */
-	if (write(vblk->workpipe[1], &c, 1) != 1)
-		/* Presumably it indicated why it died. */
-		exit(1);
+	add_used(vq, head, wlen);
 }
 
 /*L:198 This actually sets up a virtual block device. */
 static void setup_block_file(const char *filename)
 {
-	int p[2];
 	struct device *dev;
 	struct vblk_info *vblk;
-	void *stack;
 	struct virtio_blk_config conf;
 
-	/* This is the pipe the I/O thread will use to tell us I/O is done. */
-	pipe(p);
-
 	/* The device responds to return from I/O thread. */
-	dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish);
+	dev = new_device("block", VIRTIO_ID_BLOCK);
 
 	/* The device has one virtqueue, where the Guest places requests. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
 
 	/* Allocate the room for our own bookkeeping */
 	vblk = dev->priv = malloc(sizeof(*vblk));
@@ -1735,49 +1517,29 @@
 
 	set_config(dev, sizeof(conf), &conf);
 
-	/* The I/O thread writes to this end of the pipe when done. */
-	vblk->done_fd = p[1];
-
-	/* This is the second pipe, which is how we tell the I/O thread about
-	 * more work. */
-	pipe(vblk->workpipe);
-
-	/* Create stack for thread and run it.  Since stack grows upwards, we
-	 * point the stack pointer to the end of this region. */
-	stack = malloc(32768);
-	/* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
-	 * becoming a zombie. */
-	if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
-		err(1, "Creating clone");
-
-	/* We don't need to keep the I/O thread's end of the pipes open. */
-	close(vblk->done_fd);
-	close(vblk->workpipe[0]);
-
 	verbose("device %u: virtblock %llu sectors\n",
-		devices.device_num, le64_to_cpu(conf.capacity));
+		++devices.device_num, le64_to_cpu(conf.capacity));
 }
 
+struct rng_info {
+	int rfd;
+};
+
 /* Our random number generator device reads from /dev/random into the Guest's
  * input buffers.  The usual case is that the Guest doesn't want random numbers
  * and so has no buffers although /dev/random is still readable, whereas
  * console is the reverse.
  *
  * The same logic applies, however. */
-static bool handle_rng_input(int fd, struct device *dev)
+static void rng_input(struct virtqueue *vq)
 {
 	int len;
 	unsigned int head, in_num, out_num, totlen = 0;
-	struct iovec iov[dev->vq->vring.num];
+	struct rng_info *rng_info = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
 
 	/* First we need a buffer from the Guests's virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-
-	/* If they're not ready for input, stop listening to this file
-	 * descriptor.  We'll start again once they add an input buffer. */
-	if (head == dev->vq->vring.num)
-		return false;
-
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 	if (out_num)
 		errx(1, "Output buffers in rng?");
 
@@ -1785,7 +1547,7 @@
 	 * it reads straight into the Guest's buffer.  We loop to make sure we
 	 * fill it. */
 	while (!iov_empty(iov, in_num)) {
-		len = readv(dev->fd, iov, in_num);
+		len = readv(rng_info->rfd, iov, in_num);
 		if (len <= 0)
 			err(1, "Read from /dev/random gave %i", len);
 		iov_consume(iov, in_num, len);
@@ -1793,25 +1555,23 @@
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used_and_trigger(fd, dev->vq, head, totlen);
-
-	/* Everything went OK! */
-	return true;
+	add_used(vq, head, totlen);
 }
 
 /* And this creates a "hardware" random number device for the Guest. */
 static void setup_rng(void)
 {
 	struct device *dev;
-	int fd;
+	struct rng_info *rng_info = malloc(sizeof(*rng_info));
 
-	fd = open_or_die("/dev/random", O_RDONLY);
+	rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
 
 	/* The device responds to return from I/O thread. */
-	dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input);
+	dev = new_device("rng", VIRTIO_ID_RNG);
+	dev->priv = rng_info;
 
 	/* The device has one virtqueue, where the Guest places inbufs. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
+	add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
 
 	verbose("device %u: rng\n", devices.device_num++);
 }
@@ -1827,17 +1587,18 @@
 	for (i = 3; i < FD_SETSIZE; i++)
 		close(i);
 
-	/* The exec automatically gets rid of the I/O and Waker threads. */
+	/* Reset all the devices (kills all threads). */
+	cleanup_devices();
+
 	execv(main_args[0], main_args);
 	err(1, "Could not exec %s", main_args[0]);
 }
 
 /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
  * its input and output, and finally, lays it to rest. */
-static void __attribute__((noreturn)) run_guest(int lguest_fd)
+static void __attribute__((noreturn)) run_guest(void)
 {
 	for (;;) {
-		unsigned long args[] = { LHREQ_BREAK, 0 };
 		unsigned long notify_addr;
 		int readval;
 
@@ -1848,8 +1609,7 @@
 		/* One unsigned long means the Guest did HCALL_NOTIFY */
 		if (readval == sizeof(notify_addr)) {
 			verbose("Notify on address %#lx\n", notify_addr);
-			handle_output(lguest_fd, notify_addr);
-			continue;
+			handle_output(notify_addr);
 		/* ENOENT means the Guest died.  Reading tells us why. */
 		} else if (errno == ENOENT) {
 			char reason[1024] = { 0 };
@@ -1858,19 +1618,9 @@
 		/* ERESTART means that we need to reboot the guest */
 		} else if (errno == ERESTART) {
 			restart_guest();
-		/* EAGAIN means a signal (timeout).
-		 * Anything else means a bug or incompatible change. */
-		} else if (errno != EAGAIN)
+		/* Anything else means a bug or incompatible change. */
+		} else
 			err(1, "Running guest failed");
-
-		/* Only service input on thread for CPU 0. */
-		if (cpu_id != 0)
-			continue;
-
-		/* Service input, then unset the BREAK to release the Waker. */
-		handle_input(lguest_fd);
-		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
-			err(1, "Resetting break");
 	}
 }
 /*L:240
@@ -1904,8 +1654,8 @@
 	/* Memory, top-level pagetable, code startpoint and size of the
 	 * (optional) initrd. */
 	unsigned long mem = 0, start, initrd_size = 0;
-	/* Two temporaries and the /dev/lguest file descriptor. */
-	int i, c, lguest_fd;
+	/* Two temporaries. */
+	int i, c;
 	/* The boot information for the Guest. */
 	struct boot_params *boot;
 	/* If they specify an initrd file to load. */
@@ -1913,18 +1663,10 @@
 
 	/* Save the args: we "reboot" by execing ourselves again. */
 	main_args = argv;
-	/* We don't "wait" for the children, so prevent them from becoming
-	 * zombies. */
-	signal(SIGCHLD, SIG_IGN);
 
-	/* First we initialize the device list.  Since console and network
-	 * device receive input from a file descriptor, we keep an fdset
-	 * (infds) and the maximum fd number (max_infd) with the head of the
-	 * list.  We also keep a pointer to the last device.  Finally, we keep
-	 * the next interrupt number to use for devices (1: remember that 0 is
-	 * used by the timer). */
-	FD_ZERO(&devices.infds);
-	devices.max_infd = -1;
+	/* First we initialize the device list.  We keep a pointer to the last
+	 * device, and the next interrupt number to use for devices (1:
+	 * remember that 0 is used by the timer). */
 	devices.lastdev = NULL;
 	devices.next_irq = 1;
 
@@ -1982,9 +1724,6 @@
 	/* We always have a console device */
 	setup_console();
 
-	/* We can timeout waiting for Guest network transmit. */
-	setup_timeout();
-
 	/* Now we load the kernel */
 	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
 
@@ -2023,15 +1762,16 @@
 
 	/* We tell the kernel to initialize the Guest: this returns the open
 	 * /dev/lguest file descriptor. */
-	lguest_fd = tell_kernel(start);
+	tell_kernel(start);
 
-	/* We clone off a thread, which wakes the Launcher whenever one of the
-	 * input file descriptors needs attention.  We call this the Waker, and
-	 * we'll cover it in a moment. */
-	setup_waker(lguest_fd);
+	/* Ensure that we terminate if a child dies. */
+	signal(SIGCHLD, kill_launcher);
+
+	/* If we exit via err(), this kills all the threads, restores tty. */
+	atexit(cleanup_devices);
 
 	/* Finally, run the Guest.  This doesn't return. */
-	run_guest(lguest_fd);
+	run_guest();
 }
 /*:*/
 
diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt
index 28c7473..efb3a6a 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -37,7 +37,6 @@
      "Paravirtualized guest support" = Y
         "Lguest guest support" = Y
      "High Memory Support" = off/4GB
-     "PAE (Physical Address Extension) Support" = N
      "Alignment value to which kernel should be aligned" = 0x100000
         (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
          CONFIG_PHYSICAL_ALIGN=0x100000)
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127..7f5809e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@
 
      - Locking functions.
      - Interrupt disabling functions.
+     - Sleep and wake-up functions.
      - Miscellaneous functions.
 
  (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@
 other means.
 
 
+SLEEP AND WAKE-UP FUNCTIONS
+---------------------------
+
+Sleeping and waking on an event flagged in global data can be viewed as an
+interaction between two pieces of data: the task state of the task waiting for
+the event and the global data used to indicate the event.  To make sure that
+these appear to happen in the right order, the primitives to begin the process
+of going to sleep, and the primitives to initiate a wake up imply certain
+barriers.
+
+Firstly, the sleeper normally follows something like this sequence of events:
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (event_indicated)
+			break;
+		schedule();
+	}
+
+A general memory barrier is interpolated automatically by set_current_state()
+after it has altered the task state:
+
+	CPU 1
+	===============================
+	set_current_state();
+	  set_mb();
+	    STORE current->state
+	    <general barrier>
+	LOAD event_indicated
+
+set_current_state() may be wrapped by:
+
+	prepare_to_wait();
+	prepare_to_wait_exclusive();
+
+which therefore also imply a general memory barrier after setting the state.
+The whole sequence above is available in various canned forms, all of which
+interpolate the memory barrier in the right place:
+
+	wait_event();
+	wait_event_interruptible();
+	wait_event_interruptible_exclusive();
+	wait_event_interruptible_timeout();
+	wait_event_killable();
+	wait_event_timeout();
+	wait_on_bit();
+	wait_on_bit_lock();
+
+
+Secondly, code that performs a wake up normally follows something like this:
+
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+or:
+
+	event_indicated = 1;
+	wake_up_process(event_daemon);
+
+A write memory barrier is implied by wake_up() and co. if and only if they wake
+something up.  The barrier occurs before the task state is cleared, and so sits
+between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	set_current_state();		STORE event_indicated
+	  set_mb();			wake_up();
+	    STORE current->state	  <write barrier>
+	    <general barrier>		  STORE current->state
+	LOAD event_indicated
+
+The available waker functions include:
+
+	complete();
+	wake_up();
+	wake_up_all();
+	wake_up_bit();
+	wake_up_interruptible();
+	wake_up_interruptible_all();
+	wake_up_interruptible_nr();
+	wake_up_interruptible_poll();
+	wake_up_interruptible_sync();
+	wake_up_interruptible_sync_poll();
+	wake_up_locked();
+	wake_up_locked_poll();
+	wake_up_nr();
+	wake_up_poll();
+	wake_up_process();
+
+
+[!] Note that the memory barriers implied by the sleeper and the waker do _not_
+order multiple stores before the wake-up with respect to loads of those stored
+values after the sleeper has called set_current_state().  For instance, if the
+sleeper does:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated)
+		break;
+	__set_current_state(TASK_RUNNING);
+	do_something(my_data);
+
+and the waker does:
+
+	my_data = value;
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+there's no guarantee that the change to event_indicated will be perceived by
+the sleeper as coming after the change to my_data.  In such a circumstance, the
+code on both sides must interpolate its own memory barriers between the
+separate data accesses.  Thus the above sleeper ought to do:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated) {
+		smp_rmb();
+		do_something(my_data);
+	}
+
+and the waker should do:
+
+	my_data = value;
+	smp_wmb();
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+
 MISCELLANEOUS FUNCTIONS
 -----------------------
 
@@ -1366,7 +1493,7 @@
 
 Under normal operation, memory operation reordering is generally not going to
 be a problem as a single-threaded linear piece of code will still appear to
-work correctly, even if it's in an SMP kernel.  There are, however, three
+work correctly, even if it's in an SMP kernel.  There are, however, four
 circumstances in which reordering definitely _could_ be a problem:
 
  (*) Interprocessor interaction.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 421e7d0..c9abbd8 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -75,9 +75,6 @@
 struct bus_type {
 	...
 	int  (*suspend)(struct device *dev, pm_message_t state);
-	int  (*suspend_late)(struct device *dev, pm_message_t state);
-
-	int  (*resume_early)(struct device *dev);
 	int  (*resume)(struct device *dev);
 };
 
@@ -226,20 +223,7 @@
 
 	This call should handle parts of device suspend logic that require
 	sleeping.  It probably does work to quiesce the device which hasn't
-	been abstracted into class.suspend() or bus.suspend_late().
-
-   3	bus.suspend_late(dev, message) is called with IRQs disabled, and
-	with only one CPU active.  Until the bus.resume_early() phase
-	completes (see later), IRQs are not enabled again.  This method
-	won't be exposed by all busses; for message based busses like USB,
-	I2C, or SPI, device interactions normally require IRQs.  This bus
-	call may be morphed into a driver call with bus-specific parameters.
-
-	This call might save low level hardware state that might otherwise
-	be lost in the upcoming low power state, and actually put the
-	device into a low power state ... so that in some cases the device
-	may stay partly usable until this late.  This "late" call may also
-	help when coping with hardware that behaves badly.
+	been abstracted into class.suspend().
 
 The pm_message_t parameter is currently used to refine those semantics
 (described later).
@@ -351,19 +335,11 @@
 
 The phases are seen by driver notifications issued in this order:
 
-   1	bus.resume_early(dev) is called with IRQs disabled, and with
-   	only one CPU active.  As with bus.suspend_late(), this method
-	won't be supported on busses that require IRQs in order to
-	interact with devices.
+   1	bus.resume(dev) reverses the effects of bus.suspend().  This may
+	be morphed into a device driver call with bus-specific parameters;
+	implementations may sleep.
 
-	This reverses the effects of bus.suspend_late().
-
-   2	bus.resume(dev) is called next.  This may be morphed into a device
-   	driver call with bus-specific parameters; implementations may sleep.
-
-	This reverses the effects of bus.suspend().
-
-   3	class.resume(dev) is called for devices associated with a class
+   2	class.resume(dev) is called for devices associated with a class
 	that has such a method.  Implementations may sleep.
 
 	This reverses the effects of class.suspend(), and would usually
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 5ba4d3f..1df7f9c 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -4,6 +4,7 @@
 CONTENTS
 ========
 
+0. WARNING
 1. Overview
   1.1 The problem
   1.2 The solution
@@ -14,6 +15,23 @@
 3. Future plans
 
 
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unstable system, the knobs are
+ root only and assumes root knows what he is doing.
+
+Most notable:
+
+ * very small values in sched_rt_period_us can result in an unstable
+   system when the period is smaller than either the available hrtimer
+   resolution, or the time it takes to handle the budget refresh itself.
+
+ * very small values in sched_rt_runtime_us can result in an unstable
+   system when the runtime is so small the system has difficulty making
+   forward progress (NOTE: the migration thread and kstopmachine both
+   are real-time processes).
+
 1. Overview
 ===========
 
@@ -169,7 +187,7 @@
 
 Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
-the limited static priority levels 0-139. With deadline scheduling you need to
+the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
 deadline delta (deadline - now).
 
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..5c08d96 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -460,6 +460,25 @@
 
     The power-management is supported.
 
+  Module snd-ctxfi
+  ----------------
+
+    Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips)
+	* Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series
+	* Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
+	* Creative Sound Blaster X-Fi Titanium Professional Audio
+	* Creative Sound Blaster X-Fi Titanium
+	* Creative Sound Blaster X-Fi Elite Pro
+	* Creative Sound Blaster X-Fi Platinum
+	* Creative Sound Blaster X-Fi Fatal1ty
+	* Creative Sound Blaster X-Fi XtremeGamer
+	* Creative Sound Blaster X-Fi XtremeMusic
+
+    reference_rate	- reference sample rate, 44100 or 48000 (default)
+    multiple		- multiple to ref. sample rate, 1 or 2 (default)
+
+    This module supports multiple cards.
+
   Module snd-darla20
   ------------------
 
@@ -925,6 +944,7 @@
 			* Onkyo SE-90PCI
 			* Onkyo SE-200PCI
 			* ESI Juli@
+			* ESI Maya44
 			* Hercules Fortissimo IV
 			* EGO-SYS WaveTerminal 192M
 
@@ -933,7 +953,7 @@
 		  prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192,
 		  juli, aureon51, aureon71, universe, ap192, k8x800,
 		  phase22, phase28, ms300, av710, se200pci, se90pci,
-		  fortissimo4, sn25p, WT192M
+		  fortissimo4, sn25p, WT192M, maya44
 
     This module supports multiple cards and autoprobe.
 
@@ -1093,6 +1113,13 @@
     This module supports multiple cards.
     The driver requires the firmware loader support on kernel.
 
+  Module snd-lx6464es
+  -------------------
+
+    Module for Digigram LX6464ES boards
+
+    This module supports multiple cards.
+
   Module snd-maestro3
   -------------------
 
@@ -1543,13 +1570,15 @@
   Module snd-sc6000
   -----------------
 
-    Module for Gallant SC-6000 soundcard.
+    Module for Gallant SC-6000 soundcard and later models: SC-6600
+    and SC-7000.
 
     port	- Port # (0x220 or 0x240)
     mss_port	- MSS Port # (0x530 or 0xe80)
     irq		- IRQ # (5,7,9,10,11)
     mpu_irq	- MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq
     dma		- DMA # (1,3,0)
+    joystick	- Enable gameport - 0 = disable (default), 1 = enable
 
     This module supports multiple cards.
 
@@ -1859,7 +1888,8 @@
   -------------------
 
     Module for sound cards based on the Asus AV100/AV200 chips,
-    i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), and Essence STX.
+    i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), Essence ST
+    (Deluxe) and Essence STX.
 
     This module supports autoprobe and multiple cards.
 
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 322869f..de8e10a 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -36,6 +36,7 @@
   acer		Acer TravelMate
   will		Will laptops (PB V7900)
   replacer	Replacer 672V
+  favorit100	Maxdata Favorit 100XS
   basic		fixed pin assignment (old default model)
   test		for testing/debugging purpose, almost all controls can
 		adjusted.  Appearing only when compiled with
@@ -85,10 +86,11 @@
   eeepc-p703	ASUS Eeepc P703 P900A
   eeepc-p901	ASUS Eeepc P901 S101
   fujitsu	FSC Amilo
+  lifebook	Fujitsu Lifebook S6420
   auto		auto-config reading BIOS (default)
 
-ALC662/663
-==========
+ALC662/663/272
+==============
   3stack-dig	3-stack (2-channel) with SPDIF
   3stack-6ch	 3-stack (6-channel)
   3stack-6ch-dig 3-stack (6-channel) with SPDIF
@@ -107,6 +109,9 @@
   asus-mode4	ASUS
   asus-mode5	ASUS
   asus-mode6	ASUS
+  dell		Dell with ALC272
+  dell-zm1	Dell ZM1 with ALC272
+  samsung-nc10	Samsung NC10 mini notebook
   auto		auto-config reading BIOS (default)
 
 ALC882/885
@@ -118,6 +123,7 @@
   asus-a7j	ASUS A7J
   asus-a7m	ASUS A7M
   macpro	MacPro support
+  mb5		Macbook 5,1
   mbp3		Macbook Pro rev3
   imac24	iMac 24'' with jack detection
   w2jc		ASUS W2JC
@@ -133,10 +139,12 @@
   acer		Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
   acer-aspire	Acer Aspire 9810
   acer-aspire-4930g Acer Aspire 4930G
+  acer-aspire-8930g Acer Aspire 8930G
   medion	Medion Laptops
   medion-md2	Medion MD2
   targa-dig	Targa/MSI
-  targa-2ch-dig	Targs/MSI with 2-channel
+  targa-2ch-dig	Targa/MSI with 2-channel
+  targa-8ch-dig Targa/MSI with 8-channel (MSI GX620)
   laptop-eapd   3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
   lenovo-101e	Lenovo 101E
   lenovo-nb0763	Lenovo NB0763
@@ -150,6 +158,9 @@
   fujitsu-pi2515 Fujitsu AMILO Pi2515
   fujitsu-xa3530 Fujitsu AMILO XA3530
   3stack-6ch-intel Intel DG33* boards
+  asus-p5q	ASUS P5Q-EM boards
+  mb31		MacBook 3,1
+  sony-vaio-tt  Sony VAIO TT
   auto		auto-config reading BIOS (default)
 
 ALC861/660
@@ -348,6 +359,7 @@
   hp-m4		HP mini 1000
   hp-dv5	HP dv series
   hp-hdx	HP HDX series
+  hp-dv4-1222nr	HP dv4-1222nr (with LED support)
   auto		BIOS setup (default)
 
 STAC92HD73*
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index cfac20c..381908d 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -88,26 +88,34 @@
 	substreams, etc.
 
 card*/pcm*/xrun_debug
-	This file appears when CONFIG_SND_DEBUG=y.
-	This shows the status of xrun (= buffer overrun/xrun) debug of
-	ALSA PCM middle layer, as an integer from 0 to 2.  The value
-	can be changed by writing to this file, such as
+	This file appears when CONFIG_SND_DEBUG=y and
+	CONFIG_PCM_XRUN_DEBUG=y.
+	This shows the status of xrun (= buffer overrun/xrun) and
+	invalid PCM position debug/check of ALSA PCM middle layer.
+	It takes an integer value, can be changed by writing to this
+	file, such as
 
-		 # cat 2 > /proc/asound/card0/pcm0p/xrun_debug
+		 # cat 5 > /proc/asound/card0/pcm0p/xrun_debug
 
-	When this value is greater than 0, the driver will show the
-	messages to kernel log when an xrun is detected.  The debug
-	message is shown also when the invalid H/W pointer is detected
-	at the update of periods (usually called from the interrupt
+	The value consists of the following bit flags:
+	  bit 0 = Enable XRUN/jiffies debug messages
+	  bit 1 = Show stack trace at XRUN / jiffies check
+	  bit 2 = Enable additional jiffies check
+
+	When the bit 0 is set, the driver will show the messages to
+	kernel log when an xrun is detected.  The debug message is
+	shown also when the invalid H/W pointer is detected at the
+	update of periods (usually called from the interrupt
 	handler).
 
-	When this value is greater than 1, the driver will show the
-	stack trace additionally.  This may help the debugging.
+	When the bit 1 is set, the driver will show the stack trace
+	additionally.  This may help the debugging.
 
-	Since 2.6.30, this option also enables the hwptr check using
+	Since 2.6.30, this option can enable the hwptr check using
 	jiffies.  This detects spontaneous invalid pointer callback
 	values, but can be lead to too much corrections for a (mostly
 	buggy) hardware that doesn't give smooth pointer updates.
+	This feature is enabled via the bit 2.
 
 card*/pcm*/sub*/info
 	The general information of this PCM sub-stream.
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44
new file mode 100644
index 0000000..0e41576
--- /dev/null
+++ b/Documentation/sound/alsa/README.maya44
@@ -0,0 +1,163 @@
+NOTE: The following is the original document of Rainer's patch that the
+current maya44 code based on.  Some contents might be obsoleted, but I
+keep here as reference -- tiwai
+
+----------------------------------------------------------------
+ 
+STATE OF DEVELOPMENT:
+
+This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann.
+Development is carried out by Rainer Zimmermann (mail@lightshed.de).
+
+ESI provided a sample Maya44 card for the development work.
+
+However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing.
+
+This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008).
+
+
+The following functions work, as tested by Rainer Zimmermann and Piotr Makowski:
+
+- playback and capture at all sampling rates
+- input/output level
+- crossmixing
+- line/mic switch
+- phantom power switch
+- analogue monitor a.k.a bypass
+
+
+The following functions *should* work, but are not fully tested:
+
+- Channel 3+4 analogue - S/PDIF input switching
+- S/PDIF output
+- all inputs/outputs on the M/IO/DIO extension card
+- internal/external clock selection
+
+
+*In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.*
+
+
+Things that do not seem to work:
+
+- The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code).
+
+- Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down.
+
+
+DRIVER DETAILS:
+
+the following files were added:
+
+pci/ice1724/maya44.c        - Maya44 specific code
+pci/ice1724/maya44.h
+pci/ice1724/ice1724.patch
+pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES)
+i2c/other/wm8776.c  - low-level access routines for Wolfson WM8776 codecs 
+include/wm8776.h
+
+
+Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure.
+This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately.
+
+
+the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree:
+
+wtm.h
+vt1720_mobo.h
+revo.h
+prodigy192.h
+pontis.h
+phase.h
+maya44.h
+juli.h
+aureon.h
+amp.h
+envy24ht.h
+se.h
+prodigy_hifi.h
+
+
+*I hope this is the correct way to do things.*
+
+
+SAMPLING RATES:
+
+The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture.
+
+As the ICE1724 chip only allows one global sampling rate, this is handled as follows:
+
+* setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels.
+
+* In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices.
+
+*AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality.
+
+
+I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic.
+
+The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712).
+
+
+SOUND DEVICES:
+
+PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0):
+
+hw:0,0 input - stereo, analog input 1+2
+hw:0,0 output - stereo, analog output 1+2
+hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input
+hw:0,1 output - stereo, analog output 3+4 (and SPDIF out)
+
+
+NAMING OF MIXER CONTROLS:
+
+(for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software).
+
+
+PCM: (digital) output level for channel 1+2
+PCM 1: same for channel 3+4
+
+Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2.
+    Make sure this is not turned on while any other source is connected to input 1/2.
+    It might damage the source and/or the maya44 card.
+
+Mic/Line input: if switch is is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo).
+
+Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver.
+Bypass 1: same for channel 3+4.
+
+Crossmix: cross-mixer from channels 1+2 to channels 3+4
+Crossmix 1: cross-mixer from channels 3+4 to channels 1+2
+
+IEC958 Output: switch for S/PDIF output.
+    This is not supported by the ESI windows driver.
+    S/PDIF should output the same signal as channel 3+4. [untested!]
+
+
+Digitial output selectors:
+
+    These switches allow a direct digital routing from the ADCs to the DACs.
+    Each switch determines where the digital input data to one of the DACs comes from.
+    They are not supported by the ESI windows driver.
+    For normal operation, they should all be set to "PCM out".
+
+H/W: Output source channel 1
+H/W 1: Output source channel 2
+H/W 2: Output source channel 3
+H/W 3: Output source channel 4
+
+H/W 4 ... H/W 9: unknown function, left in to enable testing.
+    Possibly some of these control S/PDIF output(s).
+    If these turn out to be unused, they will go away in later driver versions.
+
+Selectable values for each of the digital output selectors are:
+   "PCM out" -> DAC output of the corresponding channel (default setting)
+   "Input 1"...
+   "Input 4" -> direct routing from ADC output of the selected input channel
+
+
+--------
+
+Feb 14, 2008
+Rainer Zimmermann
+mail@lightshed.de
+
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 9e67632..9ac842b 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -62,6 +62,7 @@
  o Mic        - Mic (and optional Jack)
  o Line       - Line Input/Output (and optional Jack)
  o Speaker    - Speaker
+ o Supply     - Power or clock supply widget used by other widgets.
  o Pre        - Special PRE widget (exec before all others)
  o Post       - Special POST widget (exec after all others)
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f11ca79..322a00bb 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -32,6 +32,7 @@
 - kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
+- modules_disabled
 - msgmax
 - msgmnb
 - msgmni
@@ -184,6 +185,16 @@
 
 ==============================================================
 
+modules_disabled:
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel.  This toggle defaults to off
+(0), but can be set true (1).  Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false.
+
+==============================================================
+
 osrelease, ostype & version:
 
 # cat osrelease
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 0000000..f157d75
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,90 @@
+			     Event Tracing
+
+		Documentation written by Theodore Ts'o
+			Updated by Li Zefan
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+tracing information should be printed.
+
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
+
+The events which are available for tracing can be found in the file
+/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /debug/tracing/set_event. For example:
+
+	# echo sched_wakeup >> /debug/tracing/set_event
+
+[ Note: '>>' is necessary, otherwise it will firstly disable
+  all the events. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+	# echo '!sched_wakeup' >> /debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+	# echo > /debug/tracing/set_event
+
+To enable all events, echo '*:*' or '*:' to the set_event file:
+
+	# echo *:* > /debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>.  The
+subsystem name is optional, but it is displayed in the available_events
+file.  All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+	# echo 'irq:*' > /debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in /debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+	# echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+	# echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in sched subsystem:
+
+	# echo 1 > /debug/tracing/events/sched/enable
+
+To eanble all events:
+
+	# echo 1 > /debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See The example provided in samples/trace_events
+
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fd9a3e6..2a82d86 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -179,7 +179,7 @@
 
 	Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
 	Similar to the function tracer except that the
 	function tracer probes the functions on their entry
@@ -518,9 +518,18 @@
 values starting at 100 (nice -20). Below is a quick chart to map
 the kernel priority to user land priorities.
 
-  Kernel priority: 0 to 99    ==> user RT priority 99 to 0
-  Kernel priority: 100 to 139 ==> user nice -20 to 19
-  Kernel priority: 140        ==> idle task priority
+   Kernel Space                     User Space
+ ===============================================================
+   0(high) to  98(low)     user RT priority 99(high) to 1(low)
+                           with SCHED_RR or SCHED_FIFO
+ ---------------------------------------------------------------
+  99                       sched_priority is not used in scheduling
+                           decisions(it must be specified as 0)
+ ---------------------------------------------------------------
+ 100(high) to 139(low)     user nice -20(high) to 19(low)
+ ---------------------------------------------------------------
+ 140                       idle task priority
+ ---------------------------------------------------------------
 
 The task states are:
 
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 0000000..cd805e1
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
+The power tracer collects detailed information about C-state and P-state
+transitions, instead of just looking at the high-level "average"
+information.
+
+There is a helper script found in scrips/tracing/power.pl in the kernel
+sources which can be used to parse this information and create a
+Scalable Vector Graphics (SVG) picture from the trace data.
+
+To use this tracer:
+
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	echo power > /sys/kernel/debug/tracing/current_tracer
+	echo 1 > /sys/kernel/debug/tracing/tracing_enabled
+	sleep 1
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	cat /sys/kernel/debug/tracing/trace | \
+		perl scripts/tracing/power.pl > out.sv
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index e020366..8da3a79 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,6 +50,10 @@
 Protocol 2.09:	(Kernel 2.6.26) Added a field of 64-bit physical
 		pointer to single linked list of struct	setup_data.
 
+Protocol 2.10:	(Kernel 2.6.31) Added a protocol for relaxed alignment
+		beyond the kernel_alignment added, new init_size and
+		pref_address fields.  Added extended boot loader IDs.
+
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -168,12 +172,13 @@
 021C/4	2.00+	ramdisk_size	initrd size (set by boot loader)
 0220/4	2.00+	bootsect_kludge	DO NOT USE - for bootsect.S use only
 0224/2	2.01+	heap_end_ptr	Free memory after setup end
-0226/2	N/A	pad1		Unused
+0226/1	2.02+(3 ext_loader_ver	Extended boot loader version
+0227/1	2.02+(3	ext_loader_type	Extended boot loader ID
 0228/4	2.02+	cmd_line_ptr	32-bit pointer to the kernel command line
 022C/4	2.03+	ramdisk_max	Highest legal initrd address
 0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
 0234/1	2.05+	relocatable_kernel Whether kernel is relocatable or not
-0235/1	N/A	pad2		Unused
+0235/1	2.10+	min_alignment	Minimum alignment, as a power of two
 0236/2	N/A	pad3		Unused
 0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 023C/4	2.07+	hardware_subarch Hardware subarchitecture
@@ -182,6 +187,8 @@
 024C/4	2.08+	payload_length	Length of kernel payload
 0250/8	2.09+	setup_data	64-bit physical pointer to linked list
 				of struct setup_data
+0258/8	2.10+	pref_address	Preferred loading address
+0260/4	2.10+	init_size	Linear memory required during initialization
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -190,6 +197,8 @@
     field are unusable, which means the size of a bzImage kernel
     cannot be determined.
 
+(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
 If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
 the boot protocol version is "old".  Loading an old kernel, the
 following parameters should be assumed:
@@ -343,18 +352,32 @@
   0xTV here, where T is an identifier for the boot loader and V is
   a version number.  Otherwise, enter 0xFF here.
 
+  For boot loader IDs above T = 0xD, write T = 0xE to this field and
+  write the extended ID minus 0x10 to the ext_loader_type field.
+  Similarly, the ext_loader_ver field can be used to provide more than
+  four bits for the bootloader version.
+
+  For example, for T = 0x15, V = 0x234, write:
+
+  type_of_loader  <- 0xE4
+  ext_loader_type <- 0x05
+  ext_loader_ver  <- 0x23
+
   Assigned boot loader ids:
 	0  LILO			(0x00 reserved for pre-2.00 bootloader)
 	1  Loadlin
 	2  bootsect-loader	(0x20, all other values reserved)
-	3  SYSLINUX
-	4  EtherBoot
+	3  Syslinux
+	4  Etherboot/gPXE
 	5  ELILO
 	7  GRUB
-	8  U-BOOT
+	8  U-Boot
 	9  Xen
 	A  Gujin
 	B  Qemu
+	C  Arcturus Networks uCbootloader
+	E  Extended		(see ext_loader_type)
+	F  Special		(0xFF = undefined)
 
   Please contact <hpa@zytor.com> if you need a bootloader ID
   value assigned.
@@ -453,6 +476,35 @@
   Set this field to the offset (from the beginning of the real-mode
   code) of the end of the setup stack/heap, minus 0x0200.
 
+Field name:	ext_loader_ver
+Type:		write (optional)
+Offset/size:	0x226/1
+Protocol:	2.02+
+
+  This field is used as an extension of the version number in the
+  type_of_loader field.  The total version number is considered to be
+  (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+  The use of this field is boot loader specific.  If not written, it
+  is zero.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
+Field name:	ext_loader_type
+Type:		write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size:	0x227/1
+Protocol:	2.02+
+
+  This field is used as an extension of the type number in
+  type_of_loader field.  If the type in type_of_loader is 0xE, then
+  the actual type is (ext_loader_type + 0x10).
+
+  This field is ignored if the type in type_of_loader is not 0xE.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
 Field name:	cmd_line_ptr
 Type:		write (obligatory)
 Offset/size:	0x228/4
@@ -482,11 +534,19 @@
   0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
 
 Field name:	kernel_alignment
-Type:		read (reloc)
+Type:		read/modify (reloc)
 Offset/size:	0x230/4
-Protocol:	2.05+
+Protocol:	2.05+ (read), 2.10+ (modify)
 
-  Alignment unit required by the kernel (if relocatable_kernel is true.)
+  Alignment unit required by the kernel (if relocatable_kernel is
+  true.)  A relocatable kernel that is loaded at an alignment
+  incompatible with the value in this field will be realigned during
+  kernel initialization.
+
+  Starting with protocol version 2.10, this reflects the kernel
+  alignment preferred for optimal performance; it is possible for the
+  loader to modify this field to permit a lesser alignment.  See the
+  min_alignment and pref_address field below.
 
 Field name:	relocatable_kernel
 Type:		read (reloc)
@@ -498,6 +558,22 @@
   After loading, the boot loader must set the code32_start field to
   point to the loaded code, or to a boot loader hook.
 
+Field name:	min_alignment
+Type:		read (reloc)
+Offset/size:	0x235/1
+Protocol:	2.10+
+
+  This field, if nonzero, indicates as a power of two the minimum
+  alignment required, as opposed to preferred, by the kernel to boot.
+  If a boot loader makes use of this field, it should update the
+  kernel_alignment field with the alignment unit desired; typically:
+
+	kernel_alignment = 1 << min_alignment
+
+  There may be a considerable performance cost with an excessively
+  misaligned kernel.  Therefore, a loader should typically try each
+  power-of-two alignment from kernel_alignment down to this alignment.
+
 Field name:	cmdline_size
 Type:		read
 Offset/size:	0x238/4
@@ -582,6 +658,36 @@
   sure to consider the case where the linked list already contains
   entries.
 
+Field name:	pref_address
+Type:		read (reloc)
+Offset/size:	0x258/8
+Protocol:	2.10+
+
+  This field, if nonzero, represents a preferred load address for the
+  kernel.  A relocating bootloader should attempt to load at this
+  address if possible.
+
+  A non-relocatable kernel will unconditionally move itself and to run
+  at this address.
+
+Field name:	init_size
+Type:		read
+Offset/size:	0x25c/4
+
+  This field indicates the amount of linear contiguous memory starting
+  at the kernel runtime start address that the kernel needs before it
+  is capable of examining its memory map.  This is not the same thing
+  as the total amount of memory the kernel needs to boot, but it can
+  be used by a relocating boot loader to help select a safe load
+  address for the kernel.
+
+  The kernel runtime start address is determined by the following algorithm:
+
+  if (relocatable_kernel)
+	runtime_start = align_up(load_address, kernel_alignment)
+  else
+	runtime_start = pref_address
+
 
 **** THE IMAGE CHECKSUM
 
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c1304..2db5893 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 29b52b1..d6498e3 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,10 +6,11 @@
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
-ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory
-ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
-ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
-ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
+ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
+ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
+ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
+ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space
diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..90f8128 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -71,7 +71,7 @@
 M: Mail patches to
 L: Mailing list that is relevant to this area
 W: Web-page with status/info
-T: SCM tree type and location.  Type is one of: git, hg, quilt.
+T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 S: Status, one of the following:
 
 	Supported:	Someone is actually paid to look after this.
@@ -159,7 +159,8 @@
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 L:	linux-serial@vger.kernel.org
 W:	http://serial.sourceforge.net
-S:	Orphan
+M:	alan@lxorguk.ukuu.org.uk
+S:	Odd Fixes
 F:	drivers/serial/8250*
 F:	include/linux/serial_8250.h
 
@@ -1801,10 +1802,10 @@
 F:	drivers/char/digi*
 
 DIRECTORY NOTIFICATION (DNOTIFY)
-P:	Stephen Rothwell
-M:	sfr@canb.auug.org.au
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
-S:	Supported
+S:	Maintained
 F:	Documentation/filesystems/dnotify.txt
 F:	fs/notify/dnotify/
 F:	include/linux/dnotify.h
@@ -1978,6 +1979,16 @@
 F:	drivers/edac/edac_*
 F:	include/linux/edac.h
 
+EDAC-AMD64
+P:	Doug Thompson
+M:	dougthompson@xmission.com
+P:	Borislav Petkov
+M:	borislav.petkov@amd.com
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
+W:	bluesmoke.sourceforge.net
+S:	Supported
+F:	drivers/edac/amd64_edac*
+
 EDAC-E752X
 P:	Mark Gross
 M:	mark.gross@intel.com
@@ -2847,6 +2858,8 @@
 M:	john@johnmccutchan.com
 P:	Robert Love
 M:	rlove@rlove.org
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/filesystems/inotify.txt
@@ -3350,6 +3363,16 @@
 F:	include/linux/kgdb.h
 F:	kernel/kgdb.c
 
+KMEMLEAK
+P:	Catalin Marinas
+M:	catalin.marinas@arm.com
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	Documentation/kmemleak.txt
+F:	include/linux/kmemleak.h
+F:	mm/kmemleak.c
+F:	mm/kmemleak-test.c
+
 KMEMTRACE
 P:	Eduard - Gabriel Munteanu
 M:	eduard.munteanu@linux360.ro
@@ -4392,6 +4415,16 @@
 F:	include/linux/delayacct.h
 F:	kernel/delayacct.c
 
+PERFORMANCE COUNTER SUBSYSTEM
+P:	Peter Zijlstra
+M:	a.p.zijlstra@chello.nl
+P:	Paul Mackerras
+M:	paulus@samba.org
+P:	Ingo Molnar
+M:	mingo@elte.hu
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+
 PERSONALITY HANDLING
 P:	Christoph Hellwig
 M:	hch@infradead.org
@@ -4574,7 +4607,8 @@
 F:	drivers/spi/pxa2xx*
 F:	drivers/usb/gadget/pxa2*
 F:	include/sound/pxa2xx-lib.h
-F:	sound/soc/pxa/pxa2xx*
+F:	sound/arm/pxa*
+F:	sound/soc/pxa
 
 PXA168 SUPPORT
 P:	Eric Miao
@@ -5302,11 +5336,12 @@
 M:	lrg@slimlogic.co.uk
 P:	Mark Brown
 M:	broonie@opensource.wolfsonmicro.com
-T:	git git://opensource.wolfsonmicro.com/linux-2.6-asoc
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound-2.6.git
 L:	alsa-devel@alsa-project.org (subscribers-only)
 W:	http://alsa-project.org/main/index.php/ASoC
 S:	Supported
 F:	sound/soc/
+F:	include/sound/soc*
 
 SPARC + UltraSPARC (sparc/sparc64)
 P:	David S. Miller
@@ -5629,6 +5664,7 @@
 M:	alan@lxorguk.ukuu.org.uk
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
+T:	stgit http://zeniv.linux.org.uk/~alan/ttydev/
 
 TULIP NETWORK DRIVERS
 P:	Grant Grundler
diff --git a/Makefile b/Makefile
index 610d1c3..03373bb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
@@ -533,7 +533,7 @@
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
-ifneq (CONFIG_FRAME_WARN,0)
+ifneq ($(CONFIG_FRAME_WARN),0)
 KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
diff --git a/arch/alpha/include/asm/suspend.h b/arch/alpha/include/asm/suspend.h
deleted file mode 100644
index c7042d5..0000000
--- a/arch/alpha/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ALPHA_SUSPEND_H
-#define __ALPHA_SUSPEND_H
-
-/* Dummy include. */
-
-#endif  /* __ALPHA_SUSPEND_H */
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 42ee059..9a3334a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -371,8 +371,6 @@
 	int retval = -EINVAL;
 	char *name;
 
-	lock_kernel();
-
 	name = getname(path);
 	retval = PTR_ERR(name);
 	if (IS_ERR(name))
@@ -392,7 +390,6 @@
 	}
 	putname(name);
  out:
-	unlock_kernel();
 	return retval;
 }
 
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9c9d1fd..5bd5259 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -176,22 +176,26 @@
 	}
 }
 
-static void
+static int
 dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
-static void
+static int
 clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
 static struct hw_interrupt_type dp264_irq_type = {
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 27f840a..8dd239e 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -157,13 +157,15 @@
 
 }
 
-static void
+static int
 titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
 	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
+
+	return 0;
 }
 
 static void
diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
index 62dc379..813c9b6 100644
--- a/arch/alpha/mm/extable.c
+++ b/arch/alpha/mm/extable.c
@@ -48,6 +48,27 @@
 	     cmp_ex, swap_ex);
 }
 
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable(const struct exception_table_entry *first,
 	       const struct exception_table_entry *last,
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 3e1714c..664c7b8 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,7 +109,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@
 	val |= 1 << (cpu + shift);
 	writel(val, reg);
 	spin_unlock(&irq_controller_lock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index cb7a9e9..feaa75f 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -7,4 +7,20 @@
 #define L1_CACHE_SHIFT		5
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+/*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+ * sure that all such allocations are cache aligned. Otherwise,
+ * unrelated code may cause parts of the buffer to be read into the
+ * cache before the transfer is done, causing old data to be seen by
+ * the CPU.
+ */
+#define ARCH_KMALLOC_MINALIGN	L1_CACHE_BYTES
+
+/*
+ * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
+ */
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+#define ARCH_SLAB_MINALIGN 8
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index e6eb8a6..7b52277 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -202,13 +202,6 @@
 	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-/*
- * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
- */
-#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
-#define ARCH_SLAB_MINALIGN 8
-#endif
-
 #include <asm-generic/page.h>
 
 #endif
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
deleted file mode 100644
index cf0d0bd..0000000
--- a/arch/arm/include/asm/suspend.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _ASMARM_SUSPEND_H
-#define _ASMARM_SUSPEND_H
-
-#endif
diff --git a/arch/arm/mach-mx2/clock_imx21.c b/arch/arm/mach-mx2/clock_imx21.c
index 999d013..e4b08ca 100644
--- a/arch/arm/mach-mx2/clock_imx21.c
+++ b/arch/arm/mach-mx2/clock_imx21.c
@@ -890,7 +890,7 @@
 		.con_id = n, \
 		.clk = &c, \
 	},
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 /* It's unlikely that any driver wants one of them directly:
 	_REGISTER_CLOCK(NULL, "ckih", ckih_clk)
 	_REGISTER_CLOCK(NULL, "ckil", ckil_clk)
diff --git a/arch/arm/mach-mx2/clock_imx27.c b/arch/arm/mach-mx2/clock_imx27.c
index 3f7280c..2c97144 100644
--- a/arch/arm/mach-mx2/clock_imx27.c
+++ b/arch/arm/mach-mx2/clock_imx27.c
@@ -621,7 +621,7 @@
 		.clk = &c, \
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
 	_REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
 	_REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 53a112d..3c1e06f 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -404,7 +404,7 @@
 		.clk = &c,		\
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK(NULL, "asrc", asrc_clk)
 	_REGISTER_CLOCK(NULL, "ata", ata_clk)
 	_REGISTER_CLOCK(NULL, "audmux", audmux_clk)
diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 9957a11..a68fcf9 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -516,7 +516,7 @@
 		.clk = &c, \
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK(NULL, "emi", emi_clk)
 	_REGISTER_CLOCK(NULL, "cspi", cspi1_clk)
 	_REGISTER_CLOCK(NULL, "cspi", cspi2_clk)
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index d245e59c..29970f7 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -72,7 +72,10 @@
 }
 
 
-static struct pxa2xx_udc_mach_info pxa_udc_info;
+static struct pxa2xx_udc_mach_info pxa_udc_info = {
+	.gpio_pullup = -1,
+	.gpio_vbus   = -1,
+};
 
 void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info)
 {
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 2121309..2b27336 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -412,7 +412,7 @@
  */
 static struct i2c_board_info __initdata imote2_i2c_board_info[] = {
 	{ /* UCAM sensor board */
-		.type = "max1238",
+		.type = "max1239",
 		.addr = 0x35,
 	}, { /* ITS400 Sensor board only */
 		.type = "max1363",
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3397f1e..a08d9d2 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -184,23 +184,37 @@
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
+
+	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
+	and	r10, r0, #0xff000000		@ ARM?
+	teq	r10, #0x41000000
+	bne	2f
+	and	r5, r0, #0x00f00000		@ variant
+	and	r6, r0, #0x0000000f		@ revision
+	orr	r0, r6, r5, lsr #20-4		@ combine variant and revision
+
 #ifdef CONFIG_ARM_ERRATA_430973
-	mrc	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orr	r10, r10, #(1 << 6)		@ set IBE to 1
-	mcr	p15, 0, r10, c1, c0, 1		@ write aux control register
+	teq	r5, #0x00100000			@ only present in r1p*
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
-	mrc	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orr	r10, r10, #(1 << 5)		@ set L1NEON to 1
-	orr	r10, r10, #(1 << 9)		@ set PLDNOP to 1
-	mcr	p15, 0, r10, c1, c0, 1		@ write aux control register
+	teq	r0, #0x20			@ only present in r2p0
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
+	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_460075
-	mrc	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
-	orr	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
-	mcr	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
+	teq	r0, #0x20			@ only present in r2p0
+	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
+	tsteq	r10, #1 << 22
+	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
+	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
 #endif
-	mov	r10, #0
+
+2:	mov	r10, #0
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #endif
diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h
index 599217b..f9bd17d 100644
--- a/arch/arm/plat-mxc/include/mach/imx-uart.h
+++ b/arch/arm/plat-mxc/include/mach/imx-uart.h
@@ -20,11 +20,16 @@
 #define ASMARM_ARCH_UART_H
 
 #define IMXUART_HAVE_RTSCTS (1<<0)
+#define IMXUART_IRDA        (1<<1)
 
 struct imxuart_platform_data {
 	int (*init)(struct platform_device *pdev);
 	int (*exit)(struct platform_device *pdev);
 	unsigned int flags;
+	void (*irda_enable)(int enable);
+	unsigned int irda_inv_rx:1;
+	unsigned int irda_inv_tx:1;
+	unsigned short transceiver_delay;
 };
 
 #endif
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 0abfbaa..40424ed 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -147,24 +147,40 @@
 	return ret;
 }
 
+struct omap_msg_tx_data {
+	mbox_msg_t	msg;
+	void		*arg;
+};
+
+static void omap_msg_tx_end_io(struct request *rq, int error)
+{
+	kfree(rq->special);
+	__blk_put_request(rq->q, rq);
+}
+
 int omap_mbox_msg_send(struct omap_mbox *mbox, mbox_msg_t msg, void* arg)
 {
+	struct omap_msg_tx_data *tx_data;
 	struct request *rq;
 	struct request_queue *q = mbox->txq->queue;
-	int ret = 0;
+
+	tx_data = kmalloc(sizeof(*tx_data), GFP_ATOMIC);
+	if (unlikely(!tx_data))
+		return -ENOMEM;
 
 	rq = blk_get_request(q, WRITE, GFP_ATOMIC);
 	if (unlikely(!rq)) {
-		ret = -ENOMEM;
-		goto fail;
+		kfree(tx_data);
+		return -ENOMEM;
 	}
 
-	rq->data = (void *)msg;
-	blk_insert_request(q, rq, 0, arg);
+	tx_data->msg = msg;
+	tx_data->arg = arg;
+	rq->end_io = omap_msg_tx_end_io;
+	blk_insert_request(q, rq, 0, tx_data);
 
 	schedule_work(&mbox->txq->work);
- fail:
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(omap_mbox_msg_send);
 
@@ -178,22 +194,28 @@
 	struct request_queue *q = mbox->txq->queue;
 
 	while (1) {
+		struct omap_msg_tx_data *tx_data;
+
 		spin_lock(q->queue_lock);
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		spin_unlock(q->queue_lock);
 
 		if (!rq)
 			break;
 
-		ret = __mbox_msg_send(mbox, (mbox_msg_t) rq->data, rq->special);
+		tx_data = rq->special;
+
+		ret = __mbox_msg_send(mbox, tx_data->msg, tx_data->arg);
 		if (ret) {
 			enable_mbox_irq(mbox, IRQ_TX);
+			spin_lock(q->queue_lock);
+			blk_requeue_request(q, rq);
+			spin_unlock(q->queue_lock);
 			return;
 		}
 
 		spin_lock(q->queue_lock);
-		if (__blk_end_request(rq, 0, 0))
-			BUG();
+		__blk_end_request_all(rq, 0);
 		spin_unlock(q->queue_lock);
 	}
 }
@@ -218,16 +240,13 @@
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		if (!rq)
 			break;
 
-		msg = (mbox_msg_t) rq->data;
-
-		if (blk_end_request(rq, 0, 0))
-			BUG();
-
+		msg = (mbox_msg_t)rq->special;
+		blk_end_request_all(rq, 0);
 		mbox->rxq->callback((void *)msg);
 	}
 }
@@ -264,7 +283,6 @@
 			goto nomem;
 
 		msg = mbox_fifo_read(mbox);
-		rq->data = (void *)msg;
 
 		if (unlikely(mbox_seq_test(mbox, msg))) {
 			pr_info("mbox: Illegal seq bit!(%08x)\n", msg);
@@ -272,7 +290,7 @@
 				mbox->err_notify();
 		}
 
-		blk_insert_request(q, rq, 0, NULL);
+		blk_insert_request(q, rq, 0, (void *)msg);
 		if (mbox->ops->type == OMAP_MBOX_TYPE1)
 			break;
 	}
@@ -329,16 +347,15 @@
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
 		if (!rq)
 			break;
 
-		*p = (mbox_msg_t) rq->data;
+		*p = (mbox_msg_t)rq->special;
 
-		if (blk_end_request(rq, 0, 0))
-			BUG();
+		blk_end_request_all(rq, 0);
 
 		if (unlikely(mbox_seq_test(mbox, *p))) {
 			pr_info("mbox: Illegal seq bit!(%08x) ignored\n", *p);
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
index 1167fe9c..98f94d0 100644
--- a/arch/avr32/kernel/module.c
+++ b/arch/avr32/kernel/module.c
@@ -32,8 +32,6 @@
 	mod->arch.syminfo = NULL;
 
 	vfree(module_region);
-	/* FIXME: if module_region == mod->init_region, trim exception
-	 * table entries. */
 }
 
 static inline int check_rela(Elf32_Rela *rela, struct module *module,
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 3640cdc..c56fd3e 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -223,6 +223,7 @@
 
 config SMP
 	depends on BF561
+	select GENERIC_TIME
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU,
@@ -241,12 +242,6 @@
 	depends on SMP
 	default y
 
-config TICK_SOURCE_SYSTMR0
-	bool
-	select BFIN_GPTIMERS
-	depends on SMP
-	default y
-
 config BF_REV_MIN
 	int
 	default 0 if (BF51x || BF52x || (BF54x && !BF54xM))
@@ -263,8 +258,8 @@
 
 choice
 	prompt "Silicon Rev"
-	default BF_REV_0_1 if (BF51x || BF52x || (BF54x && !BF54xM))
-	default BF_REV_0_2 if (BF534 || BF536 || BF537)
+	default BF_REV_0_0 if (BF51x || BF52x)
+	default BF_REV_0_2 if (BF534 || BF536 || BF537 || (BF54x && !BF54xM))
 	default BF_REV_0_3 if (BF531 || BF532 || BF533 || BF54xM || BF561)
 
 config BF_REV_0_0
@@ -607,7 +602,6 @@
 
 config GENERIC_TIME
 	bool "Generic time"
-	depends on !SMP
 	default y
 
 config GENERIC_CLOCKEVENTS
@@ -615,12 +609,26 @@
 	depends on GENERIC_TIME
 	default y
 
+choice
+	prompt "Kernel Tick Source"
+	depends on GENERIC_CLOCKEVENTS
+	default TICKSOURCE_CORETMR
+
+config TICKSOURCE_GPTMR0
+	bool "Gptimer0 (SCLK domain)"
+	select BFIN_GPTIMERS
+	depends on !IPIPE
+
+config TICKSOURCE_CORETMR
+	bool "Core timer (CCLK domain)"
+
+endchoice
+
 config CYCLES_CLOCKSOURCE
-	bool "Use 'CYCLES' as a clocksource (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "Use 'CYCLES' as a clocksource"
 	depends on GENERIC_CLOCKEVENTS
 	depends on !BFIN_SCRATCH_REG_CYCLES
-	default n
+	depends on !SMP
 	help
 	  If you say Y here, you will enable support for using the 'cycles'
 	  registers as a clock source.  Doing so means you will be unable to
@@ -628,6 +636,11 @@
 	  still be able to read it (such as for performance monitoring), but
 	  writing the registers will most likely crash the kernel.
 
+config GPTMR0_CLOCKSOURCE
+	bool "Use GPTimer0 as a clocksource (higher rating)"
+	depends on GENERIC_CLOCKEVENTS
+	depends on !TICKSOURCE_GPTMR0
+
 source kernel/time/Kconfig
 
 comment "Misc"
@@ -808,7 +821,7 @@
 config EXCEPTION_L1_SCRATCH
 	bool "Locate exception stack in L1 Scratch Memory"
 	default n
-	depends on !APP_STACK_L1 && !SYSCALL_TAB_L1
+	depends on !APP_STACK_L1
 	help
 	  Whenever an exception occurs, use the L1 Scratch memory for
 	  stack storage.  You cannot place the stacks of FLAT binaries
@@ -901,7 +914,7 @@
 	bool "Enable Instruction Cache Locking"
 
 choice
-	prompt "Policy"
+	prompt "External memory cache policy"
 	depends on BFIN_DCACHE
 	default BFIN_WB if !SMP
 	default BFIN_WT if SMP
@@ -942,12 +955,22 @@
 
 endchoice
 
-config BFIN_L2_CACHEABLE
-	bool "Cache L2 SRAM"
-	depends on (BFIN_DCACHE || BFIN_ICACHE) && (BF54x || (BF561 && !SMP))
-	default n
-	help
-	  Select to make L2 SRAM cacheable in L1 data and instruction cache.
+choice
+	prompt "L2 SRAM cache policy"
+	depends on (BF54x || BF561)
+	default BFIN_L2_WT
+config BFIN_L2_WB
+	bool "Write back"
+	depends on !SMP
+
+config BFIN_L2_WT
+	bool "Write through"
+	depends on !SMP
+
+config BFIN_L2_NOT_CACHED
+	bool "Not cached"
+
+endchoice
 
 config MPU
 	bool "Enable the memory protection unit (EXPERIMENTAL)"
@@ -1011,21 +1034,34 @@
 
 menu "EBIU_AMBCTL Control"
 config BANK_0
-	hex "Bank 0"
+	hex "Bank 0 (AMBCTL0.L)"
 	default 0x7BB0
+	help
+	  These are the low 16 bits of the EBIU_AMBCTL0 MMR which are
+	  used to control the Asynchronous Memory Bank 0 settings.
 
 config BANK_1
-	hex "Bank 1"
+	hex "Bank 1 (AMBCTL0.H)"
 	default 0x7BB0
 	default 0x5558 if BF54x
+	help
+	  These are the high 16 bits of the EBIU_AMBCTL0 MMR which are
+	  used to control the Asynchronous Memory Bank 1 settings.
 
 config BANK_2
-	hex "Bank 2"
+	hex "Bank 2 (AMBCTL1.L)"
 	default 0x7BB0
+	help
+	  These are the low 16 bits of the EBIU_AMBCTL1 MMR which are
+	  used to control the Asynchronous Memory Bank 2 settings.
 
 config BANK_3
-	hex "Bank 3"
+	hex "Bank 3 (AMBCTL1.H)"
 	default 0x99B3
+	help
+	  These are the high 16 bits of the EBIU_AMBCTL1 MMR which are
+	  used to control the Asynchronous Memory Bank 3 settings.
+
 endmenu
 
 config EBIU_MBSCTLVAL
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 79e7e63..1fc4981 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -54,6 +54,19 @@
 	  hardware error interrupts and need to know where they are coming
 	  from.
 
+config EXACT_HWERR
+	bool "Try to make Hardware errors exact"
+	depends on DEBUG_HWERR
+	help
+	  By default, the Blackfin hardware errors are not exact - the error
+          be reported multiple cycles after the error happens. This delay
+	  can cause the wrong application, or even the kernel to receive a
+	  signal to be killed. If you are getting HW errors in your system,
+	  try turning this on to ensure they are at least comming from the
+	  proper thread.
+
+	  On production systems, it is safe (and a small optimization) to say N.
+
 config DEBUG_DOUBLEFAULT
 	bool "Debug Double Faults"
 	default n
diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig
index c121d6e..baec133 100644
--- a/arch/blackfin/configs/BF518F-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -259,7 +259,10 @@
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -404,7 +407,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -688,14 +691,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -802,7 +805,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -831,6 +857,7 @@
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -962,7 +989,8 @@
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -988,8 +1016,11 @@
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1012,8 +1043,8 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_YAFFS_FS is not set
 # CONFIG_JFFS2_FS is not set
+# CONFIG_YAFFS_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1048,9 +1079,9 @@
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-CONFIG_NLS=y
+CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1065,7 +1096,7 @@
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1074,7 +1105,7 @@
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1087,7 +1118,7 @@
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1102,7 +1133,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1111,8 +1142,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1132,7 +1161,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1148,16 +1176,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1269,7 +1301,6 @@
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig
index 3e562b2..c06262e 100644
--- a/arch/blackfin/configs/BF526-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -144,8 +144,8 @@
 # CONFIG_BF561 is not set
 CONFIG_BF_REV_MIN=0
 CONFIG_BF_REV_MAX=2
-# CONFIG_BF_REV_0_0 is not set
-CONFIG_BF_REV_0_1=y
+CONFIG_BF_REV_0_0=y
+# CONFIG_BF_REV_0_1 is not set
 # CONFIG_BF_REV_0_2 is not set
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
@@ -264,7 +264,10 @@
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -409,7 +412,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -491,7 +494,7 @@
 #
 # User Modules And Translation Layers
 #
-CONFIG_MTD_CHAR=m
+CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_BLOCK=y
 # CONFIG_FTL is not set
@@ -504,9 +507,9 @@
 #
 # RAM/ROM/Flash chip drivers
 #
-# CONFIG_MTD_CFI is not set
-CONFIG_MTD_JEDECPROBE=m
-CONFIG_MTD_GEN_PROBE=m
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
 # CONFIG_MTD_CFI_ADV_OPTIONS is not set
 CONFIG_MTD_MAP_BANK_WIDTH_1=y
 CONFIG_MTD_MAP_BANK_WIDTH_2=y
@@ -518,9 +521,10 @@
 CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_CFI_I4 is not set
 # CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_INTELEXT=y
 # CONFIG_MTD_CFI_AMDSTD is not set
 # CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=m
 # CONFIG_MTD_ABSENT is not set
@@ -529,7 +533,8 @@
 # Mapping drivers for chip access
 #
 CONFIG_MTD_COMPLEX_MAPPINGS=y
-# CONFIG_MTD_PHYSMAP is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_GPIO_ADDR is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
@@ -597,9 +602,42 @@
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
@@ -644,9 +682,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -715,14 +752,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -832,11 +869,35 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD5252 is not set
 # CONFIG_SENSORS_AD7414 is not set
 # CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADCXX is not set
@@ -920,6 +981,7 @@
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1008,8 +1070,8 @@
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1037,10 +1099,10 @@
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1058,7 +1120,7 @@
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1107,33 +1169,10 @@
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -1206,7 +1245,8 @@
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -1226,14 +1266,19 @@
 #
 # CD-ROM/DVD Filesystems
 #
-# CONFIG_ISO9660_FS is not set
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
 # CONFIG_UDF_FS is not set
 
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1256,16 +1301,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1277,6 +1312,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1313,7 +1358,7 @@
 CONFIG_MSDOS_PARTITION=y
 CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1328,7 +1373,7 @@
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1337,7 +1382,7 @@
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-# CONFIG_NLS_ISO8859_1 is not set
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1350,7 +1395,7 @@
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1365,7 +1410,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1374,8 +1419,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1395,7 +1438,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1411,16 +1453,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1534,7 +1580,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig
index 911b5db..e9175c6 100644
--- a/arch/blackfin/configs/BF527-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -145,8 +145,8 @@
 CONFIG_BF_REV_MIN=0
 CONFIG_BF_REV_MAX=2
 # CONFIG_BF_REV_0_0 is not set
-CONFIG_BF_REV_0_1=y
-# CONFIG_BF_REV_0_2 is not set
+# CONFIG_BF_REV_0_1 is not set
+CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
@@ -264,7 +264,10 @@
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -318,7 +321,7 @@
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
 CONFIG_VIRT_TO_BUS=y
-CONFIG_BFIN_GPTIMERS=m
+CONFIG_BFIN_GPTIMERS=y
 # CONFIG_DMA_UNCACHED_4M is not set
 # CONFIG_DMA_UNCACHED_2M is not set
 CONFIG_DMA_UNCACHED_1M=y
@@ -409,7 +412,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -639,9 +642,42 @@
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
@@ -687,9 +723,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -758,14 +793,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -875,7 +910,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -909,6 +967,7 @@
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1091,8 +1150,8 @@
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1120,10 +1179,10 @@
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1141,7 +1200,7 @@
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1190,33 +1249,10 @@
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -1289,7 +1325,8 @@
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -1309,14 +1346,20 @@
 #
 # CD-ROM/DVD Filesystems
 #
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
 
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1339,16 +1382,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1360,6 +1393,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1396,7 +1439,7 @@
 CONFIG_MSDOS_PARTITION=y
 CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1411,7 +1454,7 @@
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1420,7 +1463,7 @@
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-# CONFIG_NLS_ISO8859_1 is not set
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1433,7 +1476,7 @@
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1448,7 +1491,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1457,8 +1500,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1478,7 +1519,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1494,16 +1534,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1611,13 +1655,12 @@
 CONFIG_CRC_CCITT=m
 # CONFIG_CRC16 is not set
 # CONFIG_CRC_T10DIF is not set
-# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC_ITU_T=m
 CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig
index 4c41e03..5aa63ba 100644
--- a/arch/blackfin/configs/BF533-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -225,7 +225,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -382,7 +385,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -613,9 +616,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -667,13 +669,13 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_BFIN_DMA_INTERFACE=m
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -729,7 +731,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -904,16 +929,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -925,6 +940,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1013,7 +1038,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1022,8 +1047,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1043,7 +1066,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1059,16 +1081,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1181,7 +1207,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig
index 9c482cd..fed2532 100644
--- a/arch/blackfin/configs/BF533-STAMP_defconfig
+++ b/arch/blackfin/configs/BF533-STAMP_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -225,7 +225,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -382,7 +385,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -618,9 +621,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -674,14 +676,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -781,7 +783,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1068,16 +1093,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1089,6 +1104,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1177,7 +1202,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1186,8 +1211,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1207,7 +1230,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1223,16 +1245,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1345,7 +1371,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig
index 591f6ed..f9ac20d 100644
--- a/arch/blackfin/configs/BF537-STAMP_defconfig
+++ b/arch/blackfin/configs/BF537-STAMP_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -232,7 +232,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -390,7 +393,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -548,9 +551,7 @@
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -649,9 +650,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -708,14 +708,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -823,7 +823,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1123,16 +1146,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1144,6 +1157,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1232,7 +1255,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1241,8 +1264,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1262,7 +1283,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1278,16 +1298,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1400,7 +1424,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig
index 1a8e8c3..ee98e22 100644
--- a/arch/blackfin/configs/BF538-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -243,7 +243,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -389,7 +392,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -546,9 +549,7 @@
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -691,11 +692,11 @@
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_AD7877 is not set
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 CONFIG_TOUCHSCREEN_AD7879_SPI=y
 CONFIG_TOUCHSCREEN_AD7879=y
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -720,14 +721,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -833,7 +834,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1056,16 +1080,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1077,6 +1091,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1165,7 +1189,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1174,8 +1198,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1195,7 +1217,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1211,16 +1232,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1333,7 +1358,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index 2cd1c2b..deeabef 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -196,6 +196,7 @@
 # BF548 Specific Configuration
 #
 # CONFIG_DEB_DMA_URGENT is not set
+# CONFIG_BF548_ATAPI_ALTERNATIVE_PORT is not set
 
 #
 # Interrupt Priority Assignment
@@ -298,7 +299,10 @@
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -367,7 +371,9 @@
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-# CONFIG_BFIN_L2_CACHEABLE is not set
+# CONFIG_BFIN_L2_WB is not set
+CONFIG_BFIN_L2_WT=y
+# CONFIG_BFIN_L2_NOT_CACHED is not set
 # CONFIG_MPU is not set
 
 #
@@ -447,7 +453,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -616,9 +622,7 @@
 #
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_GPIO_ADDR is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
@@ -696,7 +700,7 @@
 CONFIG_BLK_DEV_SD=y
 # CONFIG_CHR_DEV_ST is not set
 # CONFIG_CHR_DEV_OSST is not set
-CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR=m
 # CONFIG_BLK_DEV_SR_VENDOR is not set
 # CONFIG_CHR_DEV_SG is not set
 # CONFIG_CHR_DEV_SCH is not set
@@ -718,9 +722,7 @@
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 # CONFIG_SCSI_SAS_LIBSAS is not set
 # CONFIG_SCSI_SRP_ATTRS is not set
-CONFIG_SCSI_LOWLEVEL=y
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_DH is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
@@ -752,9 +754,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -821,11 +822,11 @@
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 CONFIG_TOUCHSCREEN_AD7877=m
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 # CONFIG_TOUCHSCREEN_AD7879_SPI is not set
 # CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -858,14 +859,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -977,7 +978,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1011,6 +1035,7 @@
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1193,8 +1218,8 @@
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1222,10 +1247,10 @@
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1243,7 +1268,7 @@
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1292,33 +1317,10 @@
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 CONFIG_MMC=y
 # CONFIG_MMC_DEBUG is not set
 # CONFIG_MMC_UNSAFE_RESUME is not set
@@ -1414,13 +1416,8 @@
 CONFIG_EXT2_FS_XATTR=y
 # CONFIG_EXT2_FS_POSIX_ACL is not set
 # CONFIG_EXT2_FS_SECURITY is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
@@ -1476,16 +1473,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1497,6 +1484,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1539,63 +1536,47 @@
 #
 # Partition Types
 #
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
+# CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-# CONFIG_SYSV68_PARTITION is not set
 CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
 CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
@@ -1611,7 +1592,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1620,8 +1601,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1641,7 +1620,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1657,16 +1635,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1780,7 +1762,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig
index 4a6ea8e..dcfbe2e 100644
--- a/arch/blackfin/configs/BF561-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -148,9 +148,9 @@
 # CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
 # CONFIG_BF_REV_0_2 is not set
-CONFIG_BF_REV_0_3=y
+# CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
-# CONFIG_BF_REV_0_5 is not set
+CONFIG_BF_REV_0_5=y
 # CONFIG_BF_REV_0_6 is not set
 # CONFIG_BF_REV_ANY is not set
 # CONFIG_BF_REV_NONE is not set
@@ -179,7 +179,6 @@
 # Core B Support
 #
 CONFIG_BF561_COREB=y
-CONFIG_BF561_COREB_RESET=y
 
 #
 # Interrupt Priority Assignment
@@ -264,7 +263,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -334,7 +336,9 @@
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-# CONFIG_BFIN_L2_CACHEABLE is not set
+# CONFIG_BFIN_L2_WB is not set
+CONFIG_BFIN_L2_WT=y
+# CONFIG_BFIN_L2_NOT_CACHED is not set
 # CONFIG_MPU is not set
 
 #
@@ -415,7 +419,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -570,9 +574,7 @@
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -649,9 +651,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -703,13 +704,13 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_BFIN_DMA_INTERFACE=m
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -765,7 +766,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -897,16 +921,6 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -918,6 +932,16 @@
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1006,7 +1030,7 @@
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1015,8 +1039,6 @@
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1036,7 +1058,6 @@
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1052,16 +1073,20 @@
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1174,7 +1199,6 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig
index ef1a2c8..174c578 100644
--- a/arch/blackfin/configs/BlackStamp_defconfig
+++ b/arch/blackfin/configs/BlackStamp_defconfig
@@ -46,7 +46,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -381,7 +381,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig
index e2fc588..e17875e 100644
--- a/arch/blackfin/configs/CM-BF527_defconfig
+++ b/arch/blackfin/configs/CM-BF527_defconfig
@@ -46,7 +46,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -411,7 +411,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -783,7 +783,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig
index 65a8bbb..fafd95e 100644
--- a/arch/blackfin/configs/CM-BF533_defconfig
+++ b/arch/blackfin/configs/CM-BF533_defconfig
@@ -49,7 +49,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -347,7 +347,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -547,9 +547,9 @@
 CONFIG_SMC91X=y
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -641,6 +641,10 @@
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig
index 9b7e9d78..e73aa5a 100644
--- a/arch/blackfin/configs/CM-BF537E_defconfig
+++ b/arch/blackfin/configs/CM-BF537E_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.22.16
+# Linux kernel version: 2.6.28.10
+# Wed Jun  3 06:27:41 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -8,48 +9,44 @@
 # CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
 CONFIG_BLACKFIN=y
 CONFIG_ZONE_DMA=y
-CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_GPIO=y
 CONFIG_FORCE_MAX_ZONEORDER=14
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
-# Code maturity level options
+# General setup
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
 CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSFS_DEPRECATED is not set
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
 # CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
 # CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -58,37 +55,36 @@
 # CONFIG_ELF_CORE is not set
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_BIG_ORDER_ALLOC_NOFAIL_MAGIC=3
-# CONFIG_NP2 is not set
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
-CONFIG_RT_MUTEXES=y
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
-
-#
-# Block layer
-#
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
 
 #
 # IO Schedulers
@@ -102,9 +98,11 @@
 # CONFIG_DEFAULT_CFQ is not set
 CONFIG_DEFAULT_NOOP=y
 CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
+# CONFIG_FREEZER is not set
 
 #
 # Blackfin Processor Options
@@ -113,6 +111,10 @@
 #
 # Processor and Board Settings
 #
+# CONFIG_BF512 is not set
+# CONFIG_BF514 is not set
+# CONFIG_BF516 is not set
+# CONFIG_BF518 is not set
 # CONFIG_BF522 is not set
 # CONFIG_BF523 is not set
 # CONFIG_BF524 is not set
@@ -125,22 +127,31 @@
 # CONFIG_BF534 is not set
 # CONFIG_BF536 is not set
 CONFIG_BF537=y
+# CONFIG_BF538 is not set
+# CONFIG_BF539 is not set
 # CONFIG_BF542 is not set
+# CONFIG_BF542M is not set
 # CONFIG_BF544 is not set
+# CONFIG_BF544M is not set
 # CONFIG_BF547 is not set
+# CONFIG_BF547M is not set
 # CONFIG_BF548 is not set
+# CONFIG_BF548M is not set
 # CONFIG_BF549 is not set
+# CONFIG_BF549M is not set
 # CONFIG_BF561 is not set
+CONFIG_BF_REV_MIN=2
+CONFIG_BF_REV_MAX=3
 # CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
 CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
+# CONFIG_BF_REV_0_6 is not set
 # CONFIG_BF_REV_ANY is not set
 # CONFIG_BF_REV_NONE is not set
 CONFIG_BF53x=y
-CONFIG_BFIN_SINGLE_CORE=y
 CONFIG_MEM_MT48LC16M16A2TG_75=y
 CONFIG_IRQ_PLL_WAKEUP=7
 CONFIG_IRQ_RTC=8
@@ -150,7 +161,6 @@
 CONFIG_IRQ_SPORT1_RX=9
 CONFIG_IRQ_SPORT1_TX=9
 CONFIG_IRQ_TWI=10
-CONFIG_IRQ_SPI=10
 CONFIG_IRQ_UART0_RX=10
 CONFIG_IRQ_UART0_TX=10
 CONFIG_IRQ_UART1_RX=10
@@ -169,11 +179,12 @@
 CONFIG_IRQ_MEM_DMA0=13
 CONFIG_IRQ_MEM_DMA1=13
 CONFIG_IRQ_WATCH=13
+CONFIG_IRQ_SPI=10
 # CONFIG_BFIN537_STAMP is not set
 CONFIG_BFIN537_BLUETECHNIX_CM=y
+# CONFIG_BFIN537_BLUETECHNIX_TCM is not set
 # CONFIG_PNAV10 is not set
 # CONFIG_CAMSIG_MINOTAUR is not set
-# CONFIG_GENERIC_BF537_BOARD is not set
 
 #
 # BF537 Specific Configuration
@@ -196,6 +207,7 @@
 # Board customizations
 #
 # CONFIG_CMDLINE_BOOL is not set
+CONFIG_BOOT_LOAD=0x1000
 
 #
 # Clock/PLL Setup
@@ -215,13 +227,20 @@
 # CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
+# CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 
 #
-# Memory Setup
+# Misc
 #
-CONFIG_MAX_MEM_SIZE=32
-CONFIG_MEM_ADD_WIDTH=9
-CONFIG_BOOT_LOAD=0x1000
 CONFIG_BFIN_SCRATCH_REG_RETN=y
 # CONFIG_BFIN_SCRATCH_REG_RETE is not set
 # CONFIG_BFIN_SCRATCH_REG_CYCLES is not set
@@ -248,6 +267,12 @@
 CONFIG_CACHELINE_ALIGNED_L1=y
 CONFIG_SYSCALL_TAB_L1=y
 CONFIG_CPLB_SWITCH_TAB_L1=y
+CONFIG_APP_STACK_L1=y
+
+#
+# Speed Optimizations
+#
+CONFIG_BFIN_INS_LOWOVERHEAD=y
 CONFIG_RAMKERNEL=y
 # CONFIG_ROMKERNEL is not set
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -256,12 +281,14 @@
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_RESOURCES_64BIT is not set
+# CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
-CONFIG_LARGE_ALLOCS=y
+CONFIG_VIRT_TO_BUS=y
 # CONFIG_BFIN_GPTIMERS is not set
+# CONFIG_DMA_UNCACHED_4M is not set
 # CONFIG_DMA_UNCACHED_2M is not set
 CONFIG_DMA_UNCACHED_1M=y
 # CONFIG_DMA_UNCACHED_NONE is not set
@@ -275,7 +302,6 @@
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-CONFIG_L1_MAX_PIECE=16
 # CONFIG_MPU is not set
 
 #
@@ -304,36 +330,28 @@
 #
 # Bus options (PCI, PCMCIA, EISA, MCA, ISA)
 #
-# CONFIG_PCI is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 
 #
-# PCCARD (PCMCIA/CardBus) support
-#
-
-#
 # Executable file formats
 #
 CONFIG_BINFMT_ELF_FDPIC=y
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
 CONFIG_BINFMT_SHARED_FLAT=y
+# CONFIG_HAVE_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
 
 #
 # Power management options
 #
 # CONFIG_PM is not set
-# CONFIG_PM_WAKEUP_BY_GPIO is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
 
 #
 # CPU Frequency scaling
 #
 # CONFIG_CPU_FREQ is not set
-
-#
-# Networking
-#
 CONFIG_NET=y
 
 #
@@ -346,6 +364,7 @@
 # CONFIG_XFRM_USER is not set
 # CONFIG_XFRM_SUB_POLICY is not set
 # CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
@@ -358,7 +377,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -367,6 +386,7 @@
 CONFIG_INET_XFRM_MODE_TRANSPORT=y
 CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -374,8 +394,6 @@
 CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_TCP_MD5SIG is not set
 # CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
 # CONFIG_NETLABEL is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
@@ -384,6 +402,7 @@
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
 # CONFIG_DECNET is not set
 # CONFIG_LLC2 is not set
@@ -393,10 +412,6 @@
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
 # CONFIG_NET_SCHED is not set
 
 #
@@ -404,18 +419,14 @@
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
-# CONFIG_CFG80211 is not set
-# CONFIG_WIRELESS_EXT is not set
-# CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
 # CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -427,10 +438,6 @@
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
 # CONFIG_CONNECTOR is not set
 CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
@@ -438,6 +445,7 @@
 CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_REDBOOT_PARTS is not set
 # CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
 
 #
 # User Modules And Translation Layers
@@ -450,12 +458,15 @@
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
 # CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
 
 #
 # RAM/ROM/Flash chip drivers
 #
-# CONFIG_MTD_CFI is not set
+CONFIG_MTD_CFI=y
 # CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
 CONFIG_MTD_MAP_BANK_WIDTH_1=y
 CONFIG_MTD_MAP_BANK_WIDTH_2=y
 CONFIG_MTD_MAP_BANK_WIDTH_4=y
@@ -466,6 +477,10 @@
 CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_CFI_I4 is not set
 # CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+# CONFIG_MTD_CFI_AMDSTD is not set
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
 CONFIG_MTD_RAM=y
 # CONFIG_MTD_ROM is not set
 # CONFIG_MTD_ABSENT is not set
@@ -473,7 +488,8 @@
 #
 # Mapping drivers for chip access
 #
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_GPIO_ADDR=y
 CONFIG_MTD_UCLINUX=y
 # CONFIG_MTD_PLATRAM is not set
 
@@ -498,33 +514,23 @@
 # UBI - Unsorted block images
 #
 # CONFIG_MTD_UBI is not set
-
-#
-# Parallel port support
-#
 # CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNPACPI is not set
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
 # CONFIG_BLK_DEV_NBD is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+# CONFIG_BLK_DEV_XIP is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
 #
@@ -532,22 +538,17 @@
 #
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
 # CONFIG_SCSI_NETLINK is not set
 # CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 # CONFIG_MD is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
+# CONFIG_VETH is not set
 CONFIG_PHYLIB=y
 
 #
@@ -561,46 +562,44 @@
 # CONFIG_VITESSE_PHY is not set
 # CONFIG_SMSC_PHY is not set
 # CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
 # CONFIG_FIXED_PHY is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
-# CONFIG_SMC91X is not set
 CONFIG_BFIN_MAC=y
 CONFIG_BFIN_MAC_USE_L1=y
 CONFIG_BFIN_TX_DESC_NUM=10
 CONFIG_BFIN_RX_DESC_NUM=20
 # CONFIG_BFIN_MAC_RMII is not set
+# CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
 #
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
 # CONFIG_WAN is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
 # CONFIG_PHONE is not set
 
 #
@@ -618,15 +617,17 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PFLAGS is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
+# CONFIG_SIMPLE_GPIO is not set
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
+# CONFIG_BFIN_JTAG_COMM is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
 
 #
@@ -655,138 +656,119 @@
 # CAN, the car bus and industrial fieldbus
 #
 # CONFIG_CAN4LINUX is not set
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
-# CONFIG_GEN_RTC is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
-
-#
-# SPI support
-#
 # CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
 
 #
-# Dallas's 1-wire bus
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
 #
 # CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
 # CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_VT1211 is not set
 # CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
 
 #
 # Multifunction device drivers
 #
+# CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
 
 #
 # Multimedia devices
 #
+
+#
+# Multimedia core support
+#
 # CONFIG_VIDEO_DEV is not set
 # CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
 # CONFIG_DAB is not set
 
 #
 # Graphics support
 #
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
 # Display device support
 #
 # CONFIG_DISPLAY_SUPPORT is not set
-# CONFIG_VGASTATE is not set
-# CONFIG_FB is not set
-
-#
-# Sound
-#
 # CONFIG_SOUND is not set
-
-#
-# USB support
-#
+CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
 # CONFIG_USB_ARCH_HAS_EHCI is not set
 # CONFIG_USB is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
 
 #
 # Enable Host or Gadget support to see Inventra options
 #
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
 #
 # CONFIG_USB_GADGET is not set
 # CONFIG_MMC is not set
-
-#
-# LED devices
-#
+# CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
+# CONFIG_ACCESSIBILITY is not set
 # CONFIG_RTC_CLASS is not set
-
-#
-# DMA Engine support
-#
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# PBX support
-#
-# CONFIG_PBX is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
 
 #
 # File systems
@@ -796,20 +778,18 @@
 # CONFIG_EXT2_FS_POSIX_ACL is not set
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT3_FS is not set
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 # CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
+# CONFIG_DNOTIFY is not set
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
-# CONFIG_DNOTIFY is not set
 # CONFIG_AUTOFS_FS is not set
 # CONFIG_AUTOFS4_FS is not set
 # CONFIG_FUSE_FS is not set
@@ -835,7 +815,6 @@
 CONFIG_SYSFS=y
 # CONFIG_TMPFS is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
 # CONFIG_CONFIGFS_FS is not set
 
 #
@@ -848,60 +827,53 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_YAFFS_FS is not set
 # CONFIG_JFFS2_FS is not set
+# CONFIG_YAFFS_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
 
 #
 # Partition Types
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
 # CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
 # CONFIG_DLM is not set
 
 #
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
 # CONFIG_MAGIC_SYSRQ is not set
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
 # CONFIG_DEBUG_KERNEL is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+
+#
+# Tracers
+#
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
+# CONFIG_DEBUG_DOUBLEFAULT is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
 CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
@@ -919,13 +891,95 @@
 #
 # CONFIG_KEYS is not set
 CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_NETWORK is not set
-CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0
+CONFIG_CRYPTO=y
 
 #
-# Cryptographic options
+# Crypto core or helper
 #
-# CONFIG_CRYPTO is not set
+# CONFIG_CRYPTO_FIPS is not set
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
 
 #
 # Library routines
@@ -933,11 +987,12 @@
 CONFIG_BITREVERSE=y
 CONFIG_CRC_CCITT=m
 # CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
 # CONFIG_CRC_ITU_T is not set
 CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig
index 569523c..8021130 100644
--- a/arch/blackfin/configs/CM-BF537U_defconfig
+++ b/arch/blackfin/configs/CM-BF537U_defconfig
@@ -49,7 +49,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -355,7 +355,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -556,9 +556,9 @@
 # CONFIG_BFIN_MAC is not set
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -652,6 +652,10 @@
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig
index 035b635..dd815f0 100644
--- a/arch/blackfin/configs/CM-BF548_defconfig
+++ b/arch/blackfin/configs/CM-BF548_defconfig
@@ -49,7 +49,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
@@ -125,9 +125,9 @@
 CONFIG_BF548=y
 # CONFIG_BF549 is not set
 # CONFIG_BF561 is not set
-CONFIG_BF_REV_0_0=y
+# CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
-# CONFIG_BF_REV_0_2 is not set
+CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
@@ -422,7 +422,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -811,6 +811,10 @@
 # CONFIG_I2C_DEBUG_BUS is not set
 # CONFIG_I2C_DEBUG_CHIP is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig
index 7015e42..16c198b 100644
--- a/arch/blackfin/configs/CM-BF561_defconfig
+++ b/arch/blackfin/configs/CM-BF561_defconfig
@@ -49,7 +49,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -389,7 +389,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -569,9 +569,9 @@
 # CONFIG_IBM_NEW_EMAC_TAH is not set
 # CONFIG_IBM_NEW_EMAC_EMAC4 is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -646,6 +646,10 @@
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig
index dfc8e1d..6b4c1a9 100644
--- a/arch/blackfin/configs/H8606_defconfig
+++ b/arch/blackfin/configs/H8606_defconfig
@@ -48,7 +48,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
@@ -347,7 +347,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -594,8 +594,8 @@
 # CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 CONFIG_DM9000=y
-CONFIG_NETDEV_1000=y
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 # CONFIG_AX88180 is not set
 
 #
diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig
index 95a5f91..1ec9ae2 100644
--- a/arch/blackfin/configs/IP0X_defconfig
+++ b/arch/blackfin/configs/IP0X_defconfig
@@ -49,7 +49,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -355,7 +355,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -672,9 +672,9 @@
 # CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 CONFIG_DM9000=y
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig
index 78e2408..09701f9 100644
--- a/arch/blackfin/configs/PNAV-10_defconfig
+++ b/arch/blackfin/configs/PNAV-10_defconfig
@@ -1,6 +1,6 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -40,26 +40,26 @@
 # CONFIG_NAMESPACES is not set
 # CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -68,7 +68,6 @@
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -229,7 +228,10 @@
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -374,7 +376,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -598,9 +600,8 @@
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -640,11 +641,11 @@
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 CONFIG_TOUCHSCREEN_AD7877=y
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 # CONFIG_TOUCHSCREEN_AD7879_SPI is not set
 # CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -676,14 +677,14 @@
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_TWI_LCD=m
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 # CONFIG_SIMPLE_GPIO is not set
 # CONFIG_VT is not set
 CONFIG_DEVKMEM=y
@@ -796,6 +797,7 @@
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD5252 is not set
 # CONFIG_SENSORS_AD7414 is not set
 # CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADCXX is not set
@@ -867,6 +869,7 @@
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1111,6 +1114,7 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
 CONFIG_YAFFS_FS=y
 CONFIG_YAFFS_YAFFS1=y
 # CONFIG_YAFFS_9BYTE_TAGS is not set
@@ -1121,7 +1125,6 @@
 # CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
 # CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
 CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
-# CONFIG_JFFS2_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1213,7 +1216,6 @@
 # CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1343,7 +1345,6 @@
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig
index 2bc0779..ec84a53 100644
--- a/arch/blackfin/configs/SRV1_defconfig
+++ b/arch/blackfin/configs/SRV1_defconfig
@@ -52,7 +52,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -373,7 +373,7 @@
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig
index e65b3a4..6e27962 100644
--- a/arch/blackfin/configs/TCM-BF537_defconfig
+++ b/arch/blackfin/configs/TCM-BF537_defconfig
@@ -42,7 +42,7 @@
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -537,7 +537,30 @@
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h
index 1b040f5..94697f0 100644
--- a/arch/blackfin/include/asm/cacheflush.h
+++ b/arch/blackfin/include/asm/cacheflush.h
@@ -30,7 +30,8 @@
 #ifndef _BLACKFIN_CACHEFLUSH_H
 #define _BLACKFIN_CACHEFLUSH_H
 
-extern void blackfin_icache_dcache_flush_range(unsigned long start_address, unsigned long end_address);
+#include <asm/blackfin.h>	/* for SSYNC() */
+
 extern void blackfin_icache_flush_range(unsigned long start_address, unsigned long end_address);
 extern void blackfin_dcache_flush_range(unsigned long start_address, unsigned long end_address);
 extern void blackfin_dcache_invalidate_range(unsigned long start_address, unsigned long end_address);
@@ -54,32 +55,28 @@
 
 static inline void flush_icache_range(unsigned start, unsigned end)
 {
-#if defined(CONFIG_BFIN_DCACHE) && defined(CONFIG_BFIN_ICACHE)
+#if defined(CONFIG_BFIN_WB)
+	blackfin_dcache_flush_range(start, end);
+#endif
 
-# if defined(CONFIG_BFIN_WT)
-	blackfin_icache_flush_range((start), (end));
+	/* Make sure all write buffers in the data side of the core
+	 * are flushed before trying to invalidate the icache.  This
+	 * needs to be after the data flush and before the icache
+	 * flush so that the SSYNC does the right thing in preventing
+	 * the instruction prefetcher from hitting things in cached
+	 * memory at the wrong time -- it runs much further ahead than
+	 * the pipeline.
+	 */
+	SSYNC();
+#if defined(CONFIG_BFIN_ICACHE)
+	blackfin_icache_flush_range(start, end);
 	flush_icache_range_others(start, end);
-# else
-	blackfin_icache_dcache_flush_range((start), (end));
-# endif
-
-#else
-
-# if defined(CONFIG_BFIN_ICACHE)
-	blackfin_icache_flush_range((start), (end));
-	flush_icache_range_others(start, end);
-# endif
-# if defined(CONFIG_BFIN_DCACHE)
-	blackfin_dcache_flush_range((start), (end));
-# endif
-
 #endif
 }
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
 do { memcpy(dst, src, len);						\
      flush_icache_range((unsigned) (dst), (unsigned) (dst) + (len));	\
-     flush_icache_range_others((unsigned long) (dst), (unsigned long) (dst) + (len));\
 } while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)	memcpy(dst, src, len)
@@ -111,6 +108,11 @@
 		addr >= _ramend && addr < physical_mem_end)
 		return 1;
 
+#ifndef CONFIG_BFIN_L2_NOT_CACHED
+	if (addr >= L2_START && addr < L2_START + L2_LENGTH)
+		return 1;
+#endif
+
 	return 0;
 }
 
diff --git a/arch/blackfin/include/asm/cplb.h b/arch/blackfin/include/asm/cplb.h
index ad566ff..a75a6a9 100644
--- a/arch/blackfin/include/asm/cplb.h
+++ b/arch/blackfin/include/asm/cplb.h
@@ -53,29 +53,32 @@
 #define SDRAM_DGENERIC   (CPLB_L1_CHBL | CPLB_WT | CPLB_L1_AOW  | CPLB_COMMON)
 #endif
 
-#define L1_DMEMORY       (CPLB_LOCK | CPLB_COMMON)
-
-#ifdef CONFIG_SMP
-#define L2_ATTR           (INITIAL_T | I_CPLB | D_CPLB)
-#define L2_IMEMORY         (CPLB_COMMON | CPLB_LOCK)
-#define L2_DMEMORY         (CPLB_COMMON | CPLB_LOCK)
-
-#else
-#ifdef CONFIG_BFIN_L2_CACHEABLE
-#define L2_IMEMORY        (SDRAM_IGENERIC)
-#define L2_DMEMORY        (SDRAM_DGENERIC)
-#else
-#define L2_IMEMORY        (CPLB_COMMON)
-#define L2_DMEMORY        (CPLB_COMMON)
-#endif /* CONFIG_BFIN_L2_CACHEABLE */
-
-#define L2_ATTR           (INITIAL_T | SWITCH_T | I_CPLB | D_CPLB)
-#endif /* CONFIG_SMP */
-
 #define SDRAM_DNON_CHBL  (CPLB_COMMON)
 #define SDRAM_EBIU       (CPLB_COMMON)
 #define SDRAM_OOPS       (CPLB_VALID | ANOMALY_05000158_WORKAROUND | CPLB_LOCK | CPLB_DIRTY)
 
+#define L1_DMEMORY       (CPLB_LOCK | CPLB_COMMON)
+
+#ifdef CONFIG_SMP
+#define L2_ATTR          (INITIAL_T | I_CPLB | D_CPLB)
+#define L2_IMEMORY       (CPLB_COMMON)
+#define L2_DMEMORY       (CPLB_LOCK | CPLB_COMMON)
+
+#else
+#define L2_ATTR          (INITIAL_T | SWITCH_T | I_CPLB | D_CPLB)
+#define L2_IMEMORY       (SDRAM_IGENERIC)
+
+# if defined(CONFIG_BFIN_L2_WB)
+# define L2_DMEMORY      (CPLB_L1_CHBL | CPLB_COMMON)
+# elif defined(CONFIG_BFIN_L2_WT)
+# define L2_DMEMORY      (CPLB_L1_CHBL | CPLB_WT | CPLB_L1_AOW  | CPLB_COMMON)
+# elif defined(CONFIG_BFIN_L2_NOT_CACHED)
+# define L2_DMEMORY      (CPLB_COMMON)
+# else
+# define L2_DMEMORY      (0)
+# endif
+#endif /* CONFIG_SMP */
+
 #define SIZE_1K 0x00000400      /* 1K */
 #define SIZE_4K 0x00001000      /* 4K */
 #define SIZE_1M 0x00100000      /* 1M */
diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index e4f7b80..c9a5962 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h
@@ -206,10 +206,16 @@
 
 static inline void set_dma_sg(unsigned int channel, struct dmasg *sg, int ndsize)
 {
+	/* Make sure the internal data buffers in the core are drained
+	 * so that the DMA descriptors are completely written when the
+	 * DMA engine goes to fetch them below.
+	 */
+	SSYNC();
+
+	dma_ch[channel].regs->next_desc_ptr = sg;
 	dma_ch[channel].regs->cfg =
 		(dma_ch[channel].regs->cfg & ~(0xf << 8)) |
 		((ndsize & 0xf) << 8);
-	dma_ch[channel].regs->next_desc_ptr = sg;
 }
 
 static inline int dma_channel_active(unsigned int channel)
@@ -253,5 +259,7 @@
 void *dma_memcpy(void *dest, const void *src, size_t count);
 void *safe_dma_memcpy(void *dest, const void *src, size_t count);
 void blackfin_dma_early_init(void);
+void early_dma_memcpy(void *dest, const void *src, size_t count);
+void early_dma_memcpy_done(void);
 
 #endif
diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h
index cdbfcfc..230e160 100644
--- a/arch/blackfin/include/asm/elf.h
+++ b/arch/blackfin/include/asm/elf.h
@@ -55,50 +55,50 @@
 #define ELF_FDPIC_CORE_EFLAGS	EF_BFIN_FDPIC
 #define ELF_EXEC_PAGESIZE	4096
 
-#define	R_unused0	0	/* relocation type 0 is not defined */
-#define R_pcrel5m2	1	/*LSETUP part a */
-#define R_unused1	2	/* relocation type 2 is not defined */
-#define R_pcrel10	3	/* type 3, if cc jump <target>  */
-#define R_pcrel12_jump	4	/* type 4, jump <target> */
-#define R_rimm16	5	/* type 0x5, rN = <target> */
-#define R_luimm16	6	/* # 0x6, preg.l=<target> Load imm 16 to lower half */
-#define R_huimm16  	7	/* # 0x7, preg.h=<target> Load imm 16 to upper half */
-#define R_pcrel12_jump_s 8	/* # 0x8 jump.s <target> */
-#define R_pcrel24_jump_x 9	/* # 0x9 jump.x <target> */
-#define R_pcrel24       10	/* # 0xa call <target> , not expandable */
-#define R_unusedb       11	/* # 0xb not generated */
-#define R_unusedc       12	/* # 0xc  not used */
-#define R_pcrel24_jump_l 13	/*0xd jump.l <target> */
-#define R_pcrel24_call_x 14	/* 0xE, call.x <target> if <target> is above 24 bit limit call through P1 */
-#define R_var_eq_symb    15	/* 0xf, linker should treat it same as 0x12 */
-#define R_byte_data      16	/* 0x10, .byte var = symbol */
-#define R_byte2_data     17	/* 0x11, .byte2 var = symbol */
-#define R_byte4_data     18	/* 0x12, .byte4 var = symbol and .var var=symbol */
-#define R_pcrel11        19	/* 0x13, lsetup part b */
-#define R_unused14      20	/* 0x14, undefined */
-#define R_unused15       21	/* not generated by VDSP 3.5 */
+#define R_BFIN_UNUSED0         0   /* relocation type 0 is not defined */
+#define R_BFIN_PCREL5M2        1   /* LSETUP part a */
+#define R_BFIN_UNUSED1         2   /* relocation type 2 is not defined */
+#define R_BFIN_PCREL10         3   /* type 3, if cc jump <target> */
+#define R_BFIN_PCREL12_JUMP    4   /* type 4, jump <target> */
+#define R_BFIN_RIMM16          5   /* type 0x5, rN = <target> */
+#define R_BFIN_LUIMM16         6   /* # 0x6, preg.l=<target> Load imm 16 to lower half */
+#define R_BFIN_HUIMM16         7   /* # 0x7, preg.h=<target> Load imm 16 to upper half */
+#define R_BFIN_PCREL12_JUMP_S  8   /* # 0x8 jump.s <target> */
+#define R_BFIN_PCREL24_JUMP_X  9   /* # 0x9 jump.x <target> */
+#define R_BFIN_PCREL24         10  /* # 0xa call <target> , not expandable */
+#define R_BFIN_UNUSEDB         11  /* # 0xb not generated */
+#define R_BFIN_UNUSEDC         12  /* # 0xc  not used */
+#define R_BFIN_PCREL24_JUMP_L  13  /* 0xd jump.l <target> */
+#define R_BFIN_PCREL24_CALL_X  14  /* 0xE, call.x <target> if <target> is above 24 bit limit call through P1 */
+#define R_BFIN_VAR_EQ_SYMB     15  /* 0xf, linker should treat it same as 0x12 */
+#define R_BFIN_BYTE_DATA       16  /* 0x10, .byte var = symbol */
+#define R_BFIN_BYTE2_DATA      17  /* 0x11, .byte2 var = symbol */
+#define R_BFIN_BYTE4_DATA      18  /* 0x12, .byte4 var = symbol and .var var=symbol */
+#define R_BFIN_PCREL11         19  /* 0x13, lsetup part b */
+#define R_BFIN_UNUSED14        20  /* 0x14, undefined */
+#define R_BFIN_UNUSED15        21  /* not generated by VDSP 3.5 */
 
 /* arithmetic relocations */
-#define R_push		 0xE0
-#define R_const		 0xE1
-#define R_add		 0xE2
-#define R_sub		 0xE3
-#define R_mult		 0xE4
-#define R_div		 0xE5
-#define R_mod		 0xE6
-#define R_lshift	 0xE7
-#define R_rshift	 0xE8
-#define R_and		 0xE9
-#define R_or		 0xEA
-#define R_xor		 0xEB
-#define R_land		 0xEC
-#define R_lor		 0xED
-#define R_len		 0xEE
-#define R_neg		 0xEF
-#define R_comp		 0xF0
-#define R_page		 0xF1
-#define R_hwpage	 0xF2
-#define R_addr		 0xF3
+#define R_BFIN_PUSH            0xE0
+#define R_BFIN_CONST           0xE1
+#define R_BFIN_ADD             0xE2
+#define R_BFIN_SUB             0xE3
+#define R_BFIN_MULT            0xE4
+#define R_BFIN_DIV             0xE5
+#define R_BFIN_MOD             0xE6
+#define R_BFIN_LSHIFT          0xE7
+#define R_BFIN_RSHIFT          0xE8
+#define R_BFIN_AND             0xE9
+#define R_BFIN_OR              0xEA
+#define R_BFIN_XOR             0xEB
+#define R_BFIN_LAND            0xEC
+#define R_BFIN_LOR             0xED
+#define R_BFIN_LEN             0xEE
+#define R_BFIN_NEG             0xEF
+#define R_BFIN_COMP            0xF0
+#define R_BFIN_PAGE            0xF1
+#define R_BFIN_HWPAGE          0xF2
+#define R_BFIN_ADDR            0xF3
 
 /* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
    use of this is to invoke "./ld.so someprog" to test out a new version of
diff --git a/arch/blackfin/include/asm/entry.h b/arch/blackfin/include/asm/entry.h
index b30a296..ec58efc 100644
--- a/arch/blackfin/include/asm/entry.h
+++ b/arch/blackfin/include/asm/entry.h
@@ -35,21 +35,39 @@
 #else
 # define LOAD_IPIPE_IPEND
 #endif
+
+#ifndef CONFIG_EXACT_HWERR
+/* As a debugging aid - we save IPEND when DEBUG_KERNEL is on,
+ * otherwise it is a waste of cycles.
+ */
+# ifndef CONFIG_DEBUG_KERNEL
 #define INTERRUPT_ENTRY(N)						\
     [--sp] = SYSCFG;							\
-									\
     [--sp] = P0;	/*orig_p0*/					\
     [--sp] = R0;	/*orig_r0*/					\
     [--sp] = (R7:0,P5:0);						\
     R0 = (N);								\
     LOAD_IPIPE_IPEND							\
     jump __common_int_entry;
+# else /* CONFIG_DEBUG_KERNEL */
+#define INTERRUPT_ENTRY(N)						\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
+    R0 = (N);								\
+    LOAD_IPIPE_IPEND							\
+    jump __common_int_entry;
+# endif /* CONFIG_DEBUG_KERNEL */
 
 /* For timer interrupts, we need to save IPEND, since the user_mode
-	   macro accesses it to determine where to account time.  */
+ *macro accesses it to determine where to account time.
+ */
 #define TIMER_INTERRUPT_ENTRY(N)					\
     [--sp] = SYSCFG;							\
-									\
     [--sp] = P0;	/*orig_p0*/					\
     [--sp] = R0;	/*orig_r0*/					\
     [--sp] = (R7:0,P5:0);						\
@@ -58,6 +76,74 @@
     r1 = [p0];								\
     R0 = (N);								\
     jump __common_int_entry;
+#else /* CONFIG_EXACT_HWERR is defined */
+
+/* if we want hardware error to be exact, we need to do a SSYNC (which forces
+ * read/writes to complete to the memory controllers), and check to see that
+ * caused a pending HW error condition. If so, we assume it was caused by user
+ * space, by setting the same interrupt that we are in (so it goes off again)
+ * and context restore, and a RTI (without servicing anything). This should
+ * cause the pending HWERR to fire, and when that is done, this interrupt will
+ * be re-serviced properly.
+ * As you can see by the code - we actually need to do two SSYNCS - one to
+ * make sure the read/writes complete, and another to make sure the hardware
+ * error is recognized by the core.
+ */
+#define INTERRUPT_ENTRY(N)						\
+    SSYNC;								\
+    SSYNC;								\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    R1 = ASTAT;								\
+    P0.L = LO(ILAT);							\
+    P0.H = HI(ILAT);							\
+    R0 = [P0];								\
+    CC = BITTST(R0, EVT_IVHW_P);					\
+    IF CC JUMP 1f;							\
+    ASTAT = R1;								\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
+    R0 = (N);								\
+    LOAD_IPIPE_IPEND							\
+    jump __common_int_entry;						\
+1:  ASTAT = R1;								\
+    RAISE N;								\
+    (R7:0, P5:0) = [SP++];						\
+    SP += 0x8;								\
+    SYSCFG = [SP++];							\
+    CSYNC;								\
+    RTI;
+
+#define TIMER_INTERRUPT_ENTRY(N)					\
+    SSYNC;								\
+    SSYNC;								\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    R1 = ASTAT;								\
+    P0.L = LO(ILAT);							\
+    P0.H = HI(ILAT);							\
+    R0 = [P0];								\
+    CC = BITTST(R0, EVT_IVHW_P);					\
+    IF CC JUMP 1f;							\
+    ASTAT = R1;								\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
+    R0 = (N);								\
+    jump __common_int_entry;						\
+1:  ASTAT = R1;								\
+    RAISE N;								\
+    (R7:0, P5:0) = [SP++];						\
+    SP += 0x8;								\
+    SYSCFG = [SP++];							\
+    CSYNC;								\
+    RTI;
+#endif	/* CONFIG_EXACT_HWERR */
 
 /* This one pushes RETI without using CLI.  Interrupts are enabled.  */
 #define SAVE_CONTEXT_SYSCALL	save_context_syscall
diff --git a/arch/blackfin/include/asm/gptimers.h b/arch/blackfin/include/asm/gptimers.h
index b0f847a..89f08de 100644
--- a/arch/blackfin/include/asm/gptimers.h
+++ b/arch/blackfin/include/asm/gptimers.h
@@ -30,6 +30,7 @@
 # else
 #  define MAX_BLACKFIN_GPTIMERS 11
 #  define TIMER8_GROUP_REG      TIMER_ENABLE1
+#  define TIMER_GROUP2          1
 # endif
 # define TIMER0_GROUP_REG       TIMER_ENABLE0
 #endif
@@ -40,10 +41,12 @@
 # define MAX_BLACKFIN_GPTIMERS 12
 # define TIMER0_GROUP_REG      TMRS8_ENABLE
 # define TIMER8_GROUP_REG      TMRS4_ENABLE
+# define TIMER_GROUP2          1
 #endif
 /*
  * All others: 3 timers:
  */
+#define TIMER_GROUP1           0
 #if !defined(MAX_BLACKFIN_GPTIMERS)
 # define MAX_BLACKFIN_GPTIMERS 3
 # define TIMER0_GROUP_REG      TIMER_ENABLE
@@ -109,8 +112,8 @@
 #define TIMER_ERR_PROG_PER  0x8000
 #define TIMER_ERR_PROG_PW   0xC000
 #define TIMER_EMU_RUN       0x0200
-#define	TIMER_TOGGLE_HI     0x0100
-#define	TIMER_CLK_SEL       0x0080
+#define TIMER_TOGGLE_HI     0x0100
+#define TIMER_CLK_SEL       0x0080
 #define TIMER_OUT_DIS       0x0040
 #define TIMER_TIN_SEL       0x0020
 #define TIMER_IRQ_ENA       0x0010
@@ -169,23 +172,25 @@
 
 /* The actual gptimer API */
 
-void     set_gptimer_pwidth    (int timer_id, uint32_t width);
-uint32_t get_gptimer_pwidth    (int timer_id);
-void     set_gptimer_period    (int timer_id, uint32_t period);
-uint32_t get_gptimer_period    (int timer_id);
-uint32_t get_gptimer_count     (int timer_id);
-uint16_t get_gptimer_intr      (int timer_id);
-void     clear_gptimer_intr    (int timer_id);
-uint16_t get_gptimer_over      (int timer_id);
-void     clear_gptimer_over    (int timer_id);
-void     set_gptimer_config    (int timer_id, uint16_t config);
-uint16_t get_gptimer_config    (int timer_id);
-void     set_gptimer_pulse_hi  (int timer_id);
+void     set_gptimer_pwidth(int timer_id, uint32_t width);
+uint32_t get_gptimer_pwidth(int timer_id);
+void     set_gptimer_period(int timer_id, uint32_t period);
+uint32_t get_gptimer_period(int timer_id);
+uint32_t get_gptimer_count(int timer_id);
+int      get_gptimer_intr(int timer_id);
+void     clear_gptimer_intr(int timer_id);
+int      get_gptimer_over(int timer_id);
+void     clear_gptimer_over(int timer_id);
+void     set_gptimer_config(int timer_id, uint16_t config);
+uint16_t get_gptimer_config(int timer_id);
+int      get_gptimer_run(int timer_id);
+void     set_gptimer_pulse_hi(int timer_id);
 void     clear_gptimer_pulse_hi(int timer_id);
-void     enable_gptimers       (uint16_t mask);
-void     disable_gptimers      (uint16_t mask);
-uint16_t get_enabled_gptimers  (void);
-uint32_t get_gptimer_status    (int group);
-void     set_gptimer_status    (int group, uint32_t value);
+void     enable_gptimers(uint16_t mask);
+void     disable_gptimers(uint16_t mask);
+void     disable_gptimers_sync(uint16_t mask);
+uint16_t get_enabled_gptimers(void);
+uint32_t get_gptimer_status(int group);
+void     set_gptimer_status(int group, uint32_t value);
 
 #endif
diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h
index 63b2d8c..3022b5c 100644
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -80,19 +80,22 @@
 #define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
 #define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
 
-#define inb(addr)    readb(addr)
-#define inw(addr)    readw(addr)
-#define inl(addr)    readl(addr)
-#define outb(x,addr) ((void) writeb(x,addr))
-#define outw(x,addr) ((void) writew(x,addr))
-#define outl(x,addr) ((void) writel(x,addr))
+/* Convert "I/O port addresses" to actual addresses.  i.e. ugly casts. */
+#define __io(port) ((void *)(unsigned long)(port))
 
-#define inb_p(addr)    inb(addr)
-#define inw_p(addr)    inw(addr)
-#define inl_p(addr)    inl(addr)
-#define outb_p(x,addr) outb(x,addr)
-#define outw_p(x,addr) outw(x,addr)
-#define outl_p(x,addr) outl(x,addr)
+#define inb(port)    readb(__io(port))
+#define inw(port)    readw(__io(port))
+#define inl(port)    readl(__io(port))
+#define outb(x,port) writeb(x,__io(port))
+#define outw(x,port) writew(x,__io(port))
+#define outl(x,port) writel(x,__io(port))
+
+#define inb_p(port)    inb(__io(port))
+#define inw_p(port)    inw(__io(port))
+#define inl_p(port)    inl(__io(port))
+#define outb_p(x,port) outb(x,__io(port))
+#define outw_p(x,port) outw(x,__io(port))
+#define outl_p(x,port) outl(x,__io(port))
 
 #define ioread8_rep(a,d,c)	readsb(a,d,c)
 #define ioread16_rep(a,d,c)	readsw(a,d,c)
diff --git a/arch/blackfin/include/asm/ipipe.h b/arch/blackfin/include/asm/ipipe.h
index 343b563..51d0bf5 100644
--- a/arch/blackfin/include/asm/ipipe.h
+++ b/arch/blackfin/include/asm/ipipe.h
@@ -35,10 +35,10 @@
 #include <asm/atomic.h>
 #include <asm/traps.h>
 
-#define IPIPE_ARCH_STRING     "1.9-00"
+#define IPIPE_ARCH_STRING     "1.9-01"
 #define IPIPE_MAJOR_NUMBER    1
 #define IPIPE_MINOR_NUMBER    9
-#define IPIPE_PATCH_NUMBER    0
+#define IPIPE_PATCH_NUMBER    1
 
 #ifdef CONFIG_SMP
 #error "I-pipe/blackfin: SMP not implemented"
diff --git a/arch/blackfin/include/asm/pda.h b/arch/blackfin/include/asm/pda.h
index a671427..b42555c 100644
--- a/arch/blackfin/include/asm/pda.h
+++ b/arch/blackfin/include/asm/pda.h
@@ -64,8 +64,6 @@
 
 extern struct blackfin_pda cpu_pda[];
 
-void reserve_pda(void);
-
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_BLACKFIN_PDA_H */
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 0eece23..3040415 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -131,8 +131,8 @@
 /* Get the Silicon Revision of the chip */
 static inline uint32_t __pure bfin_revid(void)
 {
-	/* stored in the upper 4 bits */
-	uint32_t revid = bfin_read_CHIPID() >> 28;
+	/* Always use CHIPID, to work around ANOMALY_05000234 */
+	uint32_t revid = (bfin_read_CHIPID() & CHIPID_VERSION) >> 28;
 
 #ifdef CONFIG_BF52x
 	/* ANOMALY_05000357
diff --git a/arch/blackfin/include/asm/time.h b/arch/blackfin/include/asm/time.h
index ddc43ce..589e937 100644
--- a/arch/blackfin/include/asm/time.h
+++ b/arch/blackfin/include/asm/time.h
@@ -37,4 +37,5 @@
 extern unsigned int __bfin_cycles_mod;
 #endif
 
+extern void __init setup_core_timer(void);
 #endif
diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 3248033..8894e9f 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -59,12 +59,8 @@
 #ifndef CONFIG_ACCESS_CHECK
 static inline int _access_ok(unsigned long addr, unsigned long size) { return 1; }
 #else
-#ifdef CONFIG_ACCESS_OK_L1
-extern int _access_ok(unsigned long addr, unsigned long size)__attribute__((l1_text));
-#else
 extern int _access_ok(unsigned long addr, unsigned long size);
 #endif
-#endif
 
 /*
  * The exception table consists of pairs of addresses: the first is the
@@ -83,9 +79,6 @@
 	unsigned long insn, fixup;
 };
 
-/* Returns 0 if exception not found and fixup otherwise.  */
-extern unsigned long search_exception_table(unsigned long);
-
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
@@ -233,16 +226,29 @@
 }
 
 /*
- * Return the size of a string (including the ending 0)
+ * Get the size of a string in user space.
+ *   src: The string to measure
+ *     n: The maximum valid length
  *
- * Return 0 on exception, a value greater than N if too long
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than n.
  */
-static inline long strnlen_user(const char *src, long n)
+static inline long __must_check strnlen_user(const char *src, long n)
 {
-	return (strlen(src) + 1);
+	if (!access_ok(VERIFY_READ, src, 1))
+		return 0;
+	return strnlen(src, n) + 1;
 }
 
-#define strlen_user(str) strnlen_user(str, 32767)
+static inline long __must_check strlen_user(const char *src)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return 0;
+	return strlen(src) + 1;
+}
 
 /*
  * Zero Userspace
@@ -251,6 +257,8 @@
 static inline unsigned long __must_check
 __clear_user(void *to, unsigned long n)
 {
+	if (!access_ok(VERIFY_WRITE, to, n))
+		return n;
 	memset(to, 0, n);
 	return 0;
 }
diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c
index 8531693..763ed84 100644
--- a/arch/blackfin/kernel/bfin_dma_5xx.c
+++ b/arch/blackfin/kernel/bfin_dma_5xx.c
@@ -20,6 +20,11 @@
 #include <asm/dma.h>
 #include <asm/uaccess.h>
 
+/*
+ * To make sure we work around 05000119 - we always check DMA_DONE bit,
+ * never the DMA_RUN bit
+ */
+
 struct dma_channel dma_ch[MAX_DMA_CHANNELS];
 EXPORT_SYMBOL(dma_ch);
 
@@ -232,6 +237,87 @@
 void __init blackfin_dma_early_init(void)
 {
 	bfin_write_MDMA_S0_CONFIG(0);
+	bfin_write_MDMA_S1_CONFIG(0);
+}
+
+void __init early_dma_memcpy(void *pdst, const void *psrc, size_t size)
+{
+	unsigned long dst = (unsigned long)pdst;
+	unsigned long src = (unsigned long)psrc;
+	struct dma_register *dst_ch, *src_ch;
+
+	/* We assume that everything is 4 byte aligned, so include
+	 * a basic sanity check
+	 */
+	BUG_ON(dst % 4);
+	BUG_ON(src % 4);
+	BUG_ON(size % 4);
+
+	/* Force a sync in case a previous config reset on this channel
+	 * occurred.  This is needed so subsequent writes to DMA registers
+	 * are not spuriously lost/corrupted.
+	 */
+	__builtin_bfin_ssync();
+
+	src_ch = 0;
+	/* Find an avalible memDMA channel */
+	while (1) {
+		if (!src_ch || src_ch == (struct dma_register *)MDMA_S1_NEXT_DESC_PTR) {
+			dst_ch = (struct dma_register *)MDMA_D0_NEXT_DESC_PTR;
+			src_ch = (struct dma_register *)MDMA_S0_NEXT_DESC_PTR;
+		} else {
+			dst_ch = (struct dma_register *)MDMA_D1_NEXT_DESC_PTR;
+			src_ch = (struct dma_register *)MDMA_S1_NEXT_DESC_PTR;
+		}
+
+		if (!bfin_read16(&src_ch->cfg)) {
+			break;
+		} else {
+			if (bfin_read16(&src_ch->irq_status) & DMA_DONE)
+				bfin_write16(&src_ch->cfg, 0);
+		}
+
+	}
+
+	/* Destination */
+	bfin_write32(&dst_ch->start_addr, dst);
+	bfin_write16(&dst_ch->x_count, size >> 2);
+	bfin_write16(&dst_ch->x_modify, 1 << 2);
+	bfin_write16(&dst_ch->irq_status, DMA_DONE | DMA_ERR);
+
+	/* Source */
+	bfin_write32(&src_ch->start_addr, src);
+	bfin_write16(&src_ch->x_count, size >> 2);
+	bfin_write16(&src_ch->x_modify, 1 << 2);
+	bfin_write16(&src_ch->irq_status, DMA_DONE | DMA_ERR);
+
+	/* Enable */
+	bfin_write16(&src_ch->cfg, DMAEN | WDSIZE_32);
+	bfin_write16(&dst_ch->cfg, WNR | DI_EN | DMAEN | WDSIZE_32);
+
+	/* Since we are atomic now, don't use the workaround ssync */
+	__builtin_bfin_ssync();
+}
+
+void __init early_dma_memcpy_done(void)
+{
+	while ((bfin_read_MDMA_S0_CONFIG() && !(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE)) ||
+	       (bfin_read_MDMA_S1_CONFIG() && !(bfin_read_MDMA_D1_IRQ_STATUS() & DMA_DONE)))
+		continue;
+
+	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	bfin_write_MDMA_D1_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	/*
+	 * Now that DMA is done, we would normally flush cache, but
+	 * i/d cache isn't running this early, so we don't bother,
+	 * and just clear out the DMA channel for next time
+	 */
+	bfin_write_MDMA_S0_CONFIG(0);
+	bfin_write_MDMA_S1_CONFIG(0);
+	bfin_write_MDMA_D0_CONFIG(0);
+	bfin_write_MDMA_D1_CONFIG(0);
+
+	__builtin_bfin_ssync();
 }
 
 /**
diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c
index a0678da..beffa00 100644
--- a/arch/blackfin/kernel/bfin_gpio.c
+++ b/arch/blackfin/kernel/bfin_gpio.c
@@ -313,15 +313,6 @@
 # define portmux_setup(...)  do { } while (0)
 #endif
 
-static int __init bfin_gpio_init(void)
-{
-	printk(KERN_INFO "Blackfin GPIO Controller\n");
-
-	return 0;
-}
-arch_initcall(bfin_gpio_init);
-
-
 #ifndef CONFIG_BF54x
 /***********************************************************
 *
@@ -1021,15 +1012,6 @@
 
 	local_irq_save_hw(flags);
 
-	if (unlikely(reserved_gpio_irq_map[gpio_bank(gpio)] & gpio_bit(gpio))) {
-		if (system_state == SYSTEM_BOOTING)
-			dump_stack();
-		printk(KERN_ERR
-		       "bfin-gpio: GPIO %d is already reserved as gpio-irq !\n",
-		       gpio);
-		local_irq_restore_hw(flags);
-		return -EBUSY;
-	}
 	if (unlikely(reserved_peri_map[gpio_bank(gpio)] & gpio_bit(gpio))) {
 		if (system_state == SYSTEM_BOOTING)
 			dump_stack();
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index 01f917d..53e893f 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -16,7 +16,6 @@
 
 /* All the Blackfin cache functions: mach-common/cache.S */
 EXPORT_SYMBOL(blackfin_dcache_invalidate_range);
-EXPORT_SYMBOL(blackfin_icache_dcache_flush_range);
 EXPORT_SYMBOL(blackfin_icache_flush_range);
 EXPORT_SYMBOL(blackfin_dcache_flush_range);
 EXPORT_SYMBOL(blackfin_dflush_page);
diff --git a/arch/blackfin/kernel/cplb-mpu/cacheinit.c b/arch/blackfin/kernel/cplb-mpu/cacheinit.c
index c6ff947..d5a86c3 100644
--- a/arch/blackfin/kernel/cplb-mpu/cacheinit.c
+++ b/arch/blackfin/kernel/cplb-mpu/cacheinit.c
@@ -55,7 +55,14 @@
 	}
 
 	ctrl = bfin_read_DMEM_CONTROL();
-	ctrl |= DMEM_CNTR;
+
+	/*
+	 *  Anomaly notes:
+	 *  05000287 - We implement workaround #2 - Change the DMEM_CONTROL
+	 *  register, so that the port preferences for DAG0 and DAG1 are set
+	 *  to port B
+	 */
+	ctrl |= DMEM_CNTR | PORT_PREF0 | (ANOMALY_05000287 ? PORT_PREF1 : 0);
 	bfin_write_DMEM_CONTROL(ctrl);
 	SSYNC();
 }
diff --git a/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
index 3e329a6..c006a44 100644
--- a/arch/blackfin/kernel/cplb-mpu/cplbinit.c
+++ b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
@@ -64,7 +64,7 @@
 	dcplb_tbl[cpu][i_d++].data = SDRAM_OOPS | PAGE_SIZE_1KB;
 
 	icplb_tbl[cpu][i_i].addr = 0;
-	icplb_tbl[cpu][i_i++].data = i_cache | CPLB_USER_RD | PAGE_SIZE_1KB;
+	icplb_tbl[cpu][i_i++].data = CPLB_VALID | i_cache | CPLB_USER_RD | PAGE_SIZE_1KB;
 
 	/* Cover kernel memory with 4M pages.  */
 	addr = 0;
diff --git a/arch/blackfin/kernel/cplb-nompu/cacheinit.c b/arch/blackfin/kernel/cplb-nompu/cacheinit.c
index c6ff947..d5a86c3 100644
--- a/arch/blackfin/kernel/cplb-nompu/cacheinit.c
+++ b/arch/blackfin/kernel/cplb-nompu/cacheinit.c
@@ -55,7 +55,14 @@
 	}
 
 	ctrl = bfin_read_DMEM_CONTROL();
-	ctrl |= DMEM_CNTR;
+
+	/*
+	 *  Anomaly notes:
+	 *  05000287 - We implement workaround #2 - Change the DMEM_CONTROL
+	 *  register, so that the port preferences for DAG0 and DAG1 are set
+	 *  to port B
+	 */
+	ctrl |= DMEM_CNTR | PORT_PREF0 | (ANOMALY_05000287 ? PORT_PREF1 : 0);
 	bfin_write_DMEM_CONTROL(ctrl);
 	SSYNC();
 }
diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c
index c8ad051..3302719 100644
--- a/arch/blackfin/kernel/early_printk.c
+++ b/arch/blackfin/kernel/early_printk.c
@@ -178,25 +178,15 @@
 
 asmlinkage void __init init_early_exception_vectors(void)
 {
+	u32 evt;
 	SSYNC();
 
 	/* cannot program in software:
 	 * evt0 - emulation (jtag)
 	 * evt1 - reset
 	 */
-	bfin_write_EVT2(early_trap);
-	bfin_write_EVT3(early_trap);
-	bfin_write_EVT5(early_trap);
-	bfin_write_EVT6(early_trap);
-	bfin_write_EVT7(early_trap);
-	bfin_write_EVT8(early_trap);
-	bfin_write_EVT9(early_trap);
-	bfin_write_EVT10(early_trap);
-	bfin_write_EVT11(early_trap);
-	bfin_write_EVT12(early_trap);
-	bfin_write_EVT13(early_trap);
-	bfin_write_EVT14(early_trap);
-	bfin_write_EVT15(early_trap);
+	for (evt = EVT2; evt <= EVT15; evt += 4)
+		bfin_write32(evt, early_trap);
 	CSYNC();
 
 	/* Set all the return from interrupt, exception, NMI to a known place
diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c
index 3a3e961..7281a91 100644
--- a/arch/blackfin/kernel/gptimers.c
+++ b/arch/blackfin/kernel/gptimers.c
@@ -189,10 +189,10 @@
 }
 EXPORT_SYMBOL(set_gptimer_status);
 
-uint16_t get_gptimer_intr(int timer_id)
+int get_gptimer_intr(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
-	return (group_regs[BFIN_TIMER_OCTET(timer_id)]->status & timil_mask[timer_id]) ? 1 : 0;
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & timil_mask[timer_id]);
 }
 EXPORT_SYMBOL(get_gptimer_intr);
 
@@ -203,10 +203,10 @@
 }
 EXPORT_SYMBOL(clear_gptimer_intr);
 
-uint16_t get_gptimer_over(int timer_id)
+int get_gptimer_over(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
-	return (group_regs[BFIN_TIMER_OCTET(timer_id)]->status & tovf_mask[timer_id]) ? 1 : 0;
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & tovf_mask[timer_id]);
 }
 EXPORT_SYMBOL(get_gptimer_over);
 
@@ -217,6 +217,13 @@
 }
 EXPORT_SYMBOL(clear_gptimer_over);
 
+int get_gptimer_run(int timer_id)
+{
+	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & trun_mask[timer_id]);
+}
+EXPORT_SYMBOL(get_gptimer_run);
+
 void set_gptimer_config(int timer_id, uint16_t config)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
@@ -244,7 +251,7 @@
 }
 EXPORT_SYMBOL(enable_gptimers);
 
-void disable_gptimers(uint16_t mask)
+static void _disable_gptimers(uint16_t mask)
 {
 	int i;
 	uint16_t m = mask;
@@ -253,6 +260,12 @@
 		group_regs[i]->disable = m & 0xFF;
 		m >>= 8;
 	}
+}
+
+void disable_gptimers(uint16_t mask)
+{
+	int i;
+	_disable_gptimers(mask);
 	for (i = 0; i < MAX_BLACKFIN_GPTIMERS; ++i)
 		if (mask & (1 << i))
 			group_regs[BFIN_TIMER_OCTET(i)]->status |= trun_mask[i];
@@ -260,6 +273,13 @@
 }
 EXPORT_SYMBOL(disable_gptimers);
 
+void disable_gptimers_sync(uint16_t mask)
+{
+	_disable_gptimers(mask);
+	SSYNC();
+}
+EXPORT_SYMBOL(disable_gptimers_sync);
+
 void set_gptimer_pulse_hi(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
diff --git a/arch/blackfin/kernel/ipipe.c b/arch/blackfin/kernel/ipipe.c
index a5de8d4..5fc4248 100644
--- a/arch/blackfin/kernel/ipipe.c
+++ b/arch/blackfin/kernel/ipipe.c
@@ -167,7 +167,7 @@
 void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	int prio = desc->ic_prio;
+	int prio = __ipipe_get_irq_priority(irq);
 
 	desc->depth = 0;
 	if (ipd != &ipipe_root &&
@@ -178,8 +178,7 @@
 
 void __ipipe_disable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	int prio = desc->ic_prio;
+	int prio = __ipipe_get_irq_priority(irq);
 
 	if (ipd != &ipipe_root &&
 	    atomic_dec_and_test(&__ipipe_irq_lvdepth[prio]))
@@ -310,12 +309,16 @@
 
 asmlinkage void __ipipe_sync_root(void)
 {
+	void (*irq_tail_hook)(void) = (void (*)(void))__ipipe_irq_tail_hook;
 	unsigned long flags;
 
 	BUG_ON(irqs_disabled());
 
 	local_irq_save_hw(flags);
 
+	if (irq_tail_hook)
+		irq_tail_hook();
+
 	clear_thread_flag(TIF_IRQ_SYNC);
 
 	if (ipipe_root_cpudom_var(irqpend_himask) != 0)
diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index 401bd32..6e31e93 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -59,12 +59,14 @@
 	.unmask = dummy_mask_unmask_irq,
 };
 
+static int bad_stats;
 static struct irq_desc bad_irq_desc = {
 	.status = IRQ_DISABLED,
 	.chip = &bad_chip,
 	.handle_irq = handle_bad_irq,
 	.depth = 1,
 	.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+	.kstat_irqs = &bad_stats,
 #ifdef CONFIG_SMP
 	.affinity = CPU_MASK_ALL
 #endif
diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index b163f6d..da28f79 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -466,7 +466,7 @@
 	int cpu = raw_smp_processor_id();
 
 	if (size < 0)
-		return EFAULT;
+		return -EFAULT;
 	if (addr >= 0x1000 && (addr + size) <= physical_mem_end)
 		return 0;
 	if (addr >= SYSMMR_BASE)
@@ -498,7 +498,7 @@
 	if (IN_MEM(addr, size, L2_START, L2_LENGTH))
 		return 0;
 
-	return EFAULT;
+	return -EFAULT;
 }
 
 /*
@@ -508,14 +508,15 @@
 int kgdb_mem2hex(char *mem, char *buf, int count)
 {
 	char *tmp;
-	int err = 0;
+	int err;
 	unsigned char *pch;
 	unsigned short mmr16;
 	unsigned long mmr32;
 	int cpu = raw_smp_processor_id();
 
-	if (validate_memory_access_address((unsigned long)mem, count))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, count);
+	if (err)
+		return err;
 
 	/*
 	 * We use the upper half of buf as an intermediate buffer for the
@@ -533,7 +534,7 @@
 				*tmp++ = *pch++;
 				tmp -= 2;
 			} else
-				err = EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
@@ -545,10 +546,10 @@
 				*tmp++ = *pch++;
 				tmp -= 4;
 			} else
-				err = EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			err = EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -557,7 +558,7 @@
 		) {
 		/* access L1 instruction SRAM*/
 		if (dma_memcpy(tmp, mem, count) == NULL)
-			err = EFAULT;
+			err = -EFAULT;
 	} else
 		err = probe_kernel_read(tmp, mem, count);
 
@@ -585,24 +586,24 @@
 	char *tmp_new;
 	unsigned short *mmr16;
 	unsigned long *mmr32;
-	int err = 0;
-	int size = 0;
+	int err;
+	int size;
 	int cpu = raw_smp_processor_id();
 
 	tmp_old = tmp_new = buf;
 
-	while (count-- > 0) {
+	for (size = 0; size < count; ++size) {
 		if (*tmp_old == 0x7d)
 			*tmp_new = *(++tmp_old) ^ 0x20;
 		else
 			*tmp_new = *tmp_old;
 		tmp_new++;
 		tmp_old++;
-		size++;
 	}
 
-	if (validate_memory_access_address((unsigned long)mem, size))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, size);
+	if (err)
+		return err;
 
 	if ((unsigned int)mem >= SYSMMR_BASE) { /*access MMR registers*/
 		switch (size) {
@@ -611,17 +612,17 @@
 				mmr16 = (unsigned short *)buf;
 				*(unsigned short *)mem = *mmr16;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
 				mmr32 = (unsigned long *)buf;
 				*(unsigned long *)mem = *mmr32;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			return EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -630,7 +631,7 @@
 		) {
 		/* access L1 instruction SRAM */
 		if (dma_memcpy(mem, buf, size) == NULL)
-			err = EFAULT;
+			err = -EFAULT;
 	} else
 		err = probe_kernel_write(mem, buf, size);
 
@@ -648,10 +649,12 @@
 	char *tmp_hex;
 	unsigned short *mmr16;
 	unsigned long *mmr32;
+	int err;
 	int cpu = raw_smp_processor_id();
 
-	if (validate_memory_access_address((unsigned long)mem, count))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, count);
+	if (err)
+		return err;
 
 	/*
 	 * We use the upper half of buf as an intermediate buffer for the
@@ -673,17 +676,17 @@
 				mmr16 = (unsigned short *)tmp_raw;
 				*(unsigned short *)mem = *mmr16;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
 				mmr32 = (unsigned long *)tmp_raw;
 				*(unsigned long *)mem = *mmr32;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			return EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -692,10 +695,11 @@
 		) {
 		/* access L1 instruction SRAM */
 		if (dma_memcpy(mem, tmp_raw, count) == NULL)
-			return EFAULT;
+			err = -EFAULT;
 	} else
-		return probe_kernel_write(mem, tmp_raw, count);
-	return 0;
+		err = probe_kernel_write(mem, tmp_raw, count);
+
+	return err;
 }
 
 int kgdb_validate_break_address(unsigned long addr)
@@ -715,7 +719,7 @@
 	if (IN_MEM(addr, BREAK_INSTR_SIZE, L2_START, L2_LENGTH))
 		return 0;
 
-	return EFAULT;
+	return -EFAULT;
 }
 
 int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 1bd7f2d..d5aee36 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -201,8 +201,8 @@
 /*            Arithmetic relocations are handled.                        */
 /*            We do not expect LSETUP to be split and hence is not       */
 /*            handled.                                                   */
-/*            R_byte and R_byte2 are also not handled as the gas         */
-/*            does not generate it.                                      */
+/*            R_BFIN_BYTE and R_BFIN_BYTE2 are also not handled as the   */
+/*            gas does not generate it.                                  */
 /*************************************************************************/
 int
 apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
@@ -243,8 +243,8 @@
 #endif
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 
-		case R_pcrel24:
-		case R_pcrel24_jump_l:
+		case R_BFIN_PCREL24:
+		case R_BFIN_PCREL24_JUMP_L:
 			/* Add the value, subtract its postition */
 			location16 =
 			    (uint16_t *) (sechdrs[sechdrs[relsec].sh_info].
@@ -266,18 +266,18 @@
 			    (*location16 & 0xff00) | (value >> 16 & 0x00ff);
 			*(location16 + 1) = value & 0xffff;
 			break;
-		case R_pcrel12_jump:
-		case R_pcrel12_jump_s:
+		case R_BFIN_PCREL12_JUMP:
+		case R_BFIN_PCREL12_JUMP_S:
 			value -= (uint32_t) location32;
 			value >>= 1;
 			*location16 = (value & 0xfff);
 			break;
-		case R_pcrel10:
+		case R_BFIN_PCREL10:
 			value -= (uint32_t) location32;
 			value >>= 1;
 			*location16 = (value & 0x3ff);
 			break;
-		case R_luimm16:
+		case R_BFIN_LUIMM16:
 			pr_debug("before %x after %x\n", *location16,
 				       (value & 0xffff));
 			tmp = (value & 0xffff);
@@ -286,7 +286,7 @@
 			} else
 				*location16 = tmp;
 			break;
-		case R_huimm16:
+		case R_BFIN_HUIMM16:
 			pr_debug("before %x after %x\n", *location16,
 				       ((value >> 16) & 0xffff));
 			tmp = ((value >> 16) & 0xffff);
@@ -295,10 +295,10 @@
 			} else
 				*location16 = tmp;
 			break;
-		case R_rimm16:
+		case R_BFIN_RIMM16:
 			*location16 = (value & 0xffff);
 			break;
-		case R_byte4_data:
+		case R_BFIN_BYTE4_DATA:
 			pr_debug("before %x after %x\n", *location32, value);
 			*location32 = value;
 			break;
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index e040e03..30d0843 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -322,6 +322,9 @@
 }
 
 #if defined(CONFIG_ACCESS_CHECK)
+#ifdef CONFIG_ACCESS_OK_L1
+__attribute__((l1_text))
+#endif
 /* Return 1 if access to memory range is OK, 0 otherwise */
 int _access_ok(unsigned long addr, unsigned long size)
 {
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index a58687b..80447f9 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -18,9 +18,12 @@
 #include <linux/tty.h>
 #include <linux/pfn.h>
 
+#ifdef CONFIG_MTD_UCLINUX
+#include <linux/mtd/map.h>
 #include <linux/ext2_fs.h>
 #include <linux/cramfs_fs.h>
 #include <linux/romfs_fs.h>
+#endif
 
 #include <asm/cplb.h>
 #include <asm/cacheflush.h>
@@ -45,6 +48,7 @@
 EXPORT_SYMBOL(reserved_mem_dcache_on);
 
 #ifdef CONFIG_MTD_UCLINUX
+extern struct map_info uclinux_ram_map;
 unsigned long memory_mtd_end, memory_mtd_start, mtd_size;
 unsigned long _ebss;
 EXPORT_SYMBOL(memory_mtd_end);
@@ -150,40 +154,45 @@
 	unsigned long l1_data_b_length;
 	unsigned long l2_length;
 
-	blackfin_dma_early_init();
-
-	l1_code_length = _etext_l1 - _stext_l1;
-	if (l1_code_length > L1_CODE_LENGTH)
-		panic("L1 Instruction SRAM Overflow\n");
-	/* cannot complain as printk is not available as yet.
-	 * But we can continue booting and complain later!
+	/*
+	 * due to the ALIGN(4) in the arch/blackfin/kernel/vmlinux.lds.S
+	 * we know that everything about l1 text/data is nice and aligned,
+	 * so copy by 4 byte chunks, and don't worry about overlapping
+	 * src/dest.
+	 *
+	 * We can't use the dma_memcpy functions, since they can call
+	 * scheduler functions which might be in L1 :( and core writes
+	 * into L1 instruction cause bad access errors, so we are stuck,
+	 * we are required to use DMA, but can't use the common dma
+	 * functions. We can't use memcpy either - since that might be
+	 * going to be in the relocated L1
 	 */
 
-	/* Copy _stext_l1 to _etext_l1 to L1 instruction SRAM */
-	dma_memcpy(_stext_l1, _l1_lma_start, l1_code_length);
+	blackfin_dma_early_init();
 
+	/* if necessary, copy _stext_l1 to _etext_l1 to L1 instruction SRAM */
+	l1_code_length = _etext_l1 - _stext_l1;
+	if (l1_code_length)
+		early_dma_memcpy(_stext_l1, _l1_lma_start, l1_code_length);
+
+	/* if necessary, copy _sdata_l1 to _sbss_l1 to L1 data bank A SRAM */
 	l1_data_a_length = _sbss_l1 - _sdata_l1;
-	if (l1_data_a_length > L1_DATA_A_LENGTH)
-		panic("L1 Data SRAM Bank A Overflow\n");
+	if (l1_data_a_length)
+		early_dma_memcpy(_sdata_l1, _l1_lma_start + l1_code_length, l1_data_a_length);
 
-	/* Copy _sdata_l1 to _sbss_l1 to L1 data bank A SRAM */
-	dma_memcpy(_sdata_l1, _l1_lma_start + l1_code_length, l1_data_a_length);
-
+	/* if necessary, copy _sdata_b_l1 to _sbss_b_l1 to L1 data bank B SRAM */
 	l1_data_b_length = _sbss_b_l1 - _sdata_b_l1;
-	if (l1_data_b_length > L1_DATA_B_LENGTH)
-		panic("L1 Data SRAM Bank B Overflow\n");
-
-	/* Copy _sdata_b_l1 to _sbss_b_l1 to L1 data bank B SRAM */
-	dma_memcpy(_sdata_b_l1, _l1_lma_start + l1_code_length +
+	if (l1_data_b_length)
+		early_dma_memcpy(_sdata_b_l1, _l1_lma_start + l1_code_length +
 			l1_data_a_length, l1_data_b_length);
 
+	early_dma_memcpy_done();
+
+	/* if necessary, copy _stext_l2 to _edata_l2 to L2 SRAM */
 	if (L2_LENGTH != 0) {
 		l2_length = _sbss_l2 - _stext_l2;
-		if (l2_length > L2_LENGTH)
-			panic("L2 SRAM Overflow\n");
-
-		/* Copy _stext_l2 to _edata_l2 to L2 SRAM */
-		dma_memcpy(_stext_l2, _l2_lma_start, l2_length);
+		if (l2_length)
+			memcpy(_stext_l2, _l2_lma_start, l2_length);
 	}
 }
 
@@ -472,7 +481,7 @@
 
 	if (DMA_UNCACHED_REGION > (_ramend - _ramstart)) {
 		console_init();
-		panic("DMA region exceeds memory limit: %lu.\n",
+		panic("DMA region exceeds memory limit: %lu.",
 			_ramend - _ramstart);
 	}
 	memory_end = _ramend - DMA_UNCACHED_REGION;
@@ -526,14 +535,13 @@
 
 	if (mtd_size == 0) {
 		console_init();
-		panic("Don't boot kernel without rootfs attached.\n");
+		panic("Don't boot kernel without rootfs attached.");
 	}
 
 	/* Relocate MTD image to the top of memory after the uncached memory area */
-	dma_memcpy((char *)memory_end, _end, mtd_size);
-
-	memory_mtd_start = memory_end;
-	_ebss = memory_mtd_start;	/* define _ebss for compatible */
+	uclinux_ram_map.phys = memory_mtd_start = memory_end;
+	uclinux_ram_map.size = mtd_size;
+	dma_memcpy((void *)uclinux_ram_map.phys, _end, uclinux_ram_map.size);
 #endif				/* CONFIG_MTD_UCLINUX */
 
 #if (defined(CONFIG_BFIN_ICACHE) && ANOMALY_05000263)
@@ -796,10 +804,8 @@
 	cclk = get_cclk();
 	sclk = get_sclk();
 
-#if !defined(CONFIG_BFIN_KERNEL_CLOCK)
-	if (ANOMALY_05000273 && cclk == sclk)
-		panic("ANOMALY 05000273, SCLK can not be same as CCLK");
-#endif
+	if ((ANOMALY_05000273 || ANOMALY_05000274) && (cclk >> 1) < sclk)
+		panic("ANOMALY 05000273 or 05000274: CCLK must be >= 2*SCLK");
 
 #ifdef BF561_FAMILY
 	if (ANOMALY_05000266) {
@@ -881,7 +887,7 @@
 				printk(KERN_ERR "Warning: Compiled for Rev %d, but running on Rev %d\n",
 				       bfin_compiled_revid(), bfin_revid());
 				if (bfin_compiled_revid() > bfin_revid())
-					panic("Error: you are missing anomaly workarounds for this rev\n");
+					panic("Error: you are missing anomaly workarounds for this rev");
 			}
 		}
 		if (bfin_revid() < CONFIG_BF_REV_MIN || bfin_revid() > CONFIG_BF_REV_MAX)
@@ -891,16 +897,13 @@
 
 	/* We can't run on BF548-0.1 due to ANOMALY 05000448 */
 	if (bfin_cpuid() == 0x27de && bfin_revid() == 1)
-		panic("You can't run on this processor due to 05000448\n");
+		panic("You can't run on this processor due to 05000448");
 
 	printk(KERN_INFO "Blackfin Linux support by http://blackfin.uclinux.org/\n");
 
 	printk(KERN_INFO "Processor Speed: %lu MHz core clock and %lu MHz System Clock\n",
 	       cclk / 1000000, sclk / 1000000);
 
-	if (ANOMALY_05000273 && (cclk >> 1) <= sclk)
-		printk("\n\n\nANOMALY_05000273: CCLK must be >= 2*SCLK !!!\n\n\n");
-
 	setup_bootmem_allocator();
 
 	paging_init();
diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
index fce49d7..a8f1329 100644
--- a/arch/blackfin/kernel/sys_bfin.c
+++ b/arch/blackfin/kernel/sys_bfin.c
@@ -78,11 +78,6 @@
 	return do_mmap2(addr, len, prot, flags, fd, pgoff);
 }
 
-asmlinkage int sys_getpagesize(void)
-{
-	return PAGE_SIZE;
-}
-
 asmlinkage void *sys_sram_alloc(size_t size, unsigned long flags)
 {
 	return sram_alloc_with_lsl(size, flags);
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 2764612..0791eba 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -20,8 +20,9 @@
 
 #include <asm/blackfin.h>
 #include <asm/time.h>
+#include <asm/gptimers.h>
 
-#ifdef CONFIG_CYCLES_CLOCKSOURCE
+#if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
 /* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
@@ -58,15 +59,15 @@
 	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
 }
 
-static cycle_t read_cycles(struct clocksource *cs)
+static cycle_t bfin_read_cycles(struct clocksource *cs)
 {
 	return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod);
 }
 
-static struct clocksource clocksource_bfin = {
-	.name		= "bfin_cycles",
+static struct clocksource bfin_cs_cycles = {
+	.name		= "bfin_cs_cycles",
 	.rating		= 350,
-	.read		= read_cycles,
+	.read		= bfin_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(64),
 	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -74,53 +75,198 @@
 
 unsigned long long sched_clock(void)
 {
-	return cycles_2_ns(read_cycles(&clocksource_bfin));
+	return cycles_2_ns(bfin_read_cycles(&bfin_cs_cycles));
 }
 
-static int __init bfin_clocksource_init(void)
+static int __init bfin_cs_cycles_init(void)
 {
 	set_cyc2ns_scale(get_cclk() / 1000);
 
-	clocksource_bfin.mult = clocksource_hz2mult(get_cclk(), clocksource_bfin.shift);
+	bfin_cs_cycles.mult = \
+		clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift);
 
-	if (clocksource_register(&clocksource_bfin))
+	if (clocksource_register(&bfin_cs_cycles))
 		panic("failed to register clocksource");
 
 	return 0;
 }
-
 #else
-# define bfin_clocksource_init()
+# define bfin_cs_cycles_init()
 #endif
 
+#ifdef CONFIG_GPTMR0_CLOCKSOURCE
+
+void __init setup_gptimer0(void)
+{
+	disable_gptimers(TIMER0bit);
+
+	set_gptimer_config(TIMER0_id, \
+		TIMER_OUT_DIS | TIMER_PERIOD_CNT | TIMER_MODE_PWM);
+	set_gptimer_period(TIMER0_id, -1);
+	set_gptimer_pwidth(TIMER0_id, -2);
+	SSYNC();
+	enable_gptimers(TIMER0bit);
+}
+
+static cycle_t bfin_read_gptimer0(void)
+{
+	return bfin_read_TIMER0_COUNTER();
+}
+
+static struct clocksource bfin_cs_gptimer0 = {
+	.name		= "bfin_cs_gptimer0",
+	.rating		= 400,
+	.read		= bfin_read_gptimer0,
+	.mask		= CLOCKSOURCE_MASK(32),
+	.shift		= 22,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int __init bfin_cs_gptimer0_init(void)
+{
+	setup_gptimer0();
+
+	bfin_cs_gptimer0.mult = \
+		clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift);
+
+	if (clocksource_register(&bfin_cs_gptimer0))
+		panic("failed to register clocksource");
+
+	return 0;
+}
+#else
+# define bfin_cs_gptimer0_init()
+#endif
+
+#ifdef CONFIG_CORE_TIMER_IRQ_L1
+__attribute__((l1_text))
+#endif
+irqreturn_t timer_interrupt(int irq, void *dev_id);
+
+static int bfin_timer_set_next_event(unsigned long, \
+		struct clock_event_device *);
+
+static void bfin_timer_set_mode(enum clock_event_mode, \
+		struct clock_event_device *);
+
+static struct clock_event_device clockevent_bfin = {
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
+	.name		= "bfin_gptimer0",
+	.rating		= 300,
+	.irq		= IRQ_TIMER0,
+#else
+	.name		= "bfin_core_timer",
+	.rating		= 350,
+	.irq		= IRQ_CORETMR,
+#endif
+	.shift		= 32,
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_next_event = bfin_timer_set_next_event,
+	.set_mode	= bfin_timer_set_mode,
+};
+
+static struct irqaction bfin_timer_irq = {
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
+	.name		= "Blackfin GPTimer0",
+#else
+	.name		= "Blackfin CoreTimer",
+#endif
+	.flags		= IRQF_DISABLED | IRQF_TIMER | \
+			  IRQF_IRQPOLL | IRQF_PERCPU,
+	.handler	= timer_interrupt,
+	.dev_id		= &clockevent_bfin,
+};
+
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 static int bfin_timer_set_next_event(unsigned long cycles,
                                      struct clock_event_device *evt)
 {
-	bfin_write_TCOUNT(cycles);
-	CSYNC();
+	disable_gptimers(TIMER0bit);
+
+	/* it starts counting three SCLK cycles after the TIMENx bit is set */
+	set_gptimer_pwidth(TIMER0_id, cycles - 3);
+	enable_gptimers(TIMER0bit);
 	return 0;
 }
 
 static void bfin_timer_set_mode(enum clock_event_mode mode,
-                                struct clock_event_device *evt)
+				struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC: {
+		set_gptimer_config(TIMER0_id, \
+			TIMER_OUT_DIS | TIMER_IRQ_ENA | \
+			TIMER_PERIOD_CNT | TIMER_MODE_PWM);
+		set_gptimer_period(TIMER0_id, get_sclk() / HZ);
+		set_gptimer_pwidth(TIMER0_id, get_sclk() / HZ - 1);
+		enable_gptimers(TIMER0bit);
+		break;
+	}
+	case CLOCK_EVT_MODE_ONESHOT:
+		disable_gptimers(TIMER0bit);
+		set_gptimer_config(TIMER0_id, \
+			TIMER_OUT_DIS | TIMER_IRQ_ENA | TIMER_MODE_PWM);
+		set_gptimer_period(TIMER0_id, 0);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		disable_gptimers(TIMER0bit);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+	}
+}
+
+static void bfin_timer_ack(void)
+{
+	set_gptimer_status(TIMER_GROUP1, TIMER_STATUS_TIMIL0);
+}
+
+static void __init bfin_timer_init(void)
+{
+	disable_gptimers(TIMER0bit);
+}
+
+static unsigned long  __init bfin_clockevent_check(void)
+{
+	setup_irq(IRQ_TIMER0, &bfin_timer_irq);
+	return get_sclk();
+}
+
+#else /* CONFIG_TICKSOURCE_CORETMR */
+
+static int bfin_timer_set_next_event(unsigned long cycles,
+				struct clock_event_device *evt)
+{
+	bfin_write_TCNTL(TMPWR);
+	CSYNC();
+	bfin_write_TCOUNT(cycles);
+	CSYNC();
+	bfin_write_TCNTL(TMPWR | TMREN);
+	return 0;
+}
+
+static void bfin_timer_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
 {
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC: {
 		unsigned long tcount = ((get_cclk() / (HZ * TIME_SCALE)) - 1);
 		bfin_write_TCNTL(TMPWR);
-		bfin_write_TSCALE(TIME_SCALE - 1);
 		CSYNC();
+		bfin_write_TSCALE(TIME_SCALE - 1);
 		bfin_write_TPERIOD(tcount);
 		bfin_write_TCOUNT(tcount);
-		bfin_write_TCNTL(TMPWR | TMREN | TAUTORLD);
 		CSYNC();
+		bfin_write_TCNTL(TMPWR | TMREN | TAUTORLD);
 		break;
 	}
 	case CLOCK_EVT_MODE_ONESHOT:
-		bfin_write_TSCALE(TIME_SCALE - 1);
-		bfin_write_TCOUNT(0);
-		bfin_write_TCNTL(TMPWR | TMREN);
+		bfin_write_TCNTL(TMPWR);
 		CSYNC();
+		bfin_write_TSCALE(TIME_SCALE - 1);
+		bfin_write_TPERIOD(0);
+		bfin_write_TCOUNT(0);
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
@@ -132,6 +278,10 @@
 	}
 }
 
+static void bfin_timer_ack(void)
+{
+}
+
 static void __init bfin_timer_init(void)
 {
 	/* power up the timer, but don't enable it just yet */
@@ -145,38 +295,32 @@
 	bfin_write_TPERIOD(0);
 	bfin_write_TCOUNT(0);
 
-	/* now enable the timer */
 	CSYNC();
 }
 
+static unsigned long  __init bfin_clockevent_check(void)
+{
+	setup_irq(IRQ_CORETMR, &bfin_timer_irq);
+	return get_cclk() / TIME_SCALE;
+}
+
+void __init setup_core_timer(void)
+{
+	bfin_timer_init();
+	bfin_timer_set_mode(CLOCK_EVT_MODE_PERIODIC, NULL);
+}
+#endif /* CONFIG_TICKSOURCE_GPTMR0 */
+
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
  */
-#ifdef CONFIG_CORE_TIMER_IRQ_L1
-__attribute__((l1_text))
-#endif
-irqreturn_t timer_interrupt(int irq, void *dev_id);
-
-static struct clock_event_device clockevent_bfin = {
-	.name		= "bfin_core_timer",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.shift		= 32,
-	.set_next_event = bfin_timer_set_next_event,
-	.set_mode	= bfin_timer_set_mode,
-};
-
-static struct irqaction bfin_timer_irq = {
-	.name		= "Blackfin Core Timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= timer_interrupt,
-	.dev_id		= &clockevent_bfin,
-};
-
 irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *evt = dev_id;
+	smp_mb();
 	evt->event_handler(evt);
+	bfin_timer_ack();
 	return IRQ_HANDLED;
 }
 
@@ -184,9 +328,8 @@
 {
 	unsigned long timer_clk;
 
-	timer_clk = get_cclk() / TIME_SCALE;
+	timer_clk = bfin_clockevent_check();
 
-	setup_irq(IRQ_CORETMR, &bfin_timer_irq);
 	bfin_timer_init();
 
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
@@ -218,6 +361,7 @@
 	xtime.tv_nsec = 0;
 	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
 
-	bfin_clocksource_init();
+	bfin_cs_cycles_init();
+	bfin_cs_gptimer0_init();
 	bfin_clockevent_init();
 }
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index 1bbacfb..adb54aa 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -24,14 +24,10 @@
 
 static struct irqaction bfin_timer_irq = {
 	.name = "Blackfin Timer Tick",
-#ifdef CONFIG_IRQ_PER_CPU
-	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#else
 	.flags = IRQF_DISABLED
-#endif
 };
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 void __init setup_system_timer0(void)
 {
 	/* Power down the core timer, just to play safe. */
@@ -74,7 +70,7 @@
 static void __init
 time_sched_init(irqreturn_t(*timer_routine) (int, void *))
 {
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 	setup_system_timer0();
 	bfin_timer_irq.handler = timer_routine;
 	setup_irq(IRQ_TIMER0, &bfin_timer_irq);
@@ -94,7 +90,7 @@
 	unsigned long offset;
 	unsigned long clocks_per_jiffy;
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 	clocks_per_jiffy = bfin_read_TIMER0_PERIOD();
 	offset = bfin_read_TIMER0_COUNTER() / \
 		(((clocks_per_jiffy + 1) * HZ) / USEC_PER_SEC);
@@ -133,36 +129,25 @@
 	static long last_rtc_update;
 
 	write_seqlock(&xtime_lock);
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) && !defined(CONFIG_IPIPE)
-	/*
-	 * TIMIL0 is latched in __ipipe_grab_irq() when the I-Pipe is
-	 * enabled.
-	 */
-	if (get_gptimer_status(0) & TIMER_STATUS_TIMIL0) {
-#endif
-		do_timer(1);
+	do_timer(1);
 
-		/*
-		 * If we have an externally synchronized Linux clock, then update
-		 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-		 * called as close as possible to 500 ms before the new second starts.
-		 */
-		if (ntp_synced() &&
-		    xtime.tv_sec > last_rtc_update + 660 &&
-		    (xtime.tv_nsec / NSEC_PER_USEC) >=
-		    500000 - ((unsigned)TICK_SIZE) / 2
-		    && (xtime.tv_nsec / NSEC_PER_USEC) <=
-		    500000 + ((unsigned)TICK_SIZE) / 2) {
-			if (set_rtc_mmss(xtime.tv_sec) == 0)
-				last_rtc_update = xtime.tv_sec;
-			else
-				/* Do it again in 60s. */
-				last_rtc_update = xtime.tv_sec - 600;
-		}
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) && !defined(CONFIG_IPIPE)
-		set_gptimer_status(0, TIMER_STATUS_TIMIL0);
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 */
+	if (ntp_synced() &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    (xtime.tv_nsec / NSEC_PER_USEC) >=
+	    500000 - ((unsigned)TICK_SIZE) / 2
+	    && (xtime.tv_nsec / NSEC_PER_USEC) <=
+	    500000 + ((unsigned)TICK_SIZE) / 2) {
+		if (set_rtc_mmss(xtime.tv_sec) == 0)
+			last_rtc_update = xtime.tv_sec;
+		else
+			/* Do it again in 60s. */
+			last_rtc_update = xtime.tv_sec - 600;
 	}
-#endif
 	write_sequnlock(&xtime_lock);
 
 #ifdef CONFIG_IPIPE
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index ffe7fb5..aa76dfb 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -68,6 +68,13 @@
 	({ if (0) printk(fmt, ##arg); 0; })
 #endif
 
+#if defined(CONFIG_DEBUG_MMRS) || defined(CONFIG_DEBUG_MMRS_MODULE)
+u32 last_seqstat;
+#ifdef CONFIG_DEBUG_MMRS_MODULE
+EXPORT_SYMBOL(last_seqstat);
+#endif
+#endif
+
 /* Initiate the event table handler */
 void __init trap_init(void)
 {
@@ -79,7 +86,6 @@
 static void decode_address(char *buf, unsigned long address)
 {
 #ifdef CONFIG_DEBUG_VERBOSE
-	struct vm_list_struct *vml;
 	struct task_struct *p;
 	struct mm_struct *mm;
 	unsigned long flags, offset;
@@ -196,6 +202,11 @@
 
 asmlinkage void double_fault_c(struct pt_regs *fp)
 {
+#ifdef CONFIG_DEBUG_BFIN_HWTRACE_ON
+	int j;
+	trace_buffer_save(j);
+#endif
+
 	console_verbose();
 	oops_in_progress = 1;
 #ifdef CONFIG_DEBUG_VERBOSE
@@ -220,9 +231,10 @@
 		dump_bfin_process(fp);
 		dump_bfin_mem(fp);
 		show_regs(fp);
+		dump_bfin_trace_buffer();
 	}
 #endif
-	panic("Double Fault - unrecoverable event\n");
+	panic("Double Fault - unrecoverable event");
 
 }
 
@@ -239,6 +251,9 @@
 	unsigned long trapnr = fp->seqstat & SEQSTAT_EXCAUSE;
 
 	trace_buffer_save(j);
+#if defined(CONFIG_DEBUG_MMRS) || defined(CONFIG_DEBUG_MMRS_MODULE)
+	last_seqstat = (u32)fp->seqstat;
+#endif
 
 	/* Important - be very careful dereferncing pointers - will lead to
 	 * double faults if the stack has become corrupt
@@ -588,6 +603,9 @@
 		force_sig_info(sig, &info, current);
 	}
 
+	if (ANOMALY_05000461 && trapnr == VEC_HWERR && !access_ok(VERIFY_READ, fp->pc, 8))
+		fp->pc = SAFE_USER_INSTRUCTION;
+
 	trace_buffer_restore(j);
 	return;
 }
@@ -832,6 +850,11 @@
 	decode_address(buf, (unsigned int)stack);
 	printk(KERN_NOTICE " SP: [0x%p] %s\n", stack, buf);
 
+	if (!access_ok(VERIFY_READ, stack, (unsigned int)endstack - (unsigned int)stack)) {
+		printk(KERN_NOTICE "Invalid stack pointer\n");
+		return;
+	}
+
 	/* First thing is to look for a frame pointer */
 	for (addr = (unsigned int *)((unsigned int)stack & ~0xF); addr < endstack; addr++) {
 		if (*addr & 0x1)
@@ -1066,6 +1089,29 @@
 	unsigned int cpu = smp_processor_id();
 	unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
 
+	verbose_printk(KERN_NOTICE "\n");
+	if (CPUID != bfin_cpuid())
+		verbose_printk(KERN_NOTICE "Compiled for cpu family 0x%04x (Rev %d), "
+			"but running on:0x%04x (Rev %d)\n",
+			CPUID, bfin_compiled_revid(), bfin_cpuid(), bfin_revid());
+
+	verbose_printk(KERN_NOTICE "ADSP-%s-0.%d",
+		CPU, bfin_compiled_revid());
+
+	if (bfin_compiled_revid() !=  bfin_revid())
+		verbose_printk("(Detected 0.%d)", bfin_revid());
+
+	verbose_printk(" %lu(MHz CCLK) %lu(MHz SCLK) (%s)\n",
+		get_cclk()/1000000, get_sclk()/1000000,
+#ifdef CONFIG_MPU
+		"mpu on"
+#else
+		"mpu off"
+#endif
+		);
+
+	verbose_printk(KERN_NOTICE "%s", linux_banner);
+
 	verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\t\t%s\n", print_tainted());
 	verbose_printk(KERN_NOTICE " SEQSTAT: %08lx  IPEND: %04lx  SYSCFG: %04lx\n",
 		(long)fp->seqstat, fp->ipend, fp->syscfg);
@@ -1246,5 +1292,5 @@
 	dump_bfin_mem(fp);
 	show_regs(fp);
 	dump_stack();
-	panic("Unrecoverable event\n");
+	panic("Unrecoverable event");
 }
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 27952ae..8b67167 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -50,7 +50,9 @@
 		_text = .;
 		__stext = .;
 		TEXT_TEXT
+#ifndef CONFIG_SCHEDULE_L1
 		SCHED_TEXT
+#endif
 		LOCK_TEXT
 		KPROBES_TEXT
 		*(.text.*)
@@ -180,6 +182,9 @@
 		. = ALIGN(4);
 		__stext_l1 = .;
 		*(.l1.text)
+#ifdef CONFIG_SCHEDULE_L1
+		SCHED_TEXT
+#endif
 		. = ALIGN(4);
 		__etext_l1 = .;
 	}
diff --git a/arch/blackfin/mach-bf518/Kconfig b/arch/blackfin/mach-bf518/Kconfig
index f397ede..4c76fef 100644
--- a/arch/blackfin/mach-bf518/Kconfig
+++ b/arch/blackfin/mach-bf518/Kconfig
@@ -156,6 +156,7 @@
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c
index 41f2eac..62bba09 100644
--- a/arch/blackfin/mach-bf518/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf518/boards/ezbrd.c
@@ -82,7 +82,11 @@
 
 static struct resource ezbrd_flash_resource = {
 	.start = 0x20000000,
+#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+	.end   = 0x202fffff,
+#else
 	.end   = 0x203fffff,
+#endif
 	.flags = IORESOURCE_MEM,
 };
 
@@ -162,8 +166,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -249,8 +253,8 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -514,7 +518,7 @@
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
@@ -678,6 +682,11 @@
 				ARRAY_SIZE(bfin_i2c_board_info));
 	platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices));
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
+	/* setup BF518-EZBRD GPIO pin PG11 to AMS2, PG15 to AMS3. */
+	peripheral_request(P_AMS2, "ParaFlash");
+#if !defined(CONFIG_SPI_BFIN) && !defined(CONFIG_SPI_BFIN_MODULE)
+	peripheral_request(P_AMS3, "ParaFlash");
+#endif
 	return 0;
 }
 
diff --git a/arch/blackfin/mach-bf518/include/mach/anomaly.h b/arch/blackfin/mach-bf518/include/mach/anomaly.h
index c847bb1..b69bd9a 100644
--- a/arch/blackfin/mach-bf518/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf518/include/mach/anomaly.h
@@ -6,14 +6,19 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision B, 02/03/2009; ADSP-BF512/BF514/BF516/BF518 Blackfin Processor Anomaly List
  */
 
+/* We plan on not supporting 0.0 silicon, but 0.1 isn't out yet - sorry */
+#if __SILICON_REVISION__ < 0
+# error will not work on BF518 silicon version
+#endif
+
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
@@ -47,7 +52,7 @@
 #define ANOMALY_05000435 (1)
 /* PORTx_DRIVE and PORTx_HYSTERESIS Registers Read Back Incorrect Values */
 #define ANOMALY_05000438 (1)
-/* Preboot Cannot be Used to Program the PLL_DIV Register */
+/* Preboot Cannot be Used to Alter the PLL_DIV Register */
 #define ANOMALY_05000439 (1)
 /* bfrom_SysControl() Cannot be Used to Write the PLL_DIV Register */
 #define ANOMALY_05000440 (1)
@@ -61,32 +66,56 @@
 #define ANOMALY_05000453 (1)
 /* PPI_FS3 is Driven One Half Cycle Later Than PPI Data */
 #define ANOMALY_05000455 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000119 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
 #define ANOMALY_05000285 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000312 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (0)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf518/include/mach/portmux.h b/arch/blackfin/mach-bf518/include/mach/portmux.h
index f618b48..a0fc77f 100644
--- a/arch/blackfin/mach-bf518/include/mach/portmux.h
+++ b/arch/blackfin/mach-bf518/include/mach/portmux.h
@@ -185,6 +185,10 @@
 #define P_PTP_PPS		(P_DEFINED | P_IDENT(GPIO_PG12) | P_FUNCT(2))
 #define P_PTP_CLKOUT		(P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(2))
 
-#define P_HWAIT		(P_DEFINED | P_IDENT(GPIO_PG000000000) | P_FUNCT(1))
+/* AMS */
+#define P_AMS2			(P_DEFINED | P_IDENT(GPIO_PG11) | P_FUNCT(1))
+#define P_AMS3			(P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(2))
+
+#define P_HWAIT			(P_DEFINED | P_IDENT(GPIO_PG000000000) | P_FUNCT(1))
 
 #endif				/* _MACH_PORTMUX_H_ */
diff --git a/arch/blackfin/mach-bf527/Kconfig b/arch/blackfin/mach-bf527/Kconfig
index 8438ec6..848ac6f 100644
--- a/arch/blackfin/mach-bf527/Kconfig
+++ b/arch/blackfin/mach-bf527/Kconfig
@@ -170,6 +170,7 @@
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf527/boards/cm_bf527.c b/arch/blackfin/mach-bf527/boards/cm_bf527.c
index 48e69ee..6d6f9ef 100644
--- a/arch/blackfin/mach-bf527/boards/cm_bf527.c
+++ b/arch/blackfin/mach-bf527/boards/cm_bf527.c
@@ -463,8 +463,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -554,8 +554,8 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -789,7 +789,7 @@
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 		.type = "pcf8574_lcd",
diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c
index 7fe480e..1435c5d 100644
--- a/arch/blackfin/mach-bf527/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf527/boards/ezbrd.c
@@ -247,8 +247,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -354,8 +354,8 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -586,7 +586,7 @@
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index d086411..147edd1 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -485,8 +485,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -509,6 +509,13 @@
 };
 #endif
 
+#if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
+static struct bfin5xx_spi_chip  mmc_spi_chip_info = {
+	.enable_dma = 0,
+	.bits_per_word = 8,
+};
+#endif
+
 #if defined(CONFIG_PBX)
 static struct bfin5xx_spi_chip spi_si3xxx_chip_info = {
 	.ctl_reg	= 0x4, /* send zero */
@@ -593,8 +600,8 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -624,6 +631,17 @@
 		.controller_data = &ad9960_spi_chip_info,
 	},
 #endif
+#if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
+	{
+		.modalias = "mmc_spi",
+		.max_speed_hz = 20000000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num = 0,
+		.chip_select = 3,
+		.controller_data = &mmc_spi_chip_info,
+		.mode = SPI_MODE_0,
+	},
+#endif
+
 #if defined(CONFIG_PBX)
 	{
 		.modalias = "fxs-spi",
@@ -836,7 +854,7 @@
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf527/include/mach/anomaly.h b/arch/blackfin/mach-bf527/include/mach/anomaly.h
index df6808d..c84ddea 100644
--- a/arch/blackfin/mach-bf527/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf527/include/mach/anomaly.h
@@ -6,14 +6,19 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
- *  - Revision B, 08/12/2008; ADSP-BF526 Blackfin Processor Anomaly List
- *  - Revision E, 08/18/2008; ADSP-BF527 Blackfin Processor Anomaly List
+/* This file should be up to date with:
+ *  - Revision C, 03/13/2009; ADSP-BF526 Blackfin Processor Anomaly List
+ *  - Revision F, 03/03/2009; ADSP-BF527 Blackfin Processor Anomaly List
  */
 
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
+/* We do not support old silicon - sorry */
+#if __SILICON_REVISION__ < 0
+# error will not work on BF526/BF527 silicon version
+#endif
+
 #if defined(__ADSPBF522__) || defined(__ADSPBF524__) || defined(__ADSPBF526__)
 # define ANOMALY_BF526 1
 #else
@@ -25,158 +30,203 @@
 # define ANOMALY_BF527 0
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+#define _ANOMALY_BF526(rev526) (ANOMALY_BF526 && __SILICON_REVISION__ rev526)
+#define _ANOMALY_BF527(rev527) (ANOMALY_BF527 && __SILICON_REVISION__ rev527)
+#define _ANOMALY_BF526_BF527(rev526, rev527) (_ANOMALY_BF526(rev526) || _ANOMALY_BF527(rev527))
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)	/* note: brokenness is noted in documentation, not anomaly sheet */
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
+/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT Mode with External Clock */
+#define ANOMALY_05000254 (1)
 /* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
 /* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
-#define ANOMALY_05000313 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000313 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Incorrect Access of OTP_STATUS During otp_write() Function */
-#define ANOMALY_05000328 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000328 (_ANOMALY_BF527(< 2))
+/* Host DMA Boot Modes Are Not Functional */
+#define ANOMALY_05000330 (__SILICON_REVISION__ < 2)
 /* Disallowed Configuration Prevents Subsequent Allowed Configuration on Host DMA Port */
-#define ANOMALY_05000337 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000337 (_ANOMALY_BF527(< 2))
 /* Ethernet MAC MDIO Reads Do Not Meet IEEE Specification */
-#define ANOMALY_05000341 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000341 (_ANOMALY_BF527(< 2))
 /* TWI May Not Operate Correctly Under Certain Signal Termination Conditions */
-#define ANOMALY_05000342 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000342 (_ANOMALY_BF527(< 2))
 /* USB Calibration Value Is Not Initialized */
-#define ANOMALY_05000346 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000346 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* USB Calibration Value to use */
 #define ANOMALY_05000346_value 0xE510
 /* Preboot Routine Incorrectly Alters Reset Value of USB Register */
-#define ANOMALY_05000347 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000347 (_ANOMALY_BF527(< 2))
 /* Security Features Are Not Functional */
-#define ANOMALY_05000348 (ANOMALY_BF527 && __SILICON_REVISION__ < 1)
+#define ANOMALY_05000348 (_ANOMALY_BF527(< 1))
 /* bfrom_SysControl() Firmware Function Performs Improper System Reset */
-#define ANOMALY_05000353 (ANOMALY_BF526)
+#define ANOMALY_05000353 (_ANOMALY_BF526(< 1))
 /* Regulator Programming Blocked when Hibernate Wakeup Source Remains Active */
-#define ANOMALY_05000355 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000355 (_ANOMALY_BF527(< 2))
 /* Serial Port (SPORT) Multichannel Transmit Failure when Channel 0 Is Disabled */
-#define ANOMALY_05000357 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000357 (_ANOMALY_BF527(< 2))
 /* Incorrect Revision Number in DSPID Register */
-#define ANOMALY_05000364 (ANOMALY_BF527 && __SILICON_REVISION__ == 1)
+#define ANOMALY_05000364 (_ANOMALY_BF527(== 1))
 /* PPI Underflow Error Goes Undetected in ITU-R 656 Mode */
 #define ANOMALY_05000366 (1)
 /* Incorrect Default CSEL Value in PLL_DIV */
-#define ANOMALY_05000368 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000368 (_ANOMALY_BF527(< 2))
 /* Possible RETS Register Corruption when Subroutine Is under 5 Cycles in Duration */
-#define ANOMALY_05000371 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000371 (_ANOMALY_BF527(< 2))
 /* Authentication Fails To Initiate */
-#define ANOMALY_05000376 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000376 (_ANOMALY_BF527(< 2))
 /* Data Read From L3 Memory by USB DMA May be Corrupted */
-#define ANOMALY_05000380 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000380 (_ANOMALY_BF527(< 2))
 /* 8-Bit NAND Flash Boot Mode Not Functional */
-#define ANOMALY_05000382 (__SILICON_REVISION__ < 2)
-/* Host Must Not Read Back During Host DMA Boot */
-#define ANOMALY_05000384 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000382 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Boot from OTP Memory Not Functional */
-#define ANOMALY_05000385 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000385 (_ANOMALY_BF527(< 2))
 /* bfrom_SysControl() Firmware Routine Not Functional */
-#define ANOMALY_05000386 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000386 (_ANOMALY_BF527(< 2))
 /* Programmable Preboot Settings Not Functional */
-#define ANOMALY_05000387 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000387 (_ANOMALY_BF527(< 2))
 /* CRC32 Checksum Support Not Functional */
-#define ANOMALY_05000388 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000388 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Reset Vector Must Not Be in SDRAM Memory Space */
-#define ANOMALY_05000389 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000389 (_ANOMALY_BF527(< 2))
 /* pTempCurrent Not Present in ADI_BOOT_DATA Structure */
-#define ANOMALY_05000392 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000392 (_ANOMALY_BF527(< 2))
 /* Deprecated Value of dTempByteCount in ADI_BOOT_DATA Structure */
-#define ANOMALY_05000393 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000393 (_ANOMALY_BF527(< 2))
 /* Log Buffer Not Functional */
-#define ANOMALY_05000394 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000394 (_ANOMALY_BF527(< 2))
 /* Hook Routine Not Functional */
-#define ANOMALY_05000395 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000395 (_ANOMALY_BF527(< 2))
 /* Header Indirect Bit Not Functional */
-#define ANOMALY_05000396 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000396 (_ANOMALY_BF527(< 2))
 /* BK_ONES, BK_ZEROS, and BK_DATECODE Constants Not Functional */
-#define ANOMALY_05000397 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000397 (_ANOMALY_BF527(< 2))
 /* SWRESET, DFRESET and WDRESET Bits in the SYSCR Register Not Functional */
-#define ANOMALY_05000398 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000398 (_ANOMALY_BF527(< 2))
 /* BCODE_NOBOOT in BCODE Field of SYSCR Register Not Functional */
-#define ANOMALY_05000399 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000399 (_ANOMALY_BF527(< 2))
 /* PPI Data Signals D0 and D8 do not Tristate After Disabling PPI */
-#define ANOMALY_05000401 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000401 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Level-Sensitive External GPIO Wakeups May Cause Indefinite Stall */
-#define ANOMALY_05000403 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000403 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox SESR Disallows Certain User Interrupts */
-#define ANOMALY_05000404 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000404 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox SESR Firmware Does Not Save/Restore Full Context */
 #define ANOMALY_05000405 (1)
 /* Lockbox SESR Firmware Arguments Are Not Retained After First Initialization */
-#define ANOMALY_05000407 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000407 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox Firmware Memory Cleanup Routine Does not Clear Registers */
 #define ANOMALY_05000408 (1)
 /* Lockbox firmware leaves MDMA0 channel enabled */
-#define ANOMALY_05000409 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000409 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Incorrect Default Internal Voltage Regulator Setting */
-#define ANOMALY_05000410 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000410 (_ANOMALY_BF527(< 2))
 /* bfrom_SysControl() Firmware Function Cannot be Used to Enter Power Saving Modes */
-#define ANOMALY_05000411 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000411 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* OTP_CHECK_FOR_PREV_WRITE Bit is Not Functional in bfrom_OtpWrite() API */
-#define ANOMALY_05000414 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000414 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* DEB2_URGENT Bit Not Functional */
-#define ANOMALY_05000415 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000415 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Speculative Fetches Can Cause Undesired External FIFO Operations */
 #define ANOMALY_05000416 (1)
 /* SPORT0 Ignores External TSCLK0 on PG14 When TMR6 is an Output */
-#define ANOMALY_05000417 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
-/* tSFSPE and tHFSPE Do Not Meet Data Sheet Specifications */
-#define ANOMALY_05000418 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000417 (_ANOMALY_BF527(< 2))
+/* PPI Timing Requirements tSFSPE and tHFSPE Do Not Meet Data Sheet Specifications */
+#define ANOMALY_05000418 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* USB PLL_STABLE Bit May Not Accurately Reflect the USB PLL's Status */
-#define ANOMALY_05000420 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000420 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* TWI Fall Time (Tof) May Violate the Minimum I2C Specification */
 #define ANOMALY_05000421 (1)
 /* TWI Input Capacitance (Ci) May Violate the Maximum I2C Specification */
-#define ANOMALY_05000422 (ANOMALY_BF527 && __SILICON_REVISION__ > 1)
+#define ANOMALY_05000422 (_ANOMALY_BF526_BF527(> 0, > 1))
 /* Certain Ethernet Frames With Errors are Misclassified in RMII Mode */
-#define ANOMALY_05000423 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000423 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Internal Voltage Regulator Not Trimmed */
-#define ANOMALY_05000424 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000424 (_ANOMALY_BF527(< 2))
 /* Multichannel SPORT Channel Misalignment Under Specific Configuration */
-#define ANOMALY_05000425 (__SILICON_REVISION__ < 2)
-/* Speculative Fetches of Indirect-Pointer Instructions Can Cause Spurious Hardware Errors */
+#define ANOMALY_05000425 (_ANOMALY_BF526_BF527(< 1, < 2))
+/* Speculative Fetches of Indirect-Pointer Instructions Can Cause False Hardware Errors */
 #define ANOMALY_05000426 (1)
 /* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Reflects Buffer Status Instead of IRQ Status */
-#define ANOMALY_05000429 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000429 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Software System Reset Corrupts PLL_LOCKCNT Register */
-#define ANOMALY_05000430 (ANOMALY_BF527 && __SILICON_REVISION__ > 1)
+#define ANOMALY_05000430 (_ANOMALY_BF527(> 1))
+/* Incorrect Use of Stack in Lockbox Firmware During Authentication */
+#define ANOMALY_05000431 (1)
 /* bfrom_SysControl() Does Not Clear SIC_IWR1 Before Executing PLL Programming Sequence */
-#define ANOMALY_05000432 (ANOMALY_BF526)
+#define ANOMALY_05000432 (_ANOMALY_BF526(< 1))
 /* Certain SIC Registers are not Reset After Soft or Core Double Fault Reset */
-#define ANOMALY_05000435 ((ANOMALY_BF526 && __SILICON_REVISION__ < 1) || ANOMALY_BF527)
+#define ANOMALY_05000435 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* Preboot Cannot be Used to Alter the PLL_DIV Register */
+#define ANOMALY_05000439 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* bfrom_SysControl() Cannot be Used to Write the PLL_DIV Register */
+#define ANOMALY_05000440 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* OTP Write Accesses Not Supported */
+#define ANOMALY_05000442 (_ANOMALY_BF527(< 1))
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* The WURESET Bit in the SYSCR Register is not Functional */
+#define ANOMALY_05000445 (1)
+/* BCODE_QUICKBOOT, BCODE_ALLBOOT, and BCODE_FULLBOOT Settings in SYSCR Register Not Functional */
+#define ANOMALY_05000451 (1)
+/* Incorrect Default Hysteresis Setting for RESET, NMI, and BMODE Signals */
+#define ANOMALY_05000452 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* USB Receive Interrupt Is Not Generated in DMA Mode 1 */
+#define ANOMALY_05000456 (1)
+/* Host DMA Port Responds to Certain Bus Activity Without HOST_CE Assertion */
+#define ANOMALY_05000457 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
 #define ANOMALY_05000285 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000312 (0)
 #define ANOMALY_05000323 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf533/Kconfig b/arch/blackfin/mach-bf533/Kconfig
index 14427de..4c57244 100644
--- a/arch/blackfin/mach-bf533/Kconfig
+++ b/arch/blackfin/mach-bf533/Kconfig
@@ -59,6 +59,7 @@
 	default 10
 config TIMER0
 	int "TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config TIMER1
 	int "TIMER1"
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index 0c66bf4..895f213 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -173,7 +173,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.ctl_reg = 0x1000,
@@ -216,7 +216,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 4,     /* actual baudrate is SCLK/(2xspeed_hz) */
diff --git a/arch/blackfin/mach-bf533/boards/cm_bf533.c b/arch/blackfin/mach-bf533/boards/cm_bf533.c
index e897487..a727e53 100644
--- a/arch/blackfin/mach-bf533/boards/cm_bf533.c
+++ b/arch/blackfin/mach-bf533/boards/cm_bf533.c
@@ -82,7 +82,7 @@
 #endif
 
 /* SPI ADC chip */
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
 	.bits_per_word = 16,
@@ -117,7 +117,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c
index 08cd096..842f1c9 100644
--- a/arch/blackfin/mach-bf533/boards/ezkit.c
+++ b/arch/blackfin/mach-bf533/boards/ezkit.c
@@ -118,7 +118,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -154,7 +154,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index db96f33..e19c565 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -192,7 +192,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -237,7 +237,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -448,7 +448,7 @@
 		.irq = 39,
 	},
 #endif
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf533/include/mach/anomaly.h b/arch/blackfin/mach-bf533/include/mach/anomaly.h
index 1cf893e..31145b5 100644
--- a/arch/blackfin/mach-bf533/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf533/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision E, 09/18/2008; ADSP-BF531/BF532/BF533 Blackfin Processor Anomaly List
  */
 
@@ -34,12 +34,12 @@
 # define ANOMALY_BF533 0
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot 2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* UART Line Status Register (UART_LSR) Bits Are Not Updated at the Same Time */
 #define ANOMALY_05000099 (__SILICON_REVISION__ < 5)
 /* Watchpoint Status Register (WPSTAT) Bits Are Set on Every Corresponding Match */
-#define ANOMALY_05000105 (1)
+#define ANOMALY_05000105 (__SILICON_REVISION__ > 2)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
@@ -48,7 +48,7 @@
 #define ANOMALY_05000158 (__SILICON_REVISION__ < 5)
 /* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
-/* Turning Serial Ports on with External Frame Syncs */
+/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
 #define ANOMALY_05000167 (1)
 /* PPI_COUNT Cannot Be Programmed to 0 in General Purpose TX or RX Modes */
 #define ANOMALY_05000179 (__SILICON_REVISION__ < 5)
@@ -67,9 +67,9 @@
 /* Current DMA Address Shows Wrong Value During Carry Fix */
 #define ANOMALY_05000199 (__SILICON_REVISION__ < 4)
 /* SPORT TFS and DT Are Incorrectly Driven During Inactive Channels in Certain Conditions */
-#define ANOMALY_05000200 (__SILICON_REVISION__ < 5)
+#define ANOMALY_05000200 (__SILICON_REVISION__ == 3 || __SILICON_REVISION__ == 4)
 /* Receive Frame Sync Not Ignored During Active Frames in SPORT Multi-Channel Mode */
-#define ANOMALY_05000201 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000201 (__SILICON_REVISION__ == 3)
 /* Possible Infinite Stall with Specific Dual-DAG Situation */
 #define ANOMALY_05000202 (__SILICON_REVISION__ < 5)
 /* Specific Sequence That Can Cause DMA Error or DMA Stopping */
@@ -104,7 +104,7 @@
 #define ANOMALY_05000242 (__SILICON_REVISION__ < 5)
 /* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 5)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Data CPLBs Should Prevent Spurious Hardware Errors */
 #define ANOMALY_05000246 (__SILICON_REVISION__ < 5)
@@ -137,7 +137,7 @@
 /* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 5)
 /* Spontaneous Reset of Internal Voltage Regulator */
-#define ANOMALY_05000271 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000271 (__SILICON_REVISION__ == 3)
 /* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
 /* Writes to Synchronous SDRAM Memory May Be Lost */
@@ -165,14 +165,14 @@
 /* New Feature: Additional PPI Frame Sync Sampling Options (Not Available On Older Silicon) */
 #define ANOMALY_05000306 (__SILICON_REVISION__ < 5)
 /* SCKELOW Bit Does Not Maintain State Through Hibernate */
-#define ANOMALY_05000307 (1)
+#define ANOMALY_05000307 (1)	/* note: brokenness is noted in documentation, not anomaly sheet */
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
 /* Erroneous Flag (GPIO) Pin Operations under Specific Sequences */
 #define ANOMALY_05000311 (__SILICON_REVISION__ < 6)
 /* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (__SILICON_REVISION__ < 6)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (__SILICON_REVISION__ < 6)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 6)
@@ -200,17 +200,63 @@
 #define ANOMALY_05000426 (1)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* These anomalies have been "phased" out of analog.com anomaly sheets and are
  * here to show running on older silicon just isn't feasible.
  */
 
+/* Internal voltage regulator can't be modified via register writes */
+#define ANOMALY_05000066 (__SILICON_REVISION__ < 2)
 /* Watchpoints (Hardware Breakpoints) are not supported */
 #define ANOMALY_05000067 (__SILICON_REVISION__ < 3)
+/* SDRAM PSSE bit cannot be set again after SDRAM Powerup */
+#define ANOMALY_05000070 (__SILICON_REVISION__ < 2)
+/* Writing FIO_DIR can corrupt a programmable flag's data */
+#define ANOMALY_05000079 (__SILICON_REVISION__ < 2)
+/* Timer Auto-Baud Mode requires the UART clock to be enabled */
+#define ANOMALY_05000086 (__SILICON_REVISION__ < 2)
+/* Internal Clocking Modes on SPORT0 not supported */
+#define ANOMALY_05000088 (__SILICON_REVISION__ < 2)
+/* Internal voltage regulator does not wake up from an RTC wakeup */
+#define ANOMALY_05000092 (__SILICON_REVISION__ < 2)
+/* The IFLUSH instruction must be preceded by a CSYNC instruction */
+#define ANOMALY_05000093 (__SILICON_REVISION__ < 2)
+/* Vectoring to an instruction that is presently being filled into the instruction cache may cause erroneous behavior */
+#define ANOMALY_05000095 (__SILICON_REVISION__ < 2)
+/* PREFETCH, FLUSH, and FLUSHINV must be followed by a CSYNC */
+#define ANOMALY_05000096 (__SILICON_REVISION__ < 2)
+/* Performance Monitor 0 and 1 are swapped when monitoring memory events */
+#define ANOMALY_05000097 (__SILICON_REVISION__ < 2)
+/* 32-bit SPORT DMA will be word reversed */
+#define ANOMALY_05000098 (__SILICON_REVISION__ < 2)
+/* Incorrect status in the UART_IIR register */
+#define ANOMALY_05000100 (__SILICON_REVISION__ < 2)
+/* Reading X_MODIFY or Y_MODIFY while DMA channel is active */
+#define ANOMALY_05000101 (__SILICON_REVISION__ < 2)
+/* Descriptor-based MemDMA may lock up with 32-bit transfers or if transfers span 64KB buffers */
+#define ANOMALY_05000102 (__SILICON_REVISION__ < 2)
+/* Incorrect value written to the cycle counters */
+#define ANOMALY_05000103 (__SILICON_REVISION__ < 2)
+/* Stores to L1 Data memory incorrect when a specific sequence is followed */
+#define ANOMALY_05000104 (__SILICON_REVISION__ < 2)
+/* Programmable Flag (PF3) functionality not supported in all PPI modes */
+#define ANOMALY_05000106 (__SILICON_REVISION__ < 2)
+/* Data store can be lost when targeting a cache line fill */
+#define ANOMALY_05000107 (__SILICON_REVISION__ < 2)
 /* Reserved bits in SYSCFG register not set at power on */
 #define ANOMALY_05000109 (__SILICON_REVISION__ < 3)
+/* Infinite Core Stall */
+#define ANOMALY_05000114 (__SILICON_REVISION__ < 2)
+/* PPI_FSx may glitch when generated by the on chip Timers */
+#define ANOMALY_05000115 (__SILICON_REVISION__ < 2)
 /* Trace Buffers may record discontinuities into emulation mode and/or exception, NMI, reset handlers */
 #define ANOMALY_05000116 (__SILICON_REVISION__ < 3)
+/* DTEST registers allow access to Data Cache when DTEST_COMMAND< 14 >= 0 */
+#define ANOMALY_05000117 (__SILICON_REVISION__ < 2)
+/* Booting from an 8-bit or 24-bit Addressable SPI device is not supported */
+#define ANOMALY_05000118 (__SILICON_REVISION__ < 2)
 /* DTEST_COMMAND initiated memory access may be incorrect if data cache or DMA is active */
 #define ANOMALY_05000123 (__SILICON_REVISION__ < 3)
 /* DMA Lock-up at CCLK to SCLK ratios of 4:1, 2:1, or 1:1 */
@@ -222,7 +268,9 @@
 /* DMEM_CONTROL is not set on Reset */
 #define ANOMALY_05000137 (__SILICON_REVISION__ < 3)
 /* SPI boot will not complete if there is a zero fill block in the loader file */
-#define ANOMALY_05000138 (__SILICON_REVISION__ < 3)
+#define ANOMALY_05000138 (__SILICON_REVISION__ == 2)
+/* Timerx_Config must be set for using the PPI in GP output mode with internal Frame Syncs */
+#define ANOMALY_05000139 (__SILICON_REVISION__ < 2)
 /* Allowing the SPORT RX FIFO to fill will cause an overflow */
 #define ANOMALY_05000140 (__SILICON_REVISION__ < 3)
 /* An Infinite Stall occurs with a particular sequence of consecutive dual dag events */
@@ -237,17 +285,17 @@
 #define ANOMALY_05000145 (__SILICON_REVISION__ < 3)
 /* MDMA may lose the first few words of a descriptor chain */
 #define ANOMALY_05000146 (__SILICON_REVISION__ < 3)
-/* The source MDMA descriptor may stop with a DMA Error */
+/* Source MDMA descriptor may stop with a DMA Error near beginning of descriptor fetch */
 #define ANOMALY_05000147 (__SILICON_REVISION__ < 3)
 /* When booting from a 16-bit asynchronous memory device, the upper 8-bits of each word must be 0x00 */
 #define ANOMALY_05000148 (__SILICON_REVISION__ < 3)
 /* Frame Delay in SPORT Multichannel Mode */
 #define ANOMALY_05000153 (__SILICON_REVISION__ < 3)
-/* SPORT TFS signal is active in Multi-channel mode outside of valid channels */
+/* SPORT TFS signal stays active in multichannel mode outside of valid channels */
 #define ANOMALY_05000154 (__SILICON_REVISION__ < 3)
 /* Timer1 can not be used for PWMOUT mode when a certain PPI mode is in use */
 #define ANOMALY_05000155 (__SILICON_REVISION__ < 3)
-/* A killed 32-bit System MMR write will lead to the next system MMR access thinking it should be 32-bit. */
+/* Killed 32-bit MMR write leads to next system MMR access thinking it should be 32-bit */
 #define ANOMALY_05000157 (__SILICON_REVISION__ < 3)
 /* SPORT transmit data is not gated by external frame sync in certain conditions */
 #define ANOMALY_05000163 (__SILICON_REVISION__ < 3)
@@ -275,15 +323,27 @@
 #define ANOMALY_05000206 (__SILICON_REVISION__ < 3)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000120 (0)
+#define ANOMALY_05000149 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000248 (0)
 #define ANOMALY_05000266 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf537/Kconfig b/arch/blackfin/mach-bf537/Kconfig
index bbc08fd..d81224f 100644
--- a/arch/blackfin/mach-bf537/Kconfig
+++ b/arch/blackfin/mach-bf537/Kconfig
@@ -66,6 +66,7 @@
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537.c b/arch/blackfin/mach-bf537/boards/cm_bf537.c
index 41c75b9..4fee196 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537.c
@@ -86,7 +86,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -129,7 +129,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf537/boards/pnav10.c b/arch/blackfin/mach-bf537/boards/pnav10.c
index 4e1de1e..26707ce 100644
--- a/arch/blackfin/mach-bf537/boards/pnav10.c
+++ b/arch/blackfin/mach-bf537/boards/pnav10.c
@@ -265,8 +265,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -333,8 +333,8 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 0572926..dfb5036 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -508,8 +508,8 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -607,6 +607,43 @@
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X) || defined(CONFIG_INPUT_ADXL34X_MODULE)
+#include <linux/input.h>
+#include <linux/spi/adxl34x.h>
+static const struct adxl34x_platform_data adxl34x_info = {
+	.x_axis_offset = 0,
+	.y_axis_offset = 0,
+	.z_axis_offset = 0,
+	.tap_threshold = 0x31,
+	.tap_duration = 0x10,
+	.tap_latency = 0x60,
+	.tap_window = 0xF0,
+	.tap_axis_control = ADXL_TAP_X_EN | ADXL_TAP_Y_EN | ADXL_TAP_Z_EN,
+	.act_axis_control = 0xFF,
+	.activity_threshold = 5,
+	.inactivity_threshold = 3,
+	.inactivity_time = 4,
+	.free_fall_threshold = 0x7,
+	.free_fall_time = 0x20,
+	.data_rate = 0x8,
+	.data_range = ADXL_FULL_RES,
+
+	.ev_type = EV_ABS,
+	.ev_code_x = ABS_X,		/* EV_REL */
+	.ev_code_y = ABS_Y,		/* EV_REL */
+	.ev_code_z = ABS_Z,		/* EV_REL */
+
+	.ev_code_tap_x = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_y = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_z = BTN_TOUCH,		/* EV_KEY */
+
+/*	.ev_code_ff = KEY_F,*/		/* EV_KEY */
+/*	.ev_code_act_inactivity = KEY_A,*/	/* EV_KEY */
+	.power_mode = ADXL_AUTO_SLEEP | ADXL_LINK,
+	.fifo_mode = ADXL_FIFO_STREAM,
+};
+#endif
+
 #if defined(CONFIG_TOUCHSCREEN_AD7879_SPI) || defined(CONFIG_TOUCHSCREEN_AD7879_SPI_MODULE)
 static struct bfin5xx_spi_chip spi_ad7879_chip_info = {
 	.enable_dma = 0,
@@ -695,8 +732,8 @@
 		.mode = SPI_MODE_3,
 	},
 #endif
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -1280,7 +1317,7 @@
 		.irq = IRQ_PF5,
 	},
 #endif
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
@@ -1312,6 +1349,13 @@
 		.platform_data = (void *)&adp5520_pdev_data,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_I2C) || defined(CONFIG_INPUT_ADXL34X_I2C_MODULE)
+	{
+		I2C_BOARD_INFO("adxl34x", 0x53),
+		.irq = IRQ_PG3,
+		.platform_data = (void *)&adxl34x_info,
+	},
+#endif
 };
 
 #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE)
@@ -1358,16 +1402,18 @@
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 0,
 };
-
+/* CompactFlash Storage Card Memory Mapped Adressing
+ * /REG = A11 = 1
+ */
 static struct resource bfin_pata_resources[] = {
 	{
-		.start = 0x20211820,
-		.end = 0x2021183F,
+		.start = 0x20211800,
+		.end = 0x20211807,
 		.flags = IORESOURCE_MEM,
 	},
 	{
-		.start = 0x2021181C,
-		.end = 0x2021181F,
+		.start = 0x2021180E,	/* Device Ctl */
+		.end = 0x2021180E,
 		.flags = IORESOURCE_MEM,
 	},
 };
@@ -1527,7 +1573,8 @@
 	platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices));
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 
-#if defined(CONFIG_PATA_PLATFORM) || defined(CONFIG_PATA_PLATFORM_MODULE)
+#if (defined(CONFIG_PATA_PLATFORM) || defined(CONFIG_PATA_PLATFORM_MODULE)) \
+	 && defined(PATA_INT)
 	irq_desc[PATA_INT].status |= IRQ_NOAUTOEN;
 #endif
 
diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
index 53ad10f..2805745 100644
--- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
@@ -86,7 +86,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -129,7 +129,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf537/include/mach/anomaly.h b/arch/blackfin/mach-bf537/include/mach/anomaly.h
index 1bfd80c..fc96634 100644
--- a/arch/blackfin/mach-bf537/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf537/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision D, 09/18/2008; ADSP-BF534/ADSP-BF536/ADSP-BF537 Blackfin Processor Anomaly List
  */
 
@@ -36,77 +36,75 @@
 
 /* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
-/* DMA_RUN bit is not valid after a Peripheral Receive Channel DMA stops */
+/* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
-/* Rx.H cannot be used to access 16-bit System MMR registers */
+/* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
 /* Killed 32-bit MMR write leads to next system MMR access thinking it should be 32-bit */
 #define ANOMALY_05000157 (__SILICON_REVISION__ < 2)
-/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
-#define ANOMALY_05000167 (1)
-/* PPI_DELAY not functional in PPI modes with 0 frame syncs */
+/* PPI_DELAY Not Functional in PPI Modes with 0 Frame Syncs */
 #define ANOMALY_05000180 (1)
 /* Instruction Cache Is Not Functional */
 #define ANOMALY_05000237 (__SILICON_REVISION__ < 2)
-/* If i-cache is on, CSYNC/SSYNC/IDLE around Change of Control causes failures */
+/* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 3)
-/* Spurious Hardware Error from an access in the shadow of a conditional branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* CLKIN Buffer Output Enable Reset Behavior Is Changed */
 #define ANOMALY_05000247 (1)
-/* Incorrect Bit-Shift of Data Word in Multichannel (TDM) mode in certain conditions */
+/* Incorrect Bit Shift of Data Word in Multichannel (TDM) Mode in Certain Conditions */
 #define ANOMALY_05000250 (__SILICON_REVISION__ < 3)
 /* EMAC Tx DMA error after an early frame abort */
 #define ANOMALY_05000252 (__SILICON_REVISION__ < 3)
-/* Maximum external clock speed for Timers */
+/* Maximum External Clock Speed for Timers */
 #define ANOMALY_05000253 (__SILICON_REVISION__ < 3)
-/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT mode with external clock */
+/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT Mode with External Clock */
 #define ANOMALY_05000254 (__SILICON_REVISION__ > 2)
-/* Entering Hibernate Mode with RTC Seconds event interrupt not functional */
+/* Entering Hibernate State with RTC Seconds Interrupt Not Functional */
 #define ANOMALY_05000255 (__SILICON_REVISION__ < 3)
 /* EMAC MDIO input latched on wrong MDC edge */
 #define ANOMALY_05000256 (__SILICON_REVISION__ < 3)
-/* Interrupt/Exception during short hardware loop may cause bad instruction fetches */
+/* Interrupt/Exception During Short Hardware Loop May Cause Bad Instruction Fetches */
 #define ANOMALY_05000257 (__SILICON_REVISION__ < 3)
-/* Instruction Cache is corrupted when bits 9 and 12 of the ICPLB Data registers differ */
+/* Instruction Cache Is Corrupted When Bits 9 and 12 of the ICPLB Data Registers Differ */
 #define ANOMALY_05000258 (((ANOMALY_BF536 || ANOMALY_BF537) && __SILICON_REVISION__ == 1) || __SILICON_REVISION__ == 2)
-/* ICPLB_STATUS MMR register may be corrupted */
+/* ICPLB_STATUS MMR Register May Be Corrupted */
 #define ANOMALY_05000260 (__SILICON_REVISION__ == 2)
-/* DCPLB_FAULT_ADDR MMR register may be corrupted */
+/* DCPLB_FAULT_ADDR MMR Register May Be Corrupted */
 #define ANOMALY_05000261 (__SILICON_REVISION__ < 3)
-/* Stores to data cache may be lost */
+/* Stores To Data Cache May Be Lost */
 #define ANOMALY_05000262 (__SILICON_REVISION__ < 3)
-/* Hardware loop corrupted when taking an ICPLB exception */
+/* Hardware Loop Corrupted When Taking an ICPLB Exception */
 #define ANOMALY_05000263 (__SILICON_REVISION__ == 2)
-/* CSYNC/SSYNC/IDLE causes infinite stall in second to last instruction in hardware loop */
+/* CSYNC/SSYNC/IDLE Causes Infinite Stall in Penultimate Instruction in Hardware Loop */
 #define ANOMALY_05000264 (__SILICON_REVISION__ < 3)
-/* Sensitivity to noise with slow input edge rates on external SPORT TX and RX clocks */
+/* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* Memory DMA error when peripheral DMA is running with non-zero DEB_TRAFFIC_PERIOD */
 #define ANOMALY_05000268 (__SILICON_REVISION__ < 3)
-/* High I/O activity causes output voltage of internal voltage regulator (VDDint) to decrease */
+/* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 3)
-/* Certain data cache write through modes fail for VDDint <=0.9V */
+/* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
-/* Writes to Synchronous SDRAM memory may be lost */
+/* Writes to Synchronous SDRAM Memory May Be Lost */
 #define ANOMALY_05000273 (__SILICON_REVISION__ < 3)
-/* Writes to an I/O data register one SCLK cycle after an edge is detected may clear interrupt */
+/* Writes to an I/O Data Register One SCLK Cycle after an Edge Is Detected May Clear Interrupt */
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 3)
-/* Disabling Peripherals with DMA running may cause DMA system instability */
+/* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (((ANOMALY_BF536 || ANOMALY_BF537) && __SILICON_REVISION__ < 3) || (ANOMALY_BF534 && __SILICON_REVISION__ < 2))
 /* SPI Master boot mode does not work well with Atmel Data flash devices */
 #define ANOMALY_05000280 (1)
-/* False Hardware Error Exception when ISR context is not restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 3)
-/* Memory DMA corruption with 32-bit data and traffic control */
+/* Memory DMA Corruption with 32-Bit Data and Traffic Control */
 #define ANOMALY_05000282 (__SILICON_REVISION__ < 3)
 /* System MMR Write Is Stalled Indefinitely When Killed in a Particular Stage */
 #define ANOMALY_05000283 (__SILICON_REVISION__ < 3)
 /* New Feature: EMAC TX DMA Word Alignment (Not Available On Older Silicon) */
 #define ANOMALY_05000285 (__SILICON_REVISION__ < 3)
-/* SPORTs may receive bad data if FIFOs fill up */
+/* SPORTs May Receive Bad Data If FIFOs Fill Up */
 #define ANOMALY_05000288 (__SILICON_REVISION__ < 3)
-/* Memory to memory DMA source/destination descriptors must be in same memory space */
+/* Memory-To-Memory DMA Source/Destination Descriptors Must Be in Same Memory Space */
 #define ANOMALY_05000301 (1)
 /* SSYNCs After Writes To CAN/DMA MMR Registers Are Not Always Handled Correctly */
 #define ANOMALY_05000304 (__SILICON_REVISION__ < 3)
@@ -116,11 +114,11 @@
 #define ANOMALY_05000307 (__SILICON_REVISION__ < 3)
 /* Writing UART_THR while UART clock is disabled sends erroneous start bit */
 #define ANOMALY_05000309 (__SILICON_REVISION__ < 3)
-/* False hardware errors caused by fetches at the boundary of reserved memory */
+/* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
-/* Errors when SSYNC, CSYNC, or loads to LT, LB and LC registers are interrupted */
+/* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (1)
-/* PPI is level sensitive on first transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (1)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 3)
@@ -156,24 +154,46 @@
 #define ANOMALY_05000426 (1)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
+#define ANOMALY_05000248 (0)
 #define ANOMALY_05000266 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf538/Kconfig b/arch/blackfin/mach-bf538/Kconfig
index f068c35..2d280f5 100644
--- a/arch/blackfin/mach-bf538/Kconfig
+++ b/arch/blackfin/mach-bf538/Kconfig
@@ -57,6 +57,7 @@
 	default 10
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h
index 3a56998..175ca9e 100644
--- a/arch/blackfin/mach-bf538/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision G, 09/18/2008; ADSP-BF538/BF538F Blackfin Processor Anomaly List
  *  - Revision L, 09/18/2008; ADSP-BF539/BF539F Blackfin Processor Anomaly List
  */
@@ -14,17 +14,29 @@
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
+/* We do not support old silicon - sorry */
 #if __SILICON_REVISION__ < 4
-# error will not work on BF538 silicon version 0.0, 0.1, 0.2, or 0.3
+# error will not work on BF538/BF539 silicon version 0.0, 0.1, 0.2, or 0.3
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+#if defined(__ADSPBF538__)
+# define ANOMALY_BF538 1
+#else
+# define ANOMALY_BF538 0
+#endif
+#if defined(__ADSPBF539__)
+# define ANOMALY_BF539 1
+#else
+# define ANOMALY_BF539 0
+#endif
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* PPI Data Lengths between 8 and 16 Do Not Zero Out Upper Bits */
+/* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
 /* PPI_COUNT Cannot Be Programmed to 0 in General Purpose TX or RX Modes */
 #define ANOMALY_05000179 (1)
@@ -40,13 +52,13 @@
 #define ANOMALY_05000229 (1)
 /* PPI_FS3 Is Not Driven in 2 or 3 Internal Frame Sync Transmit Modes */
 #define ANOMALY_05000233 (1)
-/* If i-cache is on, CSYNC/SSYNC/IDLE around Change of Control causes failures */
+/* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 3)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Maximum External Clock Speed for Timers */
 #define ANOMALY_05000253 (1)
-/* DCPLB_FAULT_ADDR MMR register may be corrupted */
+/* DCPLB_FAULT_ADDR MMR Register May Be Corrupted */
 #define ANOMALY_05000261 (__SILICON_REVISION__ < 3)
 /* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 4)
@@ -58,11 +70,11 @@
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 4)
 /* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (__SILICON_REVISION__ < 4)
-/* False Hardware Error Exception when ISR Context Is Not Restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 4)
 /* Memory DMA Corruption with 32-Bit Data and Traffic Control */
 #define ANOMALY_05000282 (__SILICON_REVISION__ < 4)
-/* System MMR Write Is Stalled Indefinitely when Killed in a Particular Stage */
+/* System MMR Write Is Stalled Indefinitely When Killed in a Particular Stage */
 #define ANOMALY_05000283 (__SILICON_REVISION__ < 4)
 /* SPORTs May Receive Bad Data If FIFOs Fill Up */
 #define ANOMALY_05000288 (__SILICON_REVISION__ < 4)
@@ -80,14 +92,14 @@
 #define ANOMALY_05000307 (__SILICON_REVISION__ < 4)
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
-/* Errors when SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
+/* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (__SILICON_REVISION__ < 5)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (__SILICON_REVISION__ < 4)
-/* Killed System MMR Write Completes Erroneously on Next System MMR Access */
+/* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 4)
 /* PFx Glitch on Write to FIO_FLAG_D or FIO_FLAG_T */
-#define ANOMALY_05000318 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000318 (ANOMALY_BF539 && __SILICON_REVISION__ < 4)
 /* Regulator Programming Blocked when Hibernate Wakeup Source Remains Active */
 #define ANOMALY_05000355 (__SILICON_REVISION__ < 5)
 /* Serial Port (SPORT) Multichannel Transmit Failure when Channel 0 Is Disabled */
@@ -114,23 +126,45 @@
 #define ANOMALY_05000436 (__SILICON_REVISION__ > 3)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000242 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
+#define ANOMALY_05000254 (0)
 #define ANOMALY_05000263 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf538/include/mach/blackfin.h b/arch/blackfin/mach-bf538/include/mach/blackfin.h
index ea25371a9..6f62835 100644
--- a/arch/blackfin/mach-bf538/include/mach/blackfin.h
+++ b/arch/blackfin/mach-bf538/include/mach/blackfin.h
@@ -68,25 +68,6 @@
 #define OFFSET_SCR              0x1C	/* SCR Scratch Register                 */
 #define OFFSET_GCTL             0x24	/* Global Control Register              */
 
-
-#define bfin_write_MDMA_D0_IRQ_STATUS	bfin_write_MDMA0_D0_IRQ_STATUS
-#define bfin_write_MDMA_D0_START_ADDR   bfin_write_MDMA0_D0_START_ADDR
-#define bfin_write_MDMA_S0_START_ADDR   bfin_write_MDMA0_S0_START_ADDR
-#define bfin_write_MDMA_D0_X_COUNT      bfin_write_MDMA0_D0_X_COUNT
-#define bfin_write_MDMA_S0_X_COUNT      bfin_write_MDMA0_S0_X_COUNT
-#define bfin_write_MDMA_D0_Y_COUNT      bfin_write_MDMA0_D0_Y_COUNT
-#define bfin_write_MDMA_S0_Y_COUNT      bfin_write_MDMA0_S0_Y_COUNT
-#define bfin_write_MDMA_D0_X_MODIFY     bfin_write_MDMA0_D0_X_MODIFY
-#define bfin_write_MDMA_S0_X_MODIFY     bfin_write_MDMA0_S0_X_MODIFY
-#define bfin_write_MDMA_D0_Y_MODIFY     bfin_write_MDMA0_D0_Y_MODIFY
-#define bfin_write_MDMA_S0_Y_MODIFY     bfin_write_MDMA0_S0_Y_MODIFY
-#define bfin_write_MDMA_S0_CONFIG       bfin_write_MDMA0_S0_CONFIG
-#define bfin_write_MDMA_D0_CONFIG       bfin_write_MDMA0_D0_CONFIG
-#define bfin_read_MDMA_S0_CONFIG        bfin_read_MDMA0_S0_CONFIG
-#define bfin_read_MDMA_D0_IRQ_STATUS    bfin_read_MDMA0_D0_IRQ_STATUS
-#define bfin_write_MDMA_S0_IRQ_STATUS   bfin_write_MDMA0_S0_IRQ_STATUS
-
-
 /* DPMC*/
 #define bfin_read_STOPCK_OFF() bfin_read_STOPCK()
 #define bfin_write_STOPCK_OFF(val) bfin_write_STOPCK(val)
diff --git a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
index 241725b..99ca3f4 100644
--- a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
+++ b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
@@ -67,14 +67,14 @@
 #define bfin_write_SIC_ISR0(val)       bfin_write32(SIC_ISR0, val)
 #define bfin_read_SIC_ISR1()           bfin_read32(SIC_ISR1)
 #define bfin_write_SIC_ISR1(val)       bfin_write32(SIC_ISR1, val)
-#define bfin_read_SIC_ISR(x)	       bfin_read32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0))
+#define bfin_read_SIC_ISR(x)           bfin_read32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0))
 #define bfin_write_SIC_ISR(x, val)     bfin_write32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0), val)
 #define bfin_read_SIC_IWR0()           bfin_read32(SIC_IWR0)
 #define bfin_write_SIC_IWR0(val)       bfin_write32(SIC_IWR0, val)
 #define bfin_read_SIC_IWR1()           bfin_read32(SIC_IWR1)
 #define bfin_write_SIC_IWR1(val)       bfin_write32(SIC_IWR1, val)
-#define bfin_read_SIC_IWR(x)	       bfin_read32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0))
-#define bfin_write_SIC_IWR(x, val)     bfin_write32((SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0), val)
+#define bfin_read_SIC_IWR(x)           bfin_read32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0))
+#define bfin_write_SIC_IWR(x, val)     bfin_write32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0), val)
 #define bfin_read_SIC_IAR0()           bfin_read32(SIC_IAR0)
 #define bfin_write_SIC_IAR0(val)       bfin_write32(SIC_IAR0, val)
 #define bfin_read_SIC_IAR1()           bfin_read32(SIC_IAR1)
@@ -1247,6 +1247,65 @@
 #define bfin_write_MDMA1_S1_CURR_X_COUNT(val) bfin_write16(MDMA1_S1_CURR_X_COUNT, val)
 #define bfin_read_MDMA1_S1_CURR_Y_COUNT() bfin_read16(MDMA1_S1_CURR_Y_COUNT)
 #define bfin_write_MDMA1_S1_CURR_Y_COUNT(val) bfin_write16(MDMA1_S1_CURR_Y_COUNT, val)
+
+#define bfin_read_MDMA_S0_CONFIG()  bfin_read_MDMA0_S0_CONFIG()
+#define bfin_write_MDMA_S0_CONFIG(val) bfin_write_MDMA0_S0_CONFIG(val)
+#define bfin_read_MDMA_S0_IRQ_STATUS()  bfin_read_MDMA0_S0_IRQ_STATUS()
+#define bfin_write_MDMA_S0_IRQ_STATUS(val) bfin_write_MDMA0_S0_IRQ_STATUS(val)
+#define bfin_read_MDMA_S0_X_MODIFY()  bfin_read_MDMA0_S0_X_MODIFY()
+#define bfin_write_MDMA_S0_X_MODIFY(val) bfin_write_MDMA0_S0_X_MODIFY(val)
+#define bfin_read_MDMA_S0_Y_MODIFY()  bfin_read_MDMA0_S0_Y_MODIFY()
+#define bfin_write_MDMA_S0_Y_MODIFY(val) bfin_write_MDMA0_S0_Y_MODIFY(val)
+#define bfin_read_MDMA_S0_X_COUNT()  bfin_read_MDMA0_S0_X_COUNT()
+#define bfin_write_MDMA_S0_X_COUNT(val) bfin_write_MDMA0_S0_X_COUNT(val)
+#define bfin_read_MDMA_S0_Y_COUNT()  bfin_read_MDMA0_S0_Y_COUNT()
+#define bfin_write_MDMA_S0_Y_COUNT(val) bfin_write_MDMA0_S0_Y_COUNT(val)
+#define bfin_read_MDMA_S0_START_ADDR()  bfin_read_MDMA0_S0_START_ADDR()
+#define bfin_write_MDMA_S0_START_ADDR(val) bfin_write_MDMA0_S0_START_ADDR(val)
+#define bfin_read_MDMA_D0_CONFIG()  bfin_read_MDMA0_D0_CONFIG()
+#define bfin_write_MDMA_D0_CONFIG(val) bfin_write_MDMA0_D0_CONFIG(val)
+#define bfin_read_MDMA_D0_IRQ_STATUS()  bfin_read_MDMA0_D0_IRQ_STATUS()
+#define bfin_write_MDMA_D0_IRQ_STATUS(val) bfin_write_MDMA0_D0_IRQ_STATUS(val)
+#define bfin_read_MDMA_D0_X_MODIFY()  bfin_read_MDMA0_D0_X_MODIFY()
+#define bfin_write_MDMA_D0_X_MODIFY(val) bfin_write_MDMA0_D0_X_MODIFY(val)
+#define bfin_read_MDMA_D0_Y_MODIFY()  bfin_read_MDMA0_D0_Y_MODIFY()
+#define bfin_write_MDMA_D0_Y_MODIFY(val) bfin_write_MDMA0_D0_Y_MODIFY(val)
+#define bfin_read_MDMA_D0_X_COUNT()  bfin_read_MDMA0_D0_X_COUNT()
+#define bfin_write_MDMA_D0_X_COUNT(val) bfin_write_MDMA0_D0_X_COUNT(val)
+#define bfin_read_MDMA_D0_Y_COUNT()  bfin_read_MDMA0_D0_Y_COUNT()
+#define bfin_write_MDMA_D0_Y_COUNT(val) bfin_write_MDMA0_D0_Y_COUNT(val)
+#define bfin_read_MDMA_D0_START_ADDR()  bfin_read_MDMA0_D0_START_ADDR()
+#define bfin_write_MDMA_D0_START_ADDR(val) bfin_write_MDMA0_D0_START_ADDR(val)
+
+#define bfin_read_MDMA_S1_CONFIG()  bfin_read_MDMA0_S1_CONFIG()
+#define bfin_write_MDMA_S1_CONFIG(val) bfin_write_MDMA0_S1_CONFIG(val)
+#define bfin_read_MDMA_S1_IRQ_STATUS()  bfin_read_MDMA0_S1_IRQ_STATUS()
+#define bfin_write_MDMA_S1_IRQ_STATUS(val) bfin_write_MDMA0_S1_IRQ_STATUS(val)
+#define bfin_read_MDMA_S1_X_MODIFY()  bfin_read_MDMA0_S1_X_MODIFY()
+#define bfin_write_MDMA_S1_X_MODIFY(val) bfin_write_MDMA0_S1_X_MODIFY(val)
+#define bfin_read_MDMA_S1_Y_MODIFY()  bfin_read_MDMA0_S1_Y_MODIFY()
+#define bfin_write_MDMA_S1_Y_MODIFY(val) bfin_write_MDMA0_S1_Y_MODIFY(val)
+#define bfin_read_MDMA_S1_X_COUNT()  bfin_read_MDMA0_S1_X_COUNT()
+#define bfin_write_MDMA_S1_X_COUNT(val) bfin_write_MDMA0_S1_X_COUNT(val)
+#define bfin_read_MDMA_S1_Y_COUNT()  bfin_read_MDMA0_S1_Y_COUNT()
+#define bfin_write_MDMA_S1_Y_COUNT(val) bfin_write_MDMA0_S1_Y_COUNT(val)
+#define bfin_read_MDMA_S1_START_ADDR()  bfin_read_MDMA0_S1_START_ADDR()
+#define bfin_write_MDMA_S1_START_ADDR(val) bfin_write_MDMA0_S1_START_ADDR(val)
+#define bfin_read_MDMA_D1_CONFIG()  bfin_read_MDMA0_D1_CONFIG()
+#define bfin_write_MDMA_D1_CONFIG(val) bfin_write_MDMA0_D1_CONFIG(val)
+#define bfin_read_MDMA_D1_IRQ_STATUS()  bfin_read_MDMA0_D1_IRQ_STATUS()
+#define bfin_write_MDMA_D1_IRQ_STATUS(val) bfin_write_MDMA0_D1_IRQ_STATUS(val)
+#define bfin_read_MDMA_D1_X_MODIFY()  bfin_read_MDMA0_D1_X_MODIFY()
+#define bfin_write_MDMA_D1_X_MODIFY(val) bfin_write_MDMA0_D1_X_MODIFY(val)
+#define bfin_read_MDMA_D1_Y_MODIFY()  bfin_read_MDMA0_D1_Y_MODIFY()
+#define bfin_write_MDMA_D1_Y_MODIFY(val) bfin_write_MDMA0_D1_Y_MODIFY(val)
+#define bfin_read_MDMA_D1_X_COUNT()  bfin_read_MDMA0_D1_X_COUNT()
+#define bfin_write_MDMA_D1_X_COUNT(val) bfin_write_MDMA0_D1_X_COUNT(val)
+#define bfin_read_MDMA_D1_Y_COUNT()  bfin_read_MDMA0_D1_Y_COUNT()
+#define bfin_write_MDMA_D1_Y_COUNT(val) bfin_write_MDMA0_D1_Y_COUNT(val)
+#define bfin_read_MDMA_D1_START_ADDR()  bfin_read_MDMA0_D1_START_ADDR()
+#define bfin_write_MDMA_D1_START_ADDR(val) bfin_write_MDMA0_D1_START_ADDR(val)
+
 #define bfin_read_PPI_CONTROL()        bfin_read16(PPI_CONTROL)
 #define bfin_write_PPI_CONTROL(val)    bfin_write16(PPI_CONTROL, val)
 #define bfin_read_PPI_STATUS()         bfin_read16(PPI_STATUS)
diff --git a/arch/blackfin/mach-bf538/include/mach/defBF539.h b/arch/blackfin/mach-bf538/include/mach/defBF539.h
index 6adbfcc..bdc330c 100644
--- a/arch/blackfin/mach-bf538/include/mach/defBF539.h
+++ b/arch/blackfin/mach-bf538/include/mach/defBF539.h
@@ -412,6 +412,62 @@
 #define	MDMA0_S1_CURR_X_COUNT	0xFFC00EF0	/* MemDMA0 Stream 1 Source Current X Count Register */
 #define	MDMA0_S1_CURR_Y_COUNT	0xFFC00EF8	/* MemDMA0 Stream 1 Source Current Y Count Register */
 
+#define MDMA_D0_NEXT_DESC_PTR MDMA0_D0_NEXT_DESC_PTR
+#define MDMA_D0_START_ADDR MDMA0_D0_START_ADDR
+#define MDMA_D0_CONFIG MDMA0_D0_CONFIG
+#define MDMA_D0_X_COUNT MDMA0_D0_X_COUNT
+#define MDMA_D0_X_MODIFY MDMA0_D0_X_MODIFY
+#define MDMA_D0_Y_COUNT MDMA0_D0_Y_COUNT
+#define MDMA_D0_Y_MODIFY MDMA0_D0_Y_MODIFY
+#define MDMA_D0_CURR_DESC_PTR MDMA0_D0_CURR_DESC_PTR
+#define MDMA_D0_CURR_ADDR MDMA0_D0_CURR_ADDR
+#define MDMA_D0_IRQ_STATUS MDMA0_D0_IRQ_STATUS
+#define MDMA_D0_PERIPHERAL_MAP MDMA0_D0_PERIPHERAL_MAP
+#define MDMA_D0_CURR_X_COUNT MDMA0_D0_CURR_X_COUNT
+#define MDMA_D0_CURR_Y_COUNT MDMA0_D0_CURR_Y_COUNT
+
+#define MDMA_S0_NEXT_DESC_PTR MDMA0_S0_NEXT_DESC_PTR
+#define MDMA_S0_START_ADDR MDMA0_S0_START_ADDR
+#define MDMA_S0_CONFIG MDMA0_S0_CONFIG
+#define MDMA_S0_X_COUNT MDMA0_S0_X_COUNT
+#define MDMA_S0_X_MODIFY MDMA0_S0_X_MODIFY
+#define MDMA_S0_Y_COUNT MDMA0_S0_Y_COUNT
+#define MDMA_S0_Y_MODIFY MDMA0_S0_Y_MODIFY
+#define MDMA_S0_CURR_DESC_PTR MDMA0_S0_CURR_DESC_PTR
+#define MDMA_S0_CURR_ADDR MDMA0_S0_CURR_ADDR
+#define MDMA_S0_IRQ_STATUS MDMA0_S0_IRQ_STATUS
+#define MDMA_S0_PERIPHERAL_MAP MDMA0_S0_PERIPHERAL_MAP
+#define MDMA_S0_CURR_X_COUNT MDMA0_S0_CURR_X_COUNT
+#define MDMA_S0_CURR_Y_COUNT MDMA0_S0_CURR_Y_COUNT
+
+#define MDMA_D1_NEXT_DESC_PTR MDMA0_D1_NEXT_DESC_PTR
+#define MDMA_D1_START_ADDR MDMA0_D1_START_ADDR
+#define MDMA_D1_CONFIG MDMA0_D1_CONFIG
+#define MDMA_D1_X_COUNT MDMA0_D1_X_COUNT
+#define MDMA_D1_X_MODIFY MDMA0_D1_X_MODIFY
+#define MDMA_D1_Y_COUNT MDMA0_D1_Y_COUNT
+#define MDMA_D1_Y_MODIFY MDMA0_D1_Y_MODIFY
+#define MDMA_D1_CURR_DESC_PTR MDMA0_D1_CURR_DESC_PTR
+#define MDMA_D1_CURR_ADDR MDMA0_D1_CURR_ADDR
+#define MDMA_D1_IRQ_STATUS MDMA0_D1_IRQ_STATUS
+#define MDMA_D1_PERIPHERAL_MAP MDMA0_D1_PERIPHERAL_MAP
+#define MDMA_D1_CURR_X_COUNT MDMA0_D1_CURR_X_COUNT
+#define MDMA_D1_CURR_Y_COUNT MDMA0_D1_CURR_Y_COUNT
+
+#define MDMA_S1_NEXT_DESC_PTR MDMA0_S1_NEXT_DESC_PTR
+#define MDMA_S1_START_ADDR MDMA0_S1_START_ADDR
+#define MDMA_S1_CONFIG MDMA0_S1_CONFIG
+#define MDMA_S1_X_COUNT MDMA0_S1_X_COUNT
+#define MDMA_S1_X_MODIFY MDMA0_S1_X_MODIFY
+#define MDMA_S1_Y_COUNT MDMA0_S1_Y_COUNT
+#define MDMA_S1_Y_MODIFY MDMA0_S1_Y_MODIFY
+#define MDMA_S1_CURR_DESC_PTR MDMA0_S1_CURR_DESC_PTR
+#define MDMA_S1_CURR_ADDR MDMA0_S1_CURR_ADDR
+#define MDMA_S1_IRQ_STATUS MDMA0_S1_IRQ_STATUS
+#define MDMA_S1_PERIPHERAL_MAP MDMA0_S1_PERIPHERAL_MAP
+#define MDMA_S1_CURR_X_COUNT MDMA0_S1_CURR_X_COUNT
+#define MDMA_S1_CURR_Y_COUNT MDMA0_S1_CURR_Y_COUNT
+
 
 /* Parallel Peripheral Interface (PPI) (0xFFC01000 - 0xFFC010FF) */
 #define	PPI_CONTROL			0xFFC01000	/* PPI Control Register */
diff --git a/arch/blackfin/mach-bf548/Kconfig b/arch/blackfin/mach-bf548/Kconfig
index dcf6571..a09623d 100644
--- a/arch/blackfin/mach-bf548/Kconfig
+++ b/arch/blackfin/mach-bf548/Kconfig
@@ -11,6 +11,13 @@
 	help
 	  Treat any DEB1, DEB2 and DEB3 request as Urgent
 
+config BF548_ATAPI_ALTERNATIVE_PORT
+	bool "BF548 ATAPI alternative port via GPIO"
+	help
+	  BF548 ATAPI data and address PINs can be routed through
+	  async address or GPIO port F and G. Select y to route it
+	  to GPIO.
+
 comment "Interrupt Priority Assignment"
 menu "Priority"
 
@@ -250,6 +257,7 @@
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 096e661..add5a17 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -208,6 +208,43 @@
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X) || defined(CONFIG_INPUT_ADXL34X_MODULE)
+#include <linux/input.h>
+#include <linux/spi/adxl34x.h>
+static const struct adxl34x_platform_data adxl34x_info = {
+	.x_axis_offset = 0,
+	.y_axis_offset = 0,
+	.z_axis_offset = 0,
+	.tap_threshold = 0x31,
+	.tap_duration = 0x10,
+	.tap_latency = 0x60,
+	.tap_window = 0xF0,
+	.tap_axis_control = ADXL_TAP_X_EN | ADXL_TAP_Y_EN | ADXL_TAP_Z_EN,
+	.act_axis_control = 0xFF,
+	.activity_threshold = 5,
+	.inactivity_threshold = 3,
+	.inactivity_time = 4,
+	.free_fall_threshold = 0x7,
+	.free_fall_time = 0x20,
+	.data_rate = 0x8,
+	.data_range = ADXL_FULL_RES,
+
+	.ev_type = EV_ABS,
+	.ev_code_x = ABS_X,		/* EV_REL */
+	.ev_code_y = ABS_Y,		/* EV_REL */
+	.ev_code_z = ABS_Z,		/* EV_REL */
+
+	.ev_code_tap_x = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_y = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_z = BTN_TOUCH,		/* EV_KEY */
+
+/*	.ev_code_ff = KEY_F,*/		/* EV_KEY */
+/*	.ev_code_act_inactivity = KEY_A,*/	/* EV_KEY */
+	.power_mode = ADXL_AUTO_SLEEP | ADXL_LINK,
+	.fifo_mode = ADXL_FIFO_STREAM,
+};
+#endif
+
 #if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE)
 static struct platform_device rtc_device = {
 	.name = "rtc-bfin",
@@ -628,6 +665,14 @@
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X_SPI) || defined(CONFIG_INPUT_ADXL34X_SPI_MODULE)
+static struct bfin5xx_spi_chip spi_adxl34x_chip_info = {
+	.enable_dma = 0,         /* use dma transfer with this chip*/
+	.bits_per_word = 8,
+	.cs_change_per_word = 0,
+};
+#endif
+
 static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #if defined(CONFIG_MTD_M25P80) \
 	|| defined(CONFIG_MTD_M25P80_MODULE)
@@ -653,15 +698,15 @@
 	},
 #endif
 #if defined(CONFIG_TOUCHSCREEN_AD7877) || defined(CONFIG_TOUCHSCREEN_AD7877_MODULE)
-{
-	.modalias		= "ad7877",
-	.platform_data		= &bfin_ad7877_ts_info,
-	.irq			= IRQ_PB4,	/* old boards (<=Rev 1.3) use IRQ_PJ11 */
-	.max_speed_hz		= 12500000,     /* max spi clock (SCK) speed in HZ */
-	.bus_num		= 0,
-	.chip_select  		= 2,
-	.controller_data = &spi_ad7877_chip_info,
-},
+	{
+		.modalias		= "ad7877",
+		.platform_data		= &bfin_ad7877_ts_info,
+		.irq			= IRQ_PB4,	/* old boards (<=Rev 1.3) use IRQ_PJ11 */
+		.max_speed_hz		= 12500000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num		= 0,
+		.chip_select  		= 2,
+		.controller_data = &spi_ad7877_chip_info,
+	},
 #endif
 #if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE)
 	{
@@ -672,8 +717,19 @@
 		.controller_data = &spidev_chip_info,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_SPI) || defined(CONFIG_INPUT_ADXL34X_SPI_MODULE)
+	{
+		.modalias		= "adxl34x",
+		.platform_data		= &adxl34x_info,
+		.irq			= IRQ_PC5,
+		.max_speed_hz		= 5000000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num		= 1,
+		.chip_select  		= 2,
+		.controller_data = &spi_adxl34x_chip_info,
+		.mode = SPI_MODE_3,
+	},
+#endif
 };
-
 #if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
@@ -786,7 +842,7 @@
 
 #if !defined(CONFIG_BF542)	/* The BF542 only has 1 TWI */
 static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
@@ -797,6 +853,13 @@
 		.irq = 212,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_I2C) || defined(CONFIG_INPUT_ADXL34X_I2C_MODULE)
+	{
+		I2C_BOARD_INFO("adxl34x", 0x53),
+		.irq = IRQ_PC5,
+		.platform_data = (void *)&adxl34x_info,
+	},
+#endif
 };
 #endif
 
diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h
index 882e40c..c510ae6 100644
--- a/arch/blackfin/mach-bf548/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h
@@ -6,26 +6,31 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision H, 01/16/2009; ADSP-BF542/BF544/BF547/BF548/BF549 Blackfin Processor Anomaly List
  */
 
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+/* We do not support 0.0 or 0.1 silicon - sorry */
+#if __SILICON_REVISION__ < 2
+# error will not work on BF548 silicon version 0.0, or 0.1
+#endif
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
-/* False Hardware Error Exception when ISR context is not restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 1)
 /* SSYNCs After Writes To CAN/DMA MMR Registers Are Not Always Handled Correctly */
 #define ANOMALY_05000304 (__SILICON_REVISION__ < 1)
@@ -59,7 +64,7 @@
 #define ANOMALY_05000340 (__SILICON_REVISION__ < 1)
 /* Boot Host Wait (HWAIT) and Boot Host Wait Alternate (HWAITA) Signals Are Swapped */
 #define ANOMALY_05000344 (__SILICON_REVISION__ < 1)
-/* USB Calibration Value Is Not Intialized */
+/* USB Calibration Value Is Not Initialized */
 #define ANOMALY_05000346 (__SILICON_REVISION__ < 1)
 /* USB Calibration Value to use */
 #define ANOMALY_05000346_value 0x5411
@@ -147,11 +152,11 @@
 #define ANOMALY_05000416 (1)
 /* Multichannel SPORT Channel Misalignment Under Specific Configuration */
 #define ANOMALY_05000425 (1)
-/* Speculative Fetches of Indirect-Pointer Instructions Can Cause Spurious Hardware Errors */
+/* Speculative Fetches of Indirect-Pointer Instructions Can Cause False Hardware Errors */
 #define ANOMALY_05000426 (1)
 /* CORE_EPPI_PRIO bit and SYS_EPPI_PRIO bit in the HMDMA1_CONTROL register are not functional */
 #define ANOMALY_05000427 (__SILICON_REVISION__ < 2)
-/* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Behaves as a Buffer Status Bit Instead of an IRQ Status Bit */
+/* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Reflects Buffer Status Instead of IRQ Status */
 #define ANOMALY_05000429 (__SILICON_REVISION__ < 2)
 /* Software System Reset Corrupts PLL_LOCKCNT Register */
 #define ANOMALY_05000430 (__SILICON_REVISION__ >= 2)
@@ -170,26 +175,49 @@
 /* Reduced Timing Margins on DDR Output Setup and Hold (tDS and tDH) */
 #define ANOMALY_05000449 (__SILICON_REVISION__ == 1)
 /* USB DMA Mode 1 Short Packet Data Corruption */
-#define ANOMALY_05000450 (1
+#define ANOMALY_05000450 (1)
+/* USB Receive Interrupt Is Not Generated in DMA Mode 1 */
+#define ANOMALY_05000456 (__SILICON_REVISION__ < 3)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
+#define ANOMALY_05000254 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
diff --git a/arch/blackfin/mach-bf548/include/mach/portmux.h b/arch/blackfin/mach-bf548/include/mach/portmux.h
index ffb1d0a..ce372ba 100644
--- a/arch/blackfin/mach-bf548/include/mach/portmux.h
+++ b/arch/blackfin/mach-bf548/include/mach/portmux.h
@@ -167,22 +167,42 @@
 #define P_PPI0_D13	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(0))
 #define P_PPI0_D14	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(0))
 #define P_PPI0_D15	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(0))
-#define P_ATAPI_D0A	(P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(1))
-#define P_ATAPI_D1A	(P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(1))
-#define P_ATAPI_D2A	(P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(1))
-#define P_ATAPI_D3A	(P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(1))
-#define P_ATAPI_D4A	(P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(1))
-#define P_ATAPI_D5A	(P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(1))
-#define P_ATAPI_D6A	(P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(1))
-#define P_ATAPI_D7A	(P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(1))
-#define P_ATAPI_D8A	(P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(1))
-#define P_ATAPI_D9A	(P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1))
-#define P_ATAPI_D10A	(P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(1))
-#define P_ATAPI_D11A	(P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(1))
-#define P_ATAPI_D12A	(P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(1))
-#define P_ATAPI_D13A	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(1))
-#define P_ATAPI_D14A	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1))
-#define P_ATAPI_D15A	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1))
+
+#ifdef CONFIG_BF548_ATAPI_ALTERNATIVE_PORT
+# define P_ATAPI_D0A	(P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(1))
+# define P_ATAPI_D1A	(P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(1))
+# define P_ATAPI_D2A	(P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(1))
+# define P_ATAPI_D3A	(P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(1))
+# define P_ATAPI_D4A	(P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(1))
+# define P_ATAPI_D5A	(P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(1))
+# define P_ATAPI_D6A	(P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(1))
+# define P_ATAPI_D7A	(P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(1))
+# define P_ATAPI_D8A	(P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(1))
+# define P_ATAPI_D9A	(P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1))
+# define P_ATAPI_D10A	(P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(1))
+# define P_ATAPI_D11A	(P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(1))
+# define P_ATAPI_D12A	(P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(1))
+# define P_ATAPI_D13A	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(1))
+# define P_ATAPI_D14A	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1))
+# define P_ATAPI_D15A	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1))
+#else
+# define P_ATAPI_D0A	(P_DONTCARE)
+# define P_ATAPI_D1A	(P_DONTCARE)
+# define P_ATAPI_D2A	(P_DONTCARE)
+# define P_ATAPI_D3A	(P_DONTCARE)
+# define P_ATAPI_D4A	(P_DONTCARE)
+# define P_ATAPI_D5A	(P_DONTCARE)
+# define P_ATAPI_D6A	(P_DONTCARE)
+# define P_ATAPI_D7A	(P_DONTCARE)
+# define P_ATAPI_D8A	(P_DONTCARE)
+# define P_ATAPI_D9A	(P_DONTCARE)
+# define P_ATAPI_D10A	(P_DONTCARE)
+# define P_ATAPI_D11A	(P_DONTCARE)
+# define P_ATAPI_D12A	(P_DONTCARE)
+# define P_ATAPI_D13A	(P_DONTCARE)
+# define P_ATAPI_D14A	(P_DONTCARE)
+# define P_ATAPI_D15A	(P_DONTCARE)
+#endif
 
 #define P_PPI0_CLK	(P_DEFINED | P_IDENT(GPIO_PG0) | P_FUNCT(0))
 #define P_PPI0_FS1	(P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(0))
@@ -200,9 +220,15 @@
 #define P_CAN0_RX	(P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(0))
 #define P_CAN1_TX	(P_DEFINED | P_IDENT(GPIO_PG14) | P_FUNCT(0))
 #define P_CAN1_RX	(P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(0))
-#define P_ATAPI_A0A	(P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(1))
-#define P_ATAPI_A1A	(P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(1))
-#define P_ATAPI_A2A	(P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(1))
+#ifdef CONFIG_BF548_ATAPI_ALTERNATIVE_PORT
+# define P_ATAPI_A0A	(P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(1))
+# define P_ATAPI_A1A	(P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(1))
+# define P_ATAPI_A2A	(P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(1))
+#else
+# define P_ATAPI_A0A	(P_DONTCARE)
+# define P_ATAPI_A1A	(P_DONTCARE)
+# define P_ATAPI_A2A	(P_DONTCARE)
+#endif
 #define P_HOST_CE	(P_DEFINED | P_IDENT(GPIO_PG5) | P_FUNCT(1))
 #define P_HOST_RD	(P_DEFINED | P_IDENT(GPIO_PG6) | P_FUNCT(1))
 #define P_HOST_WR	(P_DEFINED | P_IDENT(GPIO_PG7) | P_FUNCT(1))
diff --git a/arch/blackfin/mach-bf561/Kconfig b/arch/blackfin/mach-bf561/Kconfig
index 638ec38..cb97436 100644
--- a/arch/blackfin/mach-bf561/Kconfig
+++ b/arch/blackfin/mach-bf561/Kconfig
@@ -9,22 +9,9 @@
 comment "Core B Support"
 
 config BF561_COREB
-	bool "Enable Core B support"
+	bool "Enable Core B loader"
 	default y
 
-config BF561_COREB_RESET
-	bool "Enable Core B reset support"
-	default n
-	help
-	  This requires code in the application that is loaded
-	  into Core B. In order to reset, the application needs
-	  to install an interrupt handler for Supplemental
-	  Interrupt 0, that sets RETI to 0xff600000 and writes
-	  bit 11 of SICB_SYSCR when bit 5 of SICA_SYSCR is 0.
-	  This causes Core B to stall when Supplemental Interrupt
-	  0 is set, and will reset PC to 0xff600000 when
-	  COREB_SRAM_INIT is cleared.
-
 endif
 
 comment "Interrupt Priority Assignment"
@@ -138,6 +125,7 @@
 	default 9
 config IRQ_TIMER0
 	int "TIMER 0  Interrupt"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "TIMER 1  Interrupt"
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index f623c6b..0dd9685 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -83,7 +83,7 @@
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -126,7 +126,7 @@
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf561/coreb.c b/arch/blackfin/mach-bf561/coreb.c
index 8598098..93635a7 100644
--- a/arch/blackfin/mach-bf561/coreb.c
+++ b/arch/blackfin/mach-bf561/coreb.c
@@ -1,406 +1,74 @@
-/*
- * File:         arch/blackfin/mach-bf561/coreb.c
- * Based on:
- * Author:
+/* Load firmware into Core B on a BF561
  *
- * Created:
- * Description:  Handle CoreB on a BF561
- *
- * Modified:
- *               Copyright 2004-2006 Analog Devices Inc.
- *
- * Bugs:         Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
  */
 
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
+/* The Core B reset func requires code in the application that is loaded into
+ * Core B.  In order to reset, the application needs to install an interrupt
+ * handler for Supplemental Interrupt 0, that sets RETI to 0xff600000 and
+ * writes bit 11 of SICB_SYSCR when bit 5 of SICA_SYSCR is 0.  This causes Core
+ * B to stall when Supplemental Interrupt 0 is set, and will reset PC to
+ * 0xff600000 when COREB_SRAM_INIT is cleared.
+ */
+
 #include <linux/device.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
 #include <linux/fs.h>
-#include <asm/dma.h>
-#include <asm/cacheflush.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
 
-#define MODULE_VER		"v0.1"
-
-static spinlock_t coreb_lock;
-static wait_queue_head_t coreb_dma_wait;
-
-#define COREB_IS_OPEN		0x00000001
-#define COREB_IS_RUNNING	0x00000010
-
-#define CMD_COREB_INDEX		1
 #define CMD_COREB_START		2
 #define CMD_COREB_STOP		3
 #define CMD_COREB_RESET		4
 
-#define COREB_MINOR		229
-
-static unsigned long coreb_status = 0;
-static unsigned long coreb_base = 0xff600000;
-static unsigned long coreb_size = 0x4000;
-int coreb_dma_done;
-
-static loff_t coreb_lseek(struct file *file, loff_t offset, int origin);
-static ssize_t coreb_read(struct file *file, char *buf, size_t count,
-			  loff_t * ppos);
-static ssize_t coreb_write(struct file *file, const char *buf, size_t count,
-			   loff_t * ppos);
-static int coreb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-		       unsigned long arg);
-static int coreb_open(struct inode *inode, struct file *file);
-static int coreb_release(struct inode *inode, struct file *file);
-
-static irqreturn_t coreb_dma_interrupt(int irq, void *dev_id)
+static int
+coreb_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
 {
-	clear_dma_irqstat(CH_MEM_STREAM2_DEST);
-	coreb_dma_done = 1;
-	wake_up_interruptible(&coreb_dma_wait);
-	return IRQ_HANDLED;
-}
+	int ret = 0;
 
-static ssize_t coreb_write(struct file *file, const char *buf, size_t count,
-			   loff_t * ppos)
-{
-	unsigned long p = *ppos;
-	ssize_t wrote = 0;
-
-	if (p + count > coreb_size)
-		return -EFAULT;
-
-	while (count > 0) {
-		int len = count;
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-
-		coreb_dma_done = 0;
-
-		flush_dcache_range((unsigned long)buf, (unsigned long)(buf+len));
-		/* Source Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_SRC, (unsigned long)buf);
-		set_dma_x_count(CH_MEM_STREAM2_SRC, len);
-		set_dma_x_modify(CH_MEM_STREAM2_SRC, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_SRC, 0);
-		/* Destination Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_DEST, coreb_base + p);
-		set_dma_x_count(CH_MEM_STREAM2_DEST, len);
-		set_dma_x_modify(CH_MEM_STREAM2_DEST, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_DEST, WNR | RESTART | DI_EN);
-
-		enable_dma(CH_MEM_STREAM2_SRC);
-		enable_dma(CH_MEM_STREAM2_DEST);
-
-		wait_event_interruptible(coreb_dma_wait, coreb_dma_done);
-
-		disable_dma(CH_MEM_STREAM2_SRC);
-		disable_dma(CH_MEM_STREAM2_DEST);
-
-		count -= len;
-		wrote += len;
-		buf += len;
-		p += len;
-	}
-	*ppos = p;
-	return wrote;
-}
-
-static ssize_t coreb_read(struct file *file, char *buf, size_t count,
-			  loff_t * ppos)
-{
-	unsigned long p = *ppos;
-	ssize_t read = 0;
-
-	if ((p + count) > coreb_size)
-		return -EFAULT;
-
-	while (count > 0) {
-		int len = count;
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-
-		coreb_dma_done = 0;
-
-		invalidate_dcache_range((unsigned long)buf, (unsigned long)(buf+len));
-		/* Source Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_SRC, coreb_base + p);
-		set_dma_x_count(CH_MEM_STREAM2_SRC, len);
-		set_dma_x_modify(CH_MEM_STREAM2_SRC, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_SRC, 0);
-		/* Destination Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_DEST, (unsigned long)buf);
-		set_dma_x_count(CH_MEM_STREAM2_DEST, len);
-		set_dma_x_modify(CH_MEM_STREAM2_DEST, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_DEST, WNR | RESTART | DI_EN);
-
-		enable_dma(CH_MEM_STREAM2_SRC);
-		enable_dma(CH_MEM_STREAM2_DEST);
-
-		wait_event_interruptible(coreb_dma_wait, coreb_dma_done);
-
-		disable_dma(CH_MEM_STREAM2_SRC);
-		disable_dma(CH_MEM_STREAM2_DEST);
-
-		count -= len;
-		read += len;
-		buf += len;
-		p += len;
-	}
-
-	return read;
-}
-
-static loff_t coreb_lseek(struct file *file, loff_t offset, int origin)
-{
-	loff_t ret;
-
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
-
-	switch (origin) {
-	case 0 /* SEEK_SET */ :
-		if (offset < coreb_size) {
-			file->f_pos = offset;
-			ret = file->f_pos;
-		} else
-			ret = -EINVAL;
+	switch (cmd) {
+	case CMD_COREB_START:
+		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() & ~0x0020);
 		break;
-	case 1 /* SEEK_CUR */ :
-		if ((offset + file->f_pos) < coreb_size) {
-			file->f_pos += offset;
-			ret = file->f_pos;
-		} else
-			ret = -EINVAL;
+	case CMD_COREB_STOP:
+		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() | 0x0020);
+		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
+		break;
+	case CMD_COREB_RESET:
+		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
+		break;
 	default:
 		ret = -EINVAL;
+		break;
 	}
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+
+	CSYNC();
+
 	return ret;
 }
 
-/* No BKL needed here */
-static int coreb_open(struct inode *inode, struct file *file)
-{
-	spin_lock_irq(&coreb_lock);
-
-	if (coreb_status & COREB_IS_OPEN)
-		goto out_busy;
-
-	coreb_status |= COREB_IS_OPEN;
-
-	spin_unlock_irq(&coreb_lock);
-	return 0;
-
- out_busy:
-	spin_unlock_irq(&coreb_lock);
-	return -EBUSY;
-}
-
-static int coreb_release(struct inode *inode, struct file *file)
-{
-	spin_lock_irq(&coreb_lock);
-	coreb_status &= ~COREB_IS_OPEN;
-	spin_unlock_irq(&coreb_lock);
-	return 0;
-}
-
-static int coreb_ioctl(struct inode *inode, struct file *file,
-		       unsigned int cmd, unsigned long arg)
-{
-	int retval = 0;
-	int coreb_index = 0;
-
-	switch (cmd) {
-	case CMD_COREB_INDEX:
-		if (copy_from_user(&coreb_index, (int *)arg, sizeof(int))) {
-			retval = -EFAULT;
-			break;
-		}
-
-		spin_lock_irq(&coreb_lock);
-		switch (coreb_index) {
-		case 0:
-			coreb_base = 0xff600000;
-			coreb_size = 0x4000;
-			break;
-		case 1:
-			coreb_base = 0xff610000;
-			coreb_size = 0x4000;
-			break;
-		case 2:
-			coreb_base = 0xff500000;
-			coreb_size = 0x8000;
-			break;
-		case 3:
-			coreb_base = 0xff400000;
-			coreb_size = 0x8000;
-			break;
-		default:
-			retval = -EINVAL;
-			break;
-		}
-		spin_unlock_irq(&coreb_lock);
-
-		mutex_lock(&file->f_dentry->d_inode->i_mutex);
-		file->f_pos = 0;
-		mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-		break;
-	case CMD_COREB_START:
-		spin_lock_irq(&coreb_lock);
-		if (coreb_status & COREB_IS_RUNNING) {
-			retval = -EBUSY;
-			break;
-		}
-		printk(KERN_INFO "Starting Core B\n");
-		coreb_status |= COREB_IS_RUNNING;
-		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() & ~0x0020);
-		SSYNC();
-		spin_unlock_irq(&coreb_lock);
-		break;
-#if defined(CONFIG_BF561_COREB_RESET)
-	case CMD_COREB_STOP:
-		spin_lock_irq(&coreb_lock);
-		printk(KERN_INFO "Stopping Core B\n");
-		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() | 0x0020);
-		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
-		coreb_status &= ~COREB_IS_RUNNING;
-		spin_unlock_irq(&coreb_lock);
-		break;
-	case CMD_COREB_RESET:
-		printk(KERN_INFO "Resetting Core B\n");
-		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
-		break;
-#endif
-	}
-
-	return retval;
-}
-
 static struct file_operations coreb_fops = {
-	.owner = THIS_MODULE,
-	.llseek = coreb_lseek,
-	.read = coreb_read,
-	.write = coreb_write,
-	.ioctl = coreb_ioctl,
-	.open = coreb_open,
-	.release = coreb_release
+	.owner   = THIS_MODULE,
+	.ioctl   = coreb_ioctl,
 };
 
 static struct miscdevice coreb_dev = {
-	COREB_MINOR,
-	"coreb",
-	&coreb_fops
+	.minor = MISC_DYNAMIC_MINOR,
+	.name  = "coreb",
+	.fops  = &coreb_fops,
 };
 
-static ssize_t coreb_show_status(struct device *dev, struct device_attribute *attr, char *buf)
+static int __init bf561_coreb_init(void)
 {
-	return sprintf(buf,
-		       "Base Address:\t0x%08lx\n"
-		       "Core B is %s\n"
-		       "SICA_SYSCR:\t%04x\n"
-		       "SICB_SYSCR:\t%04x\n"
-		       "\n"
-		       "IRQ Status:\tCore A\t\tCore B\n"
-		       "ISR0:\t\t%08x\t\t%08x\n"
-		       "ISR1:\t\t%08x\t\t%08x\n"
-		       "IMASK0:\t\t%08x\t\t%08x\n"
-		       "IMASK1:\t\t%08x\t\t%08x\n",
-		       coreb_base,
-		       coreb_status & COREB_IS_RUNNING ? "running" : "stalled",
-		       bfin_read_SICA_SYSCR(), bfin_read_SICB_SYSCR(),
-		       bfin_read_SICA_ISR0(), bfin_read_SICB_ISR0(),
-		       bfin_read_SICA_ISR1(), bfin_read_SICB_ISR0(),
-		       bfin_read_SICA_IMASK0(), bfin_read_SICB_IMASK0(),
-		       bfin_read_SICA_IMASK1(), bfin_read_SICB_IMASK1());
+	return misc_register(&coreb_dev);
 }
-
-static DEVICE_ATTR(coreb_status, S_IRUGO, coreb_show_status, NULL);
-
-int __init bf561_coreb_init(void)
-{
-	init_waitqueue_head(&coreb_dma_wait);
-
-	spin_lock_init(&coreb_lock);
-	/* Request the core memory regions for Core B */
-	if (request_mem_region(0xff600000, 0x4000,
-			       "Core B - Instruction SRAM") == NULL)
-		goto exit;
-
-	if (request_mem_region(0xFF610000, 0x4000,
-			       "Core B - Instruction SRAM") == NULL)
-		goto release_instruction_a_sram;
-
-	if (request_mem_region(0xFF500000, 0x8000,
-			       "Core B - Data Bank B SRAM") == NULL)
-		goto release_instruction_b_sram;
-
-	if (request_mem_region(0xff400000, 0x8000,
-			       "Core B - Data Bank A SRAM") == NULL)
-		goto release_data_b_sram;
-
-	if (request_dma(CH_MEM_STREAM2_DEST, "Core B - DMA Destination") < 0)
-		goto release_data_a_sram;
-
-	if (request_dma(CH_MEM_STREAM2_SRC, "Core B - DMA Source") < 0)
-		goto release_dma_dest;
-
-	set_dma_callback(CH_MEM_STREAM2_DEST, coreb_dma_interrupt, NULL);
-
-	misc_register(&coreb_dev);
-
-	if (device_create_file(coreb_dev.this_device, &dev_attr_coreb_status))
-		goto release_dma_src;
-
-	printk(KERN_INFO "BF561 Core B driver %s initialized.\n", MODULE_VER);
-	return 0;
-
- release_dma_src:
-	free_dma(CH_MEM_STREAM2_SRC);
- release_dma_dest:
-	free_dma(CH_MEM_STREAM2_DEST);
- release_data_a_sram:
-	release_mem_region(0xff400000, 0x8000);
- release_data_b_sram:
-	release_mem_region(0xff500000, 0x8000);
- release_instruction_b_sram:
-	release_mem_region(0xff610000, 0x4000);
- release_instruction_a_sram:
-	release_mem_region(0xff600000, 0x4000);
- exit:
-	return -ENOMEM;
-}
-
-void __exit bf561_coreb_exit(void)
-{
-	device_remove_file(coreb_dev.this_device, &dev_attr_coreb_status);
-	misc_deregister(&coreb_dev);
-
-	release_mem_region(0xff610000, 0x4000);
-	release_mem_region(0xff600000, 0x4000);
-	release_mem_region(0xff500000, 0x8000);
-	release_mem_region(0xff400000, 0x8000);
-
-	free_dma(CH_MEM_STREAM2_DEST);
-	free_dma(CH_MEM_STREAM2_SRC);
-}
-
 module_init(bf561_coreb_init);
+
+static void __exit bf561_coreb_exit(void)
+{
+	misc_deregister(&coreb_dev);
+}
 module_exit(bf561_coreb_exit);
 
 MODULE_AUTHOR("Bas Vermeulen <bvermeul@blackstar.xs4all.nl>");
diff --git a/arch/blackfin/mach-bf561/include/mach/anomaly.h b/arch/blackfin/mach-bf561/include/mach/anomaly.h
index d0b0b35..dccd396c 100644
--- a/arch/blackfin/mach-bf561/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf561/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision Q, 11/07/2008; ADSP-BF561 Blackfin Processor Anomaly List
  */
 
@@ -18,11 +18,11 @@
 # error will not work on BF561 silicon version 0.0, 0.1, 0.2, or 0.4
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot 2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* UART Line Status Register (UART_LSR) Bits Are Not Updated at the Same Time */
 #define ANOMALY_05000099 (__SILICON_REVISION__ < 5)
-/* Trace Buffers may contain errors in emulation mode and/or exception, NMI, reset handlers */
+/* Trace Buffers may record discontinuities into emulation mode and/or exception, NMI, reset handlers */
 #define ANOMALY_05000116 (__SILICON_REVISION__ < 3)
 /* Testset instructions restricted to 32-bit aligned memory locations */
 #define ANOMALY_05000120 (1)
@@ -40,7 +40,7 @@
 #define ANOMALY_05000136 (__SILICON_REVISION__ < 3)
 /* Allowing the SPORT RX FIFO to fill will cause an overflow */
 #define ANOMALY_05000140 (__SILICON_REVISION__ < 3)
-/* Infinite Stall may occur with a particular sequence of consecutive dual dag events */
+/* An Infinite Stall occurs with a particular sequence of consecutive dual dag events */
 #define ANOMALY_05000141 (__SILICON_REVISION__ < 3)
 /* Interrupts may be lost when a programmable input flag is configured to be edge sensitive */
 #define ANOMALY_05000142 (__SILICON_REVISION__ < 3)
@@ -80,7 +80,7 @@
 #define ANOMALY_05000163 (__SILICON_REVISION__ < 3)
 /* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
-/* Turning Serial Ports on with External Frame Syncs */
+/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
 #define ANOMALY_05000167 (1)
 /* SDRAM auto-refresh and subsequent Power Ups */
 #define ANOMALY_05000168 (__SILICON_REVISION__ < 5)
@@ -164,7 +164,7 @@
 #define ANOMALY_05000242 (__SILICON_REVISION__ < 5)
 /* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 5)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (__SILICON_REVISION__ < 5)
 /* TESTSET operation forces stall on the other core */
 #define ANOMALY_05000248 (__SILICON_REVISION__ < 5)
@@ -208,7 +208,7 @@
 #define ANOMALY_05000275 (__SILICON_REVISION__ > 2)
 /* Timing Requirements Change for External Frame Sync PPI Modes with Non-Zero PPI_DELAY */
 #define ANOMALY_05000276 (__SILICON_REVISION__ < 5)
-/* Writes to an I/O data register one SCLK cycle after an edge is detected may clear interrupt */
+/* Writes to an I/O Data Register One SCLK Cycle after an Edge Is Detected May Clear Interrupt */
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 3)
 /* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (__SILICON_REVISION__ < 5)
@@ -232,7 +232,7 @@
 #define ANOMALY_05000310 (1)
 /* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (1)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (1)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (1)
@@ -276,18 +276,27 @@
 #define ANOMALY_05000428 (__SILICON_REVISION__ > 3)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000119 (0)
 #define ANOMALY_05000158 (0)
 #define ANOMALY_05000183 (0)
+#define ANOMALY_05000233 (0)
 #define ANOMALY_05000273 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000353 (1)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf561/include/mach/cdefBF561.h b/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
index 95d609f..9d9858c 100644
--- a/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
+++ b/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
@@ -1526,6 +1526,35 @@
 #define bfin_read_MDMA_D0_START_ADDR()  bfin_read_MDMA1_D0_START_ADDR()
 #define bfin_write_MDMA_D0_START_ADDR(val) bfin_write_MDMA1_D0_START_ADDR(val)
 
+#define bfin_read_MDMA_S1_CONFIG()  bfin_read_MDMA1_S1_CONFIG()
+#define bfin_write_MDMA_S1_CONFIG(val) bfin_write_MDMA1_S1_CONFIG(val)
+#define bfin_read_MDMA_S1_IRQ_STATUS()  bfin_read_MDMA1_S1_IRQ_STATUS()
+#define bfin_write_MDMA_S1_IRQ_STATUS(val) bfin_write_MDMA1_S1_IRQ_STATUS(val)
+#define bfin_read_MDMA_S1_X_MODIFY()  bfin_read_MDMA1_S1_X_MODIFY()
+#define bfin_write_MDMA_S1_X_MODIFY(val) bfin_write_MDMA1_S1_X_MODIFY(val)
+#define bfin_read_MDMA_S1_Y_MODIFY()  bfin_read_MDMA1_S1_Y_MODIFY()
+#define bfin_write_MDMA_S1_Y_MODIFY(val) bfin_write_MDMA1_S1_Y_MODIFY(val)
+#define bfin_read_MDMA_S1_X_COUNT()  bfin_read_MDMA1_S1_X_COUNT()
+#define bfin_write_MDMA_S1_X_COUNT(val) bfin_write_MDMA1_S1_X_COUNT(val)
+#define bfin_read_MDMA_S1_Y_COUNT()  bfin_read_MDMA1_S1_Y_COUNT()
+#define bfin_write_MDMA_S1_Y_COUNT(val) bfin_write_MDMA1_S1_Y_COUNT(val)
+#define bfin_read_MDMA_S1_START_ADDR()  bfin_read_MDMA1_S1_START_ADDR()
+#define bfin_write_MDMA_S1_START_ADDR(val) bfin_write_MDMA1_S1_START_ADDR(val)
+#define bfin_read_MDMA_D1_CONFIG()  bfin_read_MDMA1_D1_CONFIG()
+#define bfin_write_MDMA_D1_CONFIG(val) bfin_write_MDMA1_D1_CONFIG(val)
+#define bfin_read_MDMA_D1_IRQ_STATUS()  bfin_read_MDMA1_D1_IRQ_STATUS()
+#define bfin_write_MDMA_D1_IRQ_STATUS(val) bfin_write_MDMA1_D1_IRQ_STATUS(val)
+#define bfin_read_MDMA_D1_X_MODIFY()  bfin_read_MDMA1_D1_X_MODIFY()
+#define bfin_write_MDMA_D1_X_MODIFY(val) bfin_write_MDMA1_D1_X_MODIFY(val)
+#define bfin_read_MDMA_D1_Y_MODIFY()  bfin_read_MDMA1_D1_Y_MODIFY()
+#define bfin_write_MDMA_D1_Y_MODIFY(val) bfin_write_MDMA1_D1_Y_MODIFY(val)
+#define bfin_read_MDMA_D1_X_COUNT()  bfin_read_MDMA1_D1_X_COUNT()
+#define bfin_write_MDMA_D1_X_COUNT(val) bfin_write_MDMA1_D1_X_COUNT(val)
+#define bfin_read_MDMA_D1_Y_COUNT()  bfin_read_MDMA1_D1_Y_COUNT()
+#define bfin_write_MDMA_D1_Y_COUNT(val) bfin_write_MDMA1_D1_Y_COUNT(val)
+#define bfin_read_MDMA_D1_START_ADDR()  bfin_read_MDMA1_D1_START_ADDR()
+#define bfin_write_MDMA_D1_START_ADDR(val) bfin_write_MDMA1_D1_START_ADDR(val)
+
 /* These need to be last due to the cdef/linux inter-dependencies */
 #include <asm/irq.h>
 
diff --git a/arch/blackfin/mach-bf561/include/mach/defBF561.h b/arch/blackfin/mach-bf561/include/mach/defBF561.h
index cf92229..5fc0f05 100644
--- a/arch/blackfin/mach-bf561/include/mach/defBF561.h
+++ b/arch/blackfin/mach-bf561/include/mach/defBF561.h
@@ -796,6 +796,62 @@
 #define MDMA2_S1_IRQ_STATUS 0xFFC00FE8	/*MemDMA2 Stream 1 Source Interrupt/Status Register */
 #define MDMA2_S1_PERIPHERAL_MAP 0xFFC00FEC	/*MemDMA2 Stream 1 Source Peripheral Map register */
 
+#define MDMA_D0_NEXT_DESC_PTR MDMA1_D0_NEXT_DESC_PTR
+#define MDMA_D0_START_ADDR MDMA1_D0_START_ADDR
+#define MDMA_D0_CONFIG MDMA1_D0_CONFIG
+#define MDMA_D0_X_COUNT MDMA1_D0_X_COUNT
+#define MDMA_D0_X_MODIFY MDMA1_D0_X_MODIFY
+#define MDMA_D0_Y_COUNT MDMA1_D0_Y_COUNT
+#define MDMA_D0_Y_MODIFY MDMA1_D0_Y_MODIFY
+#define MDMA_D0_CURR_DESC_PTR MDMA1_D0_CURR_DESC_PTR
+#define MDMA_D0_CURR_ADDR MDMA1_D0_CURR_ADDR
+#define MDMA_D0_IRQ_STATUS MDMA1_D0_IRQ_STATUS
+#define MDMA_D0_PERIPHERAL_MAP MDMA1_D0_PERIPHERAL_MAP
+#define MDMA_D0_CURR_X_COUNT MDMA1_D0_CURR_X_COUNT
+#define MDMA_D0_CURR_Y_COUNT MDMA1_D0_CURR_Y_COUNT
+
+#define MDMA_S0_NEXT_DESC_PTR MDMA1_S0_NEXT_DESC_PTR
+#define MDMA_S0_START_ADDR MDMA1_S0_START_ADDR
+#define MDMA_S0_CONFIG MDMA1_S0_CONFIG
+#define MDMA_S0_X_COUNT MDMA1_S0_X_COUNT
+#define MDMA_S0_X_MODIFY MDMA1_S0_X_MODIFY
+#define MDMA_S0_Y_COUNT MDMA1_S0_Y_COUNT
+#define MDMA_S0_Y_MODIFY MDMA1_S0_Y_MODIFY
+#define MDMA_S0_CURR_DESC_PTR MDMA1_S0_CURR_DESC_PTR
+#define MDMA_S0_CURR_ADDR MDMA1_S0_CURR_ADDR
+#define MDMA_S0_IRQ_STATUS MDMA1_S0_IRQ_STATUS
+#define MDMA_S0_PERIPHERAL_MAP MDMA1_S0_PERIPHERAL_MAP
+#define MDMA_S0_CURR_X_COUNT MDMA1_S0_CURR_X_COUNT
+#define MDMA_S0_CURR_Y_COUNT MDMA1_S0_CURR_Y_COUNT
+
+#define MDMA_D1_NEXT_DESC_PTR MDMA1_D1_NEXT_DESC_PTR
+#define MDMA_D1_START_ADDR MDMA1_D1_START_ADDR
+#define MDMA_D1_CONFIG MDMA1_D1_CONFIG
+#define MDMA_D1_X_COUNT MDMA1_D1_X_COUNT
+#define MDMA_D1_X_MODIFY MDMA1_D1_X_MODIFY
+#define MDMA_D1_Y_COUNT MDMA1_D1_Y_COUNT
+#define MDMA_D1_Y_MODIFY MDMA1_D1_Y_MODIFY
+#define MDMA_D1_CURR_DESC_PTR MDMA1_D1_CURR_DESC_PTR
+#define MDMA_D1_CURR_ADDR MDMA1_D1_CURR_ADDR
+#define MDMA_D1_IRQ_STATUS MDMA1_D1_IRQ_STATUS
+#define MDMA_D1_PERIPHERAL_MAP MDMA1_D1_PERIPHERAL_MAP
+#define MDMA_D1_CURR_X_COUNT MDMA1_D1_CURR_X_COUNT
+#define MDMA_D1_CURR_Y_COUNT MDMA1_D1_CURR_Y_COUNT
+
+#define MDMA_S1_NEXT_DESC_PTR MDMA1_S1_NEXT_DESC_PTR
+#define MDMA_S1_START_ADDR MDMA1_S1_START_ADDR
+#define MDMA_S1_CONFIG MDMA1_S1_CONFIG
+#define MDMA_S1_X_COUNT MDMA1_S1_X_COUNT
+#define MDMA_S1_X_MODIFY MDMA1_S1_X_MODIFY
+#define MDMA_S1_Y_COUNT MDMA1_S1_Y_COUNT
+#define MDMA_S1_Y_MODIFY MDMA1_S1_Y_MODIFY
+#define MDMA_S1_CURR_DESC_PTR MDMA1_S1_CURR_DESC_PTR
+#define MDMA_S1_CURR_ADDR MDMA1_S1_CURR_ADDR
+#define MDMA_S1_IRQ_STATUS MDMA1_S1_IRQ_STATUS
+#define MDMA_S1_PERIPHERAL_MAP MDMA1_S1_PERIPHERAL_MAP
+#define MDMA_S1_CURR_X_COUNT MDMA1_S1_CURR_X_COUNT
+#define MDMA_S1_CURR_Y_COUNT MDMA1_S1_CURR_Y_COUNT
+
 /* Internal Memory DMA Registers (0xFFC0_1800 - 0xFFC0_19FF) */
 #define IMDMA_D0_CONFIG 0xFFC01808	/*IMDMA Stream 0 Destination Configuration */
 #define IMDMA_D0_NEXT_DESC_PTR 0xFFC01800	/*IMDMA Stream 0 Destination Next Descriptor Ptr Reg */
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index 9b27e69..8c10701 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -133,9 +133,9 @@
 	int ret;
 
 	ret = request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED,
-			  "SMP interrupt", handler);
+			  "Supplemental Interrupt0", handler);
 	if (ret)
-		panic("Cannot request supplemental interrupt 0 for IPI service\n");
+		panic("Cannot request supplemental interrupt 0 for IPI service");
 }
 
 void platform_send_ipi(cpumask_t callmap)
diff --git a/arch/blackfin/mach-common/arch_checks.c b/arch/blackfin/mach-common/arch_checks.c
index 80d39b2..da93d92 100644
--- a/arch/blackfin/mach-common/arch_checks.c
+++ b/arch/blackfin/mach-common/arch_checks.c
@@ -71,3 +71,10 @@
 #if ANOMALY_05000448
 # error You are using a part with anomaly 05000448, this issue causes random memory read/write failures - that means random crashes.
 #endif
+
+/* if 220 exists, can not set External Memory WB and L2 not_cached, either External Memory not_cached and L2 WB */
+#if ANOMALY_05000220 && \
+	((defined(CONFIG_BFIN_WB) && defined(CONFIG_BFIN_L2_NOT_CACHED)) || \
+	 (!defined(CONFIG_BFIN_DCACHE) && defined(CONFIG_BFIN_L2_WB)))
+# error You are exposing Anomaly 220 in this config, either config L2 as Write Through, or make External Memory WB.
+#endif
diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index aa0648c..d9666fe 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -15,6 +15,13 @@
 
 .text
 
+/* 05000443 - IFLUSH cannot be last instruction in hardware loop */
+#if ANOMALY_05000443
+# define BROK_FLUSH_INST "IFLUSH"
+#else
+# define BROK_FLUSH_INST "no anomaly! yeah!"
+#endif
+
 /* Since all L1 caches work the same way, we use the same method for flushing
  * them.  Only the actual flush instruction differs.  We write this in asm as
  * GCC can be hard to coax into writing nice hardware loops.
@@ -23,7 +30,7 @@
  * R0 = start address
  * R1 = end address
  */
-.macro do_flush flushins:req optflushins optnopins label
+.macro do_flush flushins:req label
 
 	R2 = -L1_CACHE_BYTES;
 
@@ -44,22 +51,15 @@
 \label :
 .endif
 	P0 = R0;
+
 	LSETUP (1f, 2f) LC1 = P1;
 1:
-.ifnb \optflushins
-	\optflushins [P0];
-.endif
-#if ANOMALY_05000443
-.ifb \optnopins
-2:
-.endif
+.ifeqs "\flushins", BROK_FLUSH_INST
 	\flushins [P0++];
-.ifnb \optnopins
-2:	\optnopins;
-.endif
-#else
+2:	nop;
+.else
 2:	\flushins [P0++];
-#endif
+.endif
 
 	RTS;
 .endm
@@ -77,25 +77,9 @@
  */
 	P0 = R0;
 	IFLUSH[P0];
-	do_flush IFLUSH, , nop
+	do_flush IFLUSH
 ENDPROC(_blackfin_icache_flush_range)
 
-/* Flush all cache lines assocoiated with this area of memory. */
-ENTRY(_blackfin_icache_dcache_flush_range)
-/*
- * Walkaround to avoid loading wrong instruction after invalidating icache
- * and following sequence is met.
- *
- * 1) One instruction address is cached in the instruction cache.
- * 2) This instruction in SDRAM is changed.
- * 3) IFLASH[P0] is executed only once in blackfin_icache_flush_range().
- * 4) This instruction is executed again, but the old one is loaded.
- */
-	P0 = R0;
-	IFLUSH[P0];
-	do_flush FLUSH, IFLUSH
-ENDPROC(_blackfin_icache_dcache_flush_range)
-
 /* Throw away all D-cached data in specified region without any obligation to
  * write them back.  Since the Blackfin ISA does not have an "invalidate"
  * instruction, we use flush/invalidate.  Perhaps as a speed optimization we
@@ -107,7 +91,7 @@
 
 /* Flush all data cache lines assocoiated with this memory area */
 ENTRY(_blackfin_dcache_flush_range)
-	do_flush FLUSH, , , .Ldfr
+	do_flush FLUSH, .Ldfr
 ENDPROC(_blackfin_dcache_flush_range)
 
 /* Our headers convert the page structure to an address, so just need to flush
diff --git a/arch/blackfin/mach-common/clocks-init.c b/arch/blackfin/mach-common/clocks-init.c
index 3539365..ef6870e 100644
--- a/arch/blackfin/mach-common/clocks-init.c
+++ b/arch/blackfin/mach-common/clocks-init.c
@@ -72,6 +72,7 @@
 #endif
 	bfin_write_PLL_LOCKCNT(0x300);
 	do_sync();
+	/* We always write PLL_CTL thus avoiding Anomaly 05000242 */
 	bfin_write16(PLL_CTL, PLL_CTL_VAL);
 	__asm__ __volatile__("IDLE;");
 	bfin_write_PLL_DIV(CONFIG_CCLK_ACT_DIV | CONFIG_SCLK_DIV);
diff --git a/arch/blackfin/mach-common/cpufreq.c b/arch/blackfin/mach-common/cpufreq.c
index 72e16605..70e3411 100644
--- a/arch/blackfin/mach-common/cpufreq.c
+++ b/arch/blackfin/mach-common/cpufreq.c
@@ -140,7 +140,8 @@
 	cclk = get_cclk() / 1000;
 	sclk = get_sclk() / 1000;
 
-#if ANOMALY_05000273 || (!defined(CONFIG_BF54x) && defined(CONFIG_BFIN_DCACHE))
+#if ANOMALY_05000273 || ANOMALY_05000274 || \
+	(!defined(CONFIG_BF54x) && defined(CONFIG_BFIN_DCACHE))
 	min_cclk = sclk * 2;
 #else
 	min_cclk = sclk;
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index a063a43..da0558a 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -36,7 +36,6 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/unistd.h>
-#include <linux/threads.h>
 #include <asm/blackfin.h>
 #include <asm/errno.h>
 #include <asm/fixed_code.h>
@@ -201,7 +200,18 @@
 	cc = r7 == 0;
 	if !cc jump 1f;
 #endif
-
+#ifdef CONFIG_EXACT_HWERR
+	/* Read the ILAT, and to check to see if the process we are
+	 * single stepping caused a previous hardware error
+	 * If so, do not single step, (which lowers to IRQ5, and makes
+	 * us miss the error).
+	 */
+	p5.l = lo(ILAT);
+	p5.h = hi(ILAT);
+	r7 = [p5];
+	cc = bittst(r7, EVT_IVHW_P);
+	if cc jump 1f;
+#endif
 	/* Single stepping only a single instruction, so clear the trace
 	 * bit here.  */
 	r7 = syscfg;
@@ -263,15 +273,6 @@
 	r6 = 0x25;
 	CC = R7 == R6;
 	if CC JUMP _double_fault;
-
-	/* Did we cause a HW error? */
-	p5.l = lo(ILAT);
-	p5.h = hi(ILAT);
-	r6 = [p5];
-	r7 = 0x20;		/* Did I just cause anther HW error? */
-	r6 = r7 & r6;
-	CC = R7 == R6;
-	if CC JUMP _double_fault;
 #endif
 
 	(R7:6,P5:4) = [sp++];
@@ -473,6 +474,16 @@
 	[--sp] = ASTAT;
 	[--sp] = (R7:6,P5:4);
 
+#ifdef CONFIG_EXACT_HWERR
+	/* Make sure all pending read/writes complete. This will ensure any
+	 * accesses which could cause hardware errors completes, and signal
+	 * the the hardware before we do something silly, like crash the
+	 * kernel. We don't need to work around anomaly 05000312, since
+	 * we are already atomic
+	 */
+	ssync;
+#endif
+
 #if ANOMALY_05000283 || ANOMALY_05000315
 	cc = r7 == r7;
 	p5.h = HI(CHIPID);
@@ -855,7 +866,7 @@
 	p1.h = _schedule_and_signal;
 	[p0] = p1;
 	csync;
-	raise 15;		/* raise evt14 to do signal or reschedule */
+	raise 15;		/* raise evt15 to do signal or reschedule */
 4:
 	r0 = syscfg;
 	bitclr(r0, 0);
@@ -916,7 +927,7 @@
 	p1.h = _schedule_and_signal_from_int;
 	[p0] = p1;
 	csync;
-#if ANOMALY_05000281
+#if ANOMALY_05000281 || ANOMALY_05000461
 	r0.l = lo(SAFE_USER_INSTRUCTION);
 	r0.h = hi(SAFE_USER_INSTRUCTION);
 	reti = r0;
@@ -930,18 +941,27 @@
 ENDPROC(_return_from_int)
 
 ENTRY(_lower_to_irq14)
-#if ANOMALY_05000281
+#if ANOMALY_05000281 || ANOMALY_05000461
 	r0.l = lo(SAFE_USER_INSTRUCTION);
 	r0.h = hi(SAFE_USER_INSTRUCTION);
 	reti = r0;
 #endif
-	r0 = 0x401f;
+
+#ifdef CONFIG_DEBUG_HWERR
+	/* enable irq14 & hwerr interrupt, until we transition to _evt14_softirq */
+	r0 = (EVT_IVG14 | EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
+#else
+	/* Only enable irq14 interrupt, until we transition to _evt14_softirq */
+	r0 = (EVT_IVG14 | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
+#endif
 	sti r0;
 	raise 14;
 	rti;
+ENDPROC(_lower_to_irq14)
+
 ENTRY(_evt14_softirq)
 #ifdef CONFIG_DEBUG_HWERR
-	r0 = 0x3f;
+	r0 = (EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
 	sti r0;
 #else
 	cli r0;
@@ -949,8 +969,9 @@
 	[--sp] = RETI;
 	SP += 4;
 	rts;
+ENDPROC(_evt14_softirq)
 
-_schedule_and_signal_from_int:
+ENTRY(_schedule_and_signal_from_int)
 	/* To end up here, vector 15 was changed - so we have to change it
 	 * back.
 	 */
@@ -983,8 +1004,9 @@
 	call _finish_atomic_sections;
 	sp += 12;
 	jump.s .Lresume_userspace;
+ENDPROC(_schedule_and_signal_from_int)
 
-_schedule_and_signal:
+ENTRY(_schedule_and_signal)
 	SAVE_CONTEXT_SYSCALL
 	/* To end up here, vector 15 was changed - so we have to change it
 	 * back.
@@ -1002,7 +1024,7 @@
 1:
 	RESTORE_CONTEXT
 	rti;
-ENDPROC(_lower_to_irq14)
+ENDPROC(_schedule_and_signal)
 
 /* We handle this 100% in exception space - to reduce overhead
  * Only potiential problem is if the software buffer gets swapped out of the
@@ -1588,19 +1610,3 @@
 	.long _sys_ni_syscall
 	.endr
 END(_sys_call_table)
-
-#ifdef CONFIG_EXCEPTION_L1_SCRATCH
-/* .section .l1.bss.scratch */
-.set _exception_stack_top, L1_SCRATCH_START + L1_SCRATCH_LENGTH
-#else
-#ifdef CONFIG_SYSCALL_TAB_L1
-.section .l1.bss
-#else
-.bss
-#endif
-ENTRY(_exception_stack)
-	.rept 1024 * NR_CPUS
-	.long 0
-	.endr
-_exception_stack_top:
-#endif
diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S
index 698d4c0..f826f6b 100644
--- a/arch/blackfin/mach-common/head.S
+++ b/arch/blackfin/mach-common/head.S
@@ -30,8 +30,6 @@
 	rts;
 ENDPROC(__init_clear_bss)
 
-#define INITIAL_STACK	(L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12)
-
 ENTRY(__start)
 	/* R0: argument of command line string, passed from uboot, save it */
 	R7 = R0;
@@ -126,30 +124,30 @@
 	 * below
 	 */
 	GET_PDA(p0, r0);
-	r7 = [p0 + PDA_RETX];
+	r6 = [p0 + PDA_RETX];
 	p1.l = _init_saved_retx;
 	p1.h = _init_saved_retx;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_DCPLB];
+	r6 = [p0 + PDA_DCPLB];
 	p1.l = _init_saved_dcplb_fault_addr;
 	p1.h = _init_saved_dcplb_fault_addr;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_ICPLB];
+	r6 = [p0 + PDA_ICPLB];
 	p1.l = _init_saved_icplb_fault_addr;
 	p1.h = _init_saved_icplb_fault_addr;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_SEQSTAT];
+	r6 = [p0 + PDA_SEQSTAT];
 	p1.l = _init_saved_seqstat;
 	p1.h = _init_saved_seqstat;
-	[p1] = r7;
+	[p1] = r6;
 #endif
 
 	/* Initialize stack pointer */
-	sp.l = lo(INITIAL_STACK);
-	sp.h = hi(INITIAL_STACK);
+	sp.l = _init_thread_union;
+	sp.h = _init_thread_union;
 	fp = sp;
 	usp = sp;
 
@@ -189,7 +187,15 @@
 	/* Put The Code for PLL Programming and SDRAM Programming in L1 ISRAM */
 	call _bfin_relocate_l1_mem;
 #ifdef CONFIG_BFIN_KERNEL_CLOCK
+	/* Only use on-chip scratch space for stack when absolutely required
+	 * to avoid Anomaly 05000227 ... we know the init_clocks() func only
+	 * uses L1 text and stack space and no other memory region.
+	 */
+# define KERNEL_CLOCK_STACK (L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12)
+	sp.l = lo(KERNEL_CLOCK_STACK);
+	sp.h = hi(KERNEL_CLOCK_STACK);
 	call _init_clocks;
+	sp = usp;	/* usp hasnt been touched, so restore from there */
 #endif
 
 	/* This section keeps the processor in supervisor mode
@@ -243,9 +249,7 @@
 	call _cmdline_init;
 
 	/* Load the current thread pointer and stack */
-	sp.l = _init_thread_union;
-	sp.h = _init_thread_union;
-	p1 = THREAD_SIZE (z);
+	p1 = THREAD_SIZE + 4 (z);	/* +4 is for reti loading */
 	sp = sp + p1;
 	usp = sp;
 	fp = sp;
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 0069c2d..9c46680 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -145,6 +145,14 @@
 
 /* interrupt routine for ivhw - 5 */
 ENTRY(_evt_ivhw)
+	/* In case a single action kicks off multiple memory transactions, (like
+	 * a cache line fetch, - this can cause multiple hardware errors, let's
+	 * catch them all. First - make sure all the actions are complete, and
+	 * the core sees the hardware errors.
+	 */
+	SSYNC;
+	SSYNC;
+
 	SAVE_ALL_SYS
 #ifdef CONFIG_FRAME_POINTER
 	fp = 0;
@@ -159,6 +167,25 @@
 1:
 #endif
 
+	/* Handle all stacked hardware errors
+	 * To make sure we don't hang forever, only do it 10 times
+	 */
+	R0 = 0;
+	R2 = 10;
+1:
+	P0.L = LO(ILAT);
+	P0.H = HI(ILAT);
+	R1 = [P0];
+	CC = BITTST(R1, EVT_IVHW_P);
+	IF ! CC JUMP 2f;
+	/* OK a hardware error is pending - clear it */
+	R1 = EVT_IVHW_P;
+	[P0] = R1;
+	R0 += 1;
+	CC = R1 == R2;
+	if CC JUMP 2f;
+	JUMP 1b;
+2:
 	# We are going to dump something out, so make sure we print IPEND properly
 	p2.l = lo(IPEND);
 	p2.h = hi(IPEND);
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index a7d7b2d..351afd0 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -1052,7 +1052,7 @@
 			set_irq_chained_handler(irq, bfin_demux_error_irq);
 			break;
 #endif
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 		case IRQ_TIMER0:
 			set_irq_handler(irq, handle_percpu_irq);
 			break;
@@ -1116,6 +1116,9 @@
 	    IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
 	    IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
 
+	/* This implicitly covers ANOMALY_05000171
+	 * Boot-ROM code modifies SICA_IWRx wakeup registers
+	 */
 #ifdef SIC_IWR0
 	bfin_write_SIC_IWR0(IWR_DISABLE_ALL);
 # ifdef SIC_IWR1
@@ -1136,13 +1139,6 @@
 	bfin_write_SIC_IWR(IWR_DISABLE_ALL);
 #endif
 
-#ifdef CONFIG_IPIPE
-	for (irq = 0; irq < NR_IRQS; irq++) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		desc->ic_prio = __ipipe_get_irq_priority(irq);
-	}
-#endif /* CONFIG_IPIPE */
-
 	return 0;
 }
 
@@ -1156,23 +1152,22 @@
 	} else {
 		struct ivgx *ivg = ivg7_13[vec - IVG7].ifirst;
 		struct ivgx *ivg_stop = ivg7_13[vec - IVG7].istop;
-#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \
-	|| defined(BF538_FAMILY) || defined(CONFIG_BF51x)
+#if defined(SIC_ISR0) || defined(SICA_ISR0)
 		unsigned long sic_status[3];
 
 		if (smp_processor_id()) {
-#ifdef CONFIG_SMP
+# ifdef SICB_ISR0
 			/* This will be optimized out in UP mode. */
 			sic_status[0] = bfin_read_SICB_ISR0() & bfin_read_SICB_IMASK0();
 			sic_status[1] = bfin_read_SICB_ISR1() & bfin_read_SICB_IMASK1();
-#endif
+# endif
 		} else {
 			sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0();
 			sic_status[1] = bfin_read_SIC_ISR1() & bfin_read_SIC_IMASK1();
 		}
-#ifdef CONFIG_BF54x
+# ifdef SIC_ISR2
 		sic_status[2] = bfin_read_SIC_ISR2() & bfin_read_SIC_IMASK2();
-#endif
+# endif
 		for (;; ivg++) {
 			if (ivg >= ivg_stop) {
 				atomic_inc(&num_spurious);
@@ -1236,20 +1231,16 @@
 
 	if (likely(vec == EVT_IVTMR_P)) {
 		irq = IRQ_CORETMR;
-		goto core_tick;
-	}
 
-	SSYNC();
-
-#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561)
-	{
+	} else {
+#if defined(SIC_ISR0) || defined(SICA_ISR0)
 		unsigned long sic_status[3];
 
 		sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0();
 		sic_status[1] = bfin_read_SIC_ISR1() & bfin_read_SIC_IMASK1();
-#ifdef CONFIG_BF54x
+# ifdef SIC_ISR2
 		sic_status[2] = bfin_read_SIC_ISR2() & bfin_read_SIC_IMASK2();
-#endif
+# endif
 		for (;; ivg++) {
 			if (ivg >= ivg_stop) {
 				atomic_inc(&num_spurious);
@@ -1258,9 +1249,7 @@
 			if (sic_status[(ivg->irqno - IVG7) / 32] & ivg->isrflag)
 				break;
 		}
-	}
 #else
-	{
 		unsigned long sic_status;
 
 		sic_status = bfin_read_SIC_IMASK() & bfin_read_SIC_ISR();
@@ -1272,15 +1261,13 @@
 			} else if (sic_status & ivg->isrflag)
 				break;
 		}
-	}
 #endif
 
-	irq = ivg->irqno;
+		irq = ivg->irqno;
+	}
 
 	if (irq == IRQ_SYSTMR) {
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
-core_tick:
-#else
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
 		bfin_write_TIMER_STATUS(1); /* Latch TIMIL0 */
 #endif
 		/* This is basically what we need from the register frame. */
@@ -1292,9 +1279,6 @@
 			__raw_get_cpu_var(__ipipe_tick_regs).ipend |= 0x10;
 	}
 
-#ifndef CONFIG_GENERIC_CLOCKEVENTS
-core_tick:
-#endif
 	if (this_domain == ipipe_root_domain) {
 		s = __test_and_set_bit(IPIPE_SYNCDEFER_FLAG, &p->status);
 		barrier();
@@ -1312,7 +1296,7 @@
 		}
 	}
 
-       return 0;
+	return 0;
 }
 
 #endif /* CONFIG_IPIPE */
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 93eab61..3b8ebae 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -43,8 +43,13 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/cpu.h>
+#include <asm/time.h>
 #include <linux/err.h>
 
+/*
+ * Anomaly notes:
+ * 05000120 - we always define corelock as 32-bit integer in L2
+ */
 struct corelock_slot corelock __attribute__ ((__section__(".l2.bss")));
 
 void __cpuinitdata *init_retx_coreb, *init_saved_retx_coreb,
@@ -352,7 +357,7 @@
 
 static void __cpuinit setup_secondary(unsigned int cpu)
 {
-#if !(defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE))
+#if !defined(CONFIG_TICKSOURCE_GPTMR0)
 	struct irq_desc *timer_desc;
 #endif
 	unsigned long ilat;
@@ -364,16 +369,13 @@
 	bfin_write_ILAT(ilat);
 	CSYNC();
 
-	/* Reserve the PDA space for the secondary CPU. */
-	reserve_pda();
-
 	/* Enable interrupt levels IVG7-15. IARs have been already
 	 * programmed by the boot CPU.  */
 	bfin_irq_flags |= IMASK_IVG15 |
 	    IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
 	    IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 	/* Power down the core timer, just to play safe. */
 	bfin_write_TCNTL(0);
 
diff --git a/arch/blackfin/mm/blackfin_sram.h b/arch/blackfin/mm/blackfin_sram.h
index 8cb0945..bc00628 100644
--- a/arch/blackfin/mm/blackfin_sram.h
+++ b/arch/blackfin/mm/blackfin_sram.h
@@ -30,7 +30,6 @@
 #ifndef __BLACKFIN_SRAM_H__
 #define __BLACKFIN_SRAM_H__
 
-extern void bfin_sram_init(void);
 extern void *l1sram_alloc(size_t);
 
 #endif
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 9c3629b..014a55a 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -52,9 +52,14 @@
 
 static unsigned long empty_bad_page;
 
-unsigned long empty_zero_page;
+static unsigned long empty_zero_page;
 
-extern unsigned long exception_stack[NR_CPUS][1024];
+#ifndef CONFIG_EXCEPTION_L1_SCRATCH
+#if defined CONFIG_SYSCALL_TAB_L1
+__attribute__((l1_data))
+#endif
+static unsigned long exception_stack[NR_CPUS][1024];
+#endif
 
 struct blackfin_pda cpu_pda[NR_CPUS];
 EXPORT_SYMBOL(cpu_pda);
@@ -117,19 +122,18 @@
 	cpu_pda[0].next = &cpu_pda[1];
 	cpu_pda[1].next = &cpu_pda[0];
 
+#ifdef CONFIG_EXCEPTION_L1_SCRATCH
+	cpu_pda[cpu].ex_stack = (unsigned long *)(L1_SCRATCH_START + \
+					L1_SCRATCH_LENGTH);
+#else
 	cpu_pda[cpu].ex_stack = exception_stack[cpu + 1];
+#endif
 
 #ifdef CONFIG_SMP
 	cpu_pda[cpu].imask = 0x1f;
 #endif
 }
 
-void __cpuinit reserve_pda(void)
-{
-	printk(KERN_INFO "PDA for CPU%u reserved at %p\n", smp_processor_id(),
-					&cpu_pda[smp_processor_id()]);
-}
-
 void __init mem_init(void)
 {
 	unsigned int codek = 0, datak = 0, initk = 0;
@@ -171,19 +175,6 @@
 		initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10)));
 }
 
-static int __init sram_init(void)
-{
-	/* Initialize the blackfin L1 Memory. */
-	bfin_sram_init();
-
-	/* Reserve the PDA space for the boot CPU right after we
-	 * initialized the scratch memory allocator.
-	 */
-	reserve_pda();
-	return 0;
-}
-pure_initcall(sram_init);
-
 static void __init free_init_pages(const char *what, unsigned long begin, unsigned long end)
 {
 	unsigned long addr;
diff --git a/arch/blackfin/mm/isram-driver.c b/arch/blackfin/mm/isram-driver.c
index 22913e7..c080e70 100644
--- a/arch/blackfin/mm/isram-driver.c
+++ b/arch/blackfin/mm/isram-driver.c
@@ -125,7 +125,7 @@
 {
 	if ((addr >= (void *)L1_CODE_START) &&
 	    (addr < (void *)(L1_CODE_START + L1_CODE_LENGTH))) {
-		if ((addr + n) >= (void *)(L1_CODE_START + L1_CODE_LENGTH)) {
+		if ((addr + n) > (void *)(L1_CODE_START + L1_CODE_LENGTH)) {
 			show_stack(NULL, NULL);
 			printk(KERN_ERR "isram_memcpy: copy involving %p length "
 					"(%zu) too long\n", addr, n);
diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c
index 530d139..0bc3c4e 100644
--- a/arch/blackfin/mm/sram-alloc.c
+++ b/arch/blackfin/mm/sram-alloc.c
@@ -83,6 +83,14 @@
 static void __init l1sram_init(void)
 {
 	unsigned int cpu;
+	unsigned long reserve;
+
+#ifdef CONFIG_SMP
+	reserve = 0;
+#else
+	reserve = sizeof(struct l1_scratch_task_info);
+#endif
+
 	for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
 		per_cpu(free_l1_ssram_head, cpu).next =
 			kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
@@ -91,8 +99,8 @@
 			return;
 		}
 
-		per_cpu(free_l1_ssram_head, cpu).next->paddr = (void *)get_l1_scratch_start_cpu(cpu);
-		per_cpu(free_l1_ssram_head, cpu).next->size = L1_SCRATCH_LENGTH;
+		per_cpu(free_l1_ssram_head, cpu).next->paddr = (void *)get_l1_scratch_start_cpu(cpu) + reserve;
+		per_cpu(free_l1_ssram_head, cpu).next->size = L1_SCRATCH_LENGTH - reserve;
 		per_cpu(free_l1_ssram_head, cpu).next->pid = 0;
 		per_cpu(free_l1_ssram_head, cpu).next->next = NULL;
 
@@ -223,7 +231,7 @@
 	spin_lock_init(&l2_sram_lock);
 }
 
-void __init bfin_sram_init(void)
+static int __init bfin_sram_init(void)
 {
 	sram_piece_cache = kmem_cache_create("sram_piece_cache",
 				sizeof(struct sram_piece),
@@ -233,7 +241,10 @@
 	l1_data_sram_init();
 	l1_inst_sram_init();
 	l2_sram_init();
+
+	return 0;
 }
+pure_initcall(bfin_sram_init);
 
 /* SRAM allocate function */
 static void *_sram_alloc(size_t size, struct sram_piece *pfree_head,
@@ -732,6 +743,10 @@
 }
 EXPORT_SYMBOL(sram_free_with_lsl);
 
+/* Allocate memory and keep in L1 SRAM List (lsl) so that the resources are
+ * tracked.  These are designed for userspace so that when a process exits,
+ * we can safely reap their resources.
+ */
 void *sram_alloc_with_lsl(size_t size, unsigned long flags)
 {
 	void *addr = NULL;
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index df3925c..d70b445 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,12 +325,14 @@
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
 	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
+
+	return 0;
 }
 
 static struct irq_chip crisv32_irq_type = {
diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c
index a187833..abc13e3 100644
--- a/arch/cris/kernel/module.c
+++ b/arch/cris/kernel/module.c
@@ -48,8 +48,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	FREE_MODULE(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9d1552a..8a5bd7a 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -6,6 +6,7 @@
 	bool
 	default y
 	select HAVE_IDE
+	select HAVE_ARCH_TRACEHOOK
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 287f6f6..50ae91b 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -112,7 +112,7 @@
 #define atomic_clear_mask(mask, v)	atomic_test_and_ANDNOT_mask((mask), (v))
 #define atomic_set_mask(mask, v)	atomic_test_and_OR_mask((mask), (v))
 
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@
 	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@
 	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@
 	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_clear_bit(nr, addr);
 }
 
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_set_bit(nr, addr);
 }
 
-static inline void change_bit(int nr, volatile void * addr)
+static inline void change_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_change_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void * addr)
+static inline void __clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -161,7 +161,7 @@
 	*a &= ~mask;
 }
 
-static inline void __set_bit(int nr, volatile void * addr)
+static inline void __set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -171,7 +171,7 @@
 	*a |= mask;
 }
 
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -181,7 +181,7 @@
 	*a ^= mask;
 }
 
-static inline int __test_and_clear_bit(int nr, volatile void * addr)
+static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -193,7 +193,7 @@
 	return retval;
 }
 
-static inline int __test_and_set_bit(int nr, volatile void * addr)
+static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -205,7 +205,7 @@
 	return retval;
 }
 
-static inline int __test_and_change_bit(int nr, volatile void * addr)
+static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -220,12 +220,13 @@
 /*
  * This routine doesn't need to be atomic.
  */
-static inline int __constant_test_bit(int nr, const volatile void * addr)
+static inline int
+__constant_test_bit(unsigned long nr, const volatile void *addr)
 {
 	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
 }
 
-static inline int __test_bit(int nr, const volatile void * addr)
+static inline int __test_bit(unsigned long nr, const volatile void *addr)
 {
 	int 	* a = (int *) addr;
 	int	mask;
diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h
index 7279ec07d..7bbf6e4 100644
--- a/arch/frv/include/asm/elf.h
+++ b/arch/frv/include/asm/elf.h
@@ -116,6 +116,7 @@
 } while(0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_FDPIC_CORE_EFLAGS	EF_FRV_FDPIC
 #define ELF_EXEC_PAGESIZE	16384
 
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b4..cc685e6 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -87,8 +87,7 @@
 				       dma_addr_t dma_handle,
 				       size_t size, int direction)
 {
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
 			    (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@
 				   int nelems, int direction)
 {
 	int i;
-
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	for (i = 0; i < nelems; i++)
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h
index cf69340..a54b535 100644
--- a/arch/frv/include/asm/ptrace.h
+++ b/arch/frv/include/asm/ptrace.h
@@ -65,6 +65,8 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+struct task_struct;
+
 /*
  * we dedicate GR28 to keeping a pointer to the current exception frame
  * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@
 
 #define user_mode(regs)			(!((regs)->psr & PSR_S))
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 
 extern unsigned long user_stack(const struct pt_regs *);
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) ((regs)->pc)
-#endif
+
+#define task_pt_regs(task) ((task)->thread.frame0)
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644
index 0000000..70689eb
--- /dev/null
+++ b/arch/frv/include/asm/syscall.h
@@ -0,0 +1,123 @@
+/* syscall parameter access functions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H
+
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * Get the system call number or -1
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return regs->syscallno;
+}
+
+/*
+ * Restore the clobbered GR8 register
+ * (1st syscall arg was overwritten with syscall return or error)
+ */
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gr8 = regs->orig_gr8;
+}
+
+/*
+ * See if the syscall return value is an error, returning it if it is and 0 if
+ * not
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
+}
+
+/*
+ * Get the syscall return value
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gr8;
+}
+
+/*
+ * Set the syscall return value
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error)
+		regs->gr8 = -error;
+	else
+		regs->gr8 = val;
+}
+
+/*
+ * Retrieve the system call arguments
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	/*
+	 * Do this simply for now. If we need to start supporting
+	 * fetching arguments from arbitrary indices, this will need some
+	 * extra logic. Presently there are no in-tree users that depend
+	 * on this behaviour.
+	 */
+	BUG_ON(i);
+
+	/* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
+	switch (n) {
+	case 6: args[5] = regs->gr13;
+	case 5: args[4] = regs->gr12;
+	case 4: args[3] = regs->gr11;
+	case 3: args[2] = regs->gr10;
+	case 2: args[1] = regs->gr9;
+	case 1:	args[0] = regs->gr8;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Alter the system call arguments
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	/* Same note as above applies */
+	BUG_ON(i);
+
+	switch (n) {
+	case 6: regs->gr13 = args[5];
+	case 5: regs->gr12 = args[4];
+	case 4: regs->gr11 = args[3];
+	case 3: regs->gr10 = args[2];
+	case 2: regs->gr9  = args[1];
+	case 1: regs->gr8  = args[0];
+		break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* _ASM_SYSCALL_H */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bb53ab7..e8a5ed7 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -109,20 +109,20 @@
  * - other flags in MSW
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SINGLESTEP		3	/* restore singlestep on return to user mode */
-#define TIF_IRET		4	/* return with iret */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* OOM killer killed process */
 #define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 1da523b..356e0e3 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -886,7 +886,6 @@
 	bnc		icc0,#0,__syscall_badsys
 
 	ldi		@(gr15,#TI_FLAGS),gr4
-	ori		gr4,#_TIF_SYSCALL_TRACE,gr4
 	andicc		gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
 	bne		icc0,#0,__syscall_trace_entry
 
@@ -1150,11 +1149,10 @@
 	# perform syscall entry tracing
 __syscall_trace_entry:
 	LEDS		0x6320
-	setlos.p	#0,gr8
-	call		do_syscall_trace
+	call		syscall_trace_entry
 
-	ldi		@(gr28,#REG_SYSCALLNO),gr7
-	lddi		@(gr28,#REG_GR(8)) ,gr8
+	lddi.p		@(gr28,#REG_GR(8)) ,gr8
+	ori		gr8,#0,gr7		; syscall_trace_entry() returned new syscallno
 	lddi		@(gr28,#REG_GR(10)),gr10
 	lddi.p		@(gr28,#REG_GR(12)),gr12
 
@@ -1169,11 +1167,10 @@
 	beq		icc0,#1,__entry_work_pending
 
 	movsg		psr,gr23
-	andi		gr23,#~PSR_PIL,gr23	; could let do_syscall_trace() call schedule()
+	andi		gr23,#~PSR_PIL,gr23	; could let syscall_trace_exit() call schedule()
 	movgs		gr23,psr
 
-	setlos.p	#1,gr8
-	call		do_syscall_trace
+	call		syscall_trace_exit
 	bra		__entry_resume_userspace
 
 __syscall_badsys:
diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c
index 850d168..711763c 100644
--- a/arch/frv/kernel/module.c
+++ b/arch/frv/kernel/module.c
@@ -35,8 +35,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 5e7d401..60eeed3 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -19,6 +19,9 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -33,6 +36,169 @@
  */
 
 /*
+ * retrieve the contents of FRV userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_int_regs *iregs = &target->thread.user->i;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  iregs, 0, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_int_regs *iregs = &target->thread.user->i;
+	unsigned int offs_gr0, offs_gr1;
+	int ret;
+
+	/* not allowed to set PSR or __status */
+	if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, psr))
+		return -EIO;
+
+	if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, __status))
+		return -EIO;
+
+	/* set the control regs */
+	offs_gr0 = offsetof(struct user_int_regs, gr[0]);
+	offs_gr1 = offsetof(struct user_int_regs, gr[1]);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 iregs, 0, offs_gr0);
+	if (ret < 0)
+		return ret;
+
+	/* skip GR0/TBR */
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					offs_gr0, offs_gr1);
+	if (ret < 0)
+		return ret;
+
+	/* set the general regs */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &iregs->gr[1], offs_gr1, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * retrieve the contents of FRV userspace FP/Media registers
+ */
+static int fpmregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace FP/Media registers
+ */
+static int fpmregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * determine if the FP/Media registers have actually been used
+ */
+static int fpmregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return tsk_used_math(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the FRV under Linux
+ */
+enum frv_regset {
+	REGSET_GENERAL,
+	REGSET_FPMEDIA,
+};
+
+static const struct user_regset frv_regsets[] = {
+	/*
+	 * General register format is:
+	 *	PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8
+	 *	GNER0-1, IACC0, TBR, GR1-63
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU/Media register format is:
+	 *	FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
+	 */
+	[REGSET_FPMEDIA] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct user_fpmedia_regs) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpmregs_get,
+		.set		= fpmregs_set,
+		.active		= fpmregs_active,
+	},
+};
+
+static const struct user_regset_view user_frv_native_view = {
+	.name		= "frv",
+	.e_machine	= EM_FRV,
+	.regsets	= frv_regsets,
+	.n		= ARRAY_SIZE(frv_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_frv_native_view;
+}
+
+/*
  * Get contents of register REGNO in task TASK.
  */
 static inline long get_reg(struct task_struct *task, int regno)
@@ -69,40 +235,23 @@
 }
 
 /*
- * check that an address falls within the bounds of the target process's memory
- * mappings
- */
-static inline int is_user_addr_valid(struct task_struct *child,
-				     unsigned long start, unsigned long len)
-{
-#ifdef CONFIG_MMU
-	if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
-		return -EIO;
-	return 0;
-#else
-	struct vm_area_struct *vma;
-
-	vma = find_vma(child->mm, start);
-	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
-		return 0;
-
-	return -EIO;
-#endif
-}
-
-/*
  * Called by kernel/ptrace.c when detaching..
  *
  * Control h/w single stepping
  */
-void ptrace_disable(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
+{
+	child->thread.frame0->__status |= REG__STATUS_STEP;
+}
+
+void user_disable_single_step(struct task_struct *child)
 {
 	child->thread.frame0->__status &= ~REG__STATUS_STEP;
 }
 
-void ptrace_enable(struct task_struct *child)
+void ptrace_disable(struct task_struct *child)
 {
-	child->thread.frame0->__status |= REG__STATUS_STEP;
+	user_disable_single_step(child);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@
 	int ret;
 
 	switch (request) {
-		/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_peekdata(child, addr, data);
-		break;
-
 		/* read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		tmp = 0;
@@ -163,15 +303,6 @@
 		break;
 	}
 
-		/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_pokedata(child, addr, data);
-		break;
-
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
 		if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@
 
 		ret = 0;
 		switch (addr >> 2) {
-		case 0 ... PT__END-1:
+		case 0 ... PT__END - 1:
 			ret = put_reg(child, addr >> 2, data);
 			break;
 
@@ -189,95 +320,29 @@
 		}
 		break;
 
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT: /* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_GENERAL,
+					   0, sizeof(child->thread.user->i),
+					   (void __user *)data);
 
-		/* make the child exit.  Best I can do is send it a sigkill.
-		 * perhaps it should be put in the status that it wants to
-		 * exit.
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_GENERAL,
+					     0, sizeof(child->thread.user->i),
+					     (const void __user *)data);
 
-	case PTRACE_SINGLESTEP:  /* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETFPREGS:	/* Get the child FP/Media state. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_FPMEDIA,
+					   0, sizeof(child->thread.user->f),
+					   (void __user *)data);
 
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-	case PTRACE_GETREGS: { /* Get all integer regs from the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			tmp = get_reg(child, i);
-			if (put_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all integer regs in the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			if (get_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			put_reg(child, i, tmp);
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
-		ret = 0;
-		if (copy_to_user((void *) data,
-				 &child->thread.user->f,
-				 sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
-		ret = 0;
-		if (copy_from_user(&child->thread.user->f,
-				   (void *) data,
-				   sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FP/Media state. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_FPMEDIA,
+					     0, sizeof(child->thread.user->f),
+					     (const void __user *)data);
 
 	case PTRACE_GETFDPIC:
 		tmp = 0;
@@ -300,414 +365,36 @@
 		break;
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 	return ret;
 }
 
-int __nongprelbss kstrace;
-
-static const struct {
-	const char	*name;
-	unsigned	argmask;
-} __syscall_name_table[NR_syscalls] = {
-	[0]	= { "restart_syscall"			},
-	[1]	= { "exit",		0x000001	},
-	[2]	= { "fork",		0xffffff	},
-	[3]	= { "read",		0x000141	},
-	[4]	= { "write",		0x000141	},
-	[5]	= { "open",		0x000235	},
-	[6]	= { "close",		0x000001	},
-	[7]	= { "waitpid",		0x000141	},
-	[8]	= { "creat",		0x000025	},
-	[9]	= { "link",		0x000055	},
-	[10]	= { "unlink",		0x000005	},
-	[11]	= { "execve",		0x000445	},
-	[12]	= { "chdir",		0x000005	},
-	[13]	= { "time",		0x000004	},
-	[14]	= { "mknod",		0x000325	},
-	[15]	= { "chmod",		0x000025	},
-	[16]	= { "lchown",		0x000025	},
-	[17]	= { "break" },
-	[18]	= { "oldstat",		0x000045	},
-	[19]	= { "lseek",		0x000131	},
-	[20]	= { "getpid",		0xffffff	},
-	[21]	= { "mount",		0x043555	},
-	[22]	= { "umount",		0x000005	},
-	[23]	= { "setuid",		0x000001	},
-	[24]	= { "getuid",		0xffffff	},
-	[25]	= { "stime",		0x000004	},
-	[26]	= { "ptrace",		0x004413	},
-	[27]	= { "alarm",		0x000001	},
-	[28]	= { "oldfstat",		0x000041	},
-	[29]	= { "pause",		0xffffff	},
-	[30]	= { "utime",		0x000045	},
-	[31]	= { "stty" },
-	[32]	= { "gtty" },
-	[33]	= { "access",		0x000025	},
-	[34]	= { "nice",		0x000001	},
-	[35]	= { "ftime" },
-	[36]	= { "sync",		0xffffff	},
-	[37]	= { "kill",		0x000011	},
-	[38]	= { "rename",		0x000055	},
-	[39]	= { "mkdir",		0x000025	},
-	[40]	= { "rmdir",		0x000005	},
-	[41]	= { "dup",		0x000001	},
-	[42]	= { "pipe",		0x000004	},
-	[43]	= { "times",		0x000004	},
-	[44]	= { "prof" },
-	[45]	= { "brk",		0x000004	},
-	[46]	= { "setgid",		0x000001	},
-	[47]	= { "getgid",		0xffffff	},
-	[48]	= { "signal",		0x000041	},
-	[49]	= { "geteuid",		0xffffff	},
-	[50]	= { "getegid",		0xffffff	},
-	[51]	= { "acct",		0x000005	},
-	[52]	= { "umount2",		0x000035	},
-	[53]	= { "lock" },
-	[54]	= { "ioctl",		0x000331	},
-	[55]	= { "fcntl",		0x000331	},
-	[56]	= { "mpx" },
-	[57]	= { "setpgid",		0x000011	},
-	[58]	= { "ulimit" },
-	[60]	= { "umask",		0x000002	},
-	[61]	= { "chroot",		0x000005	},
-	[62]	= { "ustat",		0x000043	},
-	[63]	= { "dup2",		0x000011	},
-	[64]	= { "getppid",		0xffffff	},
-	[65]	= { "getpgrp",		0xffffff	},
-	[66]	= { "setsid",		0xffffff	},
-	[67]	= { "sigaction" },
-	[68]	= { "sgetmask" },
-	[69]	= { "ssetmask" },
-	[70]	= { "setreuid" },
-	[71]	= { "setregid" },
-	[72]	= { "sigsuspend" },
-	[73]	= { "sigpending" },
-	[74]	= { "sethostname" },
-	[75]	= { "setrlimit" },
-	[76]	= { "getrlimit" },
-	[77]	= { "getrusage" },
-	[78]	= { "gettimeofday" },
-	[79]	= { "settimeofday" },
-	[80]	= { "getgroups" },
-	[81]	= { "setgroups" },
-	[82]	= { "select" },
-	[83]	= { "symlink" },
-	[84]	= { "oldlstat" },
-	[85]	= { "readlink" },
-	[86]	= { "uselib" },
-	[87]	= { "swapon" },
-	[88]	= { "reboot" },
-	[89]	= { "readdir" },
-	[91]	= { "munmap",		0x000034	},
-	[92]	= { "truncate" },
-	[93]	= { "ftruncate" },
-	[94]	= { "fchmod" },
-	[95]	= { "fchown" },
-	[96]	= { "getpriority" },
-	[97]	= { "setpriority" },
-	[99]	= { "statfs" },
-	[100]	= { "fstatfs" },
-	[102]	= { "socketcall" },
-	[103]	= { "syslog" },
-	[104]	= { "setitimer" },
-	[105]	= { "getitimer" },
-	[106]	= { "stat" },
-	[107]	= { "lstat" },
-	[108]	= { "fstat" },
-	[111]	= { "vhangup" },
-	[114]	= { "wait4" },
-	[115]	= { "swapoff" },
-	[116]	= { "sysinfo" },
-	[117]	= { "ipc" },
-	[118]	= { "fsync" },
-	[119]	= { "sigreturn" },
-	[120]	= { "clone" },
-	[121]	= { "setdomainname" },
-	[122]	= { "uname" },
-	[123]	= { "modify_ldt" },
-	[123]	= { "cacheflush" },
-	[124]	= { "adjtimex" },
-	[125]	= { "mprotect" },
-	[126]	= { "sigprocmask" },
-	[127]	= { "create_module" },
-	[128]	= { "init_module" },
-	[129]	= { "delete_module" },
-	[130]	= { "get_kernel_syms" },
-	[131]	= { "quotactl" },
-	[132]	= { "getpgid" },
-	[133]	= { "fchdir" },
-	[134]	= { "bdflush" },
-	[135]	= { "sysfs" },
-	[136]	= { "personality" },
-	[137]	= { "afs_syscall" },
-	[138]	= { "setfsuid" },
-	[139]	= { "setfsgid" },
-	[140]	= { "_llseek",			0x014331	},
-	[141]	= { "getdents" },
-	[142]	= { "_newselect",		0x000141	},
-	[143]	= { "flock" },
-	[144]	= { "msync" },
-	[145]	= { "readv" },
-	[146]	= { "writev" },
-	[147]	= { "getsid",			0x000001	},
-	[148]	= { "fdatasync",		0x000001	},
-	[149]	= { "_sysctl",			0x000004	},
-	[150]	= { "mlock" },
-	[151]	= { "munlock" },
-	[152]	= { "mlockall" },
-	[153]	= { "munlockall" },
-	[154]	= { "sched_setparam" },
-	[155]	= { "sched_getparam" },
-	[156]	= { "sched_setscheduler" },
-	[157]	= { "sched_getscheduler" },
-	[158]	= { "sched_yield" },
-	[159]	= { "sched_get_priority_max" },
-	[160]	= { "sched_get_priority_min" },
-	[161]	= { "sched_rr_get_interval" },
-	[162]	= { "nanosleep",		0x000044	},
-	[163]	= { "mremap" },
-	[164]	= { "setresuid" },
-	[165]	= { "getresuid" },
-	[166]	= { "vm86" },
-	[167]	= { "query_module" },
-	[168]	= { "poll" },
-	[169]	= { "nfsservctl" },
-	[170]	= { "setresgid" },
-	[171]	= { "getresgid" },
-	[172]	= { "prctl",			0x333331	},
-	[173]	= { "rt_sigreturn",		0xffffff	},
-	[174]	= { "rt_sigaction",		0x001441	},
-	[175]	= { "rt_sigprocmask",		0x001441	},
-	[176]	= { "rt_sigpending",		0x000014	},
-	[177]	= { "rt_sigtimedwait",		0x001444	},
-	[178]	= { "rt_sigqueueinfo",		0x000411	},
-	[179]	= { "rt_sigsuspend",		0x000014	},
-	[180]	= { "pread",			0x003341	},
-	[181]	= { "pwrite",			0x003341	},
-	[182]	= { "chown",			0x000115	},
-	[183]	= { "getcwd" },
-	[184]	= { "capget" },
-	[185]	= { "capset" },
-	[186]	= { "sigaltstack" },
-	[187]	= { "sendfile" },
-	[188]	= { "getpmsg" },
-	[189]	= { "putpmsg" },
-	[190]	= { "vfork",			0xffffff	},
-	[191]	= { "ugetrlimit" },
-	[192]	= { "mmap2",			0x313314	},
-	[193]	= { "truncate64" },
-	[194]	= { "ftruncate64" },
-	[195]	= { "stat64",			0x000045	},
-	[196]	= { "lstat64",			0x000045	},
-	[197]	= { "fstat64",			0x000041	},
-	[198]	= { "lchown32" },
-	[199]	= { "getuid32",			0xffffff	},
-	[200]	= { "getgid32",			0xffffff	},
-	[201]	= { "geteuid32",		0xffffff	},
-	[202]	= { "getegid32",		0xffffff	},
-	[203]	= { "setreuid32" },
-	[204]	= { "setregid32" },
-	[205]	= { "getgroups32" },
-	[206]	= { "setgroups32" },
-	[207]	= { "fchown32" },
-	[208]	= { "setresuid32" },
-	[209]	= { "getresuid32" },
-	[210]	= { "setresgid32" },
-	[211]	= { "getresgid32" },
-	[212]	= { "chown32" },
-	[213]	= { "setuid32" },
-	[214]	= { "setgid32" },
-	[215]	= { "setfsuid32" },
-	[216]	= { "setfsgid32" },
-	[217]	= { "pivot_root" },
-	[218]	= { "mincore" },
-	[219]	= { "madvise" },
-	[220]	= { "getdents64" },
-	[221]	= { "fcntl64" },
-	[223]	= { "security" },
-	[224]	= { "gettid" },
-	[225]	= { "readahead" },
-	[226]	= { "setxattr" },
-	[227]	= { "lsetxattr" },
-	[228]	= { "fsetxattr" },
-	[229]	= { "getxattr" },
-	[230]	= { "lgetxattr" },
-	[231]	= { "fgetxattr" },
-	[232]	= { "listxattr" },
-	[233]	= { "llistxattr" },
-	[234]	= { "flistxattr" },
-	[235]	= { "removexattr" },
-	[236]	= { "lremovexattr" },
-	[237]	= { "fremovexattr" },
-	[238]	= { "tkill" },
-	[239]	= { "sendfile64" },
-	[240]	= { "futex" },
-	[241]	= { "sched_setaffinity" },
-	[242]	= { "sched_getaffinity" },
-	[243]	= { "set_thread_area" },
-	[244]	= { "get_thread_area" },
-	[245]	= { "io_setup" },
-	[246]	= { "io_destroy" },
-	[247]	= { "io_getevents" },
-	[248]	= { "io_submit" },
-	[249]	= { "io_cancel" },
-	[250]	= { "fadvise64" },
-	[252]	= { "exit_group",		0x000001	},
-	[253]	= { "lookup_dcookie" },
-	[254]	= { "epoll_create" },
-	[255]	= { "epoll_ctl" },
-	[256]	= { "epoll_wait" },
-	[257]	= { "remap_file_pages" },
-	[258]	= { "set_tid_address" },
-	[259]	= { "timer_create" },
-	[260]	= { "timer_settime" },
-	[261]	= { "timer_gettime" },
-	[262]	= { "timer_getoverrun" },
-	[263]	= { "timer_delete" },
-	[264]	= { "clock_settime" },
-	[265]	= { "clock_gettime" },
-	[266]	= { "clock_getres" },
-	[267]	= { "clock_nanosleep" },
-	[268]	= { "statfs64" },
-	[269]	= { "fstatfs64" },
-	[270]	= { "tgkill" },
-	[271]	= { "utimes" },
-	[272]	= { "fadvise64_64" },
-	[273]	= { "vserver" },
-	[274]	= { "mbind" },
-	[275]	= { "get_mempolicy" },
-	[276]	= { "set_mempolicy" },
-	[277]	= { "mq_open" },
-	[278]	= { "mq_unlink" },
-	[279]	= { "mq_timedsend" },
-	[280]	= { "mq_timedreceive" },
-	[281]	= { "mq_notify" },
-	[282]	= { "mq_getsetattr" },
-	[283]	= { "sys_kexec_load" },
-};
-
-asmlinkage void do_syscall_trace(int leaving)
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(void)
 {
-#if 0
-	unsigned long *argp;
-	const char *name;
-	unsigned argmask;
-	char buffer[16];
-
-	if (!kstrace)
-		return;
-
-	if (!current->mm)
-		return;
-
-	if (__frame->gr7 == __NR_close)
-		return;
-
-#if 0
-	if (__frame->gr7 != __NR_mmap2 &&
-	    __frame->gr7 != __NR_vfork &&
-	    __frame->gr7 != __NR_execve &&
-	    __frame->gr7 != __NR_exit)
-		return;
-#endif
-
-	argmask = 0;
-	name = NULL;
-	if (__frame->gr7 < NR_syscalls) {
-		name = __syscall_name_table[__frame->gr7].name;
-		argmask = __syscall_name_table[__frame->gr7].argmask;
-	}
-	if (!name) {
-		sprintf(buffer, "sys_%lx", __frame->gr7);
-		name = buffer;
+	__frame->__status |= REG__STATUS_SYSC_ENTRY;
+	if (tracehook_report_syscall_entry(__frame)) {
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * __frame->syscallno
+		 */
+		return ULONG_MAX;
 	}
 
-	if (!leaving) {
-		if (!argmask) {
-			printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
-			       current->pid,
-			       name,
-			       __frame->gr8,
-			       __frame->gr9,
-			       __frame->gr10,
-			       __frame->gr11,
-			       __frame->gr12,
-			       __frame->gr13);
-		}
-		else if (argmask == 0xffffff) {
-			printk(KERN_CRIT "[%d] %s()\n",
-			       current->pid,
-			       name);
-		}
-		else {
-			printk(KERN_CRIT "[%d] %s(",
-			       current->pid,
-			       name);
+	return __frame->syscallno;
+}
 
-			argp = &__frame->gr8;
-
-			do {
-				switch (argmask & 0xf) {
-				case 1:
-					printk("%ld", (long) *argp);
-					break;
-				case 2:
-					printk("%lo", *argp);
-					break;
-				case 3:
-					printk("%lx", *argp);
-					break;
-				case 4:
-					printk("%p", (void *) *argp);
-					break;
-				case 5:
-					printk("\"%s\"", (char *) *argp);
-					break;
-				}
-
-				argp++;
-				argmask >>= 4;
-				if (argmask)
-					printk(",");
-
-			} while (argmask);
-
-			printk(")\n");
-		}
-	}
-	else {
-		if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
-			printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
-		else
-			printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
-	}
-	return;
-#endif
-
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-	/* we need to indicate entry or exit to strace */
-	if (leaving)
-		__frame->__status |= REG__STATUS_SYSC_EXIT;
-	else
-		__frame->__status |= REG__STATUS_SYSC_ENTRY;
-
-	ptrace_notify(SIGTRAP);
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(void)
+{
+	__frame->__status |= REG__STATUS_SYSC_EXIT;
+	tracehook_report_syscall_exit(__frame, 0);
 }
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 3bdb368..4a7a62c 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -516,6 +517,9 @@
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, __frame,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -564,4 +568,10 @@
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal();
 
+	/* deal with notification on about to resume userspace execution */
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
+	}
+
 } /* end do_notify_resume() */
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index 9fb771a..374f88d 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -23,8 +23,7 @@
 	char *p, ch;
 	long err = -EFAULT;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 	p = dst;
 
@@ -76,8 +75,7 @@
 	long err = 0;
 	char ch;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 #ifndef CONFIG_MMU
 	if ((unsigned long) src < memory_start)
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 52ff9ae..4e1ba0b 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -116,8 +116,7 @@
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -151,8 +150,7 @@
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
 				    sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	return nents;
 }
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 3ddedeb..45954f0 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -48,8 +48,7 @@
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -81,8 +80,7 @@
 	void *vaddr;
 	int i;
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	dampr2 = __get_DAMPR(2);
 
diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c
index cfc9127..0865e29 100644
--- a/arch/h8300/kernel/module.c
+++ b/arch/h8300/kernel/module.c
@@ -23,8 +23,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index cc0a318..acb5047 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -21,9 +21,10 @@
 {
 }
 
-static void
+static int
 hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
+	return 0;
 }
 
 static struct hw_interrupt_type irq_type_hp_sim = {
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 4542651..5f43697 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -371,6 +371,7 @@
 	int last_run_cpu;
 	int vmm_tr_slot;
 	int vm_tr_slot;
+	int sn_rtc_tr_slot;
 
 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
@@ -465,6 +466,7 @@
 	unsigned long	vmm_init_rr;
 
 	int		online_vcpus;
+	int		is_sn2;
 
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
@@ -472,6 +474,7 @@
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
@@ -578,6 +581,8 @@
 	kvm_vmm_entry 	*vmm_entry;
 	kvm_tramp_entry *tramp_entry;
 	unsigned long 	vmm_ivt;
+	unsigned long	patch_mov_ar;
+	unsigned long	patch_mov_ar_sn2;
 };
 
 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
@@ -585,7 +590,6 @@
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 void kvm_sal_emul(struct kvm_vcpu *vcpu);
 
-static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
 #endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7a9bff4..0a9cc73 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -146,6 +146,8 @@
 #define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
 #define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
 #define PAGE_KERNELRX	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX | \
+				 _PAGE_MA_UC)
 
 # ifndef __ASSEMBLY__
 
diff --git a/arch/ia64/include/asm/suspend.h b/arch/ia64/include/asm/suspend.h
deleted file mode 100644
index b05bbb6..0000000
--- a/arch/ia64/include/asm/suspend.h
+++ /dev/null
@@ -1 +0,0 @@
-/* dummy (must be non-empty to prevent prejudicial removal...) */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5510317..baec6f0 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -636,7 +636,7 @@
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
 		return gsi;
@@ -678,7 +678,8 @@
 
 	fadt = (struct acpi_table_fadt *)fadt_header;
 
-	acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+				 ACPI_ACTIVE_LOW);
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 166e0d8..f92cef4 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -329,7 +329,7 @@
 }
 
 
-static void
+static int
 iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
@@ -343,15 +343,15 @@
 
 	cpu = cpumask_first_and(cpu_online_mask, mask);
 	if (cpu >= nr_cpu_ids)
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
-		return;			/* not an IOSAPIC interrupt */
+		return -1;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
 
@@ -376,7 +376,9 @@
 		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
 		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
+
 #endif
+	return 0;
 }
 
 /*
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index acc4d19..b448197 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@
 	.name =		"IPI"
 };
 
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 	.handler =	dummy_handler,
 	.flags =	IRQF_DISABLED,
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2b15e23..0f8ade9 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -12,7 +12,7 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
+static int ia64_set_msi_irq_affinity(unsigned int irq,
 				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
@@ -20,10 +20,10 @@
 	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	read_msi_msg(irq, &msg);
 
@@ -39,6 +39,8 @@
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -130,17 +132,17 @@
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
 	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dmar_msi_read(irq, &msg);
 
@@ -151,6 +153,8 @@
 
 	dmar_msi_write(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 0a2d6b8..64d5209 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -23,7 +23,7 @@
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on HAVE_KVM && EXPERIMENTAL
+	depends on HAVE_KVM && MODULES && EXPERIMENTAL
 	# for device assignment:
 	depends on PCI
 	select PREEMPT_NOTIFIERS
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d20a5db..80c57b0 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -41,6 +41,9 @@
 #include <asm/div64.h>
 #include <asm/tlb.h>
 #include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "misc.h"
 #include "vti.h"
@@ -65,6 +68,16 @@
 	{ NULL }
 };
 
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (vcpu->kvm->arch.is_sn2)
+		return rtc_time();
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 static void kvm_flush_icache(unsigned long start, unsigned long len)
 {
 	int l;
@@ -119,8 +132,7 @@
 	unsigned long saved_psr;
 	int slot;
 
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-				PAGE_KERNEL));
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
 	local_irq_save(saved_psr);
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
@@ -283,6 +295,18 @@
 
 }
 
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vector, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		kvm_vcpu_kick(vcpu);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -292,20 +316,20 @@
 {
 	switch (dm) {
 	case SAPIC_FIXED:
-		kvm_apic_set_irq(vcpu, vector, 0);
 		break;
 	case SAPIC_NMI:
-		kvm_apic_set_irq(vcpu, 2, 0);
+		vector = 2;
 		break;
 	case SAPIC_EXTINT:
-		kvm_apic_set_irq(vcpu, 0, 0);
+		vector = 0;
 		break;
 	case SAPIC_INIT:
 	case SAPIC_PMI:
 	default:
 		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		break;
+		return;
 	}
+	__apic_accept_irq(vcpu, vector);
 }
 
 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -413,6 +437,23 @@
 	return 1;
 }
 
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte, rtc_phys_addr, map_addr;
+	int slot;
+
+	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+	vcpu->arch.sn_rtc_tr_slot = slot;
+	if (slot < 0) {
+		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+		slot = 0;
+	}
+	return slot;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 
@@ -426,7 +467,7 @@
 
 	if (irqchip_in_kernel(vcpu->kvm)) {
 
-		vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
 
 		if (time_after(vcpu_now_itc, vpd->itm)) {
 			vcpu->arch.timer_check = 1;
@@ -447,10 +488,10 @@
 		hrtimer_cancel(p_ht);
 		vcpu->arch.ht_active = 0;
 
-		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+				kvm_cpu_has_pending_timer(vcpu))
 			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-				vcpu->arch.mp_state =
-					KVM_MP_STATE_RUNNABLE;
+				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
 		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 			return -EINTR;
@@ -551,22 +592,35 @@
 	if (r < 0)
 		goto out;
 	vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2) {
+		r = kvm_sn2_setup_mappings(vcpu);
+		if (r < 0)
+			goto out;
+	}
+#endif
+
 	r = 0;
 out:
 	return r;
-
 }
 
 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 {
-
+	struct kvm *kvm = vcpu->kvm;
 	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
 	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2)
+		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
 }
 
 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 {
+	unsigned long psr;
+	int r;
 	int cpu = smp_processor_id();
 
 	if (vcpu->arch.last_run_cpu != cpu ||
@@ -578,36 +632,27 @@
 
 	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
 	vti_set_rr6(vcpu->arch.vmm_rr);
-	return kvm_insert_vmm_mapping(vcpu);
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	return r;
 }
+
 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 {
 	kvm_purge_vmm_mapping(vcpu);
 	vti_set_rr6(vcpu->arch.host_rr6);
 }
 
-static int  vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
 	int r;
 
-	/*Get host and guest context with guest address space.*/
-	host_ctx = kvm_get_host_context(vcpu);
-	guest_ctx = kvm_get_guest_context(vcpu);
-
-	r = kvm_vcpu_pre_transition(vcpu);
-	if (r < 0)
-		goto out;
-	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-	kvm_vcpu_post_transition(vcpu);
-	r = 0;
-out:
-	return r;
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	int r;
+	/*
+	 * down_read() may sleep and return with interrupts enabled
+	 */
+	down_read(&vcpu->kvm->slots_lock);
 
 again:
 	if (signal_pending(current)) {
@@ -616,26 +661,31 @@
 		goto out;
 	}
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
-
 	preempt_disable();
 	local_irq_disable();
 
-	vcpu->guest_mode = 1;
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto vcpu_run_fail;
+
+	up_read(&vcpu->kvm->slots_lock);
 	kvm_guest_enter();
-	r = vti_vcpu_run(vcpu, kvm_run);
-	if (r < 0) {
-		local_irq_enable();
-		preempt_enable();
-		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-		goto out;
-	}
+
+	/*
+	 * Transition to the guest
+	 */
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	/*
@@ -646,9 +696,10 @@
 	 */
 	barrier();
 	kvm_guest_exit();
-	up_read(&vcpu->kvm->slots_lock);
 	preempt_enable();
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	r = kvm_handle_exit(kvm_run, vcpu);
 
 	if (r > 0) {
@@ -657,12 +708,20 @@
 	}
 
 out:
+	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
 		kvm_resched(vcpu);
+		down_read(&vcpu->kvm->slots_lock);
 		goto again;
 	}
 
 	return r;
+
+vcpu_run_fail:
+	local_irq_enable();
+	preempt_enable();
+	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+	goto out;
 }
 
 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
@@ -788,6 +847,9 @@
 
 	if (IS_ERR(kvm))
 		return ERR_PTR(-ENOMEM);
+
+	kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
 	kvm_init_vm(kvm);
 
 	kvm->arch.online_vcpus = 0;
@@ -884,7 +946,7 @@
 	RESTORE_REGS(saved_gp);
 
 	vcpu->arch.irq_new_pending = 1;
-	vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
@@ -1043,10 +1105,6 @@
 	}
 }
 
-static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 static int vti_init_vpd(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -1165,7 +1223,7 @@
 		regs->cr_iip = PALE_RESET_ENTRY;
 
 		/*Initialize itc offset for vcpus*/
-		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+		itc_offset = 0UL - kvm_get_itc(vcpu);
 		for (i = 0; i < kvm->arch.online_vcpus; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
@@ -1237,6 +1295,7 @@
 
 	local_irq_save(psr);
 	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
 	if (r)
 		goto fail;
 	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
@@ -1254,13 +1313,11 @@
 		goto uninit;
 
 	kvm_purge_vmm_mapping(vcpu);
-	local_irq_restore(psr);
 
 	return 0;
 uninit:
 	kvm_vcpu_uninit(vcpu);
 fail:
-	local_irq_restore(psr);
 	return r;
 }
 
@@ -1291,7 +1348,6 @@
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
-	vti_vcpu_load(vcpu, cpu);
 	r = vti_vcpu_setup(vcpu, id);
 	put_cpu();
 
@@ -1427,7 +1483,7 @@
 	}
 	for (i = 0; i < 4; i++)
 		regs->insvc[i] = vcpu->arch.insvc[i];
-	regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
 	SAVE_REGS(xtp);
 	SAVE_REGS(metaphysical_rr0);
 	SAVE_REGS(metaphysical_rr4);
@@ -1574,6 +1630,7 @@
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
+	kvm_flush_remote_tlbs(kvm);
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
@@ -1616,8 +1673,37 @@
 	return 0;
 }
 
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+			  struct module *module)
+{
+	unsigned long new_ar, new_ar_sn2;
+	unsigned long module_base;
+
+	if (!ia64_platform_is("sn2"))
+		return;
+
+	module_base = (unsigned long)module->module_core;
+
+	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+	       "as source\n");
+
+	/*
+	 * Copy the SN2 version of mov_ar into place. They are both
+	 * the same size, so 6 bundles is sufficient (6 * 0x10).
+	 */
+	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-						struct module *module)
+			    struct module *module)
 {
 	unsigned long module_base;
 	unsigned long vmm_size;
@@ -1639,6 +1725,7 @@
 		return -EFAULT;
 
 	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_patch_vmm(vmm_info, module);
 	kvm_flush_icache(kvm_vmm_base, vmm_size);
 
 	/*Recalculate kvm_vmm_info based on new VMM*/
@@ -1792,38 +1879,24 @@
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	if (vcpu->guest_mode && cpu != ipi_pcpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vec, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
+	return __apic_accept_irq(vcpu, irq->vector);
 }
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1836,20 +1909,18 @@
 	return 0;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
-	int i;
+	return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}
 
-	for (i = 1; i < kvm->arch.online_vcpus; i++) {
-		if (!kvm->vcpus[i])
-			continue;
-		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
-			lvcpu = kvm->vcpus[i];
-	}
-
-	return lvcpu;
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode)
+{
+	struct kvm_lapic *target = vcpu->arch.apic;
+	return (dest_mode == 0) ?
+		kvm_apic_match_physical_addr(target, dest) :
+		kvm_apic_match_logical_addr(target, dest);
 }
 
 static int find_highest_bits(int *dat)
@@ -1888,6 +1959,12 @@
 	return 0;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
@@ -1918,6 +1995,7 @@
 	long psr;
 	local_irq_save(psr);
 	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
 	if (r)
 		goto fail;
 
@@ -1930,7 +2008,6 @@
 	kvm_purge_vmm_mapping(vcpu);
 	r = 0;
 fail:
-	local_irq_restore(psr);
 	return r;
 }
 
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index a8ae52e..e4b8231 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -21,6 +21,9 @@
 
 #include <linux/kvm_host.h>
 #include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "vti.h"
 #include "misc.h"
@@ -188,12 +191,35 @@
 	return result;
 }
 
+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
+{
+	struct pal_freq_ratio *ratio;
+	unsigned long sal_freq, sal_drift, factor;
+
+	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+					    &sal_freq, &sal_drift);
+	ratio = (struct pal_freq_ratio *)&result->v2;
+	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+		sn_rtc_cycles_per_second;
+
+	ratio->num = 3;
+	ratio->den = factor;
+}
+
 static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
 {
-
 	struct ia64_pal_retval result;
 
 	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+
+	if (vcpu->kvm->arch.is_sn2)
+		sn2_patch_itc_freq_ratios(&result);
+
 	return result;
 }
 
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index 6d6cbcb..ee541ce 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -20,6 +20,10 @@
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
 
 #endif
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index 32254ce..f793be3 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
@@ -11,6 +11,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/kvm_host.h>
 
 #include "vti.h"
 #include "asm-offsets.h"
@@ -140,6 +141,35 @@
 	;;
 END(kvm_asm_mov_from_ar)
 
+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	mov r24=b0
+	;;
+	ld8 r18=[r18]
+	ld8 r19=[r19]
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	add r19=r19,r18
+	shladd r17=r17,4,r20
+	;;
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	st8 [r16] = r19
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar_sn2)
+
+
 
 // mov r1=rr[r3]
 GLOBAL_ENTRY(kvm_asm_mov_from_rr)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index b1dc809..a8f84da 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -652,20 +652,25 @@
 		unsigned long isr, unsigned long iim)
 {
 	struct kvm_vcpu *v = current_vcpu;
+	long psr;
 
 	if (ia64_psr(regs)->cpl == 0) {
 		/* Allow hypercalls only when cpl = 0.  */
 		if (iim == DOMN_PAL_REQUEST) {
+			local_irq_save(psr);
 			set_pal_call_data(v);
 			vmm_transition(v);
 			get_pal_call_result(v);
 			vcpu_increment_iip(v);
+			local_irq_restore(psr);
 			return;
 		} else if (iim == DOMN_SAL_REQUEST) {
+			local_irq_save(psr);
 			set_sal_call_data(v);
 			vmm_transition(v);
 			get_sal_call_result(v);
 			vcpu_increment_iip(v);
+			local_irq_restore(psr);
 			return;
 		}
 	}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index a18ee17..a2c6c15 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -788,13 +788,29 @@
 		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
 }
 
+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm->arch.is_sn2)
+		return (*SN_RTC_BASE);
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 /************************************************************************
  * lsapic timer
  ***********************************************************************/
 u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
 {
 	unsigned long guest_itc;
-	guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
 
 	if (guest_itc >= VMX(vcpu, last_itc)) {
 		VMX(vcpu, last_itc) = guest_itc;
@@ -809,7 +825,7 @@
 	struct kvm_vcpu *v;
 	struct kvm *kvm;
 	int i;
-	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+	long itc_offset = val - kvm_get_itc(vcpu);
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	kvm = (struct kvm *)KVM_VM_BASE;
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9eee5c0..f4b4c89 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -30,15 +30,19 @@
 MODULE_LICENSE("GPL");
 
 extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
 long vmm_sanity = 1;
 
 struct kvm_vmm_info vmm_info = {
-	.module	     = THIS_MODULE,
-	.vmm_entry   = vmm_entry,
-	.tramp_entry = vmm_trampoline,
-	.vmm_ivt     = (unsigned long)&kvm_ia64_ivt,
+	.module			= THIS_MODULE,
+	.vmm_entry		= vmm_entry,
+	.tramp_entry		= vmm_trampoline,
+	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
+	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
+	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
 };
 
 static int __init  kvm_vmm_init(void)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ef1a01..40920c6 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -95,7 +95,7 @@
 	;;
 	srlz.i    // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -249,7 +249,7 @@
 	;;
 	srlz.i         // guarantee that interruption collection is on
 	;;
-	//(p15)ssm psr.i               // restore psr.i
+	(p15)ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -439,7 +439,7 @@
 	;;
 	srlz.i // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	adds r3=8,r2                // set up second base pointer
 	;;
 	KVM_SAVE_REST
@@ -819,7 +819,7 @@
 	;;
 	srlz.i     // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 	;;
 	KVM_SAVE_REST
@@ -842,7 +842,7 @@
 	;;
 	srlz.i   // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -871,7 +871,7 @@
 	;;
 	srlz.i   // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -898,7 +898,7 @@
 	;;
 	srlz.i    // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 	;;
 	KVM_SAVE_REST
@@ -920,7 +920,7 @@
 	;;
 	srlz.i
 	;;
-	//(p15) ssm psr.i
+	(p15) ssm psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -1333,7 +1333,7 @@
 	;;
 (p7)    srlz.i
 	;;
-//(p6)    ssm psr.i
+(p6)    ssm psr.i
 	;;
 	mov rp=rpsave
 	mov ar.pfs=pfssave
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 2c2501f..4290a42 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -254,7 +254,8 @@
 			"(p7) st8 [%2]=r9;;"
 			"ssm psr.ic;;"
 			"srlz.d;;"
-			/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+			"ssm psr.i;;"
+			"srlz.d;;"
 			: "=r"(ret) : "r"(iha), "r"(pte):"memory");
 
 	return ret;
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index 71c50dd..e95d5ad 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c
@@ -53,6 +53,32 @@
 	     cmp_ex, swap_ex);
 }
 
+static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable (const struct exception_table_entry *first,
 		const struct exception_table_entry *last,
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 66fd705..764f26a 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,7 +227,7 @@
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
+static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
@@ -239,6 +239,8 @@
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
 		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+	return 0;
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 81e4289..fbbfb97 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq,
+static int sn_set_msi_irq_affinity(unsigned int irq,
 				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
@@ -168,7 +168,7 @@
 	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
-		return;
+		return -1;
 
 	/*
 	 * Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@
 	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
 	sn_msi_info[irq].sn_irq_info = new_irq_info;
 	if (new_irq_info == NULL)
-		return;
+		return -1;
 
 	/*
 	 * Map the xio address into bus space
@@ -206,6 +206,8 @@
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpu_mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c
index 8d42057..cb5f37d 100644
--- a/arch/m32r/kernel/module.c
+++ b/arch/m32r/kernel/module.c
@@ -44,8 +44,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h
index 49d016e..83bbcfd 100644
--- a/arch/m68k/include/asm/m520xsim.h
+++ b/arch/m68k/include/asm/m520xsim.h
@@ -59,5 +59,14 @@
 #define MCFPIT_IMR		MCFINTC_IMRL
 #define MCFPIT_IMR_IBIT		(1 << MCFINT_PIT1)
 
+/*
+ *  Reset Controll Unit.
+ */
+#define	MCF_RCR			0xFC0A0000
+#define	MCF_RSR			0xFC0A0001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif  /* m520xsim_h */
diff --git a/arch/m68k/include/asm/m523xsim.h b/arch/m68k/include/asm/m523xsim.h
index bf39731..55183b5d 100644
--- a/arch/m68k/include/asm/m523xsim.h
+++ b/arch/m68k/include/asm/m523xsim.h
@@ -41,5 +41,14 @@
 #define	MCFSIM_DACR1		0x50		/* SDRAM base address 1 */
 #define	MCFSIM_DMR1		0x54		/* SDRAM address mask 1 */
 
+/*
+ *  Reset Controll Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif	/* m523xsim_h */
diff --git a/arch/m68k/include/asm/m527xsim.h b/arch/m68k/include/asm/m527xsim.h
index 1f63ab3..95f4f8e 100644
--- a/arch/m68k/include/asm/m527xsim.h
+++ b/arch/m68k/include/asm/m527xsim.h
@@ -70,5 +70,14 @@
 #define UART2_ENABLE_MASK	0x3f00 
 #endif
 
+/*
+ *  Reset Controll Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif	/* m527xsim_h */
diff --git a/arch/m68k/include/asm/m528xsim.h b/arch/m68k/include/asm/m528xsim.h
index 28bf783..d79c49f 100644
--- a/arch/m68k/include/asm/m528xsim.h
+++ b/arch/m68k/include/asm/m528xsim.h
@@ -56,6 +56,14 @@
 #define MCF5282_INTC0_ICR17     (volatile u8 *) (MCF_IPSBAR + 0x0C51)
 
 
+/*
+ *  Reset Control Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
 
 /*********************************************************************
 *
diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h
index ce60345..eb7fd44 100644
--- a/arch/m68k/include/asm/m532xsim.h
+++ b/arch/m68k/include/asm/m532xsim.h
@@ -127,6 +127,18 @@
 
 /*********************************************************************
  *
+ * Reset Controller Module
+ *
+ *********************************************************************/
+
+#define	MCF_RCR			0xFC0A0000
+#define	MCF_RSR			0xFC0A0001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
+/*********************************************************************
+ *
  * Inter-IC (I2C) Module
  *
  *********************************************************************/
diff --git a/arch/m68k/include/asm/processor_no.h b/arch/m68k/include/asm/processor_no.h
index 91cba18..7a1e0ba 100644
--- a/arch/m68k/include/asm/processor_no.h
+++ b/arch/m68k/include/asm/processor_no.h
@@ -72,10 +72,10 @@
 	unsigned char  fpstate[FPSTATESIZE];  /* floating point state */
 };
 
-#define INIT_THREAD  { \
-	sizeof(init_stack) + (unsigned long) init_stack, 0, \
-	PS_S, __KERNEL_DS, \
-	{0, 0}, 0, {0,}, {0, 0, 0}, {0,}, \
+#define INIT_THREAD  {							\
+	.ksp	= sizeof(init_stack) + (unsigned long) init_stack,	\
+	.sr	= PS_S,							\
+	.fs	= __KERNEL_DS,						\
 }
 
 /*
diff --git a/arch/m68k/include/asm/suspend.h b/arch/m68k/include/asm/suspend.h
deleted file mode 100644
index 57b3ddb..0000000
--- a/arch/m68k/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M68K_SUSPEND_H
-#define _M68K_SUSPEND_H
-
-/* Dummy include. */
-
-#endif  /* _M68K_SUSPEND_H */
diff --git a/arch/m68k/include/asm/swab.h b/arch/m68k/include/asm/swab.h
index 9e3054e..5b754aa 100644
--- a/arch/m68k/include/asm/swab.h
+++ b/arch/m68k/include/asm/swab.h
@@ -1,7 +1,7 @@
 #ifndef _M68K_SWAB_H
 #define _M68K_SWAB_H
 
-#include <asm/types.h>
+#include <linux/types.h>
 #include <linux/compiler.h>
 
 #define __SWAB_64_THRU_32__
diff --git a/arch/m68k/include/asm/system_no.h b/arch/m68k/include/asm/system_no.h
index 4496c0a..3c0718d 100644
--- a/arch/m68k/include/asm/system_no.h
+++ b/arch/m68k/include/asm/system_no.h
@@ -203,113 +203,6 @@
 #include <asm-generic/cmpxchg.h>
 #endif
 
-#if defined( CONFIG_M68328 ) || defined( CONFIG_M68EZ328 ) || \
-	defined (CONFIG_M68360) || defined( CONFIG_M68VZ328 )
-#define HARD_RESET_NOW() ({		\
-        local_irq_disable();		\
-        asm("				\
-        moveal #0x10c00000, %a0;	\
-        moveb #0, 0xFFFFF300;		\
-        moveal 0(%a0), %sp;		\
-        moveal 4(%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#endif
-
-#ifdef CONFIG_COLDFIRE
-#if defined(CONFIG_M5272) && defined(CONFIG_NETtel)
-/*
- * Need to account for broken early mask of 5272 silicon. So don't
- * jump through the original start address. Jump strait into the
- * known start of the FLASH code.
- */
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-        jmp 0xf0000400;			\
-        ");				\
-})
-#elif defined(CONFIG_NETtel) || \
-      defined(CONFIG_SECUREEDGEMP3) || defined(CONFIG_CLEOPATRA)
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-	moveal #0x10000044, %a0;	\
-	movel #0xffffffff, (%a0);	\
-	moveal #0x10000001, %a0;	\
-	moveb #0x00, (%a0);		\
-        moveal #0xf0000004, %a0;	\
-        moveal (%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#elif defined(CONFIG_M5272)
-/*
- * Retrieve the boot address in flash using CSBR0 and CSOR0
- * find the reset vector at flash_address + 4 (e.g. 0x400)
- * remap it in the flash's current location (e.g. 0xf0000400)
- * and jump there.
- */ 
-#define HARD_RESET_NOW() ({		\
-	asm("				\
-	movew #0x2700, %%sr;		\
-	move.l	%0+0x40,%%d0;		\
-	and.l	%0+0x44,%%d0;		\
-	andi.l	#0xfffff000,%%d0;	\
-	mov.l	%%d0,%%a0;		\
-	or.l	4(%%a0),%%d0;		\
-	mov.l	%%d0,%%a0;		\
-	jmp (%%a0);"			\
-	: /* No output */		\
-	: "o" (*(char *)MCF_MBAR) );	\
-})
-#elif defined(CONFIG_M528x)
-/*
- * The MCF528x has a bit (SOFTRST) in memory (Reset Control Register RCR),
- * that when set, resets the MCF528x.
- */
-#define HARD_RESET_NOW() \
-({						\
-	unsigned char volatile *reset;		\
-	asm("move.w	#0x2700, %sr");		\
-	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0x110000));	\
-	while(1)				\
-	*reset |= (0x01 << 7);\
-})
-#elif defined(CONFIG_M523x)
-#define HARD_RESET_NOW() ({		\
-	asm("				\
-	movew #0x2700, %sr;		\
-	movel #0x01000000, %sp;		\
-	moveal #0x40110000, %a0;	\
-	moveb #0x80, (%a0);		\
-	");				\
-})
-#elif defined(CONFIG_M520x)
-	/*
-	 * The MCF5208 has a bit (SOFTRST) in memory (Reset Control Register 
-	 * RCR), that when set, resets the MCF5208.
-	 */
-#define HARD_RESET_NOW() 		\
-({					\
-	unsigned char volatile *reset;	\
-	asm("move.w     #0x2700, %sr");	\
-	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0xA0000));	\
-	while(1)			\
-		*reset |= 0x80;		\
-})
-#else
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-        moveal #0x4, %a0;		\
-        moveal (%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#endif
-#endif
 #define arch_align_stack(x) (x)
 
 
diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c
index 774862b..cd6bcb1c9 100644
--- a/arch/m68k/kernel/module.c
+++ b/arch/m68k/kernel/module.c
@@ -31,8 +31,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m68knommu/kernel/entry.S b/arch/m68knommu/kernel/entry.S
index f4782d2..f56faa5 100644
--- a/arch/m68knommu/kernel/entry.S
+++ b/arch/m68knommu/kernel/entry.S
@@ -26,7 +26,6 @@
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
-#include <asm/thread_info.h>
 #include <asm/errno.h>
 #include <asm/setup.h>
 #include <asm/segment.h>
diff --git a/arch/m68knommu/kernel/module.c b/arch/m68knommu/kernel/module.c
index 3b1a2ff..d11ffae 100644
--- a/arch/m68knommu/kernel/module.c
+++ b/arch/m68knommu/kernel/module.c
@@ -23,8 +23,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 5985f19..5c2bb3e 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -166,15 +166,13 @@
 	printk(KERN_INFO "Motorola M5235EVB support (C)2005 Syn-tech Systems, Inc. (Jate Sujjavanich)\n");
 #endif
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x "
-		"BSS=0x%06x-0x%06x\n", (int) &_stext, (int) &_etext,
-		(int) &_sdata, (int) &_edata,
-		(int) &_sbss, (int) &_ebss);
-	printk(KERN_DEBUG "MEMORY -> ROMFS=0x%06x-0x%06x MEM=0x%06x-0x%06x\n ",
-		(int) &_ebss, (int) memory_start,
-		(int) memory_start, (int) memory_end);
-#endif
+	pr_debug("KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x "
+		 "BSS=0x%06x-0x%06x\n", (int) &_stext, (int) &_etext,
+		 (int) &_sdata, (int) &_edata,
+		 (int) &_sbss, (int) &_ebss);
+	pr_debug("MEMORY -> ROMFS=0x%06x-0x%06x MEM=0x%06x-0x%06x\n ",
+		 (int) &_ebss, (int) memory_start,
+		 (int) memory_start, (int) memory_end);
 
 	/* Keep a copy of command line */
 	*cmdline_p = &command_line[0];
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 7befc0c..b1703c6 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -126,9 +126,7 @@
 	unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */
 	unsigned long end_mem   = memory_end; /* DAVIDM - this must not include kernel stack at top */
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "Mem_init: start=%lx, end=%lx\n", start_mem, end_mem);
-#endif
+	pr_debug("Mem_init: start=%lx, end=%lx\n", start_mem, end_mem);
 
 	end_mem &= PAGE_MASK;
 	high_memory = (void *) end_mem;
diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c
index 53a5920..f6f7987 100644
--- a/arch/m68knommu/platform/5206/config.c
+++ b/arch/m68knommu/platform/5206/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,10 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5206_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -109,10 +104,21 @@
 
 /***************************************************************************/
 
+void m5206_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
-	mach_reset = coldfire_reset;
+	mach_reset = m5206_cpu_reset;
 }
 
 /***************************************************************************/
diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c
index db90254..65887799 100644
--- a/arch/m68knommu/platform/5206e/config.c
+++ b/arch/m68knommu/platform/5206e/config.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,10 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5206e_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -109,6 +104,17 @@
 
 /***************************************************************************/
 
+void m5206e_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -119,7 +125,7 @@
 	commandp[size-1] = 0;
 #endif /* CONFIG_NETtel */
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5206e_cpu_reset;
 }
 
 /***************************************************************************/
diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c
index 855fc6a..1c43a8a 100644
--- a/arch/m68knommu/platform/520x/config.c
+++ b/arch/m68knommu/platform/520x/config.c
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -23,10 +22,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m520x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -169,9 +164,17 @@
 
 /***************************************************************************/
 
+static void m520x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
-	mach_reset = coldfire_reset;
+	mach_reset = m520x_cpu_reset;
 	m520x_uarts_init();
 	m520x_fec_init();
 }
diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c
index 74133f2..961fefe 100644
--- a/arch/m68knommu/platform/523x/config.c
+++ b/arch/m68knommu/platform/523x/config.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -24,10 +23,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m523x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -145,13 +140,20 @@
 {
 	/* Everything is auto-vectored on the 523x */
 }
+/***************************************************************************/
+
+static void m523x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
 
 /***************************************************************************/
 
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_disableall();
-	mach_reset = coldfire_reset;
+	mach_reset = m523x_cpu_reset;
 	m523x_uarts_init();
 	m523x_fec_init();
 }
diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c
index 9eab19d..93d9988 100644
--- a/arch/m68knommu/platform/5249/config.c
+++ b/arch/m68knommu/platform/5249/config.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -20,10 +19,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5249_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -106,10 +101,21 @@
 
 /***************************************************************************/
 
+void m5249_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
-	mach_reset = coldfire_reset;
+	mach_reset = m5249_cpu_reset;
 }
 
 /***************************************************************************/
diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c
index e049245..5f95fcd 100644
--- a/arch/m68knommu/platform/5272/config.c
+++ b/arch/m68knommu/platform/5272/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,8 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -170,6 +167,19 @@
 
 /***************************************************************************/
 
+static void m5272_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to reset, and enabled */
+	__raw_writew(0, MCF_MBAR + MCFSIM_WIRR);
+	__raw_writew(1, MCF_MBAR + MCFSIM_WRRR);
+	__raw_writew(0, MCF_MBAR + MCFSIM_WCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 #if defined (CONFIG_MOD5272)
@@ -194,7 +204,7 @@
 
 	mcf_timervector = 69;
 	mcf_profilevector = 70;
-	mach_reset = coldfire_reset;
+	mach_reset = m5272_cpu_reset;
 }
 
 /***************************************************************************/
diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c
index 428b159..f746439 100644
--- a/arch/m68knommu/platform/527x/config.c
+++ b/arch/m68knommu/platform/527x/config.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -24,10 +23,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m527x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -227,10 +222,18 @@
 
 /***************************************************************************/
 
+static void m527x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_disableall();
-	mach_reset = coldfire_reset;
+	mach_reset = m527x_cpu_reset;
 	m527x_uarts_init();
 	m527x_fec_init();
 }
diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c
index bee526f..a1d1a61 100644
--- a/arch/m68knommu/platform/528x/config.c
+++ b/arch/m68knommu/platform/528x/config.c
@@ -31,10 +31,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m528x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -171,6 +167,14 @@
 
 /***************************************************************************/
 
+static void m528x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
+
+/***************************************************************************/
+
 #ifdef CONFIG_WILDFIRE
 void wildfire_halt(void)
 {
@@ -214,6 +218,7 @@
 
 static int __init init_BSP(void)
 {
+	mach_reset = m528x_cpu_reset;
 	m528x_uarts_init();
 	m528x_fec_init();
 	platform_add_devices(m528x_devices, ARRAY_SIZE(m528x_devices));
diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c
index 44803bf..39da9e9 100644
--- a/arch/m68knommu/platform/5307/config.c
+++ b/arch/m68knommu/platform/5307/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -22,8 +21,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -119,6 +116,17 @@
 
 /***************************************************************************/
 
+void m5307_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -134,7 +142,7 @@
 	mcf_timerlevel = 6;
 #endif
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5307_cpu_reset;
 
 #ifdef CONFIG_BDM_DISABLE
 	/*
diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c
index 591f2f8..cdb7619 100644
--- a/arch/m68knommu/platform/532x/config.c
+++ b/arch/m68knommu/platform/532x/config.c
@@ -31,8 +31,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -164,6 +162,14 @@
 
 /***************************************************************************/
 
+static void m532x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -181,7 +187,7 @@
 
 	mcf_timervector = 64+32;
 	mcf_profilevector = 64+33;
-	mach_reset = coldfire_reset;
+	mach_reset = m532x_cpu_reset;
 
 #ifdef CONFIG_BDM_DISABLE
 	/*
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index 0ee8c1a..b41d942 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,8 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -110,6 +107,17 @@
 
 /***************************************************************************/
 
+void m5407_cpu_reset(void)
+{
+	local_irq_disable();
+	/* set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -121,7 +129,7 @@
 	mcf_timerlevel = 6;
 #endif
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5407_cpu_reset;
 }
 
 /***************************************************************************/
diff --git a/arch/m68knommu/platform/coldfire/vectors.c b/arch/m68knommu/platform/coldfire/vectors.c
index 6cf8946..bdca029 100644
--- a/arch/m68knommu/platform/coldfire/vectors.c
+++ b/arch/m68knommu/platform/coldfire/vectors.c
@@ -96,10 +96,3 @@
 }
 
 /***************************************************************************/
-
-void coldfire_reset(void)
-{
-	HARD_RESET_NOW();
-}
-
-/***************************************************************************/
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 8cc312b..b50b845 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -6,6 +6,7 @@
 config MICROBLAZE
 	def_bool y
 	select HAVE_LMB
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 
 config SWAP
 	def_bool n
@@ -49,13 +50,14 @@
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool y
+
 config PCI
-	depends on !MMU
 	def_bool n
 
 config NO_DMA
-	depends on !MMU
-	def_bool n
+	def_bool y
 
 source "init/Kconfig"
 
@@ -72,7 +74,8 @@
 source "kernel/Kconfig.hz"
 
 config MMU
-	def_bool n
+	bool "MMU support"
+	default n
 
 config NO_MMU
 	bool
@@ -105,9 +108,6 @@
 config OF
 	def_bool y
 
-config OF_DEVICE
-	def_bool y
-
 config PROC_DEVICETREE
 	bool "Support for device tree in /proc"
 	depends on PROC_FS
@@ -118,6 +118,113 @@
 
 endmenu
 
+menu "Advanced setup"
+
+config ADVANCED_OPTIONS
+	bool "Prompt for advanced kernel configuration options"
+	depends on MMU
+	help
+	  This option will enable prompting for a variety of advanced kernel
+	  configuration options.  These options can cause the kernel to not
+	  work if they are set incorrectly, but can be used to optimize certain
+	  aspects of kernel memory management.
+
+	  Unless you know what you are doing, say N here.
+
+comment "Default settings for advanced configuration options are used"
+	depends on !ADVANCED_OPTIONS
+
+config HIGHMEM_START_BOOL
+	bool "Set high memory pool address"
+	depends on ADVANCED_OPTIONS && HIGHMEM
+	help
+	  This option allows you to set the base address of the kernel virtual
+	  area used to map high memory pages.  This can be useful in
+	  optimizing the layout of kernel virtual memory.
+
+	  Say N here unless you know what you are doing.
+
+config HIGHMEM_START
+	hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
+	depends on MMU
+	default "0xfe000000"
+
+config LOWMEM_SIZE_BOOL
+	bool "Set maximum low memory"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the maximum amount of memory which
+	  will be used as "low memory", that is, memory which the kernel can
+	  access directly, without having to set up a kernel virtual mapping.
+	  This can be useful in optimizing the layout of kernel virtual
+	  memory.
+
+	  Say N here unless you know what you are doing.
+
+config LOWMEM_SIZE
+	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
+	depends on MMU
+	default "0x30000000"
+
+config KERNEL_START_BOOL
+	bool "Set custom kernel base address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will map low memory (the kernel image will be linked at
+	  this address).  This can be useful in optimizing the virtual memory
+	  layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config KERNEL_START
+	hex "Virtual address of kernel base" if KERNEL_START_BOOL
+	default "0xc0000000" if MMU
+	default KERNEL_BASE_ADDR if !MMU
+
+config TASK_SIZE_BOOL
+	bool "Set custom user task size"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the amount of virtual address space
+	  allocated to user tasks.  This can be useful in optimizing the
+	  virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config TASK_SIZE
+	hex "Size of user task space" if TASK_SIZE_BOOL
+	depends on MMU
+	default "0x80000000"
+
+config CONSISTENT_START_BOOL
+	bool "Set custom consistent memory pool address"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the base virtual address
+	  of the the consistent memory pool.  This pool of virtual
+	  memory is used to make consistent memory allocations.
+
+config CONSISTENT_START
+	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
+	depends on MMU
+	default "0xff100000" if NOT_COHERENT_CACHE
+
+config CONSISTENT_SIZE_BOOL
+	bool "Set custom consistent memory pool size"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the size of the the
+	  consistent memory pool.  This pool of virtual memory
+	  is used to make consistent memory allocations.
+
+config CONSISTENT_SIZE
+	hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
+	depends on MMU
+	default "0x00200000" if NOT_COHERENT_CACHE
+
+endmenu
+
 source "mm/Kconfig"
 
 menu "Exectuable file formats"
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index aaadfa7..d0bcf80 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -1,4 +1,8 @@
+ifeq ($(CONFIG_MMU),y)
+UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\"
+else
 UTS_SYSNAME = -DUTS_SYSNAME=\"uClinux\"
+endif
 
 # What CPU vesion are we building for, and crack it open
 # as major.minor.rev
@@ -36,6 +40,8 @@
 # r31 holds current when in kernel mode
 CFLAGS_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
 
+LDFLAGS		:=
+LDFLAGS_vmlinux	:=
 LDFLAGS_BLOB := --format binary --oformat elf32-microblaze
 
 LIBGCC := $(shell $(CC) $(CFLAGS_KERNEL) -print-libgcc-file-name)
diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index 844edf4..c2bb043 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -7,6 +7,8 @@
 OBJCOPYFLAGS_linux.bin  := -O binary
 
 $(obj)/linux.bin: vmlinux FORCE
+	[ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
+	touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
 	$(call if_changed,objcopy)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
new file mode 100644
index 0000000..bd0b85e
--- /dev/null
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -0,0 +1,798 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc6
+# Fri May 22 10:02:33 2009
+#
+CONFIG_MICROBLAZE=y
+# CONFIG_SWAP is not set
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+# CONFIG_GENERIC_TIME_VSYSCALL is not set
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="rootfs.cpio"
+CONFIG_INITRAMFS_ROOT_UID=0
+CONFIG_INITRAMFS_ROOT_GID=0
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_INITRAMFS_COMPRESSION_NONE=y
+# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
+# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+# CONFIG_SHMEM is not set
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_SLOW_WORK is not set
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_BASE_SMALL=1
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# Platform options
+#
+CONFIG_PLATFORM_GENERIC=y
+CONFIG_OPT_LIB_FUNCTION=y
+CONFIG_OPT_LIB_ASM=y
+CONFIG_ALLOW_EDIT_AUTO=y
+
+#
+# Automatic platform settings from Kconfig.auto
+#
+
+#
+# Definitions for MICROBLAZE0
+#
+CONFIG_KERNEL_BASE_ADDR=0x90000000
+CONFIG_XILINX_MICROBLAZE0_FAMILY="virtex5"
+CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
+CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
+CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
+CONFIG_XILINX_MICROBLAZE0_USE_DIV=1
+CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL=2
+CONFIG_XILINX_MICROBLAZE0_USE_FPU=2
+CONFIG_XILINX_MICROBLAZE0_HW_VER="7.10.d"
+
+#
+# Processor type and features
+#
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_MMU=y
+
+#
+# Boot options
+#
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyUL0,115200"
+CONFIG_CMDLINE_FORCE=y
+CONFIG_OF=y
+CONFIG_PROC_DEVICETREE=y
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Exectuable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=8192
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+CONFIG_NETDEV_1000=y
+CONFIG_NETDEV_10000=y
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_UARTLITE=y
+CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_XILINX_HWICAP is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+CONFIG_CIFS=y
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+# CONFIG_CIFS_WEAK_PW_HASH is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SLAB_LEAK is not set
+CONFIG_DEBUG_SPINLOCK=y
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_SAMPLES is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_HEART_BEAT=y
+CONFIG_DEBUG_BOOTMEM=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 31820df..db5294c 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,26 +1,3 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += linkage.h
-header-y += msgbuf.h
-header-y += poll.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += socket.h
-header-y += sockios.h
-header-y += statfs.h
-header-y += stat.h
-header-y += termbits.h
-header-y += ucontext.h
-
-unifdef-y += cputable.h
-unifdef-y += elf.h
-unifdef-y += termios.h
+header-y  += elf.h
diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index 3300b78..f989d6a 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2007 PetaLogix
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2007 John Williams <john.williams@petalogix.com>
  * based on v850 version which was
  * Copyright (C) 2001,02,03 NEC Electronics Corporation
@@ -43,6 +44,23 @@
 #define flush_icache_range(start, len)	__invalidate_icache_range(start, len)
 #define flush_icache_page(vma, pg)		do { } while (0)
 
+#ifndef CONFIG_MMU
+# define flush_icache_user_range(start, len)	do { } while (0)
+#else
+# define flush_icache_user_range(vma, pg, adr, len) __invalidate_icache_all()
+
+# define flush_page_to_ram(page)		do { } while (0)
+
+# define flush_icache()			__invalidate_icache_all()
+# define flush_cache_sigtramp(vaddr) \
+			__invalidate_icache_range(vaddr, vaddr + 8)
+
+# define flush_dcache_mmap_lock(mapping)	do { } while (0)
+# define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+# define flush_cache_dup_mm(mm)			do { } while (0)
+#endif
+
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
diff --git a/arch/microblaze/include/asm/checksum.h b/arch/microblaze/include/asm/checksum.h
index 92b3076..97ea46b 100644
--- a/arch/microblaze/include/asm/checksum.h
+++ b/arch/microblaze/include/asm/checksum.h
@@ -51,7 +51,8 @@
  * here even more important to align src and dst on a 32-bit (or even
  * better 64-bit) boundary
  */
-extern __wsum csum_partial_copy(const char *src, char *dst, int len, int sum);
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+								__wsum sum);
 
 /*
  * the same as csum_partial_copy, but copies from user space.
@@ -59,8 +60,8 @@
  * here even more important to align src and dst on a 32-bit (or even
  * better 64-bit) boundary
  */
-extern __wsum csum_partial_copy_from_user(const char *src, char *dst,
-					int len, int sum, int *csum_err);
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
 
 #define csum_partial_copy_nocheck(src, dst, len, sum)	\
 	csum_partial_copy((src), (dst), (len), (sum))
@@ -75,11 +76,12 @@
 /*
  *	Fold a partial checksum
  */
-static inline __sum16 csum_fold(unsigned int sum)
+static inline __sum16 csum_fold(__wsum csum)
 {
+	u32 sum = (__force u32)csum;
 	sum = (sum & 0xffff) + (sum >> 16);
 	sum = (sum & 0xffff) + (sum >> 16);
-	return ~sum;
+	return (__force __sum16)~sum;
 }
 
 static inline __sum16
@@ -93,6 +95,6 @@
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
-extern __sum16 ip_compute_csum(const unsigned char *buff, int len);
+extern __sum16 ip_compute_csum(const void *buff, int len);
 
 #endif /* _ASM_MICROBLAZE_CHECKSUM_H */
diff --git a/arch/microblaze/include/asm/current.h b/arch/microblaze/include/asm/current.h
index 8375ea9..29303ed 100644
--- a/arch/microblaze/include/asm/current.h
+++ b/arch/microblaze/include/asm/current.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,12 @@
 #ifndef _ASM_MICROBLAZE_CURRENT_H
 #define _ASM_MICROBLAZE_CURRENT_H
 
+/*
+ * Register used to hold the current task pointer while in the kernel.
+ * Any `call clobbered' register without a special meaning should be OK,
+ * but check asm/microblaze/kernel/entry.S to be sure.
+ */
+#define CURRENT_TASK	r31
 # ifndef __ASSEMBLY__
 /*
  * Dedicate r31 to keeping the current task pointer
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 1733625..d00e400 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -1,129 +1 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H
-#define _ASM_MICROBLAZE_DMA_MAPPING_H
-
-#include <asm/cacheflush.h>
-#include <linux/io.h>
-#include <linux/bug.h>
-
-struct scatterlist;
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-/* FIXME */
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
-	return 1;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page,
-	unsigned long offset, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	enum dma_data_direction direction)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-	enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-			enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return 0;
-}
-
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-				dma_addr_t *dma_handle, int flag)
-{
-	return NULL; /* consistent_alloc(flag, size, dma_handle); */
-}
-
-static inline void dma_free_coherent(struct device *dev, size_t size,
-			void *vaddr, dma_addr_t dma_handle)
-{
-	BUG();
-}
-
-static inline dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-
-	return virt_to_bus(ptr);
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-				    size_t size,
-				    enum dma_data_direction direction)
-{
-	switch (direction) {
-	case DMA_FROM_DEVICE:
-		flush_dcache_range((unsigned)dma_addr,
-			(unsigned)dma_addr + size);
-			/* Fall through */
-	case DMA_TO_DEVICE:
-		break;
-	default:
-		BUG();
-	}
-}
-
-#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */
+#include <asm-generic/dma-mapping-broken.h>
diff --git a/arch/microblaze/include/asm/dma.h b/arch/microblaze/include/asm/dma.h
index 0967fa0..08c073b 100644
--- a/arch/microblaze/include/asm/dma.h
+++ b/arch/microblaze/include/asm/dma.h
@@ -9,8 +9,13 @@
 #ifndef _ASM_MICROBLAZE_DMA_H
 #define _ASM_MICROBLAZE_DMA_H
 
+#ifndef CONFIG_MMU
 /* we don't have dma address limit. define it as zero to be
  * unlimited. */
 #define MAX_DMA_ADDRESS		(0)
+#else
+/* Virtual address corresponding to last available physical memory address.  */
+#define MAX_DMA_ADDRESS (CONFIG_KERNEL_START + memory_size - 1)
+#endif
 
 #endif /* _ASM_MICROBLAZE_DMA_H */
diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h
index 81337f2..f92fc0d 100644
--- a/arch/microblaze/include/asm/elf.h
+++ b/arch/microblaze/include/asm/elf.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -27,4 +29,95 @@
  */
 #define ELF_CLASS	ELFCLASS32
 
+#ifndef __uClinux__
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/byteorder.h>
+
+#ifndef ELF_GREG_T
+#define ELF_GREG_T
+typedef unsigned long elf_greg_t;
+#endif
+
+#ifndef ELF_NGREG
+#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
+#endif
+
+#ifndef ELF_GREGSET_T
+#define ELF_GREGSET_T
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+#endif
+
+#ifndef ELF_FPREGSET_T
+#define ELF_FPREGSET_T
+
+/* TBD */
+#define ELF_NFPREG	33	/* includes fsr */
+typedef unsigned long elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+/* typedef struct user_fpu_struct elf_fpregset_t; */
+#endif
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+
+#define ELF_ET_DYN_BASE         (0x08000000)
+
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA	ELFDATA2LSB
+#else
+#define ELF_DATA	ELFDATA2MSB
+#endif
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *) &_dest, (char *) _regs,		\
+	sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports.  This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+
+ * For the moment, we have only optimizations for the Intel generations,
+ * but that could change...
+ */
+#define ELF_PLATFORM  (NULL)
+
+/* Added _f parameter. Is this definition correct: TBD */
+#define ELF_PLAT_INIT(_r, _f)				\
+do {							\
+	_r->r1 =  _r->r1 =  _r->r2 =  _r->r3 =		\
+	_r->r4 =  _r->r5 =  _r->r6 =  _r->r7 =		\
+	_r->r8 =  _r->r9 =  _r->r10 = _r->r11 =		\
+	_r->r12 = _r->r13 = _r->r14 = _r->r15 =		\
+	_r->r16 = _r->r17 = _r->r18 = _r->r19 =		\
+	_r->r20 = _r->r21 = _r->r22 = _r->r23 =		\
+	_r->r24 = _r->r25 = _r->r26 = _r->r27 =		\
+	_r->r28 = _r->r29 = _r->r30 = _r->r31 =		\
+	0;						\
+} while (0)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT)
+#endif
+
+#endif /* __uClinux__ */
+
 #endif /* _ASM_MICROBLAZE_ELF_H */
diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h
index e4c3aef..61abbd2 100644
--- a/arch/microblaze/include/asm/entry.h
+++ b/arch/microblaze/include/asm/entry.h
@@ -1,8 +1,8 @@
 /*
  * Definitions used by low-level trap handlers
  *
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2007 - 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2007 John Williams <john.williams@petalogix.com>
  *
  * This file is subject to the terms and conditions of the GNU General
@@ -31,7 +31,40 @@
 DECLARE_PER_CPU(unsigned int, CURRENT_SAVE); /* Saved current pointer */
 # endif /* __ASSEMBLY__ */
 
+#ifndef CONFIG_MMU
+
 /* noMMU hasn't any space for args */
 # define STATE_SAVE_ARG_SPACE	(0)
 
+#else /* CONFIG_MMU */
+
+/* If true, system calls save and restore all registers (except result
+ * registers, of course).  If false, then `call clobbered' registers
+ * will not be preserved, on the theory that system calls are basically
+ * function calls anyway, and the caller should be able to deal with it.
+ * This is a security risk, of course, as `internal' values may leak out
+ * after a system call, but that certainly doesn't matter very much for
+ * a processor with no MMU protection!  For a protected-mode kernel, it
+ * would be faster to just zero those registers before returning.
+ *
+ * I can not rely on the glibc implementation. If you turn it off make
+ * sure that r11/r12 is saved in user-space. --KAA
+ *
+ * These are special variables using by the kernel trap/interrupt code
+ * to save registers in, at a time when there are no spare registers we
+ * can use to do so, and we can't depend on the value of the stack
+ * pointer.  This means that they must be within a signed 16-bit
+ * displacement of 0x00000000.
+ */
+
+/* A `state save frame' is a struct pt_regs preceded by some extra space
+ * suitable for a function call stack frame. */
+
+/* Amount of room on the stack reserved for arguments and to satisfy the
+ * C calling conventions, in addition to the space used by the struct
+ * pt_regs that actually holds saved values. */
+#define STATE_SAVE_ARG_SPACE	(6*4) /* Up to six arguments */
+
+#endif /* CONFIG_MMU */
+
 #endif /* _ASM_MICROBLAZE_ENTRY_H */
diff --git a/arch/microblaze/include/asm/exceptions.h b/arch/microblaze/include/asm/exceptions.h
index 24ca540..90731df 100644
--- a/arch/microblaze/include/asm/exceptions.h
+++ b/arch/microblaze/include/asm/exceptions.h
@@ -1,8 +1,8 @@
 /*
  * Preliminary support for HW exception handing for Microblaze
  *
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2005 John Williams <jwilliams@itee.uq.edu.au>
  *
  * This file is subject to the terms and conditions of the GNU General
@@ -64,21 +64,13 @@
 void die(const char *str, struct pt_regs *fp, long err);
 void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr);
 
-#if defined(CONFIG_XMON)
-extern void xmon(struct pt_regs *regs);
-extern int xmon_bpt(struct pt_regs *regs);
-extern int xmon_sstep(struct pt_regs *regs);
-extern int xmon_iabr_match(struct pt_regs *regs);
-extern int xmon_dabr_match(struct pt_regs *regs);
-extern void (*xmon_fault_handler)(struct pt_regs *regs);
+#ifdef CONFIG_MMU
+void __bug(const char *file, int line, void *data);
+int bad_trap(int trap_num, struct pt_regs *regs);
+int debug_trap(struct pt_regs *regs);
+#endif /* CONFIG_MMU */
 
-void (*debugger)(struct pt_regs *regs) = xmon;
-int (*debugger_bpt)(struct pt_regs *regs) = xmon_bpt;
-int (*debugger_sstep)(struct pt_regs *regs) = xmon_sstep;
-int (*debugger_iabr_match)(struct pt_regs *regs) = xmon_iabr_match;
-int (*debugger_dabr_match)(struct pt_regs *regs) = xmon_dabr_match;
-void (*debugger_fault_handler)(struct pt_regs *regs);
-#elif defined(CONFIG_KGDB)
+#if defined(CONFIG_KGDB)
 void (*debugger)(struct pt_regs *regs);
 int (*debugger_bpt)(struct pt_regs *regs);
 int (*debugger_sstep)(struct pt_regs *regs);
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index acf0da5..6847c15 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -13,7 +13,6 @@
 
 #include <asm/unaligned.h>
 
-#define	flat_stack_align(sp) /* nothing needed */
 #define	flat_argvp_envp_on_stack()	0
 #define	flat_old_ram_flag(flags)	(flags)
 #define	flat_reloc_valid(reloc, size)	((reloc) <= (size))
diff --git a/arch/microblaze/include/asm/gpio.h b/arch/microblaze/include/asm/gpio.h
index ea04632..2345ac3 100644
--- a/arch/microblaze/include/asm/gpio.h
+++ b/arch/microblaze/include/asm/gpio.h
@@ -11,8 +11,8 @@
  * (at your option) any later version.
  */
 
-#ifndef __ASM_POWERPC_GPIO_H
-#define __ASM_POWERPC_GPIO_H
+#ifndef _ASM_MICROBLAZE_GPIO_H
+#define _ASM_MICROBLAZE_GPIO_H
 
 #include <linux/errno.h>
 #include <asm-generic/gpio.h>
@@ -53,4 +53,4 @@
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* __ASM_POWERPC_GPIO_H */
+#endif /* _ASM_MICROBLAZE_GPIO_H */
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 8b5853e..5c17342 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -12,6 +14,9 @@
 #include <asm/byteorder.h>
 #include <asm/page.h>
 #include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/mm.h>          /* Get struct page {...} */
+
 
 #define IO_SPACE_LIMIT (0xFFFFFFFF)
 
@@ -112,6 +117,30 @@
 #define memcpy_fromio(a, b, c)	memcpy((a), (void *)(b), (c))
 #define memcpy_toio(a, b, c)	memcpy((void *)(a), (b), (c))
 
+#ifdef CONFIG_MMU
+
+#define mm_ptov(addr)		((void *)__phys_to_virt(addr))
+#define mm_vtop(addr)		((unsigned long)__virt_to_phys(addr))
+#define phys_to_virt(addr)	((void *)__phys_to_virt(addr))
+#define virt_to_phys(addr)	((unsigned long)__virt_to_phys(addr))
+#define virt_to_bus(addr)	((unsigned long)__virt_to_phys(addr))
+
+#define __page_address(page) \
+		(PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT))
+#define page_to_phys(page)	virt_to_phys((void *)__page_address(page))
+#define page_to_bus(page)	(page_to_phys(page))
+#define bus_to_virt(addr)	(phys_to_virt(addr))
+
+extern void iounmap(void *addr);
+/*extern void *__ioremap(phys_addr_t address, unsigned long size,
+		unsigned long flags);*/
+extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
+#define ioremap_writethrough(addr, size) ioremap((addr), (size))
+#define ioremap_nocache(addr, size)      ioremap((addr), (size))
+#define ioremap_fullcache(addr, size)    ioremap((addr), (size))
+
+#else /* CONFIG_MMU */
+
 /**
  *	virt_to_phys - map virtual addresses to physical
  *	@address: address to remap
@@ -160,6 +189,8 @@
 #define iounmap(addr)		((void)0)
 #define ioremap_nocache(physaddr, size)	ioremap(physaddr, size)
 
+#endif /* CONFIG_MMU */
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/microblaze/include/asm/mmu.h b/arch/microblaze/include/asm/mmu.h
index 0e0431d..66cad6a 100644
--- a/arch/microblaze/include/asm/mmu.h
+++ b/arch/microblaze/include/asm/mmu.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,11 +11,109 @@
 #ifndef _ASM_MICROBLAZE_MMU_H
 #define _ASM_MICROBLAZE_MMU_H
 
-#ifndef __ASSEMBLY__
+# ifndef CONFIG_MMU
+#  ifndef __ASSEMBLY__
 typedef struct {
 	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
-#endif /* __ASSEMBLY__ */
+#  endif /* __ASSEMBLY__ */
+# else /* CONFIG_MMU */
+#  ifdef __KERNEL__
+#   ifndef __ASSEMBLY__
 
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
+
+/* Hardware Page Table Entry */
+typedef struct _PTE {
+	unsigned long    v:1;	/* Entry is valid */
+	unsigned long vsid:24;	/* Virtual segment identifier */
+	unsigned long    h:1;	/* Hash algorithm indicator */
+	unsigned long  api:6;	/* Abbreviated page index */
+	unsigned long  rpn:20;	/* Real (physical) page number */
+	unsigned long     :3;	/* Unused */
+	unsigned long    r:1;	/* Referenced */
+	unsigned long    c:1;	/* Changed */
+	unsigned long    w:1;	/* Write-thru cache mode */
+	unsigned long    i:1;	/* Cache inhibited */
+	unsigned long    m:1;	/* Memory coherence */
+	unsigned long    g:1;	/* Guarded */
+	unsigned long     :1;	/* Unused */
+	unsigned long   pp:2;	/* Page protection */
+} PTE;
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#  define PP_RWXX	0 /* Supervisor read/write, User none */
+#  define PP_RWRX	1 /* Supervisor read/write, User read */
+#  define PP_RWRW	2 /* Supervisor read/write, User read/write */
+#  define PP_RXRX	3 /* Supervisor read,       User read */
+
+/* Segment Register */
+typedef struct _SEGREG {
+	unsigned long    t:1;	/* Normal or I/O  type */
+	unsigned long   ks:1;	/* Supervisor 'key' (normally 0) */
+	unsigned long   kp:1;	/* User 'key' (normally 1) */
+	unsigned long    n:1;	/* No-execute */
+	unsigned long     :4;	/* Unused */
+	unsigned long vsid:24;	/* Virtual Segment Identifier */
+} SEGREG;
+
+extern void _tlbie(unsigned long va);	/* invalidate a TLB entry */
+extern void _tlbia(void);		/* invalidate all TLB entries */
+#   endif /* __ASSEMBLY__ */
+
+/*
+ * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The
+ * instruction and data sides share a unified, 64-entry, semi-associative
+ * TLB which is maintained totally under software control. In addition, the
+ * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative
+ * TLB which serves as a first level to the shared TLB. These two TLBs are
+ * known as the UTLB and ITLB, respectively.
+ */
+
+#  define MICROBLAZE_TLB_SIZE 64
+
+/*
+ * TLB entries are defined by a "high" tag portion and a "low" data
+ * portion. The data portion is 32-bits.
+ *
+ * TLB entries are managed entirely under software control by reading,
+ * writing, and searching using the MTS and MFS instructions.
+ */
+
+#  define TLB_LO		1
+#  define TLB_HI		0
+#  define TLB_DATA		TLB_LO
+#  define TLB_TAG		TLB_HI
+
+/* Tag portion */
+#  define TLB_EPN_MASK		0xFFFFFC00 /* Effective Page Number */
+#  define TLB_PAGESZ_MASK	0x00000380
+#  define TLB_PAGESZ(x)		(((x) & 0x7) << 7)
+#  define PAGESZ_1K		0
+#  define PAGESZ_4K		1
+#  define PAGESZ_16K		2
+#  define PAGESZ_64K		3
+#  define PAGESZ_256K		4
+#  define PAGESZ_1M		5
+#  define PAGESZ_4M		6
+#  define PAGESZ_16M		7
+#  define TLB_VALID		0x00000040 /* Entry is valid */
+
+/* Data portion */
+#  define TLB_RPN_MASK		0xFFFFFC00 /* Real Page Number */
+#  define TLB_PERM_MASK		0x00000300
+#  define TLB_EX		0x00000200 /* Instruction execution allowed */
+#  define TLB_WR		0x00000100 /* Writes permitted */
+#  define TLB_ZSEL_MASK		0x000000F0
+#  define TLB_ZSEL(x)		(((x) & 0xF) << 4)
+#  define TLB_ATTR_MASK		0x0000000F
+#  define TLB_W			0x00000008 /* Caching is write-through */
+#  define TLB_I			0x00000004 /* Caching is inhibited */
+#  define TLB_M			0x00000002 /* Memory is coherent */
+#  define TLB_G			0x00000001 /* Memory is guarded from prefetch */
+
+#  endif /* __KERNEL__ */
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_MMU_H */
diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h
index 150ca01..385fed1 100644
--- a/arch/microblaze/include/asm/mmu_context.h
+++ b/arch/microblaze/include/asm/mmu_context.h
@@ -1,21 +1,5 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
-#define _ASM_MICROBLAZE_MMU_CONTEXT_H
-
-# define init_new_context(tsk, mm)		({ 0; })
-
-# define enter_lazy_tlb(mm, tsk)		do {} while (0)
-# define change_mm_context(old, ctx, _pml4)	do {} while (0)
-# define destroy_context(mm)			do {} while (0)
-# define deactivate_mm(tsk, mm)			do {} while (0)
-# define switch_mm(prev, next, tsk)		do {} while (0)
-# define activate_mm(prev, next)		do {} while (0)
-
-#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
+#ifdef CONFIG_MMU
+# include "mmu_context_mm.h"
+#else
+# include "mmu_context_no.h"
+#endif
diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h
new file mode 100644
index 0000000..3e5c254
--- /dev/null
+++ b/arch/microblaze/include/asm/mmu_context_mm.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2006 Atmark Techno, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
+#define _ASM_MICROBLAZE_MMU_CONTEXT_H
+
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <asm/mmu.h>
+#include <asm-generic/mm_hooks.h>
+
+# ifdef __KERNEL__
+/*
+ * This function defines the mapping from contexts to VSIDs (virtual
+ * segment IDs).  We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table.
+ */
+# define CTX_TO_VSID(ctx, va)	(((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
+				 & 0xffffff)
+
+/*
+   MicroBlaze has 256 contexts, so we can just rotate through these
+   as a way of "switching" contexts.  If the TID of the TLB is zero,
+   the PID/TID comparison is disabled, so we can use a TID of zero
+   to represent all kernel pages as shared among all contexts.
+ */
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+# define NO_CONTEXT	256
+# define LAST_CONTEXT	255
+# define FIRST_CONTEXT	1
+
+/*
+ * Set the current MMU context.
+ * This is done byloading up the segment registers for the user part of the
+ * address space.
+ *
+ * Since the PGD is immediately available, it is much faster to simply
+ * pass this along as a second parameter, which is required for 8xx and
+ * can be used for debugging on all processors (if you happen to have
+ * an Abatron).
+ */
+extern void set_context(mm_context_t context, pgd_t *pgd);
+
+/*
+ * Bitmap of contexts in use.
+ * The size of this bitmap is LAST_CONTEXT + 1 bits.
+ */
+extern unsigned long context_map[];
+
+/*
+ * This caches the next context number that we expect to be free.
+ * Its use is an optimization only, we can't rely on this context
+ * number to be free, but it usually will be.
+ */
+extern mm_context_t next_mmu_context;
+
+/*
+ * Since we don't have sufficient contexts to give one to every task
+ * that could be in the system, we need to be able to steal contexts.
+ * These variables support that.
+ */
+extern atomic_t nr_free_contexts;
+extern struct mm_struct *context_mm[LAST_CONTEXT+1];
+extern void steal_context(void);
+
+/*
+ * Get a new mmu context for the address space described by `mm'.
+ */
+static inline void get_mmu_context(struct mm_struct *mm)
+{
+	mm_context_t ctx;
+
+	if (mm->context != NO_CONTEXT)
+		return;
+	while (atomic_dec_if_positive(&nr_free_contexts) < 0)
+		steal_context();
+	ctx = next_mmu_context;
+	while (test_and_set_bit(ctx, context_map)) {
+		ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
+		if (ctx > LAST_CONTEXT)
+			ctx = 0;
+	}
+	next_mmu_context = (ctx + 1) & LAST_CONTEXT;
+	mm->context = ctx;
+	context_mm[ctx] = mm;
+}
+
+/*
+ * Set up the context for a new address space.
+ */
+# define init_new_context(tsk, mm)	(((mm)->context = NO_CONTEXT), 0)
+
+/*
+ * We're finished using the context for an address space.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+	if (mm->context != NO_CONTEXT) {
+		clear_bit(mm->context, context_map);
+		mm->context = NO_CONTEXT;
+		atomic_inc(&nr_free_contexts);
+	}
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	tsk->thread.pgdir = next->pgd;
+	get_mmu_context(next);
+	set_context(next->context, next->pgd);
+}
+
+/*
+ * After we have set current->mm to a new value, this activates
+ * the context for the new mm so we see the new mappings.
+ */
+static inline void activate_mm(struct mm_struct *active_mm,
+			struct mm_struct *mm)
+{
+	current->thread.pgdir = mm->pgd;
+	get_mmu_context(mm);
+	set_context(mm->context, mm->pgd);
+}
+
+extern void mmu_context_init(void);
+
+# endif /* __KERNEL__ */
+#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
diff --git a/arch/microblaze/include/asm/mmu_context_no.h b/arch/microblaze/include/asm/mmu_context_no.h
new file mode 100644
index 0000000..ba55671
--- /dev/null
+++ b/arch/microblaze/include/asm/mmu_context_no.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2006 Atmark Techno, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
+#define _ASM_MICROBLAZE_MMU_CONTEXT_H
+
+# define init_new_context(tsk, mm)		({ 0; })
+
+# define enter_lazy_tlb(mm, tsk)		do {} while (0)
+# define change_mm_context(old, ctx, _pml4)	do {} while (0)
+# define destroy_context(mm)			do {} while (0)
+# define deactivate_mm(tsk, mm)			do {} while (0)
+# define switch_mm(prev, next, tsk)		do {} while (0)
+# define activate_mm(prev, next)		do {} while (0)
+
+#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 7238dcf..210e584 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -1,6 +1,8 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * VM ops
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  * Changes for MMU support:
  *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
@@ -15,14 +17,15 @@
 
 #include <linux/pfn.h>
 #include <asm/setup.h>
+#include <linux/const.h>
+
+#ifdef __KERNEL__
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	(12)
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#ifdef __KERNEL__
-
 #ifndef __ASSEMBLY__
 
 #define PAGE_UP(addr)	(((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
@@ -35,6 +38,7 @@
 /* align addr on a size boundary - adjust address up if needed */
 #define _ALIGN(addr, size)	_ALIGN_UP(addr, size)
 
+#ifndef CONFIG_MMU
 /*
  * PAGE_OFFSET -- the first address of the first page of memory. When not
  * using MMU this corresponds to the first free page in physical memory (aligned
@@ -43,15 +47,44 @@
 extern unsigned int __page_offset;
 #define PAGE_OFFSET __page_offset
 
-#define copy_page(to, from)			memcpy((to), (from), PAGE_SIZE)
-#define get_user_page(vaddr)			__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)		free_page(addr)
+#else /* CONFIG_MMU */
 
-#define clear_page(pgaddr)			memset((pgaddr), 0, PAGE_SIZE)
+/*
+ * PAGE_OFFSET -- the first address of the first page of memory. With MMU
+ * it is set to the kernel start address (aligned on a page boundary).
+ *
+ * CONFIG_KERNEL_START is defined in arch/microblaze/config.in and used
+ * in arch/microblaze/Makefile.
+ */
+#define PAGE_OFFSET	CONFIG_KERNEL_START
 
+/*
+ * MAP_NR -- given an address, calculate the index of the page struct which
+ * points to the address's page.
+ */
+#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)
 
-#define clear_user_page(pgaddr, vaddr, page)	memset((pgaddr), 0, PAGE_SIZE)
-#define copy_user_page(vto, vfrom, vaddr, topg) \
+/*
+ * The basic type of a PTE - 32 bit physical addressing.
+ */
+typedef unsigned long pte_basic_t;
+#define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
+#define PTE_FMT		"%.8lx"
+
+#endif /* CONFIG_MMU */
+
+#  ifndef CONFIG_MMU
+#  define copy_page(to, from)			memcpy((to), (from), PAGE_SIZE)
+#  define get_user_page(vaddr)			__get_free_page(GFP_KERNEL)
+#  define free_user_page(page, addr)		free_page(addr)
+#  else /* CONFIG_MMU */
+extern void copy_page(void *to, void *from);
+#  endif /* CONFIG_MMU */
+
+# define clear_page(pgaddr)			memset((pgaddr), 0, PAGE_SIZE)
+
+# define clear_user_page(pgaddr, vaddr, page)	memset((pgaddr), 0, PAGE_SIZE)
+# define copy_user_page(vto, vfrom, vaddr, topg) \
 			memcpy((vto), (vfrom), PAGE_SIZE)
 
 /*
@@ -60,21 +93,32 @@
 typedef struct page *pgtable_t;
 typedef struct { unsigned long	pte; }		pte_t;
 typedef struct { unsigned long	pgprot; }	pgprot_t;
+/* FIXME this can depend on linux kernel version */
+#   ifdef CONFIG_MMU
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+#   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
 typedef struct { pmd_t		pue[1]; }	pud_t;
 typedef struct { pud_t		pge[1]; }	pgd_t;
+#   endif /* CONFIG_MMU */
 
+# define pte_val(x)	((x).pte)
+# define pgprot_val(x)	((x).pgprot)
 
-#define pte_val(x)	((x).pte)
-#define pgprot_val(x)	((x).pgprot)
-#define pmd_val(x)	((x).ste[0])
-#define pud_val(x)	((x).pue[0])
-#define pgd_val(x)	((x).pge[0])
+#   ifdef CONFIG_MMU
+#   define pmd_val(x)      ((x).pmd)
+#   define pgd_val(x)      ((x).pgd)
+#   else  /* CONFIG_MMU */
+#   define pmd_val(x)	((x).ste[0])
+#   define pud_val(x)	((x).pue[0])
+#   define pgd_val(x)	((x).pge[0])
+#   endif  /* CONFIG_MMU */
 
-#define __pte(x)	((pte_t) { (x) })
-#define __pmd(x)	((pmd_t) { (x) })
-#define __pgd(x)	((pgd_t) { (x) })
-#define __pgprot(x)	((pgprot_t) { (x) })
+# define __pte(x)	((pte_t) { (x) })
+# define __pmd(x)	((pmd_t) { (x) })
+# define __pgd(x)	((pgd_t) { (x) })
+# define __pgprot(x)	((pgprot_t) { (x) })
 
 /**
  * Conversions for virtual address, physical address, pfn, and struct
@@ -94,44 +138,80 @@
 extern unsigned long min_low_pfn;
 extern unsigned long max_pfn;
 
-#define __pa(vaddr)		((unsigned long) (vaddr))
-#define __va(paddr)		((void *) (paddr))
+extern unsigned long memory_start;
+extern unsigned long memory_end;
+extern unsigned long memory_size;
 
-#define phys_to_pfn(phys)	(PFN_DOWN(phys))
-#define pfn_to_phys(pfn)	(PFN_PHYS(pfn))
+extern int page_is_ram(unsigned long pfn);
 
-#define virt_to_pfn(vaddr)	(phys_to_pfn((__pa(vaddr))))
-#define pfn_to_virt(pfn)	__va(pfn_to_phys((pfn)))
+# define phys_to_pfn(phys)	(PFN_DOWN(phys))
+# define pfn_to_phys(pfn)	(PFN_PHYS(pfn))
 
-#define virt_to_page(vaddr)	(pfn_to_page(virt_to_pfn(vaddr)))
-#define page_to_virt(page)	(pfn_to_virt(page_to_pfn(page)))
+# define virt_to_pfn(vaddr)	(phys_to_pfn((__pa(vaddr))))
+# define pfn_to_virt(pfn)	__va(pfn_to_phys((pfn)))
 
-#define page_to_phys(page)	(pfn_to_phys(page_to_pfn(page)))
-#define page_to_bus(page)	(page_to_phys(page))
-#define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
+#  ifdef CONFIG_MMU
+#  define virt_to_page(kaddr) 	(mem_map +  MAP_NR(kaddr))
+#  else /* CONFIG_MMU */
+#  define virt_to_page(vaddr)	(pfn_to_page(virt_to_pfn(vaddr)))
+#  define page_to_virt(page)	(pfn_to_virt(page_to_pfn(page)))
+#  define page_to_phys(page)	(pfn_to_phys(page_to_pfn(page)))
+#  define page_to_bus(page)	(page_to_phys(page))
+#  define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
+#  endif /* CONFIG_MMU */
 
-extern unsigned int memory_start;
-extern unsigned int memory_end;
-extern unsigned int memory_size;
+#  ifndef CONFIG_MMU
+#  define pfn_valid(pfn)	((pfn) >= min_low_pfn && (pfn) <= max_mapnr)
+#  define ARCH_PFN_OFFSET	(PAGE_OFFSET >> PAGE_SHIFT)
+#  else /* CONFIG_MMU */
+#  define ARCH_PFN_OFFSET	(memory_start >> PAGE_SHIFT)
+#  define pfn_valid(pfn)	((pfn) < (max_mapnr + ARCH_PFN_OFFSET))
+#  define VALID_PAGE(page) 	((page - mem_map) < max_mapnr)
+#  endif /* CONFIG_MMU */
 
-#define pfn_valid(pfn)		((pfn) >= min_low_pfn && (pfn) < max_mapnr)
+# endif /* __ASSEMBLY__ */
 
-#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
+#define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
 
-#else
-#define tophys(rd, rs)	(addik rd, rs, 0)
-#define tovirt(rd, rs)	(addik rd, rs, 0)
-#endif /* __ASSEMBLY__ */
 
-#define virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
+#  ifndef CONFIG_MMU
+#  define __pa(vaddr)	((unsigned long) (vaddr))
+#  define __va(paddr)	((void *) (paddr))
+#  else /* CONFIG_MMU */
+#  define __pa(x)	__virt_to_phys((unsigned long)(x))
+#  define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
+#  endif /* CONFIG_MMU */
 
-/* Convert between virtual and physical address for MMU.  */
-/* Handle MicroBlaze processor with virtual memory.  */
+
+/* Convert between virtual and physical address for MMU. */
+/* Handle MicroBlaze processor with virtual memory. */
+#ifndef CONFIG_MMU
 #define __virt_to_phys(addr)	addr
 #define __phys_to_virt(addr)	addr
+#define tophys(rd, rs)	addik rd, rs, 0
+#define tovirt(rd, rs)	addik rd, rs, 0
+#else
+#define __virt_to_phys(addr) \
+	((addr) + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START)
+#define __phys_to_virt(addr) \
+	((addr) + CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
+#define tophys(rd, rs) \
+	addik rd, rs, (CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START)
+#define tovirt(rd, rs) \
+	addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
+#endif /* CONFIG_MMU */
 
 #define TOPHYS(addr)  __virt_to_phys(addr)
 
+#ifdef CONFIG_MMU
+#ifdef CONFIG_CONTIGUOUS_PAGE_ALLOC
+#define WANT_PAGE_VIRTUAL 1 /* page alloc 2 relies on this */
+#endif
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#endif /* CONFIG_MMU */
+
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 2a4b354..59a757e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,195 @@
 #ifndef _ASM_MICROBLAZE_PGALLOC_H
 #define _ASM_MICROBLAZE_PGALLOC_H
 
+#ifdef CONFIG_MMU
+
+#include <linux/kernel.h>	/* For min/max macros */
+#include <linux/highmem.h>
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define PGDIR_ORDER	0
+
+/*
+ * This is handled very differently on MicroBlaze since out page tables
+ * are all 0's and I want to be able to use these zero'd pages elsewhere
+ * as well - it gives us quite a speedup.
+ * -- Cort
+ */
+extern struct pgtable_cache_struct {
+	unsigned long *pgd_cache;
+	unsigned long *pte_cache;
+	unsigned long pgtable_cache_sz;
+} quicklists;
+
+#define pgd_quicklist		(quicklists.pgd_cache)
+#define pmd_quicklist		((unsigned long *)0)
+#define pte_quicklist		(quicklists.pte_cache)
+#define pgtable_cache_size	(quicklists.pgtable_cache_sz)
+
+extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
+extern atomic_t zero_sz; /* # currently pre-zero'd pages */
+extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
+extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
+extern atomic_t zerototal; /* # pages zero'd over time */
+
+#define zero_quicklist		(zero_cache)
+#define zero_cache_sz	 	(zero_sz)
+#define zero_cache_calls	(zeropage_calls)
+#define zero_cache_hits		(zeropage_hits)
+#define zero_cache_total	(zerototal)
+
+/*
+ * return a pre-zero'd page from the list,
+ * return NULL if none available -- Cort
+ */
+extern unsigned long get_zero_page_fast(void);
+
+extern void __bad_pte(pmd_t *pmd);
+
+extern inline pgd_t *get_pgd_slow(void)
+{
+	pgd_t *ret;
+
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
+	if (ret != NULL)
+		clear_page(ret);
+	return ret;
+}
+
+extern inline pgd_t *get_pgd_fast(void)
+{
+	unsigned long *ret;
+
+	ret = pgd_quicklist;
+	if (ret != NULL) {
+		pgd_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	} else
+		ret = (unsigned long *)get_pgd_slow();
+	return (pgd_t *)ret;
+}
+
+extern inline void free_pgd_fast(pgd_t *pgd)
+{
+	*(unsigned long **)pgd = pgd_quicklist;
+	pgd_quicklist = (unsigned long *) pgd;
+	pgtable_cache_size++;
+}
+
+extern inline void free_pgd_slow(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+
+#define pgd_free(mm, pgd)        free_pgd_fast(pgd)
+#define pgd_alloc(mm)		get_pgd_fast()
+
+#define pmd_pgtable(pmd)	pmd_page(pmd)
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+#define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
+/* FIXME two definition - look below */
+#define pmd_free(mm, x)			do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+		unsigned long address)
+{
+	pte_t *pte;
+	extern int mem_init_done;
+	extern void *early_get_page(void);
+	if (mem_init_done) {
+		pte = (pte_t *)__get_free_page(GFP_KERNEL |
+					__GFP_REPEAT | __GFP_ZERO);
+	} else {
+		pte = (pte_t *)early_get_page();
+		if (pte)
+			clear_page(pte);
+	}
+	return pte;
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+		unsigned long address)
+{
+	struct page *ptepage;
+
+#ifdef CONFIG_HIGHPTE
+	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+#else
+	int flags = GFP_KERNEL | __GFP_REPEAT;
+#endif
+
+	ptepage = alloc_pages(flags, 0);
+	if (ptepage)
+		clear_highpage(ptepage);
+	return ptepage;
+}
+
+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
+		unsigned long address)
+{
+	unsigned long *ret;
+
+	ret = pte_quicklist;
+	if (ret != NULL) {
+		pte_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	}
+	return (pte_t *)ret;
+}
+
+extern inline void pte_free_fast(pte_t *pte)
+{
+	*(unsigned long **)pte = pte_quicklist;
+	pte_quicklist = (unsigned long *) pte;
+	pgtable_cache_size++;
+}
+
+extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+extern inline void pte_free_slow(struct page *ptepage)
+{
+	__free_page(ptepage);
+}
+
+extern inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+{
+	__free_page(ptepage);
+}
+
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
+
+#define pmd_populate(mm, pmd, pte)	(pmd_val(*(pmd)) = page_address(pte))
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+		(pmd_val(*(pmd)) = (unsigned long) (pte))
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
+/*#define pmd_free(mm, x)			do { } while (0)*/
+#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+extern int do_check_pgt_cache(int, int);
+
+#endif /* CONFIG_MMU */
+
 #define check_pgt_cache()	do {} while (0)
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 4df31e4..4c57a58 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +16,8 @@
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 
+#ifndef CONFIG_MMU
+
 #define pgd_present(pgd)	(1) /* pages are always present on non MMU */
 #define pgd_none(pgd)		(0)
 #define pgd_bad(pgd)		(0)
@@ -27,6 +31,8 @@
 #define PAGE_READONLY		__pgprot(0) /* these mean nothing to non MMU */
 #define PAGE_KERNEL		__pgprot(0) /* these mean nothing to non MMU */
 
+#define pgprot_noncached(x)	(x)
+
 #define __swp_type(x)		(0)
 #define __swp_offset(x)		(0)
 #define __swp_entry(typ, off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
@@ -45,6 +51,538 @@
 
 #define arch_enter_lazy_cpu_mode()	do {} while (0)
 
+#else /* CONFIG_MMU */
+
+#include <asm-generic/4level-fixup.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/processor.h>		/* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+#define FIRST_USER_ADDRESS	0
+
+extern unsigned long va_to_phys(unsigned long address);
+extern pte_t *va_to_pte(unsigned long address);
+extern unsigned long ioremap_bot, ioremap_base;
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_special(pte_t pte)	{ return 0; }
+
+static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
+
+/* Start and end of the vmalloc area. */
+/* Make sure to map the vmalloc area above the pinned kernel memory area
+   of 32Mb.  */
+#define VMALLOC_START	(CONFIG_KERNEL_START + \
+				max(32 * 1024 * 1024UL, memory_size))
+#define VMALLOC_END	ioremap_bot
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The MicroBlaze MMU is identical to the PPC-40x MMU, and uses a hash
+ * table containing PTEs, together with a set of 16 segment registers, to
+ * define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings.  We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code.  Low-level assembler code in hashtable.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+/*
+ * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The
+ * instruction and data sides share a unified, 64-entry, semi-associative
+ * TLB which is maintained totally under software control. In addition, the
+ * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative
+ * TLB which serves as a first level to the shared TLB. These two TLBs are
+ * known as the UTLB and ITLB, respectively (see "mmu.h" for definitions).
+ */
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ */
+
+/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	PMD_SHIFT
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: our page-table tree is two-level, so
+ * we don't really have any PMD directory.
+ */
+#define PTRS_PER_PTE	(1 << PTE_SHIFT)
+#define PTRS_PER_PMD	1
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_PGD_NR	0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define pte_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
+		__FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
+		__FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
+		__FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PTE as closely as possible.
+ */
+
+/* There are several potential gotchas here.  The hardware TLBLO
+ * field looks like this:
+ *
+ * 0  1  2  3  4  ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ * RPN.....................  0  0 EX WR ZSEL.......  W  I  M  G
+ *
+ * Where possible we make the Linux PTE bits match up with this
+ *
+ * - bits 20 and 21 must be cleared, because we use 4k pages (4xx can
+ * support down to 1k pages), this is done in the TLBMiss exception
+ * handler.
+ * - We use only zones 0 (for kernel pages) and 1 (for user pages)
+ * of the 16 available.  Bit 24-26 of the TLB are cleared in the TLB
+ * miss handler.  Bit 27 is PAGE_USER, thus selecting the correct
+ * zone.
+ * - PRESENT *must* be in the bottom two bits because swap cache
+ * entries use the top 30 bits.  Because 4xx doesn't support SMP
+ * anyway, M is irrelevant so we borrow it for PAGE_PRESENT.  Bit 30
+ * is cleared in the TLB miss handler before the TLB entry is loaded.
+ * - All other bits of the PTE are loaded into TLBLO without
+ *  * modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
+ * software PTE bits.  We actually use use bits 21, 24, 25, and
+ * 30 respectively for the software bits: ACCESSED, DIRTY, RW, and
+ * PRESENT.
+ */
+
+/* Definitions for MicroBlaze. */
+#define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
+#define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
+#define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
+#define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
+#define	_PAGE_USER	0x010	/* matches one of the zone permission bits */
+#define	_PAGE_RW	0x040	/* software: Writes permitted */
+#define	_PAGE_DIRTY	0x080	/* software: dirty page */
+#define _PAGE_HWWRITE	0x100	/* hardware: Dirty & RW, set in exception */
+#define _PAGE_HWEXEC	0x200	/* hardware: EX permission */
+#define _PAGE_ACCESSED	0x400	/* software: R: page referenced */
+#define _PMD_PRESENT	PAGE_MASK
+
+/*
+ * Some bits are unused...
+ */
+#ifndef _PAGE_HASHPTE
+#define _PAGE_HASHPTE	0
+#endif
+#ifndef _PTE_NONE_MASK
+#define _PTE_NONE_MASK	0
+#endif
+#ifndef _PAGE_SHARED
+#define _PAGE_SHARED	0
+#endif
+#ifndef _PAGE_HWWRITE
+#define _PAGE_HWWRITE	0
+#endif
+#ifndef _PAGE_HWEXEC
+#define _PAGE_HWEXEC	0
+#endif
+#ifndef _PAGE_EXEC
+#define _PAGE_EXEC	0
+#endif
+
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+/*
+ * Note: the _PAGE_COHERENT bit automatically gets set in the hardware
+ * PTE if CONFIG_SMP is defined (hash_page does this); there is no need
+ * to have it in the Linux PTE, and in fact the bit could be reused for
+ * another purpose.  -- paulus.
+ */
+#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_WRENABLE	(_PAGE_RW | _PAGE_DIRTY | _PAGE_HWWRITE)
+
+#define _PAGE_KERNEL \
+	(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_SHARED | _PAGE_HWEXEC)
+
+#define _PAGE_IO	(_PAGE_KERNEL | _PAGE_NO_CACHE | _PAGE_GUARDED)
+
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X \
+		__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_SHARED)
+#define PAGE_KERNEL_CI	__pgprot(_PAGE_IO)
+
+/*
+ * We consider execute permission the same as read.
+ * Also, write permissions imply read permissions.
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY_X
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY_X
+#define __P100	PAGE_READONLY
+#define __P101	PAGE_READONLY_X
+#define __P110	PAGE_COPY
+#define __P111	PAGE_COPY_X
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY_X
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED_X
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY_X
+#define __S110	PAGE_SHARED
+#define __S111	PAGE_SHARED_X
+
+#ifndef __ASSEMBLY__
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* __ASSEMBLY__ */
+
+#define pte_none(pte)		((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
+#define pte_present(pte)	(pte_val(pte) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, ptep) \
+	do { set_pte_at((mm), (addr), (ptep), __pte(0)); } while (0)
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		((pmd_val(pmd) & _PMD_PRESENT) == 0)
+#define	pmd_present(pmd)	((pmd_val(pmd) & _PMD_PRESENT) != 0)
+#define	pmd_clear(pmdp)		do { pmd_val(*(pmdp)) = 0; } while (0)
+
+#define pte_page(x)		(mem_map + (unsigned long) \
+				((pte_val(x) - memory_start) >> PAGE_SHIFT))
+#define PFN_SHIFT_OFFSET	(PAGE_SHIFT)
+
+#define pte_pfn(x)		(pte_val(x) >> PFN_SHIFT_OFFSET)
+
+#define pfn_pte(pfn, prot) \
+	__pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) | pgprot_val(prot))
+
+#ifndef __ASSEMBLY__
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+#define pgd_clear(xp)				do { } while (0)
+#define pgd_page(pgd) \
+	((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)  { return pte_val(pte) & _PAGE_USER; }
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
+static inline int pte_exec(pte_t pte)  { return pte_val(pte) & _PAGE_EXEC; }
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+/* FIXME */
+static inline int pte_file(pte_t pte)		{ return 0; }
+
+static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
+static inline void pte_cache(pte_t pte)   { pte_val(pte) &= ~_PAGE_NO_CACHE; }
+
+static inline pte_t pte_rdprotect(pte_t pte) \
+		{ pte_val(pte) &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte) \
+	{ pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_exprotect(pte_t pte) \
+	{ pte_val(pte) &= ~_PAGE_EXEC; return pte; }
+static inline pte_t pte_mkclean(pte_t pte) \
+	{ pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_mkold(pte_t pte) \
+	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+
+static inline pte_t pte_mkread(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+static inline pte_t mk_pte_phys(phys_addr_t physpage, pgprot_t pgprot)
+{
+	pte_t pte;
+	pte_val(pte) = physpage | pgprot_val(pgprot);
+	return pte;
+}
+
+#define mk_pte(page, pgprot) \
+({									   \
+	pte_t pte;							   \
+	pte_val(pte) = (((page - mem_map) << PAGE_SHIFT) + memory_start) |  \
+			pgprot_val(pgprot);				   \
+	pte;								   \
+})
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+/*
+ * Atomic PTE updates.
+ *
+ * pte_update clears and sets bit atomically, and returns
+ * the old pte value.
+ * The ((unsigned long)(p+1) - 4) hack is to get to the least-significant
+ * 32 bits of the PTE regardless of whether PTEs are 32 or 64 bits.
+ */
+static inline unsigned long pte_update(pte_t *p, unsigned long clr,
+				unsigned long set)
+{
+	unsigned long old, tmp, msr;
+
+	__asm__ __volatile__("\
+	msrclr	%2, 0x2\n\
+	nop\n\
+	lw	%0, %4, r0\n\
+	andn	%1, %0, %5\n\
+	or	%1, %1, %6\n\
+	sw	%1, %4, r0\n\
+	mts     rmsr, %2\n\
+	nop"
+	: "=&r" (old), "=&r" (tmp), "=&r" (msr), "=m" (*p)
+	: "r" ((unsigned long)(p+1) - 4), "r" (clr), "r" (set), "m" (*p)
+	: "cc");
+
+	return old;
+}
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+static inline void set_pte(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+}
+
+static inline int ptep_test_and_clear_young(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
+}
+
+static inline int ptep_test_and_clear_dirty(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return (pte_update(ptep, \
+		(_PAGE_DIRTY | _PAGE_HWWRITE), 0) & _PAGE_DIRTY) != 0;
+}
+
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+}
+
+/*static inline void ptep_set_wrprotect(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0);
+}*/
+
+static inline void ptep_mkdirty(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	pte_update(ptep, 0, _PAGE_DIRTY);
+}
+
+/*#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)*/
+
+/* Convert pmd entry to page */
+/* our pmd entry is an effective address of pte table*/
+/* returns effective address of the pmd entry*/
+#define pmd_page_kernel(pmd)	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
+
+/* returns struct *page of the pmd entry*/
+#define pmd_page(pmd)	(pfn_to_page(__pa(pmd_val(pmd)) >> PAGE_SHIFT))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+/* Find an entry in the second-level page table.. */
+static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+{
+	return (pmd_t *) dir;
+}
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address)		\
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, addr)	\
+	((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir, addr)		\
+	((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr))
+#define pte_offset_map_nested(dir, addr)	\
+	((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE1) + pte_index(addr))
+
+#define pte_unmap(pte)		kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap_nested(pte)	kunmap_atomic(pte, KM_PTE1)
+
+/* Encode and decode a nonlinear file mapping entry */
+#define PTE_FILE_MAX_BITS	29
+#define pte_to_pgoff(pte)	(pte_val(pte) >> 3)
+#define pgoff_to_pte(off)	((pte_t) { ((off) << 3) })
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_page is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_page(unsigned context, unsigned long va, pte_t *ptep);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va, pte_t *ptep);
+
+/*
+ * Encode and decode a swap entry.
+ * Note that the bits we use in a PTE for representing a swap entry
+ * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit
+ * (if used).  -- paulus
+ */
+#define __swp_type(entry)		((entry).val & 0x3f)
+#define __swp_offset(entry)	((entry).val >> 6)
+#define __swp_entry(type, offset) \
+		((swp_entry_t) { (type) | ((offset) << 6) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) >> 2 })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val << 2 })
+
+
+/* CONFIG_APUS */
+/* For virtual address to physical address conversion */
+extern void cache_clear(__u32 addr, int length);
+extern void cache_push(__u32 addr, int length);
+extern int mm_end_of_chunk(unsigned long addr, int len);
+extern unsigned long iopa(unsigned long addr);
+/* extern unsigned long mm_ptov(unsigned long addr) \
+	__attribute__ ((const)); TBD */
+
+/* Values for nocacheflag and cmode */
+/* These are not used by the APUS kernel_map, but prevents
+ * compilation errors.
+ */
+#define	IOMAP_FULL_CACHING	0
+#define	IOMAP_NOCACHE_SER	1
+#define	IOMAP_NOCACHE_NONSER	2
+#define	IOMAP_NO_COPYBACK	3
+
+/*
+ * Map some physical address range into the kernel address space.
+ */
+extern unsigned long kernel_map(unsigned long paddr, unsigned long size,
+				int nocacheflag, unsigned long *memavailp);
+
+/*
+ * Set cache mode of (kernel space) address range.
+ */
+extern void kernel_set_cachemode(unsigned long address, unsigned long size,
+				unsigned int cmode);
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_page_range remap_page_range
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+void do_page_fault(struct pt_regs *regs, unsigned long address,
+		   unsigned long error_code);
+
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags);
+
+void __init adjust_total_lowmem(void);
+void mapin_ram(void);
+int map_page(unsigned long va, phys_addr_t pa, int flags);
+
+extern int mem_init_done;
+extern unsigned long ioremap_base;
+extern unsigned long ioremap_bot;
+
+asmlinkage void __init mmu_init(void);
+
+void __init *early_get_page(void);
+
+void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle);
+void consistent_free(void *vaddr);
+void consistent_sync(void *vaddr, size_t size, int direction);
+void consistent_sync_page(struct page *page, unsigned long offset,
+	size_t size, int direction);
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#endif /* CONFIG_MMU */
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/pgtable.h>
 
diff --git a/arch/microblaze/include/asm/posix_types.h b/arch/microblaze/include/asm/posix_types.h
index b4df41c..8c758b23 100644
--- a/arch/microblaze/include/asm/posix_types.h
+++ b/arch/microblaze/include/asm/posix_types.h
@@ -16,7 +16,7 @@
  */
 
 typedef unsigned long	__kernel_ino_t;
-typedef unsigned int	__kernel_mode_t;
+typedef unsigned short	__kernel_mode_t;
 typedef unsigned int	__kernel_nlink_t;
 typedef long		__kernel_off_t;
 typedef int		__kernel_pid_t;
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 9329029..563c6b9 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -26,14 +26,15 @@
 # define cpu_sleep()		do {} while (0)
 # define prepare_to_copy(tsk)	do {} while (0)
 
-# endif /* __ASSEMBLY__ */
-
 #define task_pt_regs(tsk) \
 		(((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1)
 
 /* Do necessary setup to start up a newly executed thread. */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp);
 
+# endif /* __ASSEMBLY__ */
+
+# ifndef CONFIG_MMU
 /*
  * User space process size: memory size
  *
@@ -85,4 +86,90 @@
 # define KSTK_EIP(tsk)	(0)
 # define KSTK_ESP(tsk)	(0)
 
+# else /* CONFIG_MMU */
+
+/*
+ * This is used to define STACK_TOP, and with MMU it must be below
+ * kernel base to select the correct PGD when handling MMU exceptions.
+ */
+# define TASK_SIZE	(CONFIG_KERNEL_START)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+# define TASK_UNMAPPED_BASE	(TASK_SIZE / 8 * 3)
+
+# define THREAD_KSP	0
+
+#  ifndef __ASSEMBLY__
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#  define current_text_addr()	({ __label__ _l; _l: &&_l; })
+
+/* If you change this, you must change the associated assembly-languages
+ * constants defined below, THREAD_*.
+ */
+struct thread_struct {
+	/* kernel stack pointer (must be first field in structure) */
+	unsigned long	ksp;
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
+	void		*pgdir;		/* root of page-table tree */
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+};
+
+#  define INIT_THREAD { \
+	.ksp   = sizeof init_stack + (unsigned long)init_stack, \
+	.pgdir = swapper_pg_dir, \
+}
+
+/* Do necessary setup to start up a newly executed thread.  */
+void start_thread(struct pt_regs *regs,
+		unsigned long pc, unsigned long usp);
+
+/* Free all resources held by a thread. */
+extern inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Free current thread data structures etc.  */
+static inline void exit_thread(void)
+{
+}
+
+/* Return saved (kernel) PC of a blocked thread.  */
+#  define thread_saved_pc(tsk)	\
+	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+/* The size allocated for kernel stacks. This _must_ be a power of two! */
+# define KERNEL_STACK_SIZE	0x2000
+
+/* Return some info about the user process TASK.  */
+#  define task_tos(task)	((unsigned long)(task) + KERNEL_STACK_SIZE)
+#  define task_regs(task) ((struct pt_regs *)task_tos(task) - 1)
+
+#  define task_pt_regs_plus_args(tsk) \
+	(((void *)task_pt_regs(tsk)) - STATE_SAVE_ARG_SPACE)
+
+#  define task_sp(task)	(task_regs(task)->r1)
+#  define task_pc(task)	(task_regs(task)->pc)
+/* Grotty old names for some.  */
+#  define KSTK_EIP(task)	(task_pc(task))
+#  define KSTK_ESP(task)	(task_sp(task))
+
+/* FIXME */
+#  define deactivate_mm(tsk, mm)	do { } while (0)
+
+#  define STACK_TOP	TASK_SIZE
+#  define STACK_TOP_MAX	STACK_TOP
+
+#  endif /* __ASSEMBLY__ */
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_PROCESSOR_H */
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 55015bc..a917dc5 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -10,7 +10,6 @@
 #define _ASM_MICROBLAZE_PTRACE_H
 
 #ifndef __ASSEMBLY__
-#include <linux/types.h>
 
 typedef unsigned long microblaze_reg_t;
 
diff --git a/arch/microblaze/include/asm/registers.h b/arch/microblaze/include/asm/registers.h
index 834142d..68c3afb 100644
--- a/arch/microblaze/include/asm/registers.h
+++ b/arch/microblaze/include/asm/registers.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -30,4 +30,21 @@
 #define FSR_UF		(1<<1) /* Underflow */
 #define FSR_DO		(1<<0) /* Denormalized operand error */
 
+# ifdef CONFIG_MMU
+/* Machine State Register (MSR) Fields */
+# define MSR_UM		(1<<11) /* User Mode */
+# define MSR_UMS	(1<<12) /* User Mode Save */
+# define MSR_VM		(1<<13) /* Virtual Mode */
+# define MSR_VMS	(1<<14) /* Virtual Mode Save */
+
+# define MSR_KERNEL	(MSR_EE | MSR_VM)
+/* # define MSR_USER	(MSR_KERNEL | MSR_UM | MSR_IE) */
+# define MSR_KERNEL_VMS	(MSR_EE | MSR_VMS)
+/* # define MSR_USER_VMS	(MSR_KERNEL_VMS | MSR_UMS | MSR_IE) */
+
+/* Exception State Register (ESR) Fields */
+# define	  ESR_DIZ	(1<<11) /* Zone Protection */
+# define	  ESR_S		(1<<10) /* Store instruction */
+
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_REGISTERS_H */
diff --git a/arch/microblaze/include/asm/sections.h b/arch/microblaze/include/asm/sections.h
index 8434a43..4487e15 100644
--- a/arch/microblaze/include/asm/sections.h
+++ b/arch/microblaze/include/asm/sections.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +16,7 @@
 # ifndef __ASSEMBLY__
 extern char _ssbss[], _esbss[];
 extern unsigned long __ivt_start[], __ivt_end[];
+extern char _etext[], _stext[];
 
 #  ifdef CONFIG_MTD_UCLINUX
 extern char *_ebss;
diff --git a/arch/microblaze/include/asm/segment.h b/arch/microblaze/include/asm/segment.h
index 7f5dcc5..0e7102c 100644
--- a/arch/microblaze/include/asm/segment.h
+++ b/arch/microblaze/include/asm/segment.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -11,7 +11,7 @@
 #ifndef _ASM_MICROBLAZE_SEGMENT_H
 #define _ASM_MICROBLAZE_SEGMENT_H
 
-#ifndef __ASSEMBLY__
+# ifndef __ASSEMBLY__
 
 typedef struct {
 	unsigned long seg;
@@ -29,15 +29,21 @@
  *
  * For non-MMU arch like Microblaze, KERNEL_DS and USER_DS is equal.
  */
-#  define KERNEL_DS	((mm_segment_t){0})
+# define MAKE_MM_SEG(s)       ((mm_segment_t) { (s) })
+
+#  ifndef CONFIG_MMU
+#  define KERNEL_DS	MAKE_MM_SEG(0)
 #  define USER_DS	KERNEL_DS
+#  else
+#  define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
+#  define USER_DS	MAKE_MM_SEG(TASK_SIZE - 1)
+#  endif
 
 # define get_ds()	(KERNEL_DS)
 # define get_fs()	(current_thread_info()->addr_limit)
-# define set_fs(x) \
-		do { current_thread_info()->addr_limit = (x); } while (0)
+# define set_fs(val)	(current_thread_info()->addr_limit = (val))
 
-# define segment_eq(a, b)		((a).seg == (b).seg)
+# define segment_eq(a, b)	((a).seg == (b).seg)
 
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SEGMENT_H */
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 9b98e8e..27f8daf 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2007-2008 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -18,7 +19,6 @@
 extern unsigned int boot_cpuid; /* move to smp.h */
 
 extern char cmd_line[COMMAND_LINE_SIZE];
-#  endif/* __KERNEL__ */
 
 void early_printk(const char *fmt, ...);
 
@@ -30,6 +30,11 @@
 
 unsigned long long sched_clock(void);
 
+#   ifdef CONFIG_MMU
+extern void mmu_reset(void);
+extern void early_console_reg_tlb_alloc(unsigned int addr);
+#   endif /* CONFIG_MMU */
+
 void time_init(void);
 void init_IRQ(void);
 void machine_early_init(const char *cmdline, unsigned int ram,
@@ -40,5 +45,6 @@
 void machine_halt(void);
 void machine_power_off(void);
 
+#  endif/* __KERNEL__ */
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SETUP_H */
diff --git a/arch/microblaze/include/asm/stat.h b/arch/microblaze/include/asm/stat.h
index 5f18b8a..a15f775 100644
--- a/arch/microblaze/include/asm/stat.h
+++ b/arch/microblaze/include/asm/stat.h
@@ -16,58 +16,53 @@
 
 #include <linux/posix_types.h>
 
+#define STAT_HAVE_NSEC 1
+
 struct stat {
-	unsigned int	st_dev;
+	unsigned long	st_dev;
 	unsigned long	st_ino;
 	unsigned int	st_mode;
 	unsigned int	st_nlink;
 	unsigned int	st_uid;
 	unsigned int	st_gid;
-	unsigned int	st_rdev;
-	unsigned long	st_size;
-	unsigned long	st_blksize;
-	unsigned long	st_blocks;
-	unsigned long	st_atime;
-	unsigned long	__unused1; /* unsigned long  st_atime_nsec */
-	unsigned long	st_mtime;
-	unsigned long	__unused2; /* unsigned long  st_mtime_nsec */
-	unsigned long	st_ctime;
-	unsigned long	__unused3; /* unsigned long  st_ctime_nsec */
+	unsigned long	st_rdev;
+	unsigned long	__pad1;
+	long		st_size;
+	int		st_blksize;
+	int		__pad2;
+	long		st_blocks;
+	int		st_atime;
+	unsigned int	st_atime_nsec;
+	int		st_mtime;
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;
+	unsigned int	st_ctime_nsec;
 	unsigned long	__unused4;
 	unsigned long	__unused5;
 };
 
 struct stat64 {
-	unsigned long long	st_dev;
-	unsigned long	__unused1;
-
-	unsigned long long	st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-
-	unsigned long long	st_rdev;
-	unsigned long	__unused3;
-
-	long long	st_size;
-	unsigned long	st_blksize;
-
-	unsigned long	st_blocks; /* No. of 512-byte blocks allocated */
-	unsigned long	__unused4; /* future possible st_blocks high bits */
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-
-	unsigned long	__unused8;
+	unsigned long long	st_dev;		/* Device.  */
+	unsigned long long	st_ino;		/* File serial number.  */
+	unsigned int		st_mode;	/* File mode.  */
+	unsigned int		st_nlink;	/* Link count.  */
+	unsigned int		st_uid;		/* User ID of the file's owner.  */
+	unsigned int		st_gid;		/* Group ID of the file's group. */
+	unsigned long long	st_rdev;	/* Device number, if device.  */
+	unsigned long long	__pad1;
+	long long		st_size;	/* Size of file, in bytes.  */
+	int			st_blksize;	/* Optimal block size for I/O.  */
+	int			__pad2;
+	long long		st_blocks;	/* Number 512-byte blocks allocated. */
+	int			st_atime;	/* Time of last access.  */
+	unsigned int		st_atime_nsec;
+	int			st_mtime;	/* Time of last modification.  */
+	unsigned int		st_mtime_nsec;
+	int			st_ctime;	/* Time of last status change.  */
+	unsigned int		st_ctime_nsec;
+	unsigned int		__unused4;
+	unsigned int		__unused5;
 };
 
 #endif /* _ASM_MICROBLAZE_STAT_H */
+
diff --git a/arch/microblaze/include/asm/string.h b/arch/microblaze/include/asm/string.h
index f7728c9..aec2f59 100644
--- a/arch/microblaze/include/asm/string.h
+++ b/arch/microblaze/include/asm/string.h
@@ -9,7 +9,7 @@
 #ifndef _ASM_MICROBLAZE_STRING_H
 #define _ASM_MICROBLAZE_STRING_H
 
-#ifndef __KERNEL__
+#ifdef __KERNEL__
 
 #define __HAVE_ARCH_MEMSET
 #define __HAVE_ARCH_MEMCPY
diff --git a/arch/microblaze/include/asm/syscalls.h b/arch/microblaze/include/asm/syscalls.h
index 9cb4ff0..ddea9eb 100644
--- a/arch/microblaze/include/asm/syscalls.h
+++ b/arch/microblaze/include/asm/syscalls.h
@@ -34,6 +34,9 @@
 asmlinkage int sys_sigaction(int sig, const struct old_sigaction *act,
 		struct old_sigaction *oact);
 
+asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
+		struct sigaction __user *oact, size_t sigsetsize);
+
 asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs);
 
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 4c3943e..7fac444 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -122,6 +122,8 @@
 #define TIF_SINGLESTEP		4
 #define TIF_IRET		5 /* return with iret */
 #define TIF_MEMDIE		6
+#define TIF_SYSCALL_AUDIT	9       /* syscall auditing active */
+#define TIF_SECCOMP		10      /* secure computing */
 #define TIF_FREEZE		14	/* Freezing for suspend */
 
 /* FIXME change in entry.S */
@@ -138,10 +140,17 @@
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_KERNEL_TRACE	(1 << TIF_KERNEL_TRACE)
 
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK  (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		0x0000FFFE
+
 /* work to do on any return to u-space */
 #define _TIF_ALLWORK_MASK	0x0000FFFF
 
@@ -154,6 +163,17 @@
  */
 /* FPU was used by this task this quantum (SMP) */
 #define TS_USEDFPU		0x0001
+#define TS_RESTORE_SIGMASK	0x0002
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK 1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+}
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index d1dfe37..c472d28 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -13,4 +15,10 @@
 
 #include <asm-generic/tlb.h>
 
+#ifdef CONFIG_MMU
+#define tlb_start_vma(tlb, vma)		do { } while (0)
+#define tlb_end_vma(tlb, vma)		do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
+#endif
+
 #endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h
index d7fe762..eb31a0e 100644
--- a/arch/microblaze/include/asm/tlbflush.h
+++ b/arch/microblaze/include/asm/tlbflush.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,50 @@
 #ifndef _ASM_MICROBLAZE_TLBFLUSH_H
 #define _ASM_MICROBLAZE_TLBFLUSH_H
 
+#ifdef CONFIG_MMU
+
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/processor.h>	/* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+
+extern void _tlbie(unsigned long address);
+extern void _tlbia(void);
+
+#define __tlbia()	_tlbia()
+
+static inline void local_flush_tlb_all(void)
+	{ __tlbia(); }
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+	{ __tlbia(); }
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+				unsigned long vmaddr)
+	{ _tlbie(vmaddr); }
+static inline void local_flush_tlb_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+	{ __tlbia(); }
+
+#define flush_tlb_kernel_range(start, end)	do { } while (0)
+
+#define update_mmu_cache(vma, addr, pte)	do { } while (0)
+
+#define flush_tlb_all local_flush_tlb_all
+#define flush_tlb_mm local_flush_tlb_mm
+#define flush_tlb_page local_flush_tlb_page
+#define flush_tlb_range local_flush_tlb_range
+
+/*
+ * This is called in munmap when we have freed up some page-table
+ * pages.  We don't need to do anything here, there's nothing special
+ * about our page-table pages.  -- paulus
+ */
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+	unsigned long start, unsigned long end) { }
+
+#else /* CONFIG_MMU */
+
 #define flush_tlb()				BUG()
 #define flush_tlb_all()				BUG()
 #define flush_tlb_mm(mm)			BUG()
@@ -17,4 +63,6 @@
 #define flush_tlb_pgtables(mm, start, end)	BUG()
 #define flush_tlb_kernel_range(start, end)	BUG()
 
+#endif /* CONFIG_MMU */
+
 #endif /* _ASM_MICROBLAZE_TLBFLUSH_H */
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 5a3ffc3..65adad6 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -26,6 +28,10 @@
 #define VERIFY_READ	0
 #define VERIFY_WRITE	1
 
+#define __clear_user(addr, n)	(memset((void *)(addr), 0, (n)), 0)
+
+#ifndef CONFIG_MMU
+
 extern int ___range_ok(unsigned long addr, unsigned long size);
 
 #define __range_ok(addr, size) \
@@ -34,68 +40,68 @@
 #define access_ok(type, addr, size) (__range_ok((addr), (size)) == 0)
 #define __access_ok(add, size) (__range_ok((addr), (size)) == 0)
 
-extern inline int bad_user_access_length(void)
-{
-	return 0;
-}
+/* Undefined function to trigger linker error */
+extern int bad_user_access_length(void);
+
 /* FIXME this is function for optimalization -> memcpy */
-#define __get_user(var, ptr)					\
-	({							\
-		int __gu_err = 0;				\
-		switch (sizeof(*(ptr))) {			\
-		case 1:						\
-		case 2:						\
-		case 4:						\
-			(var) = *(ptr);				\
-			break;					\
-		case 8:						\
-			memcpy((void *) &(var), (ptr), 8);	\
-			break;					\
-		default:					\
-			(var) = 0;				\
-			__gu_err = __get_user_bad();		\
-			break;					\
-		}						\
-		__gu_err;					\
-	})
+#define __get_user(var, ptr)				\
+({							\
+	int __gu_err = 0;				\
+	switch (sizeof(*(ptr))) {			\
+	case 1:						\
+	case 2:						\
+	case 4:						\
+		(var) = *(ptr);				\
+		break;					\
+	case 8:						\
+		memcpy((void *) &(var), (ptr), 8);	\
+		break;					\
+	default:					\
+		(var) = 0;				\
+		__gu_err = __get_user_bad();		\
+		break;					\
+	}						\
+	__gu_err;					\
+})
 
 #define __get_user_bad()	(bad_user_access_length(), (-EFAULT))
 
+/* FIXME is not there defined __pu_val */
 #define __put_user(var, ptr)					\
-	({							\
-		int __pu_err = 0;				\
-		switch (sizeof(*(ptr))) {			\
-		case 1:						\
-		case 2:						\
-		case 4:						\
-			*(ptr) = (var);				\
-			break;					\
-		case 8: {					\
-			typeof(*(ptr)) __pu_val = var;		\
-			memcpy(ptr, &__pu_val, sizeof(__pu_val));\
-			}					\
-			break;					\
-		default:					\
-			__pu_err = __put_user_bad();		\
-			break;					\
-		}							\
-		__pu_err;						\
-	})
+({								\
+	int __pu_err = 0;					\
+	switch (sizeof(*(ptr))) {				\
+	case 1:							\
+	case 2:							\
+	case 4:							\
+		*(ptr) = (var);					\
+		break;						\
+	case 8: {						\
+		typeof(*(ptr)) __pu_val = (var);		\
+		memcpy(ptr, &__pu_val, sizeof(__pu_val));	\
+		}						\
+		break;						\
+	default:						\
+		__pu_err = __put_user_bad();			\
+		break;						\
+	}							\
+	__pu_err;						\
+})
 
 #define __put_user_bad()	(bad_user_access_length(), (-EFAULT))
 
-#define put_user(x, ptr)	__put_user(x, ptr)
-#define get_user(x, ptr)	__get_user(x, ptr)
+#define put_user(x, ptr)	__put_user((x), (ptr))
+#define get_user(x, ptr)	__get_user((x), (ptr))
 
-#define copy_to_user(to, from, n)		(memcpy(to, from, n), 0)
-#define copy_from_user(to, from, n)		(memcpy(to, from, n), 0)
+#define copy_to_user(to, from, n)	(memcpy((to), (from), (n)), 0)
+#define copy_from_user(to, from, n)	(memcpy((to), (from), (n)), 0)
 
-#define __copy_to_user(to, from, n)		(copy_to_user(to, from, n))
-#define __copy_from_user(to, from, n)		(copy_from_user(to, from, n))
-#define __copy_to_user_inatomic(to, from, n)	(__copy_to_user(to, from, n))
-#define __copy_from_user_inatomic(to, from, n)	(__copy_from_user(to, from, n))
-
-#define __clear_user(addr, n)	(memset((void *)addr, 0, n), 0)
+#define __copy_to_user(to, from, n)	(copy_to_user((to), (from), (n)))
+#define __copy_from_user(to, from, n)	(copy_from_user((to), (from), (n)))
+#define __copy_to_user_inatomic(to, from, n) \
+			(__copy_to_user((to), (from), (n)))
+#define __copy_from_user_inatomic(to, from, n) \
+			(__copy_from_user((to), (from), (n)))
 
 static inline unsigned long clear_user(void *addr, unsigned long size)
 {
@@ -104,13 +110,200 @@
 	return size;
 }
 
-/* Returns 0 if exception not found and fixup otherwise. */
+/* Returns 0 if exception not found and fixup otherwise.  */
 extern unsigned long search_exception_table(unsigned long);
 
+extern long strncpy_from_user(char *dst, const char *src, long count);
+extern long strnlen_user(const char *src, long count);
 
-extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern long strnlen_user(const char __user *src, long count);
-extern long __strncpy_from_user(char *dst, const char __user *src, long count);
+#else /* CONFIG_MMU */
+
+/*
+ * Address is valid if:
+ *  - "addr", "addr + size" and "size" are all below the limit
+ */
+#define access_ok(type, addr, size) \
+	(get_fs().seg > (((unsigned long)(addr)) | \
+		(size) | ((unsigned long)(addr) + (size))))
+
+/* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",
+ type?"WRITE":"READ",addr,size,get_fs().seg)) */
+
+/*
+ * All the __XXX versions macros/functions below do not perform
+ * access checking. It is assumed that the necessary checks have been
+ * already performed before the finction (macro) is called.
+ */
+
+#define get_user(x, ptr)						\
+({									\
+	access_ok(VERIFY_READ, (ptr), sizeof(*(ptr)))			\
+		? __get_user((x), (ptr)) : -EFAULT;			\
+})
+
+#define put_user(x, ptr)						\
+({									\
+	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr)))			\
+		? __put_user((x), (ptr)) : -EFAULT;			\
+})
+
+#define __get_user(x, ptr)						\
+({									\
+	unsigned long __gu_val;						\
+	/*unsigned long __gu_ptr = (unsigned long)(ptr);*/		\
+	long __gu_err;							\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_asm("lbu", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 2:								\
+		__get_user_asm("lhu", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 4:								\
+		__get_user_asm("lw", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	default:							\
+		__gu_val = 0; __gu_err = -EINVAL;			\
+	}								\
+	x = (__typeof__(*(ptr))) __gu_val;				\
+	__gu_err;							\
+})
+
+#define __get_user_asm(insn, __gu_ptr, __gu_val, __gu_err)		\
+({									\
+	__asm__ __volatile__ (						\
+			"1:"	insn	" %1, %2, r0;			\
+				addk	%0, r0, r0;			\
+			2:						\
+			.section .fixup,\"ax\";				\
+			3:	brid	2b;				\
+				addik	%0, r0, %3;			\
+			.previous;					\
+			.section __ex_table,\"a\";			\
+			.word	1b,3b;					\
+			.previous;"					\
+		: "=r"(__gu_err), "=r"(__gu_val)			\
+		: "r"(__gu_ptr), "i"(-EFAULT)				\
+	);								\
+})
+
+#define __put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __gu_val = x;				\
+	long __gu_err = 0;						\
+	switch (sizeof(__gu_val)) {					\
+	case 1:								\
+		__put_user_asm("sb", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 2: 							\
+		__put_user_asm("sh", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 4:								\
+		__put_user_asm("sw", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 8:								\
+		__put_user_asm_8((ptr), __gu_val, __gu_err);		\
+		break;							\
+	default:							\
+		__gu_err = -EINVAL;					\
+	}								\
+	__gu_err;							\
+})
+
+#define __put_user_asm_8(__gu_ptr, __gu_val, __gu_err)	\
+({							\
+__asm__ __volatile__ ("	lwi	%0, %1, 0;		\
+		1:	swi	%0, %2, 0;		\
+			lwi	%0, %1, 4;		\
+		2:	swi	%0, %2, 4;		\
+			addk	%0,r0,r0;		\
+		3:					\
+		.section .fixup,\"ax\";			\
+		4:	brid	3b;			\
+			addik	%0, r0, %3;		\
+		.previous;				\
+		.section __ex_table,\"a\";		\
+		.word	1b,4b,2b,4b;			\
+		.previous;"				\
+	: "=&r"(__gu_err)				\
+	: "r"(&__gu_val),				\
+	"r"(__gu_ptr), "i"(-EFAULT)			\
+	);						\
+})
+
+#define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err)	\
+({								\
+	__asm__ __volatile__ (					\
+			"1:"	insn	" %1, %2, r0;		\
+				addk	%0, r0, r0;		\
+			2:					\
+			.section .fixup,\"ax\";			\
+			3:	brid	2b;			\
+				addik	%0, r0, %3;		\
+			.previous;				\
+			.section __ex_table,\"a\";		\
+			.word	1b,3b;				\
+			.previous;"				\
+		: "=r"(__gu_err)				\
+		: "r"(__gu_val), "r"(__gu_ptr), "i"(-EFAULT)	\
+	);							\
+})
+
+/*
+ * Return: number of not copied bytes, i.e. 0 if OK or non-zero if fail.
+ */
+static inline int clear_user(char *to, int size)
+{
+	if (size && access_ok(VERIFY_WRITE, to, size)) {
+		__asm__ __volatile__ ("				\
+				1:				\
+					sb	r0, %2, r0;	\
+					addik	%0, %0, -1;	\
+					bneid	%0, 1b;		\
+					addik	%2, %2, 1;	\
+				2:				\
+				.section __ex_table,\"a\";	\
+				.word	1b,2b;			\
+				.section .text;"		\
+			: "=r"(size)				\
+			: "0"(size), "r"(to)
+		);
+	}
+	return size;
+}
+
+extern unsigned long __copy_tofrom_user(void __user *to,
+		const void __user *from, unsigned long size);
+
+#define copy_to_user(to, from, n)					\
+	(access_ok(VERIFY_WRITE, (to), (n)) ?				\
+		__copy_tofrom_user((void __user *)(to),			\
+			(__force const void __user *)(from), (n))	\
+		: -EFAULT)
+
+#define __copy_to_user(to, from, n)	copy_to_user((to), (from), (n))
+#define __copy_to_user_inatomic(to, from, n)	copy_to_user((to), (from), (n))
+
+#define copy_from_user(to, from, n)					\
+	(access_ok(VERIFY_READ, (from), (n)) ?				\
+		__copy_tofrom_user((__force void __user *)(to),		\
+			(void __user *)(from), (n))			\
+		: -EFAULT)
+
+#define __copy_from_user(to, from, n)	copy_from_user((to), (from), (n))
+#define __copy_from_user_inatomic(to, from, n) \
+		copy_from_user((to), (from), (n))
+
+extern int __strncpy_user(char *to, const char __user *from, int len);
+extern int __strnlen_user(const char __user *sstr, int len);
+
+#define strncpy_from_user(to, from, len)	\
+		(access_ok(VERIFY_READ, from, 1) ?	\
+			__strncpy_user(to, from, len) : -EFAULT)
+#define strnlen_user(str, len)	\
+		(access_ok(VERIFY_READ, str, 1) ? __strnlen_user(str, len) : 0)
+
+#endif /* CONFIG_MMU */
 
 /*
  * The exception table consists of pairs of addresses: the first is the
diff --git a/arch/microblaze/include/asm/unaligned.h b/arch/microblaze/include/asm/unaligned.h
index 9d66b64..3658d91 100644
--- a/arch/microblaze/include/asm/unaligned.h
+++ b/arch/microblaze/include/asm/unaligned.h
@@ -12,7 +12,8 @@
 
 # ifdef __KERNEL__
 
-# include <linux/unaligned/access_ok.h>
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
 # include <linux/unaligned/generic.h>
 
 # define get_unaligned	__get_unaligned_be
diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index da94bec..f4a5e19 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -15,5 +15,6 @@
 obj-$(CONFIG_SELFMOD)		+= selfmod.o
 obj-$(CONFIG_HEART_BEAT)	+= heartbeat.o
 obj-$(CONFIG_MODULES)		+= microblaze_ksyms.o module.o
+obj-$(CONFIG_MMU)		+= misc.o
 
 obj-y	+= entry$(MMUEXT).o
diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
index aabd9e9..7bc7b68 100644
--- a/arch/microblaze/kernel/asm-offsets.c
+++ b/arch/microblaze/kernel/asm-offsets.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
  * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
@@ -68,16 +69,26 @@
 
 	/* struct task_struct */
 	DEFINE(TS_THREAD_INFO, offsetof(struct task_struct, stack));
+#ifdef CONFIG_MMU
+	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
+	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
+	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
+	DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
+	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
+	DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
+	DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
+	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
+	BLANK();
+
+	DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
+	BLANK();
+#endif
 
 	/* struct thread_info */
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
-	DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
-	DEFINE(TI_STATUS, offsetof(struct thread_info, status));
-	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
 	DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
-	DEFINE(TI_RESTART_BLOCK, offsetof(struct thread_info, restart_block));
 	DEFINE(TI_CPU_CONTEXT, offsetof(struct thread_info, cpu_context));
 	BLANK();
 
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index 4b0f0fd..7de8492 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -87,6 +87,9 @@
 	base_addr = early_uartlite_console();
 	if (base_addr) {
 		early_console_initialized = 1;
+#ifdef CONFIG_MMU
+		early_console_reg_tlb_alloc(base_addr);
+#endif
 		early_printk("early_printk_console is enabled at 0x%08x\n",
 							base_addr);
 
diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S
index f24b126..1fce6b8 100644
--- a/arch/microblaze/kernel/entry-nommu.S
+++ b/arch/microblaze/kernel/entry-nommu.S
@@ -10,7 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
 #include <asm/entry.h>
 #include <asm/asm-offsets.h>
 #include <asm/registers.h>
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
new file mode 100644
index 0000000..91a0e7b
--- /dev/null
+++ b/arch/microblaze/kernel/entry.S
@@ -0,0 +1,1116 @@
+/*
+ * Low-level system-call handling, trap handlers and context-switching
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2003		John Williams <jwilliams@itee.uq.edu.au>
+ * Copyright (C) 2001,2002	NEC Corporation
+ * Copyright (C) 2001,2002	Miles Bader <miles@gnu.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Miles Bader <miles@gnu.org>
+ * Heavily modified by John Williams for Microblaze
+ */
+
+#include <linux/sys.h>
+#include <linux/linkage.h>
+
+#include <asm/entry.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/exceptions.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+#include <asm/page.h>
+#include <asm/unistd.h>
+
+#include <linux/errno.h>
+#include <asm/signal.h>
+
+/* The size of a state save frame. */
+#define STATE_SAVE_SIZE		(PT_SIZE + STATE_SAVE_ARG_SPACE)
+
+/* The offset of the struct pt_regs in a `state save frame' on the stack. */
+#define PTO	STATE_SAVE_ARG_SPACE /* 24 the space for args */
+
+#define C_ENTRY(name)	.globl name; .align 4; name
+
+/*
+ * Various ways of setting and clearing BIP in flags reg.
+ * This is mucky, but necessary using microblaze version that
+ * allows msr ops to write to BIP
+ */
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+	.macro	clear_bip
+	msrclr	r11, MSR_BIP
+	nop
+	.endm
+
+	.macro	set_bip
+	msrset	r11, MSR_BIP
+	nop
+	.endm
+
+	.macro	clear_eip
+	msrclr	r11, MSR_EIP
+	nop
+	.endm
+
+	.macro	set_ee
+	msrset	r11, MSR_EE
+	nop
+	.endm
+
+	.macro	disable_irq
+	msrclr	r11, MSR_IE
+	nop
+	.endm
+
+	.macro	enable_irq
+	msrset	r11, MSR_IE
+	nop
+	.endm
+
+	.macro	set_ums
+	msrset	r11, MSR_UMS
+	nop
+	msrclr	r11, MSR_VMS
+	nop
+	.endm
+
+	.macro	set_vms
+	msrclr	r11, MSR_UMS
+	nop
+	msrset	r11, MSR_VMS
+	nop
+	.endm
+
+	.macro	clear_vms_ums
+	msrclr	r11, MSR_VMS
+	nop
+	msrclr	r11, MSR_UMS
+	nop
+	.endm
+#else
+	.macro	clear_bip
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_BIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_bip
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_BIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	clear_eip
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_EIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_ee
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_EE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	disable_irq
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_IE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	enable_irq
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_IE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro set_ums
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_VMS
+	andni	r11, r11, MSR_UMS
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_vms
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_VMS
+	andni	r11, r11, MSR_UMS
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	clear_vms_ums
+	mfs	r11, rmsr
+	nop
+	andni	r11, r11, (MSR_VMS|MSR_UMS)
+	mts	rmsr,r11
+	nop
+	.endm
+#endif
+
+/* Define how to call high-level functions. With MMU, virtual mode must be
+ * enabled when calling the high-level function. Clobbers R11.
+ * VM_ON, VM_OFF, DO_JUMP_BIPCLR, DO_CALL
+ */
+
+/* turn on virtual protected mode save */
+#define VM_ON		\
+	set_ums;		\
+	rted	r0, 2f;	\
+2: nop;
+
+/* turn off virtual protected mode save and user mode save*/
+#define VM_OFF			\
+	clear_vms_ums;			\
+	rted	r0, TOPHYS(1f);	\
+1: nop;
+
+#define SAVE_REGS \
+	swi	r2, r1, PTO+PT_R2;	/* Save SDA */			\
+	swi	r5, r1, PTO+PT_R5;					\
+	swi	r6, r1, PTO+PT_R6;					\
+	swi	r7, r1, PTO+PT_R7;					\
+	swi	r8, r1, PTO+PT_R8;					\
+	swi	r9, r1, PTO+PT_R9;					\
+	swi	r10, r1, PTO+PT_R10;					\
+	swi	r11, r1, PTO+PT_R11;	/* save clobbered regs after rval */\
+	swi	r12, r1, PTO+PT_R12;					\
+	swi	r13, r1, PTO+PT_R13;	/* Save SDA2 */			\
+	swi	r14, r1, PTO+PT_PC;	/* PC, before IRQ/trap */	\
+	swi	r15, r1, PTO+PT_R15;	/* Save LP */			\
+	swi	r18, r1, PTO+PT_R18;	/* Save asm scratch reg */	\
+	swi	r19, r1, PTO+PT_R19;					\
+	swi	r20, r1, PTO+PT_R20;					\
+	swi	r21, r1, PTO+PT_R21;					\
+	swi	r22, r1, PTO+PT_R22;					\
+	swi	r23, r1, PTO+PT_R23;					\
+	swi	r24, r1, PTO+PT_R24;					\
+	swi	r25, r1, PTO+PT_R25;					\
+	swi	r26, r1, PTO+PT_R26;					\
+	swi	r27, r1, PTO+PT_R27;					\
+	swi	r28, r1, PTO+PT_R28;					\
+	swi	r29, r1, PTO+PT_R29;					\
+	swi	r30, r1, PTO+PT_R30;					\
+	swi	r31, r1, PTO+PT_R31;	/* Save current task reg */	\
+	mfs	r11, rmsr;		/* save MSR */			\
+	nop;								\
+	swi	r11, r1, PTO+PT_MSR;
+
+#define RESTORE_REGS \
+	lwi	r11, r1, PTO+PT_MSR;					\
+	mts	rmsr , r11;						\
+	nop;								\
+	lwi	r2, r1, PTO+PT_R2;	/* restore SDA */		\
+	lwi	r5, r1, PTO+PT_R5;					\
+	lwi	r6, r1, PTO+PT_R6;					\
+	lwi	r7, r1, PTO+PT_R7;					\
+	lwi	r8, r1, PTO+PT_R8;					\
+	lwi	r9, r1, PTO+PT_R9;					\
+	lwi	r10, r1, PTO+PT_R10;					\
+	lwi	r11, r1, PTO+PT_R11;	/* restore clobbered regs after rval */\
+	lwi	r12, r1, PTO+PT_R12;					\
+	lwi	r13, r1, PTO+PT_R13;	/* restore SDA2 */		\
+	lwi	r14, r1, PTO+PT_PC;	/* RESTORE_LINK PC, before IRQ/trap */\
+	lwi	r15, r1, PTO+PT_R15;	/* restore LP */		\
+	lwi	r18, r1, PTO+PT_R18;	/* restore asm scratch reg */	\
+	lwi	r19, r1, PTO+PT_R19;					\
+	lwi	r20, r1, PTO+PT_R20;					\
+	lwi	r21, r1, PTO+PT_R21;					\
+	lwi	r22, r1, PTO+PT_R22;					\
+	lwi	r23, r1, PTO+PT_R23;					\
+	lwi	r24, r1, PTO+PT_R24;					\
+	lwi	r25, r1, PTO+PT_R25;					\
+	lwi	r26, r1, PTO+PT_R26;					\
+	lwi	r27, r1, PTO+PT_R27;					\
+	lwi	r28, r1, PTO+PT_R28;					\
+	lwi	r29, r1, PTO+PT_R29;					\
+	lwi	r30, r1, PTO+PT_R30;					\
+	lwi	r31, r1, PTO+PT_R31;	/* Restore cur task reg */
+
+.text
+
+/*
+ * User trap.
+ *
+ * System calls are handled here.
+ *
+ * Syscall protocol:
+ * Syscall number in r12, args in r5-r10
+ * Return value in r3
+ *
+ * Trap entered via brki instruction, so BIP bit is set, and interrupts
+ * are masked. This is nice, means we don't have to CLI before state save
+ */
+C_ENTRY(_user_exception):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	addi	r14, r14, 4	/* return address is 4 byte after call */
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11);	/* Save r11 */
+
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/
+	beqi	r11, 1f;		/* Jump ahead if coming from user */
+/* Kernel-mode state save. */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+	tophys(r1,r11);
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */
+	SAVE_REGS
+
+	addi	r11, r0, 1; 		/* Was in kernel-mode. */
+	swi	r11, r1, PTO+PT_MODE; /* pt_regs -> kernel mode */
+	brid	2f;
+	nop;				/* Fill delay slot */
+
+/* User-mode state save.  */
+1:
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11);	/* restore r11 */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;	/* get stack from task_struct */
+/* calculate kernel stack pointer from task struct 8k */
+	addik	r1, r1, THREAD_SIZE;
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	SAVE_REGS
+
+	swi	r0, r1, PTO+PT_MODE;			/* Was in user-mode. */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1;		/* Store user SP.  */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));	/* Now we're in kernel-mode.  */
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE));	/* get saved current */
+	/* Save away the syscall number.  */
+	swi	r12, r1, PTO+PT_R0;
+	tovirt(r1,r1)
+
+	la	r15, r0, ret_from_trap-8
+/* where the trap should return need -8 to adjust for rtsd r15, 8*/
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid. The LP
+ * register should point to the location where
+ * the called function should return.  [note that MAKE_SYS_CALL uses label 1] */
+	/* See if the system call number is valid.  */
+	addi	r11, r12, -__NR_syscalls;
+	bgei	r11,1f;
+	/* Figure out which function to use for this system call.  */
+	/* Note Microblaze barrel shift is optional, so don't rely on it */
+	add	r12, r12, r12;			/* convert num -> ptr */
+	add	r12, r12, r12;
+
+	/* Trac syscalls and stored them to r0_ram */
+	lwi	r3, r12, 0x400 + TOPHYS(r0_ram)
+	addi	r3, r3, 1
+	swi	r3, r12, 0x400 + TOPHYS(r0_ram)
+
+	lwi	r12, r12, TOPHYS(sys_call_table); /* Function ptr */
+	/* Make the system call.  to r12*/
+	set_vms;
+	rtid	r12, 0;
+	nop;
+	/* The syscall number is invalid, return an error.  */
+1:	VM_ON;	/* RETURN() expects virtual mode*/
+	addi	r3, r0, -ENOSYS;
+	rtsd	r15,8;		/* looks like a normal subroutine return */
+	or 	r0, r0, r0
+
+
+/* Entry point used to return from a syscall/trap.  */
+/* We re-enable BIP bit before state restore */
+C_ENTRY(ret_from_trap):
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+/* See if returning to kernel mode, if so, skip resched &c.  */
+	bnei	r11, 2f;
+
+	/* We're returning to user mode, so check for various conditions that
+	 * trigger rescheduling. */
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK;	/* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;		/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+	swi	r3, r1, PTO + PT_R3; /* store syscall result */
+	swi	r4, r1, PTO + PT_R4;
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+	lwi	r3, r1, PTO + PT_R3; /* restore syscall result */
+	lwi	r4, r1, PTO + PT_R4;
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+	swi	r3, r1, PTO + PT_R3; /* store syscall result */
+	swi	r4, r1, PTO + PT_R4;
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi	r7, r0, 1;		/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+	lwi	r3, r1, PTO + PT_R3; /* restore syscall result */
+	lwi	r4, r1, PTO + PT_R4;
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK;	/* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+	lwi	r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
+	bri	6f;
+
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+	tovirt(r1,r1);
+6:
+TRAP_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+
+/* These syscalls need access to the struct pt_regs on the stack, so we
+   implement them in assembly (they're basically all wrappers anyway).  */
+
+C_ENTRY(sys_fork_wrapper):
+	addi	r5, r0, SIGCHLD			/* Arg 0: flags */
+	lwi	r6, r1, PTO+PT_R1	/* Arg 1: child SP (use parent's) */
+	la	r7, r1, PTO			/* Arg 2: parent context */
+	add	r8. r0, r0			/* Arg 3: (unused) */
+	add	r9, r0, r0;			/* Arg 4: (unused) */
+	add	r10, r0, r0;			/* Arg 5: (unused) */
+	brid	do_fork		/* Do real work (tail-call) */
+	nop;
+
+/* This the initial entry point for a new child thread, with an appropriate
+   stack in place that makes it look the the child is in the middle of an
+   syscall.  This function is actually `returned to' from switch_thread
+   (copy_thread makes ret_from_fork the return address in each new thread's
+   saved context).  */
+C_ENTRY(ret_from_fork):
+	bralid	r15, schedule_tail; /* ...which is schedule_tail's arg */
+	add	r3, r5, r0;	/* switch_thread returns the prev task */
+				/* ( in the delay slot ) */
+	add	r3, r0, r0;	/* Child's fork call should return 0. */
+	brid	ret_from_trap;	/* Do normal trap return */
+	nop;
+
+C_ENTRY(sys_vfork_wrapper):
+	la	r5, r1, PTO
+	brid	sys_vfork	/* Do real work (tail-call) */
+	nop
+
+C_ENTRY(sys_clone_wrapper):
+	bnei	r6, 1f;			/* See if child SP arg (arg 1) is 0. */
+	lwi	r6, r1, PTO+PT_R1;	/* If so, use paret's stack ptr */
+1:	la	r7, r1, PTO;			/* Arg 2: parent context */
+	add	r8, r0, r0;			/* Arg 3: (unused) */
+	add	r9, r0, r0;			/* Arg 4: (unused) */
+	add	r10, r0, r0;			/* Arg 5: (unused) */
+	brid	do_fork		/* Do real work (tail-call) */
+	nop;
+
+C_ENTRY(sys_execve_wrapper):
+	la	r8, r1, PTO;		/* add user context as 4th arg */
+	brid	sys_execve;	/* Do real work (tail-call).*/
+	nop;
+
+C_ENTRY(sys_sigsuspend_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r6, r1, PTO;		/* add user context as 2nd arg */
+	bralid	r15, sys_sigsuspend; /* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+C_ENTRY(sys_rt_sigsuspend_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r7, r1, PTO;		/* add user context as 3rd arg */
+	brlid	r15, sys_rt_sigsuspend;	/* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+
+C_ENTRY(sys_sigreturn_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r5, r1, PTO;		/* add user context as 1st arg */
+	brlid	r15, sys_sigreturn;	/* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+C_ENTRY(sys_rt_sigreturn_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r5, r1, PTO;		/* add user context as 1st arg */
+	brlid	r15, sys_rt_sigreturn	/* Do real work */
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+/*
+ * HW EXCEPTION rutine start
+ */
+
+#define SAVE_STATE	\
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */	\
+	set_bip;	/*equalize initial state for all possible entries*/\
+	clear_eip;							\
+	enable_irq;							\
+	set_ee;								\
+	/* See if already in kernel mode.*/				\
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));				\
+	beqi	r11, 1f;		/* Jump ahead if coming from user */\
+	/* Kernel-mode state save.  */					\
+	/* Reload kernel stack-ptr. */					\
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));			\
+	tophys(r1,r11);							\
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */	\
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */\
+	/* store return registers separately because			\
+	 * this macros is use for others exceptions */			\
+	swi	r3, r1, PTO + PT_R3;					\
+	swi	r4, r1, PTO + PT_R4;					\
+	SAVE_REGS							\
+	/* PC, before IRQ/trap - this is one instruction above */	\
+	swi	r17, r1, PTO+PT_PC;					\
+									\
+	addi	r11, r0, 1; 		/* Was in kernel-mode.  */	\
+	swi	r11, r1, PTO+PT_MODE; 	 				\
+	brid	2f;							\
+	nop;				/* Fill delay slot */		\
+1:	/* User-mode state save.  */					\
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\
+	tophys(r1,r1);							\
+	lwi	r1, r1, TS_THREAD_INFO;	/* get the thread info */	\
+	addik	r1, r1, THREAD_SIZE;	/* calculate kernel stack pointer */\
+	tophys(r1,r1);							\
+									\
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */\
+	/* store return registers separately because this macros	\
+	 * is use for others exceptions */				\
+	swi	r3, r1, PTO + PT_R3; 					\
+	swi	r4, r1, PTO + PT_R4;					\
+	SAVE_REGS							\
+	/* PC, before IRQ/trap - this is one instruction above FIXME*/	\
+	swi	r17, r1, PTO+PT_PC;					\
+									\
+	swi	r0, r1, PTO+PT_MODE; /* Was in user-mode.  */		\
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));			\
+	swi	r11, r1, PTO+PT_R1; /* Store user SP.  */		\
+	addi	r11, r0, 1;						\
+	swi	r11, r0, TOPHYS(PER_CPU(KM)); /* Now we're in kernel-mode.*/\
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\
+	/* Save away the syscall number.  */				\
+	swi	r0, r1, PTO+PT_R0;					\
+	tovirt(r1,r1)
+
+C_ENTRY(full_exception_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	/* adjust exception address for privileged instruction
+	 * for finding where is it */
+	addik	r17, r17, -4
+	SAVE_STATE /* Save registers */
+	/* FIXME this can be store directly in PT_ESR reg.
+	 * I tested it but there is a fault */
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc - 8
+	la	r5, r1, PTO		 /* parameter struct pt_regs * regs */
+	mfs	r6, resr
+	nop
+	mfs	r7, rfsr;		/* save FSR */
+	nop
+	la	r12, r0, full_exception
+	set_vms;
+	rtbd	r12, 0;
+	nop;
+
+/*
+ * Unaligned data trap.
+ *
+ * Unaligned data trap last on 4k page is handled here.
+ *
+ * Trap entered via exception, so EE bit is set, and interrupts
+ * are masked.  This is nice, means we don't have to CLI before state save
+ *
+ * The assembler routine is in "arch/microblaze/kernel/hw_exception_handler.S"
+ */
+C_ENTRY(unaligned_data_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	mfs	r3, resr		/* ESR */
+	nop
+	mfs	r4, rear		/* EAR */
+	nop
+	la	r7, r1, PTO		/* parameter struct pt_regs * regs */
+	la	r12, r0, _unaligned_data_exception
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+/*
+ * Page fault traps.
+ *
+ * If the real exception handler (from hw_exception_handler.S) didn't find
+ * the mapping for the process, then we're thrown here to handle such situation.
+ *
+ * Trap entered via exceptions, so EE bit is set, and interrupts
+ * are masked.  This is nice, means we don't have to CLI before state save
+ *
+ * Build a standard exception frame for TLB Access errors.  All TLB exceptions
+ * will bail out to this point if they can't resolve the lightweight TLB fault.
+ *
+ * The C function called is in "arch/microblaze/mm/fault.c", declared as:
+ * void do_page_fault(struct pt_regs *regs,
+ *				unsigned long address,
+ *				unsigned long error_code)
+ */
+/* data and intruction trap - which is choose is resolved int fault.c */
+C_ENTRY(page_fault_data_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	la	r5, r1, PTO		/* parameter struct pt_regs * regs */
+	mfs	r6, rear		/* parameter unsigned long address */
+	nop
+	mfs	r7, resr		/* parameter unsigned long error_code */
+	nop
+	la	r12, r0, do_page_fault
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+C_ENTRY(page_fault_instr_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	la	r5, r1, PTO		/* parameter struct pt_regs * regs */
+	mfs	r6, rear		/* parameter unsigned long address */
+	nop
+	ori	r7, r0, 0		/* parameter unsigned long error_code */
+	la	r12, r0, do_page_fault
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+/* Entry point used to return from an exception.  */
+C_ENTRY(ret_from_exc):
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+	bnei	r11, 2f;		/* See if returning to kernel mode, */
+					/* ... if so, skip resched &c.  */
+
+	/* We're returning to user mode, so check for various conditions that
+	   trigger rescheduling. */
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+/* Call the scheduler before returning from a syscall/trap. */
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+	/*
+	 * Handle a signal return; Pending signals should be in r18.
+	 *
+	 * Not all registers are saved by the normal trap/interrupt entry
+	 * points (for instance, call-saved registers (because the normal
+	 * C-compiler calling sequence in the kernel makes sure they're
+	 * preserved), and call-clobbered registers in the case of
+	 * traps), but signal handlers may want to examine or change the
+	 * complete register state.  Here we save anything not saved by
+	 * the normal entry sequence, so that it may be safely restored
+	 * (in a possibly modified form) after do_signal returns.
+	 * store return registers separately because this macros is use
+	 * for others exceptions */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi	r7, r0, 0;		/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	lwi	r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */
+	bri	6f;
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	tovirt(r1,r1);
+6:
+EXC_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+/*
+ * HW EXCEPTION rutine end
+ */
+
+/*
+ * Hardware maskable interrupts.
+ *
+ * The stack-pointer (r1) should have already been saved to the memory
+ * location PER_CPU(ENTRY_SP).
+ */
+C_ENTRY(_interrupt):
+/* MS: we are in physical address */
+/* Save registers, switch to proper stack, convert SP to virtual.*/
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP))
+	swi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+	/* MS: See if already in kernel mode. */
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));
+	beqi	r11, 1f; /* MS: Jump ahead if coming from user */
+
+/* Kernel-mode state save. */
+	or	r11, r1, r0
+	tophys(r1,r11); /* MS: I have in r1 physical address where stack is */
+/* MS: Save original SP - position PT_R1 to next stack frame 4 *1 - 152*/
+	swi	r11, r1, (PT_R1 - PT_SIZE);
+/* MS: restore r11 because of saving in SAVE_REGS */
+	lwi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+	/* save registers */
+/* MS: Make room on the stack -> activation record */
+	addik	r1, r1, -STATE_SAVE_SIZE;
+/* MS: store return registers separately because
+ * this macros is use for others exceptions */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS
+	/* MS: store mode */
+	addi	r11, r0, 1; /* MS: Was in kernel-mode. */
+	swi	r11, r1, PTO + PT_MODE; /* MS: and save it */
+	brid	2f;
+	nop; /* MS: Fill delay slot */
+
+1:
+/* User-mode state save. */
+/* MS: restore r11 -> FIXME move before SAVE_REG */
+	lwi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+ /* MS: get the saved current */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;
+	addik	r1, r1, THREAD_SIZE;
+	tophys(r1,r1);
+	/* save registers */
+	addik	r1, r1, -STATE_SAVE_SIZE;
+	swi	r3, r1, PTO+PT_R3;
+	swi	r4, r1, PTO+PT_R4;
+	SAVE_REGS
+	/* calculate mode */
+	swi	r0, r1, PTO + PT_MODE;
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1;
+	/* setup kernel mode to KM */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));
+
+2:
+	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+	swi	r0, r1, PTO + PT_R0;
+	tovirt(r1,r1)
+	la	r5, r1, PTO;
+	set_vms;
+	la	r11, r0, do_IRQ;
+	la	r15, r0, irq_call;
+irq_call:rtbd	r11, 0;
+	nop;
+
+/* MS: we are in virtual mode */
+ret_from_irq:
+	lwi	r11, r1, PTO + PT_MODE;
+	bnei	r11, 2f;
+
+	add	r11, r0, CURRENT_TASK;
+	lwi	r11, r11, TS_THREAD_INFO;
+	lwi	r11, r11, TI_FLAGS; /* MS: get flags from thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f
+	bralid	r15, schedule;
+	nop; /* delay slot */
+
+    /* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK;
+	lwi	r11, r11, TS_THREAD_INFO; /* MS: get thread info */
+	lwi	r11, r11, TI_FLAGS; /* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqid	r11, no_intr_resched
+/* Handle a signal return; Pending signals should be in r18. */
+	addi	r7, r0, 0; /* Arg 3: int in_syscall */
+	la	r5, r1, PTO; /* Arg 1: struct pt_regs *regs */
+	bralid	r15, do_signal;	/* Handle any signals */
+	add	r6, r0, r0; /* Arg 2: sigset_t *oldset */
+
+/* Finally, return to user state. */
+no_intr_resched:
+    /* Disable interrupts, we are now committed to the state restore */
+	disable_irq
+	swi	r0, r0, PER_CPU(KM); /* MS: Now officially in user state. */
+	add	r11, r0, CURRENT_TASK;
+	swi	r11, r0, PER_CPU(CURRENT_SAVE);
+	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */
+	lwi	r4, r1, PTO + PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE /* MS: Clean up stack space. */
+	lwi	r1, r1, PT_R1 - PT_SIZE;
+	bri	6f;
+/* MS: Return to kernel state. */
+2:	VM_OFF /* MS: turn off MMU */
+	tophys(r1,r1)
+	lwi	r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */
+	lwi	r4, r1, PTO + PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE	/* MS: Clean up stack space. */
+	tovirt(r1,r1);
+6:
+IRQ_return: /* MS: Make global symbol for debugging */
+	rtid	r14, 0
+	nop
+
+/*
+ * `Debug' trap
+ *  We enter dbtrap in "BIP" (breakpoint) mode.
+ *  So we exit the breakpoint mode with an 'rtbd' and proceed with the
+ *  original dbtrap.
+ *  however, wait to save state first
+ */
+C_ENTRY(_debug_exception):
+	/* BIP bit is set on entry, no interrupts can occur */
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP))
+
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */
+	set_bip;	/*equalize initial state for all possible entries*/
+	clear_eip;
+	enable_irq;
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/
+	beqi	r11, 1f;		/* Jump ahead if coming from user */
+	/* Kernel-mode state save.  */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+	tophys(r1,r11);
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS;
+
+	addi	r11, r0, 1; 		/* Was in kernel-mode.  */
+	swi	r11, r1, PTO + PT_MODE;
+	brid	2f;
+	nop;				/* Fill delay slot */
+1:      /* User-mode state save.  */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;	/* get the thread info */
+	addik	r1, r1, THREAD_SIZE;	/* calculate kernel stack pointer */
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS;
+
+	swi	r0, r1, PTO+PT_MODE; /* Was in user-mode.  */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1; /* Store user SP.  */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));	/* Now we're in kernel-mode.  */
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	/* Save away the syscall number.  */
+	swi	r0, r1, PTO+PT_R0;
+	tovirt(r1,r1)
+
+	addi	r5, r0, SIGTRAP		     /* send the trap signal */
+	add	r6, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	addk	r7, r0, r0		     /* 3rd param zero */
+
+	set_vms;
+	la	r11, r0, send_sig;
+	la	r15, r0, dbtrap_call;
+dbtrap_call:	rtbd	r11, 0;
+	nop;
+
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+	bnei	r11, 2f;
+
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+/* Call the scheduler before returning from a syscall/trap. */
+
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+	/* XXX Is PT_DTRACE handling needed here? */
+	/* XXX m68knommu also checks TASK_STATE & TASK_COUNTER here.  */
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+/* Handle a signal return; Pending signals should be in r18.  */
+	/* Not all registers are saved by the normal trap/interrupt entry
+	   points (for instance, call-saved registers (because the normal
+	   C-compiler calling sequence in the kernel makes sure they're
+	   preserved), and call-clobbered registers in the case of
+	   traps), but signal handlers may want to examine or change the
+	   complete register state.  Here we save anything not saved by
+	   the normal entry sequence, so that it may be safely restored
+	   (in a possibly modified form) after do_signal returns.  */
+
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi  r7, r0, 0;	/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+
+	lwi	r1, r1, PT_R1 - PT_SIZE;
+					/* Restore user stack pointer. */
+	bri	6f;
+
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	tovirt(r1,r1);
+6:
+DBTRAP_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+
+
+ENTRY(_switch_to)
+	/* prepare return value */
+	addk	r3, r0, r31
+
+	/* save registers in cpu_context */
+	/* use r11 and r12, volatile registers, as temp register */
+	/* give start of cpu_context for previous process */
+	addik	r11, r5, TI_CPU_CONTEXT
+	swi	r1, r11, CC_R1
+	swi	r2, r11, CC_R2
+	/* skip volatile registers.
+	 * they are saved on stack when we jumped to _switch_to() */
+	/* dedicated registers */
+	swi	r13, r11, CC_R13
+	swi	r14, r11, CC_R14
+	swi	r15, r11, CC_R15
+	swi	r16, r11, CC_R16
+	swi	r17, r11, CC_R17
+	swi	r18, r11, CC_R18
+	/* save non-volatile registers */
+	swi	r19, r11, CC_R19
+	swi	r20, r11, CC_R20
+	swi	r21, r11, CC_R21
+	swi	r22, r11, CC_R22
+	swi	r23, r11, CC_R23
+	swi	r24, r11, CC_R24
+	swi	r25, r11, CC_R25
+	swi	r26, r11, CC_R26
+	swi	r27, r11, CC_R27
+	swi	r28, r11, CC_R28
+	swi	r29, r11, CC_R29
+	swi	r30, r11, CC_R30
+	/* special purpose registers */
+	mfs	r12, rmsr
+	nop
+	swi	r12, r11, CC_MSR
+	mfs	r12, rear
+	nop
+	swi	r12, r11, CC_EAR
+	mfs	r12, resr
+	nop
+	swi	r12, r11, CC_ESR
+	mfs	r12, rfsr
+	nop
+	swi	r12, r11, CC_FSR
+
+	/* update r31, the current */
+	lwi	r31, r6, TI_TASK/* give me pointer to task which will be next */
+	/* stored it to current_save too */
+	swi	r31, r0, PER_CPU(CURRENT_SAVE)
+
+	/* get new process' cpu context and restore */
+	/* give me start where start context of next task */
+	addik	r11, r6, TI_CPU_CONTEXT
+
+	/* non-volatile registers */
+	lwi	r30, r11, CC_R30
+	lwi	r29, r11, CC_R29
+	lwi	r28, r11, CC_R28
+	lwi	r27, r11, CC_R27
+	lwi	r26, r11, CC_R26
+	lwi	r25, r11, CC_R25
+	lwi	r24, r11, CC_R24
+	lwi	r23, r11, CC_R23
+	lwi	r22, r11, CC_R22
+	lwi	r21, r11, CC_R21
+	lwi	r20, r11, CC_R20
+	lwi	r19, r11, CC_R19
+	/* dedicated registers */
+	lwi	r18, r11, CC_R18
+	lwi	r17, r11, CC_R17
+	lwi	r16, r11, CC_R16
+	lwi	r15, r11, CC_R15
+	lwi	r14, r11, CC_R14
+	lwi	r13, r11, CC_R13
+	/* skip volatile registers */
+	lwi	r2, r11, CC_R2
+	lwi	r1, r11, CC_R1
+
+	/* special purpose registers */
+	lwi	r12, r11, CC_FSR
+	mts	rfsr, r12
+	nop
+	lwi	r12, r11, CC_MSR
+	mts	rmsr, r12
+	nop
+
+	rtsd	r15, 8
+	nop
+
+ENTRY(_reset)
+	brai	0x70; /* Jump back to FS-boot */
+
+ENTRY(_break)
+	mfs	r5, rmsr
+	nop
+	swi	r5, r0, 0x250 + TOPHYS(r0_ram)
+	mfs	r5, resr
+	nop
+	swi	r5, r0, 0x254 + TOPHYS(r0_ram)
+	bri	0
+
+	/* These are compiled and loaded into high memory, then
+	 * copied into place in mach_early_setup */
+	.section	.init.ivt, "ax"
+	.org	0x0
+	/* this is very important - here is the reset vector */
+	/* in current MMU branch you don't care what is here - it is
+	 * used from bootloader site - but this is correct for FS-BOOT */
+	brai	0x70
+	nop
+	brai	TOPHYS(_user_exception); /* syscall handler */
+	brai	TOPHYS(_interrupt);	/* Interrupt handler */
+	brai	TOPHYS(_break);		/* nmi trap handler */
+	brai	TOPHYS(_hw_exception_handler);	/* HW exception handler */
+
+	.org	0x60
+	brai	TOPHYS(_debug_exception);	/* debug trap handler*/
+
+.section .rodata,"a"
+#include "syscall_table.S"
+
+syscall_table_size=(.-sys_call_table)
+
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 4a8a406..0cb64a3 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -21,9 +21,9 @@
 
 #include <asm/exceptions.h>
 #include <asm/entry.h>		/* For KM CPU var */
-#include <asm/uaccess.h>
-#include <asm/errno.h>
-#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
 #include <asm/current.h>
 
 #define MICROBLAZE_ILL_OPCODE_EXCEPTION	0x02
@@ -31,7 +31,7 @@
 #define MICROBLAZE_DBUS_EXCEPTION	0x04
 #define MICROBLAZE_DIV_ZERO_EXCEPTION	0x05
 #define MICROBLAZE_FPU_EXCEPTION	0x06
-#define MICROBLAZE_PRIVILEG_EXCEPTION	0x07
+#define MICROBLAZE_PRIVILEGED_EXCEPTION	0x07
 
 static DEFINE_SPINLOCK(die_lock);
 
@@ -66,6 +66,11 @@
 asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 							int fsr, int addr)
 {
+#ifdef CONFIG_MMU
+	int code;
+	addr = regs->pc;
+#endif
+
 #if 0
 	printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
 			type, user_mode(regs) ? "user" : "kernel", fsr,
@@ -74,7 +79,13 @@
 
 	switch (type & 0x1F) {
 	case MICROBLAZE_ILL_OPCODE_EXCEPTION:
-		_exception(SIGILL, regs, ILL_ILLOPC, addr);
+		if (user_mode(regs)) {
+			printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+			_exception(SIGILL, regs, ILL_ILLOPC, addr);
+			return;
+		}
+		printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+		die("opcode exception", regs, SIGBUS);
 		break;
 	case MICROBLAZE_IBUS_EXCEPTION:
 		if (user_mode(regs)) {
@@ -95,11 +106,16 @@
 		die("bus exception", regs, SIGBUS);
 		break;
 	case MICROBLAZE_DIV_ZERO_EXCEPTION:
-		printk(KERN_WARNING "Divide by zero exception\n");
-		_exception(SIGILL, regs, ILL_ILLOPC, addr);
+		if (user_mode(regs)) {
+			printk(KERN_WARNING "Divide by zero exception in user mode\n");
+			_exception(SIGILL, regs, ILL_ILLOPC, addr);
+			return;
+		}
+		printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+		die("Divide by exception", regs, SIGBUS);
 		break;
-
 	case MICROBLAZE_FPU_EXCEPTION:
+		printk(KERN_WARNING "FPU exception\n");
 		/* IEEE FP exception */
 		/* I removed fsr variable and use code var for storing fsr */
 		if (fsr & FSR_IO)
@@ -115,7 +131,20 @@
 		_exception(SIGFPE, regs, fsr, addr);
 		break;
 
+#ifdef CONFIG_MMU
+	case MICROBLAZE_PRIVILEGED_EXCEPTION:
+		printk(KERN_WARNING "Privileged exception\n");
+		/* "brk r0,r0" - used as debug breakpoint */
+		if (get_user(code, (unsigned long *)regs->pc) == 0
+			&& code == 0x980c0000) {
+			_exception(SIGTRAP, regs, TRAP_BRKPT, addr);
+		} else {
+			_exception(SIGILL, regs, ILL_PRVOPC, addr);
+		}
+		break;
+#endif
 	default:
+	/* FIXME what to do in unexpected exception */
 		printk(KERN_WARNING "Unexpected exception %02x "
 			"PC=%08x in %s mode\n", type, (unsigned int) addr,
 			kernel_mode(regs) ? "kernel" : "user");
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 319dc35..e568d6e 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -3,6 +3,26 @@
  * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
+ * MMU code derived from arch/ppc/kernel/head_4xx.S:
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -12,6 +32,22 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_MMU
+#include <asm/setup.h> /* COMMAND_LINE_SIZE */
+#include <asm/mmu.h>
+#include <asm/processor.h>
+
+.data
+.global empty_zero_page
+.align 12
+empty_zero_page:
+	.space	4096
+.global swapper_pg_dir
+swapper_pg_dir:
+	.space	4096
+
+#endif /* CONFIG_MMU */
+
 	.text
 ENTRY(_start)
 	mfs	r1, rmsr
@@ -32,6 +68,123 @@
 	addik	r3, r3, -4 /* descrement loop */
 no_fdt_arg:
 
+#ifdef CONFIG_MMU
+
+#ifndef CONFIG_CMDLINE_BOOL
+/*
+ * handling command line
+ * copy command line to __init_end. There is space for storing command line.
+ */
+	or	r6, r0, r0		/* incremment */
+	ori	r4, r0, __init_end	/* load address of command line */
+	tophys(r4,r4)			/* convert to phys address */
+	ori	r3, r0, COMMAND_LINE_SIZE - 1 /* number of loops */
+_copy_command_line:
+	lbu	r7, r5, r6 /* r7=r5+r6 - r5 contain pointer to command line */
+	sb	r7, r4, r6		/* addr[r4+r6]= r7*/
+	addik	r6, r6, 1		/* increment counting */
+	bgtid	r3, _copy_command_line	/* loop for all entries       */
+	addik	r3, r3, -1		/* descrement loop */
+	addik	r5, r4, 0		/* add new space for command line */
+	tovirt(r5,r5)
+#endif /* CONFIG_CMDLINE_BOOL */
+
+#ifdef NOT_COMPILE
+/* save bram context */
+	or	r6, r0, r0				/* incremment */
+	ori	r4, r0, TOPHYS(_bram_load_start)	/* save bram context */
+	ori	r3, r0, (LMB_SIZE - 4)
+_copy_bram:
+	lw	r7, r0, r6		/* r7 = r0 + r6 */
+	sw	r7, r4, r6		/* addr[r4 + r6] = r7*/
+	addik	r6, r6, 4		/* increment counting */
+	bgtid	r3, _copy_bram		/* loop for all entries */
+	addik	r3, r3, -4		/* descrement loop */
+#endif
+	/* We have to turn on the MMU right away. */
+
+	/*
+	 * Set up the initial MMU state so we can do the first level of
+	 * kernel initialization.  This maps the first 16 MBytes of memory 1:1
+	 * virtual to physical.
+	 */
+	nop
+	addik	r3, r0, 63		/* Invalidate all TLB entries */
+_invalidate:
+	mts	rtlbx, r3
+	mts	rtlbhi, r0			/* flush: ensure V is clear   */
+	bgtid	r3, _invalidate		/* loop for all entries       */
+	addik	r3, r3, -1
+	/* sync */
+
+	/*
+	 * We should still be executing code at physical address area
+	 * RAM_BASEADDR at this point. However, kernel code is at
+	 * a virtual address. So, set up a TLB mapping to cover this once
+	 * translation is enabled.
+	 */
+
+	addik	r3,r0, CONFIG_KERNEL_START /* Load the kernel virtual address */
+	tophys(r4,r3)			/* Load the kernel physical address */
+
+	mts	rpid,r0			/* Load the kernel PID */
+	nop
+	bri	4
+
+	/*
+	 * Configure and load two entries into TLB slots 0 and 1.
+	 * In case we are pinning TLBs, these are reserved in by the
+	 * other TLB functions.  If not reserving, then it doesn't
+	 * matter where they are loaded.
+	 */
+	andi	r4,r4,0xfffffc00	/* Mask off the real page number */
+	ori	r4,r4,(TLB_WR | TLB_EX)	/* Set the write and execute bits */
+
+	andi	r3,r3,0xfffffc00	/* Mask off the effective page number */
+	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+	mts     rtlbx,r0		/* TLB slow 0 */
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	addik	r4, r4, 0x01000000	/* Map next 16 M entries */
+	addik	r3, r3, 0x01000000
+
+	ori	r6,r0,1			/* TLB slot 1 */
+	mts     rtlbx,r6
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	/*
+	 * Load a TLB entry for LMB, since we need access to
+	 * the exception vectors, using a 4k real==virtual mapping.
+	 */
+	ori	r6,r0,3			/* TLB slot 3 */
+	mts     rtlbx,r6
+
+	ori	r4,r0,(TLB_WR | TLB_EX)
+	ori	r3,r0,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	/*
+	 * We now have the lower 16 Meg of RAM mapped into TLB entries, and the
+	 * caches ready to work.
+	 */
+turn_on_mmu:
+	ori	r15,r0,start_here
+	ori	r4,r0,MSR_KERNEL_VMS
+	mts	rmsr,r4
+	nop
+	rted	r15,0			/* enables MMU */
+	nop
+
+start_here:
+#endif /* CONFIG_MMU */
+
 	/* Initialize small data anchors */
 	la	r13, r0, _KERNEL_SDA_BASE_
 	la	r2, r0, _KERNEL_SDA2_BASE_
@@ -51,6 +204,43 @@
 	brald	r15, r8
 	nop
 
+#ifndef CONFIG_MMU
 	la	r15, r0, machine_halt
 	braid	start_kernel
 	nop
+#else
+	/*
+	 * Initialize the MMU.
+	 */
+	bralid	r15, mmu_init
+	nop
+
+	/* Go back to running unmapped so we can load up new values
+	 * and change to using our exception vectors.
+	 * On the MicroBlaze, all we invalidate the used TLB entries to clear
+	 * the old 16M byte TLB mappings.
+	 */
+	ori	r15,r0,TOPHYS(kernel_load_context)
+	ori	r4,r0,MSR_KERNEL
+	mts	rmsr,r4
+	nop
+	bri	4
+	rted	r15,0
+	nop
+
+	/* Load up the kernel context */
+kernel_load_context:
+	# Keep entry 0 and 1 valid. Entry 3 mapped to LMB can go away.
+	ori	r5,r0,3
+	mts     rtlbx,r5
+	nop
+	mts	rtlbhi,r0
+	nop
+	addi	r15, r0, machine_halt
+	ori	r17, r0, start_kernel
+	ori	r4, r0, MSR_KERNEL_VMS
+	mts	rmsr, r4
+	nop
+	rted	r17, 0		/* enable MMU and jump to start_kernel */
+	nop
+#endif /* CONFIG_MMU */
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index cf9486d..9d591cd 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -53,6 +53,12 @@
  *   - Illegal instruction opcode
  *   - Divide-by-zero
  *
+ *   - Privileged instruction exception (MMU)
+ *   - Data storage exception (MMU)
+ *   - Instruction storage exception (MMU)
+ *   - Data TLB miss exception (MMU)
+ *   - Instruction TLB miss exception (MMU)
+ *
  * Note we disable interrupts during exception handling, otherwise we will
  * possibly get multiple re-entrancy if interrupt handles themselves cause
  * exceptions. JW
@@ -71,9 +77,24 @@
 #include <asm/asm-offsets.h>
 
 /* Helpful Macros */
+#ifndef CONFIG_MMU
 #define EX_HANDLER_STACK_SIZ	(4*19)
+#endif
 #define NUM_TO_REG(num)		r ## num
 
+#ifdef CONFIG_MMU
+/* FIXME you can't change first load of MSR because there is
+ * hardcoded jump bri 4 */
+	#define RESTORE_STATE			\
+		lwi	r3, r1, PT_R3;		\
+		lwi	r4, r1, PT_R4;		\
+		lwi	r5, r1, PT_R5;		\
+		lwi	r6, r1, PT_R6;		\
+		lwi	r11, r1, PT_R11;	\
+		lwi	r31, r1, PT_R31;	\
+		lwi	r1, r0, TOPHYS(r0_ram + 0);
+#endif /* CONFIG_MMU */
+
 #define LWREG_NOP			\
 	bri	ex_handler_unhandled;	\
 	nop;
@@ -106,6 +127,54 @@
 	or	r3, r0, NUM_TO_REG (regnum);		\
 	bri	ex_sw_tail;
 
+#ifdef CONFIG_MMU
+	#define R3_TO_LWREG_VM_V(regnum)		\
+		brid	ex_lw_end_vm;			\
+		swi	r3, r7, 4 * regnum;
+
+	#define R3_TO_LWREG_VM(regnum)			\
+		brid	ex_lw_end_vm;			\
+		or	NUM_TO_REG (regnum), r0, r3;
+
+	#define SWREG_TO_R3_VM_V(regnum)		\
+		brid	ex_sw_tail_vm;			\
+		lwi	r3, r7, 4 * regnum;
+
+	#define SWREG_TO_R3_VM(regnum)			\
+		brid	ex_sw_tail_vm;			\
+		or	r3, r0, NUM_TO_REG (regnum);
+
+	/* Shift right instruction depending on available configuration */
+	#if CONFIG_XILINX_MICROBLAZE0_USE_BARREL > 0
+	#define BSRLI(rD, rA, imm)	\
+		bsrli rD, rA, imm
+	#elif CONFIG_XILINX_MICROBLAZE0_USE_DIV > 0
+	#define BSRLI(rD, rA, imm)	\
+		ori rD, r0, (1 << imm);	\
+		idivu rD, rD, rA
+	#else
+	#define BSRLI(rD, rA, imm) BSRLI ## imm (rD, rA)
+	/* Only the used shift constants defined here - add more if needed */
+	#define BSRLI2(rD, rA)				\
+		srl rD, rA;		/* << 1 */	\
+		srl rD, rD;		/* << 2 */
+	#define BSRLI10(rD, rA)				\
+		srl rD, rA;		/* << 1 */	\
+		srl rD, rD;		/* << 2 */	\
+		srl rD, rD;		/* << 3 */	\
+		srl rD, rD;		/* << 4 */	\
+		srl rD, rD;		/* << 5 */	\
+		srl rD, rD;		/* << 6 */	\
+		srl rD, rD;		/* << 7 */	\
+		srl rD, rD;		/* << 8 */	\
+		srl rD, rD;		/* << 9 */	\
+		srl rD, rD		/* << 10 */
+	#define BSRLI20(rD, rA)		\
+		BSRLI10(rD, rA);	\
+		BSRLI10(rD, rD)
+	#endif
+#endif /* CONFIG_MMU */
+
 .extern other_exception_handler /* Defined in exception.c */
 
 /*
@@ -163,34 +232,119 @@
 
 /* wrappers to restore state before coming to entry.S */
 
+#ifdef CONFIG_MMU
+.section .rodata
+.align 4
+_MB_HW_ExceptionVectorTable:
+/*  0 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+/*  1 - Unaligned data access exception */
+	.long	TOPHYS(handle_unaligned_ex)
+/*  2 - Illegal op-code exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  3 - Instruction bus error exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  4 - Data bus error exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  5 - Divide by zero exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  6 - Floating point unit exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  7 - Privileged instruction exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  8 - 15 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+/* 16 - Data storage exception */
+	.long	TOPHYS(handle_data_storage_exception)
+/* 17 - Instruction storage exception */
+	.long	TOPHYS(handle_instruction_storage_exception)
+/* 18 - Data TLB miss exception */
+	.long	TOPHYS(handle_data_tlb_miss_exception)
+/* 19 - Instruction TLB miss exception */
+	.long	TOPHYS(handle_instruction_tlb_miss_exception)
+/* 20 - 31 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+#endif
+
 .global _hw_exception_handler
 .section .text
 .align 4
 .ent _hw_exception_handler
 _hw_exception_handler:
+#ifndef CONFIG_MMU
 	addik	r1, r1, -(EX_HANDLER_STACK_SIZ); /* Create stack frame */
+#else
+	swi	r1, r0, TOPHYS(r0_ram + 0); /* GET_SP */
+	/* Save date to kernel memory. Here is the problem
+	 * when you came from user space */
+	ori	r1, r0, TOPHYS(r0_ram + 28);
+#endif
 	swi	r3, r1, PT_R3
 	swi	r4, r1, PT_R4
 	swi	r5, r1, PT_R5
 	swi	r6, r1, PT_R6
 
-	mfs	r5, rmsr;
-	nop
-	swi	r5, r1, 0;
-	mfs	r4, rbtr	/* Save BTR before jumping to handler */
-	nop
+#ifdef CONFIG_MMU
+	swi	r11, r1, PT_R11
+	swi	r31, r1, PT_R31
+	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
+#endif
+
 	mfs	r3, resr
 	nop
+	mfs	r4, rear;
+	nop
 
+#ifndef CONFIG_MMU
 	andi	r5, r3, 0x1000;		/* Check ESR[DS] */
 	beqi	r5, not_in_delay_slot;	/* Branch if ESR[DS] not set */
 	mfs	r17, rbtr;	/* ESR[DS] set - return address in BTR */
 	nop
 not_in_delay_slot:
 	swi	r17, r1, PT_R17
+#endif
 
 	andi	r5, r3, 0x1F;		/* Extract ESR[EXC] */
 
+#ifdef CONFIG_MMU
+	/* Calculate exception vector offset = r5 << 2 */
+	addk	r6, r5, r5; /* << 1 */
+	addk	r6, r6, r6; /* << 2 */
+
+/* counting which exception happen */
+	lwi	r5, r0, 0x200 + TOPHYS(r0_ram)
+	addi	r5, r5, 1
+	swi	r5, r0, 0x200 + TOPHYS(r0_ram)
+	lwi	r5, r6, 0x200 + TOPHYS(r0_ram)
+	addi	r5, r5, 1
+	swi	r5, r6, 0x200 + TOPHYS(r0_ram)
+/* end */
+	/* Load the HW Exception vector */
+	lwi	r6, r6, TOPHYS(_MB_HW_ExceptionVectorTable)
+	bra	r6
+
+full_exception_trapw:
+	RESTORE_STATE
+	bri	full_exception_trap
+#else
 	/* Exceptions enabled here. This will allow nested exceptions */
 	mfs	r6, rmsr;
 	nop
@@ -254,6 +408,7 @@
 	lwi	r18, r1, PT_R18
 
 	bri	ex_handler_done; /* Complete exception handling */
+#endif
 
 /* 0x01 - Unaligned data access exception
  * This occurs when a word access is not aligned on a word boundary,
@@ -265,11 +420,28 @@
 handle_unaligned_ex:
 	/* Working registers already saved: R3, R4, R5, R6
 	 *  R3 = ESR
-	 *  R4 = BTR
+	 *  R4 = EAR
 	 */
-	mfs	r4, rear;
+#ifdef CONFIG_MMU
+	andi	r6, r3, 0x1000			/* Check ESR[DS] */
+	beqi	r6, _no_delayslot		/* Branch if ESR[DS] not set */
+	mfs	r17, rbtr;	/* ESR[DS] set - return address in BTR */
 	nop
+_no_delayslot:
+#endif
 
+#ifdef CONFIG_MMU
+	/* Check if unaligned address is last on a 4k page */
+		andi	r5, r4, 0xffc
+		xori	r5, r5, 0xffc
+		bnei	r5, _unaligned_ex2
+	_unaligned_ex1:
+		RESTORE_STATE;
+/* Another page must be accessed or physical address not in page table */
+		bri	unaligned_data_trap
+
+	_unaligned_ex2:
+#endif
 	andi	r6, r3, 0x3E0; /* Mask and extract the register operand */
 	srl	r6, r6; /* r6 >> 5 */
 	srl	r6, r6;
@@ -278,6 +450,45 @@
 	srl	r6, r6;
 	/* Store the register operand in a temporary location */
 	sbi	r6, r0, TOPHYS(ex_reg_op);
+#ifdef CONFIG_MMU
+	/* Get physical address */
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	ori	r5, r0, CONFIG_KERNEL_START
+	cmpu	r5, r4, r5
+	bgti	r5, _unaligned_ex3
+	ori	r5, r0, swapper_pg_dir
+	bri	_unaligned_ex4
+
+	/* Get the PGD for the current thread. */
+_unaligned_ex3: /* user thread */
+	addi	r5 ,CURRENT_TASK, TOPHYS(0); /* get current task address */
+	lwi	r5, r5, TASK_THREAD + PGDIR
+_unaligned_ex4:
+	tophys(r5,r5)
+	BSRLI(r6,r4,20)			/* Create L1 (pgdir/pmd) address */
+	andi	r6, r6, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r5,r5,0xfffff003" */
+	or	r5, r5, r6
+	lwi	r6, r5, 0		/* Get L1 entry */
+	andi	r5, r6, 0xfffff000	/* Extract L2 (pte) base address. */
+	beqi	r5, _unaligned_ex1	/* Bail if no table */
+
+	tophys(r5,r5)
+	BSRLI(r6,r4,10)			/* Compute PTE address */
+	andi	r6, r6, 0xffc
+	andi	r5, r5, 0xfffff003
+	or	r5, r5, r6
+	lwi	r5, r5, 0		/* Get Linux PTE */
+
+	andi	r6, r5, _PAGE_PRESENT
+	beqi	r6, _unaligned_ex1	/* Bail if no page */
+
+	andi	r5, r5, 0xfffff000	/* Extract RPN */
+	andi	r4, r4, 0x00000fff	/* Extract offset */
+	or	r4, r4, r5		/* Create physical address */
+#endif /* CONFIG_MMU */
 
 	andi	r6, r3, 0x400; /* Extract ESR[S] */
 	bnei	r6, ex_sw;
@@ -355,6 +566,7 @@
 ex_sw_end: /* Exception handling of store word, ends. */
 
 ex_handler_done:
+#ifndef CONFIG_MMU
 	lwi	r5, r1, 0 /* RMSR */
 	mts	rmsr, r5
 	nop
@@ -366,13 +578,455 @@
 
 	rted	r17, 0
 	addik	r1, r1, (EX_HANDLER_STACK_SIZ); /* Restore stack frame */
+#else
+	RESTORE_STATE;
+	rted	r17, 0
+	nop
+#endif
 
+#ifdef CONFIG_MMU
+	/* Exception vector entry code. This code runs with address translation
+	 * turned off (i.e. using physical addresses). */
+
+	/* Exception vectors. */
+
+	/* 0x10 - Data Storage Exception
+	 * This happens for just a few reasons. U0 set (but we don't do that),
+	 * or zone protection fault (user violation, write to protected page).
+	 * If this is just an update of modified status, we do that quickly
+	 * and exit. Otherwise, we call heavyweight functions to do the work.
+	 */
+	handle_data_storage_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables.
+		 */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex3
+		/* First, check if it was a zone fault (which means a user
+		 * tried to access a kernel or read-protected page - always
+		 * a SEGV). All other faults here must be stores, so no
+		 * need to check ESR_S as well. */
+		mfs	r4, resr
+		nop
+		andi	r4, r4, 0x800		/* ESR_Z - zone protection */
+		bnei	r4, ex2
+
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex4
+
+		/* Get the PGD for the current thread. */
+	ex3:
+		/* First, check if it was a zone fault (which means a user
+		 * tried to access a kernel or read-protected page - always
+		 * a SEGV). All other faults here must be stores, so no
+		 * need to check ESR_S as well. */
+		mfs	r4, resr
+		nop
+		andi	r4, r4, 0x800		/* ESR_Z */
+		bnei	r4, ex2
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex4:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex2			/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_RW	/* Is it writeable? */
+		beqi	r6, ex2			/* Bail if not */
+
+		/* Update 'changed' */
+		ori	r4, r4, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+		swi	r4, r5, 0		/* Update Linux page table */
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+		ori	r4, r4, _PAGE_HWEXEC	/* make it executable */
+
+		/* find the TLB index that caused the fault. It has to be here*/
+		mts	rtlbsx, r3
+		nop
+		mfs	r5, rtlbx		/* DEBUG: TBD */
+		nop
+		mts	rtlblo, r4		/* Load TLB LO */
+		nop
+						/* Will sync shadow TLBs */
+
+		/* Done...restore registers and get out of here. */
+		mts	rpid, r11
+		nop
+		bri 4
+
+		RESTORE_STATE;
+		rted	r17, 0
+		nop
+	ex2:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out. */
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		bri	page_fault_data_trap
+
+
+	/* 0x11 - Instruction Storage Exception
+	 * This is caused by a fetch from non-execute or guarded pages. */
+	handle_instruction_storage_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+
+		mfs	r3, rear		/* Get faulting address */
+		nop
+		RESTORE_STATE;
+		bri	page_fault_instr_trap
+
+	/* 0x12 - Data TLB Miss Exception
+	 * As the name implies, translation is not in the MMU, so search the
+	 * page tables and fix it. The only purpose of this function is to
+	 * load TLB entries from the page table if they exist.
+	 */
+	handle_data_tlb_miss_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables. */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex5
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex6
+
+		/* Get the PGD for the current thread. */
+	ex5:
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex6:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex7			/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_PRESENT
+		beqi	r6, ex7
+
+		ori	r4, r4, _PAGE_ACCESSED
+		swi	r4, r5, 0
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+
+		bri	finish_tlb_load
+	ex7:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out.
+		 */
+		mts	rpid, r11
+		nop
+		bri	4
+		RESTORE_STATE;
+		bri	page_fault_data_trap
+
+	/* 0x13 - Instruction TLB Miss Exception
+	 * Nearly the same as above, except we get our information from
+	 * different registers and bailout to a different point.
+	 */
+	handle_instruction_tlb_miss_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 *  R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables.
+		 */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex8
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex9
+
+		/* Get the PGD for the current thread. */
+	ex8:
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex9:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex10		/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_PRESENT
+		beqi	r6, ex7
+
+		ori	r4, r4, _PAGE_ACCESSED
+		swi	r4, r5, 0
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+
+		bri	finish_tlb_load
+	ex10:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out.
+		 */
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		bri	page_fault_instr_trap
+
+/* Both the instruction and data TLB miss get to this point to load the TLB.
+ *	r3 - EA of fault
+ *	r4 - TLB LO (info from Linux PTE)
+ *	r5, r6 - available to use
+ *	PID - loaded with proper value when we get here
+ *	Upon exit, we reload everything and RFI.
+ * A common place to load the TLB.
+ */
+	tlb_index:
+		.long	1 /* MS: storing last used tlb index */
+	finish_tlb_load:
+		/* MS: load the last used TLB index. */
+		lwi	r5, r0, TOPHYS(tlb_index)
+		addik	r5, r5, 1 /* MS: inc tlb_index -> use next one */
+
+/* MS: FIXME this is potential fault, because this is mask not count */
+		andi	r5, r5, (MICROBLAZE_TLB_SIZE-1)
+		ori	r6, r0, 1
+		cmp	r31, r5, r6
+		blti	r31, sem
+		addik	r5, r6, 1
+	sem:
+		/* MS: save back current TLB index */
+		swi	r5, r0, TOPHYS(tlb_index)
+
+		ori	r4, r4, _PAGE_HWEXEC	/* make it executable */
+		mts	rtlbx, r5		/* MS: save current TLB */
+		nop
+		mts	rtlblo,	r4		/* MS: save to TLB LO */
+		nop
+
+		/* Create EPN. This is the faulting address plus a static
+		 * set of bits. These are size, valid, E, U0, and ensure
+		 * bits 20 and 21 are zero.
+		 */
+		andi	r3, r3, 0xfffff000
+		ori	r3, r3, 0x0c0
+		mts	rtlbhi,	r3		/* Load TLB HI */
+		nop
+
+		/* Done...restore registers and get out of here. */
+	ex12:
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		rted	r17, 0
+		nop
+
+	/* extern void giveup_fpu(struct task_struct *prev)
+	 *
+	 * The MicroBlaze processor may have an FPU, so this should not just
+	 * return: TBD.
+	 */
+	.globl giveup_fpu;
+	.align 4;
+	giveup_fpu:
+		bralid	r15,0			/* TBD */
+		nop
+
+	/* At present, this routine just hangs. - extern void abort(void) */
+	.globl abort;
+	.align 4;
+	abort:
+		br	r0
+
+	.globl set_context;
+	.align 4;
+	set_context:
+		mts	rpid, r5	/* Shadow TLBs are automatically */
+		nop
+		bri	4		/* flushed by changing PID */
+		rtsd	r15,8
+		nop
+
+#endif
 .end _hw_exception_handler
 
+#ifdef CONFIG_MMU
+/* Unaligned data access exception last on a 4k page for MMU.
+ * When this is called, we are in virtual mode with exceptions enabled
+ * and registers 1-13,15,17,18 saved.
+ *
+ * R3 = ESR
+ * R4 = EAR
+ * R7 = pointer to saved registers (struct pt_regs *regs)
+ *
+ * This handler perform the access, and returns via ret_from_exc.
+ */
+.global _unaligned_data_exception
+.ent _unaligned_data_exception
+_unaligned_data_exception:
+	andi	r8, r3, 0x3E0;	/* Mask and extract the register operand */
+	BSRLI(r8,r8,2);		/* r8 >> 2 = register operand * 8 */
+	andi	r6, r3, 0x400;	/* Extract ESR[S] */
+	bneid	r6, ex_sw_vm;
+	andi	r6, r3, 0x800;	/* Extract ESR[W] - delay slot */
+ex_lw_vm:
+	beqid	r6, ex_lhw_vm;
+	lbui	r5, r4, 0;	/* Exception address in r4 - delay slot */
+/* Load a word, byte-by-byte from destination address and save it in tmp space*/
+	la	r6, r0, ex_tmp_data_loc_0;
+	sbi	r5, r6, 0;
+	lbui	r5, r4, 1;
+	sbi	r5, r6, 1;
+	lbui	r5, r4, 2;
+	sbi	r5, r6, 2;
+	lbui	r5, r4, 3;
+	sbi	r5, r6, 3;
+	brid	ex_lw_tail_vm;
+/* Get the destination register value into r3 - delay slot */
+	lwi	r3, r6, 0;
+ex_lhw_vm:
+	/* Load a half-word, byte-by-byte from destination address and
+	 * save it in tmp space */
+	la	r6, r0, ex_tmp_data_loc_0;
+	sbi	r5, r6, 0;
+	lbui	r5, r4, 1;
+	sbi	r5, r6, 1;
+	lhui	r3, r6, 0;	/* Get the destination register value into r3 */
+ex_lw_tail_vm:
+	/* Form load_word jump table offset (lw_table_vm + (8 * regnum)) */
+	addik	r5, r8, lw_table_vm;
+	bra	r5;
+ex_lw_end_vm:			/* Exception handling of load word, ends */
+	brai	ret_from_exc;
+ex_sw_vm:
+/* Form store_word jump table offset (sw_table_vm + (8 * regnum)) */
+	addik	r5, r8, sw_table_vm;
+	bra	r5;
+ex_sw_tail_vm:
+	la	r5, r0, ex_tmp_data_loc_0;
+	beqid	r6, ex_shw_vm;
+	swi	r3, r5, 0;	/* Get the word - delay slot */
+	/* Store the word, byte-by-byte into destination address */
+	lbui	r3, r5, 0;
+	sbi	r3, r4, 0;
+	lbui	r3, r5, 1;
+	sbi	r3, r4, 1;
+	lbui	r3, r5, 2;
+	sbi	r3, r4, 2;
+	lbui	r3, r5, 3;
+	brid	ret_from_exc;
+	sbi	r3, r4, 3;	/* Delay slot */
+ex_shw_vm:
+	/* Store the lower half-word, byte-by-byte into destination address */
+	lbui	r3, r5, 2;
+	sbi	r3, r4, 0;
+	lbui	r3, r5, 3;
+	brid	ret_from_exc;
+	sbi	r3, r4, 1;	/* Delay slot */
+ex_sw_end_vm:			/* Exception handling of store word, ends. */
+.end _unaligned_data_exception
+#endif /* CONFIG_MMU */
+
 ex_handler_unhandled:
 /* FIXME add handle function for unhandled exception - dump register */
 	bri 0
 
+/*
+ * hw_exception_handler Jump Table
+ * - Contains code snippets for each register that caused the unalign exception
+ * - Hence exception handler is NOT self-modifying
+ * - Separate table for load exceptions and store exceptions.
+ * - Each table is of size: (8 * 32) = 256 bytes
+ */
+
 .section .text
 .align 4
 lw_table:
@@ -407,7 +1061,11 @@
 lw_r28:		R3_TO_LWREG	(28);
 lw_r29:		R3_TO_LWREG	(29);
 lw_r30:		R3_TO_LWREG	(30);
+#ifdef CONFIG_MMU
+lw_r31: 	R3_TO_LWREG_V	(31);
+#else
 lw_r31:		R3_TO_LWREG	(31);
+#endif
 
 sw_table:
 sw_r0:		SWREG_TO_R3	(0);
@@ -441,7 +1099,81 @@
 sw_r28:		SWREG_TO_R3	(28);
 sw_r29:		SWREG_TO_R3	(29);
 sw_r30:		SWREG_TO_R3	(30);
+#ifdef CONFIG_MMU
+sw_r31:		SWREG_TO_R3_V	(31);
+#else
 sw_r31:		SWREG_TO_R3	(31);
+#endif
+
+#ifdef CONFIG_MMU
+lw_table_vm:
+lw_r0_vm:	R3_TO_LWREG_VM		(0);
+lw_r1_vm:	R3_TO_LWREG_VM_V	(1);
+lw_r2_vm:	R3_TO_LWREG_VM_V	(2);
+lw_r3_vm:	R3_TO_LWREG_VM_V	(3);
+lw_r4_vm:	R3_TO_LWREG_VM_V	(4);
+lw_r5_vm:	R3_TO_LWREG_VM_V	(5);
+lw_r6_vm:	R3_TO_LWREG_VM_V	(6);
+lw_r7_vm:	R3_TO_LWREG_VM_V	(7);
+lw_r8_vm:	R3_TO_LWREG_VM_V	(8);
+lw_r9_vm:	R3_TO_LWREG_VM_V	(9);
+lw_r10_vm:	R3_TO_LWREG_VM_V	(10);
+lw_r11_vm:	R3_TO_LWREG_VM_V	(11);
+lw_r12_vm:	R3_TO_LWREG_VM_V	(12);
+lw_r13_vm:	R3_TO_LWREG_VM_V	(13);
+lw_r14_vm:	R3_TO_LWREG_VM		(14);
+lw_r15_vm:	R3_TO_LWREG_VM_V	(15);
+lw_r16_vm:	R3_TO_LWREG_VM		(16);
+lw_r17_vm:	R3_TO_LWREG_VM_V	(17);
+lw_r18_vm:	R3_TO_LWREG_VM_V	(18);
+lw_r19_vm:	R3_TO_LWREG_VM		(19);
+lw_r20_vm:	R3_TO_LWREG_VM		(20);
+lw_r21_vm:	R3_TO_LWREG_VM		(21);
+lw_r22_vm:	R3_TO_LWREG_VM		(22);
+lw_r23_vm:	R3_TO_LWREG_VM		(23);
+lw_r24_vm:	R3_TO_LWREG_VM		(24);
+lw_r25_vm:	R3_TO_LWREG_VM		(25);
+lw_r26_vm:	R3_TO_LWREG_VM		(26);
+lw_r27_vm:	R3_TO_LWREG_VM		(27);
+lw_r28_vm:	R3_TO_LWREG_VM		(28);
+lw_r29_vm:	R3_TO_LWREG_VM		(29);
+lw_r30_vm:	R3_TO_LWREG_VM		(30);
+lw_r31_vm:	R3_TO_LWREG_VM_V	(31);
+
+sw_table_vm:
+sw_r0_vm:	SWREG_TO_R3_VM		(0);
+sw_r1_vm:	SWREG_TO_R3_VM_V	(1);
+sw_r2_vm:	SWREG_TO_R3_VM_V	(2);
+sw_r3_vm:	SWREG_TO_R3_VM_V	(3);
+sw_r4_vm:	SWREG_TO_R3_VM_V	(4);
+sw_r5_vm:	SWREG_TO_R3_VM_V	(5);
+sw_r6_vm:	SWREG_TO_R3_VM_V	(6);
+sw_r7_vm:	SWREG_TO_R3_VM_V	(7);
+sw_r8_vm:	SWREG_TO_R3_VM_V	(8);
+sw_r9_vm:	SWREG_TO_R3_VM_V	(9);
+sw_r10_vm:	SWREG_TO_R3_VM_V	(10);
+sw_r11_vm:	SWREG_TO_R3_VM_V	(11);
+sw_r12_vm:	SWREG_TO_R3_VM_V	(12);
+sw_r13_vm:	SWREG_TO_R3_VM_V	(13);
+sw_r14_vm:	SWREG_TO_R3_VM		(14);
+sw_r15_vm:	SWREG_TO_R3_VM_V	(15);
+sw_r16_vm:	SWREG_TO_R3_VM		(16);
+sw_r17_vm:	SWREG_TO_R3_VM_V	(17);
+sw_r18_vm:	SWREG_TO_R3_VM_V	(18);
+sw_r19_vm:	SWREG_TO_R3_VM		(19);
+sw_r20_vm:	SWREG_TO_R3_VM		(20);
+sw_r21_vm:	SWREG_TO_R3_VM		(21);
+sw_r22_vm:	SWREG_TO_R3_VM		(22);
+sw_r23_vm:	SWREG_TO_R3_VM		(23);
+sw_r24_vm:	SWREG_TO_R3_VM		(24);
+sw_r25_vm:	SWREG_TO_R3_VM		(25);
+sw_r26_vm:	SWREG_TO_R3_VM		(26);
+sw_r27_vm:	SWREG_TO_R3_VM		(27);
+sw_r28_vm:	SWREG_TO_R3_VM		(28);
+sw_r29_vm:	SWREG_TO_R3_VM		(29);
+sw_r30_vm:	SWREG_TO_R3_VM		(30);
+sw_r31_vm:	SWREG_TO_R3_VM_V	(31);
+#endif /* CONFIG_MMU */
 
 /* Temporary data structures used in the handler */
 .section .data
diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c
index 5f71790..59ff20e 100644
--- a/arch/microblaze/kernel/microblaze_ksyms.c
+++ b/arch/microblaze/kernel/microblaze_ksyms.c
@@ -45,3 +45,5 @@
 EXPORT_SYMBOL(__udivsi3);
 extern void __umodsi3(void);
 EXPORT_SYMBOL(__umodsi3);
+extern char *_ebss;
+EXPORT_SYMBOL_GPL(_ebss);
diff --git a/arch/microblaze/kernel/misc.S b/arch/microblaze/kernel/misc.S
new file mode 100644
index 0000000..df16c62
--- /dev/null
+++ b/arch/microblaze/kernel/misc.S
@@ -0,0 +1,120 @@
+/*
+ * Miscellaneous low-level MMU functions.
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ * Derived from arch/ppc/kernel/misc.S
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <linux/errno.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+	.text
+/*
+ * Flush MMU TLB
+ *
+ * We avoid flushing the pinned 0, 1 and possibly 2 entries.
+ */
+.globl _tlbia;
+.align 4;
+_tlbia:
+	addik	r12, r0, 63 /* flush all entries (63 - 3) */
+	/* isync */
+_tlbia_1:
+	mts	rtlbx, r12
+	nop
+	mts	rtlbhi, r0 /* flush: ensure V is clear */
+	nop
+	addik	r11, r12, -2
+	bneid	r11, _tlbia_1 /* loop for all entries */
+	addik	r12, r12, -1
+	/* sync */
+	rtsd	r15, 8
+	nop
+
+/*
+ * Flush MMU TLB for a particular address (in r5)
+ */
+.globl _tlbie;
+.align 4;
+_tlbie:
+	mts	rtlbsx, r5 /* look up the address in TLB */
+	nop
+	mfs	r12, rtlbx /* Retrieve index */
+	nop
+	blti	r12, _tlbie_1 /* Check if found */
+	mts	rtlbhi, r0 /* flush: ensure V is clear */
+	nop
+_tlbie_1:
+	rtsd	r15, 8
+	nop
+
+/*
+ * Allocate TLB entry for early console
+ */
+.globl early_console_reg_tlb_alloc;
+.align 4;
+early_console_reg_tlb_alloc:
+	/*
+	 * Load a TLB entry for the UART, so that microblaze_progress() can use
+	 * the UARTs nice and early.  We use a 4k real==virtual mapping.
+	 */
+	ori	r4, r0, 63
+	mts	rtlbx, r4 /* TLB slot 2 */
+
+	or	r4,r5,r0
+	andi	r4,r4,0xfffff000
+	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
+
+	andi	r5,r5,0xfffff000
+	ori	r5,r5,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+	mts	rtlblo,r4 /* Load the data portion of the entry */
+	nop
+	mts	rtlbhi,r5 /* Load the tag portion of the entry */
+	nop
+	rtsd	r15, 8
+	nop
+
+/*
+ * Copy a whole page (4096 bytes).
+ */
+#define COPY_16_BYTES		\
+	lwi	r7, r6, 0;	\
+	lwi	r8, r6, 4;	\
+	lwi	r9, r6, 8;	\
+	lwi	r10, r6, 12;	\
+	swi	r7, r5, 0;	\
+	swi	r8, r5, 4;	\
+	swi	r9, r5, 8;	\
+	swi	r10, r5, 12
+
+
+/* FIXME DCACHE_LINE_BYTES (CONFIG_XILINX_MICROBLAZE0_DCACHE_LINE_LEN * 4)*/
+#define DCACHE_LINE_BYTES (4 * 4)
+
+.globl copy_page;
+.align 4;
+copy_page:
+	ori	r11, r0, (PAGE_SIZE/DCACHE_LINE_BYTES) - 1
+_copy_page_loop:
+	COPY_16_BYTES
+#if DCACHE_LINE_BYTES >= 32
+	COPY_16_BYTES
+#endif
+	addik	r6, r6, DCACHE_LINE_BYTES
+	addik	r5, r5, DCACHE_LINE_BYTES
+	bneid	r11, _copy_page_loop
+	addik	r11, r11, -1
+	rtsd	r15, 8
+	nop
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 07d4fa3..00b12c6 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -126,9 +126,54 @@
 	else
 		childregs->r1 = ((unsigned long) ti) + THREAD_SIZE;
 
+#ifndef CONFIG_MMU
 	memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
 	ti->cpu_context.r1 = (unsigned long)childregs;
 	ti->cpu_context.msr = (unsigned long)childregs->msr;
+#else
+
+	/* if creating a kernel thread then update the current reg (we don't
+	 * want to use the parent's value when restoring by POP_STATE) */
+	if (kernel_mode(regs))
+		/* save new current on stack to use POP_STATE */
+		childregs->CURRENT_TASK = (unsigned long)p;
+	/* if returning to user then use the parent's value of this register */
+
+	/* if we're creating a new kernel thread then just zeroing all
+	 * the registers. That's OK for a brand new thread.*/
+	/* Pls. note that some of them will be restored in POP_STATE */
+	if (kernel_mode(regs))
+		memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
+	/* if this thread is created for fork/vfork/clone, then we want to
+	 * restore all the parent's context */
+	/* in addition to the registers which will be restored by POP_STATE */
+	else {
+		ti->cpu_context = *(struct cpu_context *)regs;
+		childregs->msr |= MSR_UMS;
+	}
+
+	/* FIXME STATE_SAVE_PT_OFFSET; */
+	ti->cpu_context.r1  = (unsigned long)childregs - STATE_SAVE_ARG_SPACE;
+	/* we should consider the fact that childregs is a copy of the parent
+	 * regs which were saved immediately after entering the kernel state
+	 * before enabling VM. This MSR will be restored in switch_to and
+	 * RETURN() and we want to have the right machine state there
+	 * specifically this state must have INTs disabled before and enabled
+	 * after performing rtbd
+	 * compose the right MSR for RETURN(). It will work for switch_to also
+	 * excepting for VM and UMS
+	 * don't touch UMS , CARRY and cache bits
+	 * right now MSR is a copy of parent one */
+	childregs->msr |= MSR_BIP;
+	childregs->msr &= ~MSR_EIP;
+	childregs->msr |= MSR_IE;
+	childregs->msr &= ~MSR_VM;
+	childregs->msr |= MSR_VMS;
+	childregs->msr |= MSR_EE; /* exceptions will be enabled*/
+
+	ti->cpu_context.msr = (childregs->msr|MSR_VM);
+	ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
+#endif
 	ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;
 
 	if (clone_flags & CLONE_SETTLS)
@@ -137,6 +182,7 @@
 	return 0;
 }
 
+#ifndef CONFIG_MMU
 /*
  * Return saved PC of a blocked thread.
  */
@@ -151,6 +197,7 @@
 	else
 		return ctx->r14;
 }
+#endif
 
 static void kernel_thread_helper(int (*fn)(void *), void *arg)
 {
@@ -173,6 +220,7 @@
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
 			&regs, 0, NULL, NULL);
 }
+EXPORT_SYMBOL_GPL(kernel_thread);
 
 unsigned long get_wchan(struct task_struct *p)
 {
@@ -188,3 +236,14 @@
 	regs->r1 = usp;
 	regs->pt_mode = 0;
 }
+
+#ifdef CONFIG_MMU
+#include <linux/elfcore.h>
+/*
+ * Set up a thread for executing a new program
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
+{
+	return 0; /* MicroBlaze has no separate FPU registers */
+}
+#endif /* CONFIG_MMU */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 34c4871..c005cc6 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -509,12 +509,13 @@
 
 	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l);
 	if (prop) {
-		initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4));
+		initrd_start = (unsigned long)
+					__va((u32)of_read_ulong(prop, l/4));
 
 		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l);
 		if (prop) {
 			initrd_end = (unsigned long)
-					__va(of_read_ulong(prop, l/4));
+					__va((u32)of_read_ulong(prop, 1/4));
 			initrd_below_start_ok = 1;
 		} else {
 			initrd_start = 0;
@@ -563,7 +564,9 @@
 		strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE));
 
 #ifdef CONFIG_CMDLINE
+#ifndef CONFIG_CMDLINE_FORCE
 	if (p == NULL || l == 0 || (l == 1 && (*p) == 0))
+#endif
 		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif /* CONFIG_CMDLINE */
 
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index eb6b417..8709bea 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -42,10 +42,6 @@
 
 void __init setup_arch(char **cmdline_p)
 {
-#ifdef CONFIG_CMDLINE_FORCE
-	strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-	strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
 	*cmdline_p = cmd_line;
 
 	console_verbose();
@@ -102,14 +98,34 @@
 {
 	unsigned long *src, *dst = (unsigned long *)0x0;
 
+	/* If CONFIG_MTD_UCLINUX is defined, assume ROMFS is at the
+	 * end of kernel. There are two position which we want to check.
+	 * The first is __init_end and the second __bss_start.
+	 */
+#ifdef CONFIG_MTD_UCLINUX
+	int romfs_size;
+	unsigned int romfs_base;
+	char *old_klimit = klimit;
+
+	romfs_base = (ram ? ram : (unsigned int)&__init_end);
+	romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
+	if (!romfs_size) {
+		romfs_base = (unsigned int)&__bss_start;
+		romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
+	}
+
+	/* Move ROMFS out of BSS before clearing it */
+	if (romfs_size > 0) {
+		memmove(&_ebss, (int *)romfs_base, romfs_size);
+		klimit += romfs_size;
+	}
+#endif
+
 /* clearing bss section */
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
 
-	/*
-	 * Copy command line passed from bootloader, or use default
-	 * if none provided, or forced
-	 */
+	/* Copy command line passed from bootloader */
 #ifndef CONFIG_CMDLINE_BOOL
 	if (cmdline && cmdline[0] != '\0')
 		strlcpy(cmd_line, cmdline, COMMAND_LINE_SIZE);
@@ -126,27 +142,15 @@
 	printk(KERN_NOTICE "Found FDT at 0x%08x\n", fdt);
 
 #ifdef CONFIG_MTD_UCLINUX
-	{
-		int size;
-		unsigned int romfs_base;
-		romfs_base = (ram ? ram : (unsigned int)&__init_end);
-		/* if CONFIG_MTD_UCLINUX_EBSS is defined, assume ROMFS is at the
-		 * end of kernel, which is ROMFS_LOCATION defined above. */
-		size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
-		early_printk("Found romfs @ 0x%08x (0x%08x)\n",
-				romfs_base, size);
-		early_printk("#### klimit %p ####\n", klimit);
-		BUG_ON(size < 0); /* What else can we do? */
+	early_printk("Found romfs @ 0x%08x (0x%08x)\n",
+			romfs_base, romfs_size);
+	early_printk("#### klimit %p ####\n", old_klimit);
+	BUG_ON(romfs_size < 0); /* What else can we do? */
 
-		/* Use memmove to handle likely case of memory overlap */
-		early_printk("Moving 0x%08x bytes from 0x%08x to 0x%08x\n",
-			size, romfs_base, (unsigned)&_ebss);
-		memmove(&_ebss, (int *)romfs_base, size);
+	early_printk("Moved 0x%08x bytes from 0x%08x to 0x%08x\n",
+			romfs_size, romfs_base, (unsigned)&_ebss);
 
-		/* update klimit */
-		klimit += PAGE_ALIGN(size);
-		early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
-	}
+	early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
 #endif
 
 	for (src = __ivt_start; src < __ivt_end; src++, dst++)
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 40d3693..4c0e652 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -152,8 +152,8 @@
 	unsigned long tramp[2];	/* signal trampoline */
 };
 
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
+static int restore_sigcontext(struct pt_regs *regs,
+				struct sigcontext __user *sc, int *rval_p)
 {
 	unsigned int err = 0;
 
@@ -211,11 +211,10 @@
 
 asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 {
-	struct rt_sigframe *frame =
-			(struct rt_sigframe *)(regs->r1 + STATE_SAVE_ARG_SPACE);
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)(regs->r1 + STATE_SAVE_ARG_SPACE);
 
 	sigset_t set;
-	stack_t st;
 	int rval;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -233,11 +232,10 @@
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval))
 		goto badframe;
 
-	if (__copy_from_user((void *)&st, &frame->uc.uc_stack, sizeof(st)))
-		goto badframe;
 	/* It is more difficult to avoid calling this function than to
 	 call it and ignore errors. */
-	do_sigaltstack(&st, NULL, regs->r1);
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->r1))
+		goto badframe;
 
 	return rval;
 
@@ -251,7 +249,7 @@
  */
 
 static int
-setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 		unsigned long mask)
 {
 	int err = 0;
@@ -278,7 +276,7 @@
 /*
  * Determine which stack to use..
  */
-static inline void *
+static inline void __user *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 {
 	/* Default to using normal stack */
@@ -287,87 +285,13 @@
 	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack(sp))
 		sp = current->sas_ss_sp + current->sas_ss_size;
 
-	return (void *)((sp - frame_size) & -8UL);
-}
-
-static void setup_frame(int sig, struct k_sigaction *ka,
-			sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
-
-	if (_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-					sizeof(frame->extramask));
-	}
-
-	/* Set up to return from userspace. If provided, use a stub
-	 already in userspace. */
-	/* minus 8 is offset to cater for "rtsd r15,8" offset */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->r15 = ((unsigned long)ka->sa.sa_restorer)-8;
-	} else {
-		/* Note, these encodings are _big endian_! */
-
-		/* addi r12, r0, __NR_sigreturn */
-		err |= __put_user(0x31800000 | __NR_sigreturn ,
-				frame->tramp + 0);
-		/* brki r14, 0x8 */
-		err |= __put_user(0xb9cc0008, frame->tramp + 1);
-
-		/* Return from sighandler will jump to the tramp.
-		 Negative 8 offset because return is rtsd r15, 8 */
-		regs->r15 = ((unsigned long)frame->tramp)-8;
-
-		__invalidate_cache_sigtramp((unsigned long)frame->tramp);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	regs->r1 = (unsigned long) frame - STATE_SAVE_ARG_SPACE;
-
-	/* Signal handler args: */
-	regs->r5 = signal; /* Arg 0: signum */
-	regs->r6 = (unsigned long) &frame->sc; /* arg 1: sigcontext */
-
-	/* Offset of 4 to handle microblaze rtid r14, 0 */
-	regs->pc = (unsigned long)ka->sa.sa_handler;
-
-	set_fs(USER_DS);
-
-#ifdef DEBUG_SIG
-	printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n",
-		current->comm, current->pid, frame, regs->pc);
-#endif
-
-	return;
-
-give_sigsegv:
-	if (sig == SIGSEGV)
-		ka->sa.sa_handler = SIG_DFL;
-	force_sig(SIGSEGV, current);
+	return (void __user *)((sp - frame_size) & -8UL);
 }
 
 static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			sigset_t *set, struct pt_regs *regs)
 {
-	struct rt_sigframe *frame;
+	struct rt_sigframe __user *frame;
 	int err = 0;
 	int signal;
 
@@ -382,7 +306,8 @@
 		? current_thread_info()->exec_domain->signal_invmap[sig]
 		: sig;
 
-	err |= copy_siginfo_to_user(&frame->info, info);
+	if (info)
+		err |= copy_siginfo_to_user(&frame->info, info);
 
 	/* Create the ucontext. */
 	err |= __put_user(0, &frame->uc.uc_flags);
@@ -463,7 +388,15 @@
 	case -ERESTARTNOINTR:
 do_restart:
 		/* offset of 4 bytes to re-execute trap (brki) instruction */
+#ifndef CONFIG_MMU
 		regs->pc -= 4;
+#else
+		/* offset of 8 bytes required = 4 for rtbd
+		   offset, plus 4 for size of
+			"brki r14,8"
+		   instruction. */
+		regs->pc -= 8;
+#endif
 		break;
 	}
 }
@@ -480,7 +413,7 @@
 	if (ka->sa.sa_flags & SA_SIGINFO)
 		setup_rt_frame(sig, ka, info, oldset, regs);
 	else
-		setup_frame(sig, ka, oldset, regs);
+		setup_rt_frame(sig, ka, NULL, oldset, regs);
 
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 3bb42ec..376d178 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -2,7 +2,11 @@
 	.long sys_restart_syscall	/* 0 - old "setup()" system call,
 					 * used for restarting */
 	.long sys_exit
-	.long sys_ni_syscall		/* was fork */
+#ifdef CONFIG_MMU
+	.long sys_fork_wrapper
+#else
+	.long sys_ni_syscall
+#endif
 	.long sys_read
 	.long sys_write
 	.long sys_open			/* 5 */
diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c
index 293ef48..eaaaf80 100644
--- a/arch/microblaze/kernel/traps.c
+++ b/arch/microblaze/kernel/traps.c
@@ -22,14 +22,6 @@
 	__enable_hw_exceptions();
 }
 
-void __bad_xchg(volatile void *ptr, int size)
-{
-	printk(KERN_INFO "xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
-		__builtin_return_address(0), ptr, size);
-	BUG();
-}
-EXPORT_SYMBOL(__bad_xchg);
-
 static int kstack_depth_to_print = 24;
 
 static int __init kstack_setup(char *s)
@@ -105,3 +97,37 @@
 	show_stack(NULL, NULL);
 }
 EXPORT_SYMBOL(dump_stack);
+
+#ifdef CONFIG_MMU
+void __bug(const char *file, int line, void *data)
+{
+	if (data)
+		printk(KERN_CRIT "kernel BUG at %s:%d (data = %p)!\n",
+			file, line, data);
+	else
+		printk(KERN_CRIT "kernel BUG at %s:%d!\n", file, line);
+
+	machine_halt();
+}
+
+int bad_trap(int trap_num, struct pt_regs *regs)
+{
+	printk(KERN_CRIT
+		"unimplemented trap %d called at 0x%08lx, pid %d!\n",
+		trap_num, regs->pc, current->pid);
+	return -ENOSYS;
+}
+
+int debug_trap(struct pt_regs *regs)
+{
+	int i;
+	printk(KERN_CRIT "debug trap\n");
+	for (i = 0; i < 32; i++) {
+		/* printk("r%i:%08X\t",i,regs->gpr[i]); */
+		if ((i % 4) == 3)
+			printk(KERN_CRIT "\n");
+	}
+	printk(KERN_CRIT "pc:%08lX\tmsr:%08lX\n", regs->pc, regs->msr);
+	return -ENOSYS;
+}
+#endif
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index 840385e..8ae807a 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -17,8 +17,7 @@
 jiffies = jiffies_64 + 4;
 
 SECTIONS {
-	. = CONFIG_KERNEL_BASE_ADDR;
-
+	. = CONFIG_KERNEL_START;
 	.text : {
 		_text = . ;
 		_stext = . ;
@@ -132,6 +131,8 @@
 		__con_initcall_end = .;
 	}
 
+	SECURITY_INIT
+
 	__init_end_before_initramfs = .;
 
 	.init.ramfs ALIGN(4096) : {
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index d27126b..71c8cb6 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -10,4 +10,5 @@
 lib-y += memcpy.o memmove.o
 endif
 
-lib-y +=  uaccess.o
+lib-$(CONFIG_NO_MMU) += uaccess.o
+lib-$(CONFIG_MMU) += uaccess_old.o
diff --git a/arch/microblaze/lib/checksum.c b/arch/microblaze/lib/checksum.c
index 8093400..f08e745 100644
--- a/arch/microblaze/lib/checksum.c
+++ b/arch/microblaze/lib/checksum.c
@@ -32,9 +32,10 @@
 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
  kills, so most of the assembly has to go. */
 
-#include <net/checksum.h>
-#include <asm/checksum.h>
 #include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
 
 static inline unsigned short from32to16(unsigned long x)
 {
@@ -102,6 +103,7 @@
 {
 	return (__force __sum16)~do_csum(iph, ihl*4);
 }
+EXPORT_SYMBOL(ip_fast_csum);
 
 /*
  * computes the checksum of a memory block at buff, length len,
@@ -115,15 +117,16 @@
  *
  * it's best to have buff aligned on a 32-bit boundary
  */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
 {
+	unsigned int sum = (__force unsigned int)wsum;
 	unsigned int result = do_csum(buff, len);
 
 	/* add in old sum, and carry.. */
 	result += sum;
 	if (sum > result)
 		result += 1;
-	return result;
+	return (__force __wsum)result;
 }
 EXPORT_SYMBOL(csum_partial);
 
@@ -131,9 +134,9 @@
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
-__sum16 ip_compute_csum(const unsigned char *buff, int len)
+__sum16 ip_compute_csum(const void *buff, int len)
 {
-	return ~do_csum(buff, len);
+	return (__force __sum16)~do_csum(buff, len);
 }
 EXPORT_SYMBOL(ip_compute_csum);
 
@@ -141,12 +144,18 @@
  * copy from fs while checksumming, otherwise like csum_partial
  */
 __wsum
-csum_partial_copy_from_user(const char __user *src, char *dst, int len,
-						int sum, int *csum_err)
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+						__wsum sum, int *csum_err)
 {
-	if (csum_err)
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
 		*csum_err = 0;
-	memcpy(dst, src, len);
+
 	return csum_partial(dst, len, sum);
 }
 EXPORT_SYMBOL(csum_partial_copy_from_user);
@@ -155,7 +164,7 @@
  * copy from ds while checksumming, otherwise like csum_partial
  */
 __wsum
-csum_partial_copy(const char *src, char *dst, int len, int sum)
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
 {
 	memcpy(dst, src, len);
 	return csum_partial(dst, len, sum);
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index 5880119..6a907c5 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -154,8 +154,3 @@
 }
 EXPORT_SYMBOL(memcpy);
 #endif /* __HAVE_ARCH_MEMCPY */
-
-void *cacheable_memcpy(void *d, const void *s, __kernel_size_t c)
-{
-	return memcpy(d, s, c);
-}
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
new file mode 100644
index 0000000..67f991c
--- /dev/null
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2009 PetaLogix
+ * Copyright (C) 2007 LynuxWorks, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h>
+
+/*
+ * int __strncpy_user(char *to, char *from, int len);
+ *
+ * Returns:
+ *  -EFAULT  for an exception
+ *  len      if we hit the buffer limit
+ *  bytes copied
+ */
+
+	.text
+.globl __strncpy_user;
+.align 4;
+__strncpy_user:
+
+	/*
+	 * r5 - to
+	 * r6 - from
+	 * r7 - len
+	 * r3 - temp count
+	 * r4 - temp val
+	 */
+	addik	r3,r7,0		/* temp_count = len */
+	beqi	r3,3f
+1:
+	lbu	r4,r6,r0
+	sb	r4,r5,r0
+
+	addik	r3,r3,-1
+	beqi	r3,2f		/* break on len */
+
+	addik	r5,r5,1
+	bneid	r4,1b
+	addik	r6,r6,1		/* delay slot */
+	addik	r3,r3,1		/* undo "temp_count--" */
+2:
+	rsubk	r3,r3,r7	/* temp_count = len - temp_count */
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	.fixup, "ax"
+	.align	2
+4:
+	brid	3b
+	addik	r3,r0, -EFAULT
+
+	.section	__ex_table, "a"
+	.word	1b,4b
+
+/*
+ * int __strnlen_user(char __user *str, int maxlen);
+ *
+ * Returns:
+ *  0 on error
+ *  maxlen + 1  if no NUL byte found within maxlen bytes
+ *  size of the string (including NUL byte)
+ */
+
+	.text
+.globl __strnlen_user;
+.align 4;
+__strnlen_user:
+	addik	r3,r6,0
+	beqi	r3,3f
+1:
+	lbu	r4,r5,r0
+	beqid	r4,2f		/* break on NUL */
+	addik	r3,r3,-1	/* delay slot */
+
+	bneid	r3,1b
+	addik	r5,r5,1		/* delay slot */
+
+	addik	r3,r3,-1	/* for break on len */
+2:
+	rsubk	r3,r3,r6
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	.fixup,"ax"
+4:
+	brid	3b
+	addk	r3,r0,r0
+
+	.section	__ex_table,"a"
+	.word	1b,4b
+
+/*
+ * int __copy_tofrom_user(char *to, char *from, int len)
+ * Return:
+ *   0 on success
+ *   number of not copied bytes on error
+ */
+	.text
+.globl __copy_tofrom_user;
+.align 4;
+__copy_tofrom_user:
+	/*
+	 * r5 - to
+	 * r6 - from
+	 * r7, r3 - count
+	 * r4 - tempval
+	 */
+	addik	r3,r7,0
+	beqi	r3,3f
+1:
+	lbu	r4,r6,r0
+	addik	r6,r6,1
+2:
+	sb	r4,r5,r0
+	addik	r3,r3,-1
+	bneid	r3,1b
+	addik	r5,r5,1		/* delay slot */
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	__ex_table,"a"
+	.word	1b,3b,2b,3b
diff --git a/arch/microblaze/mm/Makefile b/arch/microblaze/mm/Makefile
index bf9e447..6c8a924 100644
--- a/arch/microblaze/mm/Makefile
+++ b/arch/microblaze/mm/Makefile
@@ -3,3 +3,5 @@
 #
 
 obj-y := init.o
+
+obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
new file mode 100644
index 0000000..5e67cd1
--- /dev/null
+++ b/arch/microblaze/mm/fault.c
@@ -0,0 +1,304 @@
+/*
+ *  arch/microblaze/mm/fault.c
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from "arch/ppc/mm/fault.c"
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <linux/uaccess.h>
+#include <asm/exceptions.h>
+
+#if defined(CONFIG_KGDB)
+int debugger_kernel_faults = 1;
+#endif
+
+static unsigned long pte_misses;	/* updated by do_page_fault() */
+static unsigned long pte_errors;	/* updated by do_page_fault() */
+
+/*
+ * Check whether the instruction at regs->pc is a store using
+ * an update addressing form which will update r1.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+	unsigned int inst;
+
+	if (get_user(inst, (unsigned int *)regs->pc))
+		return 0;
+	/* check for 1 in the rD field */
+	if (((inst >> 21) & 0x1f) != 1)
+		return 0;
+	/* check for store opcodes */
+	if ((inst & 0xd0000000) == 0xd0000000)
+		return 1;
+	return 0;
+}
+
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from do_page_fault above and from some of the procedures
+ * in traps.c.
+ */
+static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *fixup;
+/* MS: no context */
+	/* Are we prepared to handle this fault?  */
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return;
+	}
+
+	/* kernel has accessed a bad area */
+#if defined(CONFIG_KGDB)
+	if (debugger_kernel_faults)
+		debugger(regs);
+#endif
+	die("kernel access of bad area", regs, sig);
+}
+
+/*
+ * The error_code parameter is ESR for a data fault,
+ * 0 for an instruction fault.
+ */
+void do_page_fault(struct pt_regs *regs, unsigned long address,
+		   unsigned long error_code)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = error_code & ESR_S;
+	int fault;
+
+	regs->ear = address;
+	regs->esr = error_code;
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (kernel_mode(regs) && (address >= TASK_SIZE)) {
+		printk(KERN_WARNING "kernel task_size exceed");
+		_exception(SIGSEGV, regs, code, address);
+	}
+
+	/* for instr TLB miss and instr storage exception ESR_S is undefined */
+	if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+		is_write = 0;
+
+#if defined(CONFIG_KGDB)
+	if (debugger_fault_handler && regs->trap == 0x300) {
+		debugger_fault_handler(regs);
+		return;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (in_atomic() || mm == NULL) {
+		/* FIXME */
+		if (kernel_mode(regs)) {
+			printk(KERN_EMERG
+				"Page fault in kernel mode - Oooou!!! pid %d\n",
+				current->pid);
+			_exception(SIGSEGV, regs, code, address);
+			return;
+		}
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with "
+		       "in_atomic(), mm = %p\n", mm);
+		printk(KERN_EMERG "r15 = %lx  MSR = %lx\n",
+		       regs->r15, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibility of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (kernel_mode(regs) && !search_exception_tables(regs->pc))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+
+	if (vma->vm_start <= address)
+		goto good_area;
+
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	if (!is_write)
+		goto bad_area;
+
+	/*
+	 * N.B. The ABI allows programs to access up to
+	 * a few hundred bytes below the stack pointer (TBD).
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			goto bad_area;
+
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->r1
+			&& (kernel_mode(regs) || !store_updates_sp(regs)))
+				goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	code = SEGV_ACCERR;
+
+	/* a write */
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	/* a read */
+	} else {
+		/* protection fault */
+		if (error_code & 0x08000000)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+survive:
+	fault = handle_mm_fault(mm, vma, address, is_write);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (fault & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+	up_read(&mm->mmap_sem);
+	/*
+	 * keep track of tlb+htab misses that are good addrs but
+	 * just need pte's created via handle_mm_fault()
+	 * -- Cort
+	 */
+	pte_misses++;
+	return;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	pte_errors++;
+
+	/* User mode accesses cause a SIGSEGV */
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, code, address);
+/*		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = code;
+		info.si_addr = (void *) address;
+		force_sig_info(SIGSEGV, &info, current);*/
+		return;
+	}
+
+	bad_page_fault(regs, address, SIGSEGV);
+	return;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	up_read(&mm->mmap_sem);
+	printk(KERN_WARNING "VM: killing process %s\n", current->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	bad_page_fault(regs, address, SIGKILL);
+	return;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return;
+	}
+	bad_page_fault(regs, address, SIGBUS);
+}
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index b0c8213..b5a701c 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -23,8 +23,16 @@
 #include <asm/sections.h>
 #include <asm/tlb.h>
 
+#ifndef CONFIG_MMU
 unsigned int __page_offset;
-/* EXPORT_SYMBOL(__page_offset); */
+EXPORT_SYMBOL(__page_offset);
+
+#else
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+int mem_init_done;
+static int init_bootmem_done;
+#endif /* CONFIG_MMU */
 
 char *klimit = _end;
 
@@ -32,28 +40,26 @@
  * Initialize the bootmem system and give it all the memory we
  * have available.
  */
-unsigned int memory_start;
-unsigned int memory_end; /* due to mm/nommu.c */
-unsigned int memory_size;
+unsigned long memory_start;
+unsigned long memory_end; /* due to mm/nommu.c */
+unsigned long memory_size;
 
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
  */
 static void __init paging_init(void)
 {
-	int i;
 	unsigned long zones_size[MAX_NR_ZONES];
 
+	/* Clean every zones */
+	memset(zones_size, 0, sizeof(zones_size));
+
 	/*
 	 * old: we can DMA to/from any address.put all page into ZONE_DMA
 	 * We use only ZONE_NORMAL
 	 */
 	zones_size[ZONE_NORMAL] = max_mapnr;
 
-	/* every other zones are empty */
-	for (i = 1; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
-
 	free_area_init(zones_size);
 }
 
@@ -61,6 +67,7 @@
 {
 	int i;
 	unsigned long map_size;
+#ifndef CONFIG_MMU
 	u32 kernel_align_start, kernel_align_size;
 
 	/* Find main memory where is the kernel */
@@ -93,6 +100,7 @@
 		__func__, kernel_align_start, kernel_align_start
 			+ kernel_align_size, kernel_align_size);
 
+#endif
 	/*
 	 * Kernel:
 	 * start: base phys address of kernel - page align
@@ -121,9 +129,13 @@
 	 * for 4GB of memory, using 4kB pages), plus 1 page
 	 * (in case the address isn't page-aligned).
 	 */
+#ifndef CONFIG_MMU
 	map_size = init_bootmem_node(NODE_DATA(0), PFN_UP(TOPHYS((u32)_end)),
 					min_low_pfn, max_low_pfn);
-
+#else
+	map_size = init_bootmem_node(&contig_page_data,
+		PFN_UP(TOPHYS((u32)_end)), min_low_pfn, max_low_pfn);
+#endif
 	lmb_reserve(PFN_UP(TOPHYS((u32)_end)) << PAGE_SHIFT, map_size);
 
 	/* free bootmem is whole main memory */
@@ -137,6 +149,9 @@
 		reserve_bootmem(lmb.reserved.region[i].base,
 			lmb_size_bytes(&lmb.reserved, i) - 1, BOOTMEM_DEFAULT);
 	}
+#ifdef CONFIG_MMU
+	init_bootmem_done = 1;
+#endif
 	paging_init();
 }
 
@@ -191,11 +206,145 @@
 	printk(KERN_INFO "Memory: %luk/%luk available\n",
 	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT-10));
+#ifdef CONFIG_MMU
+	mem_init_done = 1;
+#endif
 }
 
+#ifndef CONFIG_MMU
 /* Check against bounds of physical memory */
 int ___range_ok(unsigned long addr, unsigned long size)
 {
 	return ((addr < memory_start) ||
 		((addr + size) > memory_end));
 }
+EXPORT_SYMBOL(___range_ok);
+
+#else
+int page_is_ram(unsigned long pfn)
+{
+	return pfn < max_low_pfn;
+}
+
+/*
+ * Check for command-line options that affect what MMU_init will do.
+ */
+static void mm_cmdline_setup(void)
+{
+	unsigned long maxmem = 0;
+	char *p = cmd_line;
+
+	/* Look for mem= option on command line */
+	p = strstr(cmd_line, "mem=");
+	if (p) {
+		p += 4;
+		maxmem = memparse(p, &p);
+		if (maxmem && memory_size > maxmem) {
+			memory_size = maxmem;
+			memory_end = memory_start + memory_size;
+			lmb.memory.region[0].size = memory_size;
+		}
+	}
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+static void __init mmu_init_hw(void)
+{
+	/*
+	 * The Zone Protection Register (ZPR) defines how protection will
+	 * be applied to every page which is a member of a given zone. At
+	 * present, we utilize only two of the zones.
+	 * The zone index bits (of ZSEL) in the PTE are used for software
+	 * indicators, except the LSB.  For user access, zone 1 is used,
+	 * for kernel access, zone 0 is used.  We set all but zone 1
+	 * to zero, allowing only kernel access as indicated in the PTE.
+	 * For zone 1, we set a 01 binary (a value of 10 will not work)
+	 * to allow user access as indicated in the PTE.  This also allows
+	 * kernel access as indicated in the PTE.
+	 */
+	__asm__ __volatile__ ("ori r11, r0, 0x10000000;" \
+			"mts rzpr, r11;"
+			: : : "r11");
+}
+
+/*
+ * MMU_init sets up the basic memory mappings for the kernel,
+ * including both RAM and possibly some I/O regions,
+ * and sets up the page tables and the MMU hardware ready to go.
+ */
+
+/* called from head.S */
+asmlinkage void __init mmu_init(void)
+{
+	unsigned int kstart, ksize;
+
+	if (!lmb.reserved.cnt) {
+		printk(KERN_EMERG "Error memory count\n");
+		machine_restart(NULL);
+	}
+
+	if ((u32) lmb.memory.region[0].size < 0x1000000) {
+		printk(KERN_EMERG "Memory must be greater than 16MB\n");
+		machine_restart(NULL);
+	}
+	/* Find main memory where the kernel is */
+	memory_start = (u32) lmb.memory.region[0].base;
+	memory_end = (u32) lmb.memory.region[0].base +
+				(u32) lmb.memory.region[0].size;
+	memory_size = memory_end - memory_start;
+
+	mm_cmdline_setup(); /* FIXME parse args from command line - not used */
+
+	/*
+	 * Map out the kernel text/data/bss from the available physical
+	 * memory.
+	 */
+	kstart = __pa(CONFIG_KERNEL_START); /* kernel start */
+	/* kernel size */
+	ksize = PAGE_ALIGN(((u32)_end - (u32)CONFIG_KERNEL_START));
+	lmb_reserve(kstart, ksize);
+
+#if defined(CONFIG_BLK_DEV_INITRD)
+	/* Remove the init RAM disk from the available memory. */
+/*	if (initrd_start) {
+		mem_pieces_remove(&phys_avail, __pa(initrd_start),
+				  initrd_end - initrd_start, 1);
+	}*/
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+	/* Initialize the MMU hardware */
+	mmu_init_hw();
+
+	/* Map in all of RAM starting at CONFIG_KERNEL_START */
+	mapin_ram();
+
+#ifdef HIGHMEM_START_BOOL
+	ioremap_base = HIGHMEM_START;
+#else
+	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
+#endif /* CONFIG_HIGHMEM */
+	ioremap_bot = ioremap_base;
+
+	/* Initialize the context management stuff */
+	mmu_context_init();
+}
+
+/* This is only called until mem_init is done. */
+void __init *early_get_page(void)
+{
+	void *p;
+	if (init_bootmem_done) {
+		p = alloc_bootmem_pages(PAGE_SIZE);
+	} else {
+		/*
+		 * Mem start + 32MB -> here is limit
+		 * because of mem mapping from head.S
+		 */
+		p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE,
+					memory_start + 0x2000000));
+	}
+	return p;
+}
+#endif /* CONFIG_MMU */
diff --git a/arch/microblaze/mm/mmu_context.c b/arch/microblaze/mm/mmu_context.c
new file mode 100644
index 0000000..26ff82f
--- /dev/null
+++ b/arch/microblaze/mm/mmu_context.c
@@ -0,0 +1,70 @@
+/*
+ * This file contains the routines for handling the MMU.
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from arch/ppc/mm/4xx_mmu.c:
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+mm_context_t next_mmu_context;
+unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
+atomic_t nr_free_contexts;
+struct mm_struct *context_mm[LAST_CONTEXT+1];
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init mmu_context_init(void)
+{
+	/*
+	 * The use of context zero is reserved for the kernel.
+	 * This code assumes FIRST_CONTEXT < 32.
+	 */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_mmu_context = FIRST_CONTEXT;
+	atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
+}
+
+/*
+ * Steal a context from a task that has one at the moment.
+ *
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone were motivated to do it.
+ */
+void steal_context(void)
+{
+	struct mm_struct *mm;
+
+	/* free up context `next_mmu_context' */
+	/* if we shouldn't free context 0, don't... */
+	if (next_mmu_context < FIRST_CONTEXT)
+		next_mmu_context = FIRST_CONTEXT;
+	mm = context_mm[next_mmu_context];
+	flush_tlb_mm(mm);
+	destroy_context(mm);
+}
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
new file mode 100644
index 0000000..46c4ca5
--- /dev/null
+++ b/arch/microblaze/mm/pgtable.c
@@ -0,0 +1,286 @@
+/*
+ *  This file contains the routines setting up the linux page tables.
+ *
+ * Copyright (C) 2008 Michal Simek
+ * Copyright (C) 2008 PetaLogix
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from arch/ppc/mm/pgtable.c:
+ *    -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This file is subject to the terms and conditions of the GNU General
+ *  Public License.  See the file COPYING in the main directory of this
+ *  archive for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <linux/io.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>
+
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+
+unsigned long ioremap_base;
+unsigned long ioremap_bot;
+
+/* The maximum lowmem defaults to 768Mb, but this can be configured to
+ * another value.
+ */
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+#ifndef CONFIG_SMP
+struct pgtable_cache_struct quicklists;
+#endif
+
+static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
+		unsigned long flags)
+{
+	unsigned long v, i;
+	phys_addr_t p;
+	int err;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the vmalloc system is running, we use it.
+	 * Before then, we use space going down from ioremap_base
+	 * (ioremap_bot records where we're up to).
+	 */
+	p = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - p;
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using.
+	 * mem_init() sets high_memory so only do the check after that.
+	 *
+	 * However, allow remap of rootfs: TBD
+	 */
+	if (mem_init_done &&
+		p >= memory_start && p < virt_to_phys(high_memory) &&
+		!(p >= virt_to_phys((unsigned long)&__bss_stop) &&
+		p < virt_to_phys((unsigned long)__bss_stop))) {
+		printk(KERN_WARNING "__ioremap(): phys addr "PTE_FMT
+			" is RAM lr %p\n", (unsigned long)p,
+			__builtin_return_address(0));
+		return NULL;
+	}
+
+	if (size == 0)
+		return NULL;
+
+	/*
+	 * Is it already mapped? If the whole area is mapped then we're
+	 * done, otherwise remap it since we want to keep the virt addrs for
+	 * each request contiguous.
+	 *
+	 * We make the assumption here that if the bottom and top
+	 * of the range we want are mapped then it's mapped to the
+	 * same virt address (and this is contiguous).
+	 *  -- Cort
+	 */
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = get_vm_area(size, VM_IOREMAP);
+		if (area == NULL)
+			return NULL;
+		v = VMALLOC_VMADDR(area->addr);
+	} else {
+		v = (ioremap_bot -= size);
+	}
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+	if (flags & _PAGE_NO_CACHE)
+		flags |= _PAGE_GUARDED;
+
+	err = 0;
+	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
+		err = map_page(v + i, p + i, flags);
+	if (err) {
+		if (mem_init_done)
+			vfree((void *)v);
+		return NULL;
+	}
+
+	return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
+}
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+EXPORT_SYMBOL(ioremap);
+
+void iounmap(void *addr)
+{
+	if (addr > high_memory && (unsigned long) addr < ioremap_bot)
+		vfree((void *) (PAGE_MASK & (unsigned long) addr));
+}
+EXPORT_SYMBOL(iounmap);
+
+
+int map_page(unsigned long va, phys_addr_t pa, int flags)
+{
+	pmd_t *pd;
+	pte_t *pg;
+	int err = -ENOMEM;
+	/* spin_lock(&init_mm.page_table_lock); */
+	/* Use upper 10 bits of VA to index the first level map */
+	pd = pmd_offset(pgd_offset_k(va), va);
+	/* Use middle 10 bits of VA to index the second-level map */
+	pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
+	/* pg = pte_alloc_kernel(&init_mm, pd, va); */
+
+	if (pg != NULL) {
+		err = 0;
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
+				__pgprot(flags)));
+		if (mem_init_done)
+			flush_HPTE(0, va, pmd_val(*pd));
+			/* flush_HPTE(0, va, pg); */
+
+	}
+	/* spin_unlock(&init_mm.page_table_lock); */
+	return err;
+}
+
+void __init adjust_total_lowmem(void)
+{
+/* TBD */
+#if 0
+	unsigned long max_low_mem = MAX_LOW_MEM;
+
+	if (total_lowmem > max_low_mem) {
+		total_lowmem = max_low_mem;
+#ifndef CONFIG_HIGHMEM
+		printk(KERN_INFO "Warning, memory limited to %ld Mb, use "
+				"CONFIG_HIGHMEM to reach %ld Mb\n",
+				max_low_mem >> 20, total_memory >> 20);
+		total_memory = total_lowmem;
+#endif /* CONFIG_HIGHMEM */
+	}
+#endif
+}
+
+static void show_tmem(unsigned long tmem)
+{
+	volatile unsigned long a;
+	a = a + tmem;
+}
+
+/*
+ * Map in all of physical memory starting at CONFIG_KERNEL_START.
+ */
+void __init mapin_ram(void)
+{
+	unsigned long v, p, s, f;
+
+	v = CONFIG_KERNEL_START;
+	p = memory_start;
+	show_tmem(memory_size);
+	for (s = 0; s < memory_size; s += PAGE_SIZE) {
+		f = _PAGE_PRESENT | _PAGE_ACCESSED |
+				_PAGE_SHARED | _PAGE_HWEXEC;
+		if ((char *) v < _stext || (char *) v >= _etext)
+			f |= _PAGE_WRENABLE;
+		else
+			/* On the MicroBlaze, no user access
+			   forces R/W kernel access */
+			f |= _PAGE_USER;
+		map_page(v, p, f);
+		v += PAGE_SIZE;
+		p += PAGE_SIZE;
+	}
+}
+
+/* is x a power of 2? */
+#define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))
+
+/*
+ * Set up a mapping for a block of I/O.
+ * virt, phys, size must all be page-aligned.
+ * This should only be called before ioremap is called.
+ */
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags)
+{
+	int i;
+
+	if (virt > CONFIG_KERNEL_START && virt < ioremap_bot)
+		ioremap_bot = ioremap_base = virt;
+
+	/* Put it in the page tables. */
+	for (i = 0; i < size; i += PAGE_SIZE)
+		map_page(virt + i, phys + i, flags);
+}
+
+/* Scan the real Linux page tables and return a PTE pointer for
+ * a virtual address in a context.
+ * Returns true (1) if PTE was found, zero otherwise.  The pointer to
+ * the PTE pointer is unmodified if PTE is not found.
+ */
+static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
+{
+	pgd_t	*pgd;
+	pmd_t	*pmd;
+	pte_t	*pte;
+	int     retval = 0;
+
+	pgd = pgd_offset(mm, addr & PAGE_MASK);
+	if (pgd) {
+		pmd = pmd_offset(pgd, addr & PAGE_MASK);
+		if (pmd_present(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
+			if (pte) {
+				retval = 1;
+				*ptep = pte;
+			}
+		}
+	}
+	return retval;
+}
+
+/* Find physical address for this virtual address.  Normally used by
+ * I/O functions, but anyone can call it.
+ */
+unsigned long iopa(unsigned long addr)
+{
+	unsigned long pa;
+
+	pte_t *pte;
+	struct mm_struct *mm;
+
+	/* Allow mapping of user addresses (within the thread)
+	 * for DMA if necessary.
+	 */
+	if (addr < TASK_SIZE)
+		mm = current->mm;
+	else
+		mm = &init_mm;
+
+	pa = 0;
+	if (get_pteptr(mm, addr, &pte))
+		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+
+	return pa;
+}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09b1287..25f3b0a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -72,6 +72,7 @@
 	select IRQ_CPU
 	select IRQ_GT641XX
 	select PCI_GT64XXX_PCI0
+	select PCI
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
@@ -593,7 +594,7 @@
 	  board, which is based on GT64120 bridge chip.
 
 config CAVIUM_OCTEON_SIMULATOR
-	bool "Support for the Cavium Networks Octeon Simulator"
+	bool "Cavium Networks Octeon Simulator"
 	select CEVT_R4K
 	select 64BIT_PHYS_ADDR
 	select DMA_COHERENT
@@ -607,7 +608,7 @@
 	  hardware.
 
 config CAVIUM_OCTEON_REFERENCE_BOARD
-	bool "Support for the Cavium Networks Octeon reference board"
+	bool "Cavium Networks Octeon reference board"
 	select CEVT_R4K
 	select 64BIT_PHYS_ADDR
 	select DMA_COHERENT
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 1c19af8..d3a0c81 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -177,7 +177,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
 	write_unlock(&octeon_irq_ciu0_rwlock);
+
+	return 0;
 }
 #endif
 
@@ -292,7 +294,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
 	write_unlock(&octeon_irq_ciu1_rwlock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 744cd8f..1260443 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -39,8 +39,8 @@
 #define MIPS_CACHE_PINDEX	0x00000020	/* Physically indexed cache */
 
 struct cpuinfo_mips {
-	unsigned long		udelay_val;
-	unsigned long		asid_cache;
+	unsigned int		udelay_val;
+	unsigned int		asid_cache;
 
 	/*
 	 * Capability and feature descriptor structure for MIPS CPU
diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h
index b0bccd2..a07e51b 100644
--- a/arch/mips/include/asm/delay.h
+++ b/arch/mips/include/asm/delay.h
@@ -11,94 +11,12 @@
 #ifndef _ASM_DELAY_H
 #define _ASM_DELAY_H
 
-#include <linux/param.h>
-#include <linux/smp.h>
+extern void __delay(unsigned int loops);
+extern void __ndelay(unsigned int ns);
+extern void __udelay(unsigned int us);
 
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-static inline void __delay(unsigned long loops)
-{
-	if (sizeof(long) == 4)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	subu	%0, 1					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && !DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	dsubu	%0, 1					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	dsubu	%0, %2					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops), "r" (1));
-}
-
-
-/*
- * Division by multiplication: you don't have to worry about
- * loss of precision.
- *
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)
- */
-
-static inline void __udelay(unsigned long usecs, unsigned long lpj)
-{
-	unsigned long hi, lo;
-
-	/*
-	 * The rates of 128 is rounded wrongly by the catchall case
-	 * for 64-bit.  Excessive precission?  Probably ...
-	 */
-#if defined(CONFIG_64BIT) && (HZ == 128)
-	usecs *= 0x0008637bd05af6c7UL;		/* 2**64 / (1000000 / HZ) */
-#elif defined(CONFIG_64BIT)
-	usecs *= (0x8000000000000000UL / (500000 / HZ));
-#else /* 32-bit junk follows here */
-	usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
-	                           0x80000000ULL) >> 32);
-#endif
-
-	if (sizeof(long) == 4)
-		__asm__("multu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && !R4000_WAR)
-		__asm__("dmultu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && R4000_WAR)
-		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
-		: "=r" (usecs), "=h" (hi), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-
-	__delay(usecs);
-}
-
-#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val
-
-#define udelay(usecs) __udelay((usecs), __udelay_val)
+#define ndelay(ns) __udelay(ns)
+#define udelay(us) __udelay(us)
 
 /* make sure "usecs *= ..." in udelay do not overflow. */
 #if HZ >= 1000
diff --git a/arch/mips/include/asm/ioctl.h b/arch/mips/include/asm/ioctl.h
index 85067e2..9161634 100644
--- a/arch/mips/include/asm/ioctl.h
+++ b/arch/mips/include/asm/ioctl.h
@@ -60,12 +60,16 @@
 	 ((nr)   << _IOC_NRSHIFT) | \
 	 ((size) << _IOC_SIZESHIFT))
 
+#ifdef __KERNEL__
 /* provoke compile error for invalid uses of size argument */
 extern unsigned int __invalid_size_argument_for_IOC;
 #define _IOC_TYPECHECK(t) \
 	((sizeof(t) == sizeof(t[1]) && \
 	  sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
 	  sizeof(t) : __invalid_size_argument_for_IOC)
+#else
+#define _IOC_TYPECHECK(t)	(sizeof(t))
+#endif
 
 /* used to create numbers */
 #define _IO(type, nr)		_IOC(_IOC_NONE, (type), (nr), 0)
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 3214ade..4f1eed1 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,7 @@
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq,
+extern int plat_set_irq_affinity(unsigned int irq,
 				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
diff --git a/arch/mips/include/asm/suspend.h b/arch/mips/include/asm/suspend.h
deleted file mode 100644
index 2562f8f..0000000
--- a/arch/mips/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_SUSPEND_H
-#define __ASM_SUSPEND_H
-
-/* Somewhen...  Maybe :-)  */
-
-#endif /* __ASM_SUSPEND_H */
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 87deb8f..3f43c2e 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -166,7 +166,7 @@
 
 	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
-		return;
+		return -1;
 
 	/* Assumption : cpumask refers to a single CPU */
 	spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@
 	cpumask_copy(irq_desc[irq].affinity, cpumask);
 	spin_unlock_irqrestore(&gic_lock, flags);
 
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 1f60e27..3e9100d 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -68,8 +68,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 26760ca..e0a4ac1 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -42,7 +42,7 @@
 	seq_printf(m, fmt, __cpu_name[n],
 	                           (version >> 4) & 0x0f, version & 0x0f,
 	                           (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
-	seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n",
+	seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
 	              cpu_data[n].udelay_val / (500000/HZ),
 	              (cpu_data[n].udelay_val / (5000/HZ)) % 100);
 	seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index c13c7ad..2adead5 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,8 +2,8 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y	+= csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \
-	   strncpy_user.o strnlen_user.o uncached.o
+lib-y	+= csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
+	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y			+= iomap.o
 obj-$(CONFIG_PCI)	+= iomap-pci.o
diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
new file mode 100644
index 0000000..f69c6b5
--- /dev/null
+++ b/arch/mips/lib/delay.c
@@ -0,0 +1,56 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 by Waldorf Electronics
+ * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/smp.h>
+
+#include <asm/compiler.h>
+#include <asm/war.h>
+
+inline void __delay(unsigned int loops)
+{
+	__asm__ __volatile__ (
+	"	.set	noreorder				\n"
+	"	.align	3					\n"
+	"1:	bnez	%0, 1b					\n"
+	"	subu	%0, 1					\n"
+	"	.set	reorder					\n"
+	: "=r" (loops)
+	: "0" (loops));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Division by multiplication: you don't have to worry about
+ * loss of precision.
+ *
+ * Use only for very small delays ( < 1 msec).  Should probably use a
+ * lookup table, really, as the multiplications take much too long with
+ * short delays.  This is a "reasonable" implementation, though (and the
+ * first constant multiplications gets optimized away if the delay is
+ * a constant)
+ */
+
+void __udelay(unsigned long us)
+{
+	unsigned int lpj = current_cpu_data.udelay_val;
+
+	__delay((us * 0x000010c7 * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+	unsigned int lpj = current_cpu_data.udelay_val;
+
+	__delay((us * 0x00000005 * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ba3188..499ffe5 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,7 +114,7 @@
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	cpumask_t tmask;
 	int cpu = 0;
@@ -156,5 +156,7 @@
 
 	/* Do any generic SMTC IRQ affinity setup */
 	smtc_set_irq_affinity(irq, tmask);
+
+	return 0;
 }
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index c147c4b..690de06 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -118,7 +118,7 @@
 
 	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 	i = cpumask_first(mask);
 
@@ -152,6 +152,8 @@
 		}
 	}
 	spin_unlock_irqrestore(&bcm1480_imr_lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c
index 3de30f7..eb5396c 100644
--- a/arch/mips/sibyte/cfe/setup.c
+++ b/arch/mips/sibyte/cfe/setup.c
@@ -288,13 +288,7 @@
 	 */
 	cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE);
 	if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) {
-		if (argc < 0) {
-			/*
-			 * It's OK for direct boot to not provide a
-			 *  command line
-			 */
-			strcpy(arcs_cmdline, "root=/dev/ram0 ");
-		} else {
+		if (argc >= 0) {
 			/* The loader should have set the command line */
 			/* too early for panic to do any good */
 			printk("LINUX_CMDLINE not defined in cfe.");
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 38cb998..409dec7 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
@@ -113,7 +113,7 @@
 
 	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 
 	/* Convert logical CPU to physical CPU */
@@ -143,6 +143,8 @@
 					R_IMR_INTERRUPT_MASK));
 	}
 	spin_unlock_irqrestore(&sb1250_imr_lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 3559267..89faaca 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@
 config MN10300
 	def_bool y
 	select HAVE_OPROFILE
+	select HAVE_ARCH_TRACEHOOK
 
 config AM33
 	def_bool y
diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h
index bf09f8b..4910546 100644
--- a/arch/mn10300/include/asm/elf.h
+++ b/arch/mn10300/include/asm/elf.h
@@ -34,7 +34,7 @@
  */
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 #define ELF_NFPREG 32
@@ -76,6 +76,7 @@
 } while (0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
 /*
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 7323927..f7d4b0d 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -143,13 +143,7 @@
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define task_pt_regs(task)						\
-({									\
-       struct pt_regs *__regs__;					\
-       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
-       __regs__ - 1;							\
-})
-
+#define task_pt_regs(task) ((task)->thread.uregs)
 #define KSTK_EIP(task) (task_pt_regs(task)->pc)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 7b06cc6..921942e 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -91,9 +91,17 @@
 #if defined(__KERNEL__)
 
 #if !defined(__ASSEMBLY__)
+struct task_struct;
+
 #define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 3dc3e46..7408a27 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -76,7 +76,7 @@
 	cmp	nr_syscalls,d0
 	bcc	syscall_badsys
 	btst	_TIF_SYSCALL_TRACE,(TI_flags,a2)
-	bne	syscall_trace_entry
+	bne	syscall_entry_trace
 syscall_call:
 	add	d0,d0,a1
 	add	a1,a1
@@ -104,11 +104,10 @@
 syscall_exit_work:
 	btst	_TIF_SYSCALL_TRACE,d2
 	beq	work_pending
-	__sti				# could let do_syscall_trace() call
+	__sti				# could let syscall_trace_exit() call
 					# schedule() instead
 	mov	fp,d0
-	mov	1,d1
-	call	do_syscall_trace[],0	# do_syscall_trace(regs,entryexit)
+	call	syscall_trace_exit[],0	# do_syscall_trace(regs)
 	jmp	resume_userspace
 
 	ALIGN
@@ -138,13 +137,11 @@
 	jmp	resume_userspace
 
 	# perform syscall entry tracing
-syscall_trace_entry:
+syscall_entry_trace:
 	mov	-ENOSYS,d0
 	mov	d0,(REG_D0,fp)
 	mov	fp,d0
-	clr	d1
-	call	do_syscall_trace[],0
-	mov	(REG_ORIG_D0,fp),d0
+	call	syscall_trace_entry[],0	# returns the syscall number to actually use
 	mov	(REG_D1,fp),d1
 	cmp	nr_syscalls,d0
 	bcs	syscall_call
diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c
index 6b287f2..4fa0e36 100644
--- a/arch/mn10300/kernel/module.c
+++ b/arch/mn10300/kernel/module.c
@@ -48,8 +48,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	 * table entries. */
 }
 
 /*
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
index d6d6cdc..e143339 100644
--- a/arch/mn10300/kernel/ptrace.c
+++ b/arch/mn10300/kernel/ptrace.c
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -64,12 +67,6 @@
 		((unsigned long) task->thread.uregs + offset);
 }
 
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
 static inline
 int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 {
@@ -80,44 +77,191 @@
 	return 0;
 }
 
-static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
-				       struct task_struct *tsk)
+/*
+ * retrieve the contents of MN10300 userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
 {
-	return __copy_to_user(buf, &tsk->thread.fpu_state,
-			      sizeof(struct fpu_state_struct));
-}
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
 
-static inline unsigned long set_fpregs(struct task_struct *tsk,
-				       struct fpu_state_struct *buf)
-{
-	return __copy_from_user(&tsk->thread.fpu_state, buf,
-				sizeof(struct fpu_state_struct));
-}
+	/* we need to skip regs->next */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
 
-static inline void fpsave_init(struct task_struct *task)
-{
-	memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				  NR_PTREGS * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					NR_PTREGS * sizeof(long), -1);
 }
 
 /*
- * make sure the single step bit is not set
+ * update the contents of the MN10300 userspace general registers
  */
-void ptrace_disable(struct task_struct *child)
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
 {
-#ifndef CONFIG_MN10300_USING_JTAG
-	struct user *dummy = NULL;
-	long tmp;
+	struct pt_regs *regs = task_pt_regs(target);
+	unsigned long tmp;
+	int ret;
 
-	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-	tmp &= ~EPSW_T;
-	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
-#endif
+	/* we need to skip regs->next */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				 PT_EPSW * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	/* we need to mask off changes to EPSW */
+	tmp = regs->epsw;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &tmp, PT_EPSW * sizeof(long),
+				 PT_PC * sizeof(long));
+	tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
+	tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
+			      EPSW_FLAG_Z);
+	regs->epsw = tmp;
+
+	if (ret < 0)
+		return ret;
+
+	/* and finally load the PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->pc, PT_PC * sizeof(long),
+				 NR_PTREGS * sizeof(long));
+
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 NR_PTREGS * sizeof(long), -1);
 }
 
 /*
- * set the single step bit
+ * retrieve the contents of MN10300 userspace FPU registers
  */
-void ptrace_enable(struct task_struct *child)
+static int fpuregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
+	int ret;
+
+	unlazy_fpu(target);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the MN10300 userspace FPU registers
+ */
+static int fpuregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct fpu_state_struct fpu_state = target->thread.fpu_state;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &fpu_state, 0, sizeof(fpu_state));
+	if (ret < 0)
+		return ret;
+
+	fpu_kill_state(target);
+	target->thread.fpu_state = fpu_state;
+	set_using_fpu(target);
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 sizeof(fpu_state), -1);
+}
+
+/*
+ * determine if the FPU registers have actually been used
+ */
+static int fpuregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return is_using_fpu(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the MN10300 under Linux
+ */
+enum mn10300_regset {
+	REGSET_GENERAL,
+	REGSET_FPU,
+};
+
+static const struct user_regset mn10300_regsets[] = {
+	/*
+	 * General register format is:
+	 *	A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
+	 *	E1, E0, E7...E2, SP, LAR, LIR, MDR
+	 *	A1, A0, D1, D0, ORIG_D0, EPSW, PC
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU register format is:
+	 *	FS0-31, FPCR
+	 */
+	[REGSET_FPU] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct fpu_state_struct) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpuregs_get,
+		.set		= fpuregs_set,
+		.active		= fpuregs_active,
+	},
+};
+
+static const struct user_regset_view user_mn10300_native_view = {
+	.name		= "mn10300",
+	.e_machine	= EM_MN10300,
+	.regsets	= mn10300_regsets,
+	.n		= ARRAY_SIZE(mn10300_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_mn10300_native_view;
+}
+
+/*
+ * set the single-step bit
+ */
+void user_enable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
 	struct user *dummy = NULL;
@@ -130,44 +274,36 @@
 }
 
 /*
+ * make sure the single-step bit is not set
+ */
+void user_disable_single_step(struct task_struct *child)
+{
+#ifndef CONFIG_MN10300_USING_JTAG
+	struct user *dummy = NULL;
+	long tmp;
+
+	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
+	tmp &= ~EPSW_T;
+	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
+#endif
+}
+
+void ptrace_disable(struct task_struct *child)
+{
+	user_disable_single_step(child);
+}
+
+/*
  * handle the arch-specific side of process tracing
  */
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct fpu_state_struct fpu_state;
-	int i, ret;
+	unsigned long tmp;
+	int ret;
 
 	switch (request) {
-	/* read the word at location addr. */
-	case PTRACE_PEEKTEXT: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
-	/* read the word at location addr. */
-	case PTRACE_PEEKDATA: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
 	/* read the word at location addr in the USER area. */
-	case PTRACE_PEEKUSR: {
-		unsigned long tmp;
-
+	case PTRACE_PEEKUSR:
 		ret = -EIO;
 		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@
 					     ptrace_regid_to_frame[addr]);
 		ret = put_user(tmp, (unsigned long *) data);
 		break;
-	}
-
-	/* write the word at location addr. */
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEDATA:
-		if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
-		    sizeof(data))
-			ret = 0;
-		else
-			ret = -EIO;
-		break;
 
 		/* write the word at location addr in the USER area */
 	case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@
 					     data);
 		break;
 
-		/* continue and stop at next (return from) syscall */
-	case PTRACE_SYSCALL:
-		/* restart after signal. */
-	case PTRACE_CONT:
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_GENERAL,
+					   0, NR_PTREGS * sizeof(long),
+					   (void __user *)data);
 
-		/*
-		 * make the child exit
-		 * - the best I can do is send it a sigkill
-		 * - perhaps it should be put in the status that it wants to
-		 *   exit
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_GENERAL,
+					     0, NR_PTREGS * sizeof(long),
+					     (const void __user *)data);
 
-	case PTRACE_SINGLESTEP: /* set the trap flag. */
-#ifndef CONFIG_MN10300_USING_JTAG
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-#else
-		ret = -EINVAL;
-#endif
-		break;
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_FPU,
+					   0, sizeof(struct fpu_state_struct),
+					   (void __user *)data);
 
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-		/* Get all gp regs from the child. */
-	case PTRACE_GETREGS: {
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
-			__put_user(tmp, (unsigned long *) data);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_READ, (unsigned long *)data,
-			       sizeof(struct pt_regs))) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			__get_user(tmp, (unsigned long *) data);
-			put_stack_long(child, ptrace_regid_to_frame[i], tmp);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-		if (is_using_fpu(child)) {
-			unlazy_fpu(child);
-			fpu_state = child->thread.fpu_state;
-		} else {
-			memset(&fpu_state, 0, sizeof(fpu_state));
-		}
-
-		ret = -EIO;
-		if (copy_to_user((void *) data, &fpu_state,
-				 sizeof(fpu_state)) == 0)
-			ret = 0;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-		ret = -EFAULT;
-		if (copy_from_user(&fpu_state, (const void *) data,
-				   sizeof(fpu_state)) == 0) {
-			fpu_kill_state(child);
-			child->thread.fpu_state = fpu_state;
-			set_using_fpu(child);
-			ret = 0;
-		}
-		break;
-	}
-
-	case PTRACE_SETOPTIONS: {
-		if (data & PTRACE_O_TRACESYSGOOD)
-			child->ptrace |= PT_TRACESYSGOOD;
-		else
-			child->ptrace &= ~PT_TRACESYSGOOD;
-		ret = 0;
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_FPU,
+					     0, sizeof(struct fpu_state_struct),
+					     (const void __user *)data);
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 
@@ -337,43 +362,26 @@
 }
 
 /*
- * notification of system call entry/exit
- * - triggered by current->work.syscall_trace
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
 {
-#if 0
-	/* just in case... */
-	printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
-	       current->pid,
-	       regs->orig_d0,
-	       regs->a0,
-	       regs->d1,
-	       regs->a3,
-	       regs->a2,
-	       regs->d0);
-	return;
-#endif
+	if (tracehook_report_syscall_entry(regs))
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * regs->orig_d0
+		 */
+		return ULONG_MAX;
 
-	if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    !test_thread_flag(TIF_SINGLESTEP))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
+	return regs->orig_d0;
+}
 
-	/* the 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	ptrace_notify(SIGTRAP |
-		      ((current->ptrace & PT_TRACESYSGOOD) &&
-		       !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	tracehook_report_syscall_exit(regs, 0);
 }
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 841ca99..9f7572a 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -511,6 +512,9 @@
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, regs,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -561,4 +565,9 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
+	}
 }
diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S
index 7892080..7095147 100644
--- a/arch/mn10300/mm/tlb-mn10300.S
+++ b/arch/mn10300/mm/tlb-mn10300.S
@@ -165,24 +165,6 @@
 ENTRY(dtlb_aerror)
 	and	~EPSW_NMID,epsw
 	add	-4,sp
-	mov	d1,(sp)
-
-	movhu	(MMUFCR_DFC),d1			# is it the initial valid write
-						# to this page?
-	and	MMUFCR_xFC_INITWR,d1
- 	beq	dtlb_pagefault			# jump if not
-
-	mov	(DPTEL),d1			# set the dirty bit
-						# (don't replace with BSET!)
-	or	_PAGE_DIRTY,d1
-	mov	d1,(DPTEL)
-	mov	(sp),d1
-	add	4,sp
- 	rti
-
-	ALIGN
-dtlb_pagefault:
-	mov	(sp),d1
 	SAVE_ALL
 	add	-4,sp				# need to pass three params
 
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4ea4229..8007f1e 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -130,15 +130,17 @@
 	return cpu_dest;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu_dest;
 
 	cpu_dest = cpu_check_affinity(irq, dest);
 	if (cpu_dest < 0)
-		return;
+		return -1;
 
 	cpumask_copy(&irq_desc[irq].affinity, dest);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index ecd1c50..ef5caf2 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -267,8 +267,6 @@
 	mod->arch.section = NULL;
 
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* Additional bytes needed in front of individual sections */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7e034b..5351237 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,41 @@
  */
 struct irq_chip;
 
+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long test_perf_counter_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, perf_counter_pending)));
+	return x;
+}
+
+static inline void set_perf_counter_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+#else
+
+static inline unsigned long test_perf_counter_pending(void)
+{
+	return 0;
+}
+
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index a218da6..fb84120 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -28,6 +28,10 @@
 #define MPC52xx_PSC_MAXNUM	6
 
 /* Programmable Serial Controller (PSC) status register bits */
+#define MPC52xx_PSC_SR_UNEX_RX	0x0001
+#define MPC52xx_PSC_SR_DATA_VAL	0x0002
+#define MPC52xx_PSC_SR_DATA_OVR	0x0004
+#define MPC52xx_PSC_SR_CMDSEND	0x0008
 #define MPC52xx_PSC_SR_CDE	0x0080
 #define MPC52xx_PSC_SR_RXRDY	0x0100
 #define MPC52xx_PSC_SR_RXFULL	0x0200
@@ -61,6 +65,12 @@
 #define MPC52xx_PSC_RXTX_FIFO_EMPTY	0x0001
 
 /* PSC interrupt status/mask bits */
+#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001
+#define MPC52xx_PSC_IMR_DATA_VALID	0x0002
+#define MPC52xx_PSC_IMR_DATA_OVR	0x0004
+#define MPC52xx_PSC_IMR_CMD_SEND	0x0008
+#define MPC52xx_PSC_IMR_ERROR		0x0040
+#define MPC52xx_PSC_IMR_DEOF		0x0080
 #define MPC52xx_PSC_IMR_TXRDY		0x0100
 #define MPC52xx_PSC_IMR_RXRDY		0x0200
 #define MPC52xx_PSC_IMR_DB		0x0400
@@ -117,6 +127,7 @@
 #define MPC52xx_PSC_SICR_SIM_FIR		(0x6 << 24)
 #define MPC52xx_PSC_SICR_SIM_CODEC_24		(0x7 << 24)
 #define MPC52xx_PSC_SICR_SIM_CODEC_32		(0xf << 24)
+#define MPC52xx_PSC_SICR_AWR			(1 << 30)
 #define MPC52xx_PSC_SICR_GENCLK			(1 << 23)
 #define MPC52xx_PSC_SICR_I2S			(1 << 22)
 #define MPC52xx_PSC_SICR_CLKPOL			(1 << 21)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3ae..6ef0557 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
+	u8 perf_counter_pending;	/* PM interrupt while soft-disabled */
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 0000000..cc7c887
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,98 @@
+/*
+ * Performance counter support - PowerPC-specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#define MAX_HWCOUNTERS		8
+#define MAX_EVENT_ALTERNATIVES	8
+#define MAX_LIMITED_HWCOUNTERS	2
+
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+	int	n_counter;
+	int	max_alternatives;
+	u64	add_fields;
+	u64	test_adder;
+	int	(*compute_mmcr)(u64 events[], int n_ev,
+				unsigned int hwc[], u64 mmcr[]);
+	int	(*get_constraint)(u64 event, u64 *mskp, u64 *valp);
+	int	(*get_alternatives)(u64 event, unsigned int flags,
+				    u64 alt[]);
+	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
+	int	(*limited_pmc_event)(u64 event);
+	u32	flags;
+	int	n_generic;
+	int	*generic_events;
+	int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+			       [PERF_COUNT_HW_CACHE_OP_MAX]
+			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+extern struct power_pmu *ppmu;
+
+/*
+ * Values for power_pmu.flags
+ */
+#define PPMU_LIMITED_PMC5_6	1	/* PMC5/6 have limited function */
+#define PPMU_ALT_SIPR		2	/* uses alternate posn for SIPR/HV */
+
+/*
+ * Values for flags to get_alternatives()
+ */
+#define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
+#define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
+#define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
+
+struct pt_regs;
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+
+/*
+ * The power_pmu.get_constraint function returns a 64-bit value and
+ * a 64-bit mask that express the constraints between this event and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event.  For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits.  The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class.  A field of k bits can be used for
+ * N <= 2^(k-1) - 1.  The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set.  In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play.  The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes.  (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.)  For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits.  The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event's class set.  The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e8018d5..fb359b0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -492,11 +492,13 @@
 #define   MMCR0_FCHV	0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1	798
 #define SPRN_MMCRA	0x312
+#define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
 #define   MMCRA_SLOT_SHIFT	24
 #define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   POWER6_MMCRA_SDSYNC 0x0000080000000000ULL	/* SDAR/SIAR synced */
 #define   POWER6_MMCRA_SIHV   0x0000040000000000ULL
 #define   POWER6_MMCRA_SIPR   0x0000020000000000ULL
 #define   POWER6_MMCRA_THRM	0x00000020UL
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d98a30d..a0b92de 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,6 +322,6 @@
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
-SYSCALL(ni_syscall)
+SYSCALL_SPU(perf_counter_open)
 COMPAT_SYS_SPU(preadv)
 COMPAT_SYS_SPU(pwritev)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3f06f8e..4badac2 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,6 +341,7 @@
 #define __NR_dup3		316
 #define __NR_pipe2		317
 #define __NR_inotify_init1	318
+#define __NR_perf_counter_open	319
 #define __NR_preadv		320
 #define __NR_pwritev		321
 
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fb..a2c6834 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,9 @@
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o power4-pmu.o ppc970-pmu.o \
+				   power5-pmu.o power5+-pmu.o power6-pmu.o \
+				   power7-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc0..e981d1c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc323..43e0734 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
+#ifdef CONFIG_PERF_COUNTERS
+	/* check paca->perf_counter_pending if we're enabling ints */
+	lbz	r3,PACAPERFPEND(r13)
+	and.	r3,r3,r5
+	beq	27f
+	bl	.perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8c1a496..844d3f8 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,6 +53,7 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -135,6 +136,11 @@
 			iseries_handle_interrupts();
 	}
 
+	if (test_perf_counter_pending()) {
+		clear_perf_counter_pending();
+		perf_counter_do_pending();
+	}
+
 	/*
 	 * if (get_paca()->hard_enabled) return;
 	 * But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 43e7e3a..477c663 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -43,8 +43,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 0000000..bb20238
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,1263 @@
+/*
+ * Performance counter support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_counter.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+
+struct cpu_hw_counters {
+	int n_counters;
+	int n_percpu;
+	int disabled;
+	int n_added;
+	int n_limited;
+	u8  pmcs_enabled;
+	struct perf_counter *counter[MAX_HWCOUNTERS];
+	u64 events[MAX_HWCOUNTERS];
+	unsigned int flags[MAX_HWCOUNTERS];
+	u64 mmcr[3];
+	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
+	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
+};
+DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+struct power_pmu *ppmu;
+
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
+static void perf_counter_interrupt(struct pt_regs *regs);
+
+void perf_counter_print_debug(void)
+{
+}
+
+/*
+ * Read one performance monitor counter (PMC).
+ */
+static unsigned long read_pmc(int idx)
+{
+	unsigned long val;
+
+	switch (idx) {
+	case 1:
+		val = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		val = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		val = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		val = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		val = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		val = mfspr(SPRN_PMC6);
+		break;
+	case 7:
+		val = mfspr(SPRN_PMC7);
+		break;
+	case 8:
+		val = mfspr(SPRN_PMC8);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+		val = 0;
+	}
+	return val;
+}
+
+/*
+ * Write one PMC.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 1:
+		mtspr(SPRN_PMC1, val);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, val);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, val);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, val);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, val);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, val);
+		break;
+	case 7:
+		mtspr(SPRN_PMC7, val);
+		break;
+	case 8:
+		mtspr(SPRN_PMC8, val);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+	}
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event[].
+ */
+static int power_check_constraints(u64 event[], unsigned int cflags[],
+				   int n_ev)
+{
+	u64 mask, value, nv;
+	u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
+	int i, j;
+	u64 addf = ppmu->add_fields;
+	u64 tadd = ppmu->test_adder;
+
+	if (n_ev > ppmu->n_counter)
+		return -1;
+
+	/* First see if the events will go on as-is */
+	for (i = 0; i < n_ev; ++i) {
+		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+		    && !ppmu->limited_pmc_event(event[i])) {
+			ppmu->get_alternatives(event[i], cflags[i],
+					       alternatives[i]);
+			event[i] = alternatives[i][0];
+		}
+		if (ppmu->get_constraint(event[i], &amasks[i][0],
+					 &avalues[i][0]))
+			return -1;
+	}
+	value = mask = 0;
+	for (i = 0; i < n_ev; ++i) {
+		nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
+		if ((((nv + tadd) ^ value) & mask) != 0 ||
+		    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
+			break;
+		value = nv;
+		mask |= amasks[i][0];
+	}
+	if (i == n_ev)
+		return 0;	/* all OK */
+
+	/* doesn't work, gather alternatives... */
+	if (!ppmu->get_alternatives)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		choice[i] = 0;
+		n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
+						  alternatives[i]);
+		for (j = 1; j < n_alt[i]; ++j)
+			ppmu->get_constraint(alternatives[i][j],
+					     &amasks[i][j], &avalues[i][j]);
+	}
+
+	/* enumerate all possibilities and see if any will work */
+	i = 0;
+	j = -1;
+	value = mask = nv = 0;
+	while (i < n_ev) {
+		if (j >= 0) {
+			/* we're backtracking, restore context */
+			value = svalues[i];
+			mask = smasks[i];
+			j = choice[i];
+		}
+		/*
+		 * See if any alternative k for event i,
+		 * where k > j, will satisfy the constraints.
+		 */
+		while (++j < n_alt[i]) {
+			nv = (value | avalues[i][j]) +
+				(value & avalues[i][j] & addf);
+			if ((((nv + tadd) ^ value) & mask) == 0 &&
+			    (((nv + tadd) ^ avalues[i][j])
+			     & amasks[i][j]) == 0)
+				break;
+		}
+		if (j >= n_alt[i]) {
+			/*
+			 * No feasible alternative, backtrack
+			 * to event i-1 and continue enumerating its
+			 * alternatives from where we got up to.
+			 */
+			if (--i < 0)
+				return -1;
+		} else {
+			/*
+			 * Found a feasible alternative for event i,
+			 * remember where we got up to with this event,
+			 * go on to the next event, and start with
+			 * the first alternative for it.
+			 */
+			choice[i] = j;
+			svalues[i] = value;
+			smasks[i] = mask;
+			value = nv;
+			mask |= amasks[i][j];
+			++i;
+			j = -1;
+		}
+	}
+
+	/* OK, we have a feasible combination, tell the caller the solution */
+	for (i = 0; i < n_ev; ++i)
+		event[i] = alternatives[i][choice[i]];
+	return 0;
+}
+
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
+			  int n_prev, int n_new)
+{
+	int eu = 0, ek = 0, eh = 0;
+	int i, n, first;
+	struct perf_counter *counter;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	first = 1;
+	for (i = 0; i < n; ++i) {
+		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
+			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+			continue;
+		}
+		counter = ctrs[i];
+		if (first) {
+			eu = counter->attr.exclude_user;
+			ek = counter->attr.exclude_kernel;
+			eh = counter->attr.exclude_hv;
+			first = 0;
+		} else if (counter->attr.exclude_user != eu ||
+			   counter->attr.exclude_kernel != ek ||
+			   counter->attr.exclude_hv != eh) {
+			return -EAGAIN;
+		}
+	}
+
+	if (eu || ek || eh)
+		for (i = 0; i < n; ++i)
+			if (cflags[i] & PPMU_LIMITED_PMC_OK)
+				cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
+	return 0;
+}
+
+static void power_pmu_read(struct perf_counter *counter)
+{
+	long val, delta, prev;
+
+	if (!counter->hw.idx)
+		return;
+	/*
+	 * Performance monitor interrupts come even when interrupts
+	 * are soft-disabled, as long as interrupts are hard-enabled.
+	 * Therefore we treat them like NMIs.
+	 */
+	do {
+		prev = atomic64_read(&counter->hw.prev_count);
+		barrier();
+		val = read_pmc(counter->hw.idx);
+	} while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
+
+	/* The counters are only 32 bits wide */
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &counter->hw.period_left);
+}
+
+/*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts.  This tells
+ * us if `counter' is using such a PMC.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+	return (ppmu->flags & PPMU_LIMITED_PMC5_6)
+		&& (pmcnum == 5 || pmcnum == 6);
+}
+
+static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
+				    unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val, prev, delta;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		if (!counter->hw.idx)
+			continue;
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		prev = atomic64_read(&counter->hw.prev_count);
+		counter->hw.idx = 0;
+		delta = (val - prev) & 0xfffffffful;
+		atomic64_add(delta, &counter->count);
+	}
+}
+
+static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
+				  unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		counter->hw.idx = cpuhw->limited_hwidx[i];
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		atomic64_set(&counter->hw.prev_count, val);
+		perf_counter_update_userpage(counter);
+	}
+}
+
+/*
+ * Since limited counters don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other counters.  We try to keep the values from the limited
+ * counters as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited counters as small and consistent as possible.
+ * Therefore, if any limited counters are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
+{
+	unsigned long pmc5, pmc6;
+
+	if (!cpuhw->n_limited) {
+		mtspr(SPRN_MMCR0, mmcr0);
+		return;
+	}
+
+	/*
+	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 * To ensure we don't get a performance monitor interrupt
+	 * between writing MMCR0 and freezing/thawing the limited
+	 * counters, we first write MMCR0 with the counter overflow
+	 * interrupt enable bits turned off.
+	 */
+	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+		     : "=&r" (pmc5), "=&r" (pmc6)
+		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
+		       "i" (SPRN_MMCR0),
+		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+	if (mmcr0 & MMCR0_FC)
+		freeze_limited_counters(cpuhw, pmc5, pmc6);
+	else
+		thaw_limited_counters(cpuhw, pmc5, pmc6);
+
+	/*
+	 * Write the full MMCR0 including the counter overflow interrupt
+	 * enable bits, if necessary.
+	 */
+	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
+		mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/*
+ * Disable all counters to prevent PMU interrupts and to allow
+ * counters to be added or removed.
+ */
+void hw_perf_disable(void)
+{
+	struct cpu_hw_counters *cpuhw;
+	unsigned long ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+
+	ret = cpuhw->disabled;
+	if (!ret) {
+		cpuhw->disabled = 1;
+		cpuhw->n_added = 0;
+
+		/*
+		 * Check if we ever enabled the PMU on this cpu.
+		 */
+		if (!cpuhw->pmcs_enabled) {
+			if (ppc_md.enable_pmcs)
+				ppc_md.enable_pmcs();
+			cpuhw->pmcs_enabled = 1;
+		}
+
+		/*
+		 * Disable instruction sampling if it was enabled
+		 */
+		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+			mtspr(SPRN_MMCRA,
+			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+			mb();
+		}
+
+		/*
+		 * Set the 'freeze counters' bit.
+		 * The barrier is to make sure the mtspr has been
+		 * executed and the PMU has frozen the counters
+		 * before we return.
+		 */
+		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
+		mb();
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable all counters if disable == 0.
+ * If we were previously disabled and counters were added, then
+ * put the new config on the PMU.
+ */
+void hw_perf_enable(void)
+{
+	struct perf_counter *counter;
+	struct cpu_hw_counters *cpuhw;
+	unsigned long flags;
+	long i;
+	unsigned long val;
+	s64 left;
+	unsigned int hwc_index[MAX_HWCOUNTERS];
+	int n_lim;
+	int idx;
+
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	if (!cpuhw->disabled) {
+		local_irq_restore(flags);
+		return;
+	}
+	cpuhw->disabled = 0;
+
+	/*
+	 * If we didn't change anything, or only removed counters,
+	 * no need to recalculate MMCR* settings and reset the PMCs.
+	 * Just reenable the PMU with the current MMCR* settings
+	 * (possibly updated for removal of counters).
+	 */
+	if (!cpuhw->n_added) {
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+		if (cpuhw->n_counters == 0)
+			get_lppaca()->pmcregs_in_use = 0;
+		goto out_enable;
+	}
+
+	/*
+	 * Compute MMCR* values for the new set of counters
+	 */
+	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
+			       cpuhw->mmcr)) {
+		/* shouldn't ever get here */
+		printk(KERN_ERR "oops compute_mmcr failed\n");
+		goto out;
+	}
+
+	/*
+	 * Add in MMCR0 freeze bits corresponding to the
+	 * attr.exclude_* bits for the first counter.
+	 * We have already checked that all counters have the
+	 * same values for these bits as the first counter.
+	 */
+	counter = cpuhw->counter[0];
+	if (counter->attr.exclude_user)
+		cpuhw->mmcr[0] |= MMCR0_FCP;
+	if (counter->attr.exclude_kernel)
+		cpuhw->mmcr[0] |= freeze_counters_kernel;
+	if (counter->attr.exclude_hv)
+		cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+	/*
+	 * Write the new configuration to MMCR* with the freeze
+	 * bit set and set the hardware counters to their initial values.
+	 * Then unfreeze the counters.
+	 */
+	get_lppaca()->pmcregs_in_use = 1;
+	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+				| MMCR0_FC);
+
+	/*
+	 * Read off any pre-existing counters that need to move
+	 * to another PMC.
+	 */
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
+			power_pmu_read(counter);
+			write_pmc(counter->hw.idx, 0);
+			counter->hw.idx = 0;
+		}
+	}
+
+	/*
+	 * Initialize the PMCs for all the new and moved counters.
+	 */
+	cpuhw->n_limited = n_lim = 0;
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (counter->hw.idx)
+			continue;
+		idx = hwc_index[i] + 1;
+		if (is_limited_pmc(idx)) {
+			cpuhw->limited_counter[n_lim] = counter;
+			cpuhw->limited_hwidx[n_lim] = idx;
+			++n_lim;
+			continue;
+		}
+		val = 0;
+		if (counter->hw.sample_period) {
+			left = atomic64_read(&counter->hw.period_left);
+			if (left < 0x80000000L)
+				val = 0x80000000L - left;
+		}
+		atomic64_set(&counter->hw.prev_count, val);
+		counter->hw.idx = idx;
+		write_pmc(idx, val);
+		perf_counter_update_userpage(counter);
+	}
+	cpuhw->n_limited = n_lim;
+	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+	mb();
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+	/*
+	 * Enable instruction sampling if necessary
+	 */
+	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+		mb();
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+	}
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_counter *group, int max_count,
+			  struct perf_counter *ctrs[], u64 *events,
+			  unsigned int *flags)
+{
+	int n = 0;
+	struct perf_counter *counter;
+
+	if (!is_software_counter(group)) {
+		if (n >= max_count)
+			return -1;
+		ctrs[n] = group;
+		flags[n] = group->hw.counter_base;
+		events[n++] = group->hw.config;
+	}
+	list_for_each_entry(counter, &group->sibling_list, list_entry) {
+		if (!is_software_counter(counter) &&
+		    counter->state != PERF_COUNTER_STATE_OFF) {
+			if (n >= max_count)
+				return -1;
+			ctrs[n] = counter;
+			flags[n] = counter->hw.counter_base;
+			events[n++] = counter->hw.config;
+		}
+	}
+	return n;
+}
+
+static void counter_sched_in(struct perf_counter *counter, int cpu)
+{
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;
+	counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
+	if (is_software_counter(counter))
+		counter->pmu->enable(counter);
+}
+
+/*
+ * Called to enable a whole group of counters.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ */
+int hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	struct cpu_hw_counters *cpuhw;
+	long i, n, n0;
+	struct perf_counter *sub;
+
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	n0 = cpuhw->n_counters;
+	n = collect_events(group_leader, ppmu->n_counter - n0,
+			   &cpuhw->counter[n0], &cpuhw->events[n0],
+			   &cpuhw->flags[n0]);
+	if (n < 0)
+		return -EAGAIN;
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
+		return -EAGAIN;
+	i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
+	if (i < 0)
+		return -EAGAIN;
+	cpuhw->n_counters = n0 + n;
+	cpuhw->n_added += n;
+
+	/*
+	 * OK, this group can go on; update counter states etc.,
+	 * and enable any software counters
+	 */
+	for (i = n0; i < n0 + n; ++i)
+		cpuhw->counter[i]->hw.config = cpuhw->events[i];
+	cpuctx->active_oncpu += n;
+	n = 1;
+	counter_sched_in(group_leader, cpu);
+	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+		if (sub->state != PERF_COUNTER_STATE_OFF) {
+			counter_sched_in(sub, cpu);
+			++n;
+		}
+	}
+	ctx->nr_active += n;
+
+	return 1;
+}
+
+/*
+ * Add a counter to the PMU.
+ * If all counters are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_enable to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_pmu_enable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuhw;
+	unsigned long flags;
+	int n0;
+	int ret = -EAGAIN;
+
+	local_irq_save(flags);
+	perf_disable();
+
+	/*
+	 * Add the counter to the list (if there is room)
+	 * and check whether the total set is still feasible.
+	 */
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	n0 = cpuhw->n_counters;
+	if (n0 >= ppmu->n_counter)
+		goto out;
+	cpuhw->counter[n0] = counter;
+	cpuhw->events[n0] = counter->hw.config;
+	cpuhw->flags[n0] = counter->hw.counter_base;
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
+		goto out;
+	if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
+		goto out;
+
+	counter->hw.config = cpuhw->events[n0];
+	++cpuhw->n_counters;
+	++cpuhw->n_added;
+
+	ret = 0;
+ out:
+	perf_enable();
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Remove a counter from the PMU.
+ */
+static void power_pmu_disable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuhw;
+	long i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	perf_disable();
+
+	power_pmu_read(counter);
+
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		if (counter == cpuhw->counter[i]) {
+			while (++i < cpuhw->n_counters)
+				cpuhw->counter[i-1] = cpuhw->counter[i];
+			--cpuhw->n_counters;
+			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
+			if (counter->hw.idx) {
+				write_pmc(counter->hw.idx, 0);
+				counter->hw.idx = 0;
+			}
+			perf_counter_update_userpage(counter);
+			break;
+		}
+	}
+	for (i = 0; i < cpuhw->n_limited; ++i)
+		if (counter == cpuhw->limited_counter[i])
+			break;
+	if (i < cpuhw->n_limited) {
+		while (++i < cpuhw->n_limited) {
+			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
+			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+		}
+		--cpuhw->n_limited;
+	}
+	if (cpuhw->n_counters == 0) {
+		/* disable exceptions if no counters are running */
+		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+	}
+
+	perf_enable();
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable interrupts on a counter after they were throttled
+ * because they were coming too fast.
+ */
+static void power_pmu_unthrottle(struct perf_counter *counter)
+{
+	s64 val, left;
+	unsigned long flags;
+
+	if (!counter->hw.idx || !counter->hw.sample_period)
+		return;
+	local_irq_save(flags);
+	perf_disable();
+	power_pmu_read(counter);
+	left = counter->hw.sample_period;
+	counter->hw.last_period = left;
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+	write_pmc(counter->hw.idx, val);
+	atomic64_set(&counter->hw.prev_count, val);
+	atomic64_set(&counter->hw.period_left, left);
+	perf_counter_update_userpage(counter);
+	perf_enable();
+	local_irq_restore(flags);
+}
+
+struct pmu power_pmu = {
+	.enable		= power_pmu_enable,
+	.disable	= power_pmu_disable,
+	.read		= power_pmu_read,
+	.unthrottle	= power_pmu_unthrottle,
+};
+
+/*
+ * Return 1 if we might be able to put counter on a limited PMC,
+ * or 0 if not.
+ * A counter can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
+				 unsigned int flags)
+{
+	int n;
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+
+	if (counter->attr.exclude_user
+	    || counter->attr.exclude_kernel
+	    || counter->attr.exclude_hv
+	    || counter->attr.sample_period)
+		return 0;
+
+	if (ppmu->limited_pmc_event(ev))
+		return 1;
+
+	/*
+	 * The requested event isn't on a limited PMC already;
+	 * see if any alternative code goes on a limited PMC.
+	 */
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+	n = ppmu->get_alternatives(ev, flags, alt);
+
+	return n > 0;
+}
+
+/*
+ * Find an alternative event that goes on a normal PMC, if possible,
+ * and return the event code, or 0 if there is no such alternative.
+ * (Note: event code 0 is "don't count" on all machines.)
+ */
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
+{
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+	int n;
+
+	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (!n)
+		return 0;
+	return alt[0];
+}
+
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+	if (!atomic_add_unless(&num_counters, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_counters) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+/*
+ * Translate a generic cache event config to a raw event code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!ppmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*ppmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*eventp = ev;
+	return 0;
+}
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+{
+	u64 ev;
+	unsigned long flags;
+	struct perf_counter *ctrs[MAX_HWCOUNTERS];
+	u64 events[MAX_HWCOUNTERS];
+	unsigned int cflags[MAX_HWCOUNTERS];
+	int n;
+	int err;
+
+	if (!ppmu)
+		return ERR_PTR(-ENXIO);
+	switch (counter->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		ev = counter->attr.config;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+			return ERR_PTR(-EOPNOTSUPP);
+		ev = ppmu->generic_events[ev];
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(counter->attr.config, &ev);
+		if (err)
+			return ERR_PTR(err);
+		break;
+	case PERF_TYPE_RAW:
+		ev = counter->attr.config;
+		break;
+	}
+	counter->hw.config_base = ev;
+	counter->hw.idx = 0;
+
+	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		counter->attr.exclude_hv = 0;
+
+	/*
+	 * If this is a per-task counter, then we can use
+	 * PM_RUN_* events interchangeably with their non RUN_*
+	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+	 * XXX we should check if the task is an idle task.
+	 */
+	flags = 0;
+	if (counter->ctx->task)
+		flags |= PPMU_ONLY_COUNT_RUN;
+
+	/*
+	 * If this machine has limited counters, check whether this
+	 * event could go on a limited counter.
+	 */
+	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
+		if (can_go_on_limited_pmc(counter, ev, flags)) {
+			flags |= PPMU_LIMITED_PMC_OK;
+		} else if (ppmu->limited_pmc_event(ev)) {
+			/*
+			 * The requested event is on a limited PMC,
+			 * but we can't use a limited PMC; see if any
+			 * alternative goes on a normal PMC.
+			 */
+			ev = normal_pmc_alternative(ev, flags);
+			if (!ev)
+				return ERR_PTR(-EINVAL);
+		}
+	}
+
+	/*
+	 * If this is in a group, check if it can go on with all the
+	 * other hardware counters in the group.  We assume the counter
+	 * hasn't been linked into its leader's sibling list at this point.
+	 */
+	n = 0;
+	if (counter->group_leader != counter) {
+		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
+				   ctrs, events, cflags);
+		if (n < 0)
+			return ERR_PTR(-EINVAL);
+	}
+	events[n] = ev;
+	ctrs[n] = counter;
+	cflags[n] = flags;
+	if (check_excludes(ctrs, cflags, n, 1))
+		return ERR_PTR(-EINVAL);
+	if (power_check_constraints(events, cflags, n + 1))
+		return ERR_PTR(-EINVAL);
+
+	counter->hw.config = events[n];
+	counter->hw.counter_base = cflags[n];
+	counter->hw.last_period = counter->hw.sample_period;
+	atomic64_set(&counter->hw.period_left, counter->hw.last_period);
+
+	/*
+	 * See if we need to reserve the PMU.
+	 * If no counters are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_counters)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_counters) == 0 &&
+		    reserve_pmc_hardware(perf_counter_interrupt))
+			err = -EBUSY;
+		else
+			atomic_inc(&num_counters);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	counter->destroy = hw_perf_counter_destroy;
+
+	if (err)
+		return ERR_PTR(err);
+	return &power_pmu;
+}
+
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested.  Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_counter *counter, long val,
+			       struct pt_regs *regs, int nmi)
+{
+	u64 period = counter->hw.sample_period;
+	s64 prev, delta, left;
+	int record = 0;
+	u64 addr, mmcra, sdsync;
+
+	/* we don't have to worry about interrupts here */
+	prev = atomic64_read(&counter->hw.prev_count);
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &counter->count);
+
+	/*
+	 * See if the total period for this counter has expired,
+	 * and update for the next period.
+	 */
+	val = 0;
+	left = atomic64_read(&counter->hw.period_left) - delta;
+	if (period) {
+		if (left <= 0) {
+			left += period;
+			if (left <= 0)
+				left = period;
+			record = 1;
+		}
+		if (left < 0x80000000L)
+			val = 0x80000000L - left;
+	}
+
+	/*
+	 * Finally record data if requested.
+	 */
+	if (record) {
+		struct perf_sample_data data = {
+			.regs	= regs,
+			.addr	= 0,
+			.period	= counter->hw.last_period,
+		};
+
+		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
+			/*
+			 * The user wants a data address recorded.
+			 * If we're not doing instruction sampling,
+			 * give them the SDAR (sampled data address).
+			 * If we are doing instruction sampling, then only
+			 * give them the SDAR if it corresponds to the
+			 * instruction pointed to by SIAR; this is indicated
+			 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
+			 */
+			mmcra = regs->dsisr;
+			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+				data.addr = mfspr(SPRN_SDAR);
+		}
+		if (perf_counter_overflow(counter, nmi, &data)) {
+			/*
+			 * Interrupts are coming too fast - throttle them
+			 * by setting the counter to 0, so it will be
+			 * at least 2^30 cycles until the next interrupt
+			 * (assuming each counter counts at most 2 counts
+			 * per cycle).
+			 */
+			val = 0;
+			left = ~0ULL >> 1;
+		}
+	}
+
+	write_pmc(counter->hw.idx, val);
+	atomic64_set(&counter->hw.prev_count, val);
+	atomic64_set(&counter->hw.period_left, left);
+	perf_counter_update_userpage(counter);
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	unsigned long mmcra;
+
+	if (TRAP(regs) != 0xf00) {
+		/* not a PMU interrupt */
+		return user_mode(regs) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+	}
+
+	mmcra = regs->dsisr;
+	if (ppmu->flags & PPMU_ALT_SIPR) {
+		if (mmcra & POWER6_MMCRA_SIHV)
+			return PERF_EVENT_MISC_HYPERVISOR;
+		return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+	}
+	if (mmcra & MMCRA_SIHV)
+		return PERF_EVENT_MISC_HYPERVISOR;
+	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+}
+
+/*
+ * Called from generic code to get the instruction pointer
+ * for an event.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long mmcra;
+	unsigned long ip;
+	unsigned long slot;
+
+	if (TRAP(regs) != 0xf00)
+		return regs->nip;	/* not a PMU interrupt */
+
+	ip = mfspr(SPRN_SIAR);
+	mmcra = regs->dsisr;
+	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+		slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+		if (slot > 1)
+			ip += 4 * (slot - 1);
+	}
+	return ip;
+}
+
+/*
+ * Performance monitor interrupt stuff
+ */
+static void perf_counter_interrupt(struct pt_regs *regs)
+{
+	int i;
+	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+	struct perf_counter *counter;
+	long val;
+	int found = 0;
+	int nmi;
+
+	if (cpuhw->n_limited)
+		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
+					mfspr(SPRN_PMC6));
+
+	/*
+	 * Overload regs->dsisr to store MMCRA so we only need to read it once.
+	 */
+	regs->dsisr = mfspr(SPRN_MMCRA);
+
+	/*
+	 * If interrupts were soft-disabled when this PMU interrupt
+	 * occurred, treat it as an NMI.
+	 */
+	nmi = !regs->softe;
+	if (nmi)
+		nmi_enter();
+	else
+		irq_enter();
+
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
+			continue;
+		val = read_pmc(counter->hw.idx);
+		if ((int)val < 0) {
+			/* counter has overflowed */
+			found = 1;
+			record_and_restart(counter, val, regs, nmi);
+		}
+	}
+
+	/*
+	 * In case we didn't find and reset the counter that caused
+	 * the interrupt, scan all counters and reset any that are
+	 * negative, to avoid getting continual interrupts.
+	 * Any that we processed in the previous loop will not be negative.
+	 */
+	if (!found) {
+		for (i = 0; i < ppmu->n_counter; ++i) {
+			if (is_limited_pmc(i + 1))
+				continue;
+			val = read_pmc(i + 1);
+			if ((int)val < 0)
+				write_pmc(i + 1, 0);
+		}
+	}
+
+	/*
+	 * Reset MMCR0 to its normal value.  This will set PMXE and
+	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
+	 * and thus allow interrupts to occur again.
+	 * XXX might want to use MSR.PM to keep the counters frozen until
+	 * we get back out of this interrupt.
+	 */
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+	if (nmi)
+		nmi_exit();
+	else
+		irq_exit();
+}
+
+void hw_perf_counter_setup(int cpu)
+{
+	struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
+
+	memset(cpuhw, 0, sizeof(*cpuhw));
+	cpuhw->mmcr[0] = MMCR0_FC;
+}
+
+extern struct power_pmu power4_pmu;
+extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
+extern struct power_pmu power6_pmu;
+extern struct power_pmu power7_pmu;
+
+static int init_perf_counters(void)
+{
+	unsigned long pvr;
+
+	/* XXX should get this from cputable */
+	pvr = mfspr(SPRN_PVR);
+	switch (PVR_VER(pvr)) {
+	case PV_POWER4:
+	case PV_POWER4p:
+		ppmu = &power4_pmu;
+		break;
+	case PV_970:
+	case PV_970FX:
+	case PV_970MP:
+		ppmu = &ppc970_pmu;
+		break;
+	case PV_POWER5:
+		ppmu = &power5_pmu;
+		break;
+	case PV_POWER5p:
+		ppmu = &power5p_pmu;
+		break;
+	case 0x3e:
+		ppmu = &power6_pmu;
+		break;
+	case 0x3f:
+		ppmu = &power7_pmu;
+		break;
+	}
+
+	/*
+	 * Use FCHV to ignore kernel events if MSR.HV is set.
+	 */
+	if (mfmsr() & MSR_HV)
+		freeze_counters_kernel = MMCR0_FCHV;
+
+	return 0;
+}
+
+arch_initcall(init_perf_counters);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 0000000..07bd308
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,598 @@
+/*
+ * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER4
+ */
+#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_LOWER_SH	6
+#define PM_LOWER_MSK	1
+#define PM_LOWER_MSKS	0x40
+#define PM_BYTE_SH	4	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_PMCSEL_MSK	7
+
+/*
+ * Unit code values
+ */
+#define PM_FPU		1
+#define PM_ISU1		2
+#define PM_IFU		3
+#define PM_IDU0		4
+#define PM_ISU1_ALT	6
+#define PM_ISU2		7
+#define PM_IFU_ALT	8
+#define PM_LSU0		9
+#define PM_LSU1		0xc
+#define PM_GPS		0xf
+
+/*
+ * Bits in MMCR0 for POWER4
+ */
+#define MMCR0_PMC1SEL_SH	8
+#define MMCR0_PMC2SEL_SH	1
+#define MMCR_PMCSEL_MSK		0x1f
+
+/*
+ * Bits in MMCR1 for POWER4
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTC0SEL_SH	61
+#define MMCR1_TTM1SEL_SH	59
+#define MMCR1_TTC1SEL_SH	58
+#define MMCR1_TTM2SEL_SH	56
+#define MMCR1_TTC2SEL_SH	55
+#define MMCR1_TTM3SEL_SH	53
+#define MMCR1_TTC3SEL_SH	52
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	50
+#define MMCR1_TD_CP_DBG1SEL_SH	48
+#define MMCR1_TD_CP_DBG2SEL_SH	46
+#define MMCR1_TD_CP_DBG3SEL_SH	44
+#define MMCR1_DEBUG0SEL_SH	43
+#define MMCR1_DEBUG1SEL_SH	42
+#define MMCR1_DEBUG2SEL_SH	41
+#define MMCR1_DEBUG3SEL_SH	40
+#define MMCR1_PMC1_ADDER_SEL_SH	39
+#define MMCR1_PMC2_ADDER_SEL_SH	38
+#define MMCR1_PMC6_ADDER_SEL_SH	37
+#define MMCR1_PMC5_ADDER_SEL_SH	36
+#define MMCR1_PMC8_ADDER_SEL_SH	35
+#define MMCR1_PMC7_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC3SEL_SH	27
+#define MMCR1_PMC4SEL_SH	22
+#define MMCR1_PMC5SEL_SH	17
+#define MMCR1_PMC6SEL_SH	12
+#define MMCR1_PMC7SEL_SH	7
+#define MMCR1_PMC8SEL_SH	2	/* note bit 0 is in MMCRA for GP */
+
+static short mmcr1_adder_bits[8] = {
+	MMCR1_PMC1_ADDER_SEL_SH,
+	MMCR1_PMC2_ADDER_SEL_SH,
+	MMCR1_PMC3_ADDER_SEL_SH,
+	MMCR1_PMC4_ADDER_SEL_SH,
+	MMCR1_PMC5_ADDER_SEL_SH,
+	MMCR1_PMC6_ADDER_SEL_SH,
+	MMCR1_PMC7_ADDER_SEL_SH,
+	MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+#define MMCRA_PMC8SEL0_SH	17	/* PMC8SEL bit 0 for GP */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ * 	  \SMPL	        ||\TTC3SEL
+ * 		        |\TTC_IFU_SEL
+ * 		        \TTM2SEL0
+ *
+ * SMPL - SAMPLE_ENABLE constraint
+ *     56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
+ *
+ * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
+ *     55: UC1 error 0x0080_0000_0000_0000
+ *     54: FPU events needed 0x0040_0000_0000_0000
+ *     53: ISU1 events needed 0x0020_0000_0000_0000
+ *     52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
+ *
+ * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
+ *     51: UC2 error 0x0008_0000_0000_0000
+ *     50: FPU events needed 0x0004_0000_0000_0000
+ *     49: IFU events needed 0x0002_0000_0000_0000
+ *     48: LSU0 events needed 0x0001_0000_0000_0000
+ *
+ * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
+ *     47: UC3 error 0x8000_0000_0000
+ *     46: LSU0 events needed 0x4000_0000_0000
+ *     45: IFU events needed 0x2000_0000_0000
+ *     44: IDU0|ISU2 events needed 0x1000_0000_0000
+ *     43: ISU1 events needed 0x0800_0000_0000
+ *
+ * TTM2SEL0
+ *     42: 0 = IDU0 events needed
+ *     	   1 = ISU2 events needed 0x0400_0000_0000
+ *
+ * TTC_IFU_SEL
+ *     41: 0 = IFU.U events needed
+ *     	   1 = IFU.L events needed 0x0200_0000_0000
+ *
+ * TTC3SEL
+ *     40: 0 = LSU1.U events needed
+ *     	   1 = LSU1.L events needed 0x0100_0000_0000
+ *
+ * PS1
+ *     39: PS1 error 0x0080_0000_0000
+ *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ *     35: PS2 error 0x0008_0000_0000
+ *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ *     28-31: Byte 0 event source 0xf000_0000
+ *     	   1 = FPU
+ * 	   2 = ISU1
+ * 	   3 = IFU
+ * 	   4 = IDU0
+ * 	   7 = ISU2
+ * 	   9 = LSU0
+ * 	   c = LSU1
+ * 	   f = GPS
+ *
+ * B1, B2, B3
+ *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P8
+ *     15: P8 error 0x8000
+ *     14-15: Count of events needing PMC8
+ *
+ * P1..P7
+ *     0-13: Count of events needing PMC1..PMC7
+ *
+ * Note: this doesn't allow events using IFU.U to be combined with events
+ * using IFU.L, though that is feasible (using TTM0 and TTM2).  However
+ * there are no listed events for IFU.L (they are debug events not
+ * verified for performance monitoring) so this shouldn't cause a
+ * problem.
+ */
+
+static struct unitinfo {
+	u64	value, mask;
+	int	unit;
+	int	lowerbit;
+} p4_unitinfo[16] = {
+	[PM_FPU]  = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
+	[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+	[PM_ISU1_ALT] =
+		    { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+	[PM_IFU]  = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+	[PM_IFU_ALT] =
+		    { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+	[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
+	[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
+	[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
+	[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
+	[PM_GPS]  = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
+};
+
+static unsigned char direct_marked_event[8] = {
+	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+	(1<<3),			/* PMC3: PM_MRK_ST_CMPL_INT */
+	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+	(1<<4) | (1<<5),	/* PMC5: PM_MRK_GRP_TIMEO */
+	(1<<3) | (1<<4) | (1<<5),
+		/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+	(1<<4),			/* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p4_marked_instr_event(u64 event)
+{
+	int pmc, psel, unit, byte, bit;
+	unsigned int mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc) {
+		if (direct_marked_event[pmc - 1] & (1 << psel))
+			return 1;
+		if (psel == 0)		/* add events */
+			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+		else if (psel == 6)	/* decode events */
+			bit = 4;
+		else
+			return 0;
+	} else
+		bit = psel;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = 0;
+	switch (unit) {
+	case PM_LSU1:
+		if (event & PM_LOWER_MSKS)
+			mask = 1 << 28;		/* byte 7 bit 4 */
+		else
+			mask = 6 << 24;		/* byte 3 bits 1 and 2 */
+		break;
+	case PM_LSU0:
+		/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
+		mask = 0x083dff00;
+	}
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, lower, sh;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 8)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		grp = ((pmc - 1) >> 1) & 1;
+	}
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	if (unit) {
+		lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
+
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+
+		if (!p4_unitinfo[unit].unit)
+			return -1;
+		mask  |= p4_unitinfo[unit].mask;
+		value |= p4_unitinfo[unit].value;
+		sh = p4_unitinfo[unit].lowerbit;
+		if (sh > 1)
+			value |= (u64)lower << sh;
+		else if (lower != sh)
+			return -1;
+		unit = p4_unitinfo[unit].unit;
+
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (28 - 4 * byte);
+		value |= (u64)unit << (28 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2/5/6 field */
+		mask  |= 0x8000000000ull;
+		value |= 0x1000000000ull;
+	} else {
+		/* increment PMC3/4/7/8 field */
+		mask  |= 0x800000000ull;
+		value |= 0x100000000ull;
+	}
+
+	/* Marked instruction events need sample_enable set */
+	if (p4_marked_instr_event(event)) {
+		mask  |= 1ull << 56;
+		value |= 1ull << 56;
+	}
+
+	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
+	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
+		mask  |= 1ull << 56;
+
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static unsigned int ppc_inst_cmpl[] = {
+	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
+};
+
+static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, na;
+
+	alt[0] = event;
+	na = 1;
+
+	/* 2 possibilities for PM_GRP_DISP_REJECT */
+	if (event == 0x8003 || event == 0x0224) {
+		alt[1] = event ^ (0x8003 ^ 0x0224);
+		return 2;
+	}
+
+	/* 2 possibilities for PM_ST_MISS_L1 */
+	if (event == 0x0c13 || event == 0x0c23) {
+		alt[1] = event ^ (0x0c13 ^ 0x0c23);
+		return 2;
+	}
+
+	/* several possibilities for PM_INST_CMPL */
+	for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
+		if (event == ppc_inst_cmpl[i]) {
+			for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
+				if (j != i)
+					alt[na++] = ppc_inst_cmpl[j];
+			break;
+		}
+	}
+
+	return na;
+}
+
+static int p4_compute_mmcr(u64 event[], int n_ev,
+			   unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned int pmc, unit, byte, psel, lower;
+	unsigned int ttm, grp;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	unsigned int unitlower = 0;
+	int i;
+
+	if (n_ev > 8)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2/5/6 vs 3/4/7/8 use */
+			++pmc_grp_use[((pmc - 1) >> 1) & 1];
+		}
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
+		if (unit) {
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (unit == 6 || unit == 8)
+				/* map alt ISU1/IFU codes: 6->2, 8->3 */
+				unit = (unit >> 1) - 1;
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			lower <<= unit;
+			if (unituse[unit] && lower != (unitlower & lower))
+				return -1;
+			unituse[unit] = 1;
+			unitlower |= lower;
+		}
+	}
+	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
+	 * Each TTMx can only select one unit, but since
+	 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
+	 * we have some choices.
+	 */
+	if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
+		unituse[6] = 1;		/* Move 2 to 6 */
+		unituse[2] = 0;
+	}
+	if (unituse[3] & (unituse[1] | unituse[2])) {
+		unituse[8] = 1;		/* Move 3 to 8 */
+		unituse[3] = 0;
+		unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
+	}
+	/* Check only one unit per TTMx */
+	if (unituse[1] + unituse[2] + unituse[3] > 1 ||
+	    unituse[4] + unituse[6] + unituse[7] > 1 ||
+	    unituse[8] + unituse[9] > 1 ||
+	    (unituse[5] | unituse[10] | unituse[11] |
+	     unituse[13] | unituse[14]))
+		return -1;
+
+	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
+	mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
+	mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
+	mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
+
+	/* Set TTCxSEL fields. */
+	if (unitlower & 0xe)
+		mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
+	if (unitlower & 0xf0)
+		mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
+	if (unitlower & 0xf00)
+		mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
+	if (unitlower & 0x7000)
+		mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
+
+	/* Set byte lane select fields. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == 0xf) {
+			/* special case for GPS */
+			mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
+		} else {
+			if (!unituse[unit])
+				ttm = unit - 1;		/* 2->1, 3->2 */
+			else
+				ttm = unit >> 2;
+			mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
+		}
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or 00xxx direct event (off or cycles) */
+			if (unit)
+				psel |= 0x10 | ((byte & 2) << 2);
+			for (pmc = 0; pmc < 8; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (unit) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 4) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (psel == 0 && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+			else if (psel == 6 && byte == 3)
+				/* seem to need to set sample_enable here */
+				mmcra |= MMCRA_SAMPLE_ENABLE;
+			psel |= 8;
+		}
+		if (pmc <= 1)
+			mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
+		else
+			mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+		if (pmc == 7)	/* PMC8 */
+			mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
+		hwc[i] = pmc;
+		if (p4_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+	}
+
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0xfe)
+		mmcr0 |= MMCR0_PMCjCE;
+
+	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	/*
+	 * Setting the PMCxSEL field to 0 disables PMC x.
+	 * (Note that pmc is 0-based here, not 1-based.)
+	 */
+	if (pmc <= 1) {
+		mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
+	} else {
+		mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
+		if (pmc == 7)
+			mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
+	}
+}
+
+static int p4_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x1001,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8c10, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c10, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x330,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x331,  /* PM_BR_MPRED_CR */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x8c10,		0x3c10	},
+		[C(OP_WRITE)] = {	0x7c10,		0xc13	},
+		[C(OP_PREFETCH)] = {	0xc35,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0xc34,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x904	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x900	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x330,		0x331	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+struct power_pmu power4_pmu = {
+	.n_counter = 8,
+	.max_alternatives = 5,
+	.add_fields = 0x0000001100005555ull,
+	.test_adder = 0x0011083300000000ull,
+	.compute_mmcr = p4_compute_mmcr,
+	.get_constraint = p4_get_constraint,
+	.get_alternatives = p4_get_alternatives,
+	.disable_pmc = p4_disable_pmc,
+	.n_generic = ARRAY_SIZE(p4_generic_events),
+	.generic_events = p4_generic_events,
+	.cache_events = &power4_cache_events,
+};
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 0000000..41e5d2d
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,671 @@
+/*
+ * Performance counter support for POWER5+/++ (not POWER5) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc
+#define PM_LASTUNIT	0xc
+
+/*
+ * Bits in MMCR1 for POWER5+
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *             [  ><><>< ><> <><>[  >  <  ><  ><  ><  ><><><><><><>
+ *             NC  G0G1G2 G3 T0T1 UC    B0  B1  B2  B3 P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-44: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * T0 - TTM0 constraint
+ *     36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     33: UC3 error 0x02_0000_0000
+ *     32: FPU|IFU|ISU1 events needed 0x01_0000_0000
+ *     31: ISU0 events needed 0x01_8000_0000
+ *     30: IDU|GRS events needed 0x00_4000_0000
+ *
+ * B0
+ *     24-27: Byte 0 event source 0x0f00_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
+ */
+
+static const int grsel_shift[8] = {
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0x3200000000ull, 0x0100000000ull },
+	[PM_ISU0] =  { 0x0200000000ull, 0x0080000000ull },
+	[PM_ISU1] =  { 0x3200000000ull, 0x3100000000ull },
+	[PM_IFU] =   { 0x3200000000ull, 0x2100000000ull },
+	[PM_IDU] =   { 0x0e00000000ull, 0x0040000000ull },
+	[PM_GRS] =   { 0x0e00000000ull, 0x0c40000000ull },
+};
+
+static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	u64 mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
+			return -1;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;
+			sh = grsel_shift[bit];
+			mask |= (u64)fmask << sh;
+			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		}
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (24 - 4 * byte);
+		value |= (u64)unit << (24 - 4 * byte);
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ull;
+		value |= 0x1000000000000ull;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int power5p_limited_pmc_event(u64 event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return pmc == 5 || pmc == 6;
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x100c0,  0x40001f },			/* PM_GCT_FULL_CYC */
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x230e2,  0x323087 },			/* PM_BR_PRED_CR */
+	{ 0x230e3,  0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x800c4,  0xc20e0 },			/* PM_DTLB_MISS */
+	{ 0xc50c6,  0xc60e0 },			/* PM_MRK_DTLB_MISS */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
+	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
+	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(unsigned int event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters.  This returns the alternative
+ * event code for those that do, or -1 otherwise.  This also handles
+ * alternative PCMSEL values for add events.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+	int pmc, altpmc, pp, j;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc == 0 || pmc > 4)
+		return -1;
+	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
+	pp = event & PM_PMCSEL_MSK;
+	for (j = 0; j < 4; ++j) {
+		if (bytedecode_alternatives[pmc - 1][j] == pp) {
+			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+				(altpmc << PM_PMC_SH) |
+				bytedecode_alternatives[altpmc - 1][j];
+		}
+	}
+
+	/* new decode alternatives for power5+ */
+	if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
+		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
+	if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
+		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
+
+	/* alternative add event encodings */
+	if (pp == 0x10 || pp == 0x28)
+		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
+			(altpmc << PM_PMC_SH);
+
+	return -1;
+}
+
+static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	int nlim;
+	s64 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	nlim = power5p_limited_pmc_event(event);
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+			nlim += power5p_limited_pmc_event(ae);
+		}
+	} else {
+		ae = find_alternative_bdecode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC
+		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs (e.g.
+		 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
+		 * Note that even with these additional alternatives
+		 * we never end up with more than 3 alternatives for any event.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0xf:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x600005:	/* PM_RUN_CYC */
+				alt[j++] = 0xf;
+				break;
+			case 0x100009:	/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 0x100009;	/* PM_INST_CMPL */
+				alt[j++] = 0x200009;
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (!power5p_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (power5p_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5p_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x48) == 0x40) {
+		bit = psel & 7;
+	} else if (psel == 0x28) {
+		bit = pmc - 1;
+	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
+		bit = 4;
+	}
+
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
+		mask = 0x5f11c000;
+	} else
+		return 0;
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5p_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	u64 mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	int ttmuse;
+
+	if (n_ev > 6)
+		return -1;
+
+	/* First pass to count resource use */
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;
+				byte &= 3;
+			}
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		} else if (pmc <= 4) {
+			/* Direct event */
+			--pmc;
+			if (isbus && (byte & 2) &&
+			    (psel == 8 || psel == 0x10 || psel == 0x28))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (u64)grsel << grsel_shift[bit];
+		}
+		if (power5p_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
+			/* select alternate byte lane */
+			psel |= 0x10;
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5p_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x1c10a8, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x1c10a8,	0x3c1088	},
+		[C(OP_WRITE)] = {	0x2c10a8,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	-1		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0xc20e4,	0x800c4		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+struct power_pmu power5p_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x7000000000055ull,
+	.test_adder = 0x3000040000000ull,
+	.compute_mmcr = power5p_compute_mmcr,
+	.get_constraint = power5p_get_constraint,
+	.get_alternatives = power5p_get_alternatives,
+	.disable_pmc = power5p_disable_pmc,
+	.limited_pmc_event = power5p_limited_pmc_event,
+	.flags = PPMU_LIMITED_PMC5_6,
+	.n_generic = ARRAY_SIZE(power5p_generic_events),
+	.generic_events = power5p_generic_events,
+	.cache_events = &power5p_cache_events,
+};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 0000000..05600b6
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,611 @@
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5 (not POWER5++)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc
+#define PM_LASTUNIT	0xc
+
+/*
+ * Bits in MMCR1 for POWER5
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *         <><>[  ><><>< ><> [  >[ >[ ><  ><  ><  ><  ><><><><><><>
+ *         T0T1 NC G0G1G2 G3  UC PS1PS2 B0  B1  B2  B3 P6P5P4P3P2P1
+ *
+ * T0 - TTM0 constraint
+ *     54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-44: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     37: UC3 error 0x20_0000_0000
+ *     36: FPU|IFU|ISU1 events needed 0x10_0000_0000
+ *     35: ISU0 events needed 0x08_0000_0000
+ *     34: IDU|GRS events needed 0x04_0000_0000
+ *
+ * PS1
+ *     33: PS1 error 0x2_0000_0000
+ *     31-32: count of events needing PMC1/2 0x1_8000_0000
+ *
+ * PS2
+ *     30: PS2 error 0x4000_0000
+ *     28-29: count of events needing PMC3/4 0x3000_0000
+ *
+ * B0
+ *     24-27: Byte 0 event source 0x0f00_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P1..P6
+ *     0-11: Count of events needing PMC1..PMC6
+ */
+
+static const int grsel_shift[8] = {
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc0002000000000ull, 0x00001000000000ull },
+	[PM_ISU0] =  { 0x00002000000000ull, 0x00000800000000ull },
+	[PM_ISU1] =  { 0xc0002000000000ull, 0xc0001000000000ull },
+	[PM_IFU] =   { 0xc0002000000000ull, 0x80001000000000ull },
+	[PM_IDU] =   { 0x30002000000000ull, 0x00000400000000ull },
+	[PM_GRS] =   { 0x30002000000000ull, 0x30000400000000ull },
+};
+
+static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc <= 4)
+			grp = (pmc - 1) >> 1;
+		else if (event != 0x500009 && event != 0x600005)
+			return -1;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;
+			sh = grsel_shift[bit];
+			mask |= (u64)fmask << sh;
+			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		}
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (24 - 4 * byte);
+		value |= (u64)unit << (24 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2 field */
+		mask  |= 0x200000000ull;
+		value |= 0x080000000ull;
+	} else if (grp == 1) {
+		/* increment PMC3/4 field */
+		mask  |= 0x40000000ull;
+		value |= 0x10000000ull;
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ull;
+		value |= 0x1000000000000ull;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
+	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u64 event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters.  This returns the alternative
+ * event code for those that do, or -1 otherwise.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+	int pmc, altpmc, pp, j;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc == 0 || pmc > 4)
+		return -1;
+	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
+	pp = event & PM_PMCSEL_MSK;
+	for (j = 0; j < 4; ++j) {
+		if (bytedecode_alternatives[pmc - 1][j] == pp) {
+			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+				(altpmc << PM_PMC_SH) |
+				bytedecode_alternatives[altpmc - 1][j];
+		}
+	}
+	return -1;
+}
+
+static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	s64 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+		}
+	} else {
+		ae = find_alternative_bdecode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+	return nalt;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x58) == 0x40)
+		bit = psel & 7;
+
+	if (!(event & PM_BUSEVENT_MSK))
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
+		mask = 0x5f00c0aa;
+	} else
+		return 0;
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	u64 mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	int ttmuse;
+
+	if (n_ev > 6)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2 vs 3/4 use */
+			if (pmc <= 4)
+				++pmc_grp_use[(pmc - 1) >> 1];
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;
+				byte &= 3;
+			}
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (isbus) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 2) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else if (pmc <= 4) {
+			/* Direct event */
+			--pmc;
+			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (u64)grsel << grsel_shift[bit];
+		}
+		if (power5_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4c1090, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
+		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x3c309b	},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x2c4090,	0x800c4		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+struct power_pmu power5_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x7000090000555ull,
+	.test_adder = 0x3000490000000ull,
+	.compute_mmcr = power5_compute_mmcr,
+	.get_constraint = power5_get_constraint,
+	.get_alternatives = power5_get_alternatives,
+	.disable_pmc = power5_disable_pmc,
+	.n_generic = ARRAY_SIZE(power5_generic_events),
+	.generic_events = power5_generic_events,
+	.cache_events = &power5_cache_events,
+};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 0000000..46f74be
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,532 @@
+/*
+ * Performance counter support for POWER6 processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER6
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0x7
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* Unit event comes (TTMxSEL encoding) */
+#define PM_UNIT_MSK	0xf
+#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
+#define PM_LLAV		0x8000	/* Load lookahead match value */
+#define PM_LLA		0x4000	/* Load lookahead match enable */
+#define PM_BYTE_SH	12	/* Byte of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
+#define PM_SUBUNIT_MSK	7
+#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
+#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
+#define PM_BUSEVENT_MSK	0xf3700
+
+/*
+ * Bits in MMCR1 for POWER6
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
+#define MMCR1_NESTSEL_SH	45
+#define MMCR1_NESTSEL_MSK	0x7
+#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
+#define MMCR1_PMC1_LLA		((u64)1 << 44)
+#define MMCR1_PMC1_LLA_VALUE	((u64)1 << 39)
+#define MMCR1_PMC1_ADDR_SEL	((u64)1 << 35)
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value >> 1.
+ * Bottom 4 bits are a map of which PMCs are interesting,
+ * top 4 bits say what sort of event:
+ *   0 = direct marked event,
+ *   1 = byte decode event,
+ *   4 = add/and event (PMC1 -> bits 0 & 4),
+ *   5 = add/and event (PMC1 -> bits 1 & 5),
+ *   6 = add/and event (PMC1 -> bits 2 & 6),
+ *   7 = add/and event (PMC1 -> bits 3 & 7).
+ */
+static unsigned char direct_event_is_marked[0x60 >> 1] = {
+	0,	/* 00 */
+	0,	/* 02 */
+	0,	/* 04 */
+	0x07,	/* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0x04,	/* 08 PM_MRK_DFU_FIN */
+	0x06,	/* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
+	0,	/* 0c */
+	0,	/* 0e */
+	0x02,	/* 10 PM_MRK_INST_DISP */
+	0x08,	/* 12 PM_MRK_LSU_DERAT_MISS */
+	0,	/* 14 */
+	0,	/* 16 */
+	0x0c,	/* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
+	0x0f,	/* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x01,	/* 1c PM_MRK_INST_ISSUED */
+	0,	/* 1e */
+	0,	/* 20 */
+	0,	/* 22 */
+	0,	/* 24 */
+	0,	/* 26 */
+	0x15,	/* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
+	0,	/* 2a */
+	0,	/* 2c */
+	0,	/* 2e */
+	0x4f,	/* 30 */
+	0x7f,	/* 32 */
+	0x4f,	/* 34 */
+	0x5f,	/* 36 */
+	0x6f,	/* 38 */
+	0x4f,	/* 3a */
+	0,	/* 3c */
+	0x08,	/* 3e PM_MRK_INST_TIMEO */
+	0x1f,	/* 40 */
+	0x1f,	/* 42 */
+	0x1f,	/* 44 */
+	0x1f,	/* 46 */
+	0x1f,	/* 48 */
+	0x1f,	/* 4a */
+	0x1f,	/* 4c */
+	0x1f,	/* 4e */
+	0,	/* 50 */
+	0x05,	/* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
+	0x1c,	/* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
+	0x02,	/* 56 PM_MRK_LD_MISS_L1 */
+	0,	/* 58 */
+	0,	/* 5a */
+	0,	/* 5c */
+	0,	/* 5e */
+};
+
+/*
+ * Masks showing for each unit which bits are marked events.
+ * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
+ */
+static u32 marked_bus_events[16] = {
+	0x01000000,	/* direct events set 1: byte 3 bit 0 */
+	0x00010000,	/* direct events set 2: byte 2 bit 0 */
+	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
+	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
+	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 */
+	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
+	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
+	0,		/* LSU set 3 */
+	0x00000010,	/* VMX set 3: byte 0 bit 4 */
+	0,		/* BFP set 1 */
+	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
+	0, 0
+};
+	
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power6_marked_instr_event(u64 event)
+{
+	int pmc, psel, ptype;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = (event & PM_PMCSEL_MSK) >> 1;	/* drop edge/level bit */
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		ptype = direct_event_is_marked[psel];
+		if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
+			return 0;
+		ptype >>= 4;
+		if (ptype == 0)
+			return 1;
+		if (ptype == 1)
+			bit = 0;
+		else
+			bit = ptype ^ (pmc - 1);
+	} else if ((psel & 0x48) == 0x40)
+		bit = psel & 7;
+
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = marked_bus_events[unit];
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+/*
+ * Assign PMC numbers and compute MMCR1 value for a set of events
+ */
+static int p6_compute_mmcr(u64 event[], int n_ev,
+			   unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	u64 mmcra = 0;
+	int i;
+	unsigned int pmc, ev, b, u, s, psel;
+	unsigned int ttmset = 0;
+	unsigned int pmc_inuse = 0;
+
+	if (n_ev > 6)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;	/* collision! */
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+	}
+	for (i = 0; i < n_ev; ++i) {
+		ev = event[i];
+		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			--pmc;
+		} else {
+			/* can go on any PMC; find a free one */
+			for (pmc = 0; pmc < 4; ++pmc)
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		}
+		hwc[i] = pmc;
+		psel = ev & PM_PMCSEL_MSK;
+		if (ev & PM_BUSEVENT_MSK) {
+			/* this event uses the event bus */
+			b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
+			u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
+			/* check for conflict on this byte of event bus */
+			if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
+				return -1;
+			mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+			ttmset |= 1 << b;
+			if (u == 5) {
+				/* Nest events have a further mux */
+				s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+				if ((ttmset & 0x10) &&
+				    MMCR1_NESTSEL(mmcr1) != s)
+					return -1;
+				ttmset |= 0x10;
+				mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+			}
+			if (0x30 <= psel && psel <= 0x3d) {
+				/* these need the PMCx_ADDR_SEL bits */
+				if (b >= 2)
+					mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
+			}
+			/* bus select values are different for PMC3/4 */
+			if (pmc >= 2 && (psel & 0x90) == 0x80)
+				psel ^= 0x20;
+		}
+		if (ev & PM_LLA) {
+			mmcr1 |= MMCR1_PMC1_LLA >> pmc;
+			if (ev & PM_LLAV)
+				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
+		}
+		if (power6_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if (pmc < 4)
+			mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+	}
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0xe)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ *
+ *	0-1	add field: number of uses of PMC1 (max 1)
+ *	2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ *	12-15	add field: number of uses of PMC1-4 (max 4)
+ *	16-19	select field: unit on byte 0 of event bus
+ *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ *	32-34	select field: nest (subunit) event selector
+ */
+static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, sh, subunit;
+	u64 mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		sh = byte * 4 + (16 - PM_UNIT_SH);
+		mask |= PM_UNIT_MSKS << sh;
+		value |= (u64)(event & PM_UNIT_MSKS) << sh;
+		if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
+			subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+			mask  |= (u64)PM_SUBUNIT_MSK << 32;
+			value |= (u64)subunit << 32;
+		}
+	}
+	if (pmc <= 4) {
+		mask  |= 0x8000;	/* add field for count of PMC1-4 uses */
+		value |= 0x1000;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int p6_limited_pmc_event(u64 event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return pmc == 5 || pmc == 6;
+}
+
+#define MAX_ALT	4	/* at most 4 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
+	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
+	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
+	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
+	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
+	{ 0x10000e, 0x400010 },			/* PM_PURR */
+	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
+	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
+	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
+	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
+	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
+	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
+	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
+	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
+	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
+	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
+	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
+	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
+	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
+	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
+	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
+	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
+	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
+};
+
+/*
+ * This could be made more efficient with a binary search on
+ * a presorted list, if necessary
+ */
+static int find_alternatives_list(u64 event)
+{
+	int i, j;
+	unsigned int alt;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			return -1;
+		for (j = 0; j < MAX_ALT; ++j) {
+			alt = event_alternatives[i][j];
+			if (!alt || event < alt)
+				break;
+			if (event == alt)
+				return i;
+		}
+	}
+	return -1;
+}
+
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nlim;
+	unsigned int psel, pmc;
+	unsigned int nalt = 1;
+	u64 aevent;
+
+	alt[0] = event;
+	nlim = p6_limited_pmc_event(event);
+
+	/* check the alternatives table */
+	i = find_alternatives_list(event);
+	if (i >= 0) {
+		/* copy out alternatives from list */
+		for (j = 0; j < MAX_ALT; ++j) {
+			aevent = event_alternatives[i][j];
+			if (!aevent)
+				break;
+			if (aevent != event)
+				alt[nalt++] = aevent;
+			nlim += p6_limited_pmc_event(aevent);
+		}
+
+	} else {
+		/* Check for alternative ways of computing sum events */
+		/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
+		psel = event & (PM_PMCSEL_MSK & ~1);	/* ignore edge bit */
+		pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc && (psel == 0x32 || psel == 0x34))
+			alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
+				((5 - pmc) << PM_PMC_SH);
+
+		/* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
+		if (pmc && (psel == 0x38 || psel == 0x3a))
+			alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
+				((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC,
+		 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs (e.g.
+		 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
+		 * Note that even with these additional alternatives
+		 * we never end up with more than 4 alternatives for any event.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0x1e:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x10000a:	/* PM_RUN_CYC */
+				alt[j++] = 0x1e;	/* PM_CYC */
+				break;
+			case 2:		/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 2;		/* PM_INST_CMPL */
+				break;
+			case 0x10000e:	/* PM_PURR */
+				alt[j++] = 0x4000f4;	/* PM_RUN_PURR */
+				break;
+			case 0x4000f4:	/* PM_RUN_PURR */
+				alt[j++] = 0x10000e;	/* PM_PURR */
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (!p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	/* Set PMCxSEL to 0 to disable PMCx */
+	if (pmc <= 3)
+		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power6_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x280030, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x30000c, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x410a0,  /* BR_PRED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x400052, /* BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x80082,	0x80080		},
+		[C(OP_WRITE)] = {	0x80086,	0x80088		},
+		[C(OP_PREFETCH)] = {	0x810a4,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x100056 	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0x4008c,	0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x150730,	0x250532	},
+		[C(OP_WRITE)] = {	0x250432,	0x150432	},
+		[C(OP_PREFETCH)] = {	0x810a6,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x20000e	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x420ce		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x430e6,	0x400052	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+struct power_pmu power6_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x1555,
+	.test_adder = 0x3000,
+	.compute_mmcr = p6_compute_mmcr,
+	.get_constraint = p6_get_constraint,
+	.get_alternatives = p6_get_alternatives,
+	.disable_pmc = p6_disable_pmc,
+	.limited_pmc_event = p6_limited_pmc_event,
+	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
+	.n_generic = ARRAY_SIZE(power6_generic_events),
+	.generic_events = power6_generic_events,
+	.cache_events = &power6_cache_events,
+};
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
new file mode 100644
index 0000000..b72e7a1
--- /dev/null
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -0,0 +1,357 @@
+/*
+ * Performance counter support for POWER7 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER7
+ */
+#define PM_PMC_SH	16	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	12	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_COMBINE_SH	11	/* Combined event bit */
+#define PM_COMBINE_MSK	1
+#define PM_COMBINE_MSKS	0x800
+#define PM_L2SEL_SH	8	/* L2 event select */
+#define PM_L2SEL_MSK	7
+#define PM_PMCSEL_MSK	0xff
+
+/*
+ * Bits in MMCR1 for POWER7
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTM1SEL_SH	56
+#define MMCR1_TTM2SEL_SH	52
+#define MMCR1_TTM3SEL_SH	48
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_L2SEL_SH		45
+#define MMCR1_L2SEL_MSK		7
+#define MMCR1_PMC1_COMBINE_SH	35
+#define MMCR1_PMC2_COMBINE_SH	34
+#define MMCR1_PMC3_COMBINE_SH	33
+#define MMCR1_PMC4_COMBINE_SH	32
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMC2SEL_SH	16
+#define MMCR1_PMC3SEL_SH	8
+#define MMCR1_PMC4SEL_SH	0
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *                                                 [  ><><><><><><>
+ *                                                  NC P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ *     15: NC error 0x8000
+ *     12-14: number of events needing PMC1-4 0x7000
+ *
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
+ */
+
+static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, sh;
+	u64 mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
+			return -1;
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000;
+		value |= 0x1000;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	2	/* at most 2 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x200f2, 0x300f2 },		/* PM_INST_DISP */
+	{ 0x200f4, 0x600f4 },		/* PM_RUN_CYC */
+	{ 0x400fa, 0x500fa },		/* PM_RUN_INST_CMPL */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u64 event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static s64 find_alternative_decode(u64 event)
+{
+	int pmc, psel;
+
+	/* this only handles the 4x decode events */
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
+		return event - (1 << PM_PMC_SH) + 8;
+	if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
+		return event + (1 << PM_PMC_SH) - 8;
+	return -1;
+}
+
+static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	s64 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+		}
+	} else {
+		ae = find_alternative_decode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC
+		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0x1e:	/* PM_CYC */
+				alt[j++] = 0x600f4;	/* PM_RUN_CYC */
+				break;
+			case 0x600f4:	/* PM_RUN_CYC */
+				alt[j++] = 0x1e;
+				break;
+			case 0x2:	/* PM_PPC_CMPL */
+				alt[j++] = 0x500fa;	/* PM_RUN_INST_CMPL */
+				break;
+			case 0x500fa:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 0x2;	/* PM_PPC_CMPL */
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power7_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int unit;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	psel = event & PM_PMCSEL_MSK & ~1;	/* trim off edge/level bit */
+	if (pmc >= 5)
+		return 0;
+
+	switch (psel >> 4) {
+	case 2:
+		return pmc == 2 || pmc == 4;
+	case 3:
+		if (psel == 0x3c)
+			return pmc == 1;
+		if (psel == 0x3e)
+			return pmc != 2;
+		return 1;
+	case 4:
+	case 5:
+		return unit == 0xd;
+	case 6:
+		if (psel == 0x64)
+			return pmc >= 3;
+	case 8:
+		return unit == 0xd;
+	}
+	return 0;
+}
+
+static int power7_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	u64 mmcra = 0;
+	unsigned int pmc, unit, combine, l2sel, psel;
+	unsigned int pmc_inuse = 0;
+	int i;
+
+	/* First pass to count resource use */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+	}
+
+	/* Second pass: assign PMCs, set all MMCR1 fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
+		l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct or decoded event */
+			--pmc;
+		}
+		if (pmc <= 3) {
+			mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
+			mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+			if (unit == 6)	/* L2 events */
+				mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
+		}
+		if (power7_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power7_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU*/
+	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1	*/
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN	*/
+	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED	*/
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x400f0,	0xc880	},
+		[C(OP_WRITE)] = {	0,		0x300f0	},
+		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x200fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x408a,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x6080,		0x6084	},
+		[C(OP_WRITE)] = {	0x6082,		0x6086	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x300fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x400fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x10068,	0x400f6	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+struct power_pmu power7_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT + 1,
+	.add_fields = 0x1555ull,
+	.test_adder = 0x3000ull,
+	.compute_mmcr = power7_compute_mmcr,
+	.get_constraint = power7_get_constraint,
+	.get_alternatives = power7_get_alternatives,
+	.disable_pmc = power7_disable_pmc,
+	.n_generic = ARRAY_SIZE(power7_generic_events),
+	.generic_events = power7_generic_events,
+	.cache_events = &power7_cache_events,
+};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 0000000..ba0a357
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,482 @@
+/*
+ * Performance counter support for PPC970-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for PPC970
+ */
+#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_SPCSEL_SH	6
+#define PM_SPCSEL_MSK	3
+#define PM_BYTE_SH	4	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_PMCSEL_MSK	0xf
+
+/* Values in PM_UNIT field */
+#define PM_NONE		0
+#define PM_FPU		1
+#define PM_VPU		2
+#define PM_ISU		3
+#define PM_IFU		4
+#define PM_IDU		5
+#define PM_STS		6
+#define PM_LSU0		7
+#define PM_LSU1U	8
+#define PM_LSU1L	9
+#define PM_LASTUNIT	9
+
+/*
+ * Bits in MMCR0 for PPC970
+ */
+#define MMCR0_PMC1SEL_SH	8
+#define MMCR0_PMC2SEL_SH	1
+#define MMCR_PMCSEL_MSK		0x1f
+
+/*
+ * Bits in MMCR1 for PPC970
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	59
+#define MMCR1_TTM3SEL_SH	53
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	50
+#define MMCR1_TD_CP_DBG1SEL_SH	48
+#define MMCR1_TD_CP_DBG2SEL_SH	46
+#define MMCR1_TD_CP_DBG3SEL_SH	44
+#define MMCR1_PMC1_ADDER_SEL_SH	39
+#define MMCR1_PMC2_ADDER_SEL_SH	38
+#define MMCR1_PMC6_ADDER_SEL_SH	37
+#define MMCR1_PMC5_ADDER_SEL_SH	36
+#define MMCR1_PMC8_ADDER_SEL_SH	35
+#define MMCR1_PMC7_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC3SEL_SH	27
+#define MMCR1_PMC4SEL_SH	22
+#define MMCR1_PMC5SEL_SH	17
+#define MMCR1_PMC6SEL_SH	12
+#define MMCR1_PMC7SEL_SH	7
+#define MMCR1_PMC8SEL_SH	2
+
+static short mmcr1_adder_bits[8] = {
+	MMCR1_PMC1_ADDER_SEL_SH,
+	MMCR1_PMC2_ADDER_SEL_SH,
+	MMCR1_PMC3_ADDER_SEL_SH,
+	MMCR1_PMC4_ADDER_SEL_SH,
+	MMCR1_PMC5_ADDER_SEL_SH,
+	MMCR1_PMC6_ADDER_SEL_SH,
+	MMCR1_PMC7_ADDER_SEL_SH,
+	MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *               SPT0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ *
+ * SP - SPCSEL constraint
+ *     48-49: SPCSEL value 0x3_0000_0000_0000
+ *
+ * T0 - TTM0 constraint
+ *     46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
+ *     43: UC3 error 0x0800_0000_0000
+ *     42: FPU|IFU|VPU events needed 0x0400_0000_0000
+ *     41: ISU events needed 0x0200_0000_0000
+ *     40: IDU|STS events needed 0x0100_0000_0000
+ *
+ * PS1
+ *     39: PS1 error 0x0080_0000_0000
+ *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ *     35: PS2 error 0x0008_0000_0000
+ *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ *     28-31: Byte 0 event source 0xf000_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P1
+ *     15: P1 error 0x8000
+ *     14-15: Count of events needing PMC1
+ *
+ * P2..P8
+ *     0-13: Count of events needing PMC2..PMC8
+ */
+
+static unsigned char direct_marked_event[8] = {
+	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
+	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
+	(1<<3) | (1<<4) | (1<<5),
+		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p970_marked_instr_event(u64 event)
+{
+	int pmc, psel, unit, byte, bit;
+	unsigned int mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc) {
+		if (direct_marked_event[pmc - 1] & (1 << psel))
+			return 1;
+		if (psel == 0)		/* add events */
+			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+		else if (psel == 7 || psel == 13)	/* decode events */
+			bit = 4;
+		else
+			return 0;
+	} else
+		bit = psel;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = 0;
+	switch (unit) {
+	case PM_VPU:
+		mask = 0x4c;		/* byte 0 bits 2,3,6 */
+	case PM_LSU0:
+		/* byte 2 bits 0,2,3,4,6; all of byte 1 */
+		mask = 0x085dff00;
+	case PM_LSU1L:
+		mask = 0x50 << 24;	/* byte 3 bits 4,6 */
+		break;
+	}
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc80000000000ull, 0x040000000000ull },
+	[PM_VPU] =   { 0xc80000000000ull, 0xc40000000000ull },
+	[PM_ISU] =   { 0x080000000000ull, 0x020000000000ull },
+	[PM_IFU] =   { 0xc80000000000ull, 0x840000000000ull },
+	[PM_IDU] =   { 0x380000000000ull, 0x010000000000ull },
+	[PM_STS] =   { 0x380000000000ull, 0x310000000000ull },
+};
+
+static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh, spcsel;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 8)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		grp = ((pmc - 1) >> 1) & 1;
+	}
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit) {
+		if (unit > PM_LASTUNIT)
+			return -1;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (28 - 4 * byte);
+		value |= (u64)unit << (28 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2/5/6 field */
+		mask  |= 0x8000000000ull;
+		value |= 0x1000000000ull;
+	} else if (grp == 1) {
+		/* increment PMC3/4/7/8 field */
+		mask  |= 0x800000000ull;
+		value |= 0x100000000ull;
+	}
+	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+	if (spcsel) {
+		mask  |= 3ull << 48;
+		value |= (u64)spcsel << 48;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	alt[0] = event;
+
+	/* 2 alternatives for LSU empty */
+	if (event == 0x2002 || event == 0x3002) {
+		alt[1] = event ^ 0x1000;
+		return 2;
+	}
+		
+	return 1;
+}
+
+static int p970_compute_mmcr(u64 event[], int n_ev,
+			     unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
+	unsigned char ttmuse[2];
+	unsigned char pmcsel[8];
+	int i;
+	int spcsel;
+
+	if (n_ev > 8)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2/5/6 vs 3/4/7/8 use */
+			++pmc_grp_use[((pmc - 1) >> 1) & 1];
+		}
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (unit) {
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
+		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */
+	/* Set TTM[01]SEL fields. */
+	ttmuse[0] = ttmuse[1] = 0;
+	for (i = PM_FPU; i <= PM_STS; ++i) {
+		if (!unituse[i])
+			continue;
+		ttm = unitmap[i];
+		++ttmuse[(ttm >> 2) & 1];
+		mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
+	}
+	/* Check only one unit per TTMx */
+	if (ttmuse[0] > 1 || ttmuse[1] > 1)
+		return -1;
+
+	/* Set byte lane select fields and TTM3SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit <= PM_STS)
+			ttm = (unitmap[unit] >> 2) & 1;
+		else if (unit == PM_LSU0)
+			ttm = 2;
+		else {
+			ttm = 3;
+			if (unit == PM_LSU1L && byte >= 2)
+				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			if (unit)
+				psel |= 0x10 | ((byte & 2) << 2);
+			else
+				psel |= 8;
+			for (pmc = 0; pmc < 8; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (unit) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 4) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (psel == 0 && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+		}
+		pmcsel[pmc] = psel;
+		hwc[i] = pmc;
+		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+		mmcr1 |= spcsel;
+		if (p970_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+	}
+	for (pmc = 0; pmc < 2; ++pmc)
+		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
+	for (; pmc < 8; ++pmc)
+		mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0xfe)
+		mmcr0 |= MMCR0_PMCjCE;
+
+	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	int shift, i;
+
+	if (pmc <= 1) {
+		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+		i = 0;
+	} else {
+		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+		i = 1;
+	}
+	/*
+	 * Setting the PMCxSEL field to 0x08 disables PMC x.
+	 */
+	mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
+}
+
+static int ppc970_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8810, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3810, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x431,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES] 		= 0x327,  /* PM_GRP_BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x8810,		0x3810	},
+		[C(OP_WRITE)] = {	0x7810,		0x813	},
+		[C(OP_PREFETCH)] = {	0x731,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0x733,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x704	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x700	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x431,		0x327	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+struct power_pmu ppc970_pmu = {
+	.n_counter = 8,
+	.max_alternatives = 2,
+	.add_fields = 0x001100005555ull,
+	.test_adder = 0x013300000000ull,
+	.compute_mmcr = p970_compute_mmcr,
+	.get_constraint = p970_get_constraint,
+	.get_alternatives = p970_get_alternatives,
+	.disable_pmc = p970_disable_pmc,
+	.n_generic = ARRAY_SIZE(ppc970_generic_events),
+	.generic_events = ppc970_generic_events,
+	.cache_events = &ppc970_cache_events,
+};
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335..2cf915e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -41,6 +41,12 @@
 	return !!(v->arch.pending_exceptions);
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	return !(v->arch.msr & MSR_WE);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..5beffc8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/perf_counter.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -170,6 +171,8 @@
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -309,6 +312,8 @@
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -316,8 +321,11 @@
 			preempt_enable();
 		}
 #endif
-	} else
+	} else {
 		current->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 	up_read(&mm->mmap_sem);
 	return 0;
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9da795e..732ee93 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
 config PPC64
 	bool "64-bit kernel"
 	default n
+	select HAVE_PERF_COUNTERS
 	help
 	  This option selects whether a 32-bit or a 64-bit kernel
 	  will be built.
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 80b5134..be3581a 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -333,7 +333,7 @@
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -342,14 +342,14 @@
 
 	irq = (unsigned int)irq_map[virq].hwirq;
 	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
-		return;
+		return -1;
 
 	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
 
 	/*
@@ -363,7 +363,7 @@
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
-		return;
+		return -1;
 	}
 
 	status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
+
+	return 0;
 }
 
 static struct irq_chip xics_pic_direct = {
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9e105cb..a477991 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -250,7 +250,7 @@
 
 	set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
 	blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
-	blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
+	blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
 	add_disk(bank->disk);
 
 	bank->irq_id = irq_of_parse_and_map(device->node, 0);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0efc12d..352d8c3 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -807,7 +807,7 @@
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
 	}
+
+	return 0;
 }
 
 static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 3cef2af..eff433c 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 54ea39f..a27d0d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,6 +13,8 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpuid.h>
@@ -210,7 +212,8 @@
 	s390_fp_regs      guest_fpregs;
 	unsigned int      guest_acrs[NUM_ACRS];
 	struct kvm_s390_local_interrupt local_int;
-	struct timer_list ckc_timer;
+	struct hrtimer    ckc_timer;
+	struct tasklet_struct tasklet;
 	union  {
 		cpuid_t	  cpu_id;
 		u64	  stidp_data;
diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h
deleted file mode 100644
index 1f34580..0000000
--- a/arch/s390/include/asm/suspend.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef __ASM_S390_SUSPEND_H
-#define __ASM_S390_SUSPEND_H
-
-#endif
-
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index eed4a00..ab2e3ed 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -56,8 +56,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 static void
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9d19803..98997cc 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -154,17 +154,25 @@
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
 	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+	int rc;
+
 	vcpu->stat.exit_validity++;
-	if (viwhy == 0x37) {
-		fault_in_pages_writeable((char __user *)
-					 vcpu->kvm->arch.guest_origin +
-					 vcpu->arch.sie_block->prefix,
-					 PAGE_SIZE);
-		return 0;
-	}
-	VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-		   viwhy);
-	return -ENOTSUPP;
+	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
+		<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+		rc = fault_in_pages_writeable((char __user *)
+			 vcpu->kvm->arch.guest_origin +
+			 vcpu->arch.sie_block->prefix,
+			 2*PAGE_SIZE);
+		if (rc)
+			/* user will receive sigsegv, exit to user */
+			rc = -ENOTSUPP;
+	} else
+		rc = -ENOTSUPP;
+
+	if (rc)
+		VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+			   viwhy);
+	return rc;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0189356..f04f530 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -12,6 +12,8 @@
 
 #include <asm/lowcore.h>
 #include <asm/uaccess.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/signal.h>
 #include "kvm-s390.h"
@@ -299,13 +301,13 @@
 	}
 
 	if ((!rc) && atomic_read(&fi->active)) {
-		spin_lock_bh(&fi->lock);
+		spin_lock(&fi->lock);
 		list_for_each_entry(inti, &fi->list, list)
 			if (__interrupt_is_deliverable(vcpu, inti)) {
 				rc = 1;
 				break;
 			}
-		spin_unlock_bh(&fi->lock);
+		spin_unlock(&fi->lock);
 	}
 
 	if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -318,6 +320,12 @@
 	return rc;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return 0;
@@ -355,14 +363,12 @@
 		return 0;
 	}
 
-	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+	sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
 
-	vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
-	add_timer(&vcpu->arch.ckc_timer);
-	VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
-	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
 	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	while (list_empty(&vcpu->arch.local_int.list) &&
@@ -371,33 +377,46 @@
 		!signal_pending(current)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&vcpu->arch.local_int.lock);
-		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_unlock(&vcpu->arch.local_int.float_int->lock);
 		vcpu_put(vcpu);
 		schedule();
 		vcpu_load(vcpu);
-		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock(&vcpu->arch.local_int.float_int->lock);
 		spin_lock_bh(&vcpu->arch.local_int.lock);
 	}
 	__unset_cpu_idle(vcpu);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&vcpu->wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
-	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-	del_timer(&vcpu->arch.ckc_timer);
+	spin_unlock(&vcpu->arch.local_int.float_int->lock);
+	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
 	return 0;
 }
 
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
 
-	spin_lock_bh(&vcpu->arch.local_int.lock);
+	spin_lock(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.timer_due = 1;
 	if (waitqueue_active(&vcpu->arch.local_int.wq))
 		wake_up_interruptible(&vcpu->arch.local_int.wq);
-	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock(&vcpu->arch.local_int.lock);
 }
 
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	tasklet_schedule(&vcpu->arch.tasklet);
+
+	return HRTIMER_NORESTART;
+}
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
@@ -436,7 +455,7 @@
 	if (atomic_read(&fi->active)) {
 		do {
 			deliver = 0;
-			spin_lock_bh(&fi->lock);
+			spin_lock(&fi->lock);
 			list_for_each_entry_safe(inti, n, &fi->list, list) {
 				if (__interrupt_is_deliverable(vcpu, inti)) {
 					list_del(&inti->list);
@@ -447,7 +466,7 @@
 			}
 			if (list_empty(&fi->list))
 				atomic_set(&fi->active, 0);
-			spin_unlock_bh(&fi->lock);
+			spin_unlock(&fi->lock);
 			if (deliver) {
 				__do_deliver_interrupt(vcpu, inti);
 				kfree(inti);
@@ -512,7 +531,7 @@
 
 	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	list_add_tail(&inti->list, &fi->list);
 	atomic_set(&fi->active, 1);
 	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -529,7 +548,7 @@
 	if (waitqueue_active(&li->wq))
 		wake_up_interruptible(&li->wq);
 	spin_unlock_bh(&li->lock);
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	mutex_unlock(&kvm->lock);
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f4d56e9..10bccd1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -195,6 +196,10 @@
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+		(__u64) vcpu->arch.sie_block)
+		vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+	smp_mb();
 	free_page((unsigned long)(vcpu->arch.sie_block));
 	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu);
@@ -283,8 +288,10 @@
 	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
-	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-		 (unsigned long) vcpu);
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+		     (unsigned long) vcpu);
+	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	get_cpu_id(&vcpu->arch.cpu_id);
 	vcpu->arch.cpu_id.version = 0xff;
 	return 0;
@@ -307,19 +314,21 @@
 
 	vcpu->arch.sie_block->icpua = id;
 	BUG_ON(!kvm->arch.sca);
-	BUG_ON(kvm->arch.sca->cpu[id].sda);
-	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	if (!kvm->arch.sca->cpu[id].sda)
+		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	else
+		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-	spin_lock_bh(&kvm->arch.float_int.lock);
+	spin_lock(&kvm->arch.float_int.lock);
 	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
 	init_waitqueue_head(&vcpu->arch.local_int.wq);
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-	spin_unlock_bh(&kvm->arch.float_int.lock);
+	spin_unlock(&kvm->arch.float_int.lock);
 
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
@@ -478,6 +487,12 @@
 
 	vcpu_load(vcpu);
 
+	/* verify, that memory has been registered */
+	if (!vcpu->kvm->arch.guest_memsize) {
+		vcpu_put(vcpu);
+		return -EINVAL;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -657,6 +672,8 @@
 				struct kvm_memory_slot old,
 				int user_alloc)
 {
+	int i;
+
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
 	   page boundary in userland and which has to end at a page boundary.
@@ -664,7 +681,7 @@
 	   vmas. It is okay to mmap() and munmap() stuff in this slot after
 	   doing this call at any time */
 
-	if (mem->slot)
+	if (mem->slot || kvm->arch.guest_memsize)
 		return -EINVAL;
 
 	if (mem->guest_phys_addr)
@@ -676,15 +693,39 @@
 	if (mem->memory_size & (PAGE_SIZE - 1))
 		return -EINVAL;
 
+	if (!user_alloc)
+		return -EINVAL;
+
+	/* lock all vcpus */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (!kvm->vcpus[i])
+			continue;
+		if (!mutex_trylock(&kvm->vcpus[i]->mutex))
+			goto fail_out;
+	}
+
 	kvm->arch.guest_origin = mem->userspace_addr;
 	kvm->arch.guest_memsize = mem->memory_size;
 
-	/* FIXME: we do want to interrupt running CPUs and update their memory
-	   configuration now to avoid race conditions. But hey, changing the
-	   memory layout while virtual CPUs are running is usually bad
-	   programming practice. */
+	/* update sie control blocks, and unlock all vcpus */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm->vcpus[i]->arch.sie_block->gmsor =
+				kvm->arch.guest_origin;
+			kvm->vcpus[i]->arch.sie_block->gmslm =
+				kvm->arch.guest_memsize +
+				kvm->arch.guest_origin +
+				VIRTIODESCSPACE - 1ul;
+			mutex_unlock(&kvm->vcpus[i]->mutex);
+		}
+	}
 
 	return 0;
+
+fail_out:
+	for (; i >= 0; i--)
+		mutex_unlock(&kvm->vcpus[i]->mutex);
+	return -EINVAL;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 00bbe69..748fee8 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -14,6 +14,7 @@
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H
 
+#include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
@@ -41,7 +42,8 @@
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
-void kvm_s390_idle_wakeup(unsigned long data);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_vm(struct kvm *kvm,
 		struct kvm_s390_interrupt *s390int);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4b88834..93ecd06 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -204,11 +204,11 @@
 	int cpus = 0;
 	int n;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	for (n = 0; n < KVM_MAX_VCPUS; n++)
 		if (fi->local_int[n])
 			cpus++;
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 
 	/* deal with other level 3 hypervisors */
 	if (stsi(mem, 3, 2, 2) == -ENOSYS)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index f27dbed..3667883 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -52,7 +52,7 @@
 	if (cpu_addr >= KVM_MAX_VCPUS)
 		return 3; /* not operational */
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	if (fi->local_int[cpu_addr] == NULL)
 		rc = 3; /* not operational */
 	else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
@@ -64,7 +64,7 @@
 		*reg |= SIGP_STAT_STOPPED;
 		rc = 1; /* status stored */
 	}
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 
 	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
 	return rc;
@@ -86,7 +86,7 @@
 
 	inti->type = KVM_S390_INT_EMERGENCY;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 	if (li == NULL) {
 		rc = 3; /* not operational */
@@ -102,7 +102,7 @@
 	spin_unlock_bh(&li->lock);
 	rc = 0; /* order accepted */
 unlock:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
 	return rc;
 }
@@ -123,7 +123,7 @@
 
 	inti->type = KVM_S390_SIGP_STOP;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 	if (li == NULL) {
 		rc = 3; /* not operational */
@@ -142,7 +142,7 @@
 	spin_unlock_bh(&li->lock);
 	rc = 0; /* order accepted */
 unlock:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 	return rc;
 }
@@ -188,7 +188,7 @@
 	if (!inti)
 		return 2; /* busy */
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 
 	if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
@@ -220,7 +220,7 @@
 out_li:
 	spin_unlock_bh(&li->lock);
 out_fi:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	return rc;
 }
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e7390dd..586cd04 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -15,6 +15,7 @@
 	select HAVE_IOREMAP_PROT if MMU
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
+	select RTC_LIB
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
@@ -74,14 +75,18 @@
 	bool
 
 config GENERIC_TIME
-	def_bool n
+	def_bool y
 
 config GENERIC_CLOCKEVENTS
-	def_bool n
+	def_bool y
 
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
 
+config GENERIC_CMOS_UPDATE
+	def_bool y
+	depends on SH_SH03 || SH_DREAMCAST
+
 config GENERIC_LOCKBREAK
 	def_bool y
 	depends on SMP && PREEMPT
@@ -112,6 +117,12 @@
 config SYS_SUPPORTS_CMT
 	bool
 
+config SYS_SUPPORTS_MTU2
+	bool
+
+config SYS_SUPPORTS_TMU
+	bool
+
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -157,13 +168,14 @@
 	bool
 	select CPU_HAS_INTEVT
 	select CPU_HAS_SR_RB
+	select SYS_SUPPORTS_TMU
 
 config CPU_SH4
 	bool
 	select CPU_HAS_INTEVT
 	select CPU_HAS_SR_RB
-	select CPU_HAS_PTEA if !CPU_SH4A || CPU_SHX2
 	select CPU_HAS_FPU if !CPU_SH4AL_DSP
+	select SYS_SUPPORTS_TMU
 
 config CPU_SH4A
 	bool
@@ -177,6 +189,7 @@
 config CPU_SH5
 	bool
 	select CPU_HAS_FPU
+	select SYS_SUPPORTS_TMU
 
 config CPU_SHX2
 	bool
@@ -210,27 +223,32 @@
 	bool "Support SH7201 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
+	select SYS_SUPPORTS_MTU2
  
 config CPU_SUBTYPE_SH7203
 	bool "Support SH7203 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_SH7206
 	bool "Support SH7206 processor"
 	select CPU_SH2A
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_SH7263
 	bool "Support SH7263 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_MXG
 	bool "Support MX-G processor"
 	select CPU_SH2A
+	select SYS_SUPPORTS_MTU2
 	help
 	  Select MX-G if running on an R8A03022BG part.
 
@@ -283,6 +301,7 @@
 	bool "Support SH7720 processor"
 	select CPU_SH3
 	select CPU_HAS_DSP
+	select SYS_SUPPORTS_CMT
 	help
 	  Select SH7720 if you have a SH3-DSP SH7720 CPU.
 
@@ -290,6 +309,7 @@
 	bool "Support SH7721 processor"
 	select CPU_SH3
 	select CPU_HAS_DSP
+	select SYS_SUPPORTS_CMT
 	help
 	  Select SH7721 if you have a SH3-DSP SH7721 CPU.
 
@@ -347,6 +367,16 @@
 	help
 	  Select SH7723 if you have an SH-MobileR2 CPU.
 
+config CPU_SUBTYPE_SH7724
+	bool "Support SH7724 processor"
+	select CPU_SH4A
+	select CPU_SHX2
+	select ARCH_SHMOBILE
+	select ARCH_SPARSEMEM_ENABLE
+	select SYS_SUPPORTS_CMT
+	help
+	  Select SH7724 if you have an SH-MobileR2R CPU.
+
 config CPU_SUBTYPE_SH7763
 	bool "Support SH7763 processor"
 	select CPU_SH4A
@@ -442,48 +472,26 @@
 
 menu "Timer and clock configuration"
 
-config SH_TMU
-	bool "TMU timer support"
-	depends on CPU_SH3 || CPU_SH4
-	default y
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
-	help
-	  This enables the use of the TMU as the system timer.
-
-config SH_CMT
-	bool "CMT timer support"
-	depends on SYS_SUPPORTS_CMT && CPU_SH2
+config SH_TIMER_TMU
+	bool "TMU timer driver"
+	depends on SYS_SUPPORTS_TMU
 	default y
 	help
-	  This enables the use of the CMT as the system timer.
+	  This enables the build of the TMU timer driver.
 
-#
-# Support for the new-style CMT driver. This will replace SH_CMT
-# once its other dependencies are merged.
-#
 config SH_TIMER_CMT
-	bool "CMT clockevents driver"
-	depends on SYS_SUPPORTS_CMT && !SH_CMT
-	select GENERIC_CLOCKEVENTS
-
-config SH_MTU2
-	bool "MTU2 timer support"
-	depends on CPU_SH2A
+	bool "CMT timer driver"
+	depends on SYS_SUPPORTS_CMT
 	default y
 	help
-	  This enables the use of the MTU2 as the system timer.
+	  This enables build of the CMT timer driver.
 
-config SH_TIMER_IRQ
-	int
-	default "28" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
-			CPU_SUBTYPE_SH7763
-	default "86" if CPU_SUBTYPE_SH7619
-	default "140" if CPU_SUBTYPE_SH7206
-	default "142" if CPU_SUBTYPE_SH7203 && SH_CMT
-	default "153" if CPU_SUBTYPE_SH7203 && SH_MTU2
-	default "238" if CPU_SUBTYPE_MXG
-	default "16"
+config SH_TIMER_MTU2
+	bool "MTU2 timer driver"
+	depends on SYS_SUPPORTS_MTU2
+	default y
+	help
+	  This enables build of the MTU2 timer driver.
 
 config SH_PCLK_FREQ
 	int "Peripheral clock frequency (in Hz)"
@@ -494,7 +502,7 @@
 			      CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7705 || \
 			      CPU_SUBTYPE_SH7203 || CPU_SUBTYPE_SH7206 || \
 			      CPU_SUBTYPE_SH7263 || CPU_SUBTYPE_MXG    || \
-			      CPU_SUBTYPE_SH7786
+			      CPU_SUBTYPE_SH7786 || CPU_SUBTYPE_SH7724
 	default "60000000" if CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R
 	default "66000000" if CPU_SUBTYPE_SH4_202
 	default "50000000"
@@ -503,6 +511,13 @@
 	  This is necessary for determining the reference clock value on
 	  platforms lacking an RTC.
 
+config SH_CLK_CPG
+	def_bool y
+
+config SH_CLK_CPG_LEGACY
+	depends on SH_CLK_CPG
+	def_bool y if !CPU_SUBTYPE_SH7785 && !ARCH_SHMOBILE
+
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
 	depends on CPU_SH2
@@ -663,27 +678,54 @@
 	  LLSC, this should be more efficient than the other alternative of
 	  disabling interrupts around the atomic sequence.
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on EXPERIMENTAL
+	help
+	  This enables support for sparse irqs. This is useful in general
+	  as most CPUs have a fairly sparse array of IRQ vectors, which
+	  the irq_desc then maps directly on to. Systems with a high
+	  number of off-chip IRQs will want to treat this as
+	  experimental until they have been independently verified.
+
+	  If you don't know what to do here, say N.
+
 endmenu
 
 menu "Boot options"
 
 config ZERO_PAGE_OFFSET
-	hex "Zero page offset"
-	default "0x00004000" if SH_SH03
-	default "0x00010000" if PAGE_SIZE_64KB
+	hex
+	default "0x00010000" if PAGE_SIZE_64KB || SH_RTS7751R2D || \
+				SH_7751_SOLUTION_ENGINE
+	default "0x00004000" if PAGE_SIZE_16KB || SH_SH03
 	default "0x00002000" if PAGE_SIZE_8KB
 	default "0x00001000"
 	help
 	  This sets the default offset of zero page.
 
 config BOOT_LINK_OFFSET
-	hex "Link address offset for booting"
+	hex
+	default "0x00210000" if SH_SHMIN
+	default "0x00400000" if SH_CAYMAN
+	default "0x00810000" if SH_7780_SOLUTION_ENGINE
+	default "0x009e0000" if SH_TITAN
+	default "0x01800000" if SH_SDK7780
+	default "0x02000000" if SH_EDOSK7760
 	default "0x00800000"
 	help
 	  This option allows you to set the link address offset of the zImage.
 	  This can be useful if you are on a board which has a small amount of
 	  memory.
 
+config ENTRY_OFFSET
+	hex
+	default "0x00001000" if PAGE_SIZE_4KB
+	default "0x00002000" if PAGE_SIZE_8KB
+	default "0x00004000" if PAGE_SIZE_16KB
+	default "0x00010000" if PAGE_SIZE_64KB
+	default "0x00000000"
+
 config UBC_WAKEUP
 	bool "Wakeup UBC on startup"
 	depends on CPU_SH4 && !CPU_SH4A
diff --git a/arch/sh/Kconfig.cpu b/arch/sh/Kconfig.cpu
index c7d7043..cd6e3ea 100644
--- a/arch/sh/Kconfig.cpu
+++ b/arch/sh/Kconfig.cpu
@@ -76,11 +76,6 @@
 
 	  If unsure, say N.
 
-config SH64_USER_MISALIGNED_FIXUP
-	def_bool y
-	prompt "Fixup misaligned loads/stores occurring in user mode"
-	depends on SUPERH64
-
 config SH64_ID2815_WORKAROUND
 	bool "Include workaround for SH5-101 cut2 silicon defect ID2815"
 	depends on CPU_SUBTYPE_SH5_101
@@ -101,9 +96,6 @@
 	  See <file:Documentation/sh/register-banks.txt> for further
 	  information on SR.RB and register banking in the kernel in general.
 
-config CPU_HAS_PTEA
-	bool
-
 config CPU_HAS_PTEAEX
 	bool
 
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 0d62681..8179cc9 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -38,10 +38,10 @@
 	default "0xffe00000" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7763 || \
 				CPU_SUBTYPE_SH7722 || CPU_SUBTYPE_SH7366 || \
 				CPU_SUBTYPE_SH7343
-	default "0xffe80000" if CPU_SH4
 	default "0xffea0000" if CPU_SUBTYPE_SH7785
 	default "0xfffe8000" if CPU_SUBTYPE_SH7203
 	default "0xfffe9800" if CPU_SUBTYPE_SH7206 || CPU_SUBTYPE_SH7263
+	default "0xffe80000" if CPU_SH4
 	default "0x00000000"
 
 config EARLY_PRINTK
@@ -92,7 +92,7 @@
 
 config IRQSTACKS
 	bool "Use separate kernel stacks when processing interrupts"
-	depends on DEBUG_KERNEL && SUPERH32
+	depends on DEBUG_KERNEL && SUPERH32 && BROKEN
 	help
 	  If you say Y here the kernel will use separate kernel stacks
 	  for handling hard and soft interrupts.  This can help avoid
@@ -122,27 +122,8 @@
 	  For all other cases, say N. If this option seems perplexing, or
 	  you aren't sure, say N.
 
-config MORE_COMPILE_OPTIONS
-	bool "Add any additional compile options"
-	help
-	  If you want to add additional CFLAGS to the kernel build, enable this
-	  option and then enter what you would like to add in the next question.
-	  Note however that -g is already appended with the selection of KGDB.
-
-config COMPILE_OPTIONS
-	string "Additional compile arguments"
-	depends on MORE_COMPILE_OPTIONS
-
 config SH64_SR_WATCH
 	bool "Debug: set SR.WATCH to enable hardware watchpoints and trace"
 	depends on SUPERH64
 
-config POOR_MANS_STRACE
-	bool "Debug: enable rudimentary strace facility"
-	depends on SUPERH64
-	help
-	  This option allows system calls to be traced to the console.  It also
-	  aids in detecting kernel stack underflow.  It is useful for debugging
-	  early-userland problems (e.g. init incurring fatal exceptions.)
-
 endmenu
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index bece1f7..75d049b0 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -70,9 +70,6 @@
 cflags-y	+= $(call cc-option,-mno-fdpic)
 cflags-y	+= $(isaflags-y) -ffreestanding
 
-cflags-$(CONFIG_MORE_COMPILE_OPTIONS)	+= \
-	$(shell echo $(CONFIG_COMPILE_OPTIONS) | sed -e 's/"//g')
-
 OBJCOPYFLAGS	:= -O binary -R .note -R .note.gnu.build-id -R .comment \
 		   -R .stab -R .stabstr -S
 
@@ -85,7 +82,6 @@
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE)	:= vmlinux
 
 # Set some sensible Kbuild defaults
-KBUILD_DEFCONFIG	:= shx3_defconfig
 KBUILD_IMAGE		:= $(defaultimage-y)
 
 #
@@ -93,26 +89,38 @@
 # error messages during linking.
 #
 ifdef CONFIG_SUPERH32
-UTS_MACHINE	:= sh
-LDFLAGS_vmlinux	+= -e _stext
+UTS_MACHINE		:= sh
+BITS			:= 32
+LDFLAGS_vmlinux		+= -e _stext
+KBUILD_DEFCONFIG	:= shx3_defconfig
 else
-UTS_MACHINE	:= sh64
-LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
-		   --defsym phys_stext_shmedia=phys_stext+1 \
-		   -e phys_stext_shmedia
+UTS_MACHINE		:= sh64
+BITS			:= 64
+LDFLAGS_vmlinux		+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
+			   --defsym phys_stext_shmedia=phys_stext+1 \
+			   -e phys_stext_shmedia
+KBUILD_DEFCONFIG	:= cayman_defconfig
+endif
+
+ifneq ($(SUBARCH),$(ARCH))
+  ifeq ($(CROSS_COMPILE),)
+    CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux-  $(UTS_MACHINE)-linux-gnu-  $(UTS_MACHINE)-unknown-linux-gnu-)
+  endif
 endif
 
 ifdef CONFIG_CPU_LITTLE_ENDIAN
-LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64'
+ld-bfd			:= elf32-$(UTS_MACHINE)-linux
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64' --oformat $(ld-bfd)
 LDFLAGS			+= -EL
 else
-LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4'
+ld-bfd			:= elf32-$(UTS_MACHINE)big-linux
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4' --oformat $(ld-bfd)
 LDFLAGS			+= -EB
 endif
 
-head-y			:= arch/sh/kernel/init_task.o
-head-$(CONFIG_SUPERH32)	+= arch/sh/kernel/head_32.o
-head-$(CONFIG_SUPERH64)	+= arch/sh/kernel/head_64.o
+export ld-bfd BITS
+
+head-y	:= arch/sh/kernel/init_task.o arch/sh/kernel/head_$(BITS).o
 
 core-y				+= arch/sh/kernel/ arch/sh/mm/ arch/sh/boards/
 core-$(CONFIG_SH_FPU_EMU)	+= arch/sh/math-emu/
@@ -193,10 +201,11 @@
 
 compressed: zImage
 
-archprepare: maketools arch/sh/lib64/syscalltab.h
+archprepare: maketools
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=arch/sh/kernel/vsyscall
 
 define archhelp
 	@echo '* zImage 	           - Compressed kernel image'
@@ -205,34 +214,4 @@
 	@echo '  uImage.srec  	           - Create an S-record for U-Boot'
 endef
 
-define filechk_gen-syscalltab
-       (set -e; \
-	echo "/*"; \
-	echo " * DO NOT MODIFY."; \
-	echo " *"; \
-	echo " * This file was generated by arch/sh/Makefile"; \
-	echo " * Any changes will be reverted at build time."; \
-	echo " */"; \
-	echo ""; \
-	echo "#ifndef __SYSCALLTAB_H"; \
-	echo "#define __SYSCALLTAB_H"; \
-	echo ""; \
-	echo "#include <linux/kernel.h>"; \
-	echo ""; \
-	echo "struct syscall_info {"; \
-	echo "	const char *name;"; \
-	echo "} syscall_info_table[] = {"; \
-	sed -e '/^.*\.long /!d;s//	{ "/;s/\(\([^/]*\)\/\)\{1\}.*/\2/; \
-		s/[ \t]*$$//g;s/$$/" },/;s/\("\)sys_/\1/g'; \
-	echo "};"; \
-	echo ""; \
-	echo "#define NUM_SYSCALL_INFO_ENTRIES ARRAY_SIZE(syscall_info_table)";\
-	echo ""; \
-	echo "#endif /* __SYSCALLTAB_H */" )
-endef
-
-arch/sh/lib64/syscalltab.h: arch/sh/kernel/syscalls_64.S
-	$(call filechk,gen-syscalltab)
-
-CLEAN_FILES += arch/sh/lib64/syscalltab.h \
-	       include/asm-sh/machtypes.h
+CLEAN_FILES += include/asm-sh/machtypes.h
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index dcc1af8..1c91b1f 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -46,6 +46,15 @@
 	  Select 7722 SolutionEngine if configuring for a Hitachi SH772
 	  evaluation board.
 
+config SH_7724_SOLUTION_ENGINE
+	bool "SolutionEngine7724"
+	select SOLUTION_ENGINE
+	depends on CPU_SUBTYPE_SH7724
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Select 7724 SolutionEngine if configuring for a Hitachi SH7724
+	  evaluation board.
+
 config SH_7751_SOLUTION_ENGINE
 	bool "SolutionEngine7751"
 	select SOLUTION_ENGINE
@@ -121,7 +130,7 @@
 	bool "RTS7751R2D"
 	depends on CPU_SUBTYPE_SH7751R
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 	help
 	  Select RTS7751R2D if configuring for a Renesas Technology
 	  Sales SH-Graphics board.
@@ -145,13 +154,13 @@
 	bool "Highlander"
 	depends on CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 
 config SH_SH7785LCR
 	bool "SH7785LCR"
 	depends on CPU_SUBTYPE_SH7785
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 
 config SH_SH7785LCR_29BIT_PHYSMAPS
 	bool "SH7785LCR 29bit physmaps"
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index f2a2964..1c4d83e 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -535,6 +535,18 @@
 }
 device_initcall(ap325rxa_devices_setup);
 
+/* Return the board specific boot mode pin configuration */
+static int ap325rxa_mode_pins(void)
+{
+	/* MD0=0, MD1=0, MD2=0: Clock Mode 0
+	 * MD3=0: 16-bit Area0 Bus Width
+	 * MD5=1: Little Endian
+	 * TSTMD=1, MD8=1: Test Mode Disabled
+	 */
+	return MODE_PIN5 | MODE_PIN8;
+}
+
 static struct sh_machine_vector mv_ap325rxa __initmv = {
 	.mv_name = "AP-325RXA",
+	.mv_mode_pins = ap325rxa_mode_pins,
 };
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index 6f94f17..7be56fb 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -2,12 +2,12 @@
  * Renesas Technology Corp. R0P7785LC0011RL Support.
  *
  * Copyright (C) 2008  Yoshihiro Shimoda
+ * Copyright (C) 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/sm501.h>
@@ -19,8 +19,12 @@
 #include <linux/i2c-pca-platform.h>
 #include <linux/i2c-algo-pca.h>
 #include <linux/irq.h>
-#include <asm/heartbeat.h>
+#include <linux/clk.h>
+#include <linux/errno.h>
 #include <mach/sh7785lcr.h>
+#include <asm/heartbeat.h>
+#include <asm/clock.h>
+#include <cpu/sh7785.h>
 
 /*
  * NOTE: This board has 2 physical memory maps.
@@ -273,6 +277,20 @@
 	plat_irq_setup_pins(IRQ_MODE_IRQ3210);
 }
 
+static int sh7785lcr_clk_init(void)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = clk_get(NULL, "extal");
+	if (!clk || IS_ERR(clk))
+		return PTR_ERR(clk);
+	ret = clk_set_rate(clk, 33333333);
+	clk_put(clk);
+
+	return ret;
+}
+
 static void sh7785lcr_power_off(void)
 {
 	unsigned char *p;
@@ -303,12 +321,34 @@
 	writel(0x000307c2, sm501_reg);
 }
 
+/* Return the board specific boot mode pin configuration */
+static int sh7785lcr_mode_pins(void)
+{
+	int value = 0;
+
+	/* These are the factory default settings of S1 and S2.
+	 * If you change these dip switches then you will need to
+	 * adjust the values below as well.
+	 */
+	value |= MODE_PIN4; /* Clock Mode 16 */
+	value |= MODE_PIN5; /* 32-bit Area0 bus width */
+	value |= MODE_PIN6; /* 32-bit Area0 bus width */
+	value |= MODE_PIN7; /* Area 0 SRAM interface [fixed] */
+	value |= MODE_PIN8; /* Little Endian */
+	value |= MODE_PIN9; /* Master Mode */
+	value |= MODE_PIN14; /* No PLL step-up */
+
+	return value;
+}
+
 /*
  * The Machine Vector
  */
 static struct sh_machine_vector mv_sh7785lcr __initmv = {
 	.mv_name		= "SH7785LCR",
 	.mv_setup		= sh7785lcr_setup,
+	.mv_clk_init		= sh7785lcr_clk_init,
 	.mv_init_irq		= init_sh7785lcr_IRQ,
+	.mv_mode_pins		= sh7785lcr_mode_pins,
 };
 
diff --git a/arch/sh/boards/mach-cayman/Makefile b/arch/sh/boards/mach-cayman/Makefile
index cafe1ac3..00fa3ea 100644
--- a/arch/sh/boards/mach-cayman/Makefile
+++ b/arch/sh/boards/mach-cayman/Makefile
@@ -1,4 +1,4 @@
 #
 # Makefile for the Hitachi Cayman specific parts of the kernel
 #
-obj-y := setup.o irq.o
+obj-y := setup.o irq.o panic.o
diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c
index da62ad5..33f77085 100644
--- a/arch/sh/boards/mach-cayman/irq.c
+++ b/arch/sh/boards/mach-cayman/irq.c
@@ -142,26 +142,11 @@
 	return irq;
 }
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
-int cayman_irq_describe(char* p, int irq)
-{
-	if (irq < NR_INTC_IRQS) {
-		return intc_irq_describe(p, irq);
-	} else if (irq < NR_INTC_IRQS + 8) {
-		return sprintf(p, "(SMSC %d)", irq - NR_INTC_IRQS);
-	} else if ((irq >= NR_INTC_IRQS + 24) && (irq < NR_INTC_IRQS + 32)) {
-		return sprintf(p, "(PCI2 %d)", irq - (NR_INTC_IRQS + 24));
-	}
-
-	return 0;
-}
-#endif
-
 void init_cayman_irq(void)
 {
 	int i;
 
-	epld_virt = onchip_remap(EPLD_BASE, 1024, "EPLD");
+	epld_virt = (unsigned long)ioremap_nocache(EPLD_BASE, 1024);
 	if (!epld_virt) {
 		printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n");
 		return;
diff --git a/arch/sh/boards/mach-cayman/panic.c b/arch/sh/boards/mach-cayman/panic.c
new file mode 100644
index 0000000..d1e6730
--- /dev/null
+++ b/arch/sh/boards/mach-cayman/panic.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2003  Richard Curnow, SuperH UK Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <cpu/registers.h>
+
+/* THIS IS A PHYSICAL ADDRESS */
+#define HDSP2534_ADDR (0x04002100)
+
+static void poor_mans_delay(void)
+{
+	int i;
+
+	for (i = 0; i < 2500000; i++)
+		cpu_relax();
+}
+
+static void show_value(unsigned long x)
+{
+	int i;
+	unsigned nibble;
+	for (i = 0; i < 8; i++) {
+		nibble = ((x >> (i * 4)) & 0xf);
+
+		__raw_writeb(nibble + ((nibble > 9) ? 55 : 48),
+			  HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
+	}
+}
+
+void
+panic_handler(unsigned long panicPC, unsigned long panicSSR,
+	      unsigned long panicEXPEVT)
+{
+	while (1) {
+		/* This piece of code displays the PC on the LED display */
+		show_value(panicPC);
+		poor_mans_delay();
+		show_value(panicSSR);
+		poor_mans_delay();
+		show_value(panicEXPEVT);
+		poor_mans_delay();
+	}
+}
diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
index e7f9cc5..7e8216a 100644
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -102,7 +102,7 @@
 {
 	unsigned char devid, devrev;
 
-	smsc_superio_virt = onchip_remap(SMSC_SUPERIO_BASE, 1024, "SMSC SuperIO");
+	smsc_superio_virt = (unsigned long)ioremap_nocache(SMSC_SUPERIO_BASE, 1024);
 	if (!smsc_superio_virt) {
 		panic("Unable to remap SMSC SuperIO\n");
 	}
diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c
index d1bee48..ebe9922 100644
--- a/arch/sh/boards/mach-dreamcast/setup.c
+++ b/arch/sh/boards/mach-dreamcast/setup.c
@@ -30,7 +30,6 @@
 
 extern struct irq_chip systemasic_int;
 extern void aica_time_init(void);
-extern int gapspci_init(void);
 extern int systemasic_irq_demux(int);
 
 static void __init dreamcast_setup(char **cmdline_p)
@@ -51,11 +50,6 @@
 					 handle_level_irq);
 
 	board_time_init = aica_time_init;
-
-#ifdef CONFIG_PCI
-	if (gapspci_init() < 0)
-		printk(KERN_WARNING "GAPSPCI was not detected.\n");
-#endif
 }
 
 static struct sh_machine_vector mv_dreamcast __initmv = {
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 1ee1de0..6ed401c 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -584,3 +584,22 @@
 	return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
 __initcall(migor_devices_setup);
+
+/* Return the board specific boot mode pin configuration */
+static int migor_mode_pins(void)
+{
+	/* MD0=1, MD1=1, MD2=0: Clock Mode 3
+	 * MD3=0: 16-bit Area0 Bus Width
+	 * MD5=1: Little Endian
+	 * TSTMD=1, MD8=0: Test Mode Disabled
+	 */
+	return MODE_PIN0 | MODE_PIN1 | MODE_PIN5;
+}
+
+/*
+ * The Machine Vector
+ */
+static struct sh_machine_vector mv_migor __initmv = {
+	.mv_name		= "Migo-R",
+	.mv_mode_pins		= migor_mode_pins,
+};
diff --git a/arch/sh/boards/mach-r2d/setup.c b/arch/sh/boards/mach-r2d/setup.c
index c585be0..a625ecb 100644
--- a/arch/sh/boards/mach-r2d/setup.c
+++ b/arch/sh/boards/mach-r2d/setup.c
@@ -10,6 +10,9 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
 #include <linux/ata_platform.h>
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -181,6 +184,50 @@
 	.resource	= sm501_resources,
 };
 
+static struct mtd_partition r2d_partitions[] = {
+	{
+		.name		= "U-Boot",
+		.offset		= 0x00000000,
+		.size		= 0x00040000,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "Environment",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= 0x00040000,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "Kernel",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= 0x001c0000,
+	}, {
+		.name		= "Flash_FS",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= MTDPART_SIZ_FULL,
+	}
+};
+
+static struct physmap_flash_data flash_data = {
+	.width		= 2,
+	.nr_parts	= ARRAY_SIZE(r2d_partitions),
+	.parts		= r2d_partitions,
+};
+
+static struct resource flash_resource = {
+	.start		= 0x00000000,
+	.end		= 0x02000000,
+	.flags		= IORESOURCE_MEM,
+};
+
+static struct platform_device flash_device = {
+	.name		= "physmap-flash",
+	.id		= -1,
+	.resource	= &flash_resource,
+	.num_resources	= 1,
+	.dev		= {
+		.platform_data = &flash_data,
+	},
+};
+
 static struct platform_device *rts7751r2d_devices[] __initdata = {
 	&sm501_device,
 	&heartbeat_device,
@@ -203,6 +250,9 @@
 	if (register_trapped_io(&cf_trapped_io) == 0)
 		platform_device_register(&cf_ide_device);
 
+	if (mach_is_r2d_plus())
+		platform_device_register(&flash_device);
+
 	spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus));
 
 	return platform_add_devices(rts7751r2d_devices,
diff --git a/arch/sh/boards/mach-se/7724/Makefile b/arch/sh/boards/mach-se/7724/Makefile
new file mode 100644
index 0000000..349cbd6
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the HITACHI UL SolutionEngine 7724 specific parts of the kernel
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+#
+
+obj-y	 := setup.o irq.o
\ No newline at end of file
diff --git a/arch/sh/boards/mach-se/7724/irq.c b/arch/sh/boards/mach-se/7724/irq.c
new file mode 100644
index 0000000..f76cf3b
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/irq.c
@@ -0,0 +1,139 @@
+/*
+ * linux/arch/sh/boards/se/7724/irq.c
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on  linux/arch/sh/boards/se/7722/irq.c
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
+ *
+ * Hitachi UL SolutionEngine 7724 Support.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <mach-se/mach/se7724.h>
+
+struct fpga_irq {
+	unsigned long  sraddr;
+	unsigned long  mraddr;
+	unsigned short mask;
+	unsigned int   base;
+};
+
+static unsigned int fpga2irq(unsigned int irq)
+{
+	if (irq >= IRQ0_BASE &&
+	    irq <= IRQ0_END)
+		return IRQ0_IRQ;
+	else if (irq >= IRQ1_BASE &&
+		 irq <= IRQ1_END)
+		return IRQ1_IRQ;
+	else
+		return IRQ2_IRQ;
+}
+
+static struct fpga_irq get_fpga_irq(unsigned int irq)
+{
+	struct fpga_irq set;
+
+	switch (irq) {
+	case IRQ0_IRQ:
+		set.sraddr = IRQ0_SR;
+		set.mraddr = IRQ0_MR;
+		set.mask   = IRQ0_MASK;
+		set.base   = IRQ0_BASE;
+		break;
+	case IRQ1_IRQ:
+		set.sraddr = IRQ1_SR;
+		set.mraddr = IRQ1_MR;
+		set.mask   = IRQ1_MASK;
+		set.base   = IRQ1_BASE;
+		break;
+	default:
+		set.sraddr = IRQ2_SR;
+		set.mraddr = IRQ2_MR;
+		set.mask   = IRQ2_MASK;
+		set.base   = IRQ2_BASE;
+		break;
+	}
+
+	return set;
+}
+
+static void disable_se7724_irq(unsigned int irq)
+{
+	struct fpga_irq set = get_fpga_irq(fpga2irq(irq));
+	unsigned int bit = irq - set.base;
+	ctrl_outw(ctrl_inw(set.mraddr) | 0x0001 << bit, set.mraddr);
+}
+
+static void enable_se7724_irq(unsigned int irq)
+{
+	struct fpga_irq set = get_fpga_irq(fpga2irq(irq));
+	unsigned int bit = irq - set.base;
+	ctrl_outw(ctrl_inw(set.mraddr) & ~(0x0001 << bit), set.mraddr);
+}
+
+static struct irq_chip se7724_irq_chip __read_mostly = {
+	.name           = "SE7724-FPGA",
+	.mask           = disable_se7724_irq,
+	.unmask         = enable_se7724_irq,
+	.mask_ack       = disable_se7724_irq,
+};
+
+static void se7724_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct fpga_irq set = get_fpga_irq(irq);
+	unsigned short intv = ctrl_inw(set.sraddr);
+	struct irq_desc *ext_desc;
+	unsigned int ext_irq = set.base;
+
+	intv &= set.mask;
+
+	while (intv) {
+		if (intv & 0x0001) {
+			ext_desc = irq_desc + ext_irq;
+			handle_level_irq(ext_irq, ext_desc);
+		}
+		intv >>= 1;
+		ext_irq++;
+	}
+}
+
+/*
+ * Initialize IRQ setting
+ */
+void __init init_se7724_IRQ(void)
+{
+	int i;
+
+	ctrl_outw(0xffff, IRQ0_MR);  /* mask all */
+	ctrl_outw(0xffff, IRQ1_MR);  /* mask all */
+	ctrl_outw(0xffff, IRQ2_MR);  /* mask all */
+	ctrl_outw(0x0000, IRQ0_SR);  /* clear irq */
+	ctrl_outw(0x0000, IRQ1_SR);  /* clear irq */
+	ctrl_outw(0x0000, IRQ2_SR);  /* clear irq */
+	ctrl_outw(0x002a, IRQ_MODE); /* set irq type */
+
+	for (i = 0; i < SE7724_FPGA_IRQ_NR; i++)
+		set_irq_chip_and_handler_name(SE7724_FPGA_IRQ_BASE + i,
+					      &se7724_irq_chip,
+					      handle_level_irq, "level");
+
+	set_irq_chained_handler(IRQ0_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ0_IRQ, IRQ_TYPE_LEVEL_LOW);
+
+	set_irq_chained_handler(IRQ1_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ1_IRQ, IRQ_TYPE_LEVEL_LOW);
+
+	set_irq_chained_handler(IRQ2_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ2_IRQ, IRQ_TYPE_LEVEL_LOW);
+}
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
new file mode 100644
index 0000000..9cd04bd
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -0,0 +1,448 @@
+/*
+ * linux/arch/sh/boards/se/7724/setup.c
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/physmap.h>
+#include <linux/delay.h>
+#include <linux/smc91x.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <video/sh_mobile_lcdc.h>
+#include <media/sh_mobile_ceu.h>
+#include <asm/io.h>
+#include <asm/heartbeat.h>
+#include <asm/sh_keysc.h>
+#include <cpu/sh7724.h>
+#include <mach-se/mach/se7724.h>
+
+/*
+ * SWx    1234 5678
+ * ------------------------------------
+ * SW31 : 1001 1100    : default
+ * SW32 : 0111 1111    : use on board flash
+ *
+ * SW41 : abxx xxxx  -> a = 0 : Analog  monitor
+ *                          1 : Digital monitor
+ *                      b = 0 : VGA
+ *                          1 : SVGA
+ */
+
+/* Heartbeat */
+static struct heartbeat_data heartbeat_data = {
+	.regsize = 16,
+};
+
+static struct resource heartbeat_resources[] = {
+	[0] = {
+		.start  = PA_LED,
+		.end    = PA_LED,
+		.flags  = IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device heartbeat_device = {
+	.name           = "heartbeat",
+	.id             = -1,
+	.dev = {
+		.platform_data = &heartbeat_data,
+	},
+	.num_resources  = ARRAY_SIZE(heartbeat_resources),
+	.resource       = heartbeat_resources,
+};
+
+/* LAN91C111 */
+static struct smc91x_platdata smc91x_info = {
+	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
+
+static struct resource smc91x_eth_resources[] = {
+	[0] = {
+		.name   = "SMC91C111" ,
+		.start  = 0x1a300300,
+		.end    = 0x1a30030f,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = IRQ0_SMC,
+		.flags  = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
+	},
+};
+
+static struct platform_device smc91x_eth_device = {
+	.name	= "smc91x",
+	.num_resources  = ARRAY_SIZE(smc91x_eth_resources),
+	.resource       = smc91x_eth_resources,
+	.dev	= {
+		.platform_data	= &smc91x_info,
+	},
+};
+
+/* MTD */
+static struct mtd_partition nor_flash_partitions[] = {
+	{
+		.name = "uboot",
+		.offset = 0,
+		.size = (1 * 1024 * 1024),
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (2 * 1024 * 1024),
+	}, {
+		.name = "free-area",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
+	},
+};
+
+static struct physmap_flash_data nor_flash_data = {
+	.width		= 2,
+	.parts		= nor_flash_partitions,
+	.nr_parts	= ARRAY_SIZE(nor_flash_partitions),
+};
+
+static struct resource nor_flash_resources[] = {
+	[0] = {
+		.name	= "NOR Flash",
+		.start	= 0x00000000,
+		.end	= 0x01ffffff,
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device nor_flash_device = {
+	.name		= "physmap-flash",
+	.resource	= nor_flash_resources,
+	.num_resources	= ARRAY_SIZE(nor_flash_resources),
+	.dev		= {
+		.platform_data = &nor_flash_data,
+	},
+};
+
+/* LCDC */
+static struct sh_mobile_lcdc_info lcdc_info = {
+	.clock_source = LCDC_CLK_EXTERNAL,
+	.ch[0] = {
+		.chan = LCDC_CHAN_MAINLCD,
+		.bpp = 16,
+		.clock_divider = 1,
+		.lcd_cfg = {
+			.name = "LB070WV1",
+			.sync = 0, /* hsync and vsync are active low */
+		},
+		.lcd_size_cfg = { /* 7.0 inch */
+			.width = 152,
+			.height = 91,
+		},
+		.board_cfg = {
+		},
+	}
+};
+
+static struct resource lcdc_resources[] = {
+	[0] = {
+		.name	= "LCDC",
+		.start	= 0xfe940000,
+		.end	= 0xfe941fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 106,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device lcdc_device = {
+	.name		= "sh_mobile_lcdc_fb",
+	.num_resources	= ARRAY_SIZE(lcdc_resources),
+	.resource	= lcdc_resources,
+	.dev		= {
+		.platform_data	= &lcdc_info,
+	},
+};
+
+/* CEU0 */
+static struct sh_mobile_ceu_info sh_mobile_ceu0_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource ceu0_resources[] = {
+	[0] = {
+		.name	= "CEU0",
+		.start	= 0xfe910000,
+		.end	= 0xfe91009f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 52,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device ceu0_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 0, /* "ceu0" clock */
+	.num_resources	= ARRAY_SIZE(ceu0_resources),
+	.resource	= ceu0_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu0_info,
+	},
+};
+
+/* CEU1 */
+static struct sh_mobile_ceu_info sh_mobile_ceu1_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource ceu1_resources[] = {
+	[0] = {
+		.name	= "CEU1",
+		.start	= 0xfe914000,
+		.end	= 0xfe91409f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 63,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device ceu1_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 1, /* "ceu1" clock */
+	.num_resources	= ARRAY_SIZE(ceu1_resources),
+	.resource	= ceu1_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu1_info,
+	},
+};
+
+/* KEYSC */
+static struct sh_keysc_info keysc_info = {
+	.mode = SH_KEYSC_MODE_1,
+	.scan_timing = 10,
+	.delay = 50,
+	.keycodes = {
+		KEY_1, KEY_2, KEY_3, KEY_4, KEY_5,
+		KEY_6, KEY_7, KEY_8, KEY_9, KEY_A,
+		KEY_B, KEY_C, KEY_D, KEY_E, KEY_F,
+		KEY_G, KEY_H, KEY_I, KEY_K, KEY_L,
+		KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q,
+		KEY_R, KEY_S, KEY_T, KEY_U, KEY_V,
+	},
+};
+
+static struct resource keysc_resources[] = {
+	[0] = {
+		.start  = 0x1a204000,
+		.end    = 0x1a20400f,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = IRQ0_KEY,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device keysc_device = {
+	.name           = "sh_keysc",
+	.id             = 0, /* "keysc0" clock */
+	.num_resources  = ARRAY_SIZE(keysc_resources),
+	.resource       = keysc_resources,
+	.dev	= {
+		.platform_data	= &keysc_info,
+	},
+};
+
+static struct platform_device *ms7724se_devices[] __initdata = {
+	&heartbeat_device,
+	&smc91x_eth_device,
+	&lcdc_device,
+	&nor_flash_device,
+	&ceu0_device,
+	&ceu1_device,
+	&keysc_device,
+};
+
+#define SW4140    0xBA201000
+#define FPGA_OUT  0xBA200400
+#define PORT_HIZA 0xA4050158
+
+#define SW41_A    0x0100
+#define SW41_B    0x0200
+#define SW41_C    0x0400
+#define SW41_D    0x0800
+#define SW41_E    0x1000
+#define SW41_F    0x2000
+#define SW41_G    0x4000
+#define SW41_H    0x8000
+static int __init devices_setup(void)
+{
+	u16 sw = ctrl_inw(SW4140); /* select camera, monitor */
+
+	/* Reset Release */
+	ctrl_outw(ctrl_inw(FPGA_OUT) &
+		  ~((1 << 1)  | /* LAN */
+		    (1 << 6)  | /* VIDEO DAC */
+		    (1 << 12)), /* USB0 */
+		  FPGA_OUT);
+
+	/* enable IRQ 0,1,2 */
+	gpio_request(GPIO_FN_INTC_IRQ0, NULL);
+	gpio_request(GPIO_FN_INTC_IRQ1, NULL);
+	gpio_request(GPIO_FN_INTC_IRQ2, NULL);
+
+	/* enable SCIFA3 */
+	gpio_request(GPIO_FN_SCIF3_I_SCK, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_RXD, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_TXD, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_CTS, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_RTS, NULL);
+
+	/* enable LCDC */
+	gpio_request(GPIO_FN_LCDD23,   NULL);
+	gpio_request(GPIO_FN_LCDD22,   NULL);
+	gpio_request(GPIO_FN_LCDD21,   NULL);
+	gpio_request(GPIO_FN_LCDD20,   NULL);
+	gpio_request(GPIO_FN_LCDD19,   NULL);
+	gpio_request(GPIO_FN_LCDD18,   NULL);
+	gpio_request(GPIO_FN_LCDD17,   NULL);
+	gpio_request(GPIO_FN_LCDD16,   NULL);
+	gpio_request(GPIO_FN_LCDD15,   NULL);
+	gpio_request(GPIO_FN_LCDD14,   NULL);
+	gpio_request(GPIO_FN_LCDD13,   NULL);
+	gpio_request(GPIO_FN_LCDD12,   NULL);
+	gpio_request(GPIO_FN_LCDD11,   NULL);
+	gpio_request(GPIO_FN_LCDD10,   NULL);
+	gpio_request(GPIO_FN_LCDD9,    NULL);
+	gpio_request(GPIO_FN_LCDD8,    NULL);
+	gpio_request(GPIO_FN_LCDD7,    NULL);
+	gpio_request(GPIO_FN_LCDD6,    NULL);
+	gpio_request(GPIO_FN_LCDD5,    NULL);
+	gpio_request(GPIO_FN_LCDD4,    NULL);
+	gpio_request(GPIO_FN_LCDD3,    NULL);
+	gpio_request(GPIO_FN_LCDD2,    NULL);
+	gpio_request(GPIO_FN_LCDD1,    NULL);
+	gpio_request(GPIO_FN_LCDD0,    NULL);
+	gpio_request(GPIO_FN_LCDDISP,  NULL);
+	gpio_request(GPIO_FN_LCDHSYN,  NULL);
+	gpio_request(GPIO_FN_LCDDCK,   NULL);
+	gpio_request(GPIO_FN_LCDVSYN,  NULL);
+	gpio_request(GPIO_FN_LCDDON,   NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDRD,    NULL);
+	gpio_request(GPIO_FN_LCDLCLK,  NULL);
+	ctrl_outw((ctrl_inw(PORT_HIZA) & ~0x0001), PORT_HIZA);
+
+	/* enable CEU0 */
+	gpio_request(GPIO_FN_VIO0_D15, NULL);
+	gpio_request(GPIO_FN_VIO0_D14, NULL);
+	gpio_request(GPIO_FN_VIO0_D13, NULL);
+	gpio_request(GPIO_FN_VIO0_D12, NULL);
+	gpio_request(GPIO_FN_VIO0_D11, NULL);
+	gpio_request(GPIO_FN_VIO0_D10, NULL);
+	gpio_request(GPIO_FN_VIO0_D9,  NULL);
+	gpio_request(GPIO_FN_VIO0_D8,  NULL);
+	gpio_request(GPIO_FN_VIO0_D7,  NULL);
+	gpio_request(GPIO_FN_VIO0_D6,  NULL);
+	gpio_request(GPIO_FN_VIO0_D5,  NULL);
+	gpio_request(GPIO_FN_VIO0_D4,  NULL);
+	gpio_request(GPIO_FN_VIO0_D3,  NULL);
+	gpio_request(GPIO_FN_VIO0_D2,  NULL);
+	gpio_request(GPIO_FN_VIO0_D1,  NULL);
+	gpio_request(GPIO_FN_VIO0_D0,  NULL);
+	gpio_request(GPIO_FN_VIO0_VD,  NULL);
+	gpio_request(GPIO_FN_VIO0_CLK, NULL);
+	gpio_request(GPIO_FN_VIO0_FLD, NULL);
+	gpio_request(GPIO_FN_VIO0_HD,  NULL);
+	platform_resource_setup_memory(&ceu0_device, "ceu", 4 << 20);
+
+	/* enable CEU1 */
+	gpio_request(GPIO_FN_VIO1_D7,  NULL);
+	gpio_request(GPIO_FN_VIO1_D6,  NULL);
+	gpio_request(GPIO_FN_VIO1_D5,  NULL);
+	gpio_request(GPIO_FN_VIO1_D4,  NULL);
+	gpio_request(GPIO_FN_VIO1_D3,  NULL);
+	gpio_request(GPIO_FN_VIO1_D2,  NULL);
+	gpio_request(GPIO_FN_VIO1_D1,  NULL);
+	gpio_request(GPIO_FN_VIO1_D0,  NULL);
+	gpio_request(GPIO_FN_VIO1_FLD, NULL);
+	gpio_request(GPIO_FN_VIO1_HD,  NULL);
+	gpio_request(GPIO_FN_VIO1_VD,  NULL);
+	gpio_request(GPIO_FN_VIO1_CLK, NULL);
+	platform_resource_setup_memory(&ceu1_device, "ceu", 4 << 20);
+
+	/* KEYSC */
+	gpio_request(GPIO_FN_KEYOUT5_IN5, NULL);
+	gpio_request(GPIO_FN_KEYOUT4_IN6, NULL);
+	gpio_request(GPIO_FN_KEYIN4,      NULL);
+	gpio_request(GPIO_FN_KEYIN3,      NULL);
+	gpio_request(GPIO_FN_KEYIN2,      NULL);
+	gpio_request(GPIO_FN_KEYIN1,      NULL);
+	gpio_request(GPIO_FN_KEYIN0,      NULL);
+	gpio_request(GPIO_FN_KEYOUT3,     NULL);
+	gpio_request(GPIO_FN_KEYOUT2,     NULL);
+	gpio_request(GPIO_FN_KEYOUT1,     NULL);
+	gpio_request(GPIO_FN_KEYOUT0,     NULL);
+
+	if (sw & SW41_B) {
+		/* SVGA */
+		lcdc_info.ch[0].lcd_cfg.xres         = 800;
+		lcdc_info.ch[0].lcd_cfg.yres         = 600;
+		lcdc_info.ch[0].lcd_cfg.left_margin  = 142;
+		lcdc_info.ch[0].lcd_cfg.right_margin = 52;
+		lcdc_info.ch[0].lcd_cfg.hsync_len    = 96;
+		lcdc_info.ch[0].lcd_cfg.upper_margin = 24;
+		lcdc_info.ch[0].lcd_cfg.lower_margin = 2;
+		lcdc_info.ch[0].lcd_cfg.vsync_len    = 2;
+	} else {
+		/* VGA */
+		lcdc_info.ch[0].lcd_cfg.xres         = 640;
+		lcdc_info.ch[0].lcd_cfg.yres         = 480;
+		lcdc_info.ch[0].lcd_cfg.left_margin  = 105;
+		lcdc_info.ch[0].lcd_cfg.right_margin = 50;
+		lcdc_info.ch[0].lcd_cfg.hsync_len    = 96;
+		lcdc_info.ch[0].lcd_cfg.upper_margin = 33;
+		lcdc_info.ch[0].lcd_cfg.lower_margin = 10;
+		lcdc_info.ch[0].lcd_cfg.vsync_len    = 2;
+	}
+
+	if (sw & SW41_A) {
+		/* Digital monitor */
+		lcdc_info.ch[0].interface_type = RGB18;
+		lcdc_info.ch[0].flags          = 0;
+	} else {
+		/* Analog monitor */
+		lcdc_info.ch[0].interface_type = RGB24;
+		lcdc_info.ch[0].flags          = LCDC_FLAGS_DWPOL;
+	}
+
+	return platform_add_devices(ms7724se_devices,
+				ARRAY_SIZE(ms7724se_devices));
+}
+device_initcall(devices_setup);
+
+static struct sh_machine_vector mv_ms7724se __initmv = {
+	.mv_name	= "ms7724se",
+	.mv_init_irq	= init_se7724_IRQ,
+	.mv_nr_irqs	= SE7724_FPGA_IRQ_BASE + SE7724_FPGA_IRQ_NR,
+};
diff --git a/arch/sh/boards/mach-se/7751/Makefile b/arch/sh/boards/mach-se/7751/Makefile
index dbc29f3..e6f4341 100644
--- a/arch/sh/boards/mach-se/7751/Makefile
+++ b/arch/sh/boards/mach-se/7751/Makefile
@@ -3,5 +3,3 @@
 #
 
 obj-y	 := setup.o io.o irq.o
-
-obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/sh/boards/mach-se/7751/io.c b/arch/sh/boards/mach-se/7751/io.c
index 6287ae5..6e75bd4 100644
--- a/arch/sh/boards/mach-se/7751/io.c
+++ b/arch/sh/boards/mach-se/7751/io.c
@@ -34,8 +34,6 @@
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		return (*port2adr(port)) & 0xff;
 }
@@ -46,8 +44,6 @@
 
         if (PXSEG(port))
                 v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-                v = *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		v = (*port2adr(port)) & 0xff;
 	ctrl_delay();
@@ -58,8 +54,6 @@
 {
         if (PXSEG(port))
                 return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -71,8 +65,6 @@
 {
         if (PXSEG(port))
                 return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -85,8 +77,6 @@
 
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 }
@@ -95,8 +85,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 	ctrl_delay();
@@ -106,8 +94,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else
@@ -118,8 +104,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-se/7751/pci.c b/arch/sh/boards/mach-se/7751/pci.c
deleted file mode 100644
index 203b292..0000000
--- a/arch/sh/boards/mach-se/7751/pci.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * linux/arch/sh/boards/se/7751/pci.c
- *
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Hitachi SH7751 Solution Engine board (MS7751SE01)
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-
-#include <asm/io.h>
-#include "../../../drivers/pci/pci-sh7751.h"
-
-#define PCIMCR_MRSET_OFF	0xBFFFFFFF
-#define PCIMCR_RFSH_OFF		0xFFFFFFFB
-
-/*
- * Only long word accesses of the PCIC's internal local registers and the
- * configuration registers from the CPU is supported.
- */
-#define PCIC_WRITE(x,v) writel((v), PCI_REG(x))
-#define PCIC_READ(x) readl(PCI_REG(x))
-
-/*
- * Description:  This function sets up and initializes the pcic, sets
- * up the BARS, maps the DRAM into the address space etc, etc.
- */
-int __init pcibios_init_platform(void)
-{
-   unsigned long bcr1, wcr1, wcr2, wcr3, mcr;
-   unsigned short bcr2;
-
-   /*
-    * Initialize the slave bus controller on the pcic.  The values used
-    * here should not be hardcoded, but they should be taken from the bsc
-    * on the processor, to make this function as generic as possible.
-    * (i.e. Another sbc may usr different SDRAM timing settings -- in order
-    * for the pcic to work, its settings need to be exactly the same.)
-    */
-   bcr1 = (*(volatile unsigned long*)(SH7751_BCR1));
-   bcr2 = (*(volatile unsigned short*)(SH7751_BCR2));
-   wcr1 = (*(volatile unsigned long*)(SH7751_WCR1));
-   wcr2 = (*(volatile unsigned long*)(SH7751_WCR2));
-   wcr3 = (*(volatile unsigned long*)(SH7751_WCR3));
-   mcr = (*(volatile unsigned long*)(SH7751_MCR));
-
-   bcr1 = bcr1 | 0x00080000;  /* Enable Bit 19, BREQEN */
-   (*(volatile unsigned long*)(SH7751_BCR1)) = bcr1;   
-
-   bcr1 = bcr1 | 0x40080000;  /* Enable Bit 19 BREQEN, set PCIC to slave */
-   PCIC_WRITE(SH7751_PCIBCR1, bcr1);	 /* PCIC BCR1 */
-   PCIC_WRITE(SH7751_PCIBCR2, bcr2);     /* PCIC BCR2 */
-   PCIC_WRITE(SH7751_PCIWCR1, wcr1);     /* PCIC WCR1 */
-   PCIC_WRITE(SH7751_PCIWCR2, wcr2);     /* PCIC WCR2 */
-   PCIC_WRITE(SH7751_PCIWCR3, wcr3);     /* PCIC WCR3 */
-   mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-   PCIC_WRITE(SH7751_PCIMCR, mcr);      /* PCIC MCR */
-
-
-   /* Enable all interrupts, so we know what to fix */
-   PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff);
-   PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f);
-
-   /* Set up standard PCI config registers */
-   PCIC_WRITE(SH7751_PCICONF1, 	0xF39000C7); /* Bus Master, Mem & I/O access */
-   PCIC_WRITE(SH7751_PCICONF2, 	0x00000000); /* PCI Class code & Revision ID */
-   PCIC_WRITE(SH7751_PCICONF4, 	0xab000001); /* PCI I/O address (local regs) */
-   PCIC_WRITE(SH7751_PCICONF5, 	0x0c000000); /* PCI MEM address (local RAM)  */
-   PCIC_WRITE(SH7751_PCICONF6, 	0xd0000000); /* PCI MEM address (unused)     */
-   PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */
-   PCIC_WRITE(SH7751_PCILSR0, 0x03f00000);   /* MEM (full 64M exposed)       */
-   PCIC_WRITE(SH7751_PCILSR1, 0x00000000);   /* MEM (unused)                 */
-   PCIC_WRITE(SH7751_PCILAR0, 0x0c000000);   /* MEM (direct map from PCI)    */
-   PCIC_WRITE(SH7751_PCILAR1, 0x00000000);   /* MEM (unused)                 */
-
-   /* Now turn it on... */
-   PCIC_WRITE(SH7751_PCICR, 0xa5000001);
-
-   /*
-    * Set PCIMBR and PCIIOBR here, assuming a single window
-    * (16M MEM, 256K IO) is enough.  If a larger space is
-    * needed, the readx/writex and inx/outx functions will
-    * have to do more (e.g. setting registers for each call).
-    */
-
-   /*
-    * Set the MBR so PCI address is one-to-one with window,
-    * meaning all calls go straight through... use BUG_ON to
-    * catch erroneous assumption.
-    */
-   BUG_ON(PCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE);
-
-   PCIC_WRITE(SH7751_PCIMBR, PCIBIOS_MIN_MEM);
-
-   /* Set IOBR for window containing area specified in pci.h */
-   PCIC_WRITE(SH7751_PCIIOBR, (PCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK));
-
-   /* All done, may as well say so... */
-   printk("SH7751 PCI: Finished initialization of the PCI controller\n");
-
-   return 1;
-}
-
-int __init pcibios_map_platform_irq(u8 slot, u8 pin)
-{
-        switch (slot) {
-        case 0: return 13;
-        case 1: return 13; 	/* AMD Ethernet controller */
-        case 2: return -1;
-        case 3: return -1;
-        case 4: return -1;
-        default:
-                printk("PCI: Bad IRQ mapping request for slot %d\n", slot);
-                return -1;
-        }
-}
-
-static struct resource sh7751_io_resource = {
-	.name   = "SH7751 IO",
-	.start  = SH7751_PCI_IO_BASE,
-	.end    = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags  = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name   = "SH7751 mem",
-	.start  = SH7751_PCI_MEMORY_BASE,
-	.end    = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags  = IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh7751_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
diff --git a/arch/sh/boards/mach-se/7780/irq.c b/arch/sh/boards/mach-se/7780/irq.c
index 66ad292..b8d43b6 100644
--- a/arch/sh/boards/mach-se/7780/irq.c
+++ b/arch/sh/boards/mach-se/7780/irq.c
@@ -12,10 +12,13 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <asm/io.h>
+#include <linux/irq.h>
+#include <linux/io.h>
 #include <mach-se/mach/se7780.h>
 
+#define INTC_BASE	0xffd00000
+#define INTC_ICR1	(INTC_BASE+0x1c)
+
 /*
  * Initialize IRQ setting
  */
@@ -43,4 +46,24 @@
 	ctrl_outw((IRQPIN_PCCPW << IRQPOS_PCCPW), FPGA_INTSEL3);
 
 	plat_irq_setup_pins(IRQ_MODE_IRQ); /* install handlers for IRQ0-7 */
+
+	/* ICR1: detect low level(for 2ndcut) */
+	ctrl_outl(0xAAAA0000, INTC_ICR1);
+
+	/*
+	 * FPGA PCISEL register initialize
+	 *
+	 *  CPU  || SLOT1 | SLOT2 | S-ATA | USB
+	 *  -------------------------------------
+	 *  INTA || INTA  | INTD  |  --   | INTB
+	 *  -------------------------------------
+	 *  INTB || INTB  | INTA  |  --   | INTC
+	 *  -------------------------------------
+	 *  INTC || INTC  | INTB  | INTA  |  --
+	 *  -------------------------------------
+	 *  INTD || INTD  | INTC  |  --   | INTA
+	 *  -------------------------------------
+	 */
+	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
+	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
 }
diff --git a/arch/sh/boards/mach-se/Makefile b/arch/sh/boards/mach-se/Makefile
index 2de42ba..b537e23 100644
--- a/arch/sh/boards/mach-se/Makefile
+++ b/arch/sh/boards/mach-se/Makefile
@@ -7,3 +7,4 @@
 obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= 7780/
 obj-$(CONFIG_SH_7343_SOLUTION_ENGINE)	+= 7343/
 obj-$(CONFIG_SH_7721_SOLUTION_ENGINE)	+= 7721/
+obj-$(CONFIG_SH_7724_SOLUTION_ENGINE)	+= 7724/
diff --git a/arch/sh/boards/mach-sh03/rtc.c b/arch/sh/boards/mach-sh03/rtc.c
index 0a9266b..a8b9f84 100644
--- a/arch/sh/boards/mach-sh03/rtc.c
+++ b/arch/sh/boards/mach-sh03/rtc.c
@@ -35,13 +35,13 @@
 #define RTC_BUSY	1
 #define RTC_STOP	2
 
-extern spinlock_t rtc_lock;
+static DEFINE_SPINLOCK(sh03_rtc_lock);
 
 unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 
-	spin_lock(&rtc_lock);
+	spin_lock(&sh03_rtc_lock);
  again:
 	do {
 		sec  = (ctrl_inb(RTC_SEC1) & 0xf) + (ctrl_inb(RTC_SEC10) & 0x7) * 10;
@@ -73,7 +73,7 @@
 		goto again;
 	}
 
-	spin_unlock(&rtc_lock);
+	spin_unlock(&sh03_rtc_lock);
 	return mktime(year, mon, day, hour, min, sec);
 }
 
@@ -91,7 +91,7 @@
 	int i;
 
 	/* gets recalled with irq locally disabled */
-	spin_lock(&rtc_lock);
+	spin_lock(&sh03_rtc_lock);
 	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
 		if (!(ctrl_inb(RTC_CTL) & RTC_BUSY))
 			break;
@@ -113,7 +113,7 @@
 		       cmos_minutes, real_minutes);
 		retval = -1;
 	}
-	spin_unlock(&rtc_lock);
+	spin_unlock(&sh03_rtc_lock);
 
 	return retval;
 }
diff --git a/arch/sh/boards/mach-snapgear/io.c b/arch/sh/boards/mach-snapgear/io.c
index 0f48242..476650e 100644
--- a/arch/sh/boards/mach-snapgear/io.c
+++ b/arch/sh/boards/mach-snapgear/io.c
@@ -36,8 +36,6 @@
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		return (*port2adr(port)) & 0xff;
 }
@@ -48,8 +46,6 @@
 
 	if (PXSEG(port))
 		v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		v = *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		v = (*port2adr(port))&0xff;
 	ctrl_delay();
@@ -60,8 +56,6 @@
 {
 	if (PXSEG(port))
 		return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -73,8 +67,6 @@
 {
 	if (PXSEG(port))
 		return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -87,8 +79,6 @@
 
 	if (PXSEG(port))
 		*(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 }
@@ -97,8 +87,6 @@
 {
 	if (PXSEG(port))
 		*(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 	ctrl_delay();
@@ -108,8 +96,6 @@
 {
 	if (PXSEG(port))
 		*(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else
@@ -120,8 +106,6 @@
 {
 	if (PXSEG(port))
 		*(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-systemh/io.c b/arch/sh/boards/mach-systemh/io.c
index dec3db0..15577ff 100644
--- a/arch/sh/boards/mach-systemh/io.c
+++ b/arch/sh/boards/mach-systemh/io.c
@@ -35,8 +35,6 @@
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else if (port <= 0x3F1)
 		return *(volatile unsigned char *)ETHER_IOMAP(port);
 	else
@@ -49,8 +47,6 @@
 
         if (PXSEG(port))
                 v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-                v = *(volatile unsigned char *)pci_ioaddr(port);
 	else if (port <= 0x3F1)
 		v = *(volatile unsigned char *)ETHER_IOMAP(port);
 	else
@@ -63,8 +59,6 @@
 {
         if (PXSEG(port))
                 return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else if (port <= 0x3F1)
@@ -78,8 +72,6 @@
 {
         if (PXSEG(port))
                 return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else if (port <= 0x3F1)
@@ -94,8 +86,6 @@
 
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else if (port <= 0x3F1)
 		*(volatile unsigned char *)ETHER_IOMAP(port) = value;
 	else
@@ -106,8 +96,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else if (port <= 0x3F1)
 		*(volatile unsigned char *)ETHER_IOMAP(port) = value;
 	else
@@ -119,8 +107,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else if (port <= 0x3F1)
@@ -133,8 +119,6 @@
 {
         if (PXSEG(port))
                 *(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c
index 4badad4..0130e98 100644
--- a/arch/sh/boards/mach-titan/io.c
+++ b/arch/sh/boards/mach-titan/io.c
@@ -17,8 +17,6 @@
 {
         if (PXSEG(port))
                 return ctrl_inb(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inb(pci_ioaddr(port));
         return ctrl_inw(port2adr(port)) & 0xff;
 }
 
@@ -28,8 +26,6 @@
 
         if (PXSEG(port))
                 v = ctrl_inb(port);
-        else if (is_pci_ioaddr(port))
-                v = ctrl_inb(pci_ioaddr(port));
         else
                 v = ctrl_inw(port2adr(port)) & 0xff;
         ctrl_delay();
@@ -40,8 +36,6 @@
 {
         if (PXSEG(port))
                 return ctrl_inw(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inw(pci_ioaddr(port));
         else if (port >= 0x2000)
                 return ctrl_inw(port2adr(port));
         else
@@ -53,8 +47,6 @@
 {
         if (PXSEG(port))
                 return ctrl_inl(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inl(pci_ioaddr(port));
         else if (port >= 0x2000)
                 return ctrl_inw(port2adr(port));
         else
@@ -66,8 +58,6 @@
 {
         if (PXSEG(port))
                 ctrl_outb(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outb(value, pci_ioaddr(port));
         else
                 ctrl_outw(value, port2adr(port));
 }
@@ -76,8 +66,6 @@
 {
         if (PXSEG(port))
                 ctrl_outb(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outb(value, pci_ioaddr(port));
         else
                 ctrl_outw(value, port2adr(port));
         ctrl_delay();
@@ -87,8 +75,6 @@
 {
         if (PXSEG(port))
                 ctrl_outw(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outw(value, pci_ioaddr(port));
         else if (port >= 0x2000)
                 ctrl_outw(value, port2adr(port));
         else
@@ -99,8 +85,6 @@
 {
         if (PXSEG(port))
                 ctrl_outl(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outl(value, pci_ioaddr(port));
         else
                 maybebadio(port);
 }
@@ -117,10 +101,8 @@
 
 void __iomem *titan_ioport_map(unsigned long port, unsigned int size)
 {
-	if (PXSEG(port) || is_pci_memaddr(port))
+	if (PXSEG(port))
 		return (void __iomem *)port;
-	else if (is_pci_ioaddr(port))
-		return (void __iomem *)pci_ioaddr(port);
 
 	return (void __iomem *)port2adr(port);
 }
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index 95483d1..78efb04 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -20,9 +20,6 @@
 CONFIG_ZERO_PAGE_OFFSET	?= 0x00001000
 CONFIG_ENTRY_OFFSET	?= 0x00001000
 
-export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
-       CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET
-
 targets := zImage vmlinux.srec uImage uImage.srec
 subdir- := compressed
 
@@ -43,6 +40,9 @@
 		     $$[$(CONFIG_MEMORY_START)]')
 endif
 
+export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
+       CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET KERNEL_MEMORY
+
 KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
 			$(KERNEL_MEMORY) + \
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index efb01dc..9531bf1 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -1,5 +1,47 @@
-ifeq ($(CONFIG_SUPERH32),y)
-include ${srctree}/arch/sh/boot/compressed/Makefile_32
-else
-include ${srctree}/arch/sh/boot/compressed/Makefile_64
+#
+# linux/arch/sh/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
+		   head_$(BITS).o misc_$(BITS).o piggy.o
+
+OBJECTS = $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/cache.o
+
+ifdef CONFIG_SH_STANDARD_BIOS
+OBJECTS += $(obj)/../../kernel/sh_bios.o
 endif
+
+#
+# IMAGE_OFFSET is the load offset of the compression loader
+#
+IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
+		     $$[$(CONFIG_PAGE_OFFSET)  + \
+			$(KERNEL_MEMORY) + \
+			$(CONFIG_BOOT_LINK_OFFSET)]')
+
+LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+endif
+
+LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
+		   -T $(obj)/../../kernel/vmlinux.lds
+
+$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+OBJCOPYFLAGS += -R .empty_zero_page
+
+$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,as_o_S)
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
deleted file mode 100644
index b96a055..0000000
--- a/arch/sh/boot/compressed/Makefile_32
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# linux/arch/sh/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   head_32.o misc_32.o piggy.o
-
-OBJECTS = $(obj)/head_32.o $(obj)/misc_32.o
-
-ifdef CONFIG_SH_STANDARD_BIOS
-OBJECTS += $(obj)/../../kernel/sh_bios.o
-endif
-
-#
-# IMAGE_OFFSET is the load offset of the compression loader
-#
-IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
-		     $$[$(CONFIG_PAGE_OFFSET)  + \
-			$(CONFIG_MEMORY_START) + \
-			$(CONFIG_BOOT_LINK_OFFSET)]')
-
-LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-
-ifeq ($(CONFIG_FUNCTION_TRACER),y)
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
-endif
-
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup -T $(obj)/../../kernel/vmlinux.lds
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-OBJCOPYFLAGS += -R .empty_zero_page
-
-$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,as_o_S)
diff --git a/arch/sh/boot/compressed/Makefile_64 b/arch/sh/boot/compressed/Makefile_64
deleted file mode 100644
index 658d4f9..0000000
--- a/arch/sh/boot/compressed/Makefile_64
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# arch/sh/boot/compressed/Makefile_64
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-# Copyright (C) 2002 Stuart Menefy
-# Copyright (C) 2004 Paul Mundt
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   head_64.o misc_64.o cache.o piggy.o
-
-OBJECTS		:= $(obj)/vmlinux_64.lds $(obj)/head_64.o $(obj)/misc_64.o \
-		   $(obj)/cache.o
-
-#
-# ZIMAGE_OFFSET is the load offset of the compression loader
-# (4M for the kernel plus 64K for this loader)
-#
-ZIMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
-		     $$[$(CONFIG_PAGE_OFFSET)+0x400000+0x10000]')
-
-LDFLAGS_vmlinux := -Ttext $(ZIMAGE_OFFSET) -e startup \
-		    -T $(obj)/../../kernel/vmlinux.lds
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-OBJCOPYFLAGS += -R .empty_zero_page
-
-$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,as_o_S)
diff --git a/arch/sh/boot/compressed/head_64.S b/arch/sh/boot/compressed/head_64.S
index 622eac3..9993113 100644
--- a/arch/sh/boot/compressed/head_64.S
+++ b/arch/sh/boot/compressed/head_64.S
@@ -14,6 +14,7 @@
  *   Copyright (C) 2002 Stuart Menefy (stuart.menefy@st.com)
  */
 #include <asm/cache.h>
+#include <asm/tlb.h>
 #include <cpu/mmu_context.h>
 #include <cpu/registers.h>
 
@@ -33,11 +34,7 @@
 #define	ICCR0_INIT_VAL	ICCR0_ON | ICCR0_ICI		/* ICE + ICI */
 #define	ICCR1_INIT_VAL	ICCR1_NOLOCK			/* No locking */
 
-#if 1
 #define	OCCR0_INIT_VAL	OCCR0_ON | OCCR0_OCI | OCCR0_WB	/* OCE + OCI + WB */
-#else
-#define	OCCR0_INIT_VAL	OCCR0_OFF
-#endif
 #define	OCCR1_INIT_VAL	OCCR1_NOLOCK			/* No locking */
 
 	.text
diff --git a/arch/sh/boot/compressed/vmlinux_64.lds b/arch/sh/boot/compressed/vmlinux_64.lds
deleted file mode 100644
index 59c2ef4..0000000
--- a/arch/sh/boot/compressed/vmlinux_64.lds
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * ld script to make compressed SuperH/shmedia Linux kernel+decompression
- *		bootstrap
- * Modified by Stuart Menefy from arch/sh/vmlinux.lds.S written by Niibe Yutaka
- */
-
-
-#ifdef CONFIG_LITTLE_ENDIAN
-/* OUTPUT_FORMAT("elf32-sh64l-linux", "elf32-sh64l-linux", "elf32-sh64l-linux") */
-#define NOP 0x6ff0fff0
-#else
-/* OUTPUT_FORMAT("elf32-sh64", "elf32-sh64", "elf32-sh64") */
-#define NOP 0xf0fff06f
-#endif
-
-OUTPUT_FORMAT("elf32-sh64-linux")
-OUTPUT_ARCH(sh)
-ENTRY(_start)
-
-#define ALIGNED_GAP(section, align) (((ADDR(section)+SIZEOF(section)+(align)-1) & ~((align)-1))-ADDR(section))
-#define FOLLOWING(section, align) AT (LOADADDR(section) + ALIGNED_GAP(section,align))
-
-SECTIONS
-{
-  _text = .;			/* Text and read-only data */
-
-  .text : {
-	*(.text)
-	*(.text64)
-	*(.text..SHmedia32)
-	*(.fixup)
-	*(.gnu.warning)
-	} = NOP
-  . = ALIGN(4);
-  .rodata : { *(.rodata) }
-
-  /* There is no 'real' reason for eight byte alignment, four would work
-   * as well, but gdb downloads much (*4) faster with this.
-   */
-  . = ALIGN(8);
-  .image : { *(.image) }
-  . = ALIGN(4);
-  _etext = .;			/* End of text section */
-
-  .data :			/* Data */
-	FOLLOWING(.image, 4)
-	{
-	_data = .;
-	*(.data)
-	}
-  _data_image = LOADADDR(.data);/* Address of data section in ROM */
-
-  _edata = .;			/* End of data section */
-
-  .stack : { stack = .;  _stack = .; }
-
-  . = ALIGN(4);
-  __bss_start = .;		/* BSS */
-  .bss : {
-	*(.bss)
-	}
-  . = ALIGN(4);
-  _end = . ;
-}
diff --git a/arch/sh/cchips/Kconfig b/arch/sh/cchips/Kconfig
index f43d183..a5ab2ec 100644
--- a/arch/sh/cchips/Kconfig
+++ b/arch/sh/cchips/Kconfig
@@ -34,11 +34,6 @@
 
 	  Do not change this unless you know what you are doing.
 
-config HD64461_IOBASE
-	hex "HD64461 start address"
-	depends on HD64461
-	default "0xb0000000"
-
 config HD64461_ENABLER
 	bool "HD64461 PCMCIA enabler"
 	depends on HD64461
diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c
index 25ef910..50aa0c1 100644
--- a/arch/sh/cchips/hd6446x/hd64461.c
+++ b/arch/sh/cchips/hd6446x/hd64461.c
@@ -80,7 +80,7 @@
 
 	printk(KERN_INFO
 	       "HD64461 configured at 0x%x on irq %d(mapped into %d to %d)\n",
-	       CONFIG_HD64461_IOBASE, CONFIG_HD64461_IRQ, HD64461_IRQBASE,
+	       HD64461_IOBASE, CONFIG_HD64461_IRQ, HD64461_IRQBASE,
 	       HD64461_IRQBASE + 15);
 
 /* Should be at processor specific part.. */
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index c8d982a..022f70e 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:46:53 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:42:06 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -74,6 +75,7 @@
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +94,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +115,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +163,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 CONFIG_CPU_SUBTYPE_SH7723=y
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +173,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -573,6 +576,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -695,6 +699,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=6
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1030,6 +1035,7 @@
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -1052,6 +1058,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1100,6 +1111,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1191,10 +1203,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1303,6 +1329,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig
index fa5fc1e..40301f8 100644
--- a/arch/sh/configs/cayman_defconfig
+++ b/arch/sh/configs/cayman_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:49:14 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:42:53 2009
 #
 CONFIG_SUPERH=y
 # CONFIG_SUPERH32 is not set
@@ -40,6 +40,7 @@
 CONFIG_SWAP=y
 # CONFIG_SYSVIPC is not set
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -70,6 +71,7 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -89,10 +91,13 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +110,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -127,39 +131,6 @@
 # System type
 #
 CONFIG_CPU_SH5=y
-# CONFIG_CPU_SUBTYPE_SH7619 is not set
-# CONFIG_CPU_SUBTYPE_SH7201 is not set
-# CONFIG_CPU_SUBTYPE_SH7203 is not set
-# CONFIG_CPU_SUBTYPE_SH7206 is not set
-# CONFIG_CPU_SUBTYPE_SH7263 is not set
-# CONFIG_CPU_SUBTYPE_MXG is not set
-# CONFIG_CPU_SUBTYPE_SH7705 is not set
-# CONFIG_CPU_SUBTYPE_SH7706 is not set
-# CONFIG_CPU_SUBTYPE_SH7707 is not set
-# CONFIG_CPU_SUBTYPE_SH7708 is not set
-# CONFIG_CPU_SUBTYPE_SH7709 is not set
-# CONFIG_CPU_SUBTYPE_SH7710 is not set
-# CONFIG_CPU_SUBTYPE_SH7712 is not set
-# CONFIG_CPU_SUBTYPE_SH7720 is not set
-# CONFIG_CPU_SUBTYPE_SH7721 is not set
-# CONFIG_CPU_SUBTYPE_SH7750 is not set
-# CONFIG_CPU_SUBTYPE_SH7091 is not set
-# CONFIG_CPU_SUBTYPE_SH7750R is not set
-# CONFIG_CPU_SUBTYPE_SH7750S is not set
-# CONFIG_CPU_SUBTYPE_SH7751 is not set
-# CONFIG_CPU_SUBTYPE_SH7751R is not set
-# CONFIG_CPU_SUBTYPE_SH7760 is not set
-# CONFIG_CPU_SUBTYPE_SH4_202 is not set
-# CONFIG_CPU_SUBTYPE_SH7723 is not set
-# CONFIG_CPU_SUBTYPE_SH7763 is not set
-# CONFIG_CPU_SUBTYPE_SH7770 is not set
-# CONFIG_CPU_SUBTYPE_SH7780 is not set
-# CONFIG_CPU_SUBTYPE_SH7785 is not set
-# CONFIG_CPU_SUBTYPE_SH7786 is not set
-# CONFIG_CPU_SUBTYPE_SHX3 is not set
-# CONFIG_CPU_SUBTYPE_SH7343 is not set
-# CONFIG_CPU_SUBTYPE_SH7722 is not set
-# CONFIG_CPU_SUBTYPE_SH7366 is not set
 CONFIG_CPU_SUBTYPE_SH5_101=y
 # CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
@@ -279,8 +250,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -492,6 +461,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -568,6 +538,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -591,6 +562,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -779,6 +751,7 @@
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -1042,7 +1015,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1082,6 +1054,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1105,6 +1078,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1146,8 +1124,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1208,6 +1191,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 # CONFIG_TIMER_STATS is not set
@@ -1241,15 +1227,21 @@
 # CONFIG_LATENCYTOP is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
 # CONFIG_PAGE_POISONING is not set
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 # CONFIG_EARLY_SCIF_CONSOLE is not set
 # CONFIG_DEBUG_BOOTMEM is not set
@@ -1354,6 +1346,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index 5c11236..1f3cc98 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:51:48 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:44:27 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -71,6 +72,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -90,6 +92,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -98,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +115,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +160,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +170,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -271,7 +274,7 @@
 CONFIG_SH_DMA_API=y
 CONFIG_SH_DMA=y
 CONFIG_SH_DMA_IRQ_MULTI=y
-CONFIG_NR_ONCHIP_DMA_CHANNELS=6
+CONFIG_NR_ONCHIP_DMA_CHANNELS=4
 CONFIG_NR_DMA_CHANNELS_BOOL=y
 CONFIG_NR_DMA_CHANNELS=9
 # CONFIG_PVR2_DMA is not set
@@ -320,7 +323,6 @@
 CONFIG_MAPLE=y
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -602,6 +604,7 @@
 # CONFIG_MOUSE_APPLETOUCH is not set
 # CONFIG_MOUSE_BCM5974 is not set
 # CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_MOUSE_MAPLE is not set
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 # CONFIG_INPUT_TOUCHSCREEN is not set
@@ -812,7 +815,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -867,6 +869,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -910,6 +917,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -947,10 +955,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1051,6 +1073,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index f4c34b0..d709245 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:54:02 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:45:04 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -56,6 +57,7 @@
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 # CONFIG_PRINTK is not set
 # CONFIG_BUG is not set
@@ -74,12 +76,15 @@
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_BASE_SMALL=1
 # CONFIG_MODULES is not set
@@ -114,6 +119,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -123,8 +129,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -382,6 +386,10 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+
+#
 # Pseudo filesystems
 #
 # CONFIG_PROC_FS is not set
@@ -409,10 +417,22 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -426,6 +446,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index 7825c26..a822b1d 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:54:57 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:45:25 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -67,7 +69,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -77,6 +78,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -96,6 +98,7 @@
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -103,6 +106,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -115,7 +120,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -161,6 +165,7 @@
 CONFIG_CPU_SUBTYPE_SH7760=y
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -170,8 +175,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -815,6 +818,7 @@
 # CONFIG_EXT2_FS_SECURITY is not set
 CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -839,6 +843,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -883,6 +892,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -964,6 +974,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 CONFIG_TIMER_STATS=y
@@ -1001,6 +1014,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1010,9 +1024,14 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1126,6 +1145,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index ebb4c37..c5b5007 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:58:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:46:26 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -164,6 +167,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -173,8 +177,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -548,6 +550,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -919,6 +922,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -943,6 +947,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -985,8 +994,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1075,11 +1089,25 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1179,6 +1207,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 82b113a..8e13027 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:01:05 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:47:15 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,6 +68,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -85,12 +87,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -98,7 +103,6 @@
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -144,6 +148,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -153,8 +158,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -385,6 +388,7 @@
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
@@ -431,6 +435,7 @@
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -674,6 +679,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -719,6 +729,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -786,10 +797,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -898,6 +922,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index b6fa4a7..7f549ae 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:02:54 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:47:48 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,6 +70,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +112,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +157,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +167,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -292,8 +295,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -602,6 +603,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -706,6 +708,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -729,6 +732,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -922,6 +926,7 @@
 # CONFIG_SOC_CAMERA is not set
 CONFIG_V4L_USB_DRIVERS=y
 # CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
 # CONFIG_USB_GSPCA is not set
 # CONFIG_VIDEO_HDPVR is not set
 CONFIG_VIDEO_USBVIDEO=m
@@ -994,15 +999,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=m
 CONFIG_HID_APPLE=m
 CONFIG_HID_BELKIN=m
 CONFIG_HID_CHERRY=m
 CONFIG_HID_CHICONY=m
 CONFIG_HID_CYPRESS=m
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=m
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=m
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=m
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1197,6 +1204,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1222,6 +1230,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1270,10 +1283,15 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 CONFIG_UFS_FS=m
 # CONFIG_UFS_FS_WRITE is not set
 # CONFIG_UFS_DEBUG is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
@@ -1364,10 +1382,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1469,6 +1500,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 92c515c..a7db539 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:06:51 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:48:54 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,6 +70,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +112,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +157,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +167,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -293,8 +296,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -542,6 +543,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -702,6 +704,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -725,6 +728,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -931,7 +935,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1007,6 +1010,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1029,6 +1033,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1073,8 +1082,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1151,10 +1165,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1256,6 +1283,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 26586c2..58bec61 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:07:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:49:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 # CONFIG_TASKSTATS is not set
@@ -66,7 +68,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +96,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -101,6 +104,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -113,7 +118,6 @@
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +163,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +173,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -813,6 +816,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 # CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
@@ -831,6 +835,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -884,6 +893,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -965,6 +975,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1000,6 +1011,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1008,9 +1020,14 @@
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1035,6 +1052,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index 7517835..2886fc8 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:11:13 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:50:51 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +93,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +109,6 @@
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +154,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 CONFIG_CPU_SUBTYPE_SH4_202=y
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +164,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -647,6 +649,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -669,6 +672,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -715,6 +723,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -802,10 +811,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -914,6 +937,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index a8720f9..8ecceb4 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:14:03 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:51:34 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,7 +68,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +76,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -104,6 +105,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -115,7 +118,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -165,6 +167,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -174,8 +177,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 CONFIG_CPU_SUBTYPE_SH7722=y
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -577,6 +578,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -873,7 +875,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -1012,6 +1013,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1056,6 +1062,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1105,11 +1112,25 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1218,6 +1239,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig
index df2d177d..2b95072 100644
--- a/arch/sh/configs/polaris_defconfig
+++ b/arch/sh/configs/polaris_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:16:48 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:52:19 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 # CONFIG_TASKSTATS is not set
@@ -76,6 +78,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +97,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -101,6 +105,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -113,7 +119,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +164,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +174,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -778,6 +782,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -831,6 +840,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -874,6 +884,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -914,6 +927,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -923,9 +937,14 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -950,6 +969,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 7def4df..943da63 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:20:17 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:53:28 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -107,6 +109,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -118,7 +122,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -165,6 +168,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -174,8 +178,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -258,7 +260,7 @@
 #
 CONFIG_SH_TMU=y
 CONFIG_SH_TIMER_IRQ=28
-CONFIG_SH_PCLK_FREQ=32000000
+CONFIG_SH_PCLK_FREQ=33333333
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -313,8 +315,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -536,6 +536,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -696,6 +697,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -719,6 +721,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -920,6 +923,7 @@
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -1027,7 +1031,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1120,6 +1123,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1143,6 +1147,11 @@
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1191,6 +1200,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1279,6 +1289,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1316,6 +1329,7 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1325,11 +1339,16 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1449,6 +1468,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index cb134ff..82658f6 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:24:35 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:55:10 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -43,6 +44,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -78,6 +80,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -108,6 +111,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -120,7 +125,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -168,6 +172,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -177,8 +182,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -266,7 +269,7 @@
 #
 CONFIG_SH_TMU=y
 CONFIG_SH_TIMER_IRQ=28
-CONFIG_SH_PCLK_FREQ=50000000
+CONFIG_SH_PCLK_FREQ=33333333
 CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -337,8 +340,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 # CONFIG_PCI_LEGACY is not set
@@ -561,6 +562,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -698,6 +700,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -721,6 +724,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -948,6 +952,7 @@
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -970,6 +975,7 @@
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_SHT15 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1109,7 +1115,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1202,6 +1207,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1225,6 +1231,11 @@
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1273,6 +1284,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1359,6 +1371,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1401,6 +1414,7 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1410,11 +1424,16 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1534,6 +1553,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig
index a037c74..fa43957 100644
--- a/arch/sh/configs/rsk7201_defconfig
+++ b/arch/sh/configs/rsk7201_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:29:08 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:56:29 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -100,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -110,7 +113,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -157,6 +159,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -166,8 +169,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -603,6 +604,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -651,8 +657,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -686,11 +697,24 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -705,6 +729,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 9ae28e8..e3a65f8 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:30:34 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:57:06 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -70,7 +72,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -80,6 +81,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -116,7 +120,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -163,6 +166,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -172,8 +176,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -750,15 +752,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -874,6 +878,7 @@
 # LED drivers
 #
 CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_GPIO_PLATFORM=y
 
 #
 # LED Triggers
@@ -882,6 +887,7 @@
 CONFIG_LEDS_TRIGGER_TIMER=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+# CONFIG_LEDS_TRIGGER_GPIO is not set
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 
 #
@@ -951,6 +957,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -989,8 +1000,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1033,6 +1049,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1076,6 +1095,7 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1083,11 +1103,16 @@
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1111,6 +1136,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index c0f741a..a4a59f6 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:33:25 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:58:13 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -99,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -302,8 +304,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -513,6 +513,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -672,6 +673,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -695,6 +697,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -793,6 +796,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=1
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1079,15 +1083,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1291,6 +1297,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1337,6 +1348,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1417,11 +1429,25 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1524,6 +1550,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 8feef62..a860435 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:34:12 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:59:01 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -99,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -302,8 +304,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -424,7 +424,92 @@
 CONFIG_EXTRA_FIRMWARE=""
 # CONFIG_SYS_HYPERVISOR is not set
 # CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_BLKDEVS is not set
+# CONFIG_MTD_BLOCK is not set
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_INTEL_VR_NOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
 # CONFIG_PARPORT is not set
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_CPQ_CISS_DA is not set
@@ -513,6 +598,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -672,6 +758,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -695,6 +782,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -793,6 +881,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=1
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1079,15 +1168,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1291,6 +1382,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1337,6 +1433,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1417,11 +1514,25 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1524,6 +1635,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 739e229..a629b79 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:34:43 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:59:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -41,6 +42,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -73,6 +75,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -93,6 +96,7 @@
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -100,6 +104,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -112,7 +118,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 CONFIG_LBD=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +164,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -168,8 +174,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -310,8 +314,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 # CONFIG_PCI_LEGACY is not set
@@ -646,6 +648,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -1091,15 +1094,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1257,6 +1262,7 @@
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1281,6 +1287,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -1331,6 +1342,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1418,6 +1430,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 CONFIG_TIMER_STATS=y
@@ -1455,6 +1470,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1464,9 +1480,14 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1580,6 +1601,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index d30e0a7..5caf85a 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:39:37 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:01:02 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -63,6 +65,7 @@
 CONFIG_CGROUP_NS=y
 # CONFIG_CGROUP_FREEZER is not set
 CONFIG_CGROUP_DEVICE=y
+# CONFIG_CPUSETS is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_MEM_RES_CTLR=y
@@ -80,7 +83,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -90,6 +92,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -116,6 +119,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -127,7 +132,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -174,6 +178,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -183,8 +188,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -746,6 +749,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -785,8 +793,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -831,6 +844,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -869,6 +885,7 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -876,11 +893,16 @@
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -991,6 +1013,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index fbb72d0..004d531 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:42:00 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:01:44 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -73,6 +75,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -91,6 +94,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -98,6 +102,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -109,7 +115,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -158,6 +163,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -167,8 +173,6 @@
 CONFIG_CPU_SUBTYPE_SH7343=y
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -769,6 +773,7 @@
 # CONFIG_SOC_CAMERA is not set
 CONFIG_V4L_USB_DRIVERS=y
 # CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
 CONFIG_USB_GSPCA=m
 # CONFIG_USB_M5602 is not set
 # CONFIG_USB_STV06XX is not set
@@ -800,6 +805,7 @@
 # CONFIG_VIDEO_PVRUSB2 is not set
 # CONFIG_VIDEO_HDPVR is not set
 # CONFIG_VIDEO_EM28XX is not set
+# CONFIG_VIDEO_CX231XX is not set
 # CONFIG_VIDEO_USBVISION is not set
 # CONFIG_USB_VICAM is not set
 # CONFIG_USB_IBMCAM is not set
@@ -813,6 +819,7 @@
 # CONFIG_USB_STV680 is not set
 # CONFIG_USB_ZC0301 is not set
 # CONFIG_USB_PWC is not set
+CONFIG_USB_PWC_INPUT_EVDEV=y
 # CONFIG_USB_ZR364XX is not set
 # CONFIG_USB_STKWEBCAM is not set
 # CONFIG_USB_S2255 is not set
@@ -914,15 +921,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1050,6 +1059,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1071,6 +1081,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1125,6 +1140,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1174,10 +1190,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1279,6 +1308,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 125304e..edbece5 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:44:53 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:02:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -61,6 +62,7 @@
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -78,11 +80,14 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=1
@@ -134,6 +139,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -143,8 +149,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -513,7 +517,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -564,6 +567,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -601,8 +609,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -630,10 +643,21 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -647,6 +671,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 0308abf..bae161c 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:45:56 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:02:52 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -62,7 +63,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -70,6 +70,7 @@
 CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,12 +89,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -150,6 +154,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -159,8 +164,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -698,7 +701,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -752,6 +754,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -804,6 +811,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -847,10 +855,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -949,6 +970,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index a8c24b7..330043f 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:48:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:03:27 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -38,6 +39,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -69,6 +71,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -87,6 +90,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -94,6 +98,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +111,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +156,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +166,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -585,6 +589,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -812,6 +817,7 @@
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -833,6 +839,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -887,6 +898,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -927,6 +939,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -961,6 +974,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -969,9 +983,14 @@
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1090,6 +1109,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index 4b79c25..5647891 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:51:44 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:04:19 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -38,6 +39,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -73,6 +75,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -91,6 +94,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -98,6 +102,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -109,7 +115,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -155,6 +160,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -164,8 +170,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -776,15 +780,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -943,6 +949,7 @@
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -964,6 +971,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1021,6 +1033,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
 
 #
@@ -1085,6 +1098,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1119,6 +1133,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1127,9 +1142,14 @@
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1248,6 +1268,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig
index 82bdaac..726fdbd 100644
--- a/arch/sh/configs/se7722_defconfig
+++ b/arch/sh/configs/se7722_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:55:10 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:05:29 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,7 +70,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -78,6 +78,7 @@
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -97,6 +98,7 @@
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -105,6 +107,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -167,6 +170,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -176,8 +180,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 CONFIG_CPU_SUBTYPE_SH7722=y
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -486,6 +488,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -692,7 +695,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -766,6 +768,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -789,6 +792,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -832,6 +840,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -872,11 +881,25 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -977,6 +1000,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
new file mode 100644
index 0000000..96d2587
--- /dev/null
+++ b/arch/sh/configs/se7724_defconfig
@@ -0,0 +1,1552 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc6
+# Tue May 26 13:18:09 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_CMT=y
+CONFIG_SYS_SUPPORTS_TMU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+CONFIG_CPU_SHX2=y
+CONFIG_ARCH_SHMOBILE=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+CONFIG_CPU_SUBTYPE_SH7724=y
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x08000000
+CONFIG_29BIT=y
+# CONFIG_X2TLB is not set
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+CONFIG_SOLUTION_ENGINE=y
+CONFIG_SH_7724_SOLUTION_ENGINE=y
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TIMER_TMU=y
+# CONFIG_SH_TIMER_CMT is not set
+CONFIG_SH_PCLK_FREQ=33333333
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+CONFIG_HEARTBEAT=y
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_SECCOMP=y
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty1 console=ttySC3,115200 root=/dev/nfs ip=dhcp memchunk.vpu=4m"
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+# CONFIG_PM is not set
+# CONFIG_CPU_IDLE is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_MULTIPLE_TABLES is not set
+# CONFIG_IP_ROUTE_MULTIPATH is not set
+# CONFIG_IP_ROUTE_VERBOSE is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ALAUDA is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_WL_THRESHOLD=4096
+CONFIG_MTD_UBI_BEB_RESERVE=1
+# CONFIG_MTD_UBI_GLUEBI is not set
+
+#
+# UBI debugging options
+#
+# CONFIG_MTD_UBI_DEBUG is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=4
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+CONFIG_MDIO_BITBANG=y
+# CONFIG_MDIO_GPIO is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_STNIC is not set
+CONFIG_SMC91X=y
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+CONFIG_KEYBOARD_SH_KEYSC=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=y
+# CONFIG_SPI_GPIO is not set
+# CONFIG_SPI_SH_SCI is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+CONFIG_VIDEO_DEV=y
+CONFIG_VIDEO_V4L2_COMMON=y
+# CONFIG_VIDEO_ALLOW_V4L1 is not set
+CONFIG_VIDEO_V4L1_COMPAT=y
+# CONFIG_DVB_CORE is not set
+CONFIG_VIDEO_MEDIA=y
+
+#
+# Multimedia drivers
+#
+# CONFIG_MEDIA_ATTACH is not set
+CONFIG_MEDIA_TUNER=y
+# CONFIG_MEDIA_TUNER_CUSTOMISE is not set
+CONFIG_MEDIA_TUNER_SIMPLE=y
+CONFIG_MEDIA_TUNER_TDA8290=y
+CONFIG_MEDIA_TUNER_TDA9887=y
+CONFIG_MEDIA_TUNER_TEA5761=y
+CONFIG_MEDIA_TUNER_TEA5767=y
+CONFIG_MEDIA_TUNER_MT20XX=y
+CONFIG_MEDIA_TUNER_XC2028=y
+CONFIG_MEDIA_TUNER_XC5000=y
+CONFIG_MEDIA_TUNER_MC44S803=y
+CONFIG_VIDEO_V4L2=y
+CONFIG_VIDEOBUF_GEN=y
+CONFIG_VIDEOBUF_DMA_CONTIG=y
+CONFIG_VIDEO_CAPTURE_DRIVERS=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
+CONFIG_VIDEO_HELPER_CHIPS_AUTO=y
+# CONFIG_VIDEO_VIVI is not set
+# CONFIG_VIDEO_SAA5246A is not set
+# CONFIG_VIDEO_SAA5249 is not set
+CONFIG_SOC_CAMERA=y
+# CONFIG_SOC_CAMERA_MT9M001 is not set
+# CONFIG_SOC_CAMERA_MT9M111 is not set
+# CONFIG_SOC_CAMERA_MT9T031 is not set
+# CONFIG_SOC_CAMERA_MT9V022 is not set
+# CONFIG_SOC_CAMERA_TW9910 is not set
+# CONFIG_SOC_CAMERA_PLATFORM is not set
+CONFIG_SOC_CAMERA_OV772X=y
+CONFIG_VIDEO_SH_MOBILE_CEU=y
+CONFIG_V4L_USB_DRIVERS=y
+# CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
+CONFIG_USB_GSPCA=m
+# CONFIG_USB_M5602 is not set
+# CONFIG_USB_STV06XX is not set
+# CONFIG_USB_GSPCA_CONEX is not set
+# CONFIG_USB_GSPCA_ETOMS is not set
+# CONFIG_USB_GSPCA_FINEPIX is not set
+# CONFIG_USB_GSPCA_MARS is not set
+# CONFIG_USB_GSPCA_MR97310A is not set
+# CONFIG_USB_GSPCA_OV519 is not set
+# CONFIG_USB_GSPCA_OV534 is not set
+# CONFIG_USB_GSPCA_PAC207 is not set
+# CONFIG_USB_GSPCA_PAC7311 is not set
+# CONFIG_USB_GSPCA_SONIXB is not set
+# CONFIG_USB_GSPCA_SONIXJ is not set
+# CONFIG_USB_GSPCA_SPCA500 is not set
+# CONFIG_USB_GSPCA_SPCA501 is not set
+# CONFIG_USB_GSPCA_SPCA505 is not set
+# CONFIG_USB_GSPCA_SPCA506 is not set
+# CONFIG_USB_GSPCA_SPCA508 is not set
+# CONFIG_USB_GSPCA_SPCA561 is not set
+# CONFIG_USB_GSPCA_SQ905 is not set
+# CONFIG_USB_GSPCA_SQ905C is not set
+# CONFIG_USB_GSPCA_STK014 is not set
+# CONFIG_USB_GSPCA_SUNPLUS is not set
+# CONFIG_USB_GSPCA_T613 is not set
+# CONFIG_USB_GSPCA_TV8532 is not set
+# CONFIG_USB_GSPCA_VC032X is not set
+# CONFIG_USB_GSPCA_ZC3XX is not set
+# CONFIG_VIDEO_PVRUSB2 is not set
+# CONFIG_VIDEO_HDPVR is not set
+# CONFIG_VIDEO_EM28XX is not set
+# CONFIG_VIDEO_CX231XX is not set
+# CONFIG_VIDEO_USBVISION is not set
+# CONFIG_USB_ET61X251 is not set
+# CONFIG_USB_SN9C102 is not set
+# CONFIG_USB_ZC0301 is not set
+CONFIG_USB_PWC_INPUT_EVDEV=y
+# CONFIG_USB_ZR364XX is not set
+# CONFIG_USB_STKWEBCAM is not set
+# CONFIG_USB_S2255 is not set
+# CONFIG_RADIO_ADAPTERS is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+CONFIG_FB_SYS_FILLRECT=y
+CONFIG_FB_SYS_COPYAREA=y
+CONFIG_FB_SYS_IMAGEBLIT=y
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+CONFIG_FB_SYS_FOPS=y
+CONFIG_FB_DEFERRED_IO=y
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_SH_MOBILE_LCDC=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+# CONFIG_LOGO_SUPERH_VGA16 is not set
+CONFIG_LOGO_SUPERH_CLUT224=y
+# CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_HID_PID is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# Special HID drivers
+#
+# CONFIG_HID_A4TECH is not set
+# CONFIG_HID_APPLE is not set
+# CONFIG_HID_BELKIN is not set
+# CONFIG_HID_CHERRY is not set
+# CONFIG_HID_CHICONY is not set
+# CONFIG_HID_CYPRESS is not set
+# CONFIG_DRAGONRISE_FF is not set
+# CONFIG_HID_EZKEY is not set
+# CONFIG_HID_KYE is not set
+# CONFIG_HID_GYRATION is not set
+# CONFIG_HID_KENSINGTON is not set
+# CONFIG_HID_LOGITECH is not set
+# CONFIG_HID_MICROSOFT is not set
+# CONFIG_HID_MONTEREY is not set
+# CONFIG_HID_NTRIG is not set
+# CONFIG_HID_PANTHERLORD is not set
+# CONFIG_HID_PETALYNX is not set
+# CONFIG_HID_SAMSUNG is not set
+# CONFIG_HID_SONY is not set
+# CONFIG_HID_SUNPLUS is not set
+# CONFIG_GREENASIA_FF is not set
+# CONFIG_HID_TOPSEED is not set
+# CONFIG_THRUSTMASTER_FF is not set
+# CONFIG_ZEROPLUS_FF is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+CONFIG_USB_DEVICE_CLASS=y
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1760_HCD is not set
+# CONFIG_USB_SL811_HCD is not set
+CONFIG_USB_R8A66597_HCD=y
+# CONFIG_USB_HWA_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
+# CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+CONFIG_MMC_SPI=y
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+CONFIG_RTC_DRV_PCF8563=y
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_DS1390 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_DS3234 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_RTC_DRV_SH is not set
+# CONFIG_RTC_DRV_GENERIC is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_UBIFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+CONFIG_NLS_CODEPAGE_932=y
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+CONFIG_CRC_ITU_T=y
+CONFIG_CRC32=y
+CONFIG_CRC7=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index ceef6d9..ed1a123 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:57:31 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:06:02 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -106,7 +111,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -152,6 +156,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -161,8 +166,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -553,6 +556,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -775,6 +779,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -829,6 +838,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -886,10 +896,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -989,6 +1012,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 67fc26b3..55f3b78 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:59:59 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:06:44 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,6 +93,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -99,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -742,6 +744,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -796,6 +803,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -833,10 +841,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -936,6 +957,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7780_defconfig b/arch/sh/configs/se7780_defconfig
index ebce23c..c4f0af3 100644
--- a/arch/sh/configs/se7780_defconfig
+++ b/arch/sh/configs/se7780_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:02:05 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:07:14 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,6 +68,7 @@
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -86,12 +88,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -103,7 +108,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
 #
@@ -149,6 +153,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -158,8 +163,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -285,8 +288,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -558,6 +559,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -957,15 +959,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1122,6 +1126,10 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1245,11 +1253,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1345,6 +1366,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 6fcdb09..f9c6e51 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:04:59 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:07:56 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -41,6 +42,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -67,7 +69,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -105,6 +107,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -163,6 +166,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -172,8 +176,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -300,8 +302,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -562,6 +562,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -656,6 +657,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -679,6 +681,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -887,7 +890,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -929,6 +931,7 @@
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -952,6 +955,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1001,6 +1009,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1112,11 +1121,26 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1228,6 +1252,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index 1ab37c0..48b5809 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:09:01 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:09:16 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -72,6 +74,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -90,6 +93,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -97,6 +101,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -108,7 +114,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -154,6 +159,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -163,8 +169,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -728,7 +732,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -780,6 +783,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -834,6 +842,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -871,11 +880,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -975,6 +997,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
new file mode 100644
index 0000000..ec8f18c
--- /dev/null
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -0,0 +1,707 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:09:47 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+# CONFIG_GENERIC_GPIO is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_CMT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+# CONFIG_CGROUP_NS is not set
+# CONFIG_CGROUP_FREEZER is not set
+# CONFIG_CGROUP_DEVICE is not set
+# CONFIG_CPUSETS is not set
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+# CONFIG_MODULES is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_FREEZER=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+CONFIG_CPU_SHX2=y
+CONFIG_ARCH_SHMOBILE=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+CONFIG_CPU_SUBTYPE_SH7724=y
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+# CONFIG_X2TLB is not set
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
+CONFIG_SPARSEMEM_STATIC=y
+
+#
+# Memory hotplug is currently incompatible with Software Suspend
+#
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_CMT=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=41666666
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_SH_CPU_FREQ=y
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_SCHED_HRTICK=y
+CONFIG_KEXEC=y
+# CONFIG_CRASH_DUMP is not set
+CONFIG_KEXEC_JUMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP=y
+CONFIG_SUSPEND=y
+CONFIG_SUSPEND_FREEZER=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_STD_PARTITION=""
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_SH=y
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+# CONFIG_PROC_FS is not set
+# CONFIG_SYSFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+# CONFIG_MORE_COMPILE_OPTIONS is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index c79bb84..f77bc79 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:10:57 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:10:12 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -164,6 +167,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -173,8 +177,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -555,6 +557,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -941,6 +944,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -965,6 +969,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1012,6 +1021,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1100,11 +1110,25 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1204,6 +1228,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig
new file mode 100644
index 0000000..d6489b4
--- /dev/null
+++ b/arch/sh/configs/sh7770_generic_defconfig
@@ -0,0 +1,700 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc4
+# Tue May 12 14:48:21 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+# CONFIG_GENERIC_GPIO is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_ARCH_SUSPEND_POSSIBLE is not set
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_TMU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+# CONFIG_CGROUP_NS is not set
+# CONFIG_CGROUP_FREEZER is not set
+# CONFIG_CGROUP_DEVICE is not set
+# CONFIG_CPUSETS is not set
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+# CONFIG_MODULES is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_FREEZER=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+CONFIG_CPU_SUBTYPE_SH7770=y
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
+CONFIG_SPARSEMEM_STATIC=y
+
+#
+# Memory hotplug is currently incompatible with Software Suspend
+#
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=41666666
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_SH_CPU_FREQ=y
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_SCHED_HRTICK=y
+CONFIG_KEXEC=y
+# CONFIG_CRASH_DUMP is not set
+CONFIG_KEXEC_JUMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+CONFIG_ENTRY_OFFSET=0x00001000
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_STD_PARTITION=""
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_SH=y
+# CONFIG_RTC_DRV_GENERIC is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+# CONFIG_PROC_FS is not set
+# CONFIG_SYSFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index a6cf4505..1c55b80 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:12:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:10:53 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -79,6 +80,7 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -98,6 +100,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -106,6 +109,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -118,7 +123,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -166,6 +170,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -175,8 +180,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -310,8 +313,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -672,6 +673,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1009,15 +1011,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1218,6 +1222,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1240,6 +1245,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1289,6 +1299,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1377,6 +1388,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1413,6 +1427,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1422,9 +1437,14 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1542,6 +1562,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig
index e4fac2e..4385fe9 100644
--- a/arch/sh/configs/sh7785lcr_defconfig
+++ b/arch/sh/configs/sh7785lcr_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Wed Apr 22 19:17:56 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:11:48 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
@@ -307,8 +307,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index d695e90..4e12025 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:19:03 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:12:41 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -63,6 +64,7 @@
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -81,12 +83,15 @@
 # CONFIG_SLUB is not set
 CONFIG_SLOB=y
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_BASE_SMALL=1
 # CONFIG_MODULES is not set
@@ -137,6 +142,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -146,8 +152,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -675,6 +679,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -718,6 +727,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -762,10 +772,22 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -864,6 +886,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig
index e3651f5..c088144 100644
--- a/arch/sh/configs/shx3_defconfig
+++ b/arch/sh/configs/shx3_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:20:54 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:13:12 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -42,6 +43,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -96,6 +98,7 @@
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -126,6 +129,8 @@
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -138,7 +143,6 @@
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -186,6 +190,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -195,8 +200,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -553,6 +556,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -646,6 +650,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=2
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -985,6 +990,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1008,6 +1014,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1051,6 +1062,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1085,6 +1097,9 @@
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1124,6 +1139,7 @@
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1133,11 +1149,16 @@
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1245,6 +1266,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/snapgear_defconfig b/arch/sh/configs/snapgear_defconfig
index 6960f60..54a7a3c 100644
--- a/arch/sh/configs/snapgear_defconfig
+++ b/arch/sh/configs/snapgear_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:21:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:00 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -64,7 +65,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -73,6 +73,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +93,15 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +109,6 @@
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +154,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +164,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -295,8 +297,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -763,6 +763,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -805,8 +810,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -844,10 +854,23 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -862,6 +885,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/systemh_defconfig b/arch/sh/configs/systemh_defconfig
index 7ea639b..dbe7e54 100644
--- a/arch/sh/configs/systemh_defconfig
+++ b/arch/sh/configs/systemh_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:23:31 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:33 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -61,7 +62,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -70,6 +70,7 @@
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +89,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +97,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +111,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +156,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +166,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -506,6 +508,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -547,8 +554,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -577,10 +589,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -595,6 +621,7 @@
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index bbeb4c6..8ca94ef 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:24:55 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:55 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -66,7 +68,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -95,6 +97,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -102,6 +105,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -114,7 +119,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -160,6 +164,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -169,8 +174,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -305,8 +308,6 @@
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -789,6 +790,7 @@
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -906,6 +908,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -929,6 +932,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1182,7 +1186,6 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1393,6 +1396,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 # CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
@@ -1419,6 +1423,11 @@
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1467,8 +1476,13 @@
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1576,6 +1590,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1610,6 +1625,7 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1618,9 +1634,14 @@
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1741,6 +1762,7 @@
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig
index 34f5192..bfb4d98 100644
--- a/arch/sh/configs/ul2_defconfig
+++ b/arch/sh/configs/ul2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:30:27 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:17:05 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,7 +70,6 @@
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -78,6 +78,7 @@
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -97,6 +98,7 @@
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -105,6 +107,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -167,6 +170,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -176,8 +180,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 CONFIG_CPU_SUBTYPE_SH7366=y
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -584,6 +586,7 @@
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -936,6 +939,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -958,6 +962,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1005,6 +1014,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1095,10 +1105,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1208,6 +1232,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index d174b1a..512664f 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:33:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 14:02:55 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -76,6 +77,7 @@
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +96,7 @@
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -102,6 +105,8 @@
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -114,7 +119,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -162,6 +166,7 @@
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -171,8 +176,6 @@
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -900,15 +903,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1046,6 +1051,7 @@
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1068,6 +1074,11 @@
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1117,6 +1128,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1209,10 +1221,24 @@
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1322,6 +1348,7 @@
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index 666713a..63e9dd3 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -16,7 +16,8 @@
 		     CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
 		     CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091  || \
 		     CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7764  || \
-		     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785
+		     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
+		     CPU_SUBTYPE_SH7760
 
 config NR_ONCHIP_DMA_CHANNELS
 	int
diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index 7e816ed..e8db585 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -17,21 +17,3 @@
 	  code will not have to flush the CPU's caches. If you have a PCI host
 	  bridge integrated with your SH CPU, refer carefully to the chip specs
 	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
-
-# This is also board-specific
-config PCI_AUTO
-	bool
-	depends on PCI
-	default y
-
-config PCI_AUTO_UPDATE_RESOURCES
-	bool
-	depends on PCI_AUTO
-	default y if !SH_DREAMCAST
-	help
-	  Selecting this option will cause the PCI auto code to leave your
-	  BAR values alone. Otherwise they will be updated automatically. If
-	  for some reason, you have a board that simply refuses to work
-	  with its resources updated beyond what they are when the device
-	  is powered up, set this to N. Everyone else will want this as Y.
-
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 847e9089..d2ffc47 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -1,9 +1,7 @@
 #
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
-
 obj-y					+= pci.o
-obj-$(CONFIG_PCI_AUTO)			+= pci-auto.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7751R)	+= pci-sh7751.o ops-sh4.o
@@ -12,15 +10,17 @@
 obj-$(CONFIG_CPU_SUBTYPE_SH7785)	+= pci-sh7780.o ops-sh4.o
 obj-$(CONFIG_CPU_SH5)			+= pci-sh5.o ops-sh5.o
 
-obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o
-obj-$(CONFIG_SH_SECUREEDGE5410)		+= ops-snapgear.o
-obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
-obj-$(CONFIG_SH_SH03)			+= ops-sh03.o fixups-sh03.o
-obj-$(CONFIG_SH_HIGHLANDER)		+= ops-r7780rp.o fixups-r7780rp.o
-obj-$(CONFIG_SH_SDK7780)		+= ops-sdk7780.o fixups-sdk7780.o
-obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
-obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
-obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-lboxre2.o
-obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= ops-se7780.o fixups-se7780.o
-obj-$(CONFIG_SH_CAYMAN)			+= ops-cayman.o
-obj-$(CONFIG_SH_SH7785LCR)		+= ops-sh7785lcr.o fixups-sh7785lcr.o
+obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
+					   pci-dreamcast.o
+obj-$(CONFIG_SH_SECUREEDGE5410)		+= fixups-snapgear.o
+obj-$(CONFIG_SH_7751_SOLUTION_ENGINE)	+= fixups-se7751.o
+obj-$(CONFIG_SH_RTS7751R2D)		+= fixups-rts7751r2d.o
+obj-$(CONFIG_SH_SH03)			+= fixups-sh03.o
+obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
+obj-$(CONFIG_SH_SH7785LCR)		+= fixups-r7780rp.o
+obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
+obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
+obj-$(CONFIG_SH_TITAN)			+= fixups-titan.o
+obj-$(CONFIG_SH_LANDISK)		+= fixups-landisk.o
+obj-$(CONFIG_SH_LBOX_RE2)		+= fixups-rts7751r2d.o
+obj-$(CONFIG_SH_CAYMAN)			+= fixups-cayman.o
diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c
similarity index 88%
rename from arch/sh/drivers/pci/ops-cayman.c
rename to arch/sh/drivers/pci/fixups-cayman.c
index 38ef762..b68b61d 100644
--- a/arch/sh/drivers/pci/ops-cayman.c
+++ b/arch/sh/drivers/pci/fixups-cayman.c
@@ -75,15 +75,3 @@
 
 	return result;
 }
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh5_pci_ops, NULL, NULL, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-int __init pcibios_init_platform(void)
-{
-	return sh5pci_init(__pa(memory_start),
-			   __pa(memory_end) - __pa(memory_start));
-}
diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index 2bf85cf..ed7f489 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -30,7 +30,7 @@
 
 static void __init gapspci_fixup_resources(struct pci_dev *dev)
 {
-	struct pci_channel *p = board_pci_channels;
+	struct pci_channel *p = dev->sysdata;
 
 	printk(KERN_NOTICE "PCI: Fixing up device %s\n", pci_name(dev));
 
@@ -41,6 +41,13 @@
 		 */
 		dev->resource[1].start	= p->io_resource->start  + 0x100;
 		dev->resource[1].end	= dev->resource[1].start + 0x200 - 1;
+
+		/*
+		 * This is not a normal BAR, prevent any attempts to move
+		 * the BAR, as this will result in a bus lock.
+		 */
+		dev->resource[1].flags |= IORESOURCE_PCI_FIXED;
+
 		/*
 		 * Redirect dma memory allocations to special memory window.
 		 */
diff --git a/arch/sh/drivers/pci/fixups-landisk.c b/arch/sh/drivers/pci/fixups-landisk.c
new file mode 100644
index 0000000..bb1a6bb
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-landisk.c
@@ -0,0 +1,34 @@
+/*
+ * arch/sh/drivers/pci/ops-landisk.c
+ *
+ * PCI initialization for the I-O DATA Device, Inc. LANDISK board
+ *
+ * Copyright (C) 2006 kogiidena
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include "pci-sh4.h"
+
+int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	/*
+	 * slot0: pin1-4 = irq5,6,7,8
+	 * slot1: pin1-4 = irq6,7,8,5
+	 * slot2: pin1-4 = irq7,8,5,6
+	 * slot3: pin1-4 = irq8,5,6,7
+	 */
+	int irq = ((slot + pin - 1) & 0x3) + 5;
+
+	if ((slot | (pin - 1)) > 0x3) {
+		printk("PCI: Bad IRQ mapping request for slot %d pin %c\n",
+		       slot, pin - 1 + 'A');
+		return -1;
+	}
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c
deleted file mode 100644
index 1c1d412..0000000
--- a/arch/sh/drivers/pci/fixups-lboxre2.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-lboxre2.c
- *
- * L-BOX RE2 PCI fixups
- *
- * Copyright (C) 2007 Nobuhiro Iwamatsu
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include "pci-sh4.h"
-
-#define PCIMCR_MRSET_OFF	0xBFFFFFFF
-#define PCIMCR_RFSH_OFF		0xFFFFFFFB
-
-int pci_fixup_pcic(void)
-{
-	unsigned long bcr1, mcr;
-
-	bcr1 = ctrl_inl(SH7751_BCR1);
-	bcr1 |= 0x40080000;	/* Enable Bit 19 BREQEN, set PCIC to slave */
-	pci_write_reg(bcr1, SH4_PCIBCR1);
-
-	/* Enable all interrupts, so we known what to fix */
-	pci_write_reg(0x0000c3ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
-	pci_write_reg(0xfb900047, SH7751_PCICONF1);
-	pci_write_reg(0xab000001, SH7751_PCICONF4);
-
-	mcr = ctrl_inl(SH7751_MCR);
-	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-	pci_write_reg(mcr, SH4_PCIMCR);
-
-	pci_write_reg(0x0c000000, SH7751_PCICONF5);
-	pci_write_reg(0xd0000000, SH7751_PCICONF6);
-	pci_write_reg(0x0c000000, SH4_PCILAR0);
-	pci_write_reg(0x00000000, SH4_PCILAR1);
-
-	return 0;
-}
diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 3e321df..15ca65c 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -11,35 +11,26 @@
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
-#include <asm/io.h>
 
-int pci_fixup_pcic(void)
+static char irq_tab[] __initdata = {
+	65, 66, 67, 68,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
-	pci_write_reg(0x000043ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
+	return irq_tab[slot];
+}
 
-	pci_write_reg(0xfbb00047, SH7780_PCICMD);
-	pci_write_reg(0x00000000, SH7780_PCIIBAR);
-
-	pci_write_reg(0x00011912, SH7780_PCISVID);
-	pci_write_reg(0x08000000, SH7780_PCICSCR0);
-	pci_write_reg(0x0000001b, SH7780_PCICSAR0);
-	pci_write_reg(0xfd000000, SH7780_PCICSCR1);
-	pci_write_reg(0x0000000f, SH7780_PCICSAR1);
-
-	pci_write_reg(0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(0x00fc0000, SH7780_PCIMBMR0);
-
-#ifdef CONFIG_32BIT
-	pci_write_reg(0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
-#endif
-
-	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
-		      SH7780_PCIIOBR);
-	pci_write_reg(((SH7780_PCI_IO_SIZE-1) & (7<<18)), SH7780_PCIIOBMR);
+int pci_fixup_pcic(struct pci_channel *chan)
+{
+	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0);
+	pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0);
+	pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1);
+	pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c
index 904bce8..052b354 100644
--- a/arch/sh/drivers/pci/fixups-rts7751r2d.c
+++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c
@@ -1,43 +1,67 @@
 /*
  * arch/sh/drivers/pci/fixups-rts7751r2d.c
  *
- * RTS7751R2D PCI fixups
+ * RTS7751R2D / LBOXRE2 PCI fixups
  *
  * Copyright (C) 2003  Lineo uSolutions, Inc.
  * Copyright (C) 2004  Paul Mundt
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/pci.h>
+#include <mach/lboxre2.h>
+#include <mach/r2d.h>
 #include "pci-sh4.h"
+#include <asm/machtypes.h>
 
 #define PCIMCR_MRSET_OFF	0xBFFFFFFF
 #define PCIMCR_RFSH_OFF		0xFFFFFFFB
 
-int pci_fixup_pcic(void)
+static u8 rts7751r2d_irq_tab[] __initdata = {
+	IRQ_PCI_INTA,
+	IRQ_PCI_INTB,
+	IRQ_PCI_INTC,
+	IRQ_PCI_INTD,
+};
+
+static char lboxre2_irq_tab[] __initdata = {
+	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	if (mach_is_lboxre2())
+		return lboxre2_irq_tab[slot];
+	else
+		return rts7751r2d_irq_tab[slot];
+}
+
+int pci_fixup_pcic(struct pci_channel *chan)
 {
 	unsigned long bcr1, mcr;
 
 	bcr1 = ctrl_inl(SH7751_BCR1);
 	bcr1 |= 0x40080000;	/* Enable Bit 19 BREQEN, set PCIC to slave */
-	pci_write_reg(bcr1, SH4_PCIBCR1);
+	pci_write_reg(chan, bcr1, SH4_PCIBCR1);
 
 	/* Enable all interrupts, so we known what to fix */
-	pci_write_reg(0x0000c3ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
+	pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
 
-	pci_write_reg(0xfb900047, SH7751_PCICONF1);
-	pci_write_reg(0xab000001, SH7751_PCICONF4);
+	pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1);
+	pci_write_reg(chan, 0xab000001, SH7751_PCICONF4);
 
 	mcr = ctrl_inl(SH7751_MCR);
 	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-	pci_write_reg(mcr, SH4_PCIMCR);
+	pci_write_reg(chan, mcr, SH4_PCIMCR);
 
-	pci_write_reg(0x0c000000, SH7751_PCICONF5);
-	pci_write_reg(0xd0000000, SH7751_PCICONF6);
-	pci_write_reg(0x0c000000, SH4_PCILAR0);
-	pci_write_reg(0x00000000, SH4_PCILAR1);
+	pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5);
+	pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6);
+	pci_write_reg(chan, 0x0c000000, SH4_PCILAR0);
+	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index 2f88630..250b0ed 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -5,55 +5,48 @@
  *
  * Copyright (C) 2003  Lineo uSolutions, Inc.
  * Copyright (C) 2004 - 2006  Paul Mundt
+ * Copyright (C) 2006  Nobuhiro Iwamatsu
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
-#include <asm/io.h>
 
-int pci_fixup_pcic(void)
+/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
+static char sdk7780_irq_tab[4][16] __initdata = {
+	/* INTA */
+	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTB */
+	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTC */
+	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTD */
+	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
-
+       return sdk7780_irq_tab[pin-1][slot];
+}
+int pci_fixup_pcic(struct pci_channel *chan)
+{
 	/* Enable all interrupts, so we know what to fix */
-	pci_write_reg(0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(0x0000380F, SH7780_PCIAINTM);
+	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
 
 	/* Set up standard PCI config registers */
-	pci_write_reg(0xFB00, SH7780_PCISTATUS);
-	pci_write_reg(0x0047, SH7780_PCICMD);
-	pci_write_reg(0x00, SH7780_PCIPIF);
-	pci_write_reg(0x00, SH7780_PCISUB);
-	pci_write_reg(0x06, SH7780_PCIBCC);
-	pci_write_reg(0x1912, SH7780_PCISVID);
-	pci_write_reg(0x0001, SH7780_PCISID);
+	pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0);	/* PCI */
+	pci_write_reg(chan, 0x08000000, SH4_PCILAR0);	/* SHwy */
+	pci_write_reg(chan, 0x07F00001, SH4_PCILSR0);	/* size 128M w/ MBAR */
 
-	pci_write_reg(0x08000000, SH7780_PCIMBAR0);	/* PCI */
-	pci_write_reg(0x08000000, SH7780_PCILAR0);	/* SHwy */
-	pci_write_reg(0x07F00001, SH7780_PCILSR);	/* size 128M w/ MBAR */
+	pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1);
+	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
+	pci_write_reg(chan, 0x00000000, SH4_PCILSR1);
 
-	pci_write_reg(0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(0x00000000, SH7780_PCILAR1);
-	pci_write_reg(0x00000000, SH7780_PCILSR1);
-
-	pci_write_reg(0xAB000801, SH7780_PCIIBAR);
-
-	/*
-	 * Set the MBR so PCI address is one-to-one with window,
-	 * meaning all calls go straight through... use ifdef to
-	 * catch erroneous assumption.
-	 */
-	pci_write_reg(0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0);	/* 16M */
-
-	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR);
-	pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR);
-
-	pci_write_reg(0xA5000C01, SH7780_PCICR);
+	pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0xA5000C01, SH4_PCICR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c
new file mode 100644
index 0000000..475fa9f
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-se7751.c
@@ -0,0 +1,111 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include "pci-sh4.h"
+
+int __init pcibios_map_platform_irq(u8 slot, u8 pin)
+{
+        switch (slot) {
+        case 0: return 13;
+        case 1: return 13;	/* AMD Ethernet controller */
+        case 2: return -1;
+        case 3: return -1;
+        case 4: return -1;
+        default:
+                printk("PCI: Bad IRQ mapping request for slot %d\n", slot);
+                return -1;
+        }
+}
+
+#define PCIMCR_MRSET_OFF	0xBFFFFFFF
+#define PCIMCR_RFSH_OFF		0xFFFFFFFB
+
+/*
+ * Only long word accesses of the PCIC's internal local registers and the
+ * configuration registers from the CPU is supported.
+ */
+#define PCIC_WRITE(x,v) writel((v), PCI_REG(x))
+#define PCIC_READ(x) readl(PCI_REG(x))
+
+/*
+ * Description:  This function sets up and initializes the pcic, sets
+ * up the BARS, maps the DRAM into the address space etc, etc.
+ */
+int pci_fixup_pcic(struct pci_channel *chan)
+{
+	unsigned long bcr1, wcr1, wcr2, wcr3, mcr;
+	unsigned short bcr2;
+
+	/*
+	* Initialize the slave bus controller on the pcic.  The values used
+	* here should not be hardcoded, but they should be taken from the bsc
+	* on the processor, to make this function as generic as possible.
+	* (i.e. Another sbc may usr different SDRAM timing settings -- in order
+	* for the pcic to work, its settings need to be exactly the same.)
+	*/
+	bcr1 = (*(volatile unsigned long*)(SH7751_BCR1));
+	bcr2 = (*(volatile unsigned short*)(SH7751_BCR2));
+	wcr1 = (*(volatile unsigned long*)(SH7751_WCR1));
+	wcr2 = (*(volatile unsigned long*)(SH7751_WCR2));
+	wcr3 = (*(volatile unsigned long*)(SH7751_WCR3));
+	mcr = (*(volatile unsigned long*)(SH7751_MCR));
+
+	bcr1 = bcr1 | 0x00080000;  /* Enable Bit 19, BREQEN */
+	(*(volatile unsigned long*)(SH7751_BCR1)) = bcr1;
+
+	bcr1 = bcr1 | 0x40080000;  /* Enable Bit 19 BREQEN, set PCIC to slave */
+	PCIC_WRITE(SH7751_PCIBCR1, bcr1);	 /* PCIC BCR1 */
+	PCIC_WRITE(SH7751_PCIBCR2, bcr2);     /* PCIC BCR2 */
+	PCIC_WRITE(SH7751_PCIWCR1, wcr1);     /* PCIC WCR1 */
+	PCIC_WRITE(SH7751_PCIWCR2, wcr2);     /* PCIC WCR2 */
+	PCIC_WRITE(SH7751_PCIWCR3, wcr3);     /* PCIC WCR3 */
+	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
+	PCIC_WRITE(SH7751_PCIMCR, mcr);      /* PCIC MCR */
+
+
+	/* Enable all interrupts, so we know what to fix */
+	PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff);
+	PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f);
+
+	/* Set up standard PCI config registers */
+	PCIC_WRITE(SH7751_PCICONF1,	0xF39000C7); /* Bus Master, Mem & I/O access */
+	PCIC_WRITE(SH7751_PCICONF2,	0x00000000); /* PCI Class code & Revision ID */
+	PCIC_WRITE(SH7751_PCICONF4,	0xab000001); /* PCI I/O address (local regs) */
+	PCIC_WRITE(SH7751_PCICONF5,	0x0c000000); /* PCI MEM address (local RAM)  */
+	PCIC_WRITE(SH7751_PCICONF6,	0xd0000000); /* PCI MEM address (unused)     */
+	PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */
+	PCIC_WRITE(SH7751_PCILSR0, 0x03f00000);   /* MEM (full 64M exposed)       */
+	PCIC_WRITE(SH7751_PCILSR1, 0x00000000);   /* MEM (unused)                 */
+	PCIC_WRITE(SH7751_PCILAR0, 0x0c000000);   /* MEM (direct map from PCI)    */
+	PCIC_WRITE(SH7751_PCILAR1, 0x00000000);   /* MEM (unused)                 */
+
+	/* Now turn it on... */
+	PCIC_WRITE(SH7751_PCICR, 0xa5000001);
+
+	/*
+	* Set PCIMBR and PCIIOBR here, assuming a single window
+	* (16M MEM, 256K IO) is enough.  If a larger space is
+	* needed, the readx/writex and inx/outx functions will
+	* have to do more (e.g. setting registers for each call).
+	*/
+
+	/*
+	* Set the MBR so PCI address is one-to-one with window,
+	* meaning all calls go straight through... use BUG_ON to
+	* catch erroneous assumption.
+	*/
+	BUG_ON(chan->mem_resource->start != SH7751_PCI_MEMORY_BASE);
+
+	PCIC_WRITE(SH7751_PCIMBR, chan->mem_resource->start);
+
+	/* Set IOBR for window containing area specified in pci.h */
+	PCIC_WRITE(SH7751_PCIIOBR, (chan->io_resource->start & SH7751_PCIIOBR_MASK));
+
+	/* All done, may as well say so... */
+	printk("SH7751 PCI: Finished initialization of the PCI controller\n");
+
+	return 1;
+}
diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c
deleted file mode 100644
index 880cea1..0000000
--- a/arch/sh/drivers/pci/fixups-se7780.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-se7780.c
- *
- * HITACHI UL Solution Engine 7780  PCI fixups
- *
- * Copyright (C) 2003  Lineo uSolutions, Inc.
- * Copyright (C) 2004 - 2006  Paul Mundt
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/pci.h>
-#include "pci-sh4.h"
-#include <asm/io.h>
-
-int pci_fixup_pcic(void)
-{
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
-
-	/* Enable all interrupts, so we know what to fix */
-	pci_write_reg(0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(0x0000380F, SH7780_PCIAINTM);
-
-	/* Set up standard PCI config registers */
-	ctrl_outw(0xFB00, PCI_REG(SH7780_PCISTATUS));
-	ctrl_outw(0x0047, PCI_REG(SH7780_PCICMD));
-	ctrl_outb(  0x00, PCI_REG(SH7780_PCIPIF));
-	ctrl_outb(  0x00, PCI_REG(SH7780_PCISUB));
-	ctrl_outb(  0x06, PCI_REG(SH7780_PCIBCC));
-	ctrl_outw(0x1912, PCI_REG(SH7780_PCISVID));
-	ctrl_outw(0x0001, PCI_REG(SH7780_PCISID));
-
-	pci_write_reg(0x08000000, SH7780_PCIMBAR0);     /* PCI */
-	pci_write_reg(0x08000000, SH7780_PCILAR0);     /* SHwy */
-	pci_write_reg(0x07F00001, SH7780_PCILSR);      /* size 128M w/ MBAR */
-
-	pci_write_reg(0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(0x00000000, SH7780_PCILAR1);
-	pci_write_reg(0x00000000, SH7780_PCILSR1);
-
-	pci_write_reg(0xAB000801, SH7780_PCIIBAR);
-
-	/*
-	 * Set the MBR so PCI address is one-to-one with window,
-	 * meaning all calls go straight through... use ifdef to
-	 * catch erroneous assumption.
-	 */
-	pci_write_reg(0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0);    /* 16M */
-
-	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR);
-	pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR);
-
-	pci_write_reg(0xA5000C01, SH7780_PCICR);
-
-	return 0;
-}
diff --git a/arch/sh/drivers/pci/fixups-sh7785lcr.c b/arch/sh/drivers/pci/fixups-sh7785lcr.c
deleted file mode 100644
index 4949e60..0000000
--- a/arch/sh/drivers/pci/fixups-sh7785lcr.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-sh7785lcr.c
- *
- * R0P7785LC0011RL PCI fixups
- * Copyright (C) 2008  Yoshihiro Shimoda
- *
- * Based on arch/sh/drivers/pci/fixups-r7780rp.c
- * Copyright (C) 2003  Lineo uSolutions, Inc.
- * Copyright (C) 2004 - 2006  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-int pci_fixup_pcic(void)
-{
-	pci_write_reg(0x000043ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
-
-	pci_write_reg(0xfbb00047, SH7780_PCICMD);
-	pci_write_reg(0x00000000, SH7780_PCIIBAR);
-
-	pci_write_reg(0x00011912, SH7780_PCISVID);
-	pci_write_reg(0x08000000, SH7780_PCICSCR0);
-	pci_write_reg(0x0000001b, SH7780_PCICSAR0);
-	pci_write_reg(0xfd000000, SH7780_PCICSCR1);
-	pci_write_reg(0x0000000f, SH7780_PCICSAR1);
-
-	pci_write_reg(0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(0x00fc0000, SH7780_PCIMBMR0);
-
-#ifdef CONFIG_32BIT
-	pci_write_reg(0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
-#endif
-
-	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
-		      SH7780_PCIIOBR);
-	pci_write_reg(((SH7780_PCI_IO_SIZE - 1) & (7 << 18)), SH7780_PCIIOBMR);
-
-	return 0;
-}
diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c
new file mode 100644
index 0000000..5a39ecc
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-snapgear.c
@@ -0,0 +1,38 @@
+/*
+ * arch/sh/drivers/pci/ops-snapgear.c
+ *
+ * Author:  David McCullough <davidm@snapgear.com>
+ *
+ * Ported to new API by Paul Mundt <lethal@linux-sh.org>
+ *
+ * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * PCI initialization for the SnapGear boards
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include "pci-sh4.h"
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	int irq = -1;
+
+	switch (slot) {
+	case 8:  /* the PCI bridge */ break;
+	case 11: irq = 8;  break; /* USB    */
+	case 12: irq = 11; break; /* PCMCIA */
+	case 13: irq = 5;  break; /* eth0   */
+	case 14: irq = 8;  break; /* eth1   */
+	case 15: irq = 11; break; /* safenet (unused) */
+	}
+
+	printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n",
+	       slot, pin - 1 + 'A', irq);
+
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c
new file mode 100644
index 0000000..3a79fa8
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-titan.c
@@ -0,0 +1,38 @@
+/*
+ * arch/sh/drivers/pci/ops-titan.c
+ *
+ * Ported to new API by Paul Mundt <lethal@linux-sh.org>
+ *
+ * Modified from ops-snapgear.c written by  David McCullough
+ * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * PCI initialization for the Titan boards
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <mach/titan.h>
+#include "pci-sh4.h"
+
+static char titan_irq_tab[] __initdata = {
+	TITAN_IRQ_WAN,
+	TITAN_IRQ_LAN,
+	TITAN_IRQ_MPCIA,
+	TITAN_IRQ_MPCIB,
+	TITAN_IRQ_USB,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	int irq = titan_irq_tab[slot];
+
+	printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n",
+		slot, pin - 1 + 'A', irq);
+
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c
index f5d2a2a..e83d0d3 100644
--- a/arch/sh/drivers/pci/ops-dreamcast.c
+++ b/arch/sh/drivers/pci/ops-dreamcast.c
@@ -1,15 +1,9 @@
 /*
- * arch/sh/drivers/pci/ops-dreamcast.c
- *
  * PCI operations for the Sega Dreamcast
  *
  * Copyright (C) 2001, 2002  M. R. Brown
  * Copyright (C) 2002, 2003  Paul Mundt
  *
- * This file originally bore the message (with enclosed-$):
- *	Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp
- *	Dreamcast PCI: Supports SEGA Broadband Adaptor only.
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -23,34 +17,10 @@
 #include <linux/irq.h>
 #include <linux/pci.h>
 #include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
+#include <linux/io.h>
+#include <linux/irq.h>
 #include <mach/pci.h>
 
-static struct resource gapspci_io_resource = {
-	.name	= "GAPSPCI IO",
-	.start	= GAPSPCI_BBA_CONFIG,
-	.end	= GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1,
-	.flags	= IORESOURCE_IO,
-};
-
-static struct resource gapspci_mem_resource = {
-	.name	= "GAPSPCI mem",
-	.start	= GAPSPCI_DMA_BASE,
-	.end	= GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1,
-	.flags	= IORESOURCE_MEM,
-};
-
-static struct pci_ops gapspci_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &gapspci_pci_ops, &gapspci_io_resource,
-	  &gapspci_mem_resource, 0, 1 },
-	{ 0, }
-};
-EXPORT_SYMBOL(board_pci_channels);
-
 /*
  * The !gapspci_config_access case really shouldn't happen, ever, unless
  * someone implicitly messes around with the last devfn value.. otherwise we
@@ -85,10 +55,10 @@
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	switch (size) {
-		case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break;
-		case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break;
-		case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break;
-	}	
+	case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break;
+	case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break;
+	case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break;
+	}
 
         return PCIBIOS_SUCCESSFUL;
 }
@@ -99,72 +69,15 @@
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	switch (size) {
-		case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break;
-		case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break;
-		case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break;
 	}
 
         return PCIBIOS_SUCCESSFUL;
 }
 
-static struct pci_ops gapspci_pci_ops = {
+struct pci_ops gapspci_pci_ops = {
 	.read	= gapspci_read,
 	.write	= gapspci_write,
 };
-
-/*
- * gapspci init
- */
-
-int __init gapspci_init(void)
-{
-	char idbuf[16];
-	int i;
-
-	/*
-	 * FIXME: All of this wants documenting to some degree,
-	 * even some basic register definitions would be nice.
-	 *
-	 * I haven't seen anything this ugly since.. maple.
-	 */
-
-	for (i=0; i<16; i++)
-		idbuf[i] = inb(GAPSPCI_REGS+i);
-
-	if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16))
-		return -ENODEV;
-
-	outl(0x5a14a501, GAPSPCI_REGS+0x18);
-
-	for (i=0; i<1000000; i++)
-		;
-
-	if (inl(GAPSPCI_REGS+0x18) != 1)
-		return -EINVAL;
-
-	outl(0x01000000, GAPSPCI_REGS+0x20);
-	outl(0x01000000, GAPSPCI_REGS+0x24);
-
-	outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28);
-	outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c);
-
-	outl(1, GAPSPCI_REGS+0x14);
-	outl(1, GAPSPCI_REGS+0x34);
-
-	/* Setting Broadband Adapter */
-	outw(0xf900, GAPSPCI_BBA_CONFIG+0x06);
-	outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30);
-	outb(0x00, GAPSPCI_BBA_CONFIG+0x3c);
-	outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d);
-	outw(0x0006, GAPSPCI_BBA_CONFIG+0x04);
-	outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10);
-	outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14);
-
-	return 0;
-}
-
-/* Haven't done anything here as yet */
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
deleted file mode 100644
index bff09ec..0000000
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-landisk.c
- *
- * PCI initialization for the I-O DATA Device, Inc. LANDISK board
- *
- * Copyright (C) 2006 kogiidena
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-static struct resource sh7751_io_resource = {
-	.name = "SH7751 IO",
-	.start = SH7751_PCI_IO_BASE,
-	.end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name = "SH7751 mem",
-	.start = SH7751_PCI_MEMORY_BASE,
-	.end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags = IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{&sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff},
-	{NULL, NULL, NULL, 0, 0},
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0 = {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= (64 << 20),	/* 64MB */
-	},
-
-	.flags = SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&sh7751_pci_map);
-}
-
-int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	/*
-	 * slot0: pin1-4 = irq5,6,7,8
-	 * slot1: pin1-4 = irq6,7,8,5
-	 * slot2: pin1-4 = irq7,8,5,6
-	 * slot3: pin1-4 = irq8,5,6,7
-	 */
-	int irq = ((slot + pin - 1) & 0x3) + 5;
-
-	if ((slot | (pin - 1)) > 0x3) {
-		printk("PCI: Bad IRQ mapping request for slot %d pin %c\n",
-		       slot, pin - 1 + 'A');
-		return -1;
-	}
-	return irq;
-}
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
deleted file mode 100644
index 86c0b6f..0000000
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-lboxre2.c
- *
- * Copyright (C) 2007 Nobuhiro Iwamatsu
- *
- * PCI initialization for the NTT COMWARE L-BOX RE2
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/lboxre2.h>
-#include "pci-sh4.h"
-
-static char lboxre2_irq_tab[] __initdata = {
-	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return lboxre2_irq_tab[slot];
-}
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= SH7751_PCI_IO_BASE ,
-	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-	.window1	= {
-		.base	= 0x00000000,	/* Unused */
-		.size	= 0x00000000,	/* Unused */
-	},
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&sh7751_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
deleted file mode 100644
index 8555238..0000000
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas SH7780 Highlander R7780RP-1 board
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach/highlander.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-static char irq_tab[] __initdata = {
-	65, 66, 67, 68,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return irq_tab[slot];
-}
-
-static struct resource sh7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7780_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sh7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-
-	.window1	= {
-		.base	= SH7780_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7780_pcic_init(&sh7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
deleted file mode 100644
index d6ca74b..0000000
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-rts7751r2d.c
- *
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas SH7751R RTS7751R2D board
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/r2d.h>
-#include "pci-sh4.h"
-
-static u8 rts7751r2d_irq_tab[] __initdata = {
-	IRQ_PCI_INTA,
-	IRQ_PCI_INTB,
-	IRQ_PCI_INTC,
-	IRQ_PCI_INTD,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return rts7751r2d_irq_tab[slot];
-}
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= 0x4000,
-	.end	= SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-
-	.window1	= {
-		.base	= 0x00000000,	/* Unused */
-		.size	= 0x00000000,	/* Unused */
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	__set_io_port_base(SH7751_PCI_IO_BASE);
-	return sh7751_pcic_init(&sh7751_pci_map);
-}
-
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
deleted file mode 100644
index 4dcc641..0000000
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-sdk7780.c
- *
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * PCI initialization for the SDK7780SE03
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach/sdk7780.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char sdk7780_irq_tab[4][16] __initdata = {
-	/* INTA */
-	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTB */
-	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTC */
-	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTD */
-	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-       return sdk7780_irq_tab[pin-1][slot];
-}
-
-static struct resource sdk7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sdk7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sdk7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-	.window1	= {
-		.base	= SH7780_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n");
-	return sh7780_pcic_init(&sdk7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
deleted file mode 100644
index 3145c62..0000000
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-se7780.c
- *
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * PCI initialization for the Hitachi UL Solution Engine 7780SE03
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach-se/mach/se7780.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-/*
- * IDSEL = AD16  PCI slot
- * IDSEL = AD17  PCI slot
- * IDSEL = AD18  Serial ATA Controller (Silicon Image SiL3512A)
- * IDSEL = AD19  USB Host Controller (NEC uPD7210100A)
- */
-
-/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char se7780_irq_tab[4][16] __initdata = {
-	/* INTA */
-	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTB */
-	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTC */
-	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTD */
-	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-       return se7780_irq_tab[pin-1][slot];
-}
-
-static struct resource se7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource se7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops se7780_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map se7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-	.flags  = SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	printk("SH7780 PCI: Finished initialization of the PCI controller\n");
-
-	/*
-	 * FPGA PCISEL register initialize
-	 *
-	 *  CPU  || SLOT1 | SLOT2 | S-ATA | USB
-	 *  -------------------------------------
-	 *  INTA || INTA  | INTD  |  --   | INTB
-	 *  -------------------------------------
-	 *  INTB || INTB  | INTA  |  --   | INTC
-	 *  -------------------------------------
-	 *  INTC || INTC  | INTB  | INTA  |  --
-	 *  -------------------------------------
-	 *  INTD || INTD  | INTC  |  --   | INTA
-	 *  -------------------------------------
-	 */
-	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
-	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
-
-	return sh7780_pcic_init(&se7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-sh03.c b/arch/sh/drivers/pci/ops-sh03.c
deleted file mode 100644
index e1703ff..0000000
--- a/arch/sh/drivers/pci/ops-sh03.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-sh03.c
- *
- * PCI initialization for the Interface CTP/PCI-SH03 board
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <asm/io.h>
-#include "pci-sh7751.h"
-
-/*
- * Description:  This function sets up and initializes the pcic, sets
- * up the BARS, maps the DRAM into the address space etc, etc.
- */
-int __init pcibios_init_platform(void)
-{
-	__set_io_port_base(SH7751_PCI_IO_BASE);
-	return 1;
-}
-
-static struct resource sh7751_io_resource = {
-	.name   = "SH03 IO",
-	.start  = SH7751_PCI_IO_BASE,
-	.end    = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags  = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name   = "SH03 mem",
-	.start  = SH7751_PCI_MEMORY_BASE,
-	.end    = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags  = IORESOURCE_MEM
-};
-
-extern struct pci_ops sh4_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 710a3b0..78bebeb 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -1,22 +1,22 @@
 /*
  * Generic SH-4 / SH-4A PCIC operations (SH7751, SH7780).
  *
- * Copyright (C) 2002 - 2006  Paul Mundt
+ * Copyright (C) 2002 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License v2. See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include <asm/addrspace.h>
-#include <asm/io.h>
 #include "pci-sh4.h"
 
 /*
  * Direct access to PCI hardware...
  */
 #define CONFIG_CMD(bus, devfn, where) \
-	P1SEGADDR((bus->number << 16) | (devfn << 8) | (where & ~3))
+	(P1SEG | (bus->number << 16) | (devfn << 8) | (where & ~3))
 
 static DEFINE_SPINLOCK(sh4_pci_lock);
 
@@ -26,6 +26,7 @@
 static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn,
 			   int where, int size, u32 *val)
 {
+	struct pci_channel *chan = bus->sysdata;
 	unsigned long flags;
 	u32 data;
 
@@ -34,8 +35,8 @@
 	 * so we must do byte alignment by hand
 	 */
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(SH4_PCIPDR);
+	pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(chan, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -63,13 +64,14 @@
 static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn,
 			 int where, int size, u32 val)
 {
+	struct pci_channel *chan = bus->sysdata;
 	unsigned long flags;
 	int shift;
 	u32 data;
 
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(SH4_PCIPDR);
+	pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(chan, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -90,7 +92,7 @@
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
 
-	pci_write_reg(data, SH4_PCIPDR);
+	pci_write_reg(chan, data, SH4_PCIPDR);
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -104,66 +106,31 @@
  * Not really related to pci_ops, but it's common and not worth shoving
  * somewhere else for now..
  */
-static unsigned int pci_probe = PCI_PROBE_CONF1;
-
-int __init sh4_pci_check_direct(void)
+int __init sh4_pci_check_direct(struct pci_channel *chan)
 {
 	/*
 	 * Check if configuration works.
 	 */
-	if (pci_probe & PCI_PROBE_CONF1) {
-		unsigned int tmp = pci_read_reg(SH4_PCIPAR);
+	unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR);
 
-		pci_write_reg(P1SEG, SH4_PCIPAR);
+	pci_write_reg(chan, P1SEG, SH4_PCIPAR);
 
-		if (pci_read_reg(SH4_PCIPAR) == P1SEG) {
-			pci_write_reg(tmp, SH4_PCIPAR);
-			printk(KERN_INFO "PCI: Using configuration type 1\n");
-			request_region(PCI_REG(SH4_PCIPAR), 8, "PCI conf1");
-
-			return 0;
-		}
-
-		pci_write_reg(tmp, SH4_PCIPAR);
+	if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) {
+		pci_write_reg(chan, tmp, SH4_PCIPAR);
+		printk(KERN_INFO "PCI: Using configuration type 1\n");
+		request_region(chan->reg_base + SH4_PCIPAR, 8,
+			       "PCI conf1");
+		return 0;
 	}
 
-	pr_debug("PCI: pci_check_direct failed\n");
+	pci_write_reg(chan, tmp, SH4_PCIPAR);
+
+	printk(KERN_ERR "PCI: %s failed\n", __func__);
+
 	return -EINVAL;
 }
 
-/* Handle generic fixups */
-static void __init pci_fixup_ide_bases(struct pci_dev *d)
-{
-	int i;
-
-	/*
-	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
-	 */
-	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
-		return;
-	pr_debug("PCI: IDE base address fixup for %s\n", pci_name(d));
-	for(i = 0; i < 4; i++) {
-		struct resource *r = &d->resource[i];
-
-		if ((r->start & ~0x80) == 0x374) {
-			r->start |= 2;
-			r->end = r->start;
-		}
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
-
-char * __devinit pcibios_setup(char *str)
-{
-	if (!strcmp(str, "off")) {
-		pci_probe = 0;
-		return NULL;
-	}
-
-	return str;
-}
-
-int __attribute__((weak)) pci_fixup_pcic(void)
+int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Nothing to do. */
 	return 0;
diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c
index 729e38a..4ce95a0 100644
--- a/arch/sh/drivers/pci/ops-sh5.c
+++ b/arch/sh/drivers/pci/ops-sh5.c
@@ -22,31 +22,6 @@
 #include <asm/io.h>
 #include "pci-sh5.h"
 
-static void __init pci_fixup_ide_bases(struct pci_dev *d)
-{
-	int i;
-
-	/*
-	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
-	 */
-	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
-		return;
-	printk("PCI: IDE base address fixup for %s\n", pci_name(d));
-	for(i=0; i<4; i++) {
-		struct resource *r = &d->resource[i];
-		if ((r->start & ~0x80) == 0x374) {
-			r->start |= 2;
-			r->end = r->start;
-		}
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
-
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
-
 static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where,
 			int size, u32 *val)
 {
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
deleted file mode 100644
index fb0869f..0000000
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas R0P7785LC0011RL board
- * Based on arch/sh/drivers/pci/ops-r7780rp.c
- *
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-static char irq_tab[] __initdata = {
-	65, 66, 67, 68,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return irq_tab[slot];
-}
-
-static struct resource sh7785_io_resource = {
-	.name	= "SH7785_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7785_mem_resource = {
-	.name	= "SH7785_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sh7785_pci_map = {
-	.window0	= {
-#if defined(CONFIG_32BIT)
-		.base	= SH7780_32BIT_DDR_BASE_ADDR,
-		.size	= 0x40000000,
-#else
-		.base	= SH7780_CS0_BASE_ADDR,
-		.size	= 0x20000000,
-#endif
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7780_pcic_init(&sh7785_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
deleted file mode 100644
index 53dd893..0000000
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-snapgear.c
- *
- * Author:  David McCullough <davidm@snapgear.com>
- *
- * Ported to new API by Paul Mundt <lethal@linux-sh.org>
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the SnapGear boards
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-#define SNAPGEAR_PCI_IO		0x4000
-#define SNAPGEAR_PCI_MEM	0xfd000000
-
-/* PCI: default LOCAL memory window sizes (seen from PCI bus) */
-#define SNAPGEAR_LSR0_SIZE    (64*(1<<20)) //64MB
-#define SNAPGEAR_LSR1_SIZE    (64*(1<<20)) //64MB
-
-static struct resource sh7751_io_resource = {
-	.name		= "SH7751 IO",
-	.start		= SNAPGEAR_PCI_IO,
-	.end		= SNAPGEAR_PCI_IO + (64*1024) - 1, /* 64KiB I/O */
-	.flags		= IORESOURCE_IO,
-};
-
-static struct resource sh7751_mem_resource = {
-	.name		= "SH7751 mem",
-	.start		= SNAPGEAR_PCI_MEM,
-	.end		= SNAPGEAR_PCI_MEM + (64*1024*1024) - 1, /* 64MiB mem */
-	.flags		= IORESOURCE_MEM,
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ 0, }
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SNAPGEAR_LSR0_SIZE,
-	},
-
-	.window1	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SNAPGEAR_LSR1_SIZE,
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-/*
- * Initialize the SnapGear PCI interface
- * Setup hardware to be Central Funtion
- * Copy the BSR regs to the PCI interface
- * Setup PCI windows into local RAM
- */
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&sh7751_pci_map);
-}
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	int irq = -1;
-
-	switch (slot) {
-	case 8:  /* the PCI bridge */ break;
-	case 11: irq = 8;  break; /* USB    */
-	case 12: irq = 11; break; /* PCMCIA */
-	case 13: irq = 5;  break; /* eth0   */
-	case 14: irq = 8;  break; /* eth1   */
-	case 15: irq = 11; break; /* safenet (unused) */
-	}
-
-	printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n",
-	       slot, pin - 1 + 'A', irq);
-
-	return irq;
-}
-
-void __init pcibios_fixup(void)
-{
-	/* Nothing to fixup .. */
-}
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
deleted file mode 100644
index a8f7801..0000000
--- a/arch/sh/drivers/pci/ops-titan.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-titan.c
- *
- * Ported to new API by Paul Mundt <lethal@linux-sh.org>
- *
- * Modified from ops-snapgear.c written by  David McCullough
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Titan boards
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/titan.h>
-#include "pci-sh4.h"
-
-static char titan_irq_tab[] __initdata = {
-	TITAN_IRQ_WAN,
-	TITAN_IRQ_LAN,
-	TITAN_IRQ_MPCIA,
-	TITAN_IRQ_MPCIB,
-	TITAN_IRQ_USB,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	int irq = titan_irq_tab[slot];
-
-	printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n",
-		slot, pin - 1 + 'A', irq);
-
-	return irq;
-}
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= SH7751_PCI_IO_BASE,
-	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-EXPORT_SYMBOL(board_pci_channels);
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SH7751_MEM_REGION_SIZE*2,	/* cs2 and cs3 */
-	},
-
-	.window1	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SH7751_MEM_REGION_SIZE*2,
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&sh7751_pci_map);
-}
diff --git a/arch/sh/drivers/pci/pci-auto.c b/arch/sh/drivers/pci/pci-auto.c
deleted file mode 100644
index cf48b12..0000000
--- a/arch/sh/drivers/pci/pci-auto.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * PCI autoconfiguration library
- *
- * Author: Matt Porter <mporter@mvista.com>
- *
- * Copyright 2000, 2001 MontaVista Software Inc.
- * Copyright 2001 Bradley D. LaRonde <brad@ltc.com>
- * Copyright 2003 Paul Mundt <lethal@linux-sh.org>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-/*
- * Modified for MIPS by Jun Sun, jsun@mvista.com
- *
- * . Simplify the interface between pci_auto and the rest: a single function.
- * . Assign resources from low address to upper address.
- * . change most int to u32.
- *
- * Further modified to include it as mips generic code, ppopov@mvista.com.
- *
- * 2001-10-26  Bradley D. LaRonde <brad@ltc.com>
- * - Add a top_bus argument to the "early config" functions so that
- *   they can set a fake parent bus pointer to convince the underlying
- *   pci ops to use type 1 configuration for sub busses.
- * - Set bridge base and limit registers correctly.
- * - Align io and memory base properly before and after bridge setup.
- * - Don't fall through to pci_setup_bars for bridge.
- * - Reformat the debug output to look more like lspci's output.
- *
- * Cloned for SuperH by M. R. Brown, mrbrown@0xd6.org
- *
- * 2003-08-05  Paul Mundt <lethal@linux-sh.org>
- * - Don't update the BAR values on systems that already have valid addresses
- *   and don't want these updated for whatever reason, by way of a new config
- *   option check. However, we still read in the old BAR values so that they
- *   can still be reported through the debug output.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-#define	DEBUG
-#ifdef	DEBUG
-#define	DBG(x...)	printk(x)
-#else
-#define	DBG(x...)
-#endif
-
-/*
- * These functions are used early on before PCI scanning is done
- * and all of the pci_dev and pci_bus structures have been created.
- */
-static struct pci_dev *fake_pci_dev(struct pci_channel *hose,
-	int top_bus, int busnr, int devfn)
-{
-	static struct pci_dev dev;
-	static struct pci_bus bus;
-
-	dev.bus = &bus;
-	dev.sysdata = hose;
-	dev.devfn = devfn;
-	bus.number = busnr;
-	bus.ops = hose->pci_ops;
-
-	if(busnr != top_bus)
-		/* Fake a parent bus structure. */
-		bus.parent = &bus;
-	else
-		bus.parent = NULL;
-
-	return &dev;
-}
-
-#define EARLY_PCI_OP(rw, size, type)					\
-static int early_##rw##_config_##size(struct pci_channel *hose,		\
-	int top_bus, int bus, int devfn, int offset, type value)	\
-{									\
-	return pci_##rw##_config_##size(				\
-		fake_pci_dev(hose, top_bus, bus, devfn),		\
-		offset, value);						\
-}
-
-EARLY_PCI_OP(read, byte, u8 *)
-EARLY_PCI_OP(read, word, u16 *)
-EARLY_PCI_OP(read, dword, u32 *)
-EARLY_PCI_OP(write, byte, u8)
-EARLY_PCI_OP(write, word, u16)
-EARLY_PCI_OP(write, dword, u32)
-
-static struct resource *io_resource_inuse;
-static struct resource *mem_resource_inuse;
-
-static u32 pciauto_lower_iospc;
-static u32 pciauto_upper_iospc;
-
-static u32 pciauto_lower_memspc;
-static u32 pciauto_upper_memspc;
-
-static void __init
-pciauto_setup_bars(struct pci_channel *hose,
-		   int top_bus,
-		   int current_bus,
-		   int pci_devfn,
-		   int bar_limit)
-{
-	u32 bar_response, bar_size, bar_value;
-	u32 bar, addr_mask, bar_nr = 0;
-	u32 * upper_limit;
-	u32 * lower_limit;
-	int found_mem64 = 0;
-
-	for (bar = PCI_BASE_ADDRESS_0; bar <= bar_limit; bar+=4) {
-		u32 bar_addr;
-
-		/* Read the old BAR value */
-		early_read_config_dword(hose, top_bus,
-					current_bus,
-					pci_devfn,
-					bar,
-					&bar_addr);
-
-		/* Tickle the BAR and get the response */
-		early_write_config_dword(hose, top_bus,
-					 current_bus,
-					 pci_devfn,
-					 bar,
-					 0xffffffff);
-
-		early_read_config_dword(hose, top_bus,
-					current_bus,
-					pci_devfn,
-					bar,
-					&bar_response);
-
-		/*
-		 * Write the old BAR value back out, only update the BAR
-		 * if we implicitly want resources to be updated, which
-		 * is done by the generic code further down. -- PFM.
-		 */
-		early_write_config_dword(hose, top_bus,
-					 current_bus,
-					 pci_devfn,
-					 bar,
-					 bar_addr);
-
-		/* If BAR is not implemented go to the next BAR */
-		if (!bar_response)
-			continue;
-
-		/*
-		 * Workaround for a BAR that doesn't use its upper word,
-		 * like the ALi 1535D+ PCI DC-97 Controller Modem (M5457).
-		 * bdl <brad@ltc.com>
-		 */
-		if (!(bar_response & 0xffff0000))
-			bar_response |= 0xffff0000;
-
-retry:
-		/* Check the BAR type and set our address mask */
-		if (bar_response & PCI_BASE_ADDRESS_SPACE) {
-			addr_mask = PCI_BASE_ADDRESS_IO_MASK;
-			upper_limit = &pciauto_upper_iospc;
-			lower_limit = &pciauto_lower_iospc;
-			DBG("        I/O");
-		} else {
-			if ((bar_response & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
-			    PCI_BASE_ADDRESS_MEM_TYPE_64)
-				found_mem64 = 1;
-
-			addr_mask = PCI_BASE_ADDRESS_MEM_MASK;
-			upper_limit = &pciauto_upper_memspc;
-			lower_limit = &pciauto_lower_memspc;
-			DBG("        Mem");
-		}
-
-
-		/* Calculate requested size */
-		bar_size = ~(bar_response & addr_mask) + 1;
-
-		/* Allocate a base address */
-		bar_value = ((*lower_limit - 1) & ~(bar_size - 1)) + bar_size;
-
-		if ((bar_value + bar_size) > *upper_limit) {
-			if (bar_response & PCI_BASE_ADDRESS_SPACE) {
-				if (io_resource_inuse->child) {
-					io_resource_inuse =
-						io_resource_inuse->child;
-					pciauto_lower_iospc =
-						io_resource_inuse->start;
-					pciauto_upper_iospc =
-						io_resource_inuse->end + 1;
-					goto retry;
-				}
-
-			} else {
-				if (mem_resource_inuse->child) {
-					mem_resource_inuse =
-						mem_resource_inuse->child;
-					pciauto_lower_memspc =
-						mem_resource_inuse->start;
-					pciauto_upper_memspc =
-						mem_resource_inuse->end + 1;
-					goto retry;
-				}
-			}
-			DBG(" unavailable -- skipping, value %x size %x\n",
-					bar_value, bar_size);
-			continue;
-		}
-
-		if (bar_value < *lower_limit || (bar_value + bar_size) >= *upper_limit) {
-			DBG(" unavailable -- skipping, value %x size %x\n",
-					bar_value, bar_size);
-			continue;
-		}
-
-#ifdef CONFIG_PCI_AUTO_UPDATE_RESOURCES
-		/* Write it out and update our limit */
-		early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-					 bar, bar_value);
-#endif
-
-		*lower_limit = bar_value + bar_size;
-
-		/*
-		 * If we are a 64-bit decoder then increment to the
-		 * upper 32 bits of the bar and force it to locate
-		 * in the lower 4GB of memory.
-		 */
-		if (found_mem64) {
-			bar += 4;
-			early_write_config_dword(hose, top_bus,
-						 current_bus,
-						 pci_devfn,
-						 bar,
-						 0x00000000);
-		}
-
-		DBG(" at 0x%.8x [size=0x%x]\n", bar_value, bar_size);
-
-		bar_nr++;
-	}
-
-}
-
-static void __init
-pciauto_prescan_setup_bridge(struct pci_channel *hose,
-			     int top_bus,
-			     int current_bus,
-			     int pci_devfn,
-			     int sub_bus)
-{
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-	                        PCI_PRIMARY_BUS, current_bus);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SECONDARY_BUS, sub_bus + 1);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, 0xff);
-
-	/* Align memory and I/O to 1MB and 4KB boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1))
-		& ~(0x100000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-
-	/* Set base (lower limit) of address range behind bridge. */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_MEMORY_BASE, pciauto_lower_memspc >> 16);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_BASE, (pciauto_lower_iospc & 0x0000f000) >> 8);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_BASE_UPPER16, pciauto_lower_iospc >> 16);
-
-	/* We don't support prefetchable memory for now, so disable */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-				PCI_PREF_MEMORY_BASE, 0);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-				PCI_PREF_MEMORY_LIMIT, 0);
-}
-
-static void __init
-pciauto_postscan_setup_bridge(struct pci_channel *hose,
-			      int top_bus,
-			      int current_bus,
-			      int pci_devfn,
-			      int sub_bus)
-{
-	u32 temp;
-
-	/*
-	 * [jsun] we always bump up baselines a little, so that if there
-	 * nothing behind P2P bridge, we don't wind up overlapping IO/MEM
-	 * spaces.
-	 */
-	pciauto_lower_memspc += 1;
-	pciauto_lower_iospc += 1;
-
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, sub_bus);
-
-	/* Set upper limit of address range behind bridge. */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_MEMORY_LIMIT, pciauto_lower_memspc >> 16);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_LIMIT, (pciauto_lower_iospc & 0x0000f000) >> 8);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_LIMIT_UPPER16, pciauto_lower_iospc >> 16);
-
-	/* Align memory and I/O to 1MB and 4KB boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1))
-		& ~(0x100000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-
-	/* Enable memory and I/O accesses, enable bus master */
-	early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, &temp);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY
-		| PCI_COMMAND_MASTER);
-}
-
-static void __init
-pciauto_prescan_setup_cardbus_bridge(struct pci_channel *hose,
-			int top_bus,
-			int current_bus,
-			int pci_devfn,
-			int sub_bus)
-{
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_PRIMARY_BUS, current_bus);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SECONDARY_BUS, sub_bus + 1);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, 0xff);
-
-	/* Align memory and I/O to 4KB and 4 byte boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1))
-		& ~(0x4 - 1);
-
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_MEMORY_BASE_0, pciauto_lower_memspc);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_IO_BASE_0, pciauto_lower_iospc);
-}
-
-static void __init
-pciauto_postscan_setup_cardbus_bridge(struct pci_channel *hose,
-			int top_bus,
-			int current_bus,
-			int pci_devfn,
-			int sub_bus)
-{
-	u32 temp;
-
-	/*
-	 * [jsun] we always bump up baselines a little, so that if there
-	 * nothing behind P2P bridge, we don't wind up overlapping IO/MEM
-	 * spaces.
-	 */
-	pciauto_lower_memspc += 1;
-	pciauto_lower_iospc += 1;
-
-	/*
-	 * Configure subordinate bus number.  The PCI subsystem
-	 * bus scan will renumber buses (reserving three additional
-	 * for this PCI<->CardBus bridge for the case where a CardBus
-	 * adapter contains a P2P or CB2CB bridge.
-	 */
-
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, sub_bus);
-
-	/*
-	 * Reserve an additional 4MB for mem space and 16KB for
-	 * I/O space.  This should cover any additional space
-	 * requirement of unusual CardBus devices with
-	 * additional bridges that can consume more address space.
-	 *
-	 * Although pcmcia-cs currently will reprogram bridge
-	 * windows, the goal is to add an option to leave them
-	 * alone and use the bridge window ranges as the regions
-	 * that are searched for free resources upon hot-insertion
-	 * of a device.  This will allow a PCI<->CardBus bridge
-	 * configured by this routine to happily live behind a
-	 * P2P bridge in a system.
-	 */
-	/* Align memory and I/O to 4KB and 4 byte boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1))
-		& ~(0x4 - 1);
-	/* Set up memory and I/O filter limits, assume 32-bit I/O space */
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_MEMORY_LIMIT_0, pciauto_lower_memspc - 1);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_IO_LIMIT_0, pciauto_lower_iospc - 1);
-
-	/* Enable memory and I/O accesses, enable bus master */
-	early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, &temp);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
-		PCI_COMMAND_MASTER);
-}
-
-#define	PCIAUTO_IDE_MODE_MASK		0x05
-
-static int __init
-pciauto_bus_scan(struct pci_channel *hose, int top_bus, int current_bus)
-{
-	int sub_bus;
-	u32 pci_devfn, pci_class, cmdstat, found_multi=0;
-	unsigned short vid, did;
-	unsigned char header_type;
-	int devfn_start = 0;
-	int devfn_stop = 0xff;
-
-	sub_bus = current_bus;
-
-	if (hose->first_devfn)
-		devfn_start = hose->first_devfn;
-	if (hose->last_devfn)
-		devfn_stop = hose->last_devfn;
-
-	for (pci_devfn=devfn_start; pci_devfn<devfn_stop; pci_devfn++) {
-
-		if (PCI_FUNC(pci_devfn) && !found_multi)
-			continue;
-
-		early_read_config_word(hose, top_bus, current_bus, pci_devfn,
-				       PCI_VENDOR_ID, &vid);
-
-		if (vid == 0xffff) continue;
-
-		early_read_config_byte(hose, top_bus, current_bus, pci_devfn,
-				       PCI_HEADER_TYPE, &header_type);
-
-		if (!PCI_FUNC(pci_devfn))
-			found_multi = header_type & 0x80;
-
-		early_read_config_word(hose, top_bus, current_bus, pci_devfn,
-				       PCI_DEVICE_ID, &did);
-
-		early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-					PCI_CLASS_REVISION, &pci_class);
-
-		DBG("%.2x:%.2x.%x Class %.4x: %.4x:%.4x",
-			current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn),
-			pci_class >> 16, vid, did);
-		if (pci_class & 0xff)
-			DBG(" (rev %.2x)", pci_class & 0xff);
-		DBG("\n");
-
-		if ((pci_class >> 16) == PCI_CLASS_BRIDGE_PCI) {
-			DBG("        Bridge: primary=%.2x, secondary=%.2x\n",
-				current_bus, sub_bus + 1);
-			pciauto_prescan_setup_bridge(hose, top_bus, current_bus,
-						     pci_devfn, sub_bus);
-			DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n",
-				sub_bus + 1,
-				pciauto_lower_iospc, pciauto_lower_memspc);
-			sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1);
-			DBG("Back to bus %.2x\n", current_bus);
-			pciauto_postscan_setup_bridge(hose, top_bus, current_bus,
-							pci_devfn, sub_bus);
-			continue;
-		} else if ((pci_class >> 16) == PCI_CLASS_BRIDGE_CARDBUS) {
-			DBG("  CARDBUS  Bridge: primary=%.2x, secondary=%.2x\n",
-				current_bus, sub_bus + 1);
-			DBG("PCI Autoconfig: Found CardBus bridge, device %d function %d\n", PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn));
-			/* Place CardBus Socket/ExCA registers */
-			pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_0);
-
-			pciauto_prescan_setup_cardbus_bridge(hose, top_bus,
-					current_bus, pci_devfn, sub_bus);
-
-			DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n",
-				sub_bus + 1,
-				pciauto_lower_iospc, pciauto_lower_memspc);
-			sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1);
-			DBG("Back to bus %.2x, sub_bus is %x\n", current_bus, sub_bus);
-			pciauto_postscan_setup_cardbus_bridge(hose, top_bus,
-					current_bus, pci_devfn, sub_bus);
-			continue;
-		} else if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) {
-
-			unsigned char prg_iface;
-
-			early_read_config_byte(hose, top_bus, current_bus,
-				pci_devfn, PCI_CLASS_PROG, &prg_iface);
-			if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) {
-				DBG("Skipping legacy mode IDE controller\n");
-				continue;
-			}
-		}
-
-		/*
-		 * Found a peripheral, enable some standard
-		 * settings
-		 */
-		early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-					PCI_COMMAND, &cmdstat);
-		early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-					 PCI_COMMAND, cmdstat | PCI_COMMAND_IO |
-					 PCI_COMMAND_MEMORY |
-					 PCI_COMMAND_MASTER);
-		early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-					PCI_LATENCY_TIMER, 0x80);
-
-		/* Allocate PCI I/O and/or memory space */
-		pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_5);
-	}
-	return sub_bus;
-}
-
-int __init
-pciauto_assign_resources(int busno, struct pci_channel *hose)
-{
-	/* setup resource limits */
-	io_resource_inuse = hose->io_resource;
-	mem_resource_inuse = hose->mem_resource;
-
-	pciauto_lower_iospc = io_resource_inuse->start;
-	pciauto_upper_iospc = io_resource_inuse->end + 1;
-	pciauto_lower_memspc = mem_resource_inuse->start;
-	pciauto_upper_memspc = mem_resource_inuse->end + 1;
-	DBG("Autoconfig PCI channel 0x%p\n", hose);
-	DBG("Scanning bus %.2x, I/O 0x%.8x:0x%.8x, Mem 0x%.8x:0x%.8x\n",
-		busno, pciauto_lower_iospc, pciauto_upper_iospc, 
-		pciauto_lower_memspc, pciauto_upper_memspc);
-
-	return pciauto_bus_scan(hose, busno, busno);
-}
diff --git a/arch/sh/drivers/pci/pci-dreamcast.c b/arch/sh/drivers/pci/pci-dreamcast.c
new file mode 100644
index 0000000..210f9d4
--- /dev/null
+++ b/arch/sh/drivers/pci/pci-dreamcast.c
@@ -0,0 +1,102 @@
+/*
+ * PCI support for the Sega Dreamcast
+ *
+ * Copyright (C) 2001, 2002  M. R. Brown
+ * Copyright (C) 2002, 2003  Paul Mundt
+ *
+ * This file originally bore the message (with enclosed-$):
+ *	Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp
+ *	Dreamcast PCI: Supports SEGA Broadband Adaptor only.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <mach/pci.h>
+
+static struct resource gapspci_io_resource = {
+	.name	= "GAPSPCI IO",
+	.start	= GAPSPCI_BBA_CONFIG,
+	.end	= GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct resource gapspci_mem_resource = {
+	.name	= "GAPSPCI mem",
+	.start	= GAPSPCI_DMA_BASE,
+	.end	= GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct pci_channel dreamcast_pci_controller = {
+	.pci_ops	= &gapspci_pci_ops,
+	.io_resource	= &gapspci_io_resource,
+	.io_offset	= 0x00000000,
+	.mem_resource	= &gapspci_mem_resource,
+	.mem_offset	= 0x00000000,
+};
+
+/*
+ * gapspci init
+ */
+
+static int __init gapspci_init(void)
+{
+	char idbuf[16];
+	int i;
+
+	/*
+	 * FIXME: All of this wants documenting to some degree,
+	 * even some basic register definitions would be nice.
+	 *
+	 * I haven't seen anything this ugly since.. maple.
+	 */
+
+	for (i=0; i<16; i++)
+		idbuf[i] = inb(GAPSPCI_REGS+i);
+
+	if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16))
+		return -ENODEV;
+
+	outl(0x5a14a501, GAPSPCI_REGS+0x18);
+
+	for (i=0; i<1000000; i++)
+		cpu_relax();
+
+	if (inl(GAPSPCI_REGS+0x18) != 1)
+		return -EINVAL;
+
+	outl(0x01000000, GAPSPCI_REGS+0x20);
+	outl(0x01000000, GAPSPCI_REGS+0x24);
+
+	outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28);
+	outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c);
+
+	outl(1, GAPSPCI_REGS+0x14);
+	outl(1, GAPSPCI_REGS+0x34);
+
+	/* Setting Broadband Adapter */
+	outw(0xf900, GAPSPCI_BBA_CONFIG+0x06);
+	outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30);
+	outb(0x00, GAPSPCI_BBA_CONFIG+0x3c);
+	outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d);
+	outw(0x0006, GAPSPCI_BBA_CONFIG+0x04);
+	outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10);
+	outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14);
+
+	register_pci_controller(&dreamcast_pci_controller);
+
+	return 0;
+}
+arch_initcall(gapspci_init);
diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h
index a83dcf7..3d5296c 100644
--- a/arch/sh/drivers/pci/pci-sh4.h
+++ b/arch/sh/drivers/pci/pci-sh4.h
@@ -149,13 +149,10 @@
   #define SH4_PCIPDTR_PB0	  0x000000001	/* Port 0 Enable */
 #define SH4_PCIPDR		0x220		/* Port IO Data Register */
 
-/* Flags */
-#define SH4_PCIC_NO_RESET	0x0001
-
 /* arch/sh/kernel/drivers/pci/ops-sh4.c */
 extern struct pci_ops sh4_pci_ops;
-int sh4_pci_check_direct(void);
-int pci_fixup_pcic(void);
+int sh4_pci_check_direct(struct pci_channel *chan);
+int pci_fixup_pcic(struct pci_channel *chan);
 
 struct sh4_pci_address_space {
 	unsigned long base;
@@ -165,16 +162,18 @@
 struct sh4_pci_address_map {
 	struct sh4_pci_address_space window0;
 	struct sh4_pci_address_space window1;
-	unsigned long flags;
 };
 
-static inline void pci_write_reg(unsigned long val, unsigned long reg)
+static inline void pci_write_reg(struct pci_channel *chan,
+				 unsigned long val, unsigned long reg)
 {
-	ctrl_outl(val, PCI_REG(reg));
+	ctrl_outl(val, chan->reg_base + reg);
 }
 
-static inline unsigned long pci_read_reg(unsigned long reg)
+static inline unsigned long pci_read_reg(struct pci_channel *chan,
+					 unsigned long reg)
 {
-	return ctrl_inl(PCI_REG(reg));
+	return ctrl_inl(chan->reg_base + reg);
 }
+
 #endif /* __PCI_SH4_H */
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 7a97438..873ed2b 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -89,8 +89,21 @@
 	return IRQ_NONE;
 }
 
-int __init sh5pci_init(unsigned long memStart, unsigned long memSize)
+static struct resource sh5_io_resource = { /* place holder */ };
+static struct resource sh5_mem_resource = { /* place holder */ };
+
+static struct pci_channel sh5pci_controller = {
+	.pci_ops		= &sh5_pci_ops,
+	.mem_resource		= &sh5_mem_resource,
+	.mem_offset		= 0x00000000,
+	.io_resource		= &sh5_io_resource,
+	.io_offset		= 0x00000000,
+};
+
+static int __init sh5pci_init(void)
 {
+	unsigned long memStart = __pa(memory_start);
+	unsigned long memSize = __pa(memory_end) - memStart;
 	u32 lsr0;
 	u32 uval;
 
@@ -106,12 +119,12 @@
                 return -EINVAL;
         }
 
-	pcicr_virt = onchip_remap(SH5PCI_ICR_BASE, 1024, "PCICR");
+	pcicr_virt = (unsigned long)ioremap_nocache(SH5PCI_ICR_BASE, 1024);
 	if (!pcicr_virt) {
 		panic("Unable to remap PCICR\n");
 	}
 
-	PCI_IO_AREA = onchip_remap(SH5PCI_IO_BASE, 0x10000, "PCIIO");
+	PCI_IO_AREA = (unsigned long)ioremap_nocache(SH5PCI_IO_BASE, 0x10000);
 	if (!PCI_IO_AREA) {
 		panic("Unable to remap PCIIO\n");
 	}
@@ -197,32 +210,14 @@
         SH5PCI_WRITE(AINTM, ~0);
         SH5PCI_WRITE(PINTM, ~0);
 
+	sh5_io_resource.start = PCI_IO_AREA;
+	sh5_io_resource.end = PCI_IO_AREA + 0x10000;
+
+	sh5_mem_resource.start = memStart;
+	sh5_mem_resource.end = memStart + memSize;
+
+	register_pci_controller(&sh5pci_controller);
+
 	return 0;
 }
-
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
-{
-	struct pci_dev *dev = bus->self;
-	int i;
-
-	if (dev) {
-		for (i= 0; i < 3; i++) {
-			bus->resource[i] =
-				&dev->resource[PCI_BRIDGE_RESOURCES+i];
-			bus->resource[i]->name = bus->name;
-		}
-		bus->resource[0]->flags |= IORESOURCE_IO;
-		bus->resource[1]->flags |= IORESOURCE_MEM;
-
-		/* For now, propagate host limits to the bus;
-		 * we'll adjust them later. */
-		bus->resource[0]->end = 64*1024 - 1 ;
-		bus->resource[1]->end = PCIBIOS_MIN_MEM+(256*1024*1024)-1;
-		bus->resource[0]->start = PCIBIOS_MIN_IO;
-		bus->resource[1]->start = PCIBIOS_MIN_MEM;
-
-		/* Turn off downstream PF memory address range by default */
-		bus->resource[2]->start = 1024*1024;
-		bus->resource[2]->end = bus->resource[2]->start - 1;
-	}
-}
+arch_initcall(sh5pci_init);
diff --git a/arch/sh/drivers/pci/pci-sh5.h b/arch/sh/drivers/pci/pci-sh5.h
index 7cff3fc..f277628 100644
--- a/arch/sh/drivers/pci/pci-sh5.h
+++ b/arch/sh/drivers/pci/pci-sh5.h
@@ -107,7 +107,4 @@
 
 extern struct pci_ops sh5_pci_ops;
 
-/* arch/sh/drivers/pci/pci-sh5.c */
-int sh5pci_init(unsigned long memStart, unsigned long memSize);
-
 #endif /* __PCI_SH5_H */
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 3065eb184..70c1999 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -1,114 +1,111 @@
 /*
- *	Low-Level PCI Support for the SH7751
+ * Low-Level PCI Support for the SH7751
  *
- *  Dustin McIntire (dustin@sensoria.com)
- *	Derived from arch/i386/kernel/pci-*.c which bore the message:
- *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *  Copyright (C) 2003 - 2009  Paul Mundt
+ *  Copyright (C) 2001  Dustin McIntire
  *
- *  Ported to the new API by Paul Mundt <lethal@linux-sh.org>
- *  With cleanup by Paul van Gool <pvangool@mimotech.com>
+ *  With cleanup by Paul van Gool <pvangool@mimotech.com>, 2003.
  *
- *  May be copied or modified under the terms of the GNU General Public
- *  License.  See linux/COPYING for more information.
- *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-#undef DEBUG
-
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/delay.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
 #include <asm/addrspace.h>
-#include <asm/io.h>
 
-/*
- * Initialization. Try all known PCI access methods. Note that we support
- * using both PCI BIOS and direct access: in such cases, we use I/O ports
- * to access config space.
- *
- * Note that the platform specific initialization (BSC registers, and memory
- * space mapping) will be called via the platform defined function
- * pcibios_init_platform().
- */
+static int __init __area_sdram_check(struct pci_channel *chan,
+				     unsigned int area)
+{
+	unsigned long word;
+
+	word = __raw_readl(SH7751_BCR1);
+	/* check BCR for SDRAM in area */
+	if (((word >> area) & 1) == 0) {
+		printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%lx\n",
+		       area, word);
+		return 0;
+	}
+	pci_write_reg(chan, word, SH4_PCIBCR1);
+
+	word = __raw_readw(SH7751_BCR2);
+	/* check BCR2 for 32bit SDRAM interface*/
+	if (((word >> (area << 1)) & 0x3) != 0x3) {
+		printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%lx\n",
+		       area, word);
+		return 0;
+	}
+	pci_write_reg(chan, word, SH4_PCIBCR2);
+
+	return 1;
+}
+
+static struct resource sh7751_io_resource = {
+	.name	= "SH7751_IO",
+	.start	= SH7751_PCI_IO_BASE,
+	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource sh7751_mem_resource = {
+	.name	= "SH7751_mem",
+	.start	= SH7751_PCI_MEMORY_BASE,
+	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_channel sh7751_pci_controller = {
+	.pci_ops	= &sh4_pci_ops,
+	.mem_resource	= &sh7751_mem_resource,
+	.mem_offset	= 0x00000000,
+	.io_resource	= &sh7751_io_resource,
+	.io_offset	= 0x00000000,
+	.io_map_base	= SH7751_PCI_IO_BASE,
+};
+
+static struct sh4_pci_address_map sh7751_pci_map = {
+	.window0	= {
+		.base	= SH7751_CS3_BASE_ADDR,
+		.size	= 0x04000000,
+	},
+};
+
 static int __init sh7751_pci_init(void)
 {
+	struct pci_channel *chan = &sh7751_pci_controller;
 	unsigned int id;
+	u32 word, reg;
 	int ret;
 
-	pr_debug("PCI: Starting intialization.\n");
+	printk(KERN_NOTICE "PCI: Starting intialization.\n");
+
+	chan->reg_base = 0xfe200000;
 
 	/* check for SH7751/SH7751R hardware */
-	id = pci_read_reg(SH7751_PCICONF0);
+	id = pci_read_reg(chan, SH7751_PCICONF0);
 	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
 	    id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) {
 		pr_debug("PCI: This is not an SH7751(R) (%x)\n", id);
 		return -ENODEV;
 	}
 
-	if ((ret = sh4_pci_check_direct()) != 0)
+	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
-	return pcibios_init_platform();
-}
-subsys_initcall(sh7751_pci_init);
-
-static int __init __area_sdram_check(unsigned int area)
-{
-	u32 word;
-
-	word = ctrl_inl(SH7751_BCR1);
-	/* check BCR for SDRAM in area */
-	if (((word >> area) & 1) == 0) {
-		printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%x\n",
-		       area, word);
-		return 0;
-	}
-	pci_write_reg(word, SH4_PCIBCR1);
-
-	word = (u16)ctrl_inw(SH7751_BCR2);
-	/* check BCR2 for 32bit SDRAM interface*/
-	if (((word >> (area << 1)) & 0x3) != 0x3) {
-		printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%x\n",
-		       area, word);
-		return 0;
-	}
-	pci_write_reg(word, SH4_PCIBCR2);
-
-	return 1;
-}
-
-int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
-{
-	u32 reg;
-	u32 word;
-
 	/* Set the BCR's to enable PCI access */
 	reg = ctrl_inl(SH7751_BCR1);
 	reg |= 0x80000;
 	ctrl_outl(reg, SH7751_BCR1);
 
 	/* Turn the clocks back on (not done in reset)*/
-	pci_write_reg(0, SH4_PCICLKR);
+	pci_write_reg(chan, 0, SH4_PCICLKR);
 	/* Clear Powerdown IRQ's (not done in reset) */
 	word = SH4_PCIPINT_D3 | SH4_PCIPINT_D0;
-	pci_write_reg(word, SH4_PCIPINT);
-
-	/*
-	 * This code is unused for some boards as it is done in the
-	 * bootloader and doing it here means the MAC addresses loaded
-	 * by the bootloader get lost.
-	 */
-	if (!(map->flags & SH4_PCIC_NO_RESET)) {
-		/* toggle PCI reset pin */
-		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(word, SH4_PCICR);
-		/* Wait for a long time... not 1 sec. but long enough */
-		mdelay(100);
-		word = SH4_PCICR_PREFIX;
-		pci_write_reg(word, SH4_PCICR);
-	}
+	pci_write_reg(chan, word, SH4_PCIPINT);
 
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
@@ -116,89 +113,75 @@
 	 */
 	word = SH7751_PCICONF1_WCC | SH7751_PCICONF1_PER |
 	       SH7751_PCICONF1_BUM | SH7751_PCICONF1_MES;
-	pci_write_reg(word, SH7751_PCICONF1);
+	pci_write_reg(chan, word, SH7751_PCICONF1);
 
 	/* define this host as the host bridge */
 	word = PCI_BASE_CLASS_BRIDGE << 24;
-	pci_write_reg(word, SH7751_PCICONF2);
+	pci_write_reg(chan, word, SH7751_PCICONF2);
 
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
-	 * Window0 = map->window0.size @ non-cached area base = SDRAM
-	 * Window1 = map->window1.size @ cached area base = SDRAM
 	 */
-	word = map->window0.size - 1;
-	pci_write_reg(word, SH4_PCILSR0);
-	word = map->window1.size - 1;
-	pci_write_reg(word, SH4_PCILSR1);
+	word = sh7751_pci_map.window0.size - 1;
+	pci_write_reg(chan, word, SH4_PCILSR0);
 	/* Set the values on window 0 PCI config registers */
-	word = P2SEGADDR(map->window0.base);
-	pci_write_reg(word, SH4_PCILAR0);
-	pci_write_reg(word, SH7751_PCICONF5);
-	/* Set the values on window 1 PCI config registers */
-	word =  PHYSADDR(map->window1.base);
-	pci_write_reg(word, SH4_PCILAR1);
-	pci_write_reg(word, SH7751_PCICONF6);
+	word = P2SEGADDR(sh7751_pci_map.window0.base);
+	pci_write_reg(chan, word, SH4_PCILAR0);
+	pci_write_reg(chan, word, SH7751_PCICONF5);
 
 	/* Set the local 16MB PCI memory space window to
 	 * the lowest PCI mapped address
 	 */
-	word = PCIBIOS_MIN_MEM & SH4_PCIMBR_MASK;
+	word = chan->mem_resource->start & SH4_PCIMBR_MASK;
 	pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word);
-	pci_write_reg(word , SH4_PCIMBR);
-
-	/* Map IO space into PCI IO window
-	 * The IO window is 64K-PCIBIOS_MIN_IO in size
-	 * IO addresses will be translated to the
-	 * PCI IO window base address
-	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
-		 PCIBIOS_MIN_IO, (64 << 10),
-		 SH7751_PCI_IO_BASE + PCIBIOS_MIN_IO);
+	pci_write_reg(chan, word , SH4_PCIMBR);
 
 	/* Make sure the MSB's of IO window are set to access PCI space
 	 * correctly */
-	word = PCIBIOS_MIN_IO & SH4_PCIIOBR_MASK;
+	word = chan->io_resource->start & SH4_PCIIOBR_MASK;
 	pr_debug("PCI: Setting upper bits of IO window to 0x%x\n", word);
-	pci_write_reg(word, SH4_PCIIOBR);
+	pci_write_reg(chan, word, SH4_PCIIOBR);
 
 	/* Set PCI WCRx, BCRx's, copy from BSC locations */
 
 	/* check BCR for SDRAM in specified area */
-	switch (map->window0.base) {
-	case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(0); break;
-	case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(1); break;
-	case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(2); break;
-	case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(3); break;
-	case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(4); break;
-	case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(5); break;
-	case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(6); break;
+	switch (sh7751_pci_map.window0.base) {
+	case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(chan, 0); break;
+	case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(chan, 1); break;
+	case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(chan, 2); break;
+	case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(chan, 3); break;
+	case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(chan, 4); break;
+	case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(chan, 5); break;
+	case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(chan, 6); break;
 	}
 
 	if (!word)
-		return 0;
+		return -1;
 
 	/* configure the wait control registers */
 	word = ctrl_inl(SH7751_WCR1);
-	pci_write_reg(word, SH4_PCIWCR1);
+	pci_write_reg(chan, word, SH4_PCIWCR1);
 	word = ctrl_inl(SH7751_WCR2);
-	pci_write_reg(word, SH4_PCIWCR2);
+	pci_write_reg(chan, word, SH4_PCIWCR2);
 	word = ctrl_inl(SH7751_WCR3);
-	pci_write_reg(word, SH4_PCIWCR3);
+	pci_write_reg(chan, word, SH4_PCIWCR3);
 	word = ctrl_inl(SH7751_MCR);
-	pci_write_reg(word, SH4_PCIMCR);
+	pci_write_reg(chan, word, SH4_PCIMCR);
 
 	/* NOTE: I'm ignoring the PCI error IRQs for now..
 	 * TODO: add support for the internal error interrupts and
 	 * DMA interrupts...
 	 */
 
-	pci_fixup_pcic();
+	pci_fixup_pcic(chan);
 
 	/* SH7751 init done, set central function init complete */
 	/* use round robin mode to stop a device starving/overruning */
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM;
-	pci_write_reg(word, SH4_PCICR);
+	pci_write_reg(chan, word, SH4_PCICR);
 
-	return 1;
+	register_pci_controller(chan);
+
+	return 0;
 }
+arch_initcall(sh7751_pci_init);
diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h
index 68e3cb5..4983a4d 100644
--- a/arch/sh/drivers/pci/pci-sh7751.h
+++ b/arch/sh/drivers/pci/pci-sh7751.h
@@ -26,7 +26,6 @@
 #define SH7751_PCI_IO_SIZE           0x40000     /* Size of IO window */
 
 #define SH7751_PCIREG_BASE           0xFE200000  /* PCI regs base address */
-#define PCI_REG(n)                  (SH7751_PCIREG_BASE+ n)
 
 #define SH7751_PCICONF0            0x0           /* PCI Config Reg 0 */
   #define SH7751_PCICONF0_DEVID      0xFFFF0000  /* Device ID */
@@ -58,7 +57,7 @@
   #define SH7751_PCICONF2_SCC        0x00FF0000  /* Sub-Class Code */
   #define SH7751_PCICONF2_RLPI       0x0000FF00  /* Programming Interface */
   #define SH7751_PCICONF2_REV        0x000000FF  /* Revision ID */
-#define SH7751_PCICONF3            0xC           /* PCI Config Reg 3 */ 
+#define SH7751_PCICONF3            0xC           /* PCI Config Reg 3 */
   #define SH7751_PCICONF3_BIST7      0x80000000  /* Bist Supported */
   #define SH7751_PCICONF3_BIST6      0x40000000  /* Bist Executing */
   #define SH7751_PCICONF3_BIST3_0    0x0F000000  /* Bist Passed */
@@ -73,12 +72,12 @@
   #define SH7751_PCICONF5_BASE       0xFFFFFFF0  /* Mem Space Base Addr */
   #define SH7751_PCICONF5_LAP        0x00000008  /* Prefetch Enabled */
   #define SH7751_PCICONF5_LAT        0x00000006  /* Local Memory type */
-  #define SH7751_PCICONF5_ASI        0x00000001  /* Address Space Type */  
+  #define SH7751_PCICONF5_ASI        0x00000001  /* Address Space Type */
 #define SH7751_PCICONF6            0x18          /* PCI Config Reg 6 */
   #define SH7751_PCICONF6_BASE       0xFFFFFFF0  /* Mem Space Base Addr */
   #define SH7751_PCICONF6_LAP        0x00000008  /* Prefetch Enabled */
   #define SH7751_PCICONF6_LAT        0x00000006  /* Local Memory type */
-  #define SH7751_PCICONF6_ASI        0x00000001  /* Address Space Type */  
+  #define SH7751_PCICONF6_ASI        0x00000001  /* Address Space Type */
 /* PCICONF7 - PCICONF10 are undefined */
 #define SH7751_PCICONF11           0x2C          /* PCI Config Reg 11 */
   #define SH7751_PCICONF11_SSID      0xFFFF0000  /* Subsystem ID */
@@ -127,9 +126,4 @@
 #define SH7751_CS5_BASE_ADDR       (SH7751_CS4_BASE_ADDR + SH7751_MEM_REGION_SIZE)
 #define SH7751_CS6_BASE_ADDR       (SH7751_CS5_BASE_ADDR + SH7751_MEM_REGION_SIZE)
 
-struct sh4_pci_address_map;
-
-/* arch/sh/drivers/pci/pci-sh7751.c */
-int sh7751_pcic_init(struct sh4_pci_address_map *map);
-
 #endif /* _PCI_SH7751_H_ */
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index bae6a2c..323b92d 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -1,19 +1,12 @@
 /*
- *	Low-Level PCI Support for the SH7780
+ * Low-Level PCI Support for the SH7780
  *
- *  Dustin McIntire (dustin@sensoria.com)
- *	Derived from arch/i386/kernel/pci-*.c which bore the message:
- *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *  Copyright (C) 2005 - 2009  Paul Mundt
  *
- *  Ported to the new API by Paul Mundt <lethal@linux-sh.org>
- *  With cleanup by Paul van Gool <pvangool@mimotech.com>
- *
- *  May be copied or modified under the terms of the GNU General Public
- *  License.  See linux/COPYING for more information.
- *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-#undef DEBUG
-
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -22,135 +15,132 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-#define INTC_BASE	0xffd00000
-#define INTC_ICR0	(INTC_BASE+0x0)
-#define INTC_ICR1	(INTC_BASE+0x1c)
-#define INTC_INTPRI	(INTC_BASE+0x10)
-#define INTC_INTREQ	(INTC_BASE+0x24)
-#define INTC_INTMSK0	(INTC_BASE+0x44)
-#define INTC_INTMSK1	(INTC_BASE+0x48)
-#define INTC_INTMSK2	(INTC_BASE+0x40080)
-#define INTC_INTMSKCLR0	(INTC_BASE+0x64)
-#define INTC_INTMSKCLR1	(INTC_BASE+0x68)
-#define INTC_INTMSKCLR2	(INTC_BASE+0x40084)
-#define INTC_INT2MSKR	(INTC_BASE+0x40038)
-#define INTC_INT2MSKCR	(INTC_BASE+0x4003c)
+static struct resource sh7785_io_resource = {
+	.name	= "SH7785_IO",
+	.start	= SH7780_PCI_IO_BASE,
+	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
 
-/*
- * Initialization. Try all known PCI access methods. Note that we support
- * using both PCI BIOS and direct access: in such cases, we use I/O ports
- * to access config space.
- *
- * Note that the platform specific initialization (BSC registers, and memory
- * space mapping) will be called via the platform defined function
- * pcibios_init_platform().
- */
+static struct resource sh7785_mem_resource = {
+	.name	= "SH7785_mem",
+	.start	= SH7780_PCI_MEMORY_BASE,
+	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_channel sh7780_pci_controller = {
+	.pci_ops	= &sh4_pci_ops,
+	.mem_resource	= &sh7785_mem_resource,
+	.mem_offset	= 0x00000000,
+	.io_resource	= &sh7785_io_resource,
+	.io_offset	= 0x00000000,
+	.io_map_base	= SH7780_PCI_IO_BASE,
+};
+
+static struct sh4_pci_address_map sh7780_pci_map = {
+	.window0	= {
+#if defined(CONFIG_32BIT)
+		.base	= SH7780_32BIT_DDR_BASE_ADDR,
+		.size	= 0x40000000,
+#else
+		.base	= SH7780_CS0_BASE_ADDR,
+		.size	= 0x20000000,
+#endif
+	},
+};
+
 static int __init sh7780_pci_init(void)
 {
+	struct pci_channel *chan = &sh7780_pci_controller;
 	unsigned int id;
-	int ret, match = 0;
+	const char *type = NULL;
+	int ret;
+	u32 word;
 
-	pr_debug("PCI: Starting intialization.\n");
+	printk(KERN_NOTICE "PCI: Starting intialization.\n");
 
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
+	chan->reg_base = 0xfe040000;
 
-	/* check for SH7780/SH7780R hardware */
-	id = pci_read_reg(SH7780_PCIVID);
-	if ((id & 0xffff) == SH7780_VENDOR_ID) {
-		switch ((id >> 16) & 0xffff) {
-		case SH7763_DEVICE_ID:
-		case SH7780_DEVICE_ID:
-		case SH7781_DEVICE_ID:
-		case SH7785_DEVICE_ID:
-			match = 1;
-			break;
-		}
-	}
+	/* Enable CPU access to the PCIC registers. */
+	__raw_writel(PCIECR_ENBL, PCIECR);
 
-	if (unlikely(!match)) {
-		printk(KERN_ERR "PCI: This is not an SH7780 (%x)\n", id);
+	id = __raw_readw(chan->reg_base + SH7780_PCIVID);
+	if (id != SH7780_VENDOR_ID) {
+		printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id);
 		return -ENODEV;
 	}
 
-	/* Setup the INTC */
-	if (mach_is_7780se()) {
-		/* ICR0: IRL=use separately */
-		ctrl_outl(0x00C00020, INTC_ICR0);
-		/* ICR1: detect low level(for 2ndcut) */
-		ctrl_outl(0xAAAA0000, INTC_ICR1);
-		/* INTPRI: priority=3(all) */
-		ctrl_outl(0x33333333, INTC_INTPRI);
+	id = __raw_readw(chan->reg_base + SH7780_PCIDID);
+	type = (id == SH7763_DEVICE_ID)	? "SH7763" :
+	       (id == SH7780_DEVICE_ID) ? "SH7780" :
+	       (id == SH7781_DEVICE_ID) ? "SH7781" :
+	       (id == SH7785_DEVICE_ID) ? "SH7785" :
+					  NULL;
+	if (unlikely(!type)) {
+		printk(KERN_ERR "PCI: Found an unsupported Renesas host "
+		       "controller, device id 0x%04x.\n", id);
+		return -EINVAL;
 	}
 
-	if ((ret = sh4_pci_check_direct()) != 0)
+	printk(KERN_NOTICE "PCI: Found a Renesas %s host "
+	       "controller, revision %d.\n", type,
+	       __raw_readb(chan->reg_base + SH7780_PCIRID));
+
+	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
-	return pcibios_init_platform();
-}
-core_initcall(sh7780_pci_init);
-
-int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
-{
-	u32 word;
+	/*
+	 * Set the class and sub-class codes.
+	 */
+	__raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8,
+		     chan->reg_base + SH7780_PCIBCC);
+	__raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff,
+		     chan->reg_base + SH7780_PCISUB);
 
 	/*
-	 * This code is unused for some boards as it is done in the
-	 * bootloader and doing it here means the MAC addresses loaded
-	 * by the bootloader get lost.
-	 */
-	if (!(map->flags & SH4_PCIC_NO_RESET)) {
-		/* toggle PCI reset pin */
-		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(word, SH4_PCICR);
-		/* Wait for a long time... not 1 sec. but long enough */
-		mdelay(100);
-		word = SH4_PCICR_PREFIX;
-		pci_write_reg(word, SH4_PCICR);
-	}
-
-	/* set the command/status bits to:
-	 * Wait Cycle Control + Parity Enable + Bus Master +
-	 * Mem space enable
-	 */
-	pci_write_reg(0x00000046, SH7780_PCICMD);
-
-	/* define this host as the host bridge */
-	word = PCI_BASE_CLASS_BRIDGE << 24;
-	pci_write_reg(word, SH7780_PCIRID);
-
-	/* Set IO and Mem windows to local address
+	 * Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
 	 */
-	pci_write_reg(map->window0.size - 0xfffff, SH4_PCILSR0);
-	pci_write_reg(map->window1.size - 0xfffff, SH4_PCILSR1);
+	pci_write_reg(chan, sh7780_pci_map.window0.size - 0xfffff, SH4_PCILSR0);
 	/* Set the values on window 0 PCI config registers */
-	pci_write_reg(map->window0.base, SH4_PCILAR0);
-	pci_write_reg(map->window0.base, SH7780_PCIMBAR0);
-	/* Set the values on window 1 PCI config registers */
-	pci_write_reg(map->window1.base, SH4_PCILAR1);
-	pci_write_reg(map->window1.base, SH7780_PCIMBAR1);
+	pci_write_reg(chan, sh7780_pci_map.window0.base, SH4_PCILAR0);
+	pci_write_reg(chan, sh7780_pci_map.window0.base, SH7780_PCIMBAR0);
 
-	/* Map IO space into PCI IO window
-	 * The IO window is 64K-PCIBIOS_MIN_IO in size
-	 * IO addresses will be translated to the
-	 * PCI IO window base address
-	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
-		 PCIBIOS_MIN_IO, (64 << 10),
-		 SH7780_PCI_IO_BASE + PCIBIOS_MIN_IO);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
 
-	/* NOTE: I'm ignoring the PCI error IRQs for now..
-	 * TODO: add support for the internal error interrupts and
-	 * DMA interrupts...
-	 */
+	/* Set up standard PCI config registers */
+	__raw_writew(0xFB00, chan->reg_base + SH7780_PCISTATUS);
+	__raw_writew(0x0047, chan->reg_base + SH7780_PCICMD);
+	__raw_writew(0x1912, chan->reg_base + SH7780_PCISVID);
+	__raw_writew(0x0001, chan->reg_base + SH7780_PCISID);
+
+	__raw_writeb(0x00, chan->reg_base + SH7780_PCIPIF);
 
 	/* Apply any last-minute PCIC fixups */
-	pci_fixup_pcic();
+	pci_fixup_pcic(chan);
+
+	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
+
+#ifdef CONFIG_32BIT
+	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
+	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
+#endif
+
+	/* Set IOBR for windows containing area specified in pci.h */
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
+		      SH7780_PCIIOBR);
+	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)),
+		      SH7780_PCIIOBMR);
 
 	/* SH7780 init done, set central function init complete */
 	/* use round robin mode to stop a device starving/overruning */
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
-	pci_write_reg(word, SH4_PCICR);
+	pci_write_reg(chan, word, SH4_PCICR);
 
-	return 1;
+	register_pci_controller(chan);
+
+	return 0;
 }
+arch_initcall(sh7780_pci_init);
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 93adc71..4a52478 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -20,9 +20,8 @@
 #define SH7785_DEVICE_ID	0x0007
 
 /* SH7780 Control Registers */
-#define	SH7780_PCI_VCR0		0xFE000000
-#define	SH7780_PCI_VCR1		0xFE000004
-#define	SH7780_PCI_VCR2		0xFE000008
+#define	PCIECR			0xFE000008
+#define PCIECR_ENBL		0x01
 
 /* SH7780 Specific Values */
 #define SH7780_PCI_CONFIG_BASE	0xFD000000	/* Config space base addr */
@@ -35,7 +34,6 @@
 #define SH7780_PCI_IO_SIZE	0x00400000	/* Size of IO window */
 
 #define SH7780_PCIREG_BASE	0xFE040000	/* PCI regs base address */
-#define PCI_REG(n)		(SH7780_PCIREG_BASE+n)
 
 /* SH7780 PCI Config Registers */
 #define SH7780_PCIVID		0x000		/* Vendor ID */
@@ -67,11 +65,6 @@
 #define SH7780_PCIPMCSR_BSE	0x046
 #define SH7780_PCICDD		0x047
 
-#define SH7780_PCICR		0x100		/* PCI Control Register */
-#define SH7780_PCILSR		0x104		/* PCI Local Space Register0 */
-#define SH7780_PCILSR1		0x108		/* PCI Local Space Register1 */
-#define SH7780_PCILAR0		0x10C		/* PCI Local Address Register1 */
-#define SH7780_PCILAR1		0x110		/* PCI Local Address Register1 */
 #define SH7780_PCIIR		0x114		/* PCI Interrupt Register */
 #define SH7780_PCIIMR		0x118		/* PCI Interrupt Mask Register */
 #define SH7780_PCIAIR		0x11C		/* Error Address Register */
@@ -106,9 +99,4 @@
 
 #define SH7780_32BIT_DDR_BASE_ADDR	0x40000000
 
-struct sh4_pci_address_map;
-
-/* arch/sh/drivers/pci/pci-sh7780.c */
-int sh7780_pcic_init(struct sh4_pci_address_map *map);
-
 #endif /* _PCI_SH7780_H_ */
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 0d6ac7a1..54d77cb 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -1,67 +1,156 @@
 /*
- * arch/sh/drivers/pci/pci.c
+ * New-style PCI core.
  *
- * Copyright (c) 2002 M. R. Brown  <mrbrown@linux-sh.org>
- * Copyright (c) 2004 - 2006 Paul Mundt  <lethal@linux-sh.org>
+ * Copyright (c) 2004 - 2009  Paul Mundt
+ * Copyright (c) 2002  M. R. Brown
  *
- * These functions are collected here to reduce duplication of common
- * code amongst the many platform-specific PCI support code files.
- *
- * These routines require the following board-specific routines:
- * void pcibios_fixup_irqs();
- *
- * See include/asm-sh/pci.h for more information.
+ * Modelled after arch/mips/pci/pci.c:
+ *  Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org)
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/types.h>
 #include <linux/dma-debug.h>
-#include <asm/io.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+
+unsigned long PCIBIOS_MIN_IO = 0x0000;
+unsigned long PCIBIOS_MIN_MEM = 0;
+
+/*
+ * The PCI controller list.
+ */
+static struct pci_channel *hose_head, **hose_tail = &hose_head;
+
+static int pci_initialized;
+
+static void __devinit pcibios_scanbus(struct pci_channel *hose)
+{
+	static int next_busno;
+	struct pci_bus *bus;
+
+	bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
+	if (bus) {
+		next_busno = bus->subordinate + 1;
+		/* Don't allow 8-bit bus number overflow inside the hose -
+		   reserve some space for bridges. */
+		if (next_busno > 224)
+			next_busno = 0;
+
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+		pci_enable_bridges(bus);
+	}
+}
+
+static DEFINE_MUTEX(pci_scan_mutex);
+
+void __devinit register_pci_controller(struct pci_channel *hose)
+{
+	if (request_resource(&iomem_resource, hose->mem_resource) < 0)
+		goto out;
+	if (request_resource(&ioport_resource, hose->io_resource) < 0) {
+		release_resource(hose->mem_resource);
+		goto out;
+	}
+
+	*hose_tail = hose;
+	hose_tail = &hose->next;
+
+	/*
+	 * Do not panic here but later - this might hapen before console init.
+	 */
+	if (!hose->io_map_base) {
+		printk(KERN_WARNING
+		       "registering PCI controller with io_map_base unset\n");
+	}
+
+	/*
+	 * Scan the bus if it is register after the PCI subsystem
+	 * initialization.
+	 */
+	if (pci_initialized) {
+		mutex_lock(&pci_scan_mutex);
+		pcibios_scanbus(hose);
+		mutex_unlock(&pci_scan_mutex);
+	}
+
+	return;
+
+out:
+	printk(KERN_WARNING
+	       "Skipping PCI bus scan due to resource conflict\n");
+}
 
 static int __init pcibios_init(void)
 {
-	struct pci_channel *p;
-	struct pci_bus *bus;
-	int busno;
+	struct pci_channel *hose;
 
-#ifdef CONFIG_PCI_AUTO
-	/* assign resources */
-	busno = 0;
-	for (p = board_pci_channels; p->pci_ops != NULL; p++)
-		busno = pciauto_assign_resources(busno, p) + 1;
-#endif
-
-	/* scan the buses */
-	busno = 0;
-	for (p = board_pci_channels; p->pci_ops != NULL; p++) {
-		bus = pci_scan_bus(busno, p->pci_ops, p);
-		busno = bus->subordinate + 1;
-	}
+	/* Scan all of the recorded PCI controllers.  */
+	for (hose = hose_head; hose; hose = hose->next)
+		pcibios_scanbus(hose);
 
 	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
 
 	dma_debug_add_bus(&pci_bus_type);
 
+	pci_initialized = 1;
+
 	return 0;
 }
 subsys_initcall(pcibios_init);
 
+static void pcibios_fixup_device_resources(struct pci_dev *dev,
+	struct pci_bus *bus)
+{
+	/* Update device resources.  */
+	struct pci_channel *hose = bus->sysdata;
+	unsigned long offset = 0;
+	int i;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		if (!dev->resource[i].start)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_PCI_FIXED)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_IO)
+			offset = hose->io_offset;
+		else if (dev->resource[i].flags & IORESOURCE_MEM)
+			offset = hose->mem_offset;
+
+		dev->resource[i].start += offset;
+		dev->resource[i].end += offset;
+	}
+}
+
 /*
  *  Called after each bus is probed, but before its children
  *  are examined.
  */
-void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
-	pci_read_bridge_bases(bus);
-}
+	struct pci_dev *dev = bus->self;
+	struct list_head *ln;
+	struct pci_channel *chan = bus->sysdata;
 
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-			    __attribute__ ((weak));
+	if (!dev) {
+		bus->resource[0] = chan->io_resource;
+		bus->resource[1] = chan->mem_resource;
+	}
+
+	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
+		dev = pci_dev_b(ln);
+
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			pcibios_fixup_device_resources(dev, bus);
+	}
+}
 
 /*
  * We need to avoid collisions with `mirrored' VGA ports
@@ -72,14 +161,58 @@
 void pcibios_align_resource(void *data, struct resource *res,
 			    resource_size_t size, resource_size_t align)
 {
-	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
+	struct pci_dev *dev = data;
+	struct pci_channel *chan = dev->sysdata;
+	resource_size_t start = res->start;
 
+	if (res->flags & IORESOURCE_IO) {
+		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
+			start = PCIBIOS_MIN_IO + chan->io_resource->start;
+
+		/*
+                 * Put everything into 0x00-0xff region modulo 0x400.
+		 */
 		if (start & 0x300) {
 			start = (start + 0x3ff) & ~0x3ff;
 			res->start = start;
 		}
+	} else if (res->flags & IORESOURCE_MEM) {
+		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
+			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
 	}
+
+	res->start = start;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
 }
 
 int pcibios_enable_device(struct pci_dev *dev, int mask)
@@ -90,13 +223,21 @@
 
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
 	old_cmd = cmd;
-	for(idx=0; idx<6; idx++) {
-		if (!(mask & (1 << idx)))
+	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
 			continue;
+
 		r = &dev->resource[idx];
+		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if ((idx == PCI_ROM_RESOURCE) &&
+				(!(r->flags & IORESOURCE_ROM_ENABLE)))
+			continue;
 		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because "
-			       "of resource collisions\n", pci_name(dev));
+			printk(KERN_ERR "PCI: Device %s not available "
+			       "because of resource collisions\n",
+			       pci_name(dev));
 			return -EINVAL;
 		}
 		if (r->flags & IORESOURCE_IO)
@@ -104,10 +245,8 @@
 		if (r->flags & IORESOURCE_MEM)
 			cmd |= PCI_COMMAND_MEMORY;
 	}
-	if (dev->resource[PCI_ROM_RESOURCE].start)
-		cmd |= PCI_COMMAND_MEMORY;
 	if (cmd != old_cmd) {
-		printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n",
+		printk("PCI: Enabling device %s (%04x -> %04x)\n",
 		       pci_name(dev), old_cmd, cmd);
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
@@ -140,6 +279,43 @@
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
+char * __devinit pcibios_setup(char *str)
+{
+	return str;
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	/*
+	 * I/O space can be accessed via normal processor loads and stores on
+	 * this platform but for now we elect not to do this and portable
+	 * drivers should not do this anyway.
+	 */
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/*
+	 * Ignore write-combine; for now only return uncached mappings.
+	 */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
+static void __iomem *ioport_map_pci(struct pci_dev *dev,
+				    unsigned long port, unsigned int nr)
+{
+	struct pci_channel *chan = dev->sysdata;
+
+	if (!chan->io_map_base)
+		chan->io_map_base = generic_io_base;
+
+	return (void __iomem *)(chan->io_map_base + port);
+}
+
 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 {
 	resource_size_t start = pci_resource_start(dev, bar);
@@ -151,20 +327,24 @@
 	if (maxlen && len > maxlen)
 		len = maxlen;
 
+	if (flags & IORESOURCE_IO)
+		return ioport_map_pci(dev, start, len);
+
 	/*
 	 * Presently the IORESOURCE_MEM case is a bit special, most
 	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired
-	 * (typically at 0xfd000000, but is_pci_memaddr() will know
-	 * best). With the IORESOURCE_MEM case more care has to be taken
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
 	 * to inhibit page table mapping for legacy cores, but this is
 	 * punted off to __ioremap().
 	 *					-- PFM.
 	 */
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
+	if (flags & IORESOURCE_MEM) {
+		if (flags & IORESOURCE_CACHEABLE)
+			return ioremap(start, len);
+
+		return ioremap_nocache(start, len);
+	}
 
 	return NULL;
 }
@@ -175,3 +355,10 @@
 	iounmap(addr);
 }
 EXPORT_SYMBOL(pci_iounmap);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+EXPORT_SYMBOL(PCIBIOS_MIN_IO);
+EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
+#endif
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 4b00b78..b040e1e 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -104,4 +104,31 @@
 	: "t");
 }
 
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+
+	return c != (u);
+}
+
 #endif /* __ASM_SH_ATOMIC_LLSC_H */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 6327ffb..978b58e 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -45,7 +45,7 @@
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic_dec(v) atomic_sub(1,(v))
 
-#ifndef CONFIG_GUSA_RB
+#if !defined(CONFIG_GUSA_RB) && !defined(CONFIG_CPU_SH4A)
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -73,7 +73,7 @@
 
 	return ret != u;
 }
-#endif
+#endif /* !CONFIG_GUSA_RB && !CONFIG_CPU_SH4A */
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 09acbc3..4c5462d 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -75,7 +75,5 @@
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
 
-#define HAVE_ARCH_UNMAPPED_AREA
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_CACHEFLUSH_H */
diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 2f6c962..9fe7d7f 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -1,9 +1,9 @@
 #ifndef __ASM_SH_CLOCK_H
 #define __ASM_SH_CLOCK_H
 
-#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
+#include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 
@@ -11,9 +11,9 @@
 
 struct clk_ops {
 	void (*init)(struct clk *clk);
-	void (*enable)(struct clk *clk);
+	int (*enable)(struct clk *clk);
 	void (*disable)(struct clk *clk);
-	void (*recalc)(struct clk *clk);
+	unsigned long (*recalc)(struct clk *clk);
 	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
 	int (*set_parent)(struct clk *clk, struct clk *parent);
 	long (*round_rate)(struct clk *clk, unsigned long rate);
@@ -28,43 +28,47 @@
 	struct clk		*parent;
 	struct clk_ops		*ops;
 
-	struct kref		kref;
+	struct list_head	children;
+	struct list_head	sibling;	/* node for children */
+
+	int			usecount;
 
 	unsigned long		rate;
 	unsigned long		flags;
+
+	void __iomem		*enable_reg;
+	unsigned int		enable_bit;
+
 	unsigned long		arch_flags;
+	void			*priv;
+	struct dentry		*dentry;
+	struct cpufreq_frequency_table *freq_table;
 };
 
-#define CLK_ALWAYS_ENABLED	(1 << 0)
-#define CLK_RATE_PROPAGATES	(1 << 1)
+struct clk_lookup {
+	struct list_head	node;
+	const char		*dev_id;
+	const char		*con_id;
+	struct clk		*clk;
+};
+
+#define CLK_ENABLE_ON_INIT	(1 << 0)
 
 /* Should be defined by processor-specific code */
-void arch_init_clk_ops(struct clk_ops **, int type);
+void __deprecated arch_init_clk_ops(struct clk_ops **, int type);
 int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
 int clk_init(void);
-
-void clk_recalc_rate(struct clk *);
-
+unsigned long followparent_recalc(struct clk *);
+void recalculate_root_clocks(void);
+void propagate_rate(struct clk *);
+int clk_reparent(struct clk *child, struct clk *parent);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 
-static inline int clk_always_enable(const char *id)
-{
-	struct clk *clk;
-	int ret;
-
-	clk = clk_get(NULL, id);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	ret = clk_enable(clk);
-	if (ret)
-		clk_put(clk);
-
-	return ret;
-}
+/* arch/sh/kernel/cpu/clock-cpg.c */
+int __init __deprecated cpg_clk_init(void);
 
 /* the exported API, in addition to clk_set_rate */
 /**
@@ -96,4 +100,63 @@
 
 	IP_N1,
 };
+
+struct clk_div_mult_table {
+	unsigned int *divisors;
+	unsigned int nr_divisors;
+	unsigned int *multipliers;
+	unsigned int nr_multipliers;
+};
+
+struct cpufreq_frequency_table;
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap);
+
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate);
+
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate);
+
+#define SH_CLK_MSTP32(_name, _id, _parent, _enable_reg,	\
+	    _enable_bit, _flags)			\
+{							\
+	.name		= _name,			\
+	.id		= _id,				\
+	.parent		= _parent,			\
+	.enable_reg	= (void __iomem *)_enable_reg,	\
+	.enable_bit	= _enable_bit,			\
+	.flags		= _flags,			\
+}
+
+int sh_clk_mstp32_register(struct clk *clks, int nr);
+
+#define SH_CLK_DIV4(_name, _parent, _reg, _shift, _div_bitmap, _flags)	\
+{									\
+	.name = _name,							\
+	.parent = _parent,						\
+	.enable_reg = (void __iomem *)_reg,				\
+	.enable_bit = _shift,						\
+	.arch_flags = _div_bitmap,					\
+	.flags = _flags,						\
+}
+
+int sh_clk_div4_register(struct clk *clks, int nr,
+			 struct clk_div_mult_table *table);
+
+#define SH_CLK_DIV6(_name, _parent, _reg, _flags)	\
+{							\
+	.name = _name,					\
+	.parent = _parent,				\
+	.enable_reg = (void __iomem *)_reg,		\
+	.flags = _flags,				\
+}
+
+int sh_clk_div6_register(struct clk *clks, int nr);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/include/asm/cmpxchg-llsc.h b/arch/sh/include/asm/cmpxchg-llsc.h
index 0fac3da..4713666 100644
--- a/arch/sh/include/asm/cmpxchg-llsc.h
+++ b/arch/sh/include/asm/cmpxchg-llsc.h
@@ -55,7 +55,7 @@
 		"mov		%0, %1				\n\t"
 		"cmp/eq		%1, %3				\n\t"
 		"bf		2f				\n\t"
-		"mov		%3, %0				\n\t"
+		"mov		%4, %0				\n\t"
 		"2:						\n\t"
 		"movco.l	%0, @%2				\n\t"
 		"bf		1b				\n\t"
diff --git a/arch/sh/include/asm/device.h b/arch/sh/include/asm/device.h
index efd511d..8688a88 100644
--- a/arch/sh/include/asm/device.h
+++ b/arch/sh/include/asm/device.h
@@ -10,3 +10,5 @@
 int platform_resource_setup_memory(struct platform_device *pdev,
 				   char *name, unsigned long memsize);
 
+void plat_early_device_setup(void);
+
diff --git a/arch/sh/include/asm/hd64461.h b/arch/sh/include/asm/hd64461.h
index 52b4b62..977355f 100644
--- a/arch/sh/include/asm/hd64461.h
+++ b/arch/sh/include/asm/hd64461.h
@@ -13,18 +13,20 @@
 #define	HD64461_PCC_WINDOW	0x01000000
 
 /* Area 6 - Slot 0 - memory and/or IO card */
-#define	HD64461_PCC0_BASE	(CONFIG_HD64461_IOBASE + 0x8000000)
+#define HD64461_IOBASE		0xb0000000
+#define HD64461_IO_OFFSET(x)	(HD64461_IOBASE + (x))
+#define	HD64461_PCC0_BASE	HD64461_IO_OFFSET(0x8000000)
 #define	HD64461_PCC0_ATTR	(HD64461_PCC0_BASE)				/* 0xb80000000 */
 #define	HD64461_PCC0_COMM	(HD64461_PCC0_BASE+HD64461_PCC_WINDOW)		/* 0xb90000000 */
 #define	HD64461_PCC0_IO		(HD64461_PCC0_BASE+2*HD64461_PCC_WINDOW)	/* 0xba0000000 */
 
 /* Area 5 - Slot 1 - memory card only */
-#define	HD64461_PCC1_BASE	(CONFIG_HD64461_IOBASE + 0x4000000)
+#define	HD64461_PCC1_BASE	HD64461_IO_OFFSET(0x4000000)
 #define	HD64461_PCC1_ATTR	(HD64461_PCC1_BASE)				/* 0xb4000000 */
 #define	HD64461_PCC1_COMM	(HD64461_PCC1_BASE+HD64461_PCC_WINDOW)		/* 0xb5000000 */
 
 /* Standby Control Register for HD64461 */
-#define	HD64461_STBCR			CONFIG_HD64461_IOBASE
+#define	HD64461_STBCR			HD64461_IO_OFFSET(0x00000000)
 #define	HD64461_STBCR_CKIO_STBY		0x2000
 #define	HD64461_STBCR_SAFECKE_IST	0x1000
 #define	HD64461_STBCR_SLCKE_IST		0x0800
@@ -41,19 +43,19 @@
 #define	HD64461_STBCR_SURTST		0x0001
 
 /* System Configuration Register */
-#define	HD64461_SYSCR		(CONFIG_HD64461_IOBASE + 0x02)
+#define	HD64461_SYSCR		HD64461_IO_OFFSET(0x02)
 
 /* CPU Data Bus Control Register */
-#define	HD64461_SCPUCR		(CONFIG_HD64461_IOBASE + 0x04)
+#define	HD64461_SCPUCR		HD64461_IO_OFFSET(0x04)
 
 /* Base Address Register */
-#define	HD64461_LCDCBAR		(CONFIG_HD64461_IOBASE + 0x1000)
+#define	HD64461_LCDCBAR		HD64461_IO_OFFSET(0x1000)
 
 /* Line increment address */
-#define	HD64461_LCDCLOR		(CONFIG_HD64461_IOBASE + 0x1002)
+#define	HD64461_LCDCLOR		HD64461_IO_OFFSET(0x1002)
 
 /* Controls LCD controller */
-#define	HD64461_LCDCCR		(CONFIG_HD64461_IOBASE + 0x1004)
+#define	HD64461_LCDCCR		HD64461_IO_OFFSET(0x1004)
 
 /* LCCDR control bits */
 #define	HD64461_LCDCCR_STBACK	0x0400	/* Standby Back */
@@ -64,30 +66,30 @@
 #define	HD64461_LCDCCR_SPON	0x0010	/* Start Power On */
 
 /* Controls LCD (1) */
-#define	HD64461_LDR1		(CONFIG_HD64461_IOBASE + 0x1010)
+#define	HD64461_LDR1		HD64461_IO_OFFSET(0x1010)
 #define	HD64461_LDR1_DON	0x01	/* Display On */
 #define	HD64461_LDR1_DINV	0x80	/* Display Invert */
 
 /* Controls LCD (2) */
-#define	HD64461_LDR2		(CONFIG_HD64461_IOBASE + 0x1012)
-#define	HD64461_LDHNCR		(CONFIG_HD64461_IOBASE + 0x1014)	/* Number of horizontal characters */
-#define	HD64461_LDHNSR		(CONFIG_HD64461_IOBASE + 0x1016)	/* Specify output start position + width of CL1 */
-#define	HD64461_LDVNTR		(CONFIG_HD64461_IOBASE + 0x1018)	/* Specify total vertical lines */
-#define	HD64461_LDVNDR		(CONFIG_HD64461_IOBASE + 0x101a)	/* specify number of display vertical lines */
-#define	HD64461_LDVSPR		(CONFIG_HD64461_IOBASE + 0x101c)	/* specify vertical synchronization pos and AC nr */
+#define	HD64461_LDR2		HD64461_IO_OFFSET(0x1012)
+#define	HD64461_LDHNCR		HD64461_IO_OFFSET(0x1014)	/* Number of horizontal characters */
+#define	HD64461_LDHNSR		HD64461_IO_OFFSET(0x1016)	/* Specify output start position + width of CL1 */
+#define	HD64461_LDVNTR		HD64461_IO_OFFSET(0x1018)	/* Specify total vertical lines */
+#define	HD64461_LDVNDR		HD64461_IO_OFFSET(0x101a)	/* specify number of display vertical lines */
+#define	HD64461_LDVSPR		HD64461_IO_OFFSET(0x101c)	/* specify vertical synchronization pos and AC nr */
 
 /* Controls LCD (3) */
-#define	HD64461_LDR3		(CONFIG_HD64461_IOBASE + 0x101e)
+#define	HD64461_LDR3		HD64461_IO_OFFSET(0x101e)
 
 /* Palette Registers */
-#define	HD64461_CPTWAR		(CONFIG_HD64461_IOBASE + 0x1030)	/* Color Palette Write Address Register */
-#define	HD64461_CPTWDR		(CONFIG_HD64461_IOBASE + 0x1032)	/* Color Palette Write Data Register */
-#define	HD64461_CPTRAR		(CONFIG_HD64461_IOBASE + 0x1034)	/* Color Palette Read Address Register */
-#define	HD64461_CPTRDR		(CONFIG_HD64461_IOBASE + 0x1036)	/* Color Palette Read Data Register */
+#define	HD64461_CPTWAR		HD64461_IO_OFFSET(0x1030)	/* Color Palette Write Address Register */
+#define	HD64461_CPTWDR		HD64461_IO_OFFSET(0x1032)	/* Color Palette Write Data Register */
+#define	HD64461_CPTRAR		HD64461_IO_OFFSET(0x1034)	/* Color Palette Read Address Register */
+#define	HD64461_CPTRDR		HD64461_IO_OFFSET(0x1036)	/* Color Palette Read Data Register */
 
-#define	HD64461_GRDOR		(CONFIG_HD64461_IOBASE + 0x1040)	/* Display Resolution Offset Register */
-#define	HD64461_GRSCR		(CONFIG_HD64461_IOBASE + 0x1042)	/* Solid Color Register */
-#define	HD64461_GRCFGR		(CONFIG_HD64461_IOBASE + 0x1044)	/* Accelerator Configuration Register */
+#define	HD64461_GRDOR		HD64461_IO_OFFSET(0x1040)	/* Display Resolution Offset Register */
+#define	HD64461_GRSCR		HD64461_IO_OFFSET(0x1042)	/* Solid Color Register */
+#define	HD64461_GRCFGR		HD64461_IO_OFFSET(0x1044)	/* Accelerator Configuration Register */
 
 #define	HD64461_GRCFGR_ACCSTATUS	0x10	/* Accelerator Status */
 #define	HD64461_GRCFGR_ACCRESET		0x08	/* Accelerator Reset */
@@ -97,41 +99,41 @@
 #define	HD64461_GRCFGR_COLORDEPTH8	0x01	/* Sets Colordepth 8 for Accelerator */
 
 /* Line Drawing Registers */
-#define	HD64461_LNSARH		(CONFIG_HD64461_IOBASE + 0x1046)	/* Line Start Address Register (H) */
-#define	HD64461_LNSARL		(CONFIG_HD64461_IOBASE + 0x1048)	/* Line Start Address Register (L) */
-#define	HD64461_LNAXLR		(CONFIG_HD64461_IOBASE + 0x104a)	/* Axis Pixel Length Register */
-#define	HD64461_LNDGR		(CONFIG_HD64461_IOBASE + 0x104c)	/* Diagonal Register */
-#define	HD64461_LNAXR		(CONFIG_HD64461_IOBASE + 0x104e)	/* Axial Register */
-#define	HD64461_LNERTR		(CONFIG_HD64461_IOBASE + 0x1050)	/* Start Error Term Register */
-#define	HD64461_LNMDR		(CONFIG_HD64461_IOBASE + 0x1052)	/* Line Mode Register */
+#define	HD64461_LNSARH		HD64461_IO_OFFSET(0x1046)	/* Line Start Address Register (H) */
+#define	HD64461_LNSARL		HD64461_IO_OFFSET(0x1048)	/* Line Start Address Register (L) */
+#define	HD64461_LNAXLR		HD64461_IO_OFFSET(0x104a)	/* Axis Pixel Length Register */
+#define	HD64461_LNDGR		HD64461_IO_OFFSET(0x104c)	/* Diagonal Register */
+#define	HD64461_LNAXR		HD64461_IO_OFFSET(0x104e)	/* Axial Register */
+#define	HD64461_LNERTR		HD64461_IO_OFFSET(0x1050)	/* Start Error Term Register */
+#define	HD64461_LNMDR		HD64461_IO_OFFSET(0x1052)	/* Line Mode Register */
 
 /* BitBLT Registers */
-#define	HD64461_BBTSSARH	(CONFIG_HD64461_IOBASE + 0x1054)	/* Source Start Address Register (H) */
-#define	HD64461_BBTSSARL	(CONFIG_HD64461_IOBASE + 0x1056)	/* Source Start Address Register (L) */
-#define	HD64461_BBTDSARH	(CONFIG_HD64461_IOBASE + 0x1058)	/* Destination Start Address Register (H) */
-#define	HD64461_BBTDSARL	(CONFIG_HD64461_IOBASE + 0x105a)	/* Destination Start Address Register (L) */
-#define	HD64461_BBTDWR		(CONFIG_HD64461_IOBASE + 0x105c)	/* Destination Block Width Register */
-#define	HD64461_BBTDHR		(CONFIG_HD64461_IOBASE + 0x105e)	/* Destination Block Height Register */
-#define	HD64461_BBTPARH		(CONFIG_HD64461_IOBASE + 0x1060)	/* Pattern Start Address Register (H) */
-#define	HD64461_BBTPARL		(CONFIG_HD64461_IOBASE + 0x1062)	/* Pattern Start Address Register (L) */
-#define	HD64461_BBTMARH		(CONFIG_HD64461_IOBASE + 0x1064)	/* Mask Start Address Register (H) */
-#define	HD64461_BBTMARL		(CONFIG_HD64461_IOBASE + 0x1066)	/* Mask Start Address Register (L) */
-#define	HD64461_BBTROPR		(CONFIG_HD64461_IOBASE + 0x1068)	/* ROP Register */
-#define	HD64461_BBTMDR		(CONFIG_HD64461_IOBASE + 0x106a)	/* BitBLT Mode Register */
+#define	HD64461_BBTSSARH	HD64461_IO_OFFSET(0x1054)	/* Source Start Address Register (H) */
+#define	HD64461_BBTSSARL	HD64461_IO_OFFSET(0x1056)	/* Source Start Address Register (L) */
+#define	HD64461_BBTDSARH	HD64461_IO_OFFSET(0x1058)	/* Destination Start Address Register (H) */
+#define	HD64461_BBTDSARL	HD64461_IO_OFFSET(0x105a)	/* Destination Start Address Register (L) */
+#define	HD64461_BBTDWR		HD64461_IO_OFFSET(0x105c)	/* Destination Block Width Register */
+#define	HD64461_BBTDHR		HD64461_IO_OFFSET(0x105e)	/* Destination Block Height Register */
+#define	HD64461_BBTPARH		HD64461_IO_OFFSET(0x1060)	/* Pattern Start Address Register (H) */
+#define	HD64461_BBTPARL		HD64461_IO_OFFSET(0x1062)	/* Pattern Start Address Register (L) */
+#define	HD64461_BBTMARH		HD64461_IO_OFFSET(0x1064)	/* Mask Start Address Register (H) */
+#define	HD64461_BBTMARL		HD64461_IO_OFFSET(0x1066)	/* Mask Start Address Register (L) */
+#define	HD64461_BBTROPR		HD64461_IO_OFFSET(0x1068)	/* ROP Register */
+#define	HD64461_BBTMDR		HD64461_IO_OFFSET(0x106a)	/* BitBLT Mode Register */
 
 /* PC Card Controller Registers */
 /* Maps to Physical Area 6 */
-#define	HD64461_PCC0ISR		(CONFIG_HD64461_IOBASE + 0x2000)	/* socket 0 interface status */
-#define	HD64461_PCC0GCR		(CONFIG_HD64461_IOBASE + 0x2002)	/* socket 0 general control */
-#define	HD64461_PCC0CSCR	(CONFIG_HD64461_IOBASE + 0x2004)	/* socket 0 card status change */
-#define	HD64461_PCC0CSCIER	(CONFIG_HD64461_IOBASE + 0x2006)	/* socket 0 card status change interrupt enable */
-#define	HD64461_PCC0SCR		(CONFIG_HD64461_IOBASE + 0x2008)	/* socket 0 software control */
+#define	HD64461_PCC0ISR		HD64461_IO_OFFSET(0x2000)	/* socket 0 interface status */
+#define	HD64461_PCC0GCR		HD64461_IO_OFFSET(0x2002)	/* socket 0 general control */
+#define	HD64461_PCC0CSCR	HD64461_IO_OFFSET(0x2004)	/* socket 0 card status change */
+#define	HD64461_PCC0CSCIER	HD64461_IO_OFFSET(0x2006)	/* socket 0 card status change interrupt enable */
+#define	HD64461_PCC0SCR		HD64461_IO_OFFSET(0x2008)	/* socket 0 software control */
 /* Maps to Physical Area 5 */
-#define	HD64461_PCC1ISR		(CONFIG_HD64461_IOBASE + 0x2010)	/* socket 1 interface status */
-#define	HD64461_PCC1GCR		(CONFIG_HD64461_IOBASE + 0x2012)	/* socket 1 general control */
-#define	HD64461_PCC1CSCR	(CONFIG_HD64461_IOBASE + 0x2014)	/* socket 1 card status change */
-#define	HD64461_PCC1CSCIER	(CONFIG_HD64461_IOBASE + 0x2016)	/* socket 1 card status change interrupt enable */
-#define	HD64461_PCC1SCR		(CONFIG_HD64461_IOBASE + 0x2018)	/* socket 1 software control */
+#define	HD64461_PCC1ISR		HD64461_IO_OFFSET(0x2010)	/* socket 1 interface status */
+#define	HD64461_PCC1GCR		HD64461_IO_OFFSET(0x2012)	/* socket 1 general control */
+#define	HD64461_PCC1CSCR	HD64461_IO_OFFSET(0x2014)	/* socket 1 card status change */
+#define	HD64461_PCC1CSCIER	HD64461_IO_OFFSET(0x2016)	/* socket 1 card status change interrupt enable */
+#define	HD64461_PCC1SCR		HD64461_IO_OFFSET(0x2018)	/* socket 1 software control */
 
 /* PCC Interface Status Register */
 #define	HD64461_PCCISR_READY		0x80	/* card ready */
@@ -189,41 +191,41 @@
 #define	HD64461_PCCSCR_SWP		0x01	/* write protect */
 
 /* PCC0 Output Pins Control Register */
-#define	HD64461_P0OCR		(CONFIG_HD64461_IOBASE + 0x202a)
+#define	HD64461_P0OCR		HD64461_IO_OFFSET(0x202a)
 
 /* PCC1 Output Pins Control Register */
-#define	HD64461_P1OCR		(CONFIG_HD64461_IOBASE + 0x202c)
+#define	HD64461_P1OCR		HD64461_IO_OFFSET(0x202c)
 
 /* PC Card General Control Register */
-#define	HD64461_PGCR		(CONFIG_HD64461_IOBASE + 0x202e)
+#define	HD64461_PGCR		HD64461_IO_OFFSET(0x202e)
 
 /* Port Control Registers */
-#define	HD64461_GPACR		(CONFIG_HD64461_IOBASE + 0x4000)	/* Port A - Handles IRDA/TIMER */
-#define	HD64461_GPBCR		(CONFIG_HD64461_IOBASE + 0x4002)	/* Port B - Handles UART */
-#define	HD64461_GPCCR		(CONFIG_HD64461_IOBASE + 0x4004)	/* Port C - Handles PCMCIA 1 */
-#define	HD64461_GPDCR		(CONFIG_HD64461_IOBASE + 0x4006)	/* Port D - Handles PCMCIA 1 */
+#define	HD64461_GPACR		HD64461_IO_OFFSET(0x4000)	/* Port A - Handles IRDA/TIMER */
+#define	HD64461_GPBCR		HD64461_IO_OFFSET(0x4002)	/* Port B - Handles UART */
+#define	HD64461_GPCCR		HD64461_IO_OFFSET(0x4004)	/* Port C - Handles PCMCIA 1 */
+#define	HD64461_GPDCR		HD64461_IO_OFFSET(0x4006)	/* Port D - Handles PCMCIA 1 */
 
 /* Port Control Data Registers */
-#define	HD64461_GPADR		(CONFIG_HD64461_IOBASE + 0x4010)	/* A */
-#define	HD64461_GPBDR		(CONFIG_HD64461_IOBASE + 0x4012)	/* B */
-#define	HD64461_GPCDR		(CONFIG_HD64461_IOBASE + 0x4014)	/* C */
-#define	HD64461_GPDDR		(CONFIG_HD64461_IOBASE + 0x4016)	/* D */
+#define	HD64461_GPADR		HD64461_IO_OFFSET(0x4010)	/* A */
+#define	HD64461_GPBDR		HD64461_IO_OFFSET(0x4012)	/* B */
+#define	HD64461_GPCDR		HD64461_IO_OFFSET(0x4014)	/* C */
+#define	HD64461_GPDDR		HD64461_IO_OFFSET(0x4016)	/* D */
 
 /* Interrupt Control Registers */
-#define	HD64461_GPAICR		(CONFIG_HD64461_IOBASE + 0x4020)	/* A */
-#define	HD64461_GPBICR		(CONFIG_HD64461_IOBASE + 0x4022)	/* B */
-#define	HD64461_GPCICR		(CONFIG_HD64461_IOBASE + 0x4024)	/* C */
-#define	HD64461_GPDICR		(CONFIG_HD64461_IOBASE + 0x4026)	/* D */
+#define	HD64461_GPAICR		HD64461_IO_OFFSET(0x4020)	/* A */
+#define	HD64461_GPBICR		HD64461_IO_OFFSET(0x4022)	/* B */
+#define	HD64461_GPCICR		HD64461_IO_OFFSET(0x4024)	/* C */
+#define	HD64461_GPDICR		HD64461_IO_OFFSET(0x4026)	/* D */
 
 /* Interrupt Status Registers */
-#define	HD64461_GPAISR		(CONFIG_HD64461_IOBASE + 0x4040)	/* A */
-#define	HD64461_GPBISR		(CONFIG_HD64461_IOBASE + 0x4042)	/* B */
-#define	HD64461_GPCISR		(CONFIG_HD64461_IOBASE + 0x4044)	/* C */
-#define	HD64461_GPDISR		(CONFIG_HD64461_IOBASE + 0x4046)	/* D */
+#define	HD64461_GPAISR		HD64461_IO_OFFSET(0x4040)	/* A */
+#define	HD64461_GPBISR		HD64461_IO_OFFSET(0x4042)	/* B */
+#define	HD64461_GPCISR		HD64461_IO_OFFSET(0x4044)	/* C */
+#define	HD64461_GPDISR		HD64461_IO_OFFSET(0x4046)	/* D */
 
 /* Interrupt Request Register & Interrupt Mask Register */
-#define	HD64461_NIRR		(CONFIG_HD64461_IOBASE + 0x5000)
-#define	HD64461_NIMR		(CONFIG_HD64461_IOBASE + 0x5002)
+#define	HD64461_NIRR		HD64461_IO_OFFSET(0x5000)
+#define	HD64461_NIMR		HD64461_IO_OFFSET(0x5002)
 
 #define	HD64461_IRQBASE		OFFCHIP_IRQ_BASE
 #define	OFFCHIP_IRQ_BASE	64
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 0454f8d..2534814 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -123,10 +123,15 @@
 
 __BUILD_MEMORY_STRING(b, u8)
 __BUILD_MEMORY_STRING(w, u16)
-__BUILD_MEMORY_STRING(q, u64)
 
+#ifdef CONFIG_SUPERH32
 void __raw_writesl(void __iomem *addr, const void *data, int longlen);
 void __raw_readsl(const void __iomem *addr, void *data, int longlen);
+#else
+__BUILD_MEMORY_STRING(l, u32)
+#endif
+
+__BUILD_MEMORY_STRING(q, u64)
 
 #define writesb			__raw_writesb
 #define writesw			__raw_writesw
@@ -224,17 +229,6 @@
 			unsigned long flags);
 void __iounmap(void __iomem *addr);
 
-/* arch/sh/mm/ioremap_64.c */
-unsigned long onchip_remap(unsigned long addr, unsigned long size,
-			   const char *name);
-extern void onchip_unmap(unsigned long vaddr);
-#else
-#define __ioremap(offset, size, flags)	((void __iomem *)(offset))
-#define __iounmap(addr)			do { } while (0)
-#define onchip_remap(addr, size, name)	(addr)
-#define onchip_unmap(addr)		do { } while (0)
-#endif /* CONFIG_MMU */
-
 static inline void __iomem *
 __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 {
@@ -268,6 +262,10 @@
 
 	return __ioremap(offset, size, flags);
 }
+#else
+#define __ioremap_mode(offset, size, flags)	((void __iomem *)(offset))
+#define __iounmap(addr)				do { } while (0)
+#endif /* CONFIG_MMU */
 
 #define ioremap(offset, size)				\
 	__ioremap_mode((offset), (size), 0)
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index d319baa..a2b8c99 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -8,7 +8,8 @@
  * advised to cap this at the hard limit that they're interested in
  * through the machvec.
  */
-#define NR_IRQS 256
+#define NR_IRQS			256
+#define NR_IRQS_LEGACY		8	/* Legacy external IRQ0-7 */
 
 /*
  * Convert back and forth between INTEVT and IRQ values.
diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 613644a..036c331 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 #include <linux/ptrace.h>
 
-typedef u16 kprobe_opcode_t;
+typedef insn_size_t kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xc33a
 
 #define MAX_INSN_SIZE 16
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index 64b1c16..84dd377 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -46,6 +46,9 @@
 
 	void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size);
 	void (*mv_ioport_unmap)(void __iomem *);
+
+	int (*mv_clk_init)(void);
+	int (*mv_mode_pins)(void);
 };
 
 extern struct sh_machine_vector sh_mv;
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index df1d383..ae0da6f 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -17,54 +17,29 @@
  * external) PCI controllers.
  */
 struct pci_channel {
-	struct pci_ops *pci_ops;
-	struct resource *io_resource;
-	struct resource *mem_resource;
-	int first_devfn;
-	int last_devfn;
+	struct pci_channel	*next;
+
+	struct pci_ops		*pci_ops;
+	struct resource		*io_resource;
+	struct resource		*mem_resource;
+
+	unsigned long		io_offset;
+	unsigned long		mem_offset;
+
+	unsigned long		reg_base;
+
+	unsigned long		io_map_base;
 };
 
-/*
- * Each board initializes this array and terminates it with a NULL entry.
- */
-extern struct pci_channel board_pci_channels[];
+extern void register_pci_controller(struct pci_channel *hose);
 
-#define PCIBIOS_MIN_IO		board_pci_channels->io_resource->start
-#define PCIBIOS_MIN_MEM		board_pci_channels->mem_resource->start
-
-/*
- * I/O routine helpers
- */
-#if defined(CONFIG_CPU_SUBTYPE_SH7780) || defined(CONFIG_CPU_SUBTYPE_SH7785)
-#define PCI_IO_AREA		0xFE400000
-#define PCI_IO_SIZE		0x00400000
-#elif defined(CONFIG_CPU_SH5)
-extern unsigned long PCI_IO_AREA;
-#define PCI_IO_SIZE		0x00010000
-#else
-#define PCI_IO_AREA		0xFE240000
-#define PCI_IO_SIZE		0x00040000
-#endif
-
-#define PCI_MEM_SIZE		0x01000000
-
-#define SH4_PCIIOBR_MASK	0xFFFC0000
-#define pci_ioaddr(addr)	(PCI_IO_AREA + (addr & ~SH4_PCIIOBR_MASK))
-
-#if defined(CONFIG_PCI)
-#define is_pci_ioaddr(port)		\
-	(((port) >= PCIBIOS_MIN_IO) &&	\
-	 ((port) < (PCIBIOS_MIN_IO + PCI_IO_SIZE)))
-#define is_pci_memaddr(port)		\
-	(((port) >= PCIBIOS_MIN_MEM) &&	\
-	 ((port) < (PCIBIOS_MIN_MEM + PCI_MEM_SIZE)))
-#else
-#define is_pci_ioaddr(port)	(0)
-#define is_pci_memaddr(port)	(0)
-#endif
+extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM;
 
 struct pci_dev;
 
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+	enum pci_mmap_state mmap_state, int write_combine);
 extern void pcibios_set_master(struct pci_dev *dev);
 
 static inline void pcibios_penalize_isa_irq(int irq, int active)
@@ -114,31 +89,76 @@
 #endif
 
 #ifdef CONFIG_PCI
+/*
+ * None of the SH PCI controllers support MWI, it is always treated as a
+ * direct memory write.
+ */
+#define PCI_DISABLE_MWI
+
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
 {
-	*strat = PCI_DMA_BURST_INFINITY;
-	*strategy_parameter = ~0UL;
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+
+	if (byte == 0)
+		cacheline_size = L1_CACHE_BYTES;
+	else
+		cacheline_size = byte << 2;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
 }
 #endif
 
-/* Board-specific fixup routines. */
-void pcibios_fixup(void);
-int pcibios_init_platform(void);
-int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin);
+#ifdef CONFIG_SUPERH32
+/*
+ * If we're on an SH7751 or SH7780 PCI controller, PCI memory is mapped
+ * at the end of the address space in a special non-translatable area.
+ */
+#define PCI_MEM_FIXED_START	0xfd000000
+#define PCI_MEM_FIXED_END	(PCI_MEM_FIXED_START + 0x01000000)
 
-#ifdef CONFIG_PCI_AUTO
-int pciauto_assign_resources(int busno, struct pci_channel *hose);
+#define is_pci_memory_fixed_range(s, e)	\
+	((s) >= PCI_MEM_FIXED_START && (e) < PCI_MEM_FIXED_END)
+#else
+#define is_pci_memory_fixed_range(s, e)	(0)
 #endif
 
-#endif /* __KERNEL__ */
+/* Board-specific fixup routines. */
+int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin);
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
+extern void pcibios_resource_to_bus(struct pci_dev *dev,
+	struct pci_bus_region *region, struct resource *res);
+
+extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+				    struct pci_bus_region *region);
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+
+	return root;
+}
+
+/* Chances are this interrupt is wired PC-style ...  */
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return channel ? 15 : 14;
+}
 
 /* generic DMA-mapping stuff */
 #include <asm-generic/pci-dma-compat.h>
 
+#endif /* __KERNEL__ */
 #endif /* __ASM_SH_PCI_H */
 
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index b517ae0..2a011b1 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -154,6 +154,10 @@
 #define kmap_coherent_init()	do { } while (0)
 #endif
 
+/* arch/sh/mm/mmap.c */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 #include <asm-generic/pgtable.h>
 
 #endif /* __ASM_SH_PGTABLE_H */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 1fd58b4..ff7daaf 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -32,7 +32,7 @@
 
 	/* SH-4A types */
 	CPU_SH7763, CPU_SH7770, CPU_SH7780, CPU_SH7781, CPU_SH7785, CPU_SH7786,
-	CPU_SH7723, CPU_SHX3,
+	CPU_SH7723, CPU_SH7724, CPU_SHX3,
 
 	/* SH4AL-DSP types */
 	CPU_SH7343, CPU_SH7722, CPU_SH7366,
@@ -94,6 +94,27 @@
 const char *get_cpu_subtype(struct sh_cpuinfo *c);
 extern const struct seq_operations cpuinfo_op;
 
+/* processor boot mode configuration */
+#define MODE_PIN0 (1 << 0)
+#define MODE_PIN1 (1 << 1)
+#define MODE_PIN2 (1 << 2)
+#define MODE_PIN3 (1 << 3)
+#define MODE_PIN4 (1 << 4)
+#define MODE_PIN5 (1 << 5)
+#define MODE_PIN6 (1 << 6)
+#define MODE_PIN7 (1 << 7)
+#define MODE_PIN8 (1 << 8)
+#define MODE_PIN9 (1 << 9)
+#define MODE_PIN10 (1 << 10)
+#define MODE_PIN11 (1 << 11)
+#define MODE_PIN12 (1 << 12)
+#define MODE_PIN13 (1 << 13)
+#define MODE_PIN14 (1 << 14)
+#define MODE_PIN15 (1 << 15)
+
+int generic_mode_pins(void);
+int test_mode_pin(int pin);
+
 #ifdef CONFIG_VSYSCALL
 int vsyscall_init(void);
 #else
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index 68e20ff..1dc12cb 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -102,6 +102,11 @@
 #define	PTRACE_GETDSPREGS	55	/* DSP registers */
 #define	PTRACE_SETDSPREGS	56
 
+#define PT_TEXT_END_ADDR 	240
+#define PT_TEXT_ADDR 		244	/* &(struct user)->start_code */
+#define PT_DATA_ADDR 		248	/* &(struct user)->start_data */
+#define PT_TEXT_LEN		252
+
 #ifdef __KERNEL__
 #include <asm/addrspace.h>
 
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index f7b010d..52b0c2d 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -6,6 +6,17 @@
 extern void (*rtc_sh_get_time)(struct timespec *);
 extern int (*rtc_sh_set_time)(const time_t);
 
+/* some dummy definitions */
+#define RTC_BATT_BAD 0x100	/* battery bad */
+#define RTC_SQWE 0x08		/* enable square-wave output */
+#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
+#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
+#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
+
+struct rtc_time;
+unsigned int get_rtc_time(struct rtc_time *);
+int set_rtc_time(struct rtc_time *);
+
 #define RTC_CAP_4_DIGIT_YEAR	(1 << 0)
 
 struct sh_rtc_platform_info {
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 6028356..a28c9f0 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -26,7 +26,7 @@
 #define __raw_spin_is_locked(x)		((x)->lock <= 0)
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 #define __raw_spin_unlock_wait(x) \
-	do { cpu_relax(); } while ((x)->lock)
+	do { while (__raw_spin_is_locked(x)) cpu_relax(); } while (0)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
diff --git a/arch/sh/include/asm/swab.h b/arch/sh/include/asm/swab.h
index e693159..0e08fe5 100644
--- a/arch/sh/include/asm/swab.h
+++ b/arch/sh/include/asm/swab.h
@@ -14,15 +14,15 @@
 {
 	__asm__(
 #ifdef __SH5__
-		"byterev	%0, %0\n\t"
+		"byterev	%1, %0\n\t"
 		"shari		%0, 32, %0"
 #else
-		"swap.b		%0, %0\n\t"
+		"swap.b		%1, %0\n\t"
 		"swap.w		%0, %0\n\t"
 		"swap.b		%0, %0"
 #endif
 		: "=r" (x)
-		: "0" (x));
+		: "r" (x));
 
 	return x;
 }
@@ -32,13 +32,13 @@
 {
 	__asm__(
 #ifdef __SH5__
-		"byterev	%0, %0\n\t"
+		"byterev	%1, %0\n\t"
 		"shari		%0, 32, %0"
 #else
-		"swap.b		%0, %0"
+		"swap.b		%1, %0"
 #endif
 		: "=r" (x)
-		:  "0" (x));
+		:  "r" (x));
 
 	return x;
 }
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 240b31e..6c68a51 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -198,7 +198,7 @@
 })
 #endif
 
-int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
+int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
 asmlinkage void do_address_error(struct pt_regs *regs,
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
deleted file mode 100644
index 4c3b66e3..0000000
--- a/arch/sh/include/asm/timer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef __ASM_SH_TIMER_H
-#define __ASM_SH_TIMER_H
-
-#include <linux/sysdev.h>
-#include <linux/clocksource.h>
-#include <cpu/timer.h>
-
-struct sys_timer_ops {
-	int (*init)(void);
-	int (*start)(void);
-	int (*stop)(void);
-#ifndef CONFIG_GENERIC_TIME
-	unsigned long (*get_offset)(void);
-#endif
-};
-
-struct sys_timer {
-	const char		*name;
-
-	struct sys_device	dev;
-	struct sys_timer_ops	*ops;
-};
-
-#define TICK_SIZE (tick_nsec / 1000)
-
-extern struct sys_timer tmu_timer, cmt_timer, mtu2_timer;
-extern struct sys_timer *sys_timer;
-
-#ifndef CONFIG_GENERIC_TIME
-static inline unsigned long get_timer_offset(void)
-{
-	return sys_timer->ops->get_offset();
-}
-#endif
-
-/* arch/sh/kernel/timers/timer.c */
-struct sys_timer *get_sys_timer(void);
-
-/* arch/sh/kernel/time.c */
-void handle_timer_tick(void);
-
-extern struct clocksource clocksource_sh;
-
-#endif /* __ASM_SH_TIMER_H */
diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h
index beea4e6..b13caca 100644
--- a/arch/sh/include/asm/types.h
+++ b/arch/sh/include/asm/types.h
@@ -23,9 +23,9 @@
 typedef u32 dma_addr_t;
 
 #ifdef CONFIG_SUPERH32
-typedef u16 opcode_t;
+typedef u16 insn_size_t;
 #else
-typedef u32 opcode_t;
+typedef u32 insn_size_t;
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/sh/include/asm/ubc.h b/arch/sh/include/asm/ubc.h
index a7b9028..4ca4b77 100644
--- a/arch/sh/include/asm/ubc.h
+++ b/arch/sh/include/asm/ubc.h
@@ -42,12 +42,23 @@
 
 #define BRCR_CMFA		(1 << 15)
 #define BRCR_CMFB		(1 << 14)
+
+#if defined CONFIG_CPU_SH2A
+#define BRCR_CMFCA		(1 << 15)
+#define BRCR_CMFCB		(1 << 14)
+#define BRCR_CMFDA		(1 << 13)
+#define BRCR_CMFDB		(1 << 12)
+#define BRCR_PCBB		(1 << 6)	/* 1: after execution */
+#define BRCR_PCBA		(1 << 5)	/* 1: after execution */
+#define BRCR_PCTE		0
+#else
 #define BRCR_PCTE		(1 << 11)
 #define BRCR_PCBA		(1 << 10)	/* 1: after execution */
 #define BRCR_DBEB		(1 << 7)
 #define BRCR_PCBB		(1 << 6)
 #define BRCR_SEQ		(1 << 3)
 #define BRCR_UBDE		(1 << 0)
+#endif
 
 #ifndef __ASSEMBLY__
 /* arch/sh/kernel/cpu/ubc.S */
diff --git a/arch/sh/include/asm/unaligned-sh4a.h b/arch/sh/include/asm/unaligned-sh4a.h
index d8f8977..9f4dd25 100644
--- a/arch/sh/include/asm/unaligned-sh4a.h
+++ b/arch/sh/include/asm/unaligned-sh4a.h
@@ -3,9 +3,9 @@
 
 /*
  * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
- * Support for 16 and 64-bit accesses are done through shifting and
- * masking relative to the endianness. Unaligned stores are not supported
- * by the instruction encoding, so these continue to use the packed
+ * Support for 64-bit accesses are done through shifting and masking
+ * relative to the endianness. Unaligned stores are not supported by the
+ * instruction encoding, so these continue to use the packed
  * struct.
  *
  * The same note as with the movli.l/movco.l pair applies here, as long
@@ -41,9 +41,9 @@
 static inline u16 __get_unaligned_cpu16(const u8 *p)
 {
 #ifdef __LITTLE_ENDIAN
-	return __get_unaligned_cpu32(p) & 0xffff;
+	return p[0] | p[1] << 8;
 #else
-	return __get_unaligned_cpu32(p) >> 16;
+	return p[0] << 8 | p[1];
 #endif
 }
 
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index 2efb819..6519708 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -343,8 +343,9 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
 
-#define NR_syscalls 335
+#define NR_syscalls 336
 
 #ifdef __KERNEL__
 
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 6eb9d29..8014aea 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -383,10 +383,11 @@
 #define __NR_inotify_init1	360
 #define __NR_preadv		361
 #define __NR_pwritev		362
+#define __NR_rt_tgsigqueueinfo	363
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 363
+#define NR_syscalls 364
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/sh/include/cpu-sh2a/cpu/ubc.h b/arch/sh/include/cpu-sh2a/cpu/ubc.h
index 8ce2fc1c..1192e1c 100644
--- a/arch/sh/include/cpu-sh2a/cpu/ubc.h
+++ b/arch/sh/include/cpu-sh2a/cpu/ubc.h
@@ -1 +1,28 @@
-#include <cpu-sh2/cpu/ubc.h>
+/*
+ * SH-2A UBC definitions
+ *
+ * Copyright (C) 2008 Kieran Bingham
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_CPU_SH2A_UBC_H
+#define __ASM_CPU_SH2A_UBC_H
+
+#define UBC_BARA                0xfffc0400
+#define UBC_BAMRA               0xfffc0404
+#define UBC_BBRA                0xfffc04a0	/* 16 bit access */
+#define UBC_BDRA                0xfffc0408
+#define UBC_BDMRA               0xfffc040c
+
+#define UBC_BARB                0xfffc0410
+#define UBC_BAMRB               0xfffc0414
+#define UBC_BBRB                0xfffc04b0	/* 16 bit access */
+#define UBC_BDRB                0xfffc0418
+#define UBC_BDMRB               0xfffc041c
+
+#define UBC_BRCR                0xfffc04c0
+
+#endif /* __ASM_CPU_SH2A_UBC_H */
diff --git a/arch/sh/include/cpu-sh3/cpu/timer.h b/arch/sh/include/cpu-sh3/cpu/timer.h
deleted file mode 100644
index 793acf1..0000000
--- a/arch/sh/include/cpu-sh3/cpu/timer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * include/asm-sh/cpu-sh3/timer.h
- *
- * Copyright (C) 2004 Lineo Solutions, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH3_TIMER_H
-#define __ASM_CPU_SH3_TIMER_H
-
-/*
- * ---------------------------------------------------------------------------
- * TMU Common definitions for SH3 processors
- *	SH7706
- *	SH7709S
- *	SH7727
- *	SH7729R
- *	SH7710
- *	SH7720
- *	SH7710
- * ---------------------------------------------------------------------------
- */
-
-#if  !defined(CONFIG_CPU_SUBTYPE_SH7720) && !defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU_TOCR	0xfffffe90	/* Byte access */
-#endif
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7710) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7720) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU_012_TSTR	0xa412fe92	/* Byte access */
-
-#define TMU0_TCOR	0xa412fe94	/* Long access */
-#define TMU0_TCNT	0xa412fe98	/* Long access */
-#define TMU0_TCR	0xa412fe9c	/* Word access */
-
-#define TMU1_TCOR	0xa412fea0	/* Long access */
-#define TMU1_TCNT	0xa412fea4	/* Long access */
-#define TMU1_TCR	0xa412fea8	/* Word access */
-
-#define TMU2_TCOR	0xa412feac	/* Long access */
-#define TMU2_TCNT	0xa412feb0	/* Long access */
-#define TMU2_TCR	0xa412feb4	/* Word access */
-
-#else
-#define TMU_012_TSTR	0xfffffe92	/* Byte access */
-
-#define TMU0_TCOR	0xfffffe94	/* Long access */
-#define TMU0_TCNT	0xfffffe98	/* Long access */
-#define TMU0_TCR	0xfffffe9c	/* Word access */
-
-#define TMU1_TCOR	0xfffffea0	/* Long access */
-#define TMU1_TCNT	0xfffffea4	/* Long access */
-#define TMU1_TCR	0xfffffea8	/* Word access */
-
-#define TMU2_TCOR	0xfffffeac	/* Long access */
-#define TMU2_TCNT	0xfffffeb0	/* Long access */
-#define TMU2_TCR	0xfffffeb4	/* Word access */
-#if !defined(CONFIG_CPU_SUBTYPE_SH7720) && !defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU2_TCPR2	0xfffffeb8	/* Long access */
-#endif
-#endif
-
-#endif /* __ASM_CPU_SH3_TIMER_H */
-
diff --git a/arch/sh/include/cpu-sh4/cpu/cache.h b/arch/sh/include/cpu-sh4/cpu/cache.h
index 1c61ebf..7bfb9e8 100644
--- a/arch/sh/include/cpu-sh4/cpu/cache.h
+++ b/arch/sh/include/cpu-sh4/cpu/cache.h
@@ -38,5 +38,7 @@
 #define CACHE_IC_ADDRESS_ARRAY	0xf0000000
 #define CACHE_OC_ADDRESS_ARRAY	0xf4000000
 
+#define RAMCR			0xFF000074
+
 #endif /* __ASM_CPU_SH4_CACHE_H */
 
diff --git a/arch/sh/include/cpu-sh4/cpu/freq.h b/arch/sh/include/cpu-sh4/cpu/freq.h
index 749d1c4..ccf1d99 100644
--- a/arch/sh/include/cpu-sh4/cpu/freq.h
+++ b/arch/sh/include/cpu-sh4/cpu/freq.h
@@ -25,6 +25,24 @@
 #elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
       defined(CONFIG_CPU_SUBTYPE_SH7780)
 #define	FRQCR			0xffc80000
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+#define FRQCRA			0xa4150000
+#define FRQCRB			0xa4150004
+#define VCLKCR			0xa4150048
+
+#define FCLKACR			0xa4150008
+#define FCLKBCR			0xa415000c
+#define FRQCR			FRQCRA
+#define SCLKACR			FCLKACR
+#define SCLKBCR			FCLKBCR
+#define FCLKACR			0xa4150008
+#define FCLKBCR			0xa415000c
+#define IrDACLKCR		0xa4150018
+
+#define MSTPCR0			0xa4150030
+#define MSTPCR1			0xa4150034
+#define MSTPCR2			0xa4150038
+
 #elif defined(CONFIG_CPU_SUBTYPE_SH7785)
 #define FRQCR0			0xffc80000
 #define FRQCR1			0xffc80004
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h
index 4b3096f..738ea43 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7722.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h
@@ -1,6 +1,20 @@
 #ifndef __ASM_SH7722_H__
 #define __ASM_SH7722_H__
 
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->3
+ * MD1: CPG - Clock Mode 0->3
+ * MD2: CPG - Reserved (L: Normal operation)
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PTA */
 	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7723.h b/arch/sh/include/cpu-sh4/cpu/sh7723.h
index 9d2f6d7..14c8ca9 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7723.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7723.h
@@ -1,6 +1,20 @@
 #ifndef __ASM_SH7723_H__
 #define __ASM_SH7723_H__
 
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->3
+ * MD1: CPG - Clock Mode 0->3
+ * MD2: CPG - Reserved (L: Normal operation)
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PTA */
 	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h
new file mode 100644
index 0000000..66fd118
--- /dev/null
+++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h
@@ -0,0 +1,269 @@
+#ifndef __ASM_SH7724_H__
+#define __ASM_SH7724_H__
+
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->7
+ * MD1: CPG - Clock Mode 0->7
+ * MD2: CPG - Clock Mode 0->7
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
+enum {
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE7, GPIO_PTE6, GPIO_PTE5, GPIO_PTE4,
+	GPIO_PTE3, GPIO_PTE2, GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF7, GPIO_PTF6, GPIO_PTF5, GPIO_PTF4,
+	GPIO_PTF3, GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+			      GPIO_PTG5, GPIO_PTG4,
+	GPIO_PTG3, GPIO_PTG2, GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ7, GPIO_PTJ6, GPIO_PTJ5,
+	GPIO_PTJ3, GPIO_PTJ2, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK7, GPIO_PTK6, GPIO_PTK5, GPIO_PTK4,
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4,
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+		   GPIO_PTS6, GPIO_PTS5, GPIO_PTS4,
+	GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT7, GPIO_PTT6, GPIO_PTT5, GPIO_PTT4,
+	GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU7, GPIO_PTU6, GPIO_PTU5, GPIO_PTU4,
+	GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV7, GPIO_PTV6, GPIO_PTV5, GPIO_PTV4,
+	GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW7, GPIO_PTW6, GPIO_PTW5, GPIO_PTW4,
+	GPIO_PTW3, GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX7, GPIO_PTX6, GPIO_PTX5, GPIO_PTX4,
+	GPIO_PTX3, GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY7, GPIO_PTY6, GPIO_PTY5, GPIO_PTY4,
+	GPIO_PTY3, GPIO_PTY2, GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ */
+	GPIO_PTZ7, GPIO_PTZ6, GPIO_PTZ5, GPIO_PTZ4,
+	GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1, GPIO_PTZ0,
+
+	/* BSC (PTA/PTB/PTJ/PTQ/PTR/PTT) */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_D15, GPIO_FN_D14, GPIO_FN_D13, GPIO_FN_D12,
+	GPIO_FN_D11, GPIO_FN_D10, GPIO_FN_D9,  GPIO_FN_D8,
+	GPIO_FN_D7,  GPIO_FN_D6,  GPIO_FN_D5,  GPIO_FN_D4,
+	GPIO_FN_D3,  GPIO_FN_D2,  GPIO_FN_D1,  GPIO_FN_D0,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_CS6B_CE1B,  GPIO_FN_CS6A_CE2B,
+	GPIO_FN_CS5B_CE1A,  GPIO_FN_CS5A_CE2A,
+	GPIO_FN_WE3_ICIOWR, GPIO_FN_WE2_ICIORD,
+	GPIO_FN_IOIS16,     GPIO_FN_WAIT,
+	GPIO_FN_BS,
+
+	/* KEYSC (PTA/PTB)*/
+	GPIO_FN_KEYOUT5_IN5, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYIN4,
+	GPIO_FN_KEYIN3,  GPIO_FN_KEYIN2,  GPIO_FN_KEYIN1,  GPIO_FN_KEYIN0,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT2, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT0,
+
+	/* ATAPI (PTA/PTB/PTK/PTR/PTS/PTW) */
+	GPIO_FN_IDED15, GPIO_FN_IDED14, GPIO_FN_IDED13, GPIO_FN_IDED12,
+	GPIO_FN_IDED11, GPIO_FN_IDED10, GPIO_FN_IDED9,  GPIO_FN_IDED8,
+	GPIO_FN_IDED7,  GPIO_FN_IDED6,  GPIO_FN_IDED5,  GPIO_FN_IDED4,
+	GPIO_FN_IDED3,  GPIO_FN_IDED2,  GPIO_FN_IDED1,  GPIO_FN_IDED0,
+	GPIO_FN_IDEA2,     GPIO_FN_IDEA1,     GPIO_FN_IDEA0,  GPIO_FN_IDEIOWR,
+	GPIO_FN_IODREQ,    GPIO_FN_IDECS0,    GPIO_FN_IDECS1, GPIO_FN_IDEIORD,
+	GPIO_FN_DIRECTION, GPIO_FN_EXBUF_ENB, GPIO_FN_IDERST, GPIO_FN_IODACK,
+	GPIO_FN_IDEINT,    GPIO_FN_IDEIORDY,
+
+	/* TPU (PTB/PTR/PTS) */
+	GPIO_FN_TPUTO3, GPIO_FN_TPUTO2, GPIO_FN_TPUTO1, GPIO_FN_TPUTO0,
+	GPIO_FN_TPUTI3, GPIO_FN_TPUTI2,
+
+	/* LCDC (PTC/PTD/PTE/PTF/PTM/PTR) */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9,  GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7,  GPIO_FN_LCDD6,  GPIO_FN_LCDD5,  GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3,  GPIO_FN_LCDD2,  GPIO_FN_LCDD1,  GPIO_FN_LCDD0,
+	GPIO_FN_LCDVSYN,  GPIO_FN_LCDDISP,  GPIO_FN_LCDRS,  GPIO_FN_LCDHSYN,
+	GPIO_FN_LCDCS,    GPIO_FN_LCDDON,   GPIO_FN_LCDDCK, GPIO_FN_LCDWR,
+	GPIO_FN_LCDVEPWC, GPIO_FN_LCDVCPWC, GPIO_FN_LCDRD,  GPIO_FN_LCDLCLK,
+
+	/* SCIF0 (PTF/PTM) */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 (PTL) */
+	GPIO_FN_SCIF1_SCK, GPIO_FN_SCIF1_RXD, GPIO_FN_SCIF1_TXD,
+
+	/* SCIF2 (PTE/PTF/PTN) with LCDC, VOU */
+	GPIO_FN_SCIF2_L_TXD, GPIO_FN_SCIF2_L_SCK, GPIO_FN_SCIF2_L_RXD,
+	GPIO_FN_SCIF2_V_TXD, GPIO_FN_SCIF2_V_SCK, GPIO_FN_SCIF2_V_RXD,
+
+	/* SCIF3 (PTL/PTN/PTZ) with VOU, IRQ */
+	GPIO_FN_SCIF3_V_SCK, GPIO_FN_SCIF3_V_RXD, GPIO_FN_SCIF3_V_TXD,
+	GPIO_FN_SCIF3_V_CTS, GPIO_FN_SCIF3_V_RTS,
+	GPIO_FN_SCIF3_I_SCK, GPIO_FN_SCIF3_I_RXD, GPIO_FN_SCIF3_I_TXD,
+	GPIO_FN_SCIF3_I_CTS, GPIO_FN_SCIF3_I_RTS,
+
+	/* SCIF4 (PTE) */
+	GPIO_FN_SCIF4_SCK, GPIO_FN_SCIF4_RXD, GPIO_FN_SCIF4_TXD,
+
+	/* SCIF5 (PTS) */
+	GPIO_FN_SCIF5_SCK, GPIO_FN_SCIF5_RXD, GPIO_FN_SCIF5_TXD,
+
+	/* FSI (PTE/PTU/PTV) */
+	GPIO_FN_FSIMCKB,   GPIO_FN_FSIMCKA,    GPIO_FN_FSIOASD,
+	GPIO_FN_FSIIABCK,  GPIO_FN_FSIIALRCK,  GPIO_FN_FSIOABCK,
+	GPIO_FN_FSIOALRCK, GPIO_FN_CLKAUDIOAO, GPIO_FN_FSIIBSD,
+	GPIO_FN_FSIOBSD,   GPIO_FN_FSIIBBCK,   GPIO_FN_FSIIBLRCK,
+	GPIO_FN_FSIOBBCK,  GPIO_FN_FSIOBLRCK,  GPIO_FN_CLKAUDIOBO,
+	GPIO_FN_FSIIASD,
+
+	/* AUD (PTG) */
+	GPIO_FN_AUDCK,   GPIO_FN_AUDSYNC, GPIO_FN_AUDATA3,
+	GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+
+	/* VIO (PTS) (common?) */
+	GPIO_FN_VIO_CKO,
+
+	/* VIO0 (PTH/PTK) */
+	GPIO_FN_VIO0_D15, GPIO_FN_VIO0_D14, GPIO_FN_VIO0_D13, GPIO_FN_VIO0_D12,
+	GPIO_FN_VIO0_D11, GPIO_FN_VIO0_D10, GPIO_FN_VIO0_D9,  GPIO_FN_VIO0_D8,
+	GPIO_FN_VIO0_D7,  GPIO_FN_VIO0_D6,  GPIO_FN_VIO0_D5,  GPIO_FN_VIO0_D4,
+	GPIO_FN_VIO0_D3,  GPIO_FN_VIO0_D2,  GPIO_FN_VIO0_D1,  GPIO_FN_VIO0_D0,
+	GPIO_FN_VIO0_VD,  GPIO_FN_VIO0_CLK,
+	GPIO_FN_VIO0_FLD, GPIO_FN_VIO0_HD,
+
+	/* VIO1 (PTK/PTS) */
+	GPIO_FN_VIO1_D7,  GPIO_FN_VIO1_D6, GPIO_FN_VIO1_D5, GPIO_FN_VIO1_D4,
+	GPIO_FN_VIO1_D3,  GPIO_FN_VIO1_D2, GPIO_FN_VIO1_D1, GPIO_FN_VIO1_D0,
+	GPIO_FN_VIO1_FLD, GPIO_FN_VIO1_HD, GPIO_FN_VIO1_VD, GPIO_FN_VIO1_CLK,
+
+	/* Eth  (PTL/PTN/PTX) */
+	GPIO_FN_RMII_RXD0,    GPIO_FN_RMII_RXD1,
+	GPIO_FN_RMII_TXD0,    GPIO_FN_RMII_TXD1,
+	GPIO_FN_RMII_REF_CLK, GPIO_FN_RMII_TX_EN,
+	GPIO_FN_RMII_RX_ER,   GPIO_FN_RMII_CRS_DV,
+	GPIO_FN_LNKSTA,       GPIO_FN_MDIO,
+	GPIO_FN_MDC,
+
+	/* System (PTJ) */
+	GPIO_FN_PDSTATUS, GPIO_FN_STATUS2, GPIO_FN_STATUS0,
+
+	/* VOU (PTL/PTM/PTN*/
+	GPIO_FN_DV_D15,  GPIO_FN_DV_D14, GPIO_FN_DV_D13,   GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11,  GPIO_FN_DV_D10, GPIO_FN_DV_D9,    GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7,   GPIO_FN_DV_D6,  GPIO_FN_DV_D5,    GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3,   GPIO_FN_DV_D2,  GPIO_FN_DV_D1,    GPIO_FN_DV_D0,
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_VSYNC, GPIO_FN_DV_HSYNC,
+
+	/* MSIOF0 (PTL/PTM) */
+	GPIO_FN_MSIOF0_RXD,   GPIO_FN_MSIOF0_TXD,
+	GPIO_FN_MSIOF0_MCK,   GPIO_FN_MSIOF0_TSCK,
+	GPIO_FN_MSIOF0_SS1,   GPIO_FN_MSIOF0_SS2,
+	GPIO_FN_MSIOF0_TSYNC, GPIO_FN_MSIOF0_RSCK,
+	GPIO_FN_MSIOF0_RSYNC,
+
+	/* MSIOF1 (PTV) */
+	GPIO_FN_MSIOF1_RXD,   GPIO_FN_MSIOF1_TXD,
+	GPIO_FN_MSIOF1_MCK,   GPIO_FN_MSIOF1_TSCK,
+	GPIO_FN_MSIOF1_SS1,   GPIO_FN_MSIOF1_SS2,
+	GPIO_FN_MSIOF1_TSYNC, GPIO_FN_MSIOF1_RSCK,
+	GPIO_FN_MSIOF1_RSYNC,
+
+	/* DMAC (PTU/PTX) */
+	GPIO_FN_DMAC_DACK0, GPIO_FN_DMAC_DREQ0,
+	GPIO_FN_DMAC_DACK1, GPIO_FN_DMAC_DREQ1,
+
+	/* SDHI0 (PTY) */
+	GPIO_FN_SDHI0CD, GPIO_FN_SDHI0WP, GPIO_FN_SDHI0CMD, GPIO_FN_SDHI0CLK,
+	GPIO_FN_SDHI0D3, GPIO_FN_SDHI0D2, GPIO_FN_SDHI0D1,  GPIO_FN_SDHI0D0,
+
+	/* SDHI1 (PTW) */
+	GPIO_FN_SDHI1CD, GPIO_FN_SDHI1WP, GPIO_FN_SDHI1CMD, GPIO_FN_SDHI1CLK,
+	GPIO_FN_SDHI1D3, GPIO_FN_SDHI1D2, GPIO_FN_SDHI1D1,  GPIO_FN_SDHI1D0,
+
+	/* MMC (PTW/PTX)*/
+	GPIO_FN_MMC_D7,  GPIO_FN_MMC_D6,  GPIO_FN_MMC_D5, GPIO_FN_MMC_D4,
+	GPIO_FN_MMC_D3,  GPIO_FN_MMC_D2,  GPIO_FN_MMC_D1, GPIO_FN_MMC_D0,
+	GPIO_FN_MMC_CLK, GPIO_FN_MMC_CMD,
+
+	/* IrDA (PTX) */
+	GPIO_FN_IRDA_OUT, GPIO_FN_IRDA_IN,
+
+	/* TSIF (PTX) */
+	GPIO_FN_TSIF_TS0_SDAT, GPIO_FN_TSIF_TS0_SCK,
+	GPIO_FN_TSIF_TS0_SDEN, GPIO_FN_TSIF_TS0_SPSYNC,
+
+	/* IRQ (PTZ) */
+	GPIO_FN_INTC_IRQ7, GPIO_FN_INTC_IRQ6, GPIO_FN_INTC_IRQ5,
+	GPIO_FN_INTC_IRQ4, GPIO_FN_INTC_IRQ3, GPIO_FN_INTC_IRQ2,
+	GPIO_FN_INTC_IRQ1, GPIO_FN_INTC_IRQ0,
+};
+
+#endif /* __ASM_SH7724_H__ */
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7785.h b/arch/sh/include/cpu-sh4/cpu/sh7785.h
index e4006af..9dc9d91 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7785.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7785.h
@@ -1,6 +1,31 @@
 #ifndef __ASM_SH7785_H__
 #define __ASM_SH7785_H__
 
+/* Boot Mode Pins:
+ *
+ * MODE0: CPG - Initial Pck/Bck Frequency [FRQMR1]
+ * MODE1: CPG - Initial Uck/SHck/DDRck Frequency [FRQMR1]
+ * MODE2: CPG - Reserved (L: Normal operation)
+ * MODE3: CPG - Reserved (L: Normal operation)
+ * MODE4: CPG - Initial PLL setting (72x/36x)
+ * MODE5: LBSC - Area0 Memory Type / Bus Width [CS0BCR.8]
+ * MODE6: LBSC - Area0 Memory Type / Bus Width [CS0BCR.9]
+ * MODE7: LBSC - Area0 Memory Type / Bus Width [CS0BCR.3]
+ * MODE8: LBSC - Endian Mode (L: Big, H: Little) [BCR.31]
+ * MODE9: LBSC - Master/Slave Mode (L: Slave) [BCR.30]
+ * MODE10: CPG - Clock Input (L: Ext Clk, H: Crystal)
+ * MODE11: PCI - Pin Mode (LL: PCI host, LH: PCI slave)
+ * MODE12: PCI - Pin Mode (HL: Local bus, HH: DU)
+ * MODE13: Boot Address Mode (L: 29-bit, H: 32-bit)
+ * MODE14: Reserved (H: Normal operation)
+ *
+ * More information in sh7785 manual Rev.1.00, page 1628.
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PA */
 	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
diff --git a/arch/sh/include/cpu-sh4/cpu/timer.h b/arch/sh/include/cpu-sh4/cpu/timer.h
deleted file mode 100644
index d1e796b..0000000
--- a/arch/sh/include/cpu-sh4/cpu/timer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/asm-sh/cpu-sh4/timer.h
- *
- * Copyright (C) 2004 Lineo Solutions, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH4_TIMER_H
-#define __ASM_CPU_SH4_TIMER_H
-
-/*
- * ---------------------------------------------------------------------------
- * TMU Common definitions for SH4 processors
- *	SH7750S/SH7750R
- *	SH7751/SH7751R
- *	SH7760
- *	SH-X3
- * ---------------------------------------------------------------------------
- */
-#ifdef CONFIG_CPU_SUBTYPE_SHX3
-#define TMU_012_BASE	0xffc10000
-#define TMU_345_BASE	0xffc20000
-#else
-#define TMU_012_BASE	0xffd80000
-#define TMU_345_BASE	0xfe100000
-#endif
-
-#define TMU_TOCR	TMU_012_BASE	/* Not supported on all CPUs */
-
-#define TMU_012_TSTR	(TMU_012_BASE + 0x04)
-#define TMU_345_TSTR	(TMU_345_BASE + 0x04)
-
-#define TMU0_TCOR	(TMU_012_BASE + 0x08)
-#define TMU0_TCNT	(TMU_012_BASE + 0x0c)
-#define TMU0_TCR	(TMU_012_BASE + 0x10)
-
-#define TMU1_TCOR       (TMU_012_BASE + 0x14)
-#define TMU1_TCNT       (TMU_012_BASE + 0x18)
-#define TMU1_TCR        (TMU_012_BASE + 0x1c)
-
-#define TMU2_TCOR       (TMU_012_BASE + 0x20)
-#define TMU2_TCNT       (TMU_012_BASE + 0x24)
-#define TMU2_TCR	(TMU_012_BASE + 0x28)
-#define TMU2_TCPR	(TMU_012_BASE + 0x2c)
-
-#define TMU3_TCOR	(TMU_345_BASE + 0x08)
-#define TMU3_TCNT	(TMU_345_BASE + 0x0c)
-#define TMU3_TCR	(TMU_345_BASE + 0x10)
-
-#define TMU4_TCOR	(TMU_345_BASE + 0x14)
-#define TMU4_TCNT	(TMU_345_BASE + 0x18)
-#define TMU4_TCR	(TMU_345_BASE + 0x1c)
-
-#define TMU5_TCOR	(TMU_345_BASE + 0x20)
-#define TMU5_TCNT	(TMU_345_BASE + 0x24)
-#define TMU5_TCR	(TMU_345_BASE + 0x28)
-
-#endif /* __ASM_CPU_SH4_TIMER_H */
diff --git a/arch/sh/include/cpu-sh5/cpu/irq.h b/arch/sh/include/cpu-sh5/cpu/irq.h
index f0f0756e..0ccf257 100644
--- a/arch/sh/include/cpu-sh5/cpu/irq.h
+++ b/arch/sh/include/cpu-sh5/cpu/irq.h
@@ -111,7 +111,6 @@
 #define TOP_PRIORITY	15
 
 extern int intc_evt_to_irq[(0xE20/0x20)+1];
-int intc_irq_describe(char* p, int irq);
 extern int platform_int_priority[NR_INTC_IRQS];
 
 #endif /* __ASM_SH_CPU_SH5_IRQ_H */
diff --git a/arch/sh/include/mach-common/mach/sh7785lcr.h b/arch/sh/include/mach-common/mach/sh7785lcr.h
index 1ce27d5..90011d4 100644
--- a/arch/sh/include/mach-common/mach/sh7785lcr.h
+++ b/arch/sh/include/mach-common/mach/sh7785lcr.h
@@ -9,11 +9,11 @@
  * -----------------------------+---------------+---------------
  * 0x00000000 - 0x03ffffff(CS0)	| NOR Flash	| NOR Flash
  * 0x04000000 - 0x05ffffff(CS1)	| PLD		| PLD
- * 0x06000000 - 0x07ffffff(CS1)	| reserved	| I2C
+ * 0x06000000 - 0x07ffffff(CS1)	| I2C		| I2C
  * 0x08000000 - 0x0bffffff(CS2)	| USB		| DDR SDRAM
  * 0x0c000000 - 0x0fffffff(CS3)	| SD		| DDR SDRAM
  * 0x10000000 - 0x13ffffff(CS4)	| SM107		| SM107
- * 0x14000000 - 0x17ffffff(CS5)	| I2C		| USB
+ * 0x14000000 - 0x17ffffff(CS5)	| reserved	| USB
  * 0x18000000 - 0x1bffffff(CS6)	| reserved	| SD
  * 0x40000000 - 0x5fffffff	| DDR SDRAM	| (cannot use)
  *
@@ -32,6 +32,9 @@
 #define PLD_VERSR		(PLD_BASE_ADDR + 0x0c)
 #define PLD_MMSR		(PLD_BASE_ADDR + 0x0e)
 
+#define PCA9564_ADDR		0x06000000	/* I2C */
+#define PCA9564_SIZE		0x00000100
+
 #define SM107_MEM_ADDR		0x10000000
 #define SM107_MEM_SIZE		0x00e00000
 #define SM107_REG_ADDR		0x13e00000
@@ -40,16 +43,13 @@
 #if defined(CONFIG_SH_SH7785LCR_29BIT_PHYSMAPS)
 #define R8A66597_ADDR		0x14000000	/* USB */
 #define CG200_ADDR		0x18000000	/* SD */
-#define PCA9564_ADDR		0x06000000	/* I2C */
 #else
 #define R8A66597_ADDR		0x08000000
 #define CG200_ADDR		0x0c000000
-#define PCA9564_ADDR		0x14000000
 #endif
 
 #define R8A66597_SIZE		0x00000100
 #define CG200_SIZE		0x00010000
-#define PCA9564_SIZE		0x00000100
 
 #endif  /* __ASM_SH_RENESAS_SH7785LCR_H */
 
diff --git a/arch/sh/include/mach-dreamcast/mach/pci.h b/arch/sh/include/mach-dreamcast/mach/pci.h
index 75fc900..0314d97 100644
--- a/arch/sh/include/mach-dreamcast/mach/pci.h
+++ b/arch/sh/include/mach-dreamcast/mach/pci.h
@@ -21,5 +21,7 @@
 
 #define	GAPSPCI_IRQ		HW_EVENT_EXTERNAL
 
+extern struct pci_ops gapspci_pci_ops;
+
 #endif /* __ASM_SH_DREAMCAST_PCI_H */
 
diff --git a/arch/sh/include/mach-se/mach/se7724.h b/arch/sh/include/mach-se/mach/se7724.h
new file mode 100644
index 0000000..74164b6
--- /dev/null
+++ b/arch/sh/include/mach-se/mach/se7724.h
@@ -0,0 +1,67 @@
+#ifndef __ASM_SH_SE7724_H
+#define __ASM_SH_SE7724_H
+
+/*
+ * linux/include/asm-sh/se7724.h
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Hitachi UL SolutionEngine 7724 Support.
+ *
+ * Based on se7722.h
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#include <asm/addrspace.h>
+
+#define PA_LED		(0xba203000)	/* 8bit LED */
+#define IRQ_MODE	(0xba200010)
+#define IRQ0_SR		(0xba200014)
+#define IRQ1_SR		(0xba200018)
+#define IRQ2_SR		(0xba20001c)
+#define IRQ0_MR		(0xba200020)
+#define IRQ1_MR		(0xba200024)
+#define IRQ2_MR		(0xba200028)
+
+/* IRQ */
+#define IRQ0_IRQ        32
+#define IRQ1_IRQ        33
+#define IRQ2_IRQ        34
+
+/* Bits in IRQ012 registers */
+#define SE7724_FPGA_IRQ_BASE	220
+
+/* IRQ0 */
+#define IRQ0_BASE	SE7724_FPGA_IRQ_BASE
+#define IRQ0_KEY	(IRQ0_BASE + 12)
+#define IRQ0_RMII	(IRQ0_BASE + 13)
+#define IRQ0_SMC	(IRQ0_BASE + 14)
+#define IRQ0_MASK	0x7fff
+#define IRQ0_END	IRQ0_SMC
+/* IRQ1 */
+#define IRQ1_BASE	(IRQ0_END + 1)
+#define IRQ1_TS		(IRQ1_BASE + 0)
+#define IRQ1_MASK	0x0001
+#define IRQ1_END	IRQ1_TS
+/* IRQ2 */
+#define IRQ2_BASE	(IRQ1_END + 1)
+#define IRQ2_USB0	(IRQ1_BASE + 0)
+#define IRQ2_USB1	(IRQ1_BASE + 1)
+#define IRQ2_MASK	0x0003
+#define IRQ2_END	IRQ2_USB1
+
+#define SE7724_FPGA_IRQ_NR	(IRQ2_END - IRQ0_BASE)
+
+/* arch/sh/boards/se/7724/irq.c */
+void init_se7724_IRQ(void);
+
+#define __IO_PREFIX		se7724
+#include <asm/io_generic.h>
+
+#endif  /* __ASM_SH_SE7724_H */
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 82a3a15..9411e3e 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -11,10 +11,10 @@
 
 obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o			\
 	   machvec.o process_32.o ptrace_32.o setup.o signal_32.o	\
-	   sys_sh.o sys_sh32.o syscalls_32.o time_32.o topology.o	\
+	   sys_sh.o sys_sh32.o syscalls_32.o time.o topology.o	\
 	   traps.o traps_32.o
 
-obj-y				+= cpu/ timers/
+obj-y				+= cpu/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SH_STANDARD_BIOS)	+= sh_bios.o
@@ -32,4 +32,6 @@
 obj-$(CONFIG_DUMP_CODE)		+= disassemble.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
+
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index fe425d7..67b9f6c 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -2,19 +2,18 @@
 
 obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \
 	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
-	   syscalls_64.o time_64.o topology.o traps.o traps_64.o
+	   syscalls_64.o time.o topology.o traps.o traps_64.o
 
-obj-y				+= cpu/ timers/
-obj-$(CONFIG_VSYSCALL)		+= vsyscall/
+obj-y				+= cpu/
 obj-$(CONFIG_SMP)		+= smp.o
-obj-$(CONFIG_SH_STANDARD_BIOS)	+= sh_bios.o
 obj-$(CONFIG_SH_CPU_FREQ)	+= cpufreq.o
 obj-$(CONFIG_MODULES)		+= sh_ksyms_64.o module.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
+
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index 2600641..eecad7c 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -17,5 +17,6 @@
 
 obj-$(CONFIG_UBC_WAKEUP)	+= ubc.o
 obj-$(CONFIG_SH_ADC)		+= adc.o
+obj-$(CONFIG_SH_CLK_CPG)	+= clock-cpg.o
 
 obj-y	+= irq/ init.o clock.o
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
new file mode 100644
index 0000000..275942e
--- /dev/null
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -0,0 +1,256 @@
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/bootmem.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+static int sh_clk_mstp32_enable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) & ~(1 << clk->enable_bit),
+		     clk->enable_reg);
+	return 0;
+}
+
+static void sh_clk_mstp32_disable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) | (1 << clk->enable_bit),
+		     clk->enable_reg);
+}
+
+static struct clk_ops sh_clk_mstp32_clk_ops = {
+	.enable		= sh_clk_mstp32_enable,
+	.disable	= sh_clk_mstp32_disable,
+	.recalc		= followparent_recalc,
+};
+
+int __init sh_clk_mstp32_register(struct clk *clks, int nr)
+{
+	struct clk *clkp;
+	int ret = 0;
+	int k;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+		clkp->ops = &sh_clk_mstp32_clk_ops;
+		ret |= clk_register(clkp);
+	}
+
+	return ret;
+}
+
+static long sh_clk_div_round_rate(struct clk *clk, unsigned long rate)
+{
+	return clk_rate_table_round(clk, clk->freq_table, rate);
+}
+
+static int sh_clk_div6_divisors[64] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+	17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+	33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+	49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+};
+
+static struct clk_div_mult_table sh_clk_div6_table = {
+	.divisors = sh_clk_div6_divisors,
+	.nr_divisors = ARRAY_SIZE(sh_clk_div6_divisors),
+};
+
+static unsigned long sh_clk_div6_recalc(struct clk *clk)
+{
+	struct clk_div_mult_table *table = &sh_clk_div6_table;
+	unsigned int idx;
+
+	clk_rate_table_build(clk, clk->freq_table, table->nr_divisors,
+			     table, NULL);
+
+	idx = __raw_readl(clk->enable_reg) & 0x003f;
+
+	return clk->freq_table[idx].frequency;
+}
+
+static int sh_clk_div6_set_rate(struct clk *clk,
+				unsigned long rate, int algo_id)
+{
+	unsigned long value;
+	int idx;
+
+	idx = clk_rate_table_find(clk, clk->freq_table, rate);
+	if (idx < 0)
+		return idx;
+
+	value = __raw_readl(clk->enable_reg);
+	value &= ~0x3f;
+	value |= idx;
+	__raw_writel(value, clk->enable_reg);
+	return 0;
+}
+
+static int sh_clk_div6_enable(struct clk *clk)
+{
+	unsigned long value;
+	int ret;
+
+	ret = sh_clk_div6_set_rate(clk, clk->rate, 0);
+	if (ret == 0) {
+		value = __raw_readl(clk->enable_reg);
+		value &= ~0x100; /* clear stop bit to enable clock */
+		__raw_writel(value, clk->enable_reg);
+	}
+	return ret;
+}
+
+static void sh_clk_div6_disable(struct clk *clk)
+{
+	unsigned long value;
+
+	value = __raw_readl(clk->enable_reg);
+	value |= 0x100; /* stop clock */
+	value |= 0x3f; /* VDIV bits must be non-zero, overwrite divider */
+	__raw_writel(value, clk->enable_reg);
+}
+
+static struct clk_ops sh_clk_div6_clk_ops = {
+	.recalc		= sh_clk_div6_recalc,
+	.round_rate	= sh_clk_div_round_rate,
+	.set_rate	= sh_clk_div6_set_rate,
+	.enable		= sh_clk_div6_enable,
+	.disable	= sh_clk_div6_disable,
+};
+
+int __init sh_clk_div6_register(struct clk *clks, int nr)
+{
+	struct clk *clkp;
+	void *freq_table;
+	int nr_divs = sh_clk_div6_table.nr_divisors;
+	int freq_table_size = sizeof(struct cpufreq_frequency_table);
+	int ret = 0;
+	int k;
+
+	freq_table_size *= (nr_divs + 1);
+
+	freq_table = alloc_bootmem(freq_table_size * nr);
+	if (!freq_table)
+		return -ENOMEM;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+
+		clkp->ops = &sh_clk_div6_clk_ops;
+		clkp->id = -1;
+		clkp->freq_table = freq_table + (k * freq_table_size);
+		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+
+		ret = clk_register(clkp);
+	}
+
+	return ret;
+}
+
+static unsigned long sh_clk_div4_recalc(struct clk *clk)
+{
+	struct clk_div_mult_table *table = clk->priv;
+	unsigned int idx;
+
+	clk_rate_table_build(clk, clk->freq_table, table->nr_divisors,
+			     table, &clk->arch_flags);
+
+	idx = (__raw_readl(clk->enable_reg) >> clk->enable_bit) & 0x000f;
+
+	return clk->freq_table[idx].frequency;
+}
+
+static struct clk_ops sh_clk_div4_clk_ops = {
+	.recalc		= sh_clk_div4_recalc,
+	.round_rate	= sh_clk_div_round_rate,
+};
+
+int __init sh_clk_div4_register(struct clk *clks, int nr,
+				struct clk_div_mult_table *table)
+{
+	struct clk *clkp;
+	void *freq_table;
+	int nr_divs = table->nr_divisors;
+	int freq_table_size = sizeof(struct cpufreq_frequency_table);
+	int ret = 0;
+	int k;
+
+	freq_table_size *= (nr_divs + 1);
+
+	freq_table = alloc_bootmem(freq_table_size * nr);
+	if (!freq_table)
+		return -ENOMEM;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+
+		clkp->ops = &sh_clk_div4_clk_ops;
+		clkp->id = -1;
+		clkp->priv = table;
+
+		clkp->freq_table = freq_table + (k * freq_table_size);
+		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+
+		ret = clk_register(clkp);
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_SH_CLK_CPG_LEGACY
+static struct clk master_clk = {
+	.name		= "master_clk",
+	.flags		= CLK_ENABLE_ON_INIT,
+	.rate		= CONFIG_SH_PCLK_FREQ,
+};
+
+static struct clk peripheral_clk = {
+	.name		= "peripheral_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static struct clk bus_clk = {
+	.name		= "bus_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static struct clk cpu_clk = {
+	.name		= "cpu_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+/*
+ * The ordering of these clocks matters, do not change it.
+ */
+static struct clk *onchip_clocks[] = {
+	&master_clk,
+	&peripheral_clk,
+	&bus_clk,
+	&cpu_clk,
+};
+
+int __init __deprecated cpg_clk_init(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(onchip_clocks); i++) {
+		struct clk *clk = onchip_clocks[i];
+		arch_init_clk_ops(&clk->ops, i);
+		if (clk->ops)
+			ret |= clk_register(clk);
+	}
+
+	return ret;
+}
+
+/*
+ * Placeholder for compatability, until the lazy CPUs do this
+ * on their own.
+ */
+int __init __weak arch_clk_init(void)
+{
+	return cpg_clk_init();
+}
+#endif /* CONFIG_SH_CPG_CLK_LEGACY */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 1dc8964..f3a46be 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -1,15 +1,19 @@
 /*
  * arch/sh/kernel/cpu/clock.c - SuperH clock framework
  *
- *  Copyright (C) 2005, 2006, 2007  Paul Mundt
+ *  Copyright (C) 2005 - 2009  Paul Mundt
  *
  * This clock framework is derived from the OMAP version by:
  *
- *	Copyright (C) 2004 - 2005 Nokia Corporation
+ *	Copyright (C) 2004 - 2008 Nokia Corporation
  *	Written by Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
  *
  *  Modified for omap shared clock framework by Tony Lindgren <tony@atomide.com>
  *
+ *  With clkdev bits:
+ *
+ *	Copyright (C) 2008 Russell King.
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -19,134 +23,159 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/kref.h>
 #include <linux/kobject.h>
 #include <linux/sysdev.h>
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
+#include <linux/cpufreq.h>
 #include <asm/clock.h>
-#include <asm/timer.h>
+#include <asm/machvec.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
 static DEFINE_MUTEX(clock_list_sem);
 
-/*
- * Each subtype is expected to define the init routines for these clocks,
- * as each subtype (or processor family) will have these clocks at the
- * very least. These are all provided through the CPG, which even some of
- * the more quirky parts (such as ST40, SH4-202, etc.) still have.
- *
- * The processor-specific code is expected to register any additional
- * clock sources that are of interest.
- */
-static struct clk master_clk = {
-	.name		= "master_clk",
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
-	.rate		= CONFIG_SH_PCLK_FREQ,
-};
-
-static struct clk module_clk = {
-	.name		= "module_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
-};
-
-static struct clk bus_clk = {
-	.name		= "bus_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
-};
-
-static struct clk cpu_clk = {
-	.name		= "cpu_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED,
-};
-
-/*
- * The ordering of these clocks matters, do not change it.
- */
-static struct clk *onchip_clocks[] = {
-	&master_clk,
-	&module_clk,
-	&bus_clk,
-	&cpu_clk,
-};
-
-static void propagate_rate(struct clk *clk)
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap)
 {
-	struct clk *clkp;
+	unsigned long mult, div;
+	unsigned long freq;
+	int i;
 
-	list_for_each_entry(clkp, &clock_list, node) {
-		if (likely(clkp->parent != clk))
-			continue;
-		if (likely(clkp->ops && clkp->ops->recalc))
-			clkp->ops->recalc(clkp);
-		if (unlikely(clkp->flags & CLK_RATE_PROPAGATES))
-			propagate_rate(clkp);
+	for (i = 0; i < nr_freqs; i++) {
+		div = 1;
+		mult = 1;
+
+		if (src_table->divisors && i < src_table->nr_divisors)
+			div = src_table->divisors[i];
+
+		if (src_table->multipliers && i < src_table->nr_multipliers)
+			mult = src_table->multipliers[i];
+
+		if (!div || !mult || (bitmap && !test_bit(i, bitmap)))
+			freq = CPUFREQ_ENTRY_INVALID;
+		else
+			freq = clk->parent->rate * mult / div;
+
+		freq_table[i].index = i;
+		freq_table[i].frequency = freq;
 	}
+
+	/* Termination entry */
+	freq_table[i].index = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
 }
 
-static int __clk_enable(struct clk *clk)
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate)
 {
-	/*
-	 * See if this is the first time we're enabling the clock, some
-	 * clocks that are always enabled still require "special"
-	 * initialization. This is especially true if the clock mode
-	 * changes and the clock needs to hunt for the proper set of
-	 * divisors to use before it can effectively recalc.
-	 */
-	if (unlikely(atomic_read(&clk->kref.refcount) == 1))
-		if (clk->ops && clk->ops->init)
-			clk->ops->init(clk);
+	unsigned long rate_error, rate_error_prev = ~0UL;
+	unsigned long rate_best_fit = rate;
+	unsigned long highest, lowest;
+	int i;
 
-	kref_get(&clk->kref);
+	highest = lowest = 0;
 
-	if (clk->flags & CLK_ALWAYS_ENABLED)
-		return 0;
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
 
-	if (likely(clk->ops && clk->ops->enable))
-		clk->ops->enable(clk);
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq > highest)
+			highest = freq;
+		if (freq < lowest)
+			lowest = freq;
+
+		rate_error = abs(freq - rate);
+		if (rate_error < rate_error_prev) {
+			rate_best_fit = freq;
+			rate_error_prev = rate_error;
+		}
+
+		if (rate_error == 0)
+			break;
+	}
+
+	if (rate >= highest)
+		rate_best_fit = highest;
+	if (rate <= lowest)
+		rate_best_fit = lowest;
+
+	return rate_best_fit;
+}
+
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate)
+{
+	int i;
+
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq == rate)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+/* Used for clocks that always have same value as the parent clock */
+unsigned long followparent_recalc(struct clk *clk)
+{
+	return clk->parent ? clk->parent->rate : 0;
+}
+
+int clk_reparent(struct clk *child, struct clk *parent)
+{
+	list_del_init(&child->sibling);
+	if (parent)
+		list_add(&child->sibling, &parent->children);
+	child->parent = parent;
+
+	/* now do the debugfs renaming to reattach the child
+	   to the proper parent */
 
 	return 0;
 }
 
-int clk_enable(struct clk *clk)
+/* Propagate rate to children */
+void propagate_rate(struct clk *tclk)
 {
-	unsigned long flags;
-	int ret;
+	struct clk *clkp;
 
-	if (!clk)
-		return -EINVAL;
+	list_for_each_entry(clkp, &tclk->children, sibling) {
+		if (clkp->ops && clkp->ops->recalc)
+			clkp->rate = clkp->ops->recalc(clkp);
 
-	clk_enable(clk->parent);
-
-	spin_lock_irqsave(&clock_lock, flags);
-	ret = __clk_enable(clk);
-	spin_unlock_irqrestore(&clock_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(clk_enable);
-
-static void clk_kref_release(struct kref *kref)
-{
-	/* Nothing to do */
+		propagate_rate(clkp);
+	}
 }
 
 static void __clk_disable(struct clk *clk)
 {
-	int count = kref_put(&clk->kref, clk_kref_release);
-
-	if (clk->flags & CLK_ALWAYS_ENABLED)
+	if (clk->usecount == 0) {
+		printk(KERN_ERR "Trying disable clock %s with 0 usecount\n",
+		       clk->name);
+		WARN_ON(1);
 		return;
+	}
 
-	if (!count) {	/* count reaches zero, disable the clock */
+	if (!(--clk->usecount)) {
 		if (likely(clk->ops && clk->ops->disable))
 			clk->ops->disable(clk);
+		if (likely(clk->parent))
+			__clk_disable(clk->parent);
 	}
 }
 
@@ -160,29 +189,98 @@
 	spin_lock_irqsave(&clock_lock, flags);
 	__clk_disable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
-
-	clk_disable(clk->parent);
 }
 EXPORT_SYMBOL_GPL(clk_disable);
 
+static int __clk_enable(struct clk *clk)
+{
+	int ret = 0;
+
+	if (clk->usecount++ == 0) {
+		if (clk->parent) {
+			ret = __clk_enable(clk->parent);
+			if (unlikely(ret))
+				goto err;
+		}
+
+		if (clk->ops && clk->ops->enable) {
+			ret = clk->ops->enable(clk);
+			if (ret) {
+				if (clk->parent)
+					__clk_disable(clk->parent);
+				goto err;
+			}
+		}
+	}
+
+	return ret;
+err:
+	clk->usecount--;
+	return ret;
+}
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!clk)
+		return -EINVAL;
+
+	spin_lock_irqsave(&clock_lock, flags);
+	ret = __clk_enable(clk);
+	spin_unlock_irqrestore(&clock_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_enable);
+
+static LIST_HEAD(root_clks);
+
+/**
+ * recalculate_root_clocks - recalculate and propagate all root clocks
+ *
+ * Recalculates all root clocks (clocks with no parent), which if the
+ * clock's .recalc is set correctly, should also propagate their rates.
+ * Called at init.
+ */
+void recalculate_root_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &root_clks, sibling) {
+		if (clkp->ops && clkp->ops->recalc)
+			clkp->rate = clkp->ops->recalc(clkp);
+		propagate_rate(clkp);
+	}
+}
+
 int clk_register(struct clk *clk)
 {
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	/*
+	 * trap out already registered clocks
+	 */
+	if (clk->node.next || clk->node.prev)
+		return 0;
+
 	mutex_lock(&clock_list_sem);
 
+	INIT_LIST_HEAD(&clk->children);
+	clk->usecount = 0;
+
+	if (clk->parent)
+		list_add(&clk->sibling, &clk->parent->children);
+	else
+		list_add(&clk->sibling, &root_clks);
+
 	list_add(&clk->node, &clock_list);
-	kref_init(&clk->kref);
-
+	if (clk->ops && clk->ops->init)
+		clk->ops->init(clk);
 	mutex_unlock(&clock_list_sem);
 
-	if (clk->flags & CLK_ALWAYS_ENABLED) {
-		pr_debug( "Clock '%s' is ALWAYS_ENABLED\n", clk->name);
-		if (clk->ops && clk->ops->init)
-			clk->ops->init(clk);
-		if (clk->ops && clk->ops->enable)
-			clk->ops->enable(clk);
-		pr_debug( "Enabled.");
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(clk_register);
@@ -190,11 +288,21 @@
 void clk_unregister(struct clk *clk)
 {
 	mutex_lock(&clock_list_sem);
+	list_del(&clk->sibling);
 	list_del(&clk->node);
 	mutex_unlock(&clock_list_sem);
 }
 EXPORT_SYMBOL_GPL(clk_unregister);
 
+static void clk_enable_init_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &clock_list, node)
+		if (clkp->flags & CLK_ENABLE_ON_INIT)
+			clk_enable(clkp);
+}
+
 unsigned long clk_get_rate(struct clk *clk)
 {
 	return clk->rate;
@@ -210,56 +318,59 @@
 int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
 {
 	int ret = -EOPNOTSUPP;
+	unsigned long flags;
+
+	spin_lock_irqsave(&clock_lock, flags);
 
 	if (likely(clk->ops && clk->ops->set_rate)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&clock_lock, flags);
 		ret = clk->ops->set_rate(clk, rate, algo_id);
-		spin_unlock_irqrestore(&clock_lock, flags);
+		if (ret != 0)
+			goto out_unlock;
+	} else {
+		clk->rate = rate;
+		ret = 0;
 	}
 
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
+	if (clk->ops && clk->ops->recalc)
+		clk->rate = clk->ops->recalc(clk);
+
+	propagate_rate(clk);
+
+out_unlock:
+	spin_unlock_irqrestore(&clock_lock, flags);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(clk_set_rate_ex);
 
-void clk_recalc_rate(struct clk *clk)
-{
-	if (likely(clk->ops && clk->ops->recalc)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&clock_lock, flags);
-		clk->ops->recalc(clk);
-		spin_unlock_irqrestore(&clock_lock, flags);
-	}
-
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
-}
-EXPORT_SYMBOL_GPL(clk_recalc_rate);
-
 int clk_set_parent(struct clk *clk, struct clk *parent)
 {
+	unsigned long flags;
 	int ret = -EINVAL;
-	struct clk *old;
 
 	if (!parent || !clk)
 		return ret;
+	if (clk->parent == parent)
+		return 0;
 
-	old = clk->parent;
-	if (likely(clk->ops && clk->ops->set_parent)) {
-		unsigned long flags;
-		spin_lock_irqsave(&clock_lock, flags);
-		ret = clk->ops->set_parent(clk, parent);
-		spin_unlock_irqrestore(&clock_lock, flags);
-		clk->parent = (ret ? old : parent);
-	}
+	spin_lock_irqsave(&clock_lock, flags);
+	if (clk->usecount == 0) {
+		if (clk->ops->set_parent)
+			ret = clk->ops->set_parent(clk, parent);
+		else
+			ret = clk_reparent(clk, parent);
 
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
+		if (ret == 0) {
+			pr_debug("clock: set parent of %s to %s (new rate %ld)\n",
+				 clk->name, clk->parent->name, clk->rate);
+			if (clk->ops->recalc)
+				clk->rate = clk->ops->recalc(clk);
+			propagate_rate(clk);
+		}
+	} else
+		ret = -EBUSY;
+	spin_unlock_irqrestore(&clock_lock, flags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(clk_set_parent);
@@ -287,14 +398,69 @@
 EXPORT_SYMBOL_GPL(clk_round_rate);
 
 /*
+ * Find the correct struct clk for the device and connection ID.
+ * We do slightly fuzzy matching here:
+ *  An entry with a NULL ID is assumed to be a wildcard.
+ *  If an entry has a device ID, it must match
+ *  If an entry has a connection ID, it must match
+ * Then we take the most specific entry - with the following
+ * order of precidence: dev+con > dev only > con only.
+ */
+static struct clk *clk_find(const char *dev_id, const char *con_id)
+{
+	struct clk_lookup *p;
+	struct clk *clk = NULL;
+	int match, best = 0;
+
+	list_for_each_entry(p, &clock_list, node) {
+		match = 0;
+		if (p->dev_id) {
+			if (!dev_id || strcmp(p->dev_id, dev_id))
+				continue;
+			match += 2;
+		}
+		if (p->con_id) {
+			if (!con_id || strcmp(p->con_id, con_id))
+				continue;
+			match += 1;
+		}
+		if (match == 0)
+			continue;
+
+		if (match > best) {
+			clk = p->clk;
+			best = match;
+		}
+	}
+	return clk;
+}
+
+struct clk *clk_get_sys(const char *dev_id, const char *con_id)
+{
+	struct clk *clk;
+
+	mutex_lock(&clock_list_sem);
+	clk = clk_find(dev_id, con_id);
+	mutex_unlock(&clock_list_sem);
+
+	return clk ? clk : ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(clk_get_sys);
+
+/*
  * Returns a clock. Note that we first try to use device id on the bus
  * and clock name. If this fails, we try to use clock name only.
  */
 struct clk *clk_get(struct device *dev, const char *id)
 {
+	const char *dev_id = dev ? dev_name(dev) : NULL;
 	struct clk *p, *clk = ERR_PTR(-ENOENT);
 	int idno;
 
+	clk = clk_get_sys(dev_id, id);
+	if (clk && !IS_ERR(clk))
+		return clk;
+
 	if (dev == NULL || dev->bus != &platform_bus_type)
 		idno = -1;
 	else
@@ -330,36 +496,6 @@
 }
 EXPORT_SYMBOL_GPL(clk_put);
 
-void __init __attribute__ ((weak))
-arch_init_clk_ops(struct clk_ops **ops, int type)
-{
-}
-
-int __init __attribute__ ((weak))
-arch_clk_init(void)
-{
-	return 0;
-}
-
-static int show_clocks(char *buf, char **start, off_t off,
-		       int len, int *eof, void *data)
-{
-	struct clk *clk;
-	char *p = buf;
-
-	list_for_each_entry_reverse(clk, &clock_list, node) {
-		unsigned long rate = clk_get_rate(clk);
-
-		p += sprintf(p, "%-12s\t: %ld.%02ldMHz\t%s\n", clk->name,
-			     rate / 1000000, (rate % 1000000) / 10000,
-			     ((clk->flags & CLK_ALWAYS_ENABLED) ||
-			      (atomic_read(&clk->kref.refcount) != 1)) ?
-			     "enabled" : "disabled");
-	}
-
-	return p - buf;
-}
-
 #ifdef CONFIG_PM
 static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 {
@@ -369,20 +505,22 @@
 	switch (state.event) {
 	case PM_EVENT_ON:
 		/* Resumeing from hibernation */
-		if (prev_state.event == PM_EVENT_FREEZE) {
-			list_for_each_entry(clkp, &clock_list, node)
-				if (likely(clkp->ops)) {
-					unsigned long rate = clkp->rate;
+		if (prev_state.event != PM_EVENT_FREEZE)
+			break;
 
-					if (likely(clkp->ops->set_parent))
-						clkp->ops->set_parent(clkp,
-							clkp->parent);
-					if (likely(clkp->ops->set_rate))
-						clkp->ops->set_rate(clkp,
-							rate, NO_CHANGE);
-					else if (likely(clkp->ops->recalc))
-						clkp->ops->recalc(clkp);
-					}
+		list_for_each_entry(clkp, &clock_list, node) {
+			if (likely(clkp->ops)) {
+				unsigned long rate = clkp->rate;
+
+				if (likely(clkp->ops->set_parent))
+					clkp->ops->set_parent(clkp,
+						clkp->parent);
+				if (likely(clkp->ops->set_rate))
+					clkp->ops->set_rate(clkp,
+						rate, NO_CHANGE);
+				else if (likely(clkp->ops->recalc))
+					clkp->rate = clkp->ops->recalc(clkp);
+			}
 		}
 		break;
 	case PM_EVENT_FREEZE:
@@ -426,34 +564,116 @@
 
 int __init clk_init(void)
 {
-	int i, ret = 0;
+	int ret;
 
-	BUG_ON(!master_clk.rate);
-
-	for (i = 0; i < ARRAY_SIZE(onchip_clocks); i++) {
-		struct clk *clk = onchip_clocks[i];
-
-		arch_init_clk_ops(&clk->ops, i);
-		ret |= clk_register(clk);
+	ret = arch_clk_init();
+	if (unlikely(ret)) {
+		pr_err("%s: CPU clock registration failed.\n", __func__);
+		return ret;
 	}
 
-	ret |= arch_clk_init();
+	if (sh_mv.mv_clk_init) {
+		ret = sh_mv.mv_clk_init();
+		if (unlikely(ret)) {
+			pr_err("%s: machvec clock initialization failed.\n",
+			       __func__);
+			return ret;
+		}
+	}
 
 	/* Kick the child clocks.. */
-	propagate_rate(&master_clk);
-	propagate_rate(&bus_clk);
+	recalculate_root_clocks();
+
+	/* Enable the necessary init clocks */
+	clk_enable_init_clocks();
 
 	return ret;
 }
 
-static int __init clk_proc_init(void)
-{
-	struct proc_dir_entry *p;
-	p = create_proc_read_entry("clocks", S_IRUSR, NULL,
-				   show_clocks, NULL);
-	if (unlikely(!p))
-		return -EINVAL;
+/*
+ *	debugfs support to trace clock tree hierarchy and attributes
+ */
+static struct dentry *clk_debugfs_root;
 
+static int clk_debugfs_register_one(struct clk *c)
+{
+	int err;
+	struct dentry *d, *child;
+	struct clk *pa = c->parent;
+	char s[255];
+	char *p = s;
+
+	p += sprintf(p, "%s", c->name);
+	if (c->id >= 0)
+		sprintf(p, ":%d", c->id);
+	d = debugfs_create_dir(s, pa ? pa->dentry : clk_debugfs_root);
+	if (!d)
+		return -ENOMEM;
+	c->dentry = d;
+
+	d = debugfs_create_u8("usecount", S_IRUGO, c->dentry, (u8 *)&c->usecount);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_u32("rate", S_IRUGO, c->dentry, (u32 *)&c->rate);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_x32("flags", S_IRUGO, c->dentry, (u32 *)&c->flags);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	return 0;
+
+err_out:
+	d = c->dentry;
+	list_for_each_entry(child, &d->d_subdirs, d_u.d_child)
+		debugfs_remove(child);
+	debugfs_remove(c->dentry);
+	return err;
+}
+
+static int clk_debugfs_register(struct clk *c)
+{
+	int err;
+	struct clk *pa = c->parent;
+
+	if (pa && !pa->dentry) {
+		err = clk_debugfs_register(pa);
+		if (err)
+			return err;
+	}
+
+	if (!c->dentry) {
+		err = clk_debugfs_register_one(c);
+		if (err)
+			return err;
+	}
 	return 0;
 }
-subsys_initcall(clk_proc_init);
+
+static int __init clk_debugfs_init(void)
+{
+	struct clk *c;
+	struct dentry *d;
+	int err;
+
+	d = debugfs_create_dir("clock", NULL);
+	if (!d)
+		return -ENOMEM;
+	clk_debugfs_root = d;
+
+	list_for_each_entry(c, &clock_list, node) {
+		err = clk_debugfs_register(c);
+		if (err)
+			goto err_out;
+	}
+	return 0;
+err_out:
+	debugfs_remove(clk_debugfs_root); /* REVISIT: Cleanup correctly */
+	return err;
+}
+late_initcall(clk_debugfs_init);
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index d29e69c..ad85421 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -62,6 +62,11 @@
 #define speculative_execution_init()	do { } while (0)
 #endif
 
+/* 2nd-level cache init */
+void __uses_jump_to_uncached __attribute__ ((weak)) l2_cache_init(void)
+{
+}
+
 /*
  * Generic first-level cache init
  */
@@ -146,6 +151,8 @@
 	flags &= ~CCR_CACHE_ENABLE;
 #endif
 
+	l2_cache_init();
+
 	ctrl_outl(flags, CCR);
 	back_to_cached();
 }
diff --git a/arch/sh/kernel/cpu/irq/imask.c b/arch/sh/kernel/cpu/irq/imask.c
index 301b505..6b5d191 100644
--- a/arch/sh/kernel/cpu/irq/imask.c
+++ b/arch/sh/kernel/cpu/irq/imask.c
@@ -18,38 +18,17 @@
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/irq.h>
+#include <linux/bitmap.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 
 /* Bitmap of IRQ masked */
-static unsigned long imask_mask = 0x7fff;
-static int interrupt_priority = 0;
-
-static void enable_imask_irq(unsigned int irq);
-static void disable_imask_irq(unsigned int irq);
-static void shutdown_imask_irq(unsigned int irq);
-static void mask_and_ack_imask(unsigned int);
-static void end_imask_irq(unsigned int irq);
-
 #define IMASK_PRIORITY	15
 
-static unsigned int startup_imask_irq(unsigned int irq)
-{
-	/* Nothing to do */
-	return 0; /* never anything pending */
-}
+static DECLARE_BITMAP(imask_mask, IMASK_PRIORITY);
+static int interrupt_priority;
 
-static struct hw_interrupt_type imask_irq_type = {
-	.typename = "SR.IMASK",
-	.startup = startup_imask_irq,
-	.shutdown = shutdown_imask_irq,
-	.enable = enable_imask_irq,
-	.disable = disable_imask_irq,
-	.ack = mask_and_ack_imask,
-	.end = end_imask_irq
-};
-
-void static inline set_interrupt_registers(int ip)
+static inline void set_interrupt_registers(int ip)
 {
 	unsigned long __dummy;
 
@@ -72,42 +51,31 @@
 		     : "t");
 }
 
-static void disable_imask_irq(unsigned int irq)
+static void mask_imask_irq(unsigned int irq)
 {
-	clear_bit(irq, &imask_mask);
+	clear_bit(irq, imask_mask);
 	if (interrupt_priority < IMASK_PRIORITY - irq)
 		interrupt_priority = IMASK_PRIORITY - irq;
-
 	set_interrupt_registers(interrupt_priority);
 }
 
-static void enable_imask_irq(unsigned int irq)
+static void unmask_imask_irq(unsigned int irq)
 {
-	set_bit(irq, &imask_mask);
-	interrupt_priority = IMASK_PRIORITY - ffz(imask_mask);
-
+	set_bit(irq, imask_mask);
+	interrupt_priority = IMASK_PRIORITY -
+		find_first_zero_bit(imask_mask, IMASK_PRIORITY);
 	set_interrupt_registers(interrupt_priority);
 }
 
-static void mask_and_ack_imask(unsigned int irq)
-{
-	disable_imask_irq(irq);
-}
-
-static void end_imask_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_imask_irq(irq);
-}
-
-static void shutdown_imask_irq(unsigned int irq)
-{
-	/* Nothing to do */
-}
+static struct irq_chip imask_irq_chip = {
+	.typename	= "SR.IMASK",
+	.mask		= mask_imask_irq,
+	.unmask		= unmask_imask_irq,
+	.mask_ack	= mask_imask_irq,
+};
 
 void make_imask_irq(unsigned int irq)
 {
-	disable_irq_nosync(irq);
-	irq_desc[irq].chip = &imask_irq_type;
-	enable_irq(irq);
+	set_irq_chip_and_handler_name(irq, &imask_irq_chip,
+				      handle_level_irq, "level");
 }
diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index 726f033..6c092f1 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -84,7 +84,7 @@
 static void mask_and_ack_intc(unsigned int);
 static void end_intc_irq(unsigned int irq);
 
-static struct hw_interrupt_type intc_irq_type = {
+static struct irq_chip intc_irq_type = {
 	.typename = "INTC",
 	.startup = startup_intc_irq,
 	.shutdown = shutdown_intc_irq,
@@ -152,43 +152,13 @@
 	enable_intc_irq(irq);
 }
 
-/* For future use, if we ever support IRLM=0) */
-void make_intc_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	irq_desc[irq].chip = &intc_irq_type;
-	disable_intc_irq(irq);
-}
-
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
-static int IRQ_to_vectorN[NR_INTC_IRQS] = {
-	0x12, 0x15, 0x18, 0x1B, 0x40, 0x41, 0x42, 0x43, /*  0- 7 */
-	  -1,   -1,   -1,   -1, 0x50, 0x51, 0x52, 0x53,	/*  8-15 */
-	0x54, 0x55, 0x32, 0x33, 0x34, 0x35, 0x36,   -1, /* 16-23 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 24-31 */
-	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x38,	/* 32-39 */
-        0x39, 0x3A, 0x3B,   -1,   -1,   -1,   -1,   -1, /* 40-47 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 48-55 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1, 0x2B, /* 56-63 */
-
-};
-
-int intc_irq_describe(char* p, int irq)
-{
-	if (irq < NR_INTC_IRQS)
-		return sprintf(p, "(0x%3x)", IRQ_to_vectorN[irq]*0x20);
-	else
-		return 0;
-}
-#endif
-
 void __init plat_irq_setup(void)
 {
 	unsigned long long __dummy0, __dummy1=~0x00000000100000f0;
 	unsigned long reg;
 	int i;
 
-	intc_virt = onchip_remap(INTC_BASE, 1024, "INTC");
+	intc_virt = (unsigned long)ioremap_nocache(INTC_BASE, 1024);
 	if (!intc_virt) {
 		panic("Unable to remap INTC\n");
 	}
@@ -196,7 +166,7 @@
 
 	/* Set default: per-line enable/disable, priority driven ack/eoi */
 	for (i = 0; i < NR_INTC_IRQS; i++)
-		irq_desc[i].chip = &intc_irq_type;
+		set_irq_chip_and_handler(i, &intc_irq_type, handle_level_irq);
 
 
 	/* Disable all interrupts and set all priorities to 0 to avoid trouble */
diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 3eb17ee..808d99a 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/topology.h>
 
 static inline struct ipr_desc *get_ipr_desc(unsigned int irq)
 {
@@ -59,10 +60,18 @@
 
 	for (i = 0; i < desc->nr_irqs; i++) {
 		struct ipr_data *p = desc->ipr_data + i;
+		struct irq_desc *irq_desc;
 
 		BUG_ON(p->ipr_idx >= desc->nr_offsets);
 		BUG_ON(!desc->ipr_offsets[p->ipr_idx]);
 
+		irq_desc = irq_to_desc_alloc_node(p->irq, numa_node_id());
+		if (unlikely(!irq_desc)) {
+			printk(KERN_INFO "can not get irq_desc for %d\n",
+			       p->irq);
+			continue;
+		}
+
 		disable_irq_nosync(p->irq);
 		set_irq_chip_and_handler_name(p->irq, &desc->chip,
 				      handle_level_irq, "level");
diff --git a/arch/sh/kernel/cpu/sh2/clock-sh7619.c b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
index d2c1579..4fe8631 100644
--- a/arch/sh/kernel/cpu/sh2/clock-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
@@ -38,32 +38,27 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7619_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 7];
+	return clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 7];
 }
 
 static struct clk_ops sh7619_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate;
-}
-
 static struct clk_ops sh7619_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7619_clk_ops[] = {
@@ -78,4 +73,3 @@
 	if (idx < ARRAY_SIZE(sh7619_clk_ops))
 		*ops = sh7619_clk_ops[idx];
 }
-
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 0e32d8e..1379873 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -109,9 +111,75 @@
 	.resource = eth_resources,
 };
 
+static struct sh_timer_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xf84a0072,
+		.end	= 0xf84a0077,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 86,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_timer_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xf84a0078,
+		.end	= 0xf84a007d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 87,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
 static struct platform_device *sh7619_devices[] __initdata = {
 	&sci_device,
 	&eth_device,
+	&cmt0_device,
+	&cmt1_device,
 };
 
 static int __init sh7619_devices_setup(void)
@@ -125,3 +193,19 @@
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7619_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+};
+
+#define STBCR3 0xf80a0000
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x10, STBCR3);
+
+	early_platform_add_devices(sh7619_early_devices,
+				   ARRAY_SIZE(sh7619_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
index 4a5e597..7814c76 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
@@ -34,37 +34,37 @@
 
 static void master_clk_init(struct clk *clk)
 {
-	clk->rate = 10000000 * PLL2 * pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
+	return 10000000 * PLL2 * pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7201_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7201_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7201_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inw(FREQCR) >> 4) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7201_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
index fb78132..9409869 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
@@ -46,33 +46,28 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7203_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx-2];
+	return clk->parent->rate / pfc_divisors[idx-2];
 }
 
 static struct clk_ops sh7203_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate;
-}
-
 static struct clk_ops sh7203_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7203_clk_ops[] = {
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7206.c b/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
index 82d7f99..c2268bd 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
@@ -41,29 +41,29 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7206_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
+	return clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7206_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7206_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 8442937..869c2da 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 
 enum {
 	UNUSED = 0,
@@ -24,7 +25,7 @@
 
 	SCIF0, SCIF1,
 
-	MTU2_GROUP1, MTU2_GROUP2, MTU2_GROUP3, MTU2_GROUP4, MTU2_GROUP5
+	MTU2_GROUP1, MTU2_GROUP2, MTU2_GROUP3, MTU2_GROUP4, MTU2_GROUP5,
 	MTU2_TGI3B, MTU2_TGI3C,
 
 	/* interrupt groups */
@@ -113,6 +114,99 @@
 static DECLARE_INTC_DESC(intc_desc, "mxg", vectors, groups,
 			 mask_registers, prio_registers, NULL);
 
+static struct sh_timer_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xff801300,
+		.end	= 0xff801326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 228,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_timer_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xff801380,
+		.end	= 0xff801390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 234,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_timer_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xff801000,
+		.end	= 0xff80100a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 240,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xff804000,
@@ -134,6 +228,9 @@
 
 static struct platform_device *mxg_devices[] __initdata = {
 	&sci_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init mxg_devices_setup(void)
@@ -147,3 +244,15 @@
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *mxg_early_devices[] __initdata = {
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(mxg_early_devices,
+				   ARRAY_SIZE(mxg_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index 00f42f9..d8febe1 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -249,9 +251,105 @@
 	.resource	= rtc_resources,
 };
 
+static struct sh_timer_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 108,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_timer_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 116,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_timer_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xfffe4000,
+		.end	= 0xfffe400a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 124,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct platform_device *sh7201_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init sh7201_devices_setup(void)
@@ -265,3 +363,20 @@
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7201_early_devices[] __initdata = {
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
+};
+
+#define STBCR3 0xfffe0408
+
+void __init plat_early_device_setup(void)
+{
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
+	early_platform_add_devices(sh7201_early_devices,
+				   ARRAY_SIZE(sh7201_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 820dfb2..62e3039 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -11,6 +11,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -205,6 +207,132 @@
 	},
 };
 
+static struct sh_timer_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xfffec002,
+		.end	= 0xfffec007,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 142,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_timer_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xfffec008,
+		.end	= 0xfffec00d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 143,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
+static struct sh_timer_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 146,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_timer_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 153,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
 static struct resource rtc_resources[] = {
 	[0] = {
 		.start	= 0xffff2000,
@@ -227,6 +355,10 @@
 
 static struct platform_device *sh7203_devices[] __initdata = {
 	&sci_device,
+	&cmt0_device,
+	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
 	&rtc_device,
 };
 
@@ -241,3 +373,25 @@
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7203_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+};
+
+#define STBCR3 0xfffe0408
+#define STBCR4 0xfffe040c
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
+
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
+	early_platform_add_devices(sh7203_early_devices,
+				   ARRAY_SIZE(sh7203_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index c46a835..3e6f3d7 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -165,8 +167,170 @@
 	},
 };
 
+static struct sh_timer_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xfffec002,
+		.end	= 0xfffec007,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 140,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_timer_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xfffec008,
+		.end	= 0xfffec00d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 144,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
+static struct sh_timer_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 156,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_timer_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 164,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_timer_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xfffe4000,
+		.end	= 0xfffe400a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 180,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct platform_device *sh7206_devices[] __initdata = {
 	&sci_device,
+	&cmt0_device,
+	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init sh7206_devices_setup(void)
@@ -180,3 +344,26 @@
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7206_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
+};
+
+#define STBCR3 0xfffe0408
+#define STBCR4 0xfffe040c
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
+
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
+	early_platform_add_devices(sh7206_early_devices,
+				   ARRAY_SIZE(sh7206_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh3.c b/arch/sh/kernel/cpu/sh3/clock-sh3.c
index c3c9459..27b8738 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh3.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh3.c
@@ -38,36 +38,36 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh3_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4);
 
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh3_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh3_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7705.c b/arch/sh/kernel/cpu/sh3/clock-sh7705.c
index dfdbf32..0ca8f2c 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7705.c
@@ -39,30 +39,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ctrl_inw(FRQCR) & 0x0003;
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7705_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0300) >> 8;
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh7705_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0030) >> 4;
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7705_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7706.c b/arch/sh/kernel/cpu/sh3/clock-sh7706.c
index 0cf96f9..4bf7887 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7706.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7706.c
@@ -34,36 +34,36 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7706_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4);
 
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh7706_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7706_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7709.c b/arch/sh/kernel/cpu/sh3/clock-sh7709.c
index b791a29..fa30b60 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7709.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7709.c
@@ -41,12 +41,12 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7709_module_clk_ops = {
@@ -56,25 +56,25 @@
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = (frqcr & 0x0080) ?
 		((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4) : 1;
 
-	clk->rate = clk->parent->rate * stc_multipliers[idx];
+	return clk->parent->rate * stc_multipliers[idx];
 }
 
 static struct clk_ops sh7709_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7709_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7710.c b/arch/sh/kernel/cpu/sh3/clock-sh7710.c
index 4744c50..030a58b 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7710.c
@@ -33,30 +33,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0007);
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0700) >> 8;
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0070) >> 4;
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7712.c b/arch/sh/kernel/cpu/sh3/clock-sh7712.c
index 54f54df..6428ee6c 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7712.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7712.c
@@ -33,24 +33,24 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = frqcr & 0x0007;
 
-	clk->rate = clk->parent->rate / divisors[idx];
+	return clk->parent->rate / divisors[idx];
 }
 
 static struct clk_ops sh7712_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = (frqcr & 0x0030) >> 4;
 
-	clk->rate = clk->parent->rate / divisors[idx];
+	return clk->parent->rate / divisors[idx];
 }
 
 static struct clk_ops sh7712_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 63b67ba..88f742f 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 enum {
@@ -116,7 +117,102 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xfffffe94,
+		.end	= 0xfffffe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xfffffea0,
+		.end	= 0xfffffeab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xfffffeac,
+		.end	= 0xfffffebb,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7705_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -128,6 +224,18 @@
 }
 __initcall(sh7705_devices_setup);
 
+static struct platform_device *sh7705_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7705_early_devices,
+				   ARRAY_SIZE(sh7705_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index a74f960..c563067 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 
 enum {
 	UNUSED = 0,
@@ -144,7 +145,102 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xfffffe94,
+		.end	= 0xfffffe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xfffffea0,
+		.end	= 0xfffffeab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xfffffeac,
+		.end	= 0xfffffebb,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh770x_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -156,6 +252,18 @@
 }
 __initcall(sh770x_devices_setup);
 
+static struct platform_device *sh770x_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh770x_early_devices,
+				   ARRAY_SIZE(sh770x_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 335098b..efa76c8 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 enum {
@@ -120,7 +121,102 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xa412fe94,
+		.end	= 0xa412fe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xa412fea0,
+		.end	= 0xa412feab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xa412feac,
+		.end	= 0xa412feb5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7710_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -132,6 +228,18 @@
 }
 __initcall(sh7710_devices_setup);
 
+static struct platform_device *sh7710_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7710_early_devices,
+				   ARRAY_SIZE(sh7710_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 003874a..5b21077 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -18,6 +18,7 @@
 #include <linux/serial.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 static struct resource rtc_resources[] = {
@@ -123,7 +124,259 @@
 	.resource	= usbf_resources,
 };
 
+static struct sh_timer_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x10,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 125,
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0x044a0010,
+		.end	= 0x044a001b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_timer_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x20,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0x044a0020,
+		.end	= 0x044a002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
+static struct sh_timer_config cmt2_platform_data = {
+	.name = "CMT2",
+	.channel_offset = 0x30,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource cmt2_resources[] = {
+	[0] = {
+		.name	= "CMT2",
+		.start	= 0x044a0030,
+		.end	= 0x044a003b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt2_device = {
+	.name		= "sh_cmt",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &cmt2_platform_data,
+	},
+	.resource	= cmt2_resources,
+	.num_resources	= ARRAY_SIZE(cmt2_resources),
+};
+
+static struct sh_timer_config cmt3_platform_data = {
+	.name = "CMT3",
+	.channel_offset = 0x40,
+	.timer_bit = 3,
+	.clk = "peripheral_clk",
+};
+
+static struct resource cmt3_resources[] = {
+	[0] = {
+		.name	= "CMT3",
+		.start	= 0x044a0040,
+		.end	= 0x044a004b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt3_device = {
+	.name		= "sh_cmt",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &cmt3_platform_data,
+	},
+	.resource	= cmt3_resources,
+	.num_resources	= ARRAY_SIZE(cmt3_resources),
+};
+
+static struct sh_timer_config cmt4_platform_data = {
+	.name = "CMT4",
+	.channel_offset = 0x50,
+	.timer_bit = 4,
+	.clk = "peripheral_clk",
+};
+
+static struct resource cmt4_resources[] = {
+	[0] = {
+		.name	= "CMT4",
+		.start	= 0x044a0050,
+		.end	= 0x044a005b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt4_device = {
+	.name		= "sh_cmt",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &cmt4_platform_data,
+	},
+	.resource	= cmt4_resources,
+	.num_resources	= ARRAY_SIZE(cmt4_resources),
+};
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xa412fe94,
+		.end	= 0xa412fe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xa412fea0,
+		.end	= 0xa412feab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xa412feac,
+		.end	= 0xa412feb5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7720_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&cmt2_device,
+	&cmt3_device,
+	&cmt4_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&rtc_device,
 	&sci_device,
 	&usb_ohci_device,
@@ -137,6 +390,23 @@
 }
 __initcall(sh7720_devices_setup);
 
+static struct platform_device *sh7720_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&cmt2_device,
+	&cmt3_device,
+	&cmt4_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7720_early_devices,
+				   ARRAY_SIZE(sh7720_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index a334294..21421e3 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -21,10 +21,10 @@
 static int frqcr3_divisors[] = { 1, 2, 3, 4, 6, 8, 16 };
 static int frqcr3_values[]   = { 0, 1, 2, 3, 4, 5, 6  };
 
-static void emi_clk_recalc(struct clk *clk)
+static unsigned long emi_clk_recalc(struct clk *clk)
 {
 	int idx = ctrl_inl(CPG2_FRQCR3) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static inline int frqcr3_lookup(struct clk *clk, unsigned long rate)
@@ -46,14 +46,14 @@
 
 static struct clk sh4202_emi_clk = {
 	.name		= "emi_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_emi_clk_ops,
 };
 
-static void femi_clk_recalc(struct clk *clk)
+static unsigned long femi_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(CPG2_FRQCR3) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static struct clk_ops sh4202_femi_clk_ops = {
@@ -62,7 +62,7 @@
 
 static struct clk sh4202_femi_clk = {
 	.name		= "femi_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_femi_clk_ops,
 };
 
@@ -90,10 +90,10 @@
 	WARN_ON(i == ARRAY_SIZE(frqcr3_divisors));	/* Undefined clock */
 }
 
-static void shoc_clk_recalc(struct clk *clk)
+static unsigned long shoc_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(CPG2_FRQCR3) >> 6) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static int shoc_clk_verify_rate(struct clk *clk, unsigned long rate)
@@ -140,7 +140,7 @@
 
 static struct clk sh4202_shoc_clk = {
 	.name		= "shoc_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_shoc_clk_ops,
 };
 
@@ -150,31 +150,22 @@
 	&sh4202_shoc_clk,
 };
 
-static int __init sh4202_clk_init(void)
+int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	struct clk *clk;
+	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh4202_onchip_clocks); i++) {
 		struct clk *clkp = sh4202_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
-arch_initcall(sh4202_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4.c b/arch/sh/kernel/cpu/sh4/clock-sh4.c
index dca9f87..73294d9 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4.c
@@ -35,30 +35,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh4_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh4_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) >> 6) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh4_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 91e3677..6c78d0a 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -60,12 +60,18 @@
 		if ((cvr & 0x10000000) == 0)
 			boot_cpu_data.flags |= CPU_HAS_DSP;
 
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
+		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_PERF_COUNTER;
 		boot_cpu_data.cut_major = pvr & 0x7f;
+
+		boot_cpu_data.icache.ways = 4;
+		boot_cpu_data.dcache.ways = 4;
+	} else {
+		/* And some SH-4 defaults.. */
+		boot_cpu_data.flags |= CPU_HAS_PTEA;
 	}
 
 	/* FPU detection works for everyone */
-	if ((cvr & 0x20000000) == 1)
+	if ((cvr & 0x20000000))
 		boot_cpu_data.flags |= CPU_HAS_FPU;
 
 	/* Mask off the upper chip ID */
@@ -78,25 +84,20 @@
 	switch (pvr) {
 	case 0x205:
 		boot_cpu_data.type = CPU_SH7750;
-		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
-				   CPU_HAS_PERF_COUNTER;
+		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG |
+				       CPU_HAS_PERF_COUNTER;
 		break;
 	case 0x206:
 		boot_cpu_data.type = CPU_SH7750S;
-		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
-				   CPU_HAS_PERF_COUNTER;
+		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG |
+				       CPU_HAS_PERF_COUNTER;
 		break;
 	case 0x1100:
 		boot_cpu_data.type = CPU_SH7751;
-		boot_cpu_data.flags |= CPU_HAS_FPU;
 		break;
 	case 0x2001:
 	case 0x2004:
 		boot_cpu_data.type = CPU_SH7770;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_LLSC;
 		break;
 	case 0x2006:
 	case 0x200A:
@@ -107,45 +108,26 @@
 		else
 			boot_cpu_data.type = CPU_SH7780;
 
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-				   CPU_HAS_LLSC;
 		break;
 	case 0x3000:
 	case 0x3003:
 	case 0x3009:
 		boot_cpu_data.type = CPU_SH7343;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
 		break;
 	case 0x3004:
 	case 0x3007:
 		boot_cpu_data.type = CPU_SH7785;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-					  CPU_HAS_LLSC;
 		break;
 	case 0x4004:
 		boot_cpu_data.type = CPU_SH7786;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-			CPU_HAS_LLSC | CPU_HAS_PTEAEX;
+		boot_cpu_data.flags |= CPU_HAS_PTEAEX | CPU_HAS_L2_CACHE;
 		break;
 	case 0x3008:
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
-
 		switch (prr) {
 		case 0x50:
 		case 0x51:
 			boot_cpu_data.type = CPU_SH7723;
-			boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_L2_CACHE;
+			boot_cpu_data.flags |= CPU_HAS_L2_CACHE;
 			break;
 		case 0x70:
 			boot_cpu_data.type = CPU_SH7366;
@@ -156,13 +138,13 @@
 			break;
 		}
 		break;
+	case 0x300b:
+		boot_cpu_data.type = CPU_SH7724;
+		boot_cpu_data.flags |= CPU_HAS_L2_CACHE;
+		break;
 	case 0x4000:	/* 1st cut */
 	case 0x4001:	/* 2nd cut */
 		boot_cpu_data.type = CPU_SHX3;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-					  CPU_HAS_LLSC;
 		break;
 	case 0x700:
 		boot_cpu_data.type = CPU_SH4_501;
@@ -173,7 +155,6 @@
 		boot_cpu_data.type = CPU_SH4_202;
 		boot_cpu_data.icache.ways = 2;
 		boot_cpu_data.dcache.ways = 2;
-		boot_cpu_data.flags |= CPU_HAS_FPU;
 		break;
 	case 0x500 ... 0x501:
 		switch (prr) {
@@ -191,18 +172,12 @@
 		boot_cpu_data.icache.ways = 2;
 		boot_cpu_data.dcache.ways = 2;
 
-		boot_cpu_data.flags |= CPU_HAS_FPU;
-
 		break;
 	default:
 		boot_cpu_data.type = CPU_SH_NONE;
 		break;
 	}
 
-#ifdef CONFIG_CPU_HAS_PTEA
-	boot_cpu_data.flags |= CPU_HAS_PTEA;
-#endif
-
 	/*
 	 * On anything that's not a direct-mapped cache, look to the CVR
 	 * for I/D-cache specifics.
@@ -222,43 +197,48 @@
 	}
 
 	/*
-	 * Setup the L2 cache desc
-	 *
 	 * SH-4A's have an optional PIPT L2.
 	 */
 	if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
-		/* Bug if we can't decode the L2 info */
-		BUG_ON(!(cvr & 0xf));
-
-		/* Silicon and specifications have clearly never met.. */
-		cvr ^= 0xf;
-
 		/*
-		 * Size calculation is much more sensible
-		 * than it is for the L1.
-		 *
-		 * Sizes are 128KB, 258KB, 512KB, and 1MB.
+		 * Verify that it really has something hooked up, this
+		 * is the safety net for CPUs that have optional L2
+		 * support yet do not implement it.
 		 */
-		size = (cvr & 0xf) << 17;
+		if ((cvr & 0xf) == 0)
+			boot_cpu_data.flags &= ~CPU_HAS_L2_CACHE;
+		else {
+			/*
+			 * Silicon and specifications have clearly never
+			 * met..
+			 */
+			cvr ^= 0xf;
 
-		BUG_ON(!size);
+			/*
+			 * Size calculation is much more sensible
+			 * than it is for the L1.
+			 *
+			 * Sizes are 128KB, 258KB, 512KB, and 1MB.
+			 */
+			size = (cvr & 0xf) << 17;
 
-		boot_cpu_data.scache.way_incr		= (1 << 16);
-		boot_cpu_data.scache.entry_shift	= 5;
-		boot_cpu_data.scache.ways		= 4;
-		boot_cpu_data.scache.linesz		= L1_CACHE_BYTES;
+			boot_cpu_data.scache.way_incr		= (1 << 16);
+			boot_cpu_data.scache.entry_shift	= 5;
+			boot_cpu_data.scache.ways		= 4;
+			boot_cpu_data.scache.linesz		= L1_CACHE_BYTES;
 
-		boot_cpu_data.scache.entry_mask	=
-			(boot_cpu_data.scache.way_incr -
-			 boot_cpu_data.scache.linesz);
+			boot_cpu_data.scache.entry_mask	=
+				(boot_cpu_data.scache.way_incr -
+				 boot_cpu_data.scache.linesz);
 
-		boot_cpu_data.scache.sets	= size /
-			(boot_cpu_data.scache.linesz *
-			 boot_cpu_data.scache.ways);
+			boot_cpu_data.scache.sets	= size /
+				(boot_cpu_data.scache.linesz *
+				 boot_cpu_data.scache.ways);
 
-		boot_cpu_data.scache.way_size	=
-			(boot_cpu_data.scache.sets *
-			 boot_cpu_data.scache.linesz);
+			boot_cpu_data.scache.way_size	=
+				(boot_cpu_data.scache.sets *
+				 boot_cpu_data.scache.linesz);
+		}
 	}
 
 	return 0;
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 7371abf..6d088d1 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -2,6 +2,7 @@
  * SH4-202 Setup
  *
  *  Copyright (C) 2006  Paul Mundt
+ *  Copyright (C) 2009  Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -11,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -31,8 +34,103 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh4202_devices[] __initdata = {
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 static int __init sh4202_devices_setup(void)
@@ -42,7 +140,71 @@
 }
 __initcall(sh4202_devices_setup);
 
+static struct platform_device *sh4202_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh4202_early_devices,
+				   ARRAY_SIZE(sh4202_early_devices));
+}
+
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRL0, IRL1, IRL2, IRL3, /* only IRLM mode supported */
+	HUDI, TMU0, TMU1, TMU2, RTC, SCIF, WDT,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(HUDI, 0x600),
+	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2, 0x460),
+	INTC_VECT(RTC, 0x480), INTC_VECT(RTC, 0x4a0),
+	INTC_VECT(RTC, 0x4c0),
+	INTC_VECT(SCIF, 0x700), INTC_VECT(SCIF, 0x720),
+	INTC_VECT(SCIF, 0x740), INTC_VECT(SCIF, 0x760),
+	INTC_VECT(WDT, 0x560),
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } },
+	{ 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, 0, 0, 0 } },
+	{ 0xffd0000c, 0, 16, 4, /* IPRC */ { 0, 0, SCIF, HUDI } },
+	{ 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } },
+};
+
+static DECLARE_INTC_DESC(intc_desc, "sh4-202", vectors, NULL,
+			 NULL, prio_registers, NULL);
+
+static struct intc_vect vectors_irlm[] __initdata = {
+	INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0),
+	INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360),
+};
+
+static DECLARE_INTC_DESC(intc_desc_irlm, "sh4-202_irlm", vectors_irlm, NULL,
+			 NULL, prio_registers, NULL);
+
 void __init plat_irq_setup(void)
 {
-	/* do nothing - all IRL interrupts are handled by the board code */
+	register_intc_controller(&intc_desc);
+}
+
+#define INTC_ICR	0xffd00000UL
+#define INTC_ICR_IRLM   (1<<7)
+
+void __init plat_irq_setup_pins(int mode)
+{
+	switch (mode) {
+	case IRQ_MODE_IRQ: /* individual interrupt mode for IRL3-0 */
+		ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);
+		register_intc_controller(&intc_desc_irlm);
+		break;
+	default:
+		BUG();
+	}
 }
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index a1c80d9..851672d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
+#include <linux/sh_timer.h>
 #include <linux/serial_sci.h>
 
 static struct resource rtc_resources[] = {
@@ -60,9 +61,177 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+/* SH7750R, SH7751 and SH7751R all have two extra timer channels */
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xfe100008,
+		.end	= 0xfe100013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 72,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xfe100014,
+		.end	= 0xfe10001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 76,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+#endif
+
 static struct platform_device *sh7750_devices[] __initdata = {
 	&rtc_device,
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+	&tmu3_device,
+	&tmu4_device,
+#endif
 };
 
 static int __init sh7750_devices_setup(void)
@@ -72,6 +241,24 @@
 }
 __initcall(sh7750_devices_setup);
 
+static struct platform_device *sh7750_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+	&tmu3_device,
+	&tmu4_device,
+#endif
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7750_early_devices,
+				   ARRAY_SIZE(sh7750_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index d9bdc93..5b82251 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -10,6 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/sh_timer.h>
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 
@@ -18,10 +19,7 @@
 
 	/* interrupt sources */
 	IRL0, IRL1, IRL2, IRL3,
-	HUDI, GPIOI,
-	DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3,
-	DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7,
-	DMAC_DMAE,
+	HUDI, GPIOI, DMAC,
 	IRQ4, IRQ5, IRQ6, IRQ7,
 	HCAN20, HCAN21,
 	SSI0, SSI1,
@@ -36,21 +34,20 @@
 	HSPI,
 	MMCIF0, MMCIF1, MMCIF2, MMCIF3,
 	MFI, ADC, CMT,
-	TMU0, TMU1, TMU2_TUNI, TMU2_TICPI,
-	WDT,
-	REF_RCMI, REF_ROVI,
+	TMU0, TMU1, TMU2,
+	WDT, REF,
 
 	/* interrupt groups */
-	DMAC, DMABRG, SCIF0, SCIF1, SCIF2, SIM, MMCIF, TMU2, REF,
+	DMABRG, SCIF0, SCIF1, SCIF2, SIM, MMCIF,
 };
 
 static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620),
-	INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660),
-	INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0),
-	INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0),
-	INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0),
-	INTC_VECT(DMAC_DMAE, 0x6c0),
+	INTC_VECT(DMAC, 0x640), INTC_VECT(DMAC, 0x660),
+	INTC_VECT(DMAC, 0x680), INTC_VECT(DMAC, 0x6a0),
+	INTC_VECT(DMAC, 0x780), INTC_VECT(DMAC, 0x7a0),
+	INTC_VECT(DMAC, 0x7c0), INTC_VECT(DMAC, 0x7e0),
+	INTC_VECT(DMAC, 0x6c0),
 	INTC_VECT(IRQ4, 0x800), INTC_VECT(IRQ5, 0x820),
 	INTC_VECT(IRQ6, 0x840), INTC_VECT(IRQ6, 0x860),
 	INTC_VECT(HCAN20, 0x900), INTC_VECT(HCAN21, 0x920),
@@ -74,23 +71,18 @@
 	INTC_VECT(MFI, 0xe80), /* 0xf80 according to data sheet */
 	INTC_VECT(ADC, 0xf80), INTC_VECT(CMT, 0xfa0),
 	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
-	INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2, 0x460),
 	INTC_VECT(WDT, 0x560),
-	INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0),
+	INTC_VECT(REF, 0x580), INTC_VECT(REF, 0x5a0),
 };
 
 static struct intc_group groups[] __initdata = {
-	INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2,
-		   DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5,
-		   DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE),
 	INTC_GROUP(DMABRG, DMABRG0, DMABRG1, DMABRG2),
 	INTC_GROUP(SCIF0, SCIF0_ERI, SCIF0_RXI, SCIF0_BRI, SCIF0_TXI),
 	INTC_GROUP(SCIF1, SCIF1_ERI, SCIF1_RXI, SCIF1_BRI, SCIF1_TXI),
 	INTC_GROUP(SCIF2, SCIF2_ERI, SCIF2_RXI, SCIF2_BRI, SCIF2_TXI),
 	INTC_GROUP(SIM, SIM_ERI, SIM_RXI, SIM_TXI, SIM_TEI),
 	INTC_GROUP(MMCIF, MMCIF0, MMCIF1, MMCIF2, MMCIF3),
-	INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI),
-	INTC_GROUP(REF, REF_RCMI, REF_ROVI),
 };
 
 static struct intc_mask_reg mask_registers[] __initdata = {
@@ -168,8 +160,104 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+
 static struct platform_device *sh7760_devices[] __initdata = {
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 static int __init sh7760_devices_setup(void)
@@ -179,6 +267,18 @@
 }
 __initcall(sh7760_devices_setup);
 
+static struct platform_device *sh7760_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7760_early_devices,
+				   ARRAY_SIZE(sh7760_early_devices));
+}
+
 #define INTC_ICR	0xffd00000UL
 #define INTC_ICR_IRLM	(1 << 7)
 
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 1a92361..96ea09c 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -11,6 +11,7 @@
 obj-$(CONFIG_CPU_SUBTYPE_SH7343)	+= setup-sh7343.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7722)	+= setup-sh7722.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7723)	+= setup-sh7723.o
+obj-$(CONFIG_CPU_SUBTYPE_SH7724)	+= setup-sh7724.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7366)	+= setup-sh7366.o
 obj-$(CONFIG_CPU_SUBTYPE_SHX3)		+= setup-shx3.o
 
@@ -23,15 +24,17 @@
 clock-$(CONFIG_CPU_SUBTYPE_SH7780)	:= clock-sh7780.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7785)	:= clock-sh7785.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7343.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7722.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7724.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7366.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
 # Pinmux setup
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7722)	:= pinmux-sh7722.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7723)	:= pinmux-sh7723.o
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7724)	:= pinmux-sh7724.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7785)	:= pinmux-sh7785.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7786)	:= pinmux-sh7786.o
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
new file mode 100644
index 0000000..0ee3ee8
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
@@ -0,0 +1,211 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7343.c
+ *
+ * SH7343 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7343 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+
+static struct clk mstp_clks[] = {
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, CLK_ENABLE_ON_INIT),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, CLK_ENABLE_ON_INIT),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, CLK_ENABLE_ON_INIT),
+	MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("intc3", &div4_clks[DIV4_P], MSTPCR0, 23, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 0),
+	MSTP("dmac0", &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	MSTP("sh0", &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0),
+	MSTP("ubc0", &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("mfi0", &div4_clks[DIV4_P], MSTPCR0, 11, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+	MSTP("scif3", &div4_clks[DIV4_P], MSTPCR0, 4, 0),
+	MSTP("sio0", &div4_clks[DIV4_P], MSTPCR0, 3, 0),
+	MSTP("siof0", &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+	MSTP("siof1", &div4_clks[DIV4_P], MSTPCR0, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+	MSTP("i2c1", &div4_clks[DIV4_P], MSTPCR1, 8, 0),
+
+	MSTP("tpu0", &div4_clks[DIV4_P], MSTPCR2, 25, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0),
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("mmcif0", &div4_clks[DIV4_P], MSTPCR2, 17, 0),
+	MSTP("sim0", &div4_clks[DIV4_P], MSTPCR2, 16, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0),
+	MSTP("tsif0", &div4_clks[DIV4_P], MSTPCR2, 13, 0),
+	MSTP("s3d40", &div4_clks[DIV4_P], MSTPCR2, 12, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
new file mode 100644
index 0000000..a95ebab
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
@@ -0,0 +1,211 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7366.c
+ *
+ * SH7366 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7366 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+	else
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fef, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+
+static struct clk mstp_clks[] = {
+	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, CLK_ENABLE_ON_INIT),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, CLK_ENABLE_ON_INIT),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, CLK_ENABLE_ON_INIT),
+	MSTP("rsmem0", &div4_clks[DIV4_SH], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("intc3", &div4_clks[DIV4_P], MSTPCR0, 23, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 0),
+	MSTP("dmac0", &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	MSTP("sh0", &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0),
+	MSTP("ubc0", &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("mfi0", &div4_clks[DIV4_P], MSTPCR0, 11, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+	MSTP("msiof0", &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+	MSTP("sbr0", &div4_clks[DIV4_P], MSTPCR0, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+
+	MSTP("icb0", &div4_clks[DIV4_P], MSTPCR2, 27, 0),
+	MSTP("meram0", &div4_clks[DIV4_P], MSTPCR2, 26, 0),
+	MSTP("dacy1", &div4_clks[DIV4_P], MSTPCR2, 24, 0),
+	MSTP("dacy0", &div4_clks[DIV4_P], MSTPCR2, 23, 0),
+	MSTP("tsif0", &div4_clks[DIV4_P], MSTPCR2, 22, 0),
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("mmcif0", &div4_clks[DIV4_P], MSTPCR2, 17, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 9, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 7, CLK_ENABLE_ON_INIT),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 0e174af..40f8593 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -1,844 +1,197 @@
 /*
  * arch/sh/kernel/cpu/sh4a/clock-sh7722.c
  *
- * SH7343, SH7722, SH7723 & SH7366 support for the clock framework
+ * SH7722 clock framework support
  *
- * Copyright (c) 2006-2007 Nomad Global Solutions Inc
- * Based on code for sh7343 by Paul Mundt
+ * Copyright (C) 2009 Magnus Damm
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <linux/errno.h>
-#include <linux/stringify.h>
 #include <asm/clock.h>
-#include <asm/freq.h>
 
-#define N  (-1)
-#define NM (-2)
-#define ROUND_NEAREST 0
-#define ROUND_DOWN -1
-#define ROUND_UP   +1
+/* SH7722 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
 
-static int adjust_algos[][3] = {
-	{},	/* NO_CHANGE */
-	{ NM, N, 1 },   /* N:1, N:1 */
-	{ 3, 2, 2 },	/* 3:2:2 */
-	{ 5, 2, 2 },    /* 5:2:2 */
-	{ N, 1, 1 },	/* N:1:1 */
-
-	{ N, 1 },	/* N:1 */
-
-	{ N, 1 },	/* N:1 */
-	{ 3, 2 },
-	{ 4, 3 },
-	{ 5, 4 },
-
-	{ N, 1 }
-};
-
-static unsigned long adjust_pair_of_clocks(unsigned long r1, unsigned long r2,
-			int m1, int m2, int round_flag)
-{
-	unsigned long rem, div;
-	int the_one = 0;
-
-	pr_debug( "Actual values: r1 = %ld\n", r1);
-	pr_debug( "...............r2 = %ld\n", r2);
-
-	if (m1 == m2) {
-		r2 = r1;
-		pr_debug( "setting equal rates: r2 now %ld\n", r2);
-	} else if ((m2 == N  && m1 == 1) ||
-		   (m2 == NM && m1 == N)) { /* N:1 or NM:N */
-		pr_debug( "Setting rates as 1:N (N:N*M)\n");
-		rem = r2 % r1;
-		pr_debug( "...remainder = %ld\n", rem);
-		if (rem) {
-			div = r2 / r1;
-			pr_debug( "...div = %ld\n", div);
-			switch (round_flag) {
-			case ROUND_NEAREST:
-				the_one = rem >= r1/2 ? 1 : 0; break;
-			case ROUND_UP:
-				the_one = 1; break;
-			case ROUND_DOWN:
-				the_one = 0; break;
-			}
-
-			r2 = r1 * (div + the_one);
-			pr_debug( "...setting r2 to %ld\n", r2);
-		}
-	} else if ((m2 == 1  && m1 == N) ||
-		   (m2 == N && m1 == NM)) { /* 1:N or N:NM */
-		pr_debug( "Setting rates as N:1 (N*M:N)\n");
-		rem = r1 % r2;
-		pr_debug( "...remainder = %ld\n", rem);
-		if (rem) {
-			div = r1 / r2;
-			pr_debug( "...div = %ld\n", div);
-			switch (round_flag) {
-			case ROUND_NEAREST:
-				the_one = rem > r2/2 ? 1 : 0; break;
-			case ROUND_UP:
-				the_one = 0; break;
-			case ROUND_DOWN:
-				the_one = 1; break;
-			}
-
-			r2 = r1 / (div + the_one);
-			pr_debug( "...setting r2 to %ld\n", r2);
-		}
-	} else { /* value:value */
-		pr_debug( "Setting rates as %d:%d\n", m1, m2);
-		div = r1 / m1;
-		r2 = div * m2;
-		pr_debug( "...div = %ld\n", div);
-		pr_debug( "...setting r2 to %ld\n", r2);
-	}
-
-	return r2;
-}
-
-static void adjust_clocks(int originate, int *l, unsigned long v[],
-			  int n_in_line)
-{
-	int x;
-
-	pr_debug( "Go down from %d...\n", originate);
-	/* go up recalculation clocks */
-	for (x = originate; x>0; x -- )
-		v[x-1] = adjust_pair_of_clocks(v[x], v[x-1],
-					l[x], l[x-1],
-					ROUND_UP);
-
-	pr_debug( "Go up from %d...\n", originate);
-	/* go down recalculation clocks */
-	for (x = originate; x<n_in_line - 1; x ++ )
-		v[x+1] = adjust_pair_of_clocks(v[x], v[x+1],
-					l[x], l[x+1],
-					ROUND_UP);
-}
-
-
-/*
- * SH7722 uses a common set of multipliers and divisors, so this
- * is quite simple..
- */
-
-/*
- * Instead of having two separate multipliers/divisors set, like this:
- *
- * static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
- * static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
- *
- * I created the divisors2 array, which is used to calculate rate like
- *   rate = parent * 2 / divisors2[ divisor ];
-*/
-static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 };
-
-static void master_clk_recalc(struct clk *clk)
-{
-	unsigned frqcr = ctrl_inl(FRQCR);
-
-	clk->rate = CONFIG_SH_PCLK_FREQ * (((frqcr >> 24) & 0x1f) + 1);
-}
-
-static void master_clk_init(struct clk *clk)
-{
-	clk->parent = NULL;
-	clk->flags |= CLK_RATE_PROPAGATES;
-	clk->rate = CONFIG_SH_PCLK_FREQ;
-	master_clk_recalc(clk);
-}
-
-
-static void module_clk_recalc(struct clk *clk)
-{
-	unsigned long frqcr = ctrl_inl(FRQCR);
-
-	clk->rate = clk->parent->rate / (((frqcr >> 24) & 0x1f) + 1);
-}
-
-static int master_clk_setrate(struct clk *clk, unsigned long rate, int id)
-{
-	int div = rate / clk->rate;
-	int master_divs[] = { 2, 3, 4, 6, 8, 16 };
-	int index;
-	unsigned long frqcr;
-
-	for (index = 1; index < ARRAY_SIZE(master_divs); index++)
-		if (div >= master_divs[index - 1] && div < master_divs[index])
-			break;
-
-	if (index >= ARRAY_SIZE(master_divs))
-		index = ARRAY_SIZE(master_divs);
-	div = master_divs[index - 1];
-
-	frqcr = ctrl_inl(FRQCR);
-	frqcr &= ~(0xF << 24);
-	frqcr |= ( (div-1) << 24);
-	ctrl_outl(frqcr, FRQCR);
-
-	return 0;
-}
-
-static struct clk_ops sh7722_master_clk_ops = {
-	.init = master_clk_init,
-	.recalc = master_clk_recalc,
-	.set_rate = master_clk_setrate,
-};
-
-static struct clk_ops sh7722_module_clk_ops = {
-       .recalc = module_clk_recalc,
-};
-
-struct frqcr_context {
-	unsigned mask;
-	unsigned shift;
-};
-
-struct frqcr_context sh7722_get_clk_context(const char *name)
-{
-	struct frqcr_context ctx = { 0, };
-
-	if (!strcmp(name, "peripheral_clk")) {
-		ctx.shift = 0;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "sdram_clk")) {
-		ctx.shift = 4;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "bus_clk")) {
-		ctx.shift = 8;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "sh_clk")) {
-		ctx.shift = 12;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "umem_clk")) {
-		ctx.shift = 16;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "cpu_clk")) {
-		ctx.shift = 20;
-		ctx.mask = 7;
-	}
-	return ctx;
-}
-
-/**
- * sh7722_find_div_index - find divisor for setting rate
- *
- * All sh7722 clocks use the same set of multipliers/divisors. This function
- * chooses correct divisor to set the rate of clock with parent clock that
- * generates frequency of 'parent_rate'
- *
- * @parent_rate: rate of parent clock
- * @rate: requested rate to be set
- */
-static int sh7722_find_div_index(unsigned long parent_rate, unsigned rate)
-{
-	unsigned div2 = parent_rate * 2 / rate;
-	int index;
-
-	if (rate > parent_rate)
-		return -EINVAL;
-
-	for (index = 1; index < ARRAY_SIZE(divisors2); index++) {
-		if (div2 > divisors2[index - 1] && div2 <= divisors2[index])
-			break;
-	}
-	if (index >= ARRAY_SIZE(divisors2))
-		index = ARRAY_SIZE(divisors2) - 1;
-	return index;
-}
-
-static void sh7722_frqcr_recalc(struct clk *clk)
-{
-	struct frqcr_context ctx = sh7722_get_clk_context(clk->name);
-	unsigned long frqcr = ctrl_inl(FRQCR);
-	int index;
-
-	index = (frqcr >> ctx.shift) & ctx.mask;
-	clk->rate = clk->parent->rate * 2 / divisors2[index];
-}
-
-static int sh7722_frqcr_set_rate(struct clk *clk, unsigned long rate,
-				 int algo_id)
-{
-	struct frqcr_context ctx = sh7722_get_clk_context(clk->name);
-	unsigned long parent_rate = clk->parent->rate;
-	int div;
-	unsigned long frqcr;
-	int err = 0;
-
-	/* pretty invalid */
-	if (parent_rate < rate)
-		return -EINVAL;
-
-	/* look for multiplier/divisor pair */
-	div = sh7722_find_div_index(parent_rate, rate);
-	if (div<0)
-		return div;
-
-	/* calculate new value of clock rate */
-	clk->rate = parent_rate * 2 / divisors2[div];
-	frqcr = ctrl_inl(FRQCR);
-
-	/* FIXME: adjust as algo_id specifies */
-	if (algo_id != NO_CHANGE) {
-		int originator;
-		char *algo_group_1[] = { "cpu_clk", "umem_clk", "sh_clk" };
-		char *algo_group_2[] = { "sh_clk", "bus_clk" };
-		char *algo_group_3[] = { "sh_clk", "sdram_clk" };
-		char *algo_group_4[] = { "bus_clk", "peripheral_clk" };
-		char *algo_group_5[] = { "cpu_clk", "peripheral_clk" };
-		char **algo_current = NULL;
-		/* 3 is the maximum number of clocks in relation */
-		struct clk *ck[3];
-		unsigned long values[3]; /* the same comment as above */
-		int part_length = -1;
-		int i;
-
-		/*
-		 * all the steps below only required if adjustion was
-		 * requested
-		 */
-		if (algo_id == IUS_N1_N1 ||
-		    algo_id == IUS_322 ||
-		    algo_id == IUS_522 ||
-		    algo_id == IUS_N11) {
-			algo_current = algo_group_1;
-			part_length = 3;
-		}
-		if (algo_id == SB_N1) {
-			algo_current = algo_group_2;
-			part_length = 2;
-		}
-		if (algo_id == SB3_N1 ||
-		    algo_id == SB3_32 ||
-		    algo_id == SB3_43 ||
-		    algo_id == SB3_54) {
-			algo_current = algo_group_3;
-			part_length = 2;
-		}
-		if (algo_id == BP_N1) {
-			algo_current = algo_group_4;
-			part_length = 2;
-		}
-		if (algo_id == IP_N1) {
-			algo_current = algo_group_5;
-			part_length = 2;
-		}
-		if (!algo_current)
-			goto incorrect_algo_id;
-
-		originator = -1;
-		for (i = 0; i < part_length; i ++ ) {
-			if (originator >= 0 && !strcmp(clk->name,
-						       algo_current[i]))
-				originator = i;
-			ck[i] = clk_get(NULL, algo_current[i]);
-			values[i] = clk_get_rate(ck[i]);
-		}
-
-		if (originator >= 0)
-			adjust_clocks(originator, adjust_algos[algo_id],
-				      values, part_length);
-
-		for (i = 0; i < part_length; i ++ ) {
-			struct frqcr_context part_ctx;
-			int part_div;
-
-			if (likely(!err)) {
-				part_div = sh7722_find_div_index(parent_rate,
-								rate);
-				if (part_div > 0) {
-					part_ctx = sh7722_get_clk_context(
-								ck[i]->name);
-					frqcr &= ~(part_ctx.mask <<
-						   part_ctx.shift);
-					frqcr |= part_div << part_ctx.shift;
-				} else
-					err = part_div;
-			}
-
-			ck[i]->ops->recalc(ck[i]);
-			clk_put(ck[i]);
-		}
-	}
-
-	/* was there any error during recalculation ? If so, bail out.. */
-	if (unlikely(err!=0))
-		goto out_err;
-
-	/* clear FRQCR bits */
-	frqcr &= ~(ctx.mask << ctx.shift);
-	frqcr |= div << ctx.shift;
-
-	/* ...and perform actual change */
-	ctrl_outl(frqcr, FRQCR);
-	return 0;
-
-incorrect_algo_id:
-	return -EINVAL;
-out_err:
-	return err;
-}
-
-static long sh7722_frqcr_round_rate(struct clk *clk, unsigned long rate)
-{
-	unsigned long parent_rate = clk->parent->rate;
-	int div;
-
-	/* look for multiplier/divisor pair */
-	div = sh7722_find_div_index(parent_rate, rate);
-	if (div < 0)
-		return clk->rate;
-
-	/* calculate new value of clock rate */
-	return parent_rate * 2 / divisors2[div];
-}
-
-static struct clk_ops sh7722_frqcr_clk_ops = {
-	.recalc = sh7722_frqcr_recalc,
-	.set_rate = sh7722_frqcr_set_rate,
-	.round_rate = sh7722_frqcr_round_rate,
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
 };
 
 /*
- * clock ops methods for SIU A/B and IrDA clock
- *
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
  */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
 
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
-
-static int sh7722_siu_set_rate(struct clk *clk, unsigned long rate, int algo_id)
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
 {
-	unsigned long r;
-	int div;
+	unsigned long mult;
 
-	r = ctrl_inl(clk->arch_flags);
-	div = sh7722_find_div_index(clk->parent->rate, rate);
-	if (div < 0)
-		return div;
-	r = (r & ~0xF) | div;
-	ctrl_outl(r, clk->arch_flags);
-	return 0;
-}
-
-static void sh7722_siu_recalc(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(clk->arch_flags);
-	clk->rate = clk->parent->rate * 2 / divisors2[r & 0xF];
-}
-
-static int sh7722_siu_start_stop(struct clk *clk, int enable)
-{
-	unsigned long r;
-
-	r = ctrl_inl(clk->arch_flags);
-	if (enable)
-		ctrl_outl(r & ~(1 << 8), clk->arch_flags);
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
 	else
-		ctrl_outl(r | (1 << 8), clk->arch_flags);
-	return 0;
+		mult = 0;
+
+	return clk->parent->rate * mult;
 }
 
-static void sh7722_siu_enable(struct clk *clk)
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
 {
-	sh7722_siu_start_stop(clk, 1);
-}
+	unsigned long mult = 1;
+	unsigned long div = 1;
 
-static void sh7722_siu_disable(struct clk *clk)
-{
-	sh7722_siu_start_stop(clk, 0);
-}
-
-static struct clk_ops sh7722_siu_clk_ops = {
-	.recalc = sh7722_siu_recalc,
-	.set_rate = sh7722_siu_set_rate,
-	.enable = sh7722_siu_enable,
-	.disable = sh7722_siu_disable,
-};
-
-#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
-
-static void sh7722_video_enable(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	ctrl_outl( r & ~(1<<8), VCLKCR);
-}
-
-static void sh7722_video_disable(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	ctrl_outl( r | (1<<8), VCLKCR);
-}
-
-static int sh7722_video_set_rate(struct clk *clk, unsigned long rate,
-				 int algo_id)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	r &= ~0x3F;
-	r |= ((clk->parent->rate / rate - 1) & 0x3F);
-	ctrl_outl(r, VCLKCR);
-	return 0;
-}
-
-static void sh7722_video_recalc(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	clk->rate = clk->parent->rate / ((r & 0x3F) + 1);
-}
-
-static struct clk_ops sh7722_video_clk_ops = {
-	.recalc = sh7722_video_recalc,
-	.set_rate = sh7722_video_set_rate,
-	.enable = sh7722_video_enable,
-	.disable = sh7722_video_disable,
-};
-/*
- * and at last, clock definitions themselves
- */
-static struct clk sh7722_umem_clock = {
-	.name = "umem_clk",
-	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
-};
-
-static struct clk sh7722_sh_clock = {
-	.name = "sh_clk",
-	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
-};
-
-static struct clk sh7722_peripheral_clock = {
-	.name = "peripheral_clk",
-	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
-};
-
-static struct clk sh7722_sdram_clock = {
-	.name = "sdram_clk",
-	.ops = &sh7722_frqcr_clk_ops,
-};
-
-static struct clk sh7722_r_clock = {
-	.name = "r_clk",
-	.rate = 32768,
-	.flags = CLK_RATE_PROPAGATES,
-};
-
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
-
-/*
- * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops
- * methods of clk_ops determine which register they should access by
- * examining clk->name field
- */
-static struct clk sh7722_siu_a_clock = {
-	.name = "siu_a_clk",
-	.arch_flags = SCLKACR,
-	.ops = &sh7722_siu_clk_ops,
-};
-
-static struct clk sh7722_siu_b_clock = {
-	.name = "siu_b_clk",
-	.arch_flags = SCLKBCR,
-	.ops = &sh7722_siu_clk_ops,
-};
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-static struct clk sh7722_irda_clock = {
-	.name = "irda_clk",
-	.arch_flags = IrDACLKCR,
-	.ops = &sh7722_siu_clk_ops,
-};
-#endif
-#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
-
-static struct clk sh7722_video_clock = {
-	.name = "video_clk",
-	.ops = &sh7722_video_clk_ops,
-};
-
-#define MSTPCR_ARCH_FLAGS(reg, bit) (((reg) << 8) | (bit))
-#define MSTPCR_ARCH_FLAGS_REG(value) ((value) >> 8)
-#define MSTPCR_ARCH_FLAGS_BIT(value) ((value) & 0xff)
-
-static int sh7722_mstpcr_start_stop(struct clk *clk, int enable)
-{
-	unsigned long bit = MSTPCR_ARCH_FLAGS_BIT(clk->arch_flags);
-	unsigned long reg;
-	unsigned long r;
-
-	switch(MSTPCR_ARCH_FLAGS_REG(clk->arch_flags)) {
-	case 0:
-		reg = MSTPCR0;
-		break;
-	case 1:
-		reg = MSTPCR1;
-		break;
-	case 2:
-		reg = MSTPCR2;
-		break;
-	default:
-		return -EINVAL;
-	}  
-
-	r = ctrl_inl(reg);
-
-	if (enable)
-		r &= ~(1 << bit);
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
 	else
-		r |= (1 << bit);
+		div = 2;
 
-	ctrl_outl(r, reg);
-	return 0;
+	return (clk->parent->rate * mult) / div;
 }
 
-static void sh7722_mstpcr_enable(struct clk *clk)
-{
-	sh7722_mstpcr_start_stop(clk, 1);
-}
-
-static void sh7722_mstpcr_disable(struct clk *clk)
-{
-	sh7722_mstpcr_start_stop(clk, 0);
-}
-
-static void sh7722_mstpcr_recalc(struct clk *clk)
-{
-	if (clk->parent)
-		clk->rate = clk->parent->rate;
-}
-
-static struct clk_ops sh7722_mstpcr_clk_ops = {
-	.enable = sh7722_mstpcr_enable,
-	.disable = sh7722_mstpcr_disable,
-	.recalc = sh7722_mstpcr_recalc,
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
 };
 
-#define MSTPCR(_name, _parent, regnr, bitnr) \
-{						\
-	.name = _name,				\
-	.arch_flags = MSTPCR_ARCH_FLAGS(regnr, bitnr),	\
-	.ops = (void *)_parent,		\
-}
-
-static struct clk sh7722_mstpcr_clocks[] = {
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	MSTPCR("uram0", "umem_clk", 0, 28),
-	MSTPCR("xymem0", "bus_clk", 0, 26),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "peripheral_clk", 0, 6),
-	MSTPCR("scif2", "peripheral_clk", 0, 5),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("rtc0", "r_clk", 1, 8),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11),
-	MSTPCR("2dg0", "bus_clk", 2, 9),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("jpu0", "bus_clk", 2, 6),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
-#endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7723)
-	/* See page 60 of Datasheet V1.0: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31),
-	MSTPCR("ic0", "cpu_clk", 0, 30),
-	MSTPCR("oc0", "cpu_clk", 0, 29),
-	MSTPCR("l2c0", "sh_clk", 0, 28),
-	MSTPCR("ilmem0", "cpu_clk", 0, 27),
-	MSTPCR("fpu0", "cpu_clk", 0, 24),
-	MSTPCR("intc0", "cpu_clk", 0, 22),
-	MSTPCR("dmac0", "bus_clk", 0, 21),
-	MSTPCR("sh0", "sh_clk", 0, 20),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19),
-	MSTPCR("ubc0", "cpu_clk", 0, 17),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("dmac1", "bus_clk", 0, 12),
-	MSTPCR("tmu1", "peripheral_clk", 0, 11),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 9),
-	MSTPCR("scif1", "peripheral_clk", 0, 8),
-	MSTPCR("scif2", "peripheral_clk", 0, 7),
-	MSTPCR("scif3", "bus_clk", 0, 6),
-	MSTPCR("scif4", "bus_clk", 0, 5),
-	MSTPCR("scif5", "bus_clk", 0, 4),
-	MSTPCR("msiof0", "bus_clk", 0, 2),
-	MSTPCR("msiof1", "bus_clk", 0, 1),
-	MSTPCR("meram0", "sh_clk", 0, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("rtc0", "r_clk", 1, 8),
-	MSTPCR("atapi0", "sh_clk", 2, 28),
-	MSTPCR("adc0", "peripheral_clk", 2, 28),
-	MSTPCR("tpu0", "bus_clk", 2, 25),
-	MSTPCR("irda0", "peripheral_clk", 2, 24),
-	MSTPCR("tsif0", "bus_clk", 2, 22),
-	MSTPCR("icb0", "bus_clk", 2, 21),
-	MSTPCR("sdhi0", "bus_clk", 2, 18),
-	MSTPCR("sdhi1", "bus_clk", 2, 17),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usb0", "bus_clk", 2, 11),
-	MSTPCR("2dg0", "bus_clk", 2, 10),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("veu1", "bus_clk", 2, 6),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
-#endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7343)
-	MSTPCR("uram0", "umem_clk", 0, 28),
-	MSTPCR("xymem0", "bus_clk", 0, 26),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "peripheral_clk", 0, 6),
-	MSTPCR("scif2", "peripheral_clk", 0, 5),
-	MSTPCR("scif3", "peripheral_clk", 0, 4),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("i2c1", "peripheral_clk", 1, 8),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("jpu0", "bus_clk", 2, 6),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
-#endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7366)
-	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31),
-	MSTPCR("ic0", "cpu_clk", 0, 30),
-	MSTPCR("oc0", "cpu_clk", 0, 29),
-	MSTPCR("rsmem0", "sh_clk", 0, 28),
-	MSTPCR("xymem0", "cpu_clk", 0, 26),
-	MSTPCR("intc30", "peripheral_clk", 0, 23),
-	MSTPCR("intc0", "peripheral_clk", 0, 22),
-	MSTPCR("dmac0", "bus_clk", 0, 21),
-	MSTPCR("sh0", "sh_clk", 0, 20),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19),
-	MSTPCR("ubc0", "cpu_clk", 0, 17),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "bus_clk", 0, 6),
-	MSTPCR("scif2", "bus_clk", 0, 5),
-	MSTPCR("msiof0", "peripheral_clk", 0, 2),
-	MSTPCR("sbr0", "peripheral_clk", 0, 1),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("icb0", "bus_clk", 2, 27),
-	MSTPCR("meram0", "sh_clk", 2, 26),
-	MSTPCR("dacc0", "peripheral_clk", 2, 24),
-	MSTPCR("dacy0", "peripheral_clk", 2, 23),
-	MSTPCR("tsif0", "bus_clk", 2, 22),
-	MSTPCR("sdhi0", "bus_clk", 2, 18),
-	MSTPCR("mmcif0", "bus_clk", 2, 17),
-	MSTPCR("usb0", "bus_clk", 2, 11),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("veu1", "bus_clk", 2, 7),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
-#endif
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
-static struct clk *sh7722_clocks[] = {
-	&sh7722_umem_clock,
-	&sh7722_sh_clock,
-	&sh7722_peripheral_clock,
-	&sh7722_sdram_clock,
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
-	&sh7722_siu_a_clock,
-	&sh7722_siu_b_clock,
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	&sh7722_irda_clock,
-#endif
-#endif
-	&sh7722_video_clock,
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
 };
 
-/*
- * init in order: master, module, bus, cpu
- */
-struct clk_ops *onchip_ops[] = {
-	&sh7722_master_clk_ops,
-	&sh7722_module_clk_ops,
-	&sh7722_frqcr_clk_ops,
-	&sh7722_frqcr_clk_ops,
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
 };
 
-void __init
-arch_init_clk_ops(struct clk_ops **ops, int type)
-{
-	BUG_ON(type < 0 || type > ARRAY_SIZE(onchip_ops));
-	*ops = onchip_ops[type];
-}
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_IRDA, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fef, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x1fff, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+
+static struct clk mstp_clks[] = {
+	MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 8, 0),
+
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 9, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+};
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk;
-	int i;
+	int k, ret = 0;
 
-	clk = clk_get(NULL, "master_clk");
-	for (i = 0; i < ARRAY_SIZE(sh7722_clocks); i++) {
-		pr_debug( "Registering clock '%s'\n", sh7722_clocks[i]->name);
-		sh7722_clocks[i]->parent = clk;
-		clk_register(sh7722_clocks[i]);
-	}
-	clk_put(clk);
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
 
-	clk_register(&sh7722_r_clock);
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
 
-	for (i = 0; i < ARRAY_SIZE(sh7722_mstpcr_clocks); i++) {
-		pr_debug( "Registering mstpcr clock '%s'\n",
-			  sh7722_mstpcr_clocks[i].name);
-		clk = clk_get(NULL, (void *) sh7722_mstpcr_clocks[i].ops);
-		sh7722_mstpcr_clocks[i].parent = clk;
-		sh7722_mstpcr_clocks[i].ops = &sh7722_mstpcr_clk_ops;
-		clk_register(&sh7722_mstpcr_clocks[i]);
-		clk_put(clk);
-	}
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
 
-	clk_recalc_rate(&sh7722_r_clock); /* make sure rate gets propagated */
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
 
-	return 0;
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
 }
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
new file mode 100644
index 0000000..e67c267
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
@@ -0,0 +1,222 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7723.c
+ *
+ * SH7723 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7723 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+	else
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_IRDA, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x0db4, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x0dbf, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x0dbf, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x0dbf, 0),
+	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x0dbf, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _force_on, _need_cpg, _need_ram) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _force_on * CLK_ENABLE_ON_INIT)
+
+static struct clk mstp_clks[] = {
+	/* See page 60 of Datasheet V1.0: Overview -> Block Diagram */
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, 1, 1, 0),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, 1, 1, 0),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, 1, 1, 0),
+	MSTP("l2c0", &div4_clks[DIV4_SH], MSTPCR0, 28, 1, 1, 0),
+	MSTP("ilmem0", &div4_clks[DIV4_I], MSTPCR0, 27, 1, 1, 0),
+	MSTP("fpu0", &div4_clks[DIV4_I], MSTPCR0, 24, 1, 1, 0),
+	MSTP("intc0", &div4_clks[DIV4_I], MSTPCR0, 22, 1, 1, 0),
+	MSTP("dmac0", &div4_clks[DIV4_B], MSTPCR0, 21, 0, 1, 1),
+	MSTP("sh0", &div4_clks[DIV4_SH], MSTPCR0, 20, 0, 1, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0, 1, 0),
+	MSTP("ubc0", &div4_clks[DIV4_I], MSTPCR0, 17, 0, 1, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0, 1, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0, 0, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0, 0, 0),
+	MSTP("dmac1", &div4_clks[DIV4_B], MSTPCR0, 12, 0, 1, 1),
+	MSTP("tmu1", &div4_clks[DIV4_P], MSTPCR0, 11, 0, 1, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0, 1, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 9, 0, 1, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 8, 0, 1, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 7, 0, 1, 0),
+	MSTP("scif3", &div4_clks[DIV4_B], MSTPCR0, 6, 0, 1, 0),
+	MSTP("scif4", &div4_clks[DIV4_B], MSTPCR0, 5, 0, 1, 0),
+	MSTP("scif5", &div4_clks[DIV4_B], MSTPCR0, 4, 0, 1, 0),
+	MSTP("msiof0", &div4_clks[DIV4_B], MSTPCR0, 2, 0, 1, 0),
+	MSTP("msiof1", &div4_clks[DIV4_B], MSTPCR0, 1, 0, 1, 0),
+	MSTP("meram0", &div4_clks[DIV4_SH], MSTPCR0, 0, 1, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0, 1, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 8, 0, 0, 0),
+
+	MSTP("atapi0", &div4_clks[DIV4_SH], MSTPCR2, 28, 0, 1, 0),
+	MSTP("adc0", &div4_clks[DIV4_P], MSTPCR2, 27, 0, 1, 0),
+	MSTP("tpu0", &div4_clks[DIV4_B], MSTPCR2, 25, 0, 1, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0, 1, 0),
+	MSTP("tsif0", &div4_clks[DIV4_B], MSTPCR2, 22, 0, 1, 0),
+	MSTP("icb0", &div4_clks[DIV4_B], MSTPCR2, 21, 0, 1, 1),
+	MSTP("sdhi0", &div4_clks[DIV4_B], MSTPCR2, 18, 0, 1, 0),
+	MSTP("sdhi1", &div4_clks[DIV4_B], MSTPCR2, 17, 0, 1, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0, 0, 0),
+	MSTP("usb0", &div4_clks[DIV4_B], MSTPCR2, 11, 0, 1, 0),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 10, 0, 1, 1),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0, 1, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 6, 1, 1, 1),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0, 1, 1),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0, 1, 1),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0, 1, 1),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, 1, 1, 1),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, 1, 1, 1),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0, 1, 1),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
new file mode 100644
index 0000000..5d5c9b9
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -0,0 +1,242 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+ *
+ * SH7724 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7724 registers */
+#define FRQCRA		0xa4150000
+#define FRQCRB		0xa4150004
+#define VCLKCR		0xa4150048
+#define FCLKACR		0xa4150008
+#define FCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define SPUCLKCR	0xa415003c
+#define FLLFRQ		0xa4150050
+#define LSTATS		0xa4150060
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The fll multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long fll_recalc(struct clk *clk)
+{
+	unsigned long mult = 0;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(FLLFRQ) & 0x3ff;
+
+	if (__raw_readl(FLLFRQ) & 0x4000)
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops fll_clk_ops = {
+	.recalc		= fll_recalc,
+};
+
+static struct clk fll_clk = {
+	.name           = "fll_clk",
+	.id             = -1,
+	.ops		= &fll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCRA) >> 24) & 0x3f) + 1) * 2;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+/* A fixed divide-by-3 block use by the div6 clocks */
+static unsigned long div3_recalc(struct clk *clk)
+{
+	return clk->parent->rate / 3;
+}
+
+static struct clk_ops div3_clk_ops = {
+	.recalc		= div3_recalc,
+};
+
+static struct clk div3_clk = {
+	.name		= "div3_clk",
+	.id		= -1,
+	.ops		= &div3_clk_ops,
+	.parent		= &pll_clk,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&fll_clk,
+	&pll_clk,
+	&div3_clk,
+};
+
+static int divisors[] = { 2, 0, 4, 6, 8, 12, 16, 0, 24, 32, 36, 48, 0, 72 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+};
+
+enum { DIV4_I, DIV4_SH, DIV4_B, DIV4_P, DIV4_M1, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCRA, 20, 0x2f7d, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCRA, 12, 0x2f7c, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCRA, 8, 0x2f7c, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCRA, 0, 0x2f7c, 0),
+	[DIV4_M1] = DIV4("vpu_clk", FRQCRB, 4, 0x2f7c, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &div3_clk, VCLKCR, 0),
+	SH_CLK_DIV6("fsia_clk", &div3_clk, FCLKACR, 0),
+	SH_CLK_DIV6("fsib_clk", &div3_clk, FCLKBCR, 0),
+	SH_CLK_DIV6("irda_clk", &div3_clk, IRDACLKCR, 0),
+	SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _force_on, _need_cpg, _need_ram) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _force_on * CLK_ENABLE_ON_INIT)
+
+static struct clk mstp_clks[] = {
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, 1, 1, 0),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, 1, 1, 0),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, 1, 1, 0),
+	MSTP("rs0", &div4_clks[DIV4_B], MSTPCR0, 28, 1, 1, 0),
+	MSTP("ilmem0", &div4_clks[DIV4_I], MSTPCR0, 27, 1, 1, 0),
+	MSTP("l2c0", &div4_clks[DIV4_SH], MSTPCR0, 26, 1, 1, 0),
+	MSTP("fpu0", &div4_clks[DIV4_I], MSTPCR0, 24, 1, 1, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 1, 1, 0),
+	MSTP("dmac0", &div4_clks[DIV4_B], MSTPCR0, 21, 0, 1, 1),
+	MSTP("sh0", &div4_clks[DIV4_SH], MSTPCR0, 20, 0, 1, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0, 1, 0),
+	MSTP("ubc0", &div4_clks[DIV4_I], MSTPCR0, 17, 0, 1, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0, 1, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0, 0, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0, 0, 0),
+	MSTP("dmac1", &div4_clks[DIV4_B], MSTPCR0, 12, 0, 1, 1),
+	MSTP("tmu1", &div4_clks[DIV4_P], MSTPCR0, 10, 0, 1, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 9, 0, 1, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 8, 0, 1, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 7, 0, 1, 0),
+	MSTP("scif3", &div4_clks[DIV4_B], MSTPCR0, 6, 0, 1, 0),
+	MSTP("scif4", &div4_clks[DIV4_B], MSTPCR0, 5, 0, 1, 0),
+	MSTP("scif5", &div4_clks[DIV4_B], MSTPCR0, 4, 0, 1, 0),
+	MSTP("msiof0", &div4_clks[DIV4_B], MSTPCR0, 2, 0, 1, 0),
+	MSTP("msiof1", &div4_clks[DIV4_B], MSTPCR0, 1, 0, 1, 0),
+
+	MSTP("keysc0", &r_clk, MSTPCR1, 12, 0, 0, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 11, 0, 0, 0),
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0, 1, 0),
+	MSTP("i2c1", &div4_clks[DIV4_P], MSTPCR1, 8, 0, 1, 0),
+
+	MSTP("mmc0", &div4_clks[DIV4_B], MSTPCR2, 29, 0, 1, 0),
+	MSTP("eth0", &div4_clks[DIV4_B], MSTPCR2, 28, 0, 1, 0),
+	MSTP("atapi0", &div4_clks[DIV4_B], MSTPCR2, 26, 0, 1, 0),
+	MSTP("tpu0", &div4_clks[DIV4_B], MSTPCR2, 25, 0, 1, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0, 1, 0),
+	MSTP("tsif0", &div4_clks[DIV4_B], MSTPCR2, 22, 0, 1, 0),
+	MSTP("usb1", &div4_clks[DIV4_B], MSTPCR2, 21, 0, 1, 1),
+	MSTP("usb0", &div4_clks[DIV4_B], MSTPCR2, 20, 0, 1, 1),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 19, 0, 1, 1),
+	MSTP("sdhi0", &div4_clks[DIV4_B], MSTPCR2, 18, 0, 1, 0),
+	MSTP("sdhi1", &div4_clks[DIV4_B], MSTPCR2, 17, 0, 1, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 15, 1, 1, 1),
+	MSTP("ceu1", &div4_clks[DIV4_B], MSTPCR2, 13, 0, 1, 1),
+	MSTP("beu1", &div4_clks[DIV4_B], MSTPCR2, 12, 0, 1, 1),
+	MSTP("2ddmac0", &div4_clks[DIV4_SH], MSTPCR2, 10, 0, 1, 1),
+	MSTP("spu0", &div4_clks[DIV4_B], MSTPCR2, 9, 0, 1, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, 1, 1, 1),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0, 1, 1),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0, 1, 1),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0, 1, 1),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, 1, 1, 1),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, 1, 1, 1),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0, 1, 1),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or fll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &fll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 3177d0d..370cd47 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -29,33 +29,28 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 4) & 0x07);
-	clk->rate = clk->parent->rate / p0fc_divisors[idx];
+	return clk->parent->rate / p0fc_divisors[idx];
 }
 
 static struct clk_ops sh7763_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 16) & 0x07);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7763_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate;
-}
-
 static struct clk_ops sh7763_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7763_clk_ops[] = {
@@ -71,10 +66,10 @@
 		*ops = sh7763_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 20) & 0x07);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops sh7763_shyway_clk_ops = {
@@ -83,7 +78,7 @@
 
 static struct clk sh7763_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7763_shyway_clk_ops,
 };
 
@@ -95,31 +90,22 @@
 	&sh7763_shyway_clk,
 };
 
-static int __init sh7763_clk_init(void)
+int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	struct clk *clk;
+	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7763_onchip_clocks); i++) {
 		struct clk *clkp = sh7763_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
-arch_initcall(sh7763_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7770.c b/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
index 8e23606..e0b8967 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
@@ -28,30 +28,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 28) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7770_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQCR) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7770_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 24) & 0x000f);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7770_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 01f3da6..a249d82 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -29,30 +29,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQCR) & 0x0003);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 16) & 0x0007);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 24) & 0x0001);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7780_cpu_clk_ops = {
@@ -72,10 +72,10 @@
 		*ops = sh7780_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 20) & 0x0007);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_shyway_clk_ops = {
@@ -84,7 +84,7 @@
 
 static struct clk sh7780_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7780_shyway_clk_ops,
 };
 
@@ -96,31 +96,22 @@
 	&sh7780_shyway_clk,
 };
 
-static int __init sh7780_clk_init(void)
+int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	struct clk *clk;
+	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7780_onchip_clocks); i++) {
 		struct clk *clkp = sh7780_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
-arch_initcall(sh7780_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 27fa81b..73abfbf 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -3,7 +3,7 @@
  *
  * SH7785 support for the clock framework
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -11,152 +11,116 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/cpufreq.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
-#include <asm/io.h>
-
-static int ifc_divisors[] = { 1, 2, 4, 6 };
-static int ufc_divisors[] = { 1, 1, 4, 6 };
-static int sfc_divisors[] = { 1, 1, 4, 6 };
-static int bfc_divisors[] = { 1, 1, 1, 1, 1, 12, 16, 18,
-			     24, 32, 36, 48, 1, 1, 1, 1 };
-static int mfc_divisors[] = { 1, 1, 4, 6 };
-static int pfc_divisors[] = { 1, 1, 1, 1, 1, 1, 1, 18,
-			      24, 32, 36, 48, 1, 1, 1, 1 };
-
-static void master_clk_init(struct clk *clk)
-{
-	clk->rate *= pfc_divisors[ctrl_inl(FRQMR1) & 0x000f];
-}
-
-static struct clk_ops sh7785_master_clk_ops = {
-	.init		= master_clk_init,
-};
-
-static void module_clk_recalc(struct clk *clk)
-{
-	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
-}
-
-static struct clk_ops sh7785_module_clk_ops = {
-	.recalc		= module_clk_recalc,
-};
-
-static void bus_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
-}
-
-static struct clk_ops sh7785_bus_clk_ops = {
-	.recalc		= bus_clk_recalc,
-};
-
-static void cpu_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
-}
-
-static struct clk_ops sh7785_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
-};
-
-static struct clk_ops *sh7785_clk_ops[] = {
-	&sh7785_master_clk_ops,
-	&sh7785_module_clk_ops,
-	&sh7785_bus_clk_ops,
-	&sh7785_cpu_clk_ops,
-};
-
-void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
-{
-	if (idx < ARRAY_SIZE(sh7785_clk_ops))
-		*ops = sh7785_clk_ops[idx];
-}
-
-static void shyway_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	clk->rate = clk->parent->rate / sfc_divisors[idx];
-}
-
-static struct clk_ops sh7785_shyway_clk_ops = {
-	.recalc		= shyway_clk_recalc,
-};
-
-static struct clk sh7785_shyway_clk = {
-	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
-	.ops		= &sh7785_shyway_clk_ops,
-};
-
-static void ddr_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	clk->rate = clk->parent->rate / mfc_divisors[idx];
-}
-
-static struct clk_ops sh7785_ddr_clk_ops = {
-	.recalc		= ddr_clk_recalc,
-};
-
-static struct clk sh7785_ddr_clk = {
-	.name		= "ddr_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
-	.ops		= &sh7785_ddr_clk_ops,
-};
-
-static void ram_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 24) & 0x0003);
-	clk->rate = clk->parent->rate / ufc_divisors[idx];
-}
-
-static struct clk_ops sh7785_ram_clk_ops = {
-	.recalc		= ram_clk_recalc,
-};
-
-static struct clk sh7785_ram_clk = {
-	.name		= "ram_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
-	.ops		= &sh7785_ram_clk_ops,
-};
+#include <cpu/sh7785.h>
 
 /*
- * Additional SH7785-specific on-chip clocks that aren't already part of the
- * clock framework
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
  */
-static struct clk *sh7785_onchip_clocks[] = {
-	&sh7785_shyway_clk,
-	&sh7785_ddr_clk,
-	&sh7785_ram_clk,
+static struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
 };
 
-static int __init sh7785_clk_init(void)
+static unsigned long pll_recalc(struct clk *clk)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int multiplier;
 
-	for (i = 0; i < ARRAY_SIZE(sh7785_onchip_clocks); i++) {
-		struct clk *clkp = sh7785_onchip_clocks[i];
+	multiplier = test_mode_pin(MODE_PIN4) ? 36 : 72;
 
-		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
-	}
-
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
-	clk_put(clk);
-
-	return 0;
+	return clk->parent->rate * multiplier;
 }
-arch_initcall(sh7785_clk_init);
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static struct clk *clks[] = {
+	&extal_clk,
+	&pll_clk,
+};
+
+static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
+			       24, 32, 36, 48 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = div2,
+	.nr_divisors = ARRAY_SIZE(div2),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_DDR, DIV4_GA,
+	DIV4_DU, DIV4_P, DIV4_NR };
+
+#define DIV4(_str, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, FRQMR1, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_P] = DIV4("peripheral_clk", 0, 0x0f80, 0),
+	[DIV4_DU] = DIV4("du_clk", 4, 0x0ff0, 0),
+	[DIV4_GA] = DIV4("ga_clk", 8, 0x0030, 0),
+	[DIV4_DDR] = DIV4("ddr_clk", 12, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", 16, 0x0fe0, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", 20, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", 24, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_I] = DIV4("cpu_clk", 28, 0x000e, CLK_ENABLE_ON_INIT),
+};
+
+#define MSTPCR0		0xffc80030
+#define MSTPCR1		0xffc80034
+
+static struct clk mstp_clks[] = {
+	/* MSTPCR0 */
+	SH_CLK_MSTP32("scif_fck", 5, &div4_clks[DIV4_P], MSTPCR0, 29, 0),
+	SH_CLK_MSTP32("scif_fck", 4, &div4_clks[DIV4_P], MSTPCR0, 28, 0),
+	SH_CLK_MSTP32("scif_fck", 3, &div4_clks[DIV4_P], MSTPCR0, 27, 0),
+	SH_CLK_MSTP32("scif_fck", 2, &div4_clks[DIV4_P], MSTPCR0, 26, 0),
+	SH_CLK_MSTP32("scif_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 25, 0),
+	SH_CLK_MSTP32("scif_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 24, 0),
+	SH_CLK_MSTP32("ssi_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	SH_CLK_MSTP32("ssi_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	SH_CLK_MSTP32("hac_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	SH_CLK_MSTP32("hac_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 16, 0),
+	SH_CLK_MSTP32("mmcif_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 13, 0),
+	SH_CLK_MSTP32("flctl_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 12, 0),
+	SH_CLK_MSTP32("tmu345_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 9, 0),
+	SH_CLK_MSTP32("tmu012_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 8, 0),
+	SH_CLK_MSTP32("siof_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 3, 0),
+	SH_CLK_MSTP32("hspi_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+
+	/* MSTPCR1 */
+	SH_CLK_MSTP32("hudi_fck", -1, NULL, MSTPCR1, 19, 0),
+	SH_CLK_MSTP32("ubc_fck", -1, NULL, MSTPCR1, 17, 0),
+	SH_CLK_MSTP32("dmac_11_6_fck", -1, NULL, MSTPCR1, 5, 0),
+	SH_CLK_MSTP32("dmac_5_0_fck", -1, NULL, MSTPCR1, 4, 0),
+	SH_CLK_MSTP32("gdta_fck", -1, NULL, MSTPCR1, 0, 0),
+};
+
+int __init arch_clk_init(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(clks); i++)
+		ret |= clk_register(clks[i]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, ARRAY_SIZE(div4_clks),
+					   &div4_table);
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index f84a9c1..a0e8869 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -36,30 +36,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7786_cpu_clk_ops = {
@@ -79,10 +79,10 @@
 		*ops = sh7786_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	clk->rate = clk->parent->rate / sfc_divisors[idx];
+	return clk->parent->rate / sfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_shyway_clk_ops = {
@@ -91,14 +91,14 @@
 
 static struct clk sh7786_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7786_shyway_clk_ops,
 };
 
-static void ddr_clk_recalc(struct clk *clk)
+static unsigned long ddr_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	clk->rate = clk->parent->rate / mfc_divisors[idx];
+	return clk->parent->rate / mfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_ddr_clk_ops = {
@@ -107,7 +107,7 @@
 
 static struct clk sh7786_ddr_clk = {
 	.name		= "ddr_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7786_ddr_clk_ops,
 };
 
@@ -120,29 +120,22 @@
 	&sh7786_ddr_clk,
 };
 
-static int __init sh7786_clk_init(void)
+int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	struct clk *clk;
+	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7786_onchip_clocks); i++) {
 		struct clk *clkp = sh7786_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-arch_initcall(sh7786_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index c630b29..23c27d3 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -40,30 +40,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> PFC_POS) & PFC_MSK);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops shx3_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> BFC_POS) & BFC_MSK);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops shx3_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> IFC_POS) & IFC_MSK);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops shx3_cpu_clk_ops = {
@@ -83,10 +83,10 @@
 		*ops = shx3_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> CFC_POS) & CFC_MSK);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops shx3_shyway_clk_ops = {
@@ -95,7 +95,7 @@
 
 static struct clk shx3_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &shx3_shyway_clk_ops,
 };
 
@@ -107,29 +107,22 @@
 	&shx3_shyway_clk,
 };
 
-static int __init shx3_clk_init(void)
+int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	struct clk *clk;
+	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(shx3_onchip_clocks); i++) {
 		struct clk *clkp = shx3_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-arch_initcall(shx3_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c
new file mode 100644
index 0000000..1af0f95
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c
@@ -0,0 +1,2230 @@
+/*
+ * SH7724 Pinmux
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on SH7723 Pinmux
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <cpu/sh7724.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+	PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+			      PTG5_DATA, PTG4_DATA,
+	PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ6_DATA, PTJ5_DATA,
+	PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA,
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+		   PTS6_DATA, PTS5_DATA, PTS4_DATA,
+	PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA,
+	PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA,
+	PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+	PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+	PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE7_IN, PTE6_IN, PTE5_IN, PTE4_IN,
+	PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF7_IN, PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTH7_IN, PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK7_IN, PTK6_IN, PTK5_IN, PTK4_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ7_IN, PTQ6_IN, PTQ5_IN, PTQ4_IN,
+	PTQ3_IN, PTQ2_IN, PTQ1_IN, PTQ0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+		 PTS6_IN, PTS5_IN, PTS4_IN,
+	PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT7_IN, PTT6_IN, PTT5_IN, PTT4_IN,
+	PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU7_IN, PTU6_IN, PTU5_IN, PTU4_IN,
+	PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV7_IN, PTV6_IN, PTV5_IN, PTV4_IN,
+	PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW7_IN, PTW6_IN, PTW5_IN, PTW4_IN,
+	PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX7_IN, PTX6_IN, PTX5_IN, PTX4_IN,
+	PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY7_IN, PTY6_IN, PTY5_IN, PTY4_IN,
+	PTY3_IN, PTY2_IN, PTY1_IN, PTY0_IN,
+	PTZ7_IN, PTZ6_IN, PTZ5_IN, PTZ4_IN,
+	PTZ3_IN, PTZ2_IN, PTZ1_IN, PTZ0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTA7_IN_PU, PTA6_IN_PU, PTA5_IN_PU, PTA4_IN_PU,
+	PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB7_IN_PU, PTB6_IN_PU, PTB5_IN_PU, PTB4_IN_PU,
+	PTB3_IN_PU, PTB2_IN_PU, PTB1_IN_PU, PTB0_IN_PU,
+	PTC7_IN_PU, PTC6_IN_PU, PTC5_IN_PU, PTC4_IN_PU,
+	PTC3_IN_PU, PTC2_IN_PU, PTC1_IN_PU, PTC0_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU, PTD0_IN_PU,
+	PTE7_IN_PU, PTE6_IN_PU, PTE5_IN_PU, PTE4_IN_PU,
+	PTE3_IN_PU, PTE2_IN_PU, PTE1_IN_PU, PTE0_IN_PU,
+	PTF7_IN_PU, PTF6_IN_PU, PTF5_IN_PU, PTF4_IN_PU,
+	PTF3_IN_PU, PTF2_IN_PU, PTF1_IN_PU, PTF0_IN_PU,
+	PTH7_IN_PU, PTH6_IN_PU, PTH5_IN_PU, PTH4_IN_PU,
+	PTH3_IN_PU, PTH2_IN_PU, PTH1_IN_PU, PTH0_IN_PU,
+	PTJ3_IN_PU, PTJ2_IN_PU, PTJ1_IN_PU, PTJ0_IN_PU,
+	PTK7_IN_PU, PTK6_IN_PU, PTK5_IN_PU, PTK4_IN_PU,
+	PTK3_IN_PU, PTK2_IN_PU, PTK1_IN_PU, PTK0_IN_PU,
+	PTL7_IN_PU, PTL6_IN_PU, PTL5_IN_PU, PTL4_IN_PU,
+	PTL3_IN_PU, PTL2_IN_PU, PTL1_IN_PU, PTL0_IN_PU,
+	PTM7_IN_PU, PTM6_IN_PU, PTM5_IN_PU, PTM4_IN_PU,
+	PTM3_IN_PU, PTM2_IN_PU, PTM1_IN_PU, PTM0_IN_PU,
+	PTN7_IN_PU, PTN6_IN_PU, PTN5_IN_PU, PTN4_IN_PU,
+	PTN3_IN_PU, PTN2_IN_PU, PTN1_IN_PU, PTN0_IN_PU,
+	PTQ7_IN_PU, PTQ6_IN_PU, PTQ5_IN_PU, PTQ4_IN_PU,
+	PTQ3_IN_PU, PTQ2_IN_PU, PTQ1_IN_PU, PTQ0_IN_PU,
+	PTR7_IN_PU, PTR6_IN_PU, PTR5_IN_PU, PTR4_IN_PU,
+	PTR3_IN_PU, PTR2_IN_PU, PTR1_IN_PU, PTR0_IN_PU,
+		    PTS6_IN_PU, PTS5_IN_PU, PTS4_IN_PU,
+	PTS3_IN_PU, PTS2_IN_PU, PTS1_IN_PU, PTS0_IN_PU,
+	PTT7_IN_PU, PTT6_IN_PU, PTT5_IN_PU, PTT4_IN_PU,
+	PTT3_IN_PU, PTT2_IN_PU, PTT1_IN_PU, PTT0_IN_PU,
+	PTU7_IN_PU, PTU6_IN_PU, PTU5_IN_PU, PTU4_IN_PU,
+	PTU3_IN_PU, PTU2_IN_PU, PTU1_IN_PU, PTU0_IN_PU,
+	PTV7_IN_PU, PTV6_IN_PU, PTV5_IN_PU, PTV4_IN_PU,
+	PTV3_IN_PU, PTV2_IN_PU, PTV1_IN_PU, PTV0_IN_PU,
+	PTW7_IN_PU, PTW6_IN_PU, PTW5_IN_PU, PTW4_IN_PU,
+	PTW3_IN_PU, PTW2_IN_PU, PTW1_IN_PU, PTW0_IN_PU,
+	PTX7_IN_PU, PTX6_IN_PU, PTX5_IN_PU, PTX4_IN_PU,
+	PTX3_IN_PU, PTX2_IN_PU, PTX1_IN_PU, PTX0_IN_PU,
+	PTY7_IN_PU, PTY6_IN_PU, PTY5_IN_PU, PTY4_IN_PU,
+	PTY3_IN_PU, PTY2_IN_PU, PTY1_IN_PU, PTY0_IN_PU,
+	PTZ7_IN_PU, PTZ6_IN_PU, PTZ5_IN_PU, PTZ4_IN_PU,
+	PTZ3_IN_PU, PTZ2_IN_PU, PTZ1_IN_PU, PTZ0_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE7_OUT, PTE6_OUT, PTE5_OUT, PTE4_OUT,
+	PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF7_OUT, PTF6_OUT, PTF5_OUT, PTF4_OUT,
+	PTF3_OUT, PTF2_OUT, PTF1_OUT, PTF0_OUT,
+			    PTG5_OUT, PTG4_OUT,
+	PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ6_OUT, PTJ5_OUT,
+	PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK7_OUT, PTK6_OUT, PTK5_OUT, PTK4_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,
+	PTQ7_OUT, PTQ6_OUT, PTQ5_OUT, PTQ4_OUT,
+	PTQ3_OUT, PTQ2_OUT, PTQ1_OUT, PTQ0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+			    PTR1_OUT, PTR0_OUT,
+		  PTS6_OUT, PTS5_OUT, PTS4_OUT,
+	PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT7_OUT, PTT6_OUT, PTT5_OUT, PTT4_OUT,
+	PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU7_OUT, PTU6_OUT, PTU5_OUT, PTU4_OUT,
+	PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV7_OUT, PTV6_OUT, PTV5_OUT, PTV4_OUT,
+	PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW7_OUT, PTW6_OUT, PTW5_OUT, PTW4_OUT,
+	PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX7_OUT, PTX6_OUT, PTX5_OUT, PTX4_OUT,
+	PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY7_OUT, PTY6_OUT, PTY5_OUT, PTY4_OUT,
+	PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PTZ7_OUT, PTZ6_OUT, PTZ5_OUT, PTZ4_OUT,
+	PTZ3_OUT, PTZ2_OUT, PTZ1_OUT, PTZ0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE7_FN, PTE6_FN, PTE5_FN, PTE4_FN,
+	PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF7_FN, PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+			  PTG5_FN, PTG4_FN,
+	PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH7_FN, PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ7_FN, PTJ6_FN, PTJ5_FN,
+	PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK7_FN, PTK6_FN, PTK5_FN, PTK4_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN,
+	PTL3_FN, PTL2_FN, PTL1_FN, PTL0_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTN7_FN, PTN6_FN, PTN5_FN, PTN4_FN,
+	PTN3_FN, PTN2_FN, PTN1_FN, PTN0_FN,
+	PTQ7_FN, PTQ6_FN, PTQ5_FN, PTQ4_FN,
+	PTQ3_FN, PTQ2_FN, PTQ1_FN, PTQ0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+		 PTS6_FN, PTS5_FN, PTS4_FN,
+	PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT7_FN, PTT6_FN, PTT5_FN, PTT4_FN,
+	PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU7_FN, PTU6_FN, PTU5_FN, PTU4_FN,
+	PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV7_FN, PTV6_FN, PTV5_FN, PTV4_FN,
+	PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+	PTW7_FN, PTW6_FN, PTW5_FN, PTW4_FN,
+	PTW3_FN, PTW2_FN, PTW1_FN, PTW0_FN,
+	PTX7_FN, PTX6_FN, PTX5_FN, PTX4_FN,
+	PTX3_FN, PTX2_FN, PTX1_FN, PTX0_FN,
+	PTY7_FN, PTY6_FN, PTY5_FN, PTY4_FN,
+	PTY3_FN, PTY2_FN, PTY1_FN, PTY0_FN,
+	PTZ7_FN, PTZ6_FN, PTZ5_FN, PTZ4_FN,
+	PTZ3_FN, PTZ2_FN, PTZ1_FN, PTZ0_FN,
+
+
+	PSA15_0, PSA15_1,
+	PSA14_0, PSA14_1,
+	PSA13_0, PSA13_1,
+	PSA12_0, PSA12_1,
+	PSA10_0, PSA10_1,
+	PSA9_0,  PSA9_1,
+	PSA8_0,  PSA8_1,
+	PSA7_0,  PSA7_1,
+	PSA6_0,  PSA6_1,
+	PSA5_0,  PSA5_1,
+	PSA3_0,  PSA3_1,
+	PSA2_0,  PSA2_1,
+	PSA1_0,  PSA1_1,
+	PSA0_0,  PSA0_1,
+
+	PSB14_0, PSB14_1,
+	PSB13_0, PSB13_1,
+	PSB12_0, PSB12_1,
+	PSB11_0, PSB11_1,
+	PSB10_0, PSB10_1,
+	PSB9_0,  PSB9_1,
+	PSB8_0,  PSB8_1,
+	PSB7_0,  PSB7_1,
+	PSB6_0,  PSB6_1,
+	PSB5_0,  PSB5_1,
+	PSB4_0,  PSB4_1,
+	PSB3_0,  PSB3_1,
+	PSB2_0,  PSB2_1,
+	PSB1_0,  PSB1_1,
+	PSB0_0,  PSB0_1,
+
+	PSC15_0, PSC15_1,
+	PSC14_0, PSC14_1,
+	PSC13_0, PSC13_1,
+	PSC12_0, PSC12_1,
+	PSC11_0, PSC11_1,
+	PSC10_0, PSC10_1,
+	PSC9_0,  PSC9_1,
+	PSC8_0,  PSC8_1,
+	PSC7_0,  PSC7_1,
+	PSC6_0,  PSC6_1,
+	PSC5_0,  PSC5_1,
+	PSC4_0,  PSC4_1,
+	PSC2_0,  PSC2_1,
+	PSC1_0,  PSC1_1,
+	PSC0_0,  PSC0_1,
+
+	PSD15_0, PSD15_1,
+	PSD14_0, PSD14_1,
+	PSD13_0, PSD13_1,
+	PSD12_0, PSD12_1,
+	PSD11_0, PSD11_1,
+	PSD10_0, PSD10_1,
+	PSD9_0,  PSD9_1,
+	PSD8_0,  PSD8_1,
+	PSD7_0,  PSD7_1,
+	PSD6_0,  PSD6_1,
+	PSD5_0,  PSD5_1,
+	PSD4_0,  PSD4_1,
+	PSD3_0,  PSD3_1,
+	PSD2_0,  PSD2_1,
+	PSD1_0,  PSD1_1,
+	PSD0_0,  PSD0_1,
+
+	PSE15_0, PSE15_1,
+	PSE14_0, PSE14_1,
+	PSE13_0, PSE13_1,
+	PSE12_0, PSE12_1,
+	PSE11_0, PSE11_1,
+	PSE10_0, PSE10_1,
+	PSE9_0,  PSE9_1,
+	PSE8_0,  PSE8_1,
+	PSE7_0,  PSE7_1,
+	PSE6_0,  PSE6_1,
+	PSE5_0,  PSE5_1,
+	PSE4_0,  PSE4_1,
+	PSE3_0,  PSE3_1,
+	PSE2_0,  PSE2_1,
+	PSE1_0,  PSE1_1,
+	PSE0_0,  PSE0_1,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,
+	/*PTA*/
+	D23_MARK,	KEYOUT2_MARK,		IDED15_MARK,
+	D22_MARK,	KEYOUT1_MARK,		IDED14_MARK,
+	D21_MARK,	KEYOUT0_MARK,		IDED13_MARK,
+	D20_MARK,	KEYIN4_MARK,		IDED12_MARK,
+	D19_MARK,	KEYIN3_MARK,		IDED11_MARK,
+	D18_MARK,	KEYIN2_MARK,		IDED10_MARK,
+	D17_MARK,	KEYIN1_MARK,		IDED9_MARK,
+	D16_MARK,	KEYIN0_MARK,		IDED8_MARK,
+
+	/*PTB*/
+	D31_MARK,	TPUTO1_MARK,		IDEA1_MARK,
+	D30_MARK,	TPUTO0_MARK,		IDEA0_MARK,
+	D29_MARK,				IODREQ_MARK,
+	D28_MARK,				IDECS0_MARK,
+	D27_MARK,				IDECS1_MARK,
+	D26_MARK,	KEYOUT5_IN5_MARK,	IDEIORD_MARK,
+	D25_MARK,	KEYOUT4_IN6_MARK,	IDEIOWR_MARK,
+	D24_MARK,	KEYOUT3_MARK,		IDEINT_MARK,
+
+	/*PTC*/
+	LCDD7_MARK,
+	LCDD6_MARK,
+	LCDD5_MARK,
+	LCDD4_MARK,
+	LCDD3_MARK,
+	LCDD2_MARK,
+	LCDD1_MARK,
+	LCDD0_MARK,
+
+	/*PTD*/
+	LCDD15_MARK,
+	LCDD14_MARK,
+	LCDD13_MARK,
+	LCDD12_MARK,
+	LCDD11_MARK,
+	LCDD10_MARK,
+	LCDD9_MARK,
+	LCDD8_MARK,
+
+	/*PTE*/
+	FSIMCKB_MARK,
+	FSIMCKA_MARK,
+	LCDD21_MARK,	SCIF2_L_TXD_MARK,
+	LCDD20_MARK,	SCIF4_SCK_MARK,
+	LCDD19_MARK,	SCIF4_RXD_MARK,
+	LCDD18_MARK,	SCIF4_TXD_MARK,
+	LCDD17_MARK,
+	LCDD16_MARK,
+
+	/*PTF*/
+	LCDVSYN_MARK,
+	LCDDISP_MARK,	LCDRS_MARK,
+	LCDHSYN_MARK,	LCDCS_MARK,
+	LCDDON_MARK,
+	LCDDCK_MARK,	LCDWR_MARK,
+	LCDVEPWC_MARK,	SCIF0_TXD_MARK,
+	LCDD23_MARK,	SCIF2_L_SCK_MARK,
+	LCDD22_MARK,	SCIF2_L_RXD_MARK,
+
+	/*PTG*/
+	AUDCK_MARK,
+	AUDSYNC_MARK,
+	AUDATA3_MARK,
+	AUDATA2_MARK,
+	AUDATA1_MARK,
+	AUDATA0_MARK,
+
+	/*PTH*/
+	VIO0_VD_MARK,
+	VIO0_CLK_MARK,
+	VIO0_D7_MARK,
+	VIO0_D6_MARK,
+	VIO0_D5_MARK,
+	VIO0_D4_MARK,
+	VIO0_D3_MARK,
+	VIO0_D2_MARK,
+
+	/*PTJ*/
+	PDSTATUS_MARK,
+	STATUS2_MARK,
+	STATUS0_MARK,
+	A25_MARK,		BS_MARK,
+	A24_MARK,
+	A23_MARK,
+	A22_MARK,
+
+	/*PTK*/
+	VIO1_D5_MARK,	VIO0_D13_MARK,	IDED5_MARK,
+	VIO1_D4_MARK,	VIO0_D12_MARK,	IDED4_MARK,
+	VIO1_D3_MARK,	VIO0_D11_MARK,	IDED3_MARK,
+	VIO1_D2_MARK,	VIO0_D10_MARK,	IDED2_MARK,
+	VIO1_D1_MARK,	VIO0_D9_MARK,	IDED1_MARK,
+	VIO1_D0_MARK,	VIO0_D8_MARK,	IDED0_MARK,
+	VIO0_FLD_MARK,
+	VIO0_HD_MARK,
+
+	/*PTL*/
+	DV_D5_MARK,	SCIF3_V_SCK_MARK,	RMII_RXD0_MARK,
+	DV_D4_MARK,	SCIF3_V_RXD_MARK,	RMII_RXD1_MARK,
+	DV_D3_MARK,	SCIF3_V_TXD_MARK,	RMII_REF_CLK_MARK,
+	DV_D2_MARK,	SCIF1_SCK_MARK,		RMII_TX_EN_MARK,
+	DV_D1_MARK,	SCIF1_RXD_MARK,		RMII_TXD0_MARK,
+	DV_D0_MARK,	SCIF1_TXD_MARK,		RMII_TXD1_MARK,
+	DV_D15_MARK,
+	DV_D14_MARK,	MSIOF0_MCK_MARK,
+
+	/*PTM*/
+	DV_D13_MARK,	MSIOF0_TSCK_MARK,
+	DV_D12_MARK,	MSIOF0_RXD_MARK,
+	DV_D11_MARK,	MSIOF0_TXD_MARK,
+	DV_D10_MARK,	MSIOF0_TSYNC_MARK,
+	DV_D9_MARK,	MSIOF0_SS1_MARK,	MSIOF0_RSCK_MARK,
+	DV_D8_MARK,	MSIOF0_SS2_MARK,	MSIOF0_RSYNC_MARK,
+	LCDVCPWC_MARK,	SCIF0_RXD_MARK,
+	LCDRD_MARK,	SCIF0_SCK_MARK,
+
+	/*PTN*/
+	VIO0_D1_MARK,
+	VIO0_D0_MARK,
+	DV_CLKI_MARK,
+	DV_CLK_MARK,	SCIF2_V_SCK_MARK,
+	DV_VSYNC_MARK,	SCIF2_V_RXD_MARK,
+	DV_HSYNC_MARK,	SCIF2_V_TXD_MARK,
+	DV_D7_MARK,	SCIF3_V_CTS_MARK,	RMII_RX_ER_MARK,
+	DV_D6_MARK,	SCIF3_V_RTS_MARK,	RMII_CRS_DV_MARK,
+
+	/*PTQ*/
+	D7_MARK,
+	D6_MARK,
+	D5_MARK,
+	D4_MARK,
+	D3_MARK,
+	D2_MARK,
+	D1_MARK,
+	D0_MARK,
+
+	/*PTR*/
+	CS6B_CE1B_MARK,
+	CS6A_CE2B_MARK,
+	CS5B_CE1A_MARK,
+	CS5A_CE2A_MARK,
+	IOIS16_MARK,		LCDLCLK_MARK,
+	WAIT_MARK,
+	WE3_ICIOWR_MARK,	TPUTO3_MARK,	TPUTI3_MARK,
+	WE2_ICIORD_MARK,	TPUTO2_MARK,	IDEA2_MARK,
+
+	/*PTS*/
+	VIO_CKO_MARK,
+	VIO1_FLD_MARK,	TPUTI2_MARK,		IDEIORDY_MARK,
+	VIO1_HD_MARK,	SCIF5_SCK_MARK,
+	VIO1_VD_MARK,	SCIF5_RXD_MARK,
+	VIO1_CLK_MARK,	SCIF5_TXD_MARK,
+	VIO1_D7_MARK,	VIO0_D15_MARK,		IDED7_MARK,
+	VIO1_D6_MARK,	VIO0_D14_MARK,		IDED6_MARK,
+
+	/*PTT*/
+	D15_MARK,
+	D14_MARK,
+	D13_MARK,
+	D12_MARK,
+	D11_MARK,
+	D10_MARK,
+	D9_MARK,
+	D8_MARK,
+
+	/*PTU*/
+	DMAC_DACK0_MARK,
+	DMAC_DREQ0_MARK,
+	FSIOASD_MARK,
+	FSIIABCK_MARK,
+	FSIIALRCK_MARK,
+	FSIOABCK_MARK,
+	FSIOALRCK_MARK,
+	CLKAUDIOAO_MARK,
+
+	/*PTV*/
+	FSIIBSD_MARK,		MSIOF1_SS2_MARK,	MSIOF1_RSYNC_MARK,
+	FSIOBSD_MARK,		MSIOF1_SS1_MARK,	MSIOF1_RSCK_MARK,
+	FSIIBBCK_MARK,		MSIOF1_RXD_MARK,
+	FSIIBLRCK_MARK,		MSIOF1_TSYNC_MARK,
+	FSIOBBCK_MARK,		MSIOF1_TSCK_MARK,
+	FSIOBLRCK_MARK,		MSIOF1_TXD_MARK,
+	CLKAUDIOBO_MARK,	MSIOF1_MCK_MARK,
+	FSIIASD_MARK,
+
+	/*PTW*/
+	MMC_D7_MARK,		SDHI1CD_MARK,		IODACK_MARK,
+	MMC_D6_MARK,		SDHI1WP_MARK,		IDERST_MARK,
+	MMC_D5_MARK,		SDHI1D3_MARK,		EXBUF_ENB_MARK,
+	MMC_D4_MARK,		SDHI1D2_MARK,		DIRECTION_MARK,
+	MMC_D3_MARK,		SDHI1D1_MARK,
+	MMC_D2_MARK,		SDHI1D0_MARK,
+	MMC_D1_MARK,		SDHI1CMD_MARK,
+	MMC_D0_MARK,		SDHI1CLK_MARK,
+
+	/*PTX*/
+	DMAC_DACK1_MARK,	IRDA_OUT_MARK,
+	DMAC_DREQ1_MARK,	IRDA_IN_MARK,
+	TSIF_TS0_SDAT_MARK,				LNKSTA_MARK,
+	TSIF_TS0_SCK_MARK,				MDIO_MARK,
+	TSIF_TS0_SDEN_MARK,				MDC_MARK,
+	TSIF_TS0_SPSYNC_MARK,
+	MMC_CLK_MARK,
+	MMC_CMD_MARK,
+
+	/*PTY*/
+	SDHI0CD_MARK,
+	SDHI0WP_MARK,
+	SDHI0D3_MARK,
+	SDHI0D2_MARK,
+	SDHI0D1_MARK,
+	SDHI0D0_MARK,
+	SDHI0CMD_MARK,
+	SDHI0CLK_MARK,
+
+	/*PTZ*/
+	INTC_IRQ7_MARK,		SCIF3_I_CTS_MARK,
+	INTC_IRQ6_MARK,		SCIF3_I_RTS_MARK,
+	INTC_IRQ5_MARK,		SCIF3_I_SCK_MARK,
+	INTC_IRQ4_MARK,		SCIF3_I_RXD_MARK,
+	INTC_IRQ3_MARK,		SCIF3_I_TXD_MARK,
+	INTC_IRQ2_MARK,
+	INTC_IRQ1_MARK,
+	INTC_IRQ0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT, PTA7_IN_PU),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT, PTA6_IN_PU),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT, PTA5_IN_PU),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT, PTB7_IN_PU),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT, PTB6_IN_PU),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT, PTB5_IN_PU),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT, PTB4_IN_PU),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT, PTB3_IN_PU),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT, PTB0_IN_PU),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT, PTC7_IN_PU),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT, PTC6_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT, PTC4_IN_PU),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT, PTC3_IN_PU),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT, PTC2_IN_PU),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT, PTC1_IN_PU),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT, PTC0_IN_PU),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT, PTD0_IN_PU),
+
+	/* PTE GPIO */
+	PINMUX_DATA(PTE7_DATA, PTE7_IN, PTE7_OUT, PTE7_IN_PU),
+	PINMUX_DATA(PTE6_DATA, PTE6_IN, PTE6_OUT, PTE6_IN_PU),
+	PINMUX_DATA(PTE5_DATA, PTE5_IN, PTE5_OUT, PTE5_IN_PU),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT, PTE4_IN_PU),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT, PTE3_IN_PU),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT, PTE2_IN_PU),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT, PTE1_IN_PU),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT, PTE0_IN_PU),
+
+	/* PTF GPIO */
+	PINMUX_DATA(PTF7_DATA, PTF7_IN, PTF7_OUT, PTF7_IN_PU),
+	PINMUX_DATA(PTF6_DATA, PTF6_IN, PTF6_OUT, PTF6_IN_PU),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN, PTF5_OUT, PTF5_IN_PU),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN, PTF4_OUT, PTF4_IN_PU),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN, PTF3_OUT, PTF3_IN_PU),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN, PTF2_OUT, PTF2_IN_PU),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_OUT, PTF1_IN_PU),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT, PTF0_IN_PU),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG5_DATA, PTG5_OUT),
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH7_DATA, PTH7_IN, PTH7_OUT, PTH7_IN_PU),
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT, PTH6_IN_PU),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT, PTH5_IN_PU),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT, PTH4_IN_PU),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT, PTH3_IN_PU),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT, PTH2_IN_PU),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT, PTH1_IN_PU),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT, PTH0_IN_PU),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ6_DATA, PTJ6_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT, PTJ3_IN_PU),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT, PTJ2_IN_PU),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT, PTJ0_IN_PU),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK7_DATA, PTK7_IN, PTK7_OUT, PTK7_IN_PU),
+	PINMUX_DATA(PTK6_DATA, PTK6_IN, PTK6_OUT, PTK6_IN_PU),
+	PINMUX_DATA(PTK5_DATA, PTK5_IN, PTK5_OUT, PTK5_IN_PU),
+	PINMUX_DATA(PTK4_DATA, PTK4_IN, PTK4_OUT, PTK4_IN_PU),
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT, PTK3_IN_PU),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT, PTK2_IN_PU),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT, PTK1_IN_PU),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT, PTK0_IN_PU),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT, PTL7_IN_PU),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT, PTL6_IN_PU),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT, PTL5_IN_PU),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT, PTL4_IN_PU),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT, PTL3_IN_PU),
+	PINMUX_DATA(PTL2_DATA, PTL2_IN, PTL2_OUT, PTL2_IN_PU),
+	PINMUX_DATA(PTL1_DATA, PTL1_IN, PTL1_OUT, PTL1_IN_PU),
+	PINMUX_DATA(PTL0_DATA, PTL0_IN, PTL0_OUT, PTL0_IN_PU),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT, PTM7_IN_PU),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT, PTM6_IN_PU),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT, PTM5_IN_PU),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT, PTM4_IN_PU),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT, PTM3_IN_PU),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT, PTM2_IN_PU),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT, PTM1_IN_PU),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT, PTM0_IN_PU),
+
+	/* PTN GPIO */
+	PINMUX_DATA(PTN7_DATA, PTN7_IN, PTN7_OUT, PTN7_IN_PU),
+	PINMUX_DATA(PTN6_DATA, PTN6_IN, PTN6_OUT, PTN6_IN_PU),
+	PINMUX_DATA(PTN5_DATA, PTN5_IN, PTN5_OUT, PTN5_IN_PU),
+	PINMUX_DATA(PTN4_DATA, PTN4_IN, PTN4_OUT, PTN4_IN_PU),
+	PINMUX_DATA(PTN3_DATA, PTN3_IN, PTN3_OUT, PTN3_IN_PU),
+	PINMUX_DATA(PTN2_DATA, PTN2_IN, PTN2_OUT, PTN2_IN_PU),
+	PINMUX_DATA(PTN1_DATA, PTN1_IN, PTN1_OUT, PTN1_IN_PU),
+	PINMUX_DATA(PTN0_DATA, PTN0_IN, PTN0_OUT, PTN0_IN_PU),
+
+	/* PTQ GPIO */
+	PINMUX_DATA(PTQ7_DATA, PTQ7_IN, PTQ7_OUT, PTQ7_IN_PU),
+	PINMUX_DATA(PTQ6_DATA, PTQ6_IN, PTQ6_OUT, PTQ6_IN_PU),
+	PINMUX_DATA(PTQ5_DATA, PTQ5_IN, PTQ5_OUT, PTQ5_IN_PU),
+	PINMUX_DATA(PTQ4_DATA, PTQ4_IN, PTQ4_OUT, PTQ4_IN_PU),
+	PINMUX_DATA(PTQ3_DATA, PTQ3_IN, PTQ3_OUT, PTQ3_IN_PU),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN, PTQ2_OUT, PTQ2_IN_PU),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_IN, PTQ1_OUT, PTQ1_IN_PU),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_IN, PTQ0_OUT, PTQ0_IN_PU),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT, PTR7_IN_PU),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT, PTR6_IN_PU),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT, PTR5_IN_PU),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT, PTR4_IN_PU),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN,           PTR3_IN_PU),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN,           PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT, PTR1_IN_PU),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT, PTR0_IN_PU),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS6_DATA, PTS6_IN, PTS6_OUT, PTS6_IN_PU),
+	PINMUX_DATA(PTS5_DATA, PTS5_IN, PTS5_OUT, PTS5_IN_PU),
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT, PTS4_IN_PU),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT, PTS3_IN_PU),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT, PTS2_IN_PU),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT, PTS1_IN_PU),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT, PTS0_IN_PU),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT7_DATA, PTT7_IN, PTT7_OUT, PTT7_IN_PU),
+	PINMUX_DATA(PTT6_DATA, PTT6_IN, PTT6_OUT, PTT6_IN_PU),
+	PINMUX_DATA(PTT5_DATA, PTT5_IN, PTT5_OUT, PTT5_IN_PU),
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT, PTT4_IN_PU),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT, PTT3_IN_PU),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT, PTT2_IN_PU),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT, PTT1_IN_PU),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT, PTT0_IN_PU),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU7_DATA, PTU7_IN, PTU7_OUT, PTU7_IN_PU),
+	PINMUX_DATA(PTU6_DATA, PTU6_IN, PTU6_OUT, PTU6_IN_PU),
+	PINMUX_DATA(PTU5_DATA, PTU5_IN, PTU5_OUT, PTU5_IN_PU),
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT, PTU4_IN_PU),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT, PTU3_IN_PU),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT, PTU2_IN_PU),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT, PTU1_IN_PU),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT, PTU0_IN_PU),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV7_DATA, PTV7_IN, PTV7_OUT, PTV7_IN_PU),
+	PINMUX_DATA(PTV6_DATA, PTV6_IN, PTV6_OUT, PTV6_IN_PU),
+	PINMUX_DATA(PTV5_DATA, PTV5_IN, PTV5_OUT, PTV5_IN_PU),
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT, PTV4_IN_PU),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT, PTV3_IN_PU),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT, PTV2_IN_PU),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT, PTV1_IN_PU),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT, PTV0_IN_PU),
+
+	/* PTW GPIO */
+	PINMUX_DATA(PTW7_DATA, PTW7_IN, PTW7_OUT, PTW7_IN_PU),
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_OUT, PTW6_IN_PU),
+	PINMUX_DATA(PTW5_DATA, PTW5_IN, PTW5_OUT, PTW5_IN_PU),
+	PINMUX_DATA(PTW4_DATA, PTW4_IN, PTW4_OUT, PTW4_IN_PU),
+	PINMUX_DATA(PTW3_DATA, PTW3_IN, PTW3_OUT, PTW3_IN_PU),
+	PINMUX_DATA(PTW2_DATA, PTW2_IN, PTW2_OUT, PTW2_IN_PU),
+	PINMUX_DATA(PTW1_DATA, PTW1_IN, PTW1_OUT, PTW1_IN_PU),
+	PINMUX_DATA(PTW0_DATA, PTW0_IN, PTW0_OUT, PTW0_IN_PU),
+
+	/* PTX GPIO */
+	PINMUX_DATA(PTX7_DATA, PTX7_IN, PTX7_OUT, PTX7_IN_PU),
+	PINMUX_DATA(PTX6_DATA, PTX6_IN, PTX6_OUT, PTX6_IN_PU),
+	PINMUX_DATA(PTX5_DATA, PTX5_IN, PTX5_OUT, PTX5_IN_PU),
+	PINMUX_DATA(PTX4_DATA, PTX4_IN, PTX4_OUT, PTX4_IN_PU),
+	PINMUX_DATA(PTX3_DATA, PTX3_IN, PTX3_OUT, PTX3_IN_PU),
+	PINMUX_DATA(PTX2_DATA, PTX2_IN, PTX2_OUT, PTX2_IN_PU),
+	PINMUX_DATA(PTX1_DATA, PTX1_IN, PTX1_OUT, PTX1_IN_PU),
+	PINMUX_DATA(PTX0_DATA, PTX0_IN, PTX0_OUT, PTX0_IN_PU),
+
+	/* PTY GPIO */
+	PINMUX_DATA(PTY7_DATA, PTY7_IN, PTY7_OUT, PTY7_IN_PU),
+	PINMUX_DATA(PTY6_DATA, PTY6_IN, PTY6_OUT, PTY6_IN_PU),
+	PINMUX_DATA(PTY5_DATA, PTY5_IN, PTY5_OUT, PTY5_IN_PU),
+	PINMUX_DATA(PTY4_DATA, PTY4_IN, PTY4_OUT, PTY4_IN_PU),
+	PINMUX_DATA(PTY3_DATA, PTY3_IN, PTY3_OUT, PTY3_IN_PU),
+	PINMUX_DATA(PTY2_DATA, PTY2_IN, PTY2_OUT, PTY2_IN_PU),
+	PINMUX_DATA(PTY1_DATA, PTY1_IN, PTY1_OUT, PTY1_IN_PU),
+	PINMUX_DATA(PTY0_DATA, PTY0_IN, PTY0_OUT, PTY0_IN_PU),
+
+	/* PTZ GPIO */
+	PINMUX_DATA(PTZ7_DATA, PTZ7_IN, PTZ7_OUT, PTZ7_IN_PU),
+	PINMUX_DATA(PTZ6_DATA, PTZ6_IN, PTZ6_OUT, PTZ6_IN_PU),
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_OUT, PTZ5_IN_PU),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_OUT, PTZ4_IN_PU),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_OUT, PTZ3_IN_PU),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_OUT, PTZ2_IN_PU),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_OUT, PTZ1_IN_PU),
+	PINMUX_DATA(PTZ0_DATA, PTZ0_IN, PTZ0_OUT, PTZ0_IN_PU),
+
+	/* PTA FN */
+	PINMUX_DATA(D23_MARK,	PSA15_0, PSA14_0, PTA7_FN),
+	PINMUX_DATA(D22_MARK,	PSA15_0, PSA14_0, PTA6_FN),
+	PINMUX_DATA(D21_MARK,	PSA15_0, PSA14_0, PTA5_FN),
+	PINMUX_DATA(D20_MARK,	PSA15_0, PSA14_0, PTA4_FN),
+	PINMUX_DATA(D19_MARK,	PSA15_0, PSA14_0, PTA3_FN),
+	PINMUX_DATA(D18_MARK,	PSA15_0, PSA14_0, PTA2_FN),
+	PINMUX_DATA(D17_MARK,	PSA15_0, PSA14_0, PTA1_FN),
+	PINMUX_DATA(D16_MARK,	PSA15_0, PSA14_0, PTA0_FN),
+
+	PINMUX_DATA(KEYOUT2_MARK,	PSA15_0, PSA14_1, PTA7_FN),
+	PINMUX_DATA(KEYOUT1_MARK,	PSA15_0, PSA14_1, PTA6_FN),
+	PINMUX_DATA(KEYOUT0_MARK,	PSA15_0, PSA14_1, PTA5_FN),
+	PINMUX_DATA(KEYIN4_MARK,	PSA15_0, PSA14_1, PTA4_FN),
+	PINMUX_DATA(KEYIN3_MARK,	PSA15_0, PSA14_1, PTA3_FN),
+	PINMUX_DATA(KEYIN2_MARK,	PSA15_0, PSA14_1, PTA2_FN),
+	PINMUX_DATA(KEYIN1_MARK,	PSA15_0, PSA14_1, PTA1_FN),
+	PINMUX_DATA(KEYIN0_MARK,	PSA15_0, PSA14_1, PTA0_FN),
+
+	PINMUX_DATA(IDED15_MARK,	PSA15_1, PSA14_0, PTA7_FN),
+	PINMUX_DATA(IDED14_MARK,	PSA15_1, PSA14_0, PTA6_FN),
+	PINMUX_DATA(IDED13_MARK,	PSA15_1, PSA14_0, PTA5_FN),
+	PINMUX_DATA(IDED12_MARK,	PSA15_1, PSA14_0, PTA4_FN),
+	PINMUX_DATA(IDED11_MARK,	PSA15_1, PSA14_0, PTA3_FN),
+	PINMUX_DATA(IDED10_MARK,	PSA15_1, PSA14_0, PTA2_FN),
+	PINMUX_DATA(IDED9_MARK,		PSA15_1, PSA14_0, PTA1_FN),
+	PINMUX_DATA(IDED8_MARK,		PSA15_1, PSA14_0, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK,		PSE15_0, PSE14_0, PTB7_FN),
+	PINMUX_DATA(D30_MARK,		PSE15_0, PSE14_0, PTB6_FN),
+	PINMUX_DATA(D29_MARK,		PSE11_0,          PTB5_FN),
+	PINMUX_DATA(D28_MARK,		PSE11_0,          PTB4_FN),
+	PINMUX_DATA(D27_MARK,		PSE11_0,          PTB3_FN),
+	PINMUX_DATA(D26_MARK,		PSA15_0, PSA14_0, PTB2_FN),
+	PINMUX_DATA(D25_MARK,		PSA15_0, PSA14_0, PTB1_FN),
+	PINMUX_DATA(D24_MARK,		PSA15_0, PSA14_0, PTB0_FN),
+
+	PINMUX_DATA(IDEA1_MARK,		PSE15_1, PSE14_0, PTB7_FN),
+	PINMUX_DATA(IDEA0_MARK,		PSE15_1, PSE14_0, PTB6_FN),
+	PINMUX_DATA(IODREQ_MARK,	PSE11_1,          PTB5_FN),
+	PINMUX_DATA(IDECS0_MARK,	PSE11_1,          PTB4_FN),
+	PINMUX_DATA(IDECS1_MARK,	PSE11_1,          PTB3_FN),
+	PINMUX_DATA(IDEIORD_MARK,	PSA15_1, PSA14_0, PTB2_FN),
+	PINMUX_DATA(IDEIOWR_MARK,	PSA15_1, PSA14_0, PTB1_FN),
+	PINMUX_DATA(IDEINT_MARK,	PSA15_1, PSA14_0, PTB0_FN),
+
+	PINMUX_DATA(TPUTO1_MARK,	PSE15_0, PSE14_1, PTB7_FN),
+	PINMUX_DATA(TPUTO0_MARK,	PSE15_0, PSE14_1, PTB6_FN),
+
+	PINMUX_DATA(KEYOUT5_IN5_MARK,	PSA15_0, PSA14_1, PTB2_FN),
+	PINMUX_DATA(KEYOUT4_IN6_MARK,	PSA15_0, PSA14_1, PTB1_FN),
+	PINMUX_DATA(KEYOUT3_MARK,	PSA15_0, PSA14_1, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(LCDD7_MARK, PSD5_0, PTC7_FN),
+	PINMUX_DATA(LCDD6_MARK, PSD5_0, PTC6_FN),
+	PINMUX_DATA(LCDD5_MARK, PSD5_0, PTC5_FN),
+	PINMUX_DATA(LCDD4_MARK, PSD5_0, PTC4_FN),
+	PINMUX_DATA(LCDD3_MARK, PSD5_0, PTC3_FN),
+	PINMUX_DATA(LCDD2_MARK, PSD5_0, PTC2_FN),
+	PINMUX_DATA(LCDD1_MARK, PSD5_0, PTC1_FN),
+	PINMUX_DATA(LCDD0_MARK, PSD5_0, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(LCDD15_MARK, PSD5_0, PTD7_FN),
+	PINMUX_DATA(LCDD14_MARK, PSD5_0, PTD6_FN),
+	PINMUX_DATA(LCDD13_MARK, PSD5_0, PTD5_FN),
+	PINMUX_DATA(LCDD12_MARK, PSD5_0, PTD4_FN),
+	PINMUX_DATA(LCDD11_MARK, PSD5_0, PTD3_FN),
+	PINMUX_DATA(LCDD10_MARK, PSD5_0, PTD2_FN),
+	PINMUX_DATA(LCDD9_MARK,  PSD5_0, PTD1_FN),
+	PINMUX_DATA(LCDD8_MARK,  PSD5_0, PTD0_FN),
+
+	/* PTE FN */
+	PINMUX_DATA(FSIMCKB_MARK, PTE7_FN),
+	PINMUX_DATA(FSIMCKA_MARK, PTE6_FN),
+
+	PINMUX_DATA(LCDD21_MARK,	PSC5_0, PSC4_0, PTE5_FN),
+	PINMUX_DATA(LCDD20_MARK,	PSD3_0, PSD2_0, PTE4_FN),
+	PINMUX_DATA(LCDD19_MARK,	PSA3_0, PSA2_0, PTE3_FN),
+	PINMUX_DATA(LCDD18_MARK,	PSA3_0, PSA2_0, PTE2_FN),
+	PINMUX_DATA(LCDD17_MARK,	PSD5_0,         PTE1_FN),
+	PINMUX_DATA(LCDD16_MARK,	PSD5_0,         PTE0_FN),
+
+	PINMUX_DATA(SCIF2_L_TXD_MARK,	PSC5_0, PSC4_1, PTE5_FN),
+	PINMUX_DATA(SCIF4_SCK_MARK,	PSD3_0, PSD2_1, PTE4_FN),
+	PINMUX_DATA(SCIF4_RXD_MARK,	PSA3_0, PSA2_1, PTE3_FN),
+	PINMUX_DATA(SCIF4_TXD_MARK,	PSA3_0, PSA2_1, PTE2_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(LCDVSYN_MARK,	PSD8_0,          PTF7_FN),
+	PINMUX_DATA(LCDDISP_MARK,	PSD10_0, PSD9_0, PTF6_FN),
+	PINMUX_DATA(LCDHSYN_MARK,	PSD10_0, PSD9_0, PTF5_FN),
+	PINMUX_DATA(LCDDON_MARK,	PSD8_0,          PTF4_FN),
+	PINMUX_DATA(LCDDCK_MARK,	PSD10_0, PSD9_0, PTF3_FN),
+	PINMUX_DATA(LCDVEPWC_MARK,	PSA6_0,          PTF2_FN),
+	PINMUX_DATA(LCDD23_MARK,	PSC7_0,  PSC6_0, PTF1_FN),
+	PINMUX_DATA(LCDD22_MARK,	PSC5_0,  PSC4_0, PTF0_FN),
+
+	PINMUX_DATA(LCDRS_MARK,		PSD10_0, PSD9_1, PTF6_FN),
+	PINMUX_DATA(LCDCS_MARK,		PSD10_0, PSD9_1, PTF5_FN),
+	PINMUX_DATA(LCDWR_MARK,		PSD10_0, PSD9_1, PTF3_FN),
+
+	PINMUX_DATA(SCIF0_TXD_MARK,	PSA6_1,          PTF2_FN),
+	PINMUX_DATA(SCIF2_L_SCK_MARK,	PSC7_0,  PSC6_1, PTF1_FN),
+	PINMUX_DATA(SCIF2_L_RXD_MARK,	PSC5_0,  PSC4_1, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(AUDCK_MARK,   PTG5_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTG4_FN),
+	PINMUX_DATA(AUDATA3_MARK, PTG3_FN),
+	PINMUX_DATA(AUDATA2_MARK, PTG2_FN),
+	PINMUX_DATA(AUDATA1_MARK, PTG1_FN),
+	PINMUX_DATA(AUDATA0_MARK, PTG0_FN),
+
+	/* PTH FN */
+	PINMUX_DATA(VIO0_VD_MARK,  PTH7_FN),
+	PINMUX_DATA(VIO0_CLK_MARK, PTH6_FN),
+	PINMUX_DATA(VIO0_D7_MARK,  PTH5_FN),
+	PINMUX_DATA(VIO0_D6_MARK,  PTH4_FN),
+	PINMUX_DATA(VIO0_D5_MARK,  PTH3_FN),
+	PINMUX_DATA(VIO0_D4_MARK,  PTH2_FN),
+	PINMUX_DATA(VIO0_D3_MARK,  PTH1_FN),
+	PINMUX_DATA(VIO0_D2_MARK,  PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(PDSTATUS_MARK,	PTJ7_FN),
+	PINMUX_DATA(STATUS2_MARK,	PTJ6_FN),
+	PINMUX_DATA(STATUS0_MARK,	PTJ5_FN),
+	PINMUX_DATA(A25_MARK,		PSA8_0, PTJ3_FN),
+	PINMUX_DATA(BS_MARK,		PSA8_1, PTJ3_FN),
+	PINMUX_DATA(A24_MARK,		PTJ2_FN),
+	PINMUX_DATA(A23_MARK,		PTJ1_FN),
+	PINMUX_DATA(A22_MARK,		PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(VIO1_D5_MARK,	PSB7_0, PSB6_0, PTK7_FN),
+	PINMUX_DATA(VIO1_D4_MARK,	PSB7_0, PSB6_0, PTK6_FN),
+	PINMUX_DATA(VIO1_D3_MARK,	PSB7_0, PSB6_0, PTK5_FN),
+	PINMUX_DATA(VIO1_D2_MARK,	PSB7_0, PSB6_0, PTK4_FN),
+	PINMUX_DATA(VIO1_D1_MARK,	PSB7_0, PSB6_0, PTK3_FN),
+	PINMUX_DATA(VIO1_D0_MARK,	PSB7_0, PSB6_0, PTK2_FN),
+
+	PINMUX_DATA(VIO0_D13_MARK,	PSB7_0, PSB6_1, PTK7_FN),
+	PINMUX_DATA(VIO0_D12_MARK,	PSB7_0, PSB6_1, PTK6_FN),
+	PINMUX_DATA(VIO0_D11_MARK,	PSB7_0, PSB6_1, PTK5_FN),
+	PINMUX_DATA(VIO0_D10_MARK,	PSB7_0, PSB6_1, PTK4_FN),
+	PINMUX_DATA(VIO0_D9_MARK,	PSB7_0, PSB6_1, PTK3_FN),
+	PINMUX_DATA(VIO0_D8_MARK,	PSB7_0, PSB6_1, PTK2_FN),
+
+	PINMUX_DATA(IDED5_MARK,		PSB7_1, PSB6_0, PTK7_FN),
+	PINMUX_DATA(IDED4_MARK,		PSB7_1, PSB6_0, PTK6_FN),
+	PINMUX_DATA(IDED3_MARK,		PSB7_1, PSB6_0, PTK5_FN),
+	PINMUX_DATA(IDED2_MARK,		PSB7_1, PSB6_0, PTK4_FN),
+	PINMUX_DATA(IDED1_MARK,		PSB7_1, PSB6_0, PTK3_FN),
+	PINMUX_DATA(IDED0_MARK,		PSB7_1, PSB6_0, PTK2_FN),
+
+	PINMUX_DATA(VIO0_FLD_MARK,	PTK1_FN),
+	PINMUX_DATA(VIO0_HD_MARK,	PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(DV_D5_MARK,		PSB9_0, PSB8_0, PTL7_FN),
+	PINMUX_DATA(DV_D4_MARK,		PSB9_0, PSB8_0, PTL6_FN),
+	PINMUX_DATA(DV_D3_MARK,		PSE7_0, PSE6_0, PTL5_FN),
+	PINMUX_DATA(DV_D2_MARK,		PSC9_0, PSC8_0, PTL4_FN),
+	PINMUX_DATA(DV_D1_MARK,		PSC9_0, PSC8_0, PTL3_FN),
+	PINMUX_DATA(DV_D0_MARK,		PSC9_0, PSC8_0, PTL2_FN),
+	PINMUX_DATA(DV_D15_MARK,	PSD4_0,         PTL1_FN),
+	PINMUX_DATA(DV_D14_MARK,	PSE5_0, PSE4_0, PTL0_FN),
+
+	PINMUX_DATA(SCIF3_V_SCK_MARK,	PSB9_0, PSB8_1, PTL7_FN),
+	PINMUX_DATA(SCIF3_V_RXD_MARK,	PSB9_0, PSB8_1, PTL6_FN),
+	PINMUX_DATA(SCIF3_V_TXD_MARK,	PSE7_0, PSE6_1, PTL5_FN),
+	PINMUX_DATA(SCIF1_SCK_MARK,	PSC9_0, PSC8_1, PTL4_FN),
+	PINMUX_DATA(SCIF1_RXD_MARK,	PSC9_0, PSC8_1, PTL3_FN),
+	PINMUX_DATA(SCIF1_TXD_MARK,	PSC9_0, PSC8_1, PTL2_FN),
+
+	PINMUX_DATA(RMII_RXD0_MARK,	PSB9_1, PSB8_0, PTL7_FN),
+	PINMUX_DATA(RMII_RXD1_MARK,	PSB9_1, PSB8_0, PTL6_FN),
+	PINMUX_DATA(RMII_REF_CLK_MARK,	PSE7_1, PSE6_0, PTL5_FN),
+	PINMUX_DATA(RMII_TX_EN_MARK,	PSC9_1, PSC8_0, PTL4_FN),
+	PINMUX_DATA(RMII_TXD0_MARK,	PSC9_1, PSC8_0, PTL3_FN),
+	PINMUX_DATA(RMII_TXD1_MARK,	PSC9_1, PSC8_0, PTL2_FN),
+
+	PINMUX_DATA(MSIOF0_MCK_MARK,	PSE5_0, PSE4_1, PTL0_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(DV_D13_MARK,	PSC13_0, PSC12_0, PTM7_FN),
+	PINMUX_DATA(DV_D12_MARK,	PSC13_0, PSC12_0, PTM6_FN),
+	PINMUX_DATA(DV_D11_MARK,	PSC13_0, PSC12_0, PTM5_FN),
+	PINMUX_DATA(DV_D10_MARK,	PSC13_0, PSC12_0, PTM4_FN),
+	PINMUX_DATA(DV_D9_MARK,		PSC11_0, PSC10_0, PTM3_FN),
+	PINMUX_DATA(DV_D8_MARK,		PSC11_0, PSC10_0, PTM2_FN),
+
+	PINMUX_DATA(MSIOF0_TSCK_MARK,	PSC13_0, PSC12_1, PTM7_FN),
+	PINMUX_DATA(MSIOF0_RXD_MARK,	PSC13_0, PSC12_1, PTM6_FN),
+	PINMUX_DATA(MSIOF0_TXD_MARK,	PSC13_0, PSC12_1, PTM5_FN),
+	PINMUX_DATA(MSIOF0_TSYNC_MARK,	PSC13_0, PSC12_1, PTM4_FN),
+	PINMUX_DATA(MSIOF0_SS1_MARK,	PSC11_0, PSC10_1, PTM3_FN),
+	PINMUX_DATA(MSIOF0_RSCK_MARK,	PSC11_1, PSC10_0, PTM3_FN),
+	PINMUX_DATA(MSIOF0_SS2_MARK,	PSC11_0, PSC10_1, PTM2_FN),
+	PINMUX_DATA(MSIOF0_RSYNC_MARK,	PSC11_1, PSC10_0, PTM2_FN),
+
+	PINMUX_DATA(LCDVCPWC_MARK,	PSA6_0, PTM1_FN),
+	PINMUX_DATA(LCDRD_MARK,		PSA7_0, PTM0_FN),
+
+	PINMUX_DATA(SCIF0_RXD_MARK,	PSA6_1, PTM1_FN),
+	PINMUX_DATA(SCIF0_SCK_MARK,	PSA7_1, PTM0_FN),
+
+	/* PTN FN */
+	PINMUX_DATA(VIO0_D1_MARK,	PTN7_FN),
+	PINMUX_DATA(VIO0_D0_MARK,	PTN6_FN),
+
+	PINMUX_DATA(DV_CLKI_MARK,	PSD11_0,          PTN5_FN),
+	PINMUX_DATA(DV_CLK_MARK,	PSD13_0, PSD12_0, PTN4_FN),
+	PINMUX_DATA(DV_VSYNC_MARK,	PSD15_0, PSD14_0, PTN3_FN),
+	PINMUX_DATA(DV_HSYNC_MARK,	PSB5_0,  PSB4_0,  PTN2_FN),
+	PINMUX_DATA(DV_D7_MARK,		PSB3_0,  PSB2_0,  PTN1_FN),
+	PINMUX_DATA(DV_D6_MARK,		PSB1_0,  PSB0_0,  PTN0_FN),
+
+	PINMUX_DATA(SCIF2_V_SCK_MARK,	PSD13_0, PSD12_1, PTN4_FN),
+	PINMUX_DATA(SCIF2_V_RXD_MARK,	PSD15_0, PSD14_1, PTN3_FN),
+	PINMUX_DATA(SCIF2_V_TXD_MARK,	PSB5_0,  PSB4_1,  PTN2_FN),
+	PINMUX_DATA(SCIF3_V_CTS_MARK,	PSB3_0,  PSB2_1,  PTN1_FN),
+	PINMUX_DATA(SCIF3_V_RTS_MARK,	PSB1_0,  PSB0_1,  PTN0_FN),
+
+	PINMUX_DATA(RMII_RX_ER_MARK,	PSB3_1, PSB2_0, PTN1_FN),
+	PINMUX_DATA(RMII_CRS_DV_MARK,	PSB1_1, PSB0_0, PTN0_FN),
+
+	/* PTQ FN */
+	PINMUX_DATA(D7_MARK, PTQ7_FN),
+	PINMUX_DATA(D6_MARK, PTQ6_FN),
+	PINMUX_DATA(D5_MARK, PTQ5_FN),
+	PINMUX_DATA(D4_MARK, PTQ4_FN),
+	PINMUX_DATA(D3_MARK, PTQ3_FN),
+	PINMUX_DATA(D2_MARK, PTQ2_FN),
+	PINMUX_DATA(D1_MARK, PTQ1_FN),
+	PINMUX_DATA(D0_MARK, PTQ0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(CS6B_CE1B_MARK,	                PTR7_FN),
+	PINMUX_DATA(CS6A_CE2B_MARK,	                PTR6_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK,	                PTR5_FN),
+	PINMUX_DATA(CS5A_CE2A_MARK,	                PTR4_FN),
+	PINMUX_DATA(IOIS16_MARK,	PSA5_0,         PTR3_FN),
+	PINMUX_DATA(WAIT_MARK,		                PTR2_FN),
+	PINMUX_DATA(WE3_ICIOWR_MARK,	PSA1_0, PSA0_0, PTR1_FN),
+	PINMUX_DATA(WE2_ICIORD_MARK,	PSD1_0, PSD0_0, PTR0_FN),
+
+	PINMUX_DATA(LCDLCLK_MARK,	PSA5_1,         PTR3_FN),
+
+	PINMUX_DATA(IDEA2_MARK,		PSD1_1, PSD0_0, PTR0_FN),
+
+	PINMUX_DATA(TPUTO3_MARK,	PSA1_0, PSA0_1, PTR1_FN),
+	PINMUX_DATA(TPUTI3_MARK,	PSA1_1, PSA0_0, PTR1_FN),
+	PINMUX_DATA(TPUTO2_MARK,	PSD1_0, PSD0_1, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(VIO_CKO_MARK,	PTS6_FN),
+
+	PINMUX_DATA(TPUTI2_MARK,	PSE9_0, PSE8_1, PTS5_FN),
+
+	PINMUX_DATA(IDEIORDY_MARK,	PSE9_1, PSE8_0, PTS5_FN),
+
+	PINMUX_DATA(VIO1_FLD_MARK,	PSE9_0, PSE8_0, PTS5_FN),
+	PINMUX_DATA(VIO1_HD_MARK,	PSA10_0,        PTS4_FN),
+	PINMUX_DATA(VIO1_VD_MARK,	PSA9_0,         PTS3_FN),
+	PINMUX_DATA(VIO1_CLK_MARK,	PSA9_0,         PTS2_FN),
+	PINMUX_DATA(VIO1_D7_MARK,	PSB7_0, PSB6_0, PTS1_FN),
+	PINMUX_DATA(VIO1_D6_MARK,	PSB7_0, PSB6_0, PTS0_FN),
+
+	PINMUX_DATA(SCIF5_SCK_MARK,	PSA10_1, PTS4_FN),
+	PINMUX_DATA(SCIF5_RXD_MARK,	PSA9_1,  PTS3_FN),
+	PINMUX_DATA(SCIF5_TXD_MARK,	PSA9_1,  PTS2_FN),
+
+	PINMUX_DATA(VIO0_D15_MARK,	PSB7_0, PSB6_1, PTS1_FN),
+	PINMUX_DATA(VIO0_D14_MARK,	PSB7_0, PSB6_1, PTS0_FN),
+
+	PINMUX_DATA(IDED7_MARK,		PSB7_1, PSB6_0, PTS1_FN),
+	PINMUX_DATA(IDED6_MARK,		PSB7_1, PSB6_0, PTS0_FN),
+
+	/* PTT FN */
+	PINMUX_DATA(D15_MARK, PTT7_FN),
+	PINMUX_DATA(D14_MARK, PTT6_FN),
+	PINMUX_DATA(D13_MARK, PTT5_FN),
+	PINMUX_DATA(D12_MARK, PTT4_FN),
+	PINMUX_DATA(D11_MARK, PTT3_FN),
+	PINMUX_DATA(D10_MARK, PTT2_FN),
+	PINMUX_DATA(D9_MARK,  PTT1_FN),
+	PINMUX_DATA(D8_MARK,  PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(DMAC_DACK0_MARK, PTU7_FN),
+	PINMUX_DATA(DMAC_DREQ0_MARK, PTU6_FN),
+
+	PINMUX_DATA(FSIOASD_MARK,	PSE1_0, PTU5_FN),
+	PINMUX_DATA(FSIIABCK_MARK,	PSE1_0, PTU4_FN),
+	PINMUX_DATA(FSIIALRCK_MARK,	PSE1_0, PTU3_FN),
+	PINMUX_DATA(FSIOABCK_MARK,	PSE1_0, PTU2_FN),
+	PINMUX_DATA(FSIOALRCK_MARK,	PSE1_0, PTU1_FN),
+	PINMUX_DATA(CLKAUDIOAO_MARK,	PSE0_0, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(FSIIBSD_MARK,	PSD7_0,  PSD6_0,  PTV7_FN),
+	PINMUX_DATA(FSIOBSD_MARK,	PSD7_0,  PSD6_0,  PTV6_FN),
+	PINMUX_DATA(FSIIBBCK_MARK,	PSC15_0, PSC14_0, PTV5_FN),
+	PINMUX_DATA(FSIIBLRCK_MARK,	PSC15_0, PSC14_0, PTV4_FN),
+	PINMUX_DATA(FSIOBBCK_MARK,	PSC15_0, PSC14_0, PTV3_FN),
+	PINMUX_DATA(FSIOBLRCK_MARK,	PSC15_0, PSC14_0, PTV2_FN),
+	PINMUX_DATA(CLKAUDIOBO_MARK,	PSE3_0,  PSE2_0,  PTV1_FN),
+	PINMUX_DATA(FSIIASD_MARK,	PSE10_0,          PTV0_FN),
+
+	PINMUX_DATA(MSIOF1_SS2_MARK,	PSD7_0,  PSD6_1,  PTV7_FN),
+	PINMUX_DATA(MSIOF1_RSYNC_MARK,	PSD7_1,  PSD6_0,  PTV7_FN),
+	PINMUX_DATA(MSIOF1_SS1_MARK,	PSD7_0,  PSD6_1,  PTV6_FN),
+	PINMUX_DATA(MSIOF1_RSCK_MARK,	PSD7_1,  PSD6_0,  PTV6_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK,	PSC15_0, PSC14_1, PTV5_FN),
+	PINMUX_DATA(MSIOF1_TSYNC_MARK,	PSC15_0, PSC14_1, PTV4_FN),
+	PINMUX_DATA(MSIOF1_TSCK_MARK,	PSC15_0, PSC14_1, PTV3_FN),
+	PINMUX_DATA(MSIOF1_TXD_MARK,	PSC15_0, PSC14_1, PTV2_FN),
+	PINMUX_DATA(MSIOF1_MCK_MARK,	PSE3_0,  PSE2_1,  PTV1_FN),
+
+	/* PTW FN */
+	PINMUX_DATA(MMC_D7_MARK,	PSE13_0, PSE12_0, PTW7_FN),
+	PINMUX_DATA(MMC_D6_MARK,	PSE13_0, PSE12_0, PTW6_FN),
+	PINMUX_DATA(MMC_D5_MARK,	PSE13_0, PSE12_0, PTW5_FN),
+	PINMUX_DATA(MMC_D4_MARK,	PSE13_0, PSE12_0, PTW4_FN),
+	PINMUX_DATA(MMC_D3_MARK,	PSA13_0,          PTW3_FN),
+	PINMUX_DATA(MMC_D2_MARK,	PSA13_0,          PTW2_FN),
+	PINMUX_DATA(MMC_D1_MARK,	PSA13_0,          PTW1_FN),
+	PINMUX_DATA(MMC_D0_MARK,	PSA13_0,          PTW0_FN),
+
+	PINMUX_DATA(SDHI1CD_MARK,	PSE13_0, PSE12_1, PTW7_FN),
+	PINMUX_DATA(SDHI1WP_MARK,	PSE13_0, PSE12_1, PTW6_FN),
+	PINMUX_DATA(SDHI1D3_MARK,	PSE13_0, PSE12_1, PTW5_FN),
+	PINMUX_DATA(SDHI1D2_MARK,	PSE13_0, PSE12_1, PTW4_FN),
+	PINMUX_DATA(SDHI1D1_MARK,	PSA13_1,          PTW3_FN),
+	PINMUX_DATA(SDHI1D0_MARK,	PSA13_1,          PTW2_FN),
+	PINMUX_DATA(SDHI1CMD_MARK,	PSA13_1,          PTW1_FN),
+	PINMUX_DATA(SDHI1CLK_MARK,	PSA13_1,          PTW0_FN),
+
+	PINMUX_DATA(IODACK_MARK,	PSE13_1, PSE12_0, PTW7_FN),
+	PINMUX_DATA(IDERST_MARK,	PSE13_1, PSE12_0, PTW6_FN),
+	PINMUX_DATA(EXBUF_ENB_MARK,	PSE13_1, PSE12_0, PTW5_FN),
+	PINMUX_DATA(DIRECTION_MARK,	PSE13_1, PSE12_0, PTW4_FN),
+
+	/* PTX FN */
+	PINMUX_DATA(DMAC_DACK1_MARK,	PSA12_0, PTX7_FN),
+	PINMUX_DATA(DMAC_DREQ1_MARK,	PSA12_0, PTX6_FN),
+
+	PINMUX_DATA(IRDA_OUT_MARK,	PSA12_1, PTX7_FN),
+	PINMUX_DATA(IRDA_IN_MARK,	PSA12_1, PTX6_FN),
+
+	PINMUX_DATA(TSIF_TS0_SDAT_MARK,	PSC0_0, PTX5_FN),
+	PINMUX_DATA(TSIF_TS0_SCK_MARK,	PSC1_0, PTX4_FN),
+	PINMUX_DATA(TSIF_TS0_SDEN_MARK,	PSC2_0, PTX3_FN),
+	PINMUX_DATA(TSIF_TS0_SPSYNC_MARK,       PTX2_FN),
+
+	PINMUX_DATA(LNKSTA_MARK,	PSC0_1, PTX5_FN),
+	PINMUX_DATA(MDIO_MARK,		PSC1_1, PTX4_FN),
+	PINMUX_DATA(MDC_MARK,		PSC2_1, PTX3_FN),
+
+	PINMUX_DATA(MMC_CLK_MARK, PTX1_FN),
+	PINMUX_DATA(MMC_CMD_MARK, PTX0_FN),
+
+	/* PTY FN */
+	PINMUX_DATA(SDHI0CD_MARK,  PTY7_FN),
+	PINMUX_DATA(SDHI0WP_MARK,  PTY6_FN),
+	PINMUX_DATA(SDHI0D3_MARK,  PTY5_FN),
+	PINMUX_DATA(SDHI0D2_MARK,  PTY4_FN),
+	PINMUX_DATA(SDHI0D1_MARK,  PTY3_FN),
+	PINMUX_DATA(SDHI0D0_MARK,  PTY2_FN),
+	PINMUX_DATA(SDHI0CMD_MARK, PTY1_FN),
+	PINMUX_DATA(SDHI0CLK_MARK, PTY0_FN),
+
+	/* PTZ FN */
+	PINMUX_DATA(INTC_IRQ7_MARK,	PSB10_0, PTZ7_FN),
+	PINMUX_DATA(INTC_IRQ6_MARK,	PSB11_0, PTZ6_FN),
+	PINMUX_DATA(INTC_IRQ5_MARK,	PSB12_0, PTZ5_FN),
+	PINMUX_DATA(INTC_IRQ4_MARK,	PSB13_0, PTZ4_FN),
+	PINMUX_DATA(INTC_IRQ3_MARK,	PSB14_0, PTZ3_FN),
+	PINMUX_DATA(INTC_IRQ2_MARK,	         PTZ2_FN),
+	PINMUX_DATA(INTC_IRQ1_MARK,	         PTZ1_FN),
+	PINMUX_DATA(INTC_IRQ0_MARK,	         PTZ0_FN),
+
+	PINMUX_DATA(SCIF3_I_CTS_MARK,	PSB10_1, PTZ7_FN),
+	PINMUX_DATA(SCIF3_I_RTS_MARK,	PSB11_1, PTZ6_FN),
+	PINMUX_DATA(SCIF3_I_SCK_MARK,	PSB12_1, PTZ5_FN),
+	PINMUX_DATA(SCIF3_I_RXD_MARK,	PSB13_1, PTZ4_FN),
+	PINMUX_DATA(SCIF3_I_TXD_MARK,	PSB14_1, PTZ3_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE7, PTE7_DATA),
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF7, PTF7_DATA),
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK7, PTK7_DATA),
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ */
+	PINMUX_GPIO(GPIO_PTQ7, PTQ7_DATA),
+	PINMUX_GPIO(GPIO_PTQ6, PTQ6_DATA),
+	PINMUX_GPIO(GPIO_PTQ5, PTQ5_DATA),
+	PINMUX_GPIO(GPIO_PTQ4, PTQ4_DATA),
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS6, PTS6_DATA),
+	PINMUX_GPIO(GPIO_PTS5, PTS5_DATA),
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT7, PTT7_DATA),
+	PINMUX_GPIO(GPIO_PTT6, PTT6_DATA),
+	PINMUX_GPIO(GPIO_PTT5, PTT5_DATA),
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU7, PTU7_DATA),
+	PINMUX_GPIO(GPIO_PTU6, PTU6_DATA),
+	PINMUX_GPIO(GPIO_PTU5, PTU5_DATA),
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV7, PTV7_DATA),
+	PINMUX_GPIO(GPIO_PTV6, PTV6_DATA),
+	PINMUX_GPIO(GPIO_PTV5, PTV5_DATA),
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW */
+	PINMUX_GPIO(GPIO_PTW7, PTW7_DATA),
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX */
+	PINMUX_GPIO(GPIO_PTX7, PTX7_DATA),
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY */
+	PINMUX_GPIO(GPIO_PTY7, PTY7_DATA),
+	PINMUX_GPIO(GPIO_PTY6, PTY6_DATA),
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ */
+	PINMUX_GPIO(GPIO_PTZ7, PTZ7_DATA),
+	PINMUX_GPIO(GPIO_PTZ6, PTZ6_DATA),
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+	PINMUX_GPIO(GPIO_PTZ0, PTZ0_DATA),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31,	D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30,	D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29,	D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28,	D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27,	D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26,	D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25,	D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24,	D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23,	D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22,	D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21,	D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20,	D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19,	D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18,	D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17,	D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16,	D16_MARK),
+	PINMUX_GPIO(GPIO_FN_D15,	D15_MARK),
+	PINMUX_GPIO(GPIO_FN_D14,	D14_MARK),
+	PINMUX_GPIO(GPIO_FN_D13,	D13_MARK),
+	PINMUX_GPIO(GPIO_FN_D12,	D12_MARK),
+	PINMUX_GPIO(GPIO_FN_D11,	D11_MARK),
+	PINMUX_GPIO(GPIO_FN_D10,	D10_MARK),
+	PINMUX_GPIO(GPIO_FN_D9,		D9_MARK),
+	PINMUX_GPIO(GPIO_FN_D8,		D8_MARK),
+	PINMUX_GPIO(GPIO_FN_D7,		D7_MARK),
+	PINMUX_GPIO(GPIO_FN_D6,		D6_MARK),
+	PINMUX_GPIO(GPIO_FN_D5,		D5_MARK),
+	PINMUX_GPIO(GPIO_FN_D4,		D4_MARK),
+	PINMUX_GPIO(GPIO_FN_D3,		D3_MARK),
+	PINMUX_GPIO(GPIO_FN_D2,		D2_MARK),
+	PINMUX_GPIO(GPIO_FN_D1,		D1_MARK),
+	PINMUX_GPIO(GPIO_FN_D0,		D0_MARK),
+	PINMUX_GPIO(GPIO_FN_A25,	A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24,	A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23,	A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22,	A22_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B,	CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B,	CS6A_CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A,	CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5A_CE2A,	CS5A_CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_ICIOWR,	WE3_ICIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_ICIORD,	WE2_ICIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16,	IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT,	WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_BS,		BS_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5,	KEYOUT5_IN5_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6,	KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4,		KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3,		KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2,		KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1,		KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN0,		KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3,		KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2,		KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1,		KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0,		KEYOUT0_MARK),
+
+	/* ATAPI */
+	PINMUX_GPIO(GPIO_FN_IDED15,	IDED15_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED14,	IDED14_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED13,	IDED13_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED12,	IDED12_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED11,	IDED11_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED10,	IDED10_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED9,	IDED9_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED8,	IDED8_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED7,	IDED7_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED6,	IDED6_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED5,	IDED5_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED4,	IDED4_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED3,	IDED3_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED2,	IDED2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED1,	IDED1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED0,	IDED0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA2,	IDEA2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA1,	IDEA1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA0,	IDEA0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIOWR,	IDEIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_IODREQ,	IODREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS0,	IDECS0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS1,	IDECS1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORD,	IDEIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_DIRECTION,	DIRECTION_MARK),
+	PINMUX_GPIO(GPIO_FN_EXBUF_ENB,	EXBUF_ENB_MARK),
+	PINMUX_GPIO(GPIO_FN_IDERST,	IDERST_MARK),
+	PINMUX_GPIO(GPIO_FN_IODACK,	IODACK_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEINT,	IDEINT_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORDY,	IDEIORDY_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO3,	TPUTO3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO2,	TPUTO2_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO1,	TPUTO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO0,	TPUTO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTI3,	TPUTI3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTI2,	TPUTI2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23,	LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22,	LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21,	LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20,	LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19,	LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18,	LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17,	LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16,	LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15,	LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14,	LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13,	LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12,	LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11,	LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10,	LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9,	LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8,	LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7,	LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6,	LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5,	LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4,	LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3,	LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2,	LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1,	LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0,	LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN,	LCDVSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP,	LCDDISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRS,	LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN,	LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS,	LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDON,	LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDCK,	LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR,	LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC,	LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC,	LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD,	LCDRD_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK,	LCDLCLK_MARK),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD,	SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD,	SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK,	SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK,	SCIF1_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD,	SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD,	SCIF1_TXD_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_TXD,	SCIF2_L_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_SCK,	SCIF2_L_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_RXD,	SCIF2_L_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_TXD,	SCIF2_V_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_SCK,	SCIF2_V_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_RXD,	SCIF2_V_RXD_MARK),
+
+	/* SCIF3 */
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_SCK,	SCIF3_V_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_RXD,	SCIF3_V_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_TXD,	SCIF3_V_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_CTS,	SCIF3_V_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_RTS,	SCIF3_V_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_SCK,	SCIF3_I_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_RXD,	SCIF3_I_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_TXD,	SCIF3_I_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_CTS,	SCIF3_I_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_RTS,	SCIF3_I_RTS_MARK),
+
+	/* SCIF4 */
+	PINMUX_GPIO(GPIO_FN_SCIF4_SCK,	SCIF4_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_RXD,	SCIF4_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_TXD,	SCIF4_TXD_MARK),
+
+	/* SCIF5 */
+	PINMUX_GPIO(GPIO_FN_SCIF5_SCK,	SCIF5_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_RXD,	SCIF5_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_TXD,	SCIF5_TXD_MARK),
+
+	/* FSI */
+	PINMUX_GPIO(GPIO_FN_FSIMCKB,	FSIMCKB_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIMCKA,	FSIMCKA_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOASD,	FSIOASD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIABCK,	FSIIABCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIALRCK,	FSIIALRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOABCK,	FSIOABCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOALRCK,	FSIOALRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_CLKAUDIOAO,	CLKAUDIOAO_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBSD,	FSIIBSD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBSD,	FSIOBSD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBBCK,	FSIIBBCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBLRCK,	FSIIBLRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBBCK,	FSIOBBCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBLRCK,	FSIOBLRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_CLKAUDIOBO,	CLKAUDIOBO_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIASD,	FSIIASD_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDCK,	AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC,	AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3,	AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2,	AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1,	AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0,	AUDATA0_MARK),
+
+	/* VIO */
+	PINMUX_GPIO(GPIO_FN_VIO_CKO,	VIO_CKO_MARK),
+
+	/* VIO0 */
+	PINMUX_GPIO(GPIO_FN_VIO0_D15,	VIO0_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D14,	VIO0_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D13,	VIO0_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D12,	VIO0_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D11,	VIO0_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D10,	VIO0_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D9,	VIO0_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D8,	VIO0_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D7,	VIO0_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D6,	VIO0_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D5,	VIO0_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D4,	VIO0_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D3,	VIO0_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D2,	VIO0_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D1,	VIO0_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D0,	VIO0_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_VD,	VIO0_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_CLK,	VIO0_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_FLD,	VIO0_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_HD,	VIO0_HD_MARK),
+
+	/* VIO1 */
+	PINMUX_GPIO(GPIO_FN_VIO1_D7,	VIO1_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D6,	VIO1_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D5,	VIO1_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D4,	VIO1_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D3,	VIO1_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D2,	VIO1_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D1,	VIO1_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D0,	VIO1_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_FLD,	VIO1_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_HD,	VIO1_HD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_VD,	VIO1_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_CLK,	VIO1_CLK_MARK),
+
+	/* Eth */
+	PINMUX_GPIO(GPIO_FN_RMII_RXD0,		RMII_RXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_RXD1,		RMII_RXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TXD0,		RMII_TXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TXD1,		RMII_TXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_REF_CLK,	RMII_REF_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TX_EN,		RMII_TX_EN_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_RX_ER,		RMII_RX_ER_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_CRS_DV,	RMII_CRS_DV_MARK),
+	PINMUX_GPIO(GPIO_FN_LNKSTA,		LNKSTA_MARK),
+	PINMUX_GPIO(GPIO_FN_MDIO,		MDIO_MARK),
+	PINMUX_GPIO(GPIO_FN_MDC,		MDC_MARK),
+
+	/* System */
+	PINMUX_GPIO(GPIO_FN_PDSTATUS,	PDSTATUS_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS2,	STATUS2_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS0,	STATUS0_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_D15,	DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14,	DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13,	DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12,	DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11,	DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10,	DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9,	DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8,	DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7,	DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6,	DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5,	DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4,	DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3,	DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2,	DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1,	DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0,	DV_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLKI,	DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK,	DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC,	DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC,	DV_HSYNC_MARK),
+
+	/* MSIOF0 */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RXD,		MSIOF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TXD,		MSIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_MCK,		MSIOF0_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TSCK,	MSIOF0_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_SS1,		MSIOF0_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_SS2,		MSIOF0_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TSYNC,	MSIOF0_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RSCK,	MSIOF0_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RSYNC,	MSIOF0_RSYNC_MARK),
+
+	/* MSIOF1 */
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RXD,		MSIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TXD,		MSIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_MCK,		MSIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSCK,	MSIOF1_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS1,		MSIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS2,		MSIOF1_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSYNC,	MSIOF1_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSCK,	MSIOF1_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSYNC,	MSIOF1_RSYNC_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DMAC_DACK0,	DMAC_DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DREQ0,	DMAC_DREQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DACK1,	DMAC_DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DREQ1,	DMAC_DREQ1_MARK),
+
+	/* SDHI0 */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD,	SDHI0CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP,	SDHI0WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD,	SDHI0CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK,	SDHI0CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3,	SDHI0D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2,	SDHI0D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1,	SDHI0D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0,	SDHI0D0_MARK),
+
+	/* SDHI1 */
+	PINMUX_GPIO(GPIO_FN_SDHI1CD,	SDHI1CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1WP,	SDHI1WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CMD,	SDHI1CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CLK,	SDHI1CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D3,	SDHI1D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D2,	SDHI1D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D1,	SDHI1D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D0,	SDHI1D0_MARK),
+
+	/* MMC */
+	PINMUX_GPIO(GPIO_FN_MMC_D7,	MMC_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D6,	MMC_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D5,	MMC_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D4,	MMC_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D3,	MMC_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D2,	MMC_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D1,	MMC_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D0,	MMC_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CLK,	MMC_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CMD,	MMC_CMD_MARK),
+
+	/* IrDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT,	IRDA_OUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_IN,	IRDA_IN_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDAT,	TSIF_TS0_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SCK,	TSIF_TS0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDEN,	TSIF_TS0_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SPSYNC,	TSIF_TS0_SPSYNC_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ7,	INTC_IRQ7_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ6,	INTC_IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ5,	INTC_IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ4,	INTC_IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ3,	INTC_IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ2,	INTC_IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ1,	INTC_IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ0,	INTC_IRQ0_MARK),
+ };
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, PTA7_IN_PU, PTA7_IN,
+		PTA6_FN, PTA6_OUT, PTA6_IN_PU, PTA6_IN,
+		PTA5_FN, PTA5_OUT, PTA5_IN_PU, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, PTB7_IN_PU, PTB7_IN,
+		PTB6_FN, PTB6_OUT, PTB6_IN_PU, PTB6_IN,
+		PTB5_FN, PTB5_OUT, PTB5_IN_PU, PTB5_IN,
+		PTB4_FN, PTB4_OUT, PTB4_IN_PU, PTB4_IN,
+		PTB3_FN, PTB3_OUT, PTB3_IN_PU, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, PTB0_IN_PU, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, PTC7_IN_PU, PTC7_IN,
+		PTC6_FN, PTC6_OUT, PTC6_IN_PU, PTC6_IN,
+		PTC5_FN, PTC5_OUT, PTC5_IN_PU, PTC5_IN,
+		PTC4_FN, PTC4_OUT, PTC4_IN_PU, PTC4_IN,
+		PTC3_FN, PTC3_OUT, PTC3_IN_PU, PTC3_IN,
+		PTC2_FN, PTC2_OUT, PTC2_IN_PU, PTC2_IN,
+		PTC1_FN, PTC1_OUT, PTC1_IN_PU, PTC1_IN,
+		PTC0_FN, PTC0_OUT, PTC0_IN_PU, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, PTD7_IN_PU, PTD7_IN,
+		PTD6_FN, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		PTD5_FN, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		PTD4_FN, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		PTD3_FN, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		PTD2_FN, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		PTD1_FN, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		PTD0_FN, PTD0_OUT, PTD0_IN_PU, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		PTE7_FN, PTE7_OUT, PTE7_IN_PU, PTE7_IN,
+		PTE6_FN, PTE6_OUT, PTE6_IN_PU, PTE6_IN,
+		PTE5_FN, PTE5_OUT, PTE5_IN_PU, PTE5_IN,
+		PTE4_FN, PTE4_OUT, PTE4_IN_PU, PTE4_IN,
+		PTE3_FN, PTE3_OUT, PTE3_IN_PU, PTE3_IN,
+		PTE2_FN, PTE2_OUT, PTE2_IN_PU, PTE2_IN,
+		PTE1_FN, PTE1_OUT, PTE1_IN_PU, PTE1_IN,
+		PTE0_FN, PTE0_OUT, PTE0_IN_PU, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		PTF7_FN, PTF7_OUT, PTF7_IN_PU, PTF7_IN,
+		PTF6_FN, PTF6_OUT, PTF6_IN_PU, PTF6_IN,
+		PTF5_FN, PTF5_OUT, PTF5_IN_PU, PTF5_IN,
+		PTF4_FN, PTF4_OUT, PTF4_IN_PU, PTF4_IN,
+		PTF3_FN, PTF3_OUT, PTF3_IN_PU, PTF3_IN,
+		PTF2_FN, PTF2_OUT, PTF2_IN_PU, PTF2_IN,
+		PTF1_FN, PTF1_OUT, PTF1_IN_PU, PTF1_IN,
+		PTF0_FN, PTF0_OUT, PTF0_IN_PU, PTF0_IN }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTG5_FN, PTG5_OUT, 0, 0,
+		PTG4_FN, PTG4_OUT, 0, 0,
+		PTG3_FN, PTG3_OUT, 0, 0,
+		PTG2_FN, PTG2_OUT, 0, 0,
+		PTG1_FN, PTG1_OUT, 0, 0,
+		PTG0_FN, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		PTH7_FN, PTH7_OUT, PTH7_IN_PU, PTH7_IN,
+		PTH6_FN, PTH6_OUT, PTH6_IN_PU, PTH6_IN,
+		PTH5_FN, PTH5_OUT, PTH5_IN_PU, PTH5_IN,
+		PTH4_FN, PTH4_OUT, PTH4_IN_PU, PTH4_IN,
+		PTH3_FN, PTH3_OUT, PTH3_IN_PU, PTH3_IN,
+		PTH2_FN, PTH2_OUT, PTH2_IN_PU, PTH2_IN,
+		PTH1_FN, PTH1_OUT, PTH1_IN_PU, PTH1_IN,
+		PTH0_FN, PTH0_OUT, PTH0_IN_PU, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		PTJ7_FN, PTJ7_OUT, 0, 0,
+		PTJ6_FN, PTJ6_OUT, 0, 0,
+		PTJ5_FN, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ3_FN, PTJ3_OUT, PTJ3_IN_PU, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, PTJ2_IN_PU, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		PTK7_FN, PTK7_OUT, PTK7_IN_PU, PTK7_IN,
+		PTK6_FN, PTK6_OUT, PTK6_IN_PU, PTK6_IN,
+		PTK5_FN, PTK5_OUT, PTK5_IN_PU, PTK5_IN,
+		PTK4_FN, PTK4_OUT, PTK4_IN_PU, PTK4_IN,
+		PTK3_FN, PTK3_OUT, PTK3_IN_PU, PTK3_IN,
+		PTK2_FN, PTK2_OUT, PTK2_IN_PU, PTK2_IN,
+		PTK1_FN, PTK1_OUT, PTK1_IN_PU, PTK1_IN,
+		PTK0_FN, PTK0_OUT, PTK0_IN_PU, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, PTL7_IN_PU, PTL7_IN,
+		PTL6_FN, PTL6_OUT, PTL6_IN_PU, PTL6_IN,
+		PTL5_FN, PTL5_OUT, PTL5_IN_PU, PTL5_IN,
+		PTL4_FN, PTL4_OUT, PTL4_IN_PU, PTL4_IN,
+		PTL3_FN, PTL3_OUT, PTL3_IN_PU, PTL3_IN,
+		PTL2_FN, PTL2_OUT, PTL2_IN_PU, PTL2_IN,
+		PTL1_FN, PTL1_OUT, PTL1_IN_PU, PTL1_IN,
+		PTL0_FN, PTL0_OUT, PTL0_IN_PU, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, PTM7_IN_PU, PTM7_IN,
+		PTM6_FN, PTM6_OUT, PTM6_IN_PU, PTM6_IN,
+		PTM5_FN, PTM5_OUT, PTM5_IN_PU, PTM5_IN,
+		PTM4_FN, PTM4_OUT, PTM4_IN_PU, PTM4_IN,
+		PTM3_FN, PTM3_OUT, PTM3_IN_PU, PTM3_IN,
+		PTM2_FN, PTM2_OUT, PTM2_IN_PU, PTM2_IN,
+		PTM1_FN, PTM1_OUT, PTM1_IN_PU, PTM1_IN,
+		PTM0_FN, PTM0_OUT, PTM0_IN_PU, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		PTN7_FN, PTN7_OUT, PTN7_IN_PU, PTN7_IN,
+		PTN6_FN, PTN6_OUT, PTN6_IN_PU, PTN6_IN,
+		PTN5_FN, PTN5_OUT, PTN5_IN_PU, PTN5_IN,
+		PTN4_FN, PTN4_OUT, PTN4_IN_PU, PTN4_IN,
+		PTN3_FN, PTN3_OUT, PTN3_IN_PU, PTN3_IN,
+		PTN2_FN, PTN2_OUT, PTN2_IN_PU, PTN2_IN,
+		PTN1_FN, PTN1_OUT, PTN1_IN_PU, PTN1_IN,
+		PTN0_FN, PTN0_OUT, PTN0_IN_PU, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		PTQ7_FN, PTQ7_OUT, PTQ7_IN_PU, PTQ7_IN,
+		PTQ6_FN, PTQ6_OUT, PTQ6_IN_PU, PTQ6_IN,
+		PTQ5_FN, PTQ5_OUT, PTQ5_IN_PU, PTQ5_IN,
+		PTQ4_FN, PTQ4_OUT, PTQ4_IN_PU, PTQ4_IN,
+		PTQ3_FN, PTQ3_OUT, PTQ3_IN_PU, PTQ3_IN,
+		PTQ2_FN, PTQ2_OUT, PTQ2_IN_PU, PTQ2_IN,
+		PTQ1_FN, PTQ1_OUT, PTQ1_IN_PU, PTQ1_IN,
+		PTQ0_FN, PTQ0_OUT, PTQ0_IN_PU, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		PTR7_FN, PTR7_OUT, PTR7_IN_PU, PTR7_IN,
+		PTR6_FN, PTR6_OUT, PTR6_IN_PU, PTR6_IN,
+		PTR5_FN, PTR5_OUT, PTR5_IN_PU, PTR5_IN,
+		PTR4_FN, PTR4_OUT, PTR4_IN_PU, PTR4_IN,
+		PTR3_FN, 0,        PTR3_IN_PU, PTR3_IN,
+		PTR2_FN, 0,        PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, PTR1_IN_PU, PTR1_IN,
+		PTR0_FN, PTR0_OUT, PTR0_IN_PU, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		PTS6_FN, PTS6_OUT, PTS6_IN_PU, PTS6_IN,
+		PTS5_FN, PTS5_OUT, PTS5_IN_PU, PTS5_IN,
+		PTS4_FN, PTS4_OUT, PTS4_IN_PU, PTS4_IN,
+		PTS3_FN, PTS3_OUT, PTS3_IN_PU, PTS3_IN,
+		PTS2_FN, PTS2_OUT, PTS2_IN_PU, PTS2_IN,
+		PTS1_FN, PTS1_OUT, PTS1_IN_PU, PTS1_IN,
+		PTS0_FN, PTS0_OUT, PTS0_IN_PU, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		PTT7_FN, PTT7_OUT, PTT7_IN_PU, PTT7_IN,
+		PTT6_FN, PTT6_OUT, PTT6_IN_PU, PTT6_IN,
+		PTT5_FN, PTT5_OUT, PTT5_IN_PU, PTT5_IN,
+		PTT4_FN, PTT4_OUT, PTT4_IN_PU, PTT4_IN,
+		PTT3_FN, PTT3_OUT, PTT3_IN_PU, PTT3_IN,
+		PTT2_FN, PTT2_OUT, PTT2_IN_PU, PTT2_IN,
+		PTT1_FN, PTT1_OUT, PTT1_IN_PU, PTT1_IN,
+		PTT0_FN, PTT0_OUT, PTT0_IN_PU, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		PTU7_FN, PTU7_OUT, PTU7_IN_PU, PTU7_IN,
+		PTU6_FN, PTU6_OUT, PTU6_IN_PU, PTU6_IN,
+		PTU5_FN, PTU5_OUT, PTU5_IN_PU, PTU5_IN,
+		PTU4_FN, PTU4_OUT, PTU4_IN_PU, PTU4_IN,
+		PTU3_FN, PTU3_OUT, PTU3_IN_PU, PTU3_IN,
+		PTU2_FN, PTU2_OUT, PTU2_IN_PU, PTU2_IN,
+		PTU1_FN, PTU1_OUT, PTU1_IN_PU, PTU1_IN,
+		PTU0_FN, PTU0_OUT, PTU0_IN_PU, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		PTV7_FN, PTV7_OUT, PTV7_IN_PU, PTV7_IN,
+		PTV6_FN, PTV6_OUT, PTV6_IN_PU, PTV6_IN,
+		PTV5_FN, PTV5_OUT, PTV5_IN_PU, PTV5_IN,
+		PTV4_FN, PTV4_OUT, PTV4_IN_PU, PTV4_IN,
+		PTV3_FN, PTV3_OUT, PTV3_IN_PU, PTV3_IN,
+		PTV2_FN, PTV2_OUT, PTV2_IN_PU, PTV2_IN,
+		PTV1_FN, PTV1_OUT, PTV1_IN_PU, PTV1_IN,
+		PTV0_FN, PTV0_OUT, PTV0_IN_PU, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		PTW7_FN, PTW7_OUT, PTW7_IN_PU, PTW7_IN,
+		PTW6_FN, PTW6_OUT, PTW6_IN_PU, PTW6_IN,
+		PTW5_FN, PTW5_OUT, PTW5_IN_PU, PTW5_IN,
+		PTW4_FN, PTW4_OUT, PTW4_IN_PU, PTW4_IN,
+		PTW3_FN, PTW3_OUT, PTW3_IN_PU, PTW3_IN,
+		PTW2_FN, PTW2_OUT, PTW2_IN_PU, PTW2_IN,
+		PTW1_FN, PTW1_OUT, PTW1_IN_PU, PTW1_IN,
+		PTW0_FN, PTW0_OUT, PTW0_IN_PU, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		PTX7_FN, PTX7_OUT, PTX7_IN_PU, PTX7_IN,
+		PTX6_FN, PTX6_OUT, PTX6_IN_PU, PTX6_IN,
+		PTX5_FN, PTX5_OUT, PTX5_IN_PU, PTX5_IN,
+		PTX4_FN, PTX4_OUT, PTX4_IN_PU, PTX4_IN,
+		PTX3_FN, PTX3_OUT, PTX3_IN_PU, PTX3_IN,
+		PTX2_FN, PTX2_OUT, PTX2_IN_PU, PTX2_IN,
+		PTX1_FN, PTX1_OUT, PTX1_IN_PU, PTX1_IN,
+		PTX0_FN, PTX0_OUT, PTX0_IN_PU, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		PTY7_FN, PTY7_OUT, PTY7_IN_PU, PTY7_IN,
+		PTY6_FN, PTY6_OUT, PTY6_IN_PU, PTY6_IN,
+		PTY5_FN, PTY5_OUT, PTY5_IN_PU, PTY5_IN,
+		PTY4_FN, PTY4_OUT, PTY4_IN_PU, PTY4_IN,
+		PTY3_FN, PTY3_OUT, PTY3_IN_PU, PTY3_IN,
+		PTY2_FN, PTY2_OUT, PTY2_IN_PU, PTY2_IN,
+		PTY1_FN, PTY1_OUT, PTY1_IN_PU, PTY1_IN,
+		PTY0_FN, PTY0_OUT, PTY0_IN_PU, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		PTZ7_FN, PTZ7_OUT, PTZ7_IN_PU, PTZ7_IN,
+		PTZ6_FN, PTZ6_OUT, PTZ6_IN_PU, PTZ6_IN,
+		PTZ5_FN, PTZ5_OUT, PTZ5_IN_PU, PTZ5_IN,
+		PTZ4_FN, PTZ4_OUT, PTZ4_IN_PU, PTZ4_IN,
+		PTZ3_FN, PTZ3_OUT, PTZ3_IN_PU, PTZ3_IN,
+		PTZ2_FN, PTZ2_OUT, PTZ2_IN_PU, PTZ2_IN,
+		PTZ1_FN, PTZ1_OUT, PTZ1_IN_PU, PTZ1_IN,
+		PTZ0_FN, PTZ0_OUT, PTZ0_IN_PU, PTZ0_IN }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 1) {
+		PSA15_0, PSA15_1,
+		PSA14_0, PSA14_1,
+		PSA13_0, PSA13_1,
+		PSA12_0, PSA12_1,
+		0, 0,
+		PSA10_0, PSA10_1,
+		PSA9_0,  PSA9_1,
+		PSA8_0,  PSA8_1,
+		PSA7_0,  PSA7_1,
+		PSA6_0,  PSA6_1,
+		PSA5_0,  PSA5_1,
+		0, 0,
+		PSA3_0,  PSA3_1,
+		PSA2_0,  PSA2_1,
+		PSA1_0,  PSA1_1,
+		PSA0_0,  PSA0_1}
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 1) {
+		0, 0,
+		PSB14_0, PSB14_1,
+		PSB13_0, PSB13_1,
+		PSB12_0, PSB12_1,
+		PSB11_0, PSB11_1,
+		PSB10_0, PSB10_1,
+		PSB9_0,  PSB9_1,
+		PSB8_0,  PSB8_1,
+		PSB7_0,  PSB7_1,
+		PSB6_0,  PSB6_1,
+		PSB5_0,  PSB5_1,
+		PSB4_0,  PSB4_1,
+		PSB3_0,  PSB3_1,
+		PSB2_0,  PSB2_1,
+		PSB1_0,  PSB1_1,
+		PSB0_0,  PSB0_1}
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 1) {
+		PSC15_0, PSC15_1,
+		PSC14_0, PSC14_1,
+		PSC13_0, PSC13_1,
+		PSC12_0, PSC12_1,
+		PSC11_0, PSC11_1,
+		PSC10_0, PSC10_1,
+		PSC9_0,  PSC9_1,
+		PSC8_0,  PSC8_1,
+		PSC7_0,  PSC7_1,
+		PSC6_0,  PSC6_1,
+		PSC5_0,  PSC5_1,
+		PSC4_0,  PSC4_1,
+		0, 0,
+		PSC2_0,  PSC2_1,
+		PSC1_0,  PSC1_1,
+		PSC0_0,  PSC0_1}
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 1) {
+		PSD15_0, PSD15_1,
+		PSD14_0, PSD14_1,
+		PSD13_0, PSD13_1,
+		PSD12_0, PSD12_1,
+		PSD11_0, PSD11_1,
+		PSD10_0, PSD10_1,
+		PSD9_0,  PSD9_1,
+		PSD8_0,  PSD8_1,
+		PSD7_0,  PSD7_1,
+		PSD6_0,  PSD6_1,
+		PSD5_0,  PSD5_1,
+		PSD4_0,  PSD4_1,
+		PSD3_0,  PSD3_1,
+		PSD2_0,  PSD2_1,
+		PSD1_0,  PSD1_1,
+		PSD0_0,  PSD0_1}
+	},
+	{ PINMUX_CFG_REG("PSELE", 0xa4050156, 16, 1) {
+		PSE15_0, PSE15_1,
+		PSE14_0, PSE14_1,
+		PSE13_0, PSE13_1,
+		PSE12_0, PSE12_1,
+		PSE11_0, PSE11_1,
+		PSE10_0, PSE10_1,
+		PSE9_0,  PSE9_1,
+		PSE8_0,  PSE8_1,
+		PSE7_0,  PSE7_1,
+		PSE6_0,  PSE6_1,
+		PSE5_0,  PSE5_1,
+		PSE4_0,  PSE4_1,
+		PSE3_0,  PSE3_1,
+		PSE2_0,  PSE2_1,
+		PSE1_0,  PSE1_1,
+		PSE0_0,  PSE0_1}
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0,         0,         PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, 0,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		0,         PTS6_DATA, PTS5_DATA, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) {
+		PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7724_pinmux_info = {
+	.name = "sh7724_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_INTC_IRQ0,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7724_pinmux_info);
+}
+arch_initcall(plat_pinmux_setup);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index c154938..6307e08 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -12,7 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic0_resources[] = {
@@ -141,7 +141,7 @@
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
@@ -173,27 +173,123 @@
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	}, {
 		.mapbase	= 0xffe10000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	}, {
 		.mapbase	= 0xffe20000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	}, {
 		.mapbase	= 0xffe30000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 83, 83, 83, 83 },
+		.clk		= "scif3",
 	}, {
 		.flags = 0,
 	}
@@ -209,6 +305,9 @@
 
 static struct platform_device *sh7343_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&iic0_device,
 	&iic1_device,
 	&sci_device,
@@ -219,12 +318,6 @@
 
 static int __init sh7343_devices_setup(void)
 {
-	clk_always_enable("uram0"); /* URAM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu0"); /* VEU */
-	clk_always_enable("vpu0"); /* VPU */
-	clk_always_enable("jpu0"); /* JPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 1 << 20);
 	platform_resource_setup_memory(&veu_device, "veu", 2 << 20);
 	platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20);
@@ -234,6 +327,19 @@
 }
 __initcall(sh7343_devices_setup);
 
+static struct platform_device *sh7343_early_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7343_early_devices,
+				   ARRAY_SIZE(sh7343_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 93ecf8e..318516f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -14,7 +14,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic_resources[] = {
@@ -148,7 +148,7 @@
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
@@ -180,12 +180,105 @@
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	}, {
 		.flags = 0,
 	}
@@ -201,6 +294,9 @@
 
 static struct platform_device *sh7366_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&iic_device,
 	&sci_device,
 	&usb_host_device,
@@ -211,12 +307,6 @@
 
 static int __init sh7366_devices_setup(void)
 {
-	clk_always_enable("rsmem0"); /* RSMEM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu1"); /* VEU-2 */
-	clk_always_enable("veu0"); /* VEU-1 */
-	clk_always_enable("vpu0"); /* VPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
@@ -226,6 +316,19 @@
 }
 __initcall(sh7366_devices_setup);
 
+static struct platform_device *sh7366_early_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7366_early_devices,
+				   ARRAY_SIZE(sh7366_early_devices));
+}
+
 enum {
 	UNUSED=0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 406747f..ea524a2 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -13,7 +13,7 @@
 #include <linux/serial_sci.h>
 #include <linux/mm.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -177,13 +177,13 @@
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
 	.clk = "cmt0",
 	.clockevent_rating = 125,
-	.clocksource_rating = 200,
+	.clocksource_rating = 125,
 };
 
 static struct resource cmt_resources[] = {
@@ -209,24 +209,119 @@
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	},
 	{
 		.mapbase	= 0xffe10000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	},
 	{
 		.mapbase	= 0xffe20000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	},
 	{
 		.flags = 0,
@@ -243,6 +338,9 @@
 
 static struct platform_device *sh7722_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&rtc_device,
 	&usbf_device,
 	&iic_device,
@@ -254,12 +352,6 @@
 
 static int __init sh7722_devices_setup(void)
 {
-	clk_always_enable("uram0"); /* URAM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu0"); /* VEU */
-	clk_always_enable("vpu0"); /* VPU */
-	clk_always_enable("jpu0"); /* JPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 1 << 20);
 	platform_resource_setup_memory(&veu_device, "veu", 2 << 20);
 	platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20);
@@ -269,6 +361,19 @@
 }
 __initcall(sh7722_devices_setup);
 
+static struct platform_device *sh7722_early_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7722_early_devices,
+				   ARRAY_SIZE(sh7722_early_devices));
+}
+
 enum {
 	UNUSED=0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index a800466..d8f4a13 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -13,7 +13,8 @@
 #include <linux/mm.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -101,13 +102,13 @@
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
 	.clk = "cmt0",
 	.clockevent_rating = 125,
-	.clocksource_rating = 200,
+	.clocksource_rating = 125,
 };
 
 static struct resource cmt_resources[] = {
@@ -133,37 +134,225 @@
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu1",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd90008,
+		.end	= 0xffd90013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu1",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd90014,
+		.end	= 0xffd9001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 58,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu1",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd90020,
+		.end	= 0xffd9002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase        = 0xffe00000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	},{
 		.mapbase        = 0xffe10000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	},{
 		.mapbase        = 0xffe20000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	},{
 		.mapbase	= 0xa4e30000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 56, 56, 56, 56 },
+		.clk		= "scif3",
 	},{
 		.mapbase	= 0xa4e40000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 88, 88, 88, 88 },
+		.clk		= "scif4",
 	},{
 		.mapbase	= 0xa4e50000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 109, 109, 109, 109 },
+		.clk		= "scif5",
 	}, {
 		.flags = 0,
 	}
@@ -255,6 +444,12 @@
 
 static struct platform_device *sh7723_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&sci_device,
 	&rtc_device,
 	&iic_device,
@@ -266,11 +461,6 @@
 
 static int __init sh7723_devices_setup(void)
 {
-	clk_always_enable("meram0"); /* MERAM */
-	clk_always_enable("veu1"); /* VEU2H1 */
-	clk_always_enable("veu0"); /* VEU2H0 */
-	clk_always_enable("vpu0"); /* VPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
@@ -280,6 +470,31 @@
 }
 __initcall(sh7723_devices_setup);
 
+static struct platform_device *sh7723_early_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7723_early_devices,
+				   ARRAY_SIZE(sh7723_early_devices));
+}
+
+#define RAMCR_CACHE_L2FC	0x0002
+#define RAMCR_CACHE_L2E		0x0001
+#define L2_CACHE_ENABLE		(RAMCR_CACHE_L2E|RAMCR_CACHE_L2FC)
+void __uses_jump_to_uncached l2_cache_init(void)
+{
+	/* Enable L2 cache */
+	ctrl_outl(L2_CACHE_ENABLE, RAMCR);
+}
+
 enum {
 	UNUSED=0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
new file mode 100644
index 0000000..e5ac9eb
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -0,0 +1,786 @@
+/*
+ * SH7724 Setup
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on SH7723 Setup
+ * Copyright (C) 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/mm.h>
+#include <linux/serial_sci.h>
+#include <linux/uio_driver.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+#include <asm/mmzone.h>
+
+/* Serial */
+static struct plat_sci_port sci_platform_data[] = {
+	{
+		.mapbase        = 0xffe00000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 80, 80, 80, 80 },
+		.clk		= "scif0",
+	}, {
+		.mapbase        = 0xffe10000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 81, 81, 81, 81 },
+		.clk		= "scif1",
+	}, {
+		.mapbase        = 0xffe20000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 82, 82, 82, 82 },
+		.clk		= "scif2",
+	}, {
+		.mapbase	= 0xa4e30000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 56, 56, 56, 56 },
+		.clk		= "scif3",
+	}, {
+		.mapbase	= 0xa4e40000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 88, 88, 88, 88 },
+		.clk		= "scif4",
+	}, {
+		.mapbase	= 0xa4e50000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 109, 109, 109, 109 },
+		.clk		= "scif5",
+	}, {
+		.flags = 0,
+	}
+};
+
+static struct platform_device sci_device = {
+	.name		= "sh-sci",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= sci_platform_data,
+	},
+};
+
+/* RTC */
+static struct resource rtc_resources[] = {
+	[0] = {
+		.start	= 0xa465fec0,
+		.end	= 0xa465fec0 + 0x58 - 1,
+		.flags	= IORESOURCE_IO,
+	},
+	[1] = {
+		/* Period IRQ */
+		.start	= 69,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* Carry IRQ */
+		.start	= 70,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[3] = {
+		/* Alarm IRQ */
+		.start	= 68,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device rtc_device = {
+	.name		= "sh-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+	.resource	= rtc_resources,
+};
+
+/* I2C0 */
+static struct resource iic0_resources[] = {
+	[0] = {
+		.name	= "IIC0",
+		.start  = 0x04470000,
+		.end    = 0x04470018 - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 96,
+		.end    = 99,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device iic0_device = {
+	.name           = "i2c-sh_mobile",
+	.id             = 0, /* "i2c0" clock */
+	.num_resources  = ARRAY_SIZE(iic0_resources),
+	.resource       = iic0_resources,
+};
+
+/* I2C1 */
+static struct resource iic1_resources[] = {
+	[0] = {
+		.name	= "IIC1",
+		.start  = 0x04750000,
+		.end    = 0x04750018 - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 92,
+		.end    = 95,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device iic1_device = {
+	.name           = "i2c-sh_mobile",
+	.id             = 1, /* "i2c1" clock */
+	.num_resources  = ARRAY_SIZE(iic1_resources),
+	.resource       = iic1_resources,
+};
+
+/* VPU */
+static struct uio_info vpu_platform_data = {
+	.name = "VPU5F",
+	.version = "0",
+	.irq = 60,
+};
+
+static struct resource vpu_resources[] = {
+	[0] = {
+		.name	= "VPU",
+		.start	= 0xfe900000,
+		.end	= 0xfe902807,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device vpu_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &vpu_platform_data,
+	},
+	.resource	= vpu_resources,
+	.num_resources	= ARRAY_SIZE(vpu_resources),
+};
+
+/* VEU0 */
+static struct uio_info veu0_platform_data = {
+	.name = "VEU3F0",
+	.version = "0",
+	.irq = 83,
+};
+
+static struct resource veu0_resources[] = {
+	[0] = {
+		.name	= "VEU3F0",
+		.start	= 0xfe920000,
+		.end	= 0xfe9200cb - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device veu0_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &veu0_platform_data,
+	},
+	.resource	= veu0_resources,
+	.num_resources	= ARRAY_SIZE(veu0_resources),
+};
+
+/* VEU1 */
+static struct uio_info veu1_platform_data = {
+	.name = "VEU3F1",
+	.version = "0",
+	.irq = 54,
+};
+
+static struct resource veu1_resources[] = {
+	[0] = {
+		.name	= "VEU3F1",
+		.start	= 0xfe924000,
+		.end	= 0xfe9240cb - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device veu1_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &veu1_platform_data,
+	},
+	.resource	= veu1_resources,
+	.num_resources	= ARRAY_SIZE(veu1_resources),
+};
+
+static struct sh_timer_config cmt_platform_data = {
+	.name = "CMT",
+	.channel_offset = 0x60,
+	.timer_bit = 5,
+	.clk = "cmt0",
+	.clockevent_rating = 125,
+	.clocksource_rating = 200,
+};
+
+static struct resource cmt_resources[] = {
+	[0] = {
+		.name	= "CMT",
+		.start	= 0x044a0060,
+		.end	= 0x044a006b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt_platform_data,
+	},
+	.resource	= cmt_resources,
+	.num_resources	= ARRAY_SIZE(cmt_resources),
+};
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu1",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd90008,
+		.end	= 0xffd90013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu1",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd90014,
+		.end	= 0xffd9001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 58,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu1",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd90020,
+		.end	= 0xffd9002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+/* JPU */
+static struct uio_info jpu_platform_data = {
+	.name = "JPU",
+	.version = "0",
+	.irq = 27,
+};
+
+static struct resource jpu_resources[] = {
+	[0] = {
+		.name	= "JPU",
+		.start	= 0xfe980000,
+		.end	= 0xfe9902d3,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device jpu_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &jpu_platform_data,
+	},
+	.resource	= jpu_resources,
+	.num_resources	= ARRAY_SIZE(jpu_resources),
+};
+
+static struct platform_device *sh7724_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&sci_device,
+	&rtc_device,
+	&iic0_device,
+	&iic1_device,
+	&vpu_device,
+	&veu0_device,
+	&veu1_device,
+	&jpu_device,
+};
+
+static int __init sh7724_devices_setup(void)
+{
+	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
+	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
+	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
+	platform_resource_setup_memory(&jpu_device,  "jpu",  2 << 20);
+
+	return platform_add_devices(sh7724_devices,
+				    ARRAY_SIZE(sh7724_devices));
+}
+device_initcall(sh7724_devices_setup);
+
+static struct platform_device *sh7724_early_devices[] __initdata = {
+	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7724_early_devices,
+				   ARRAY_SIZE(sh7724_early_devices));
+}
+
+#define RAMCR_CACHE_L2FC	0x0002
+#define RAMCR_CACHE_L2E		0x0001
+#define L2_CACHE_ENABLE		(RAMCR_CACHE_L2E|RAMCR_CACHE_L2FC)
+void __uses_jump_to_uncached l2_cache_init(void)
+{
+	/* Enable L2 cache */
+	ctrl_outl(L2_CACHE_ENABLE, RAMCR);
+}
+
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7,
+	HUDI,
+	DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3,
+	_2DG_TRI, _2DG_INI, _2DG_CEI,
+	DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3,
+	VIO_CEU0, VIO_BEU0, VIO_VEU1, VIO_VOU,
+	SCIFA3,
+	VPU,
+	TPU,
+	CEU1,
+	BEU1,
+	USB0, USB1,
+	ATAPI,
+	RTC_ATI, RTC_PRI, RTC_CUI,
+	DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR,
+	DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR,
+	KEYSC,
+	SCIF_SCIF0, SCIF_SCIF1, SCIF_SCIF2,
+	VEU0,
+	MSIOF_MSIOFI0, MSIOF_MSIOFI1,
+	SPU_SPUI0, SPU_SPUI1,
+	SCIFA4,
+	ICB,
+	ETHI,
+	I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI,
+	I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI,
+	SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2, SDHI0_SDHII3,
+	CMT,
+	TSIF,
+	FSI,
+	SCIFA5,
+	TMU0_TUNI0, TMU0_TUNI1, TMU0_TUNI2,
+	IRDA,
+	SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2,
+	JPU,
+	_2DDMAC,
+	MMC_MMC2I, MMC_MMC3I,
+	LCDC,
+	TMU1_TUNI0, TMU1_TUNI1, TMU1_TUNI2,
+
+	/* interrupt groups */
+	DMAC1A, _2DG, DMAC0A, VIO, USB, RTC,
+	DMAC1B, DMAC0B, I2C0, I2C1, SDHI0, SDHI1, SPU, MMCIF,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(IRQ0, 0x600), INTC_VECT(IRQ1, 0x620),
+	INTC_VECT(IRQ2, 0x640), INTC_VECT(IRQ3, 0x660),
+	INTC_VECT(IRQ4, 0x680), INTC_VECT(IRQ5, 0x6a0),
+	INTC_VECT(IRQ6, 0x6c0), INTC_VECT(IRQ7, 0x6e0),
+
+	INTC_VECT(DMAC1A_DEI0, 0x700),
+	INTC_VECT(DMAC1A_DEI1, 0x720),
+	INTC_VECT(DMAC1A_DEI2, 0x740),
+	INTC_VECT(DMAC1A_DEI3, 0x760),
+
+	INTC_VECT(_2DG_TRI, 0x780),
+	INTC_VECT(_2DG_INI, 0x7A0),
+	INTC_VECT(_2DG_CEI, 0x7C0),
+
+	INTC_VECT(DMAC0A_DEI0, 0x800),
+	INTC_VECT(DMAC0A_DEI1, 0x820),
+	INTC_VECT(DMAC0A_DEI2, 0x840),
+	INTC_VECT(DMAC0A_DEI3, 0x860),
+
+	INTC_VECT(VIO_CEU0, 0x880),
+	INTC_VECT(VIO_BEU0, 0x8A0),
+	INTC_VECT(VIO_VEU1, 0x8C0),
+	INTC_VECT(VIO_VOU,  0x8E0),
+
+	INTC_VECT(SCIFA3, 0x900),
+	INTC_VECT(VPU,    0x980),
+	INTC_VECT(TPU,    0x9A0),
+	INTC_VECT(CEU1,   0x9E0),
+	INTC_VECT(BEU1,   0xA00),
+	INTC_VECT(USB0,   0xA20),
+	INTC_VECT(USB1,   0xA40),
+	INTC_VECT(ATAPI,  0xA60),
+
+	INTC_VECT(RTC_ATI, 0xA80),
+	INTC_VECT(RTC_PRI, 0xAA0),
+	INTC_VECT(RTC_CUI, 0xAC0),
+
+	INTC_VECT(DMAC1B_DEI4, 0xB00),
+	INTC_VECT(DMAC1B_DEI5, 0xB20),
+	INTC_VECT(DMAC1B_DADERR, 0xB40),
+
+	INTC_VECT(DMAC0B_DEI4, 0xB80),
+	INTC_VECT(DMAC0B_DEI5, 0xBA0),
+	INTC_VECT(DMAC0B_DADERR, 0xBC0),
+
+	INTC_VECT(KEYSC,      0xBE0),
+	INTC_VECT(SCIF_SCIF0, 0xC00),
+	INTC_VECT(SCIF_SCIF1, 0xC20),
+	INTC_VECT(SCIF_SCIF2, 0xC40),
+	INTC_VECT(VEU0,       0xC60),
+	INTC_VECT(MSIOF_MSIOFI0, 0xC80),
+	INTC_VECT(MSIOF_MSIOFI1, 0xCA0),
+	INTC_VECT(SPU_SPUI0, 0xCC0),
+	INTC_VECT(SPU_SPUI1, 0xCE0),
+	INTC_VECT(SCIFA4,    0xD00),
+
+	INTC_VECT(ICB,  0xD20),
+	INTC_VECT(ETHI, 0xD60),
+
+	INTC_VECT(I2C1_ALI, 0xD80),
+	INTC_VECT(I2C1_TACKI, 0xDA0),
+	INTC_VECT(I2C1_WAITI, 0xDC0),
+	INTC_VECT(I2C1_DTEI, 0xDE0),
+
+	INTC_VECT(I2C0_ALI, 0xE00),
+	INTC_VECT(I2C0_TACKI, 0xE20),
+	INTC_VECT(I2C0_WAITI, 0xE40),
+	INTC_VECT(I2C0_DTEI, 0xE60),
+
+	INTC_VECT(SDHI0_SDHII0, 0xE80),
+	INTC_VECT(SDHI0_SDHII1, 0xEA0),
+	INTC_VECT(SDHI0_SDHII2, 0xEC0),
+	INTC_VECT(SDHI0_SDHII3, 0xEE0),
+
+	INTC_VECT(CMT,    0xF00),
+	INTC_VECT(TSIF,   0xF20),
+	INTC_VECT(FSI,    0xF80),
+	INTC_VECT(SCIFA5, 0xFA0),
+
+	INTC_VECT(TMU0_TUNI0, 0x400),
+	INTC_VECT(TMU0_TUNI1, 0x420),
+	INTC_VECT(TMU0_TUNI2, 0x440),
+
+	INTC_VECT(IRDA,    0x480),
+
+	INTC_VECT(SDHI1_SDHII0, 0x4E0),
+	INTC_VECT(SDHI1_SDHII1, 0x500),
+	INTC_VECT(SDHI1_SDHII2, 0x520),
+
+	INTC_VECT(JPU, 0x560),
+	INTC_VECT(_2DDMAC, 0x4A0),
+
+	INTC_VECT(MMC_MMC2I, 0x5A0),
+	INTC_VECT(MMC_MMC3I, 0x5C0),
+
+	INTC_VECT(LCDC, 0xF40),
+
+	INTC_VECT(TMU1_TUNI0, 0x920),
+	INTC_VECT(TMU1_TUNI1, 0x940),
+	INTC_VECT(TMU1_TUNI2, 0x960),
+};
+
+static struct intc_group groups[] __initdata = {
+	INTC_GROUP(DMAC1A, DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3),
+	INTC_GROUP(_2DG, _2DG_TRI, _2DG_INI, _2DG_CEI),
+	INTC_GROUP(DMAC0A, DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3),
+	INTC_GROUP(VIO, VIO_CEU0, VIO_BEU0, VIO_VEU1, VIO_VOU),
+	INTC_GROUP(USB, USB0, USB1),
+	INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI),
+	INTC_GROUP(DMAC1B, DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR),
+	INTC_GROUP(DMAC0B, DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR),
+	INTC_GROUP(I2C0, I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI),
+	INTC_GROUP(I2C1, I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI),
+	INTC_GROUP(SDHI0, SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2, SDHI0_SDHII3),
+	INTC_GROUP(SDHI1, SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2),
+	INTC_GROUP(SPU, SPU_SPUI0, SPU_SPUI1),
+	INTC_GROUP(MMCIF, MMC_MMC2I, MMC_MMC3I),
+};
+
+static struct intc_mask_reg mask_registers[] __initdata = {
+	{ 0xa4080080, 0xa40800c0, 8, /* IMR0 / IMCR0 */
+	  { 0, TMU1_TUNI2, TMU1_TUNI1, TMU1_TUNI0,
+	    0, SDHI1_SDHII2, SDHI1_SDHII1, SDHI1_SDHII0 } },
+	{ 0xa4080084, 0xa40800c4, 8, /* IMR1 / IMCR1 */
+	  { VIO_VOU, VIO_VEU1, VIO_BEU0, VIO_CEU0,
+	    DMAC0A_DEI3, DMAC0A_DEI2, DMAC0A_DEI1, DMAC0A_DEI0 } },
+	{ 0xa4080088, 0xa40800c8, 8, /* IMR2 / IMCR2 */
+	  { 0, 0, 0, VPU, ATAPI, ETHI, 0, SCIFA3 } },
+	{ 0xa408008c, 0xa40800cc, 8, /* IMR3 / IMCR3 */
+	  { DMAC1A_DEI3, DMAC1A_DEI2, DMAC1A_DEI1, DMAC1A_DEI0,
+	    SPU_SPUI1, SPU_SPUI0, BEU1, IRDA } },
+	{ 0xa4080090, 0xa40800d0, 8, /* IMR4 / IMCR4 */
+	  { 0, TMU0_TUNI2, TMU0_TUNI1, TMU0_TUNI0,
+	    JPU, 0, 0, LCDC } },
+	{ 0xa4080094, 0xa40800d4, 8, /* IMR5 / IMCR5 */
+	  { KEYSC, DMAC0B_DADERR, DMAC0B_DEI5, DMAC0B_DEI4,
+	    VEU0, SCIF_SCIF2, SCIF_SCIF1, SCIF_SCIF0 } },
+	{ 0xa4080098, 0xa40800d8, 8, /* IMR6 / IMCR6 */
+	  { 0, 0, ICB, SCIFA4,
+	    CEU1, 0, MSIOF_MSIOFI1, MSIOF_MSIOFI0 } },
+	{ 0xa408009c, 0xa40800dc, 8, /* IMR7 / IMCR7 */
+	  { I2C0_DTEI, I2C0_WAITI, I2C0_TACKI, I2C0_ALI,
+	    I2C1_DTEI, I2C1_WAITI, I2C1_TACKI, I2C1_ALI } },
+	{ 0xa40800a0, 0xa40800e0, 8, /* IMR8 / IMCR8 */
+	  { SDHI0_SDHII3, SDHI0_SDHII2, SDHI0_SDHII1, SDHI0_SDHII0,
+	    0, 0, SCIFA5, FSI } },
+	{ 0xa40800a4, 0xa40800e4, 8, /* IMR9 / IMCR9 */
+	  { 0, 0, 0, CMT, 0, USB1, USB0, 0 } },
+	{ 0xa40800a8, 0xa40800e8, 8, /* IMR10 / IMCR10 */
+	  { 0, DMAC1B_DADERR, DMAC1B_DEI5, DMAC1B_DEI4,
+	    0, RTC_CUI, RTC_PRI, RTC_ATI } },
+	{ 0xa40800ac, 0xa40800ec, 8, /* IMR11 / IMCR11 */
+	  { 0, _2DG_CEI, _2DG_INI, _2DG_TRI,
+	    0, TPU, 0, TSIF } },
+	{ 0xa40800b0, 0xa40800f0, 8, /* IMR12 / IMCR12 */
+	  { 0, 0, MMC_MMC3I, MMC_MMC2I, 0, 0, 0, _2DDMAC } },
+	{ 0xa4140044, 0xa4140064, 8, /* INTMSK00 / INTMSKCLR00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xa4080000, 0, 16, 4, /* IPRA */ { TMU0_TUNI0, TMU0_TUNI1,
+					     TMU0_TUNI2, IRDA } },
+	{ 0xa4080004, 0, 16, 4, /* IPRB */ { JPU, LCDC, DMAC1A, BEU1 } },
+	{ 0xa4080008, 0, 16, 4, /* IPRC */ { TMU1_TUNI0, TMU1_TUNI1,
+					     TMU1_TUNI2, SPU } },
+	{ 0xa408000c, 0, 16, 4, /* IPRD */ { 0, MMCIF, 0, ATAPI } },
+	{ 0xa4080010, 0, 16, 4, /* IPRE */ { DMAC0A, VIO, SCIFA3, VPU } },
+	{ 0xa4080014, 0, 16, 4, /* IPRF */ { KEYSC, DMAC0B, USB, CMT } },
+	{ 0xa4080018, 0, 16, 4, /* IPRG */ { SCIF_SCIF0, SCIF_SCIF1,
+					     SCIF_SCIF2, VEU0 } },
+	{ 0xa408001c, 0, 16, 4, /* IPRH */ { MSIOF_MSIOFI0, MSIOF_MSIOFI1,
+					     I2C1, I2C0 } },
+	{ 0xa4080020, 0, 16, 4, /* IPRI */ { SCIFA4, ICB, TSIF, _2DG } },
+	{ 0xa4080024, 0, 16, 4, /* IPRJ */ { CEU1, ETHI, FSI, SDHI1 } },
+	{ 0xa4080028, 0, 16, 4, /* IPRK */ { RTC, DMAC1B, 0, SDHI0 } },
+	{ 0xa408002c, 0, 16, 4, /* IPRL */ { SCIFA5, 0, TPU, _2DDMAC } },
+	{ 0xa4140010, 0, 32, 4, /* INTPRI00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_sense_reg sense_registers[] __initdata = {
+	{ 0xa414001c, 16, 2, /* ICR1 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_mask_reg ack_registers[] __initdata = {
+	{ 0xa4140024, 0, 8, /* INTREQ00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static DECLARE_INTC_DESC_ACK(intc_desc, "sh7724", vectors, groups,
+			     mask_registers, prio_registers, sense_registers,
+			     ack_registers);
+
+void __init plat_irq_setup(void)
+{
+	register_intc_controller(&intc_desc);
+}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index bdf0f61..f1e0c0d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
 
@@ -113,7 +114,195 @@
 	.resource	= usbf_resources,
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd88008,
+		.end	= 0xffd88013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd88014,
+		.end	= 0xffd8801f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd88020,
+		.end	= 0xffd8802b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct platform_device *sh7763_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&rtc_device,
 	&sci_device,
 	&usb_ohci_device,
@@ -127,6 +316,21 @@
 }
 __initcall(sh7763_devices_setup);
 
+static struct platform_device *sh7763_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7763_early_devices,
+				   ARRAY_SIZE(sh7763_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index b73578e..1e86209 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -11,6 +11,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+#include <linux/io.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -76,7 +78,288 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd81008,
+		.end	= 0xffd81013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 19,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd81014,
+		.end	= 0xffd8101f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd81020,
+		.end	= 0xffd8102f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct sh_timer_config tmu6_platform_data = {
+	.name = "TMU6",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu6_resources[] = {
+	[0] = {
+		.name	= "TMU6",
+		.start	= 0xffd82008,
+		.end	= 0xffd82013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 22,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu6_device = {
+	.name		= "sh_tmu",
+	.id		= 6,
+	.dev = {
+		.platform_data	= &tmu6_platform_data,
+	},
+	.resource	= tmu6_resources,
+	.num_resources	= ARRAY_SIZE(tmu6_resources),
+};
+
+static struct sh_timer_config tmu7_platform_data = {
+	.name = "TMU7",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu7_resources[] = {
+	[0] = {
+		.name	= "TMU7",
+		.start	= 0xffd82014,
+		.end	= 0xffd8201f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 23,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu7_device = {
+	.name		= "sh_tmu",
+	.id		= 7,
+	.dev = {
+		.platform_data	= &tmu7_platform_data,
+	},
+	.resource	= tmu7_resources,
+	.num_resources	= ARRAY_SIZE(tmu7_resources),
+};
+
+static struct sh_timer_config tmu8_platform_data = {
+	.name = "TMU8",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu8_resources[] = {
+	[0] = {
+		.name	= "TMU8",
+		.start	= 0xffd82020,
+		.end	= 0xffd8202b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 24,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu8_device = {
+	.name		= "sh_tmu",
+	.id		= 8,
+	.dev = {
+		.platform_data	= &tmu8_platform_data,
+	},
+	.resource	= tmu8_resources,
+	.num_resources	= ARRAY_SIZE(tmu8_resources),
+};
+
 static struct platform_device *sh7770_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
 	&sci_device,
 };
 
@@ -87,6 +370,269 @@
 }
 __initcall(sh7770_devices_setup);
 
+static struct platform_device *sh7770_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7770_early_devices,
+				   ARRAY_SIZE(sh7770_early_devices));
+}
+
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	IRL_HHLL, IRL_HHLH, IRL_HHHL,
+
+	IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5,
+
+	GPIO,
+	TMU0, TMU1, TMU2, TMU2_TICPI,
+	TMU3, TMU4, TMU5, TMU5_TICPI,
+	TMU6, TMU7, TMU8,
+	HAC, IPI, SPDIF, HUDI, I2C,
+	DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2,
+	I2S0, I2S1, I2S2, I2S3,
+	SRC_RX, SRC_TX, SRC_SPDIF,
+	DU, VIDEO_IN, REMOTE, YUV, USB, ATAPI, CAN, GPS, GFX2D,
+	GFX3D_MBX, GFX3D_DMAC,
+	EXBUS_ATA,
+	SPI0, SPI1,
+	SCIF089, SCIF1234, SCIF567,
+	ADC,
+	BBDMAC_0_3, BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14,
+	BBDMAC_15_18, BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27,
+	BBDMAC_28, BBDMAC_29, BBDMAC_30, BBDMAC_31,
+
+	/* interrupt groups */
+	TMU, DMAC, I2S, SRC, GFX3D, SPI, SCIF, BBDMAC,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(GPIO, 0x3e0),
+	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2_TICPI, 0x460),
+	INTC_VECT(TMU3, 0x480), INTC_VECT(TMU4, 0x4a0),
+	INTC_VECT(TMU5, 0x4c0), INTC_VECT(TMU5_TICPI, 0x4e0),
+	INTC_VECT(TMU6, 0x500), INTC_VECT(TMU7, 0x520),
+	INTC_VECT(TMU8, 0x540),
+	INTC_VECT(HAC, 0x580), INTC_VECT(IPI, 0x5c0),
+	INTC_VECT(SPDIF, 0x5e0),
+	INTC_VECT(HUDI, 0x600), INTC_VECT(I2C, 0x620),
+	INTC_VECT(DMAC0_DMINT0, 0x640), INTC_VECT(DMAC0_DMINT1, 0x660),
+	INTC_VECT(DMAC0_DMINT2, 0x680),
+	INTC_VECT(I2S0, 0x6a0), INTC_VECT(I2S1, 0x6c0),
+	INTC_VECT(I2S2, 0x6e0), INTC_VECT(I2S3, 0x700),
+	INTC_VECT(SRC_RX, 0x720), INTC_VECT(SRC_TX, 0x740),
+	INTC_VECT(SRC_SPDIF, 0x760),
+	INTC_VECT(DU, 0x780), INTC_VECT(VIDEO_IN, 0x7a0),
+	INTC_VECT(REMOTE, 0x7c0), INTC_VECT(YUV, 0x7e0),
+	INTC_VECT(USB, 0x840), INTC_VECT(ATAPI, 0x860),
+	INTC_VECT(CAN, 0x880), INTC_VECT(GPS, 0x8a0),
+	INTC_VECT(GFX2D, 0x8c0),
+	INTC_VECT(GFX3D_MBX, 0x900), INTC_VECT(GFX3D_DMAC, 0x920),
+	INTC_VECT(EXBUS_ATA, 0x940),
+	INTC_VECT(SPI0, 0x960), INTC_VECT(SPI1, 0x980),
+	INTC_VECT(SCIF089, 0x9a0), INTC_VECT(SCIF1234, 0x9c0),
+	INTC_VECT(SCIF1234, 0x9e0), INTC_VECT(SCIF1234, 0xa00),
+	INTC_VECT(SCIF1234, 0xa20), INTC_VECT(SCIF567, 0xa40),
+	INTC_VECT(SCIF567, 0xa60), INTC_VECT(SCIF567, 0xa80),
+	INTC_VECT(SCIF089, 0xaa0), INTC_VECT(SCIF089, 0xac0),
+	INTC_VECT(ADC, 0xb20),
+	INTC_VECT(BBDMAC_0_3, 0xba0), INTC_VECT(BBDMAC_0_3, 0xbc0),
+	INTC_VECT(BBDMAC_0_3, 0xbe0), INTC_VECT(BBDMAC_0_3, 0xc00),
+	INTC_VECT(BBDMAC_4_7, 0xc20), INTC_VECT(BBDMAC_4_7, 0xc40),
+	INTC_VECT(BBDMAC_4_7, 0xc60), INTC_VECT(BBDMAC_4_7, 0xc80),
+	INTC_VECT(BBDMAC_8_10, 0xca0), INTC_VECT(BBDMAC_8_10, 0xcc0),
+	INTC_VECT(BBDMAC_8_10, 0xce0), INTC_VECT(BBDMAC_11_14, 0xd00),
+	INTC_VECT(BBDMAC_11_14, 0xd20), INTC_VECT(BBDMAC_11_14, 0xd40),
+	INTC_VECT(BBDMAC_11_14, 0xd60), INTC_VECT(BBDMAC_15_18, 0xd80),
+	INTC_VECT(BBDMAC_15_18, 0xda0), INTC_VECT(BBDMAC_15_18, 0xdc0),
+	INTC_VECT(BBDMAC_15_18, 0xde0), INTC_VECT(BBDMAC_19_22, 0xe00),
+	INTC_VECT(BBDMAC_19_22, 0xe20), INTC_VECT(BBDMAC_19_22, 0xe40),
+	INTC_VECT(BBDMAC_19_22, 0xe60), INTC_VECT(BBDMAC_23_26, 0xe80),
+	INTC_VECT(BBDMAC_23_26, 0xea0), INTC_VECT(BBDMAC_23_26, 0xec0),
+	INTC_VECT(BBDMAC_23_26, 0xee0), INTC_VECT(BBDMAC_27, 0xf00),
+	INTC_VECT(BBDMAC_28, 0xf20), INTC_VECT(BBDMAC_29, 0xf40),
+	INTC_VECT(BBDMAC_30, 0xf60), INTC_VECT(BBDMAC_31, 0xf80),
+};
+
+static struct intc_group groups[] __initdata = {
+	INTC_GROUP(TMU, TMU0, TMU1, TMU2, TMU2_TICPI, TMU3, TMU4, TMU5,
+		   TMU5_TICPI, TMU6, TMU7, TMU8),
+	INTC_GROUP(DMAC, DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2),
+	INTC_GROUP(I2S, I2S0, I2S1, I2S2, I2S3),
+	INTC_GROUP(SRC, SRC_RX, SRC_TX, SRC_SPDIF),
+	INTC_GROUP(GFX3D, GFX3D_MBX, GFX3D_DMAC),
+	INTC_GROUP(SPI, SPI0, SPI1),
+	INTC_GROUP(SCIF, SCIF089, SCIF1234, SCIF567),
+	INTC_GROUP(BBDMAC,
+		   BBDMAC_0_3, BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14,
+		   BBDMAC_15_18, BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27,
+		   BBDMAC_28, BBDMAC_29, BBDMAC_30, BBDMAC_31),
+};
+
+static struct intc_mask_reg mask_registers[] __initdata = {
+	{ 0xffe00040, 0xffe00044, 32, /* INT2MSKR / INT2MSKCR */
+	  { 0, BBDMAC, ADC, SCIF, SPI, EXBUS_ATA, GFX3D, GFX2D,
+	    GPS, CAN, ATAPI, USB, YUV, REMOTE, VIDEO_IN, DU, SRC, I2S,
+	    DMAC, I2C, HUDI, SPDIF, IPI, HAC, TMU, GPIO } },
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xffe00000, 0, 32, 8, /* INT2PRI0 */ { GPIO, TMU0, 0, HAC } },
+	{ 0xffe00004, 0, 32, 8, /* INT2PRI1 */ { IPI, SPDIF, HUDI, I2C } },
+	{ 0xffe00008, 0, 32, 8, /* INT2PRI2 */ { DMAC, I2S, SRC, DU } },
+	{ 0xffe0000c, 0, 32, 8, /* INT2PRI3 */ { VIDEO_IN, REMOTE, YUV, USB } },
+	{ 0xffe00010, 0, 32, 8, /* INT2PRI4 */ { ATAPI, CAN, GPS, GFX2D } },
+	{ 0xffe00014, 0, 32, 8, /* INT2PRI5 */ { 0, GFX3D, EXBUS_ATA, SPI } },
+	{ 0xffe00018, 0, 32, 8, /* INT2PRI6 */ { SCIF1234, SCIF567, SCIF089 } },
+	{ 0xffe0001c, 0, 32, 8, /* INT2PRI7 */ { ADC, 0, 0, BBDMAC_0_3 } },
+	{ 0xffe00020, 0, 32, 8, /* INT2PRI8 */
+	  { BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14, BBDMAC_15_18 } },
+	{ 0xffe00024, 0, 32, 8, /* INT2PRI9 */
+	  { BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27, BBDMAC_28 } },
+	{ 0xffe00028, 0, 32, 8, /* INT2PRI10 */
+	  { BBDMAC_29, BBDMAC_30, BBDMAC_31 } },
+	{ 0xffe0002c, 0, 32, 8, /* INT2PRI11 */
+	  { TMU1, TMU2, TMU2_TICPI, TMU3 } },
+	{ 0xffe00030, 0, 32, 8, /* INT2PRI12 */
+	  { TMU4, TMU5, TMU5_TICPI, TMU6 } },
+	{ 0xffe00034, 0, 32, 8, /* INT2PRI13 */
+	  { TMU7, TMU8 } },
+};
+
+static DECLARE_INTC_DESC(intc_desc, "sh7770", vectors, groups,
+			 mask_registers, prio_registers, NULL);
+
+/* Support for external interrupt pins in IRQ mode */
+static struct intc_vect irq_vectors[] __initdata = {
+	INTC_VECT(IRQ0, 0x240), INTC_VECT(IRQ1, 0x280),
+	INTC_VECT(IRQ2, 0x2c0), INTC_VECT(IRQ3, 0x300),
+	INTC_VECT(IRQ4, 0x340), INTC_VECT(IRQ5, 0x380),
+};
+
+static struct intc_mask_reg irq_mask_registers[] __initdata = {
+	{ 0xffd00044, 0xffd00064, 32, /* INTMSK0 / INTMSKCLR0 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, } },
+};
+
+static struct intc_prio_reg irq_prio_registers[] __initdata = {
+	{ 0xffd00010, 0, 32, 4, /* INTPRI */ { IRQ0, IRQ1, IRQ2, IRQ3,
+					       IRQ4, IRQ5, } },
+};
+
+static struct intc_sense_reg irq_sense_registers[] __initdata = {
+	{ 0xffd0001c, 32, 2, /* ICR1 */   { IRQ0, IRQ1, IRQ2, IRQ3,
+					    IRQ4, IRQ5, } },
+};
+
+static DECLARE_INTC_DESC(intc_irq_desc, "sh7770-irq", irq_vectors,
+			 NULL, irq_mask_registers, irq_prio_registers,
+			 irq_sense_registers);
+
+/* External interrupt pins in IRL mode */
+static struct intc_vect irl_vectors[] __initdata = {
+	INTC_VECT(IRL_LLLL, 0x200), INTC_VECT(IRL_LLLH, 0x220),
+	INTC_VECT(IRL_LLHL, 0x240), INTC_VECT(IRL_LLHH, 0x260),
+	INTC_VECT(IRL_LHLL, 0x280), INTC_VECT(IRL_LHLH, 0x2a0),
+	INTC_VECT(IRL_LHHL, 0x2c0), INTC_VECT(IRL_LHHH, 0x2e0),
+	INTC_VECT(IRL_HLLL, 0x300), INTC_VECT(IRL_HLLH, 0x320),
+	INTC_VECT(IRL_HLHL, 0x340), INTC_VECT(IRL_HLHH, 0x360),
+	INTC_VECT(IRL_HHLL, 0x380), INTC_VECT(IRL_HHLH, 0x3a0),
+	INTC_VECT(IRL_HHHL, 0x3c0),
+};
+
+static struct intc_mask_reg irl3210_mask_registers[] __initdata = {
+	{ 0xffd40080, 0xffd40084, 32, /* INTMSK2 / INTMSKCLR2 */
+	  { IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	    IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	    IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	    IRL_HHLL, IRL_HHLH, IRL_HHHL, } },
+};
+
+static struct intc_mask_reg irl7654_mask_registers[] __initdata = {
+	{ 0xffd40080, 0xffd40084, 32, /* INTMSK2 / INTMSKCLR2 */
+	  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	    IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	    IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	    IRL_HHLL, IRL_HHLH, IRL_HHHL, } },
+};
+
+static DECLARE_INTC_DESC(intc_irl7654_desc, "sh7780-irl7654", irl_vectors,
+			 NULL, irl7654_mask_registers, NULL, NULL);
+
+static DECLARE_INTC_DESC(intc_irl3210_desc, "sh7780-irl3210", irl_vectors,
+			 NULL, irl3210_mask_registers, NULL, NULL);
+
+#define INTC_ICR0	0xffd00000
+#define INTC_INTMSK0	0xffd00044
+#define INTC_INTMSK1	0xffd00048
+#define INTC_INTMSK2	0xffd40080
+#define INTC_INTMSKCLR1	0xffd00068
+#define INTC_INTMSKCLR2	0xffd40084
+
 void __init plat_irq_setup(void)
 {
+	/* disable IRQ7-0 */
+	ctrl_outl(0xff000000, INTC_INTMSK0);
+
+	/* disable IRL3-0 + IRL7-4 */
+	ctrl_outl(0xc0000000, INTC_INTMSK1);
+	ctrl_outl(0xfffefffe, INTC_INTMSK2);
+
+	/* select IRL mode for IRL3-0 + IRL7-4 */
+	ctrl_outl(ctrl_inl(INTC_ICR0) & ~0x00c00000, INTC_ICR0);
+
+	/* disable holding function, ie enable "SH-4 Mode" */
+	ctrl_outl(ctrl_inl(INTC_ICR0) | 0x00200000, INTC_ICR0);
+
+	register_intc_controller(&intc_desc);
+}
+
+void __init plat_irq_setup_pins(int mode)
+{
+	switch (mode) {
+	case IRQ_MODE_IRQ:
+		/* select IRQ mode for IRL3-0 + IRL7-4 */
+		ctrl_outl(ctrl_inl(INTC_ICR0) | 0x00c00000, INTC_ICR0);
+		register_intc_controller(&intc_irq_desc);
+		break;
+	case IRQ_MODE_IRL7654:
+		/* enable IRL7-4 but don't provide any masking */
+		ctrl_outl(0x40000000, INTC_INTMSKCLR1);
+		ctrl_outl(0x0000fffe, INTC_INTMSKCLR2);
+		break;
+	case IRQ_MODE_IRL3210:
+		/* enable IRL0-3 but don't provide any masking */
+		ctrl_outl(0x80000000, INTC_INTMSKCLR1);
+		ctrl_outl(0xfffe0000, INTC_INTMSKCLR2);
+		break;
+	case IRQ_MODE_IRL7654_MASK:
+		/* enable IRL7-4 and mask using cpu intc controller */
+		ctrl_outl(0x40000000, INTC_INTMSKCLR1);
+		register_intc_controller(&intc_irl7654_desc);
+		break;
+	case IRQ_MODE_IRL3210_MASK:
+		/* enable IRL0-3 and mask using cpu intc controller */
+		ctrl_outl(0x80000000, INTC_INTMSKCLR1);
+		register_intc_controller(&intc_irl3210_desc);
+		break;
+	default:
+		BUG();
+	}
 }
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 6f7227c..715e05b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -12,6 +12,189 @@
 #include <linux/serial.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
 
 static struct resource rtc_resources[] = {
 	[0] = {
@@ -58,6 +241,12 @@
 };
 
 static struct platform_device *sh7780_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&rtc_device,
 	&sci_device,
 };
@@ -69,6 +258,21 @@
 }
 __initcall(sh7780_devices_setup);
 
+static struct platform_device *sh7780_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7780_early_devices,
+				   ARRAY_SIZE(sh7780_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index d80802a..af56140 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -13,39 +13,228 @@
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu012_fck",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu012_fck",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu012_fck",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu345_fck",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu345_fck",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu345_fck",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffea0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 40, 40, 40, 40 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffeb0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 44, 44, 44, 44 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffec0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 60, 60, 60, 60 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffed0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 61, 61, 61, 61 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffee0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 62, 62, 62, 62 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffef0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 63, 63, 63, 63 },
+		.clk		= "scif_fck",
 	}, {
 		.flags = 0,
 	}
@@ -60,6 +249,12 @@
 };
 
 static struct platform_device *sh7785_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&sci_device,
 };
 
@@ -70,6 +265,21 @@
 }
 __initcall(sh7785_devices_setup);
 
+static struct platform_device *sh7785_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7785_early_devices,
+				   ARRAY_SIZE(sh7785_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 90e8cff..93e0d2c 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2009  Renesas Solutions Corp.
  * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ * Paul Mundt <paul.mundt@renesas.com>
  *
  * Based on SH7785 Setup
  *
@@ -19,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
 static struct plat_sci_port sci_platform_data[] = {
@@ -69,6 +71,368 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffda0008,
+		.end	= 0xffda0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffda0014,
+		.end	= 0xffda001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffda0020,
+		.end	= 0xffda002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 22,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct sh_timer_config tmu6_platform_data = {
+	.name = "TMU6",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu6_resources[] = {
+	[0] = {
+		.name	= "TMU6",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu6_device = {
+	.name		= "sh_tmu",
+	.id		= 6,
+	.dev = {
+		.platform_data	= &tmu6_platform_data,
+	},
+	.resource	= tmu6_resources,
+	.num_resources	= ARRAY_SIZE(tmu6_resources),
+};
+
+static struct sh_timer_config tmu7_platform_data = {
+	.name = "TMU7",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu7_resources[] = {
+	[0] = {
+		.name	= "TMU7",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu7_device = {
+	.name		= "sh_tmu",
+	.id		= 7,
+	.dev = {
+		.platform_data	= &tmu7_platform_data,
+	},
+	.resource	= tmu7_resources,
+	.num_resources	= ARRAY_SIZE(tmu7_resources),
+};
+
+static struct sh_timer_config tmu8_platform_data = {
+	.name = "TMU8",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu8_resources[] = {
+	[0] = {
+		.name	= "TMU8",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu8_device = {
+	.name		= "sh_tmu",
+	.id		= 8,
+	.dev = {
+		.platform_data	= &tmu8_platform_data,
+	},
+	.resource	= tmu8_resources,
+	.num_resources	= ARRAY_SIZE(tmu8_resources),
+};
+
+static struct sh_timer_config tmu9_platform_data = {
+	.name = "TMU9",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu9_resources[] = {
+	[0] = {
+		.name	= "TMU9",
+		.start	= 0xffde0008,
+		.end	= 0xffde0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu9_device = {
+	.name		= "sh_tmu",
+	.id		= 9,
+	.dev = {
+		.platform_data	= &tmu9_platform_data,
+	},
+	.resource	= tmu9_resources,
+	.num_resources	= ARRAY_SIZE(tmu9_resources),
+};
+
+static struct sh_timer_config tmu10_platform_data = {
+	.name = "TMU10",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu10_resources[] = {
+	[0] = {
+		.name	= "TMU10",
+		.start	= 0xffde0014,
+		.end	= 0xffde001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu10_device = {
+	.name		= "sh_tmu",
+	.id		= 10,
+	.dev = {
+		.platform_data	= &tmu10_platform_data,
+	},
+	.resource	= tmu10_resources,
+	.num_resources	= ARRAY_SIZE(tmu10_resources),
+};
+
+static struct sh_timer_config tmu11_platform_data = {
+	.name = "TMU11",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu11_resources[] = {
+	[0] = {
+		.name	= "TMU11",
+		.start	= 0xffde0020,
+		.end	= 0xffde002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu11_device = {
+	.name		= "sh_tmu",
+	.id		= 11,
+	.dev = {
+		.platform_data	= &tmu11_platform_data,
+	},
+	.resource	= tmu11_resources,
+	.num_resources	= ARRAY_SIZE(tmu11_resources),
+};
+
 static struct resource usb_ohci_resources[] = {
 	[0] = {
 		.start	= 0xffe70400,
@@ -94,6 +458,21 @@
 	.resource	= usb_ohci_resources,
 };
 
+static struct platform_device *sh7786_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
+	&tmu9_device,
+	&tmu10_device,
+	&tmu11_device,
+};
+
 static struct platform_device *sh7786_devices[] __initdata = {
 	&sci_device,
 	&usb_ohci_device,
@@ -156,12 +535,26 @@
 
 static int __init sh7786_devices_setup(void)
 {
+	int ret;
+
 	sh7786_usb_setup();
+
+	ret = platform_add_devices(sh7786_early_devices,
+				   ARRAY_SIZE(sh7786_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
 	return platform_add_devices(sh7786_devices,
 				    ARRAY_SIZE(sh7786_devices));
 }
 device_initcall(sh7786_devices_setup);
 
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7786_early_devices,
+				   ARRAY_SIZE(sh7786_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index bd35f3253..53c65fd 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -1,7 +1,7 @@
 /*
- * SH-X3 Setup
+ * SH-X3 Prototype Setup
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -12,6 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/io.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
 static struct plat_sci_port sci_platform_data[] = {
@@ -48,17 +49,221 @@
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffc10008,
+		.end	= 0xffc10013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffc10014,
+		.end	= 0xffc1001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffc10020,
+		.end	= 0xffc1002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffc20008,
+		.end	= 0xffc20013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 19,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffc20014,
+		.end	= 0xffc2001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffc20020,
+		.end	= 0xffc2002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct platform_device *shx3_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
 static struct platform_device *shx3_devices[] __initdata = {
 	&sci_device,
 };
 
 static int __init shx3_devices_setup(void)
 {
+	int ret;
+
+	ret = platform_add_devices(shx3_early_devices,
+				   ARRAY_SIZE(shx3_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
 	return platform_add_devices(shx3_devices,
 				    ARRAY_SIZE(shx3_devices));
 }
 __initcall(shx3_devices_setup);
 
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(shx3_early_devices,
+				   ARRAY_SIZE(shx3_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh5/Makefile b/arch/sh/kernel/cpu/sh5/Makefile
index ce4602e..a184a31 100644
--- a/arch/sh/kernel/cpu/sh5/Makefile
+++ b/arch/sh/kernel/cpu/sh5/Makefile
@@ -6,6 +6,9 @@
 obj-$(CONFIG_SH_FPU)		+= fpu.o
 obj-$(CONFIG_KALLSYMS)		+= unwind.o
 
+# CPU subtype setup
+obj-$(CONFIG_CPU_SH5)		+= setup-sh5.o
+
 # Primary on-chip clocks (common)
 clock-$(CONFIG_CPU_SH5)		:= clock-sh5.o
 
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
index 52c4924..7f864eb 100644
--- a/arch/sh/kernel/cpu/sh5/clock-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -32,30 +32,30 @@
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) >> 12) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_cpu_clk_ops = {
@@ -71,7 +71,7 @@
 
 void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 {
-	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
+	cprc_base = (unsigned long)ioremap_nocache(CPRC_BASE, 1024);
 	BUG_ON(!cprc_base);
 
 	if (idx < ARRAY_SIZE(sh5_clk_ops))
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 7e49cb8..b0aacf6 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -812,27 +812,6 @@
 	! exceptions
 	add	SP, ZERO, r14
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* We've pushed all the registers now, so only r2-r4 hold anything
-	 * useful. Move them into callee save registers */
-	or	r2, ZERO, r28
-	or	r3, ZERO, r29
-	or	r4, ZERO, r30
-
-	/* Preserve r2 as the event code */
-	movi	evt_debug, r3
-	ori	r3, 1, r3
-	ptabs	r3, tr0
-
-	or	SP, ZERO, r6
-	getcon	TRA, r5
-	blink	tr0, LINK
-
-	or	r28, ZERO, r2
-	or	r29, ZERO, r3
-	or	r30, ZERO, r4
-#endif
-
 	/* For syscall and debug race condition, get TRA now */
 	getcon	TRA, r5
 
@@ -887,11 +866,6 @@
  */
 	.global ret_from_irq
 ret_from_irq:
-#ifdef CONFIG_POOR_MANS_STRACE
-	pta	evt_debug_ret_from_irq, tr0
-	ori	SP, 0, r2
-	blink	tr0, LINK
-#endif
 	ld.q	SP, FRAME_S(FSSR), r6
 	shlri	r6, 30, r6
 	andi	r6, 1, r6
@@ -905,12 +879,6 @@
 ret_from_exception:
 	preempt_stop()
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	pta	evt_debug_ret_from_exc, tr0
-	ori	SP, 0, r2
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSSR), r6
 	shlri	r6, 30, r6
 	andi	r6, 1, r6
@@ -1236,18 +1204,6 @@
 	.global syscall_ret
 syscall_ret:
 	st.q	SP, FRAME_R(9), r2	/* Expecting SP back to BASIC frame */
-
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* nothing useful in registers at this point */
-
-	movi	evt_debug2, r5
-	ori	r5, 1, r5
-	ptabs	r5, tr0
-	ld.q	SP, FRAME_R(9), r2
-	or	SP, ZERO, r3
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSPC), r2
 	addi	r2, 4, r2		/* Move PC, being pre-execution event */
 	st.q	SP, FRAME_S(FSPC), r2
@@ -1268,25 +1224,12 @@
 	ptabs	r5, tr0
 	blink	tr0, LINK
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* nothing useful in registers at this point */
-
-	movi	evt_debug2, r5
-	ori	r5, 1, r5
-	ptabs	r5, tr0
-	ld.q	SP, FRAME_R(9), r2
-	or	SP, ZERO, r3
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSPC), r2
 	addi	r2, 4, r2		/* Move PC, being pre-execution event */
 	st.q	SP, FRAME_S(FSPC), r2
 	pta	ret_from_syscall, tr0
 	blink	tr0, ZERO
 
-
-
 syscall_allowed:
 	/* Use LINK to deflect the exit point, default is syscall_ret */
 	pta	syscall_ret, tr0
@@ -1410,8 +1353,8 @@
 	   r2(out) : result quadword
 
 	   This is provided as a cheapskate way of manipulating device
-	   registers for debugging (to avoid the need to onchip_remap the debug
-	   module, and to avoid the need to onchip_remap the watchpoint
+	   registers for debugging (to avoid the need to ioremap the debug
+	   module, and to avoid the need to ioremap the watchpoint
 	   controller in a way that identity maps sufficient bits to avoid the
 	   SH5-101 cut2 silicon defect).
 
@@ -1459,8 +1402,8 @@
 	   r3 : quadword value to write.
 
 	   This is provided as a cheapskate way of manipulating device
-	   registers for debugging (to avoid the need to onchip_remap the debug
-	   module, and to avoid the need to onchip_remap the watchpoint
+	   registers for debugging (to avoid the need to ioremap the debug
+	   module, and to avoid the need to ioremap the watchpoint
 	   controller in a way that identity maps sufficient bits to avoid the
 	   SH5-101 cut2 silicon defect).
 
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
new file mode 100644
index 0000000..f5ff1ac
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -0,0 +1,195 @@
+/*
+ * SH5-101/SH5-103 CPU Setup
+ *
+ *  Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/serial_sci.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/sh_timer.h>
+#include <asm/addrspace.h>
+
+static struct plat_sci_port sci_platform_data[] = {
+	{
+		.mapbase	= PHYS_PERIPHERAL_BLOCK + 0x01030000,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_IOREMAP,
+		.type		= PORT_SCIF,
+		.irqs		= { 39, 40, 42, 0 },
+	}, {
+		.flags = 0,
+	}
+};
+
+static struct platform_device sci_device = {
+	.name		= "sh-sci",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= sci_platform_data,
+	},
+};
+
+static struct resource rtc_resources[] = {
+	[0] = {
+		.start	= PHYS_PERIPHERAL_BLOCK + 0x01040000,
+		.end	= PHYS_PERIPHERAL_BLOCK + 0x01040000 + 0x58 - 1,
+		.flags	= IORESOURCE_IO,
+	},
+	[1] = {
+		/* Period IRQ */
+		.start	= IRQ_PRI,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* Carry IRQ */
+		.start	= IRQ_CUI,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[3] = {
+		/* Alarm IRQ */
+		.start	= IRQ_ATI,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device rtc_device = {
+	.name		= "sh-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+	.resource	= rtc_resources,
+};
+
+#define	TMU_BLOCK_OFF	0x01020000
+#define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
+#define TMU0_BASE	(TMU_BASE + 0x8 + (0xc * 0x0))
+#define TMU1_BASE	(TMU_BASE + 0x8 + (0xc * 0x1))
+#define TMU2_BASE	(TMU_BASE + 0x8 + (0xc * 0x2))
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "peripheral_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= TMU0_BASE,
+		.end	= TMU0_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "peripheral_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= TMU1_BASE,
+		.end	= TMU1_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI1,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "peripheral_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= TMU2_BASE,
+		.end	= TMU2_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI2,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct platform_device *sh5_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+static struct platform_device *sh5_devices[] __initdata = {
+	&sci_device,
+	&rtc_device,
+};
+
+static int __init sh5_devices_setup(void)
+{
+	int ret;
+
+	ret = platform_add_devices(sh5_early_devices,
+				   ARRAY_SIZE(sh5_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
+	return platform_add_devices(sh5_devices,
+				    ARRAY_SIZE(sh5_devices));
+}
+__initcall(sh5_devices_setup);
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh5_early_devices,
+				   ARRAY_SIZE(sh5_early_devices));
+}
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index 29cf458..4f85fff 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -12,6 +12,7 @@
  * for more details.
  */
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index c22853b..77dfecb 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -267,7 +267,7 @@
 int handle_trapped_io(struct pt_regs *regs, unsigned long address)
 {
 	mm_segment_t oldfs;
-	opcode_t instruction;
+	insn_size_t instruction;
 	int tmp;
 
 	if (!lookup_tiop(address))
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 3f1372e..3d09062 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -31,39 +31,64 @@
 }
 
 #if defined(CONFIG_PROC_FS)
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p, int prec)
+{
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *)v, j, prec;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
+		j *= 10;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p, prec);
 
 	if (i == 0) {
-		seq_puts(p, "           ");
+		seq_printf(p, "%*s", prec + 8, "");
 		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
+			seq_printf(p, "CPU%-8d", j);
 		seq_putc(p, '\n');
 	}
 
-	if (i < sh_mv.mv_nr_irqs) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto unlock;
-		seq_printf(p, "%3d: ",i);
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-		seq_printf(p, " %14s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
+	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%*d: ", prec, i);
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	seq_printf(p, " %14s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
 		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
+		while ((action = action->next) != NULL)
 			seq_printf(p, ", %s", action->name);
-		seq_putc(p, '\n');
-unlock:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == sh_mv.mv_nr_irqs)
-		seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
+	}
 
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 #endif
@@ -254,3 +279,11 @@
 
 	irq_ctx_init(smp_processor_id());
 }
+
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+	nr_irqs = sh_mv.mv_nr_irqs;
+	return 0;
+}
+#endif
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 7c747e7..305aad7 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -47,7 +47,7 @@
 /* Calculate the new address for after a step */
 static short *get_step_address(struct pt_regs *linux_regs)
 {
-	opcode_t op = __raw_readw(linux_regs->pc);
+	insn_size_t op = __raw_readw(linux_regs->pc);
 	long addr;
 
 	/* BT */
@@ -134,7 +134,7 @@
  */
 
 static unsigned long stepped_address;
-static opcode_t stepped_opcode;
+static insn_size_t stepped_opcode;
 
 static void do_single_step(struct pt_regs *linux_regs)
 {
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/localtimer.c
similarity index 100%
rename from arch/sh/kernel/timers/timer-broadcast.c
rename to arch/sh/kernel/localtimer.c
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index c1ea41e..548f660 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -129,6 +129,7 @@
 	mv_set(ioport_map);
 	mv_set(ioport_unmap);
 	mv_set(irq_demux);
+	mv_set(mode_pins);
 
 	if (!sh_mv.mv_nr_irqs)
 		sh_mv.mv_nr_irqs = NR_IRQS;
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index c430810..c2efdcd 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -46,8 +46,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
@@ -90,7 +88,7 @@
 		 * SHmedia, the LSB of the symbol needs to be asserted
 		 * for the CPU to be in SHmedia mode when it starts executing
 		 * the branch target. */
-		relocation |= (sym->st_other & 4);
+		relocation |= !!(sym->st_other & 4);
 #endif
 
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 6d94725..9289ede 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -251,7 +251,8 @@
 
 	if (current_cpu_data.type == CPU_SH7729 ||
 	    current_cpu_data.type == CPU_SH7710 ||
-	    current_cpu_data.type == CPU_SH7712) {
+	    current_cpu_data.type == CPU_SH7712 ||
+	    current_cpu_data.type == CPU_SH7203){
 		ctrl_outw(BBR_INST | BBR_READ | BBR_CPU, UBC_BBRA);
 		ctrl_outl(BRCR_PCBA | BRCR_PCTE, UBC_BRCR);
 	} else {
@@ -407,6 +408,7 @@
 #else
 	ctrl_outw(0, UBC_BBRA);
 	ctrl_outw(0, UBC_BBRB);
+	ctrl_outl(0, UBC_BRCR);
 #endif
 	current->thread.ubc_pc = 0;
 	ubc_usercnt -= 1;
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index f7b22dd..3392e83 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -334,6 +334,14 @@
 					[(addr - (long)&dummy->fpu) >> 2];
 		} else if (addr == (long) &dummy->u_fpvalid)
 			tmp = !!tsk_used_math(child);
+		else if (addr == PT_TEXT_ADDR)
+			tmp = child->mm->start_code;
+		else if (addr == PT_DATA_ADDR)
+			tmp = child->mm->start_data;
+		else if (addr == PT_TEXT_END_ADDR)
+			tmp = child->mm->end_code;
+		else if (addr == PT_TEXT_LEN)
+			tmp = child->mm->end_code - child->mm->start_code;
 		else
 			tmp = 0;
 		ret = put_user(tmp, datap);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 04a6004..dd38338 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -29,6 +29,7 @@
 #include <linux/mmzone.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/platform_device.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
@@ -155,7 +156,7 @@
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size) {
 		if (crash_base <= 0) {
-			vp = alloc_bootmem_nopanic(crash_size); 
+			vp = alloc_bootmem_nopanic(crash_size);
 			if (!vp) {
 				printk(KERN_INFO "crashkernel allocation "
 				       "failed\n");
@@ -184,7 +185,6 @@
 {}
 #endif
 
-#ifndef CONFIG_GENERIC_CALIBRATE_DELAY
 void __cpuinit calibrate_delay(void)
 {
 	struct clk *clk = clk_get(NULL, "cpu_clk");
@@ -200,7 +200,6 @@
 			 (loops_per_jiffy/(5000/HZ)) % 100,
 			 loops_per_jiffy);
 }
-#endif
 
 void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 						unsigned long end_pfn)
@@ -328,6 +327,10 @@
 early_param("elfcorehdr", parse_elfcorehdr);
 #endif
 
+void __init __attribute__ ((weak)) plat_early_device_setup(void)
+{
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	enable_mmu();
@@ -381,6 +384,8 @@
 
 	parse_early_param();
 
+	plat_early_device_setup();
+
 	sh_mv_setup();
 
 	/*
@@ -415,6 +420,18 @@
 #endif
 }
 
+/* processor boot mode configuration */
+int generic_mode_pins(void)
+{
+	pr_warning("generic_mode_pins(): missing mode pin configuration\n");
+	return 0;
+}
+
+int test_mode_pin(int pin)
+{
+	return sh_mv.mv_mode_pins() & pin;
+}
+
 static const char *cpu_name[] = {
 	[CPU_SH7201]	= "SH7201",
 	[CPU_SH7203]	= "SH7203",	[CPU_SH7263]	= "SH7263",
@@ -435,7 +452,8 @@
 	[CPU_SH7722]	= "SH7722",	[CPU_SHX3]	= "SH-X3",
 	[CPU_SH5_101]	= "SH5-101",	[CPU_SH5_103]	= "SH5-103",
 	[CPU_MXG]	= "MX-G",	[CPU_SH7723]	= "SH7723",
-	[CPU_SH7366]	= "SH7366",	[CPU_SH_NONE]	= "Unknown"
+	[CPU_SH7366]	= "SH7366",	[CPU_SH7724]	= "SH7724",
+	[CPU_SH_NONE]	= "Unknown"
 };
 
 const char *get_cpu_subtype(struct sh_cpuinfo *c)
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 528de29..fcc5de3 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -19,14 +19,10 @@
 #include <asm/ftrace.h>
 
 extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
-extern struct hw_interrupt_type no_irq_type;
 
 /* platform dependent support */
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(irq_desc);
-EXPORT_SYMBOL(no_irq_type);
-
 EXPORT_SYMBOL(strlen);
 
 /* PCI exports */
@@ -41,11 +37,6 @@
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(__copy_user);
-
-#ifdef CONFIG_MMU
-EXPORT_SYMBOL(get_vm_area);
-#endif
-
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__ndelay);
 EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index 0d74d6b..8f54ef0 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -76,5 +76,7 @@
 #define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name)
 
 DECLARE_EXPORT(__sdivsi3);
+DECLARE_EXPORT(__sdivsi3_1);
+DECLARE_EXPORT(__sdivsi3_2);
 DECLARE_EXPORT(__udivsi3);
 DECLARE_EXPORT(__div_table);
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 05202ed..a9fff9f 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -350,4 +350,5 @@
 	.long sys_pipe2
 	.long sys_inotify_init1
 	.long sys_preadv
-	.long sys_writev
+	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index a083609..75c1889 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -389,3 +389,4 @@
 	.long sys_inotify_init1		/* 360 */
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
new file mode 100644
index 0000000..2edde32
--- /dev/null
+++ b/arch/sh/kernel/time.c
@@ -0,0 +1,125 @@
+/*
+ *  arch/sh/kernel/time.c
+ *
+ *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
+ *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
+ *  Copyright (C) 2002 - 2009  Paul Mundt
+ *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/clockchips.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/rtc.h>
+#include <asm/clock.h>
+#include <asm/rtc.h>
+
+/* Dummy RTC ops */
+static void null_rtc_get_time(struct timespec *tv)
+{
+	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
+	tv->tv_nsec = 0;
+}
+
+static int null_rtc_set_time(const time_t secs)
+{
+	return 0;
+}
+
+void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
+int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
+
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+unsigned long read_persistent_clock(void)
+{
+	struct timespec tv;
+	rtc_sh_get_time(&tv);
+	return tv.tv_sec;
+}
+
+int update_persistent_clock(struct timespec now)
+{
+	return rtc_sh_set_time(now.tv_sec);
+}
+#endif
+
+unsigned int get_rtc_time(struct rtc_time *tm)
+{
+	if (rtc_sh_get_time != null_rtc_get_time) {
+		struct timespec tv;
+
+		rtc_sh_get_time(&tv);
+		rtc_time_to_tm(tv.tv_sec, tm);
+	}
+
+	return RTC_24H;
+}
+EXPORT_SYMBOL(get_rtc_time);
+
+int set_rtc_time(struct rtc_time *tm)
+{
+	unsigned long secs;
+
+	rtc_tm_to_time(tm, &secs);
+	return rtc_sh_set_time(secs);
+}
+EXPORT_SYMBOL(set_rtc_time);
+
+static int __init rtc_generic_init(void)
+{
+	struct platform_device *pdev;
+
+	if (rtc_sh_get_time == null_rtc_get_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+module_init(rtc_generic_init);
+
+void (*board_time_init)(void);
+
+unsigned long long sched_clock(void)
+{
+	return (jiffies_64 - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ);
+}
+
+static void __init sh_late_time_init(void)
+{
+	/*
+	 * Make sure all compiled-in early timers register themselves.
+	 * Run probe() for one "earlytimer" device.
+	 */
+	early_platform_driver_register_all("earlytimer");
+	early_platform_driver_probe("earlytimer", 1, 0);
+}
+
+void __init time_init(void)
+{
+	if (board_time_init)
+		board_time_init();
+
+	clk_init();
+
+	rtc_sh_get_time(&xtime);
+	set_normalized_timespec(&wall_to_monotonic,
+				-xtime.tv_sec, -xtime.tv_nsec);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
+	late_time_init = sh_late_time_init;
+}
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
deleted file mode 100644
index 1700d24..0000000
--- a/arch/sh/kernel/time_32.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- *  arch/sh/kernel/time_32.c
- *
- *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002 - 2008  Paul Mundt
- *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
- *
- *  Some code taken from i386 version.
- *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/profile.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/clockchips.h>
-#include <linux/mc146818rtc.h>	/* for rtc_lock */
-#include <linux/smp.h>
-#include <asm/clock.h>
-#include <asm/rtc.h>
-#include <asm/timer.h>
-#include <asm/kgdb.h>
-
-struct sys_timer *sys_timer;
-
-/* Move this somewhere more sensible.. */
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-/* Dummy RTC ops */
-static void null_rtc_get_time(struct timespec *tv)
-{
-	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
-	tv->tv_nsec = 0;
-}
-
-static int null_rtc_set_time(const time_t secs)
-{
-	return 0;
-}
-
-void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
-int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
-
-#ifndef CONFIG_GENERIC_TIME
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-
-	do {
-		/*
-		 * Turn off IRQs when grabbing xtime_lock, so that
-		 * the sys_timer get_offset code doesn't have to handle it.
-		 */
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = get_timer_offset();
-		sec = xtime.tv_sec;
-		usec += xtime.tv_nsec / NSEC_PER_USEC;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= get_timer_offset() * NSEC_PER_USEC;
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
-}
-EXPORT_SYMBOL(do_settimeofday);
-#endif /* !CONFIG_GENERIC_TIME */
-
-/* last time the RTC clock got updated */
-static long last_rtc_update;
-
-/*
- * handle_timer_tick() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-void handle_timer_tick(void)
-{
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
-	do_timer(1);
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_sh_set_time(xtime.tv_sec) == 0)
-			last_rtc_update = xtime.tv_sec;
-		else
-			/* do it again in 60s */
-			last_rtc_update = xtime.tv_sec - 600;
-	}
-	write_sequnlock(&xtime_lock);
-
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-}
-
-#ifdef CONFIG_PM
-int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->stop();
-
-	return 0;
-}
-
-int timer_resume(struct sys_device *dev)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->start();
-
-	return 0;
-}
-#else
-#define timer_suspend NULL
-#define timer_resume NULL
-#endif
-
-static struct sysdev_class timer_sysclass = {
-	.name	 = "timer",
-	.suspend = timer_suspend,
-	.resume	 = timer_resume,
-};
-
-static int __init timer_init_sysfs(void)
-{
-	int ret;
-
-	if (!sys_timer)
-		return 0;
-
-	ret = sysdev_class_register(&timer_sysclass);
-	if (ret != 0)
-		return ret;
-
-	sys_timer->dev.cls = &timer_sysclass;
-	return sysdev_register(&sys_timer->dev);
-}
-device_initcall(timer_init_sysfs);
-
-void (*board_time_init)(void);
-
-struct clocksource clocksource_sh = {
-	.name		= "SuperH",
-};
-
-#ifdef CONFIG_GENERIC_TIME
-unsigned long long sched_clock(void)
-{
-	unsigned long long cycles;
-
-	/* jiffies based sched_clock if no clocksource is installed */
-	if (!clocksource_sh.rating)
-		return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-
-	cycles = clocksource_sh.read(&clocksource_sh);
-	return cyc2ns(&clocksource_sh, cycles);
-}
-#endif
-
-void __init time_init(void)
-{
-	if (board_time_init)
-		board_time_init();
-
-	clk_init();
-
-	rtc_sh_get_time(&xtime);
-	set_normalized_timespec(&wall_to_monotonic,
-				-xtime.tv_sec, -xtime.tv_nsec);
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	local_timer_setup(smp_processor_id());
-#endif
-
-	/*
-	 * Find the timer to use as the system timer, it will be
-	 * initialized for us.
-	 */
-	sys_timer = get_sys_timer();
-	if (unlikely(!sys_timer))
-		panic("System timer missing.\n");
-
-	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
-}
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
deleted file mode 100644
index 988c77c..0000000
--- a/arch/sh/kernel/time_64.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * arch/sh/kernel/time_64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- * Copyright (C) 2003 - 2007  Paul Mundt
- * Copyright (C) 2003  Richard Curnow
- *
- *    Original TMU/RTC code taken from sh version.
- *    Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *      Some code taken from i386 version.
- *      Copyright (C) 1991, 1992, 1995  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/errno.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/profile.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/bcd.h>
-#include <linux/timex.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <cpu/registers.h>	 /* required by inline __asm__ stmt. */
-#include <cpu/irq.h>
-#include <asm/addrspace.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-#include <asm/clock.h>
-
-#define TMU_TOCR_INIT	0x00
-#define TMU0_TCR_INIT	0x0020
-#define TMU_TSTR_INIT	1
-#define TMU_TSTR_OFF	0
-
-/* Real Time Clock */
-#define	RTC_BLOCK_OFF	0x01040000
-#define RTC_BASE	PHYS_PERIPHERAL_BLOCK + RTC_BLOCK_OFF
-#define RTC_RCR1_CIE	0x10	/* Carry Interrupt Enable */
-#define RTC_RCR1	(rtc_base + 0x38)
-
-/* Time Management Unit */
-#define	TMU_BLOCK_OFF	0x01020000
-#define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
-#define TMU0_BASE	tmu_base + 0x8 + (0xc * 0x0)
-#define TMU1_BASE	tmu_base + 0x8 + (0xc * 0x1)
-#define TMU2_BASE	tmu_base + 0x8 + (0xc * 0x2)
-
-#define TMU_TOCR	tmu_base+0x0	/* Byte access */
-#define TMU_TSTR	tmu_base+0x4	/* Byte access */
-
-#define TMU0_TCOR	TMU0_BASE+0x0	/* Long access */
-#define TMU0_TCNT	TMU0_BASE+0x4	/* Long access */
-#define TMU0_TCR	TMU0_BASE+0x8	/* Word access */
-
-#define TICK_SIZE (tick_nsec / 1000)
-
-static unsigned long tmu_base, rtc_base;
-unsigned long cprc_base;
-
-/* Variables to allow interpolation of time of day to resolution better than a
- * jiffy. */
-
-/* This is effectively protected by xtime_lock */
-static unsigned long ctc_last_interrupt;
-static unsigned long long usecs_per_jiffy = 1000000/HZ; /* Approximation */
-
-#define CTC_JIFFY_SCALE_SHIFT 40
-
-/* 2**CTC_JIFFY_SCALE_SHIFT / ctc_ticks_per_jiffy */
-static unsigned long long scaled_recip_ctc_ticks_per_jiffy;
-
-/* Estimate number of microseconds that have elapsed since the last timer tick,
-   by scaling the delta that has occurred in the CTC register.
-
-   WARNING WARNING WARNING : This algorithm relies on the CTC decrementing at
-   the CPU clock rate.  If the CPU sleeps, the CTC stops counting.  Bear this
-   in mind if enabling SLEEP_WORKS in process.c.  In that case, this algorithm
-   probably needs to use TMU.TCNT0 instead.  This will work even if the CPU is
-   sleeping, though will be coarser.
-
-   FIXME : What if usecs_per_tick is moving around too much, e.g. if an adjtime
-   is running or if the freq or tick arguments of adjtimex are modified after
-   we have calibrated the scaling factor?  This will result in either a jump at
-   the end of a tick period, or a wrap backwards at the start of the next one,
-   if the application is reading the time of day often enough.  I think we
-   ought to do better than this.  For this reason, usecs_per_jiffy is left
-   separated out in the calculation below.  This allows some future hook into
-   the adjtime-related stuff in kernel/timer.c to remove this hazard.
-
-*/
-
-static unsigned long usecs_since_tick(void)
-{
-	unsigned long long current_ctc;
-	long ctc_ticks_since_interrupt;
-	unsigned long long ull_ctc_ticks_since_interrupt;
-	unsigned long result;
-
-	unsigned long long mul1_out;
-	unsigned long long mul1_out_high;
-	unsigned long long mul2_out_low, mul2_out_high;
-
-	/* Read CTC register */
-	asm ("getcon cr62, %0" : "=r" (current_ctc));
-	/* Note, the CTC counts down on each CPU clock, not up.
-	   Note(2), use long type to get correct wraparound arithmetic when
-	   the counter crosses zero. */
-	ctc_ticks_since_interrupt = (long) ctc_last_interrupt - (long) current_ctc;
-	ull_ctc_ticks_since_interrupt = (unsigned long long) ctc_ticks_since_interrupt;
-
-	/* Inline assembly to do 32x32x32->64 multiplier */
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul1_out) :
-	     "r" (ull_ctc_ticks_since_interrupt), "r" (usecs_per_jiffy));
-
-	mul1_out_high = mul1_out >> 32;
-
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul2_out_low) :
-	     "r" (mul1_out), "r" (scaled_recip_ctc_ticks_per_jiffy));
-
-#if 1
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul2_out_high) :
-	     "r" (mul1_out_high), "r" (scaled_recip_ctc_ticks_per_jiffy));
-#endif
-
-	result = (unsigned long) (((mul2_out_high << 32) + mul2_out_low) >> CTC_JIFFY_SCALE_SHIFT);
-
-	return result;
-}
-
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = usecs_since_tick();
-		sec = xtime.tv_sec;
-		usec += xtime.tv_nsec / 1000;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= 1000 * usecs_since_tick();
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
-}
-EXPORT_SYMBOL(do_settimeofday);
-
-/* Dummy RTC ops */
-static void null_rtc_get_time(struct timespec *tv)
-{
-	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
-	tv->tv_nsec = 0;
-}
-
-static int null_rtc_set_time(const time_t secs)
-{
-	return 0;
-}
-
-void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
-int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
-
-/* last time the RTC clock got updated */
-static long last_rtc_update;
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(void)
-{
-	unsigned long long current_ctc;
-
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
-	asm ("getcon cr62, %0" : "=r" (current_ctc));
-	ctc_last_interrupt = (unsigned long) current_ctc;
-
-	do_timer(1);
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_sh_set_time(xtime.tv_sec) == 0)
-			last_rtc_update = xtime.tv_sec;
-		else
-			/* do it again in 60 s */
-			last_rtc_update = xtime.tv_sec - 600;
-	}
-	write_sequnlock(&xtime_lock);
-
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear UNF bit */
-	timer_status = ctrl_inw(TMU0_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU0_TCR);
-
-	do_timer_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq0  = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED,
-	.name = "timer",
-};
-
-void __init time_init(void)
-{
-	unsigned long interval;
-	struct clk *clk;
-
-	tmu_base = onchip_remap(TMU_BASE, 1024, "TMU");
-	if (!tmu_base) {
-		panic("Unable to remap TMU\n");
-	}
-
-	rtc_base = onchip_remap(RTC_BASE, 1024, "RTC");
-	if (!rtc_base) {
-		panic("Unable to remap RTC\n");
-	}
-
-	clk = clk_get(NULL, "cpu_clk");
-	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) /
-			(unsigned long long)(clk_get_rate(clk) / HZ));
-
-	rtc_sh_get_time(&xtime);
-
-	setup_irq(TIMER_IRQ, &irq0);
-
-	clk = clk_get(NULL, "module_clk");
-	interval = (clk_get_rate(clk)/(HZ*4));
-
-	printk("Interval = %ld\n", interval);
-
-	/* Start TMU0 */
-	ctrl_outb(TMU_TSTR_OFF, TMU_TSTR);
-	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
-	ctrl_outw(TMU0_TCR_INIT, TMU0_TCR);
-	ctrl_outl(interval, TMU0_TCOR);
-	ctrl_outl(interval, TMU0_TCNT);
-	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
-}
-
-static struct resource rtc_resources[] = {
-	[0] = {
-		/* RTC base, filled in by rtc_init */
-		.flags	= IORESOURCE_IO,
-	},
-	[1] = {
-		/* Period IRQ */
-		.start	= IRQ_PRI,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		/* Carry IRQ */
-		.start	= IRQ_CUI,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		/* Alarm IRQ */
-		.start	= IRQ_ATI,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device rtc_device = {
-	.name		= "sh-rtc",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(rtc_resources),
-	.resource	= rtc_resources,
-};
-
-static int __init rtc_init(void)
-{
-	rtc_resources[0].start	= rtc_base;
-	rtc_resources[0].end	= rtc_resources[0].start + 0x58 - 1;
-
-	return platform_device_register(&rtc_device);
-}
-device_initcall(rtc_init);
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
deleted file mode 100644
index 0b7f857..0000000
--- a/arch/sh/kernel/timers/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for the various Linux/SuperH timers
-#
-
-obj-y	:= timer.o
-
-obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
-obj-$(CONFIG_SH_MTU2)		+= timer-mtu2.o
-obj-$(CONFIG_SH_CMT)		+= timer-cmt.o
-
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c
deleted file mode 100644
index 9aa3486..0000000
--- a/arch/sh/kernel/timers/timer-cmt.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-cmt.c - CMT Timer Support
- *
- *  Copyright (C) 2005  Yoshinori Sato
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <asm/timer.h>
-#include <asm/rtc.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7619)
-#define CMT_CMSTR	0xf84a0070
-#define CMT_CMCSR_0	0xf84a0072
-#define CMT_CMCNT_0	0xf84a0074
-#define CMT_CMCOR_0	0xf84a0076
-#define CMT_CMCSR_1	0xf84a0078
-#define CMT_CMCNT_1	0xf84a007a
-#define CMT_CMCOR_1	0xf84a007c
-
-#define STBCR3		0xf80a0000
-#define cmt_clock_enable() do {	ctrl_outb(ctrl_inb(STBCR3) & ~0x10, STBCR3); } while(0)
-#define CMT_CMCSR_INIT	0x0040
-#define CMT_CMCSR_CALIB	0x0000
-#elif defined(CONFIG_CPU_SUBTYPE_SH7203) || \
-      defined(CONFIG_CPU_SUBTYPE_SH7206) || \
-      defined(CONFIG_CPU_SUBTYPE_SH7263)
-#define CMT_CMSTR	0xfffec000
-#define CMT_CMCSR_0	0xfffec002
-#define CMT_CMCNT_0	0xfffec004
-#define CMT_CMCOR_0	0xfffec006
-
-#define STBCR4		0xfffe040c
-#define cmt_clock_enable() do {	ctrl_outb(ctrl_inb(STBCR4) & ~0x04, STBCR4); } while(0)
-#define CMT_CMCSR_INIT	0x0040
-#define CMT_CMCSR_CALIB	0x0000
-#else
-#error "Unknown CPU SUBTYPE"
-#endif
-
-static unsigned long cmt_timer_get_offset(void)
-{
-	int count;
-	static unsigned short count_p = 0xffff;    /* for the first call after boot */
-	static unsigned long jiffies_p = 0;
-
-	/*
-	 * cache volatile jiffies temporarily; we have IRQs turned off.
-	 */
-	unsigned long jiffies_t;
-
-	/* timer count may underflow right here */
-	count =  ctrl_inw(CMT_CMCOR_0);
-	count -= ctrl_inw(CMT_CMCNT_0);
-
-	jiffies_t = jiffies;
-
-	/*
-	 * avoiding timer inconsistencies (they are rare, but they happen)...
-	 * there is one kind of problem that must be avoided here:
-	 *  1. the timer counter underflows
-	 */
-
-	if (jiffies_t == jiffies_p) {
-		if (count > count_p) {
-			/* the nutcase */
-			if (ctrl_inw(CMT_CMCSR_0) & 0x80) { /* Check CMF bit */
-				count -= LATCH;
-			} else {
-				printk("%s (): hardware timer problem?\n",
-				       __func__);
-			}
-		}
-	} else
-		jiffies_p = jiffies_t;
-
-	count_p = count;
-
-	count = ((LATCH-1) - count) * TICK_SIZE;
-	count = (count + LATCH/2) / LATCH;
-
-	return count;
-}
-
-static irqreturn_t cmt_timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear CMF bit */
-	timer_status = ctrl_inw(CMT_CMCSR_0);
-	timer_status &= ~0x80;
-	ctrl_outw(timer_status, CMT_CMCSR_0);
-
-	handle_timer_tick();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction cmt_irq = {
-	.name		= "timer",
-	.handler	= cmt_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static void cmt_clk_init(struct clk *clk)
-{
-	u8 divisor = CMT_CMCSR_INIT & 0x3;
-	ctrl_inw(CMT_CMCSR_0);
-	ctrl_outw(CMT_CMCSR_INIT, CMT_CMCSR_0);
-	clk->parent = clk_get(NULL, "module_clk");
-	clk->rate = clk->parent->rate / (8 << (divisor << 1));
-}
-
-static void cmt_clk_recalc(struct clk *clk)
-{
-	u8 divisor = ctrl_inw(CMT_CMCSR_0) & 0x3;
-	clk->rate = clk->parent->rate / (8 << (divisor << 1));
-}
-
-static struct clk_ops cmt_clk_ops = {
-	.init		= cmt_clk_init,
-	.recalc		= cmt_clk_recalc,
-};
-
-static struct clk cmt0_clk = {
-	.name		= "cmt0_clk",
-	.ops		= &cmt_clk_ops,
-};
-
-static int cmt_timer_start(void)
-{
-	ctrl_outw(ctrl_inw(CMT_CMSTR) | 0x01, CMT_CMSTR);
-	return 0;
-}
-
-static int cmt_timer_stop(void)
-{
-	ctrl_outw(ctrl_inw(CMT_CMSTR) & ~0x01, CMT_CMSTR);
-	return 0;
-}
-
-static int cmt_timer_init(void)
-{
-	unsigned long interval;
-
-	cmt_clock_enable();
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &cmt_irq);
-
-	cmt0_clk.parent = clk_get(NULL, "module_clk");
-
-	cmt_timer_stop();
-
-	interval = cmt0_clk.parent->rate / 8 / HZ;
-	printk(KERN_INFO "Interval = %ld\n", interval);
-
-	ctrl_outw(interval, CMT_CMCOR_0);
-
-	clk_register(&cmt0_clk);
-	clk_enable(&cmt0_clk);
-
-	cmt_timer_start();
-
-	return 0;
-}
-
-static struct sys_timer_ops cmt_timer_ops = {
-	.init		= cmt_timer_init,
-	.start		= cmt_timer_start,
-	.stop		= cmt_timer_stop,
-#ifndef CONFIG_GENERIC_TIME
-	.get_offset	= cmt_timer_get_offset,
-#endif
-};
-
-struct sys_timer cmt_timer = {
-	.name	= "cmt",
-	.ops	= &cmt_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer-mtu2.c b/arch/sh/kernel/timers/timer-mtu2.c
deleted file mode 100644
index 9b0ef01..0000000
--- a/arch/sh/kernel/timers/timer-mtu2.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-mtu2.c - MTU2 Timer Support
- *
- *  Copyright (C) 2005  Paul Mundt
- *
- * Based off of arch/sh/kernel/timers/timer-tmu.c
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-/*
- * We use channel 1 for our lowly system timer. Channel 2 would be the other
- * likely candidate, but we leave it alone as it has higher divisors that
- * would be of more use to other more interesting applications.
- *
- * TODO: Presently we only implement a 16-bit single-channel system timer.
- * However, we can implement channel cascade if we go the overflow route and
- * get away with using 2 MTU2 channels as a 32-bit timer.
- */
-#define MTU2_TSTR	0xfffe4280
-#define MTU2_TCR_1	0xfffe4380
-#define MTU2_TMDR_1	0xfffe4381
-#define MTU2_TIOR_1	0xfffe4382
-#define MTU2_TIER_1	0xfffe4384
-#define MTU2_TSR_1	0xfffe4385
-#define MTU2_TCNT_1	0xfffe4386	/* 16-bit counter */
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7201) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7203)
-#define MTU2_TGRA_1	0xfffe4388
-#else
-#define MTU2_TGRA_1	0xfffe438a
-#endif
-
-#define STBCR3		0xfffe0408
-
-#define MTU2_TSTR_CST1	(1 << 1)	/* Counter Start 1 */
-
-#define MTU2_TSR_TGFA	(1 << 0)	/* GRA compare match */
-
-#define MTU2_TIER_TGIEA	(1 << 0)	/* GRA compare match  interrupt enable */
-
-#define MTU2_TCR_INIT	0x22
-
-#define MTU2_TCR_CALIB  0x00
-
-static unsigned long mtu2_timer_get_offset(void)
-{
-	int count;
-	static int count_p = 0x7fff;	/* for the first call after boot */
-	static unsigned long jiffies_p = 0;
-
-	/*
-	 * cache volatile jiffies temporarily; we have IRQs turned off.
-	 */
-	unsigned long jiffies_t;
-
-	/* timer count may underflow right here */
-	count = ctrl_inw(MTU2_TCNT_1);	/* read the latched count */
-
-	jiffies_t = jiffies;
-
-	/*
-	 * avoiding timer inconsistencies (they are rare, but they happen)...
-	 * there is one kind of problem that must be avoided here:
-	 *  1. the timer counter underflows
-	 */
-
-	if (jiffies_t == jiffies_p) {
-		if (count > count_p) {
-			if (ctrl_inb(MTU2_TSR_1) & MTU2_TSR_TGFA) {
-				count -= LATCH;
-			} else {
-				printk("%s (): hardware timer problem?\n",
-				       __func__);
-			}
-		}
-	} else
-		jiffies_p = jiffies_t;
-
-	count_p = count;
-
-	count = ((LATCH-1) - count) * TICK_SIZE;
-	count = (count + LATCH/2) / LATCH;
-
-	return count;
-}
-
-static irqreturn_t mtu2_timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear TGFA bit */
-	timer_status = ctrl_inb(MTU2_TSR_1);
-	timer_status &= ~MTU2_TSR_TGFA;
-	ctrl_outb(timer_status, MTU2_TSR_1);
-
-	/* Do timer tick */
-	handle_timer_tick();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction mtu2_irq = {
-	.name		= "timer",
-	.handler	= mtu2_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static unsigned int divisors[] = { 1, 4, 16, 64, 1, 1, 256 };
-
-static void mtu2_clk_init(struct clk *clk)
-{
-	u8 idx = MTU2_TCR_INIT & 0x7;
-
-	clk->rate = clk->parent->rate / divisors[idx];
-	/* Start TCNT counting */
-	ctrl_outb(ctrl_inb(MTU2_TSTR) | MTU2_TSTR_CST1, MTU2_TSTR);
-
-}
-
-static void mtu2_clk_recalc(struct clk *clk)
-{
-	u8 idx = ctrl_inb(MTU2_TCR_1) & 0x7;
-	clk->rate = clk->parent->rate / divisors[idx];
-}
-
-static struct clk_ops mtu2_clk_ops = {
-	.init		= mtu2_clk_init,
-	.recalc		= mtu2_clk_recalc,
-};
-
-static struct clk mtu2_clk1 = {
-	.name		= "mtu2_clk1",
-	.ops		= &mtu2_clk_ops,
-};
-
-static int mtu2_timer_start(void)
-{
-	ctrl_outb(ctrl_inb(MTU2_TSTR) | MTU2_TSTR_CST1, MTU2_TSTR);
-	return 0;
-}
-
-static int mtu2_timer_stop(void)
-{
-	ctrl_outb(ctrl_inb(MTU2_TSTR) & ~MTU2_TSTR_CST1, MTU2_TSTR);
-	return 0;
-}
-
-static int mtu2_timer_init(void)
-{
-	unsigned long interval;
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &mtu2_irq);
-
-	mtu2_clk1.parent = clk_get(NULL, "module_clk");
-
-	ctrl_outb(ctrl_inb(STBCR3) & (~0x20), STBCR3);
-
-	/* Normal operation */
-	ctrl_outb(0, MTU2_TMDR_1);
-	ctrl_outb(MTU2_TCR_INIT, MTU2_TCR_1);
-	ctrl_outb(0x01, MTU2_TIOR_1);
-
-	/* Enable underflow interrupt */
-	ctrl_outb(ctrl_inb(MTU2_TIER_1) | MTU2_TIER_TGIEA, MTU2_TIER_1);
-
-	interval = CONFIG_SH_PCLK_FREQ / 16 / HZ;
-	printk(KERN_INFO "Interval = %ld\n", interval);
-
-	ctrl_outw(interval, MTU2_TGRA_1);
-	ctrl_outw(0, MTU2_TCNT_1);
-
-	clk_register(&mtu2_clk1);
-	clk_enable(&mtu2_clk1);
-
-	return 0;
-}
-
-struct sys_timer_ops mtu2_timer_ops = {
-	.init		= mtu2_timer_init,
-	.start		= mtu2_timer_start,
-	.stop		= mtu2_timer_stop,
-#ifndef CONFIG_GENERIC_TIME
-	.get_offset	= mtu2_timer_get_offset,
-#endif
-};
-
-struct sys_timer mtu2_timer = {
-	.name	= "mtu2",
-	.ops	= &mtu2_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
deleted file mode 100644
index fe8d893..0000000
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-tmu.c - TMU Timer Support
- *
- *  Copyright (C) 2005 - 2007  Paul Mundt
- *
- * TMU handling code hacked out of arch/sh/kernel/time.c
- *
- *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002, 2003, 2004  Paul Mundt
- *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <linux/clockchips.h>
-#include <asm/timer.h>
-#include <asm/rtc.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-#define TMU_TOCR_INIT	0x00
-#define TMU_TCR_INIT	0x0020
-
-#define TMU0		(0)
-#define TMU1		(1)
-
-static inline void _tmu_start(int tmu_num)
-{
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) | (0x1<<tmu_num), TMU_012_TSTR);
-}
-
-static inline void _tmu_set_irq(int tmu_num, int enabled)
-{
-	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
-	ctrl_outw( (enabled ? ctrl_inw(tmu_tcr) | (1<<5) : ctrl_inw(tmu_tcr) & ~(1<<5)), tmu_tcr);
-}
-
-static inline void _tmu_stop(int tmu_num)
-{
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~(0x1<<tmu_num), TMU_012_TSTR);
-}
-
-static inline void _tmu_clear_status(int tmu_num)
-{
-	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
-	/* Clear UNF bit */
-	ctrl_outw(ctrl_inw(tmu_tcr) & ~0x100, tmu_tcr);
-}
-
-static inline unsigned long _tmu_read(int tmu_num)
-{
-        return ctrl_inl(TMU0_TCNT+0xC*tmu_num);
-}
-
-static int tmu_timer_start(void)
-{
-	_tmu_start(TMU0);
-	_tmu_start(TMU1);
-	_tmu_set_irq(TMU0,1);
-	return 0;
-}
-
-static int tmu_timer_stop(void)
-{
-	_tmu_stop(TMU0);
-	_tmu_stop(TMU1);
-	_tmu_clear_status(TMU0);
-	return 0;
-}
-
-/*
- * also when the module_clk is scaled the TMU1
- * will show the same frequency
- */
-static int tmus_are_scaled;
-
-static cycle_t tmu_timer_read(struct clocksource *cs)
-{
-	return ((cycle_t)(~_tmu_read(TMU1)))<<tmus_are_scaled;
-}
-
-
-static unsigned long tmu_latest_interval[3];
-static void tmu_timer_set_interval(int tmu_num, unsigned long interval, unsigned int reload)
-{
-	unsigned long tmu_tcnt = TMU0_TCNT + tmu_num*0xC;
-	unsigned long tmu_tcor = TMU0_TCOR + tmu_num*0xC;
-
-	_tmu_stop(tmu_num);
-
-	ctrl_outl(interval, tmu_tcnt);
-	tmu_latest_interval[tmu_num] = interval;
-
-	/*
-	 * TCNT reloads from TCOR on underflow, clear it if we don't
-	 * intend to auto-reload
-	 */
-	ctrl_outl( reload ? interval : 0 , tmu_tcor);
-
-	_tmu_start(tmu_num);
-}
-
-static int tmu_set_next_event(unsigned long cycles,
-			      struct clock_event_device *evt)
-{
-	tmu_timer_set_interval(TMU0,cycles, evt->mode == CLOCK_EVT_MODE_PERIODIC);
-	_tmu_set_irq(TMU0,1);
-	return 0;
-}
-
-static void tmu_set_mode(enum clock_event_mode mode,
-			 struct clock_event_device *evt)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		ctrl_outl(tmu_latest_interval[TMU0], TMU0_TCOR);
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		ctrl_outl(0, TMU0_TCOR);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_RESUME:
-		break;
-	}
-}
-
-static struct clock_event_device tmu0_clockevent = {
-	.name		= "tmu0",
-	.shift		= 32,
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.set_mode	= tmu_set_mode,
-	.set_next_event	= tmu_set_next_event,
-};
-
-static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
-{
-	struct clock_event_device *evt = &tmu0_clockevent;
-	_tmu_clear_status(TMU0);
-	_tmu_set_irq(TMU0,tmu0_clockevent.mode != CLOCK_EVT_MODE_ONESHOT);
-
-	switch (tmu0_clockevent.mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-	case CLOCK_EVT_MODE_PERIODIC:
-		evt->event_handler(evt);
-		break;
-	default:
-		break;
-	}
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction tmu0_irq = {
-	.name		= "periodic/oneshot timer",
-	.handler	= tmu_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static void __init tmu_clk_init(struct clk *clk)
-{
-	u8 divisor  = TMU_TCR_INIT & 0x7;
-	int tmu_num = clk->name[3]-'0';
-	ctrl_outw(TMU_TCR_INIT, TMU0_TCR+(tmu_num*0xC));
-	clk->rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
-}
-
-static void tmu_clk_recalc(struct clk *clk)
-{
-	int tmu_num = clk->name[3]-'0';
-	unsigned long prev_rate = clk_get_rate(clk);
-	unsigned long flags;
-	u8 divisor = ctrl_inw(TMU0_TCR+tmu_num*0xC) & 0x7;
-	clk->rate  = clk_get_rate(clk->parent) / (4 << (divisor << 1));
-
-	if(prev_rate==clk_get_rate(clk))
-		return;
-
-	if(tmu_num)
-		return; /* No more work on TMU1 */
-
-	local_irq_save(flags);
-	tmus_are_scaled = (prev_rate > clk->rate);
-
-	_tmu_stop(TMU0);
-
-	tmu0_clockevent.mult = div_sc(clk->rate, NSEC_PER_SEC,
-				tmu0_clockevent.shift);
-	tmu0_clockevent.max_delta_ns =
-			clockevent_delta2ns(-1, &tmu0_clockevent);
-	tmu0_clockevent.min_delta_ns =
-			clockevent_delta2ns(1, &tmu0_clockevent);
-
-	if (tmus_are_scaled)
-		tmu_latest_interval[TMU0] >>= 1;
-	else
-		tmu_latest_interval[TMU0] <<= 1;
-
-	tmu_timer_set_interval(TMU0,
-		tmu_latest_interval[TMU0],
-		tmu0_clockevent.mode == CLOCK_EVT_MODE_PERIODIC);
-
-	_tmu_start(TMU0);
-
-	local_irq_restore(flags);
-}
-
-static struct clk_ops tmu_clk_ops = {
-	.init		= tmu_clk_init,
-	.recalc		= tmu_clk_recalc,
-};
-
-static struct clk tmu0_clk = {
-	.name		= "tmu0_clk",
-	.ops		= &tmu_clk_ops,
-};
-
-static struct clk tmu1_clk = {
-	.name		= "tmu1_clk",
-	.ops		= &tmu_clk_ops,
-};
-
-static int tmu_timer_init(void)
-{
-	unsigned long interval;
-	unsigned long frequency;
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &tmu0_irq);
-
-	tmu0_clk.parent = clk_get(NULL, "module_clk");
-	tmu1_clk.parent = clk_get(NULL, "module_clk");
-
-	tmu_timer_stop();
-
-#if !defined(CONFIG_CPU_SUBTYPE_SH7720) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7721) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7760) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7785) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7786) && \
-    !defined(CONFIG_CPU_SUBTYPE_SHX3)
-	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
-#endif
-
-	clk_register(&tmu0_clk);
-	clk_register(&tmu1_clk);
-	clk_enable(&tmu0_clk);
-	clk_enable(&tmu1_clk);
-
-	frequency = clk_get_rate(&tmu0_clk);
-	interval = (frequency + HZ / 2) / HZ;
-
-	tmu_timer_set_interval(TMU0,interval, 1);
-	tmu_timer_set_interval(TMU1,~0,1);
-
-	_tmu_start(TMU1);
-
-	clocksource_sh.rating = 200;
-	clocksource_sh.mask = CLOCKSOURCE_MASK(32);
-	clocksource_sh.read = tmu_timer_read;
-	clocksource_sh.shift = 10;
-	clocksource_sh.mult = clocksource_hz2mult(clk_get_rate(&tmu1_clk),
-						  clocksource_sh.shift);
-	clocksource_sh.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-	clocksource_register(&clocksource_sh);
-
-	tmu0_clockevent.mult = div_sc(frequency, NSEC_PER_SEC,
-				      tmu0_clockevent.shift);
-	tmu0_clockevent.max_delta_ns =
-			clockevent_delta2ns(-1, &tmu0_clockevent);
-	tmu0_clockevent.min_delta_ns =
-			clockevent_delta2ns(1, &tmu0_clockevent);
-
-	tmu0_clockevent.cpumask = cpumask_of(0);
-	tmu0_clockevent.rating = 100;
-
-	clockevents_register_device(&tmu0_clockevent);
-
-	return 0;
-}
-
-static struct sys_timer_ops tmu_timer_ops = {
-	.init		= tmu_timer_init,
-	.start		= tmu_timer_start,
-	.stop		= tmu_timer_stop,
-};
-
-struct sys_timer tmu_timer = {
-	.name	= "tmu",
-	.ops	= &tmu_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c
deleted file mode 100644
index 4e7e747..0000000
--- a/arch/sh/kernel/timers/timer.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer.c - Common timer code
- *
- *  Copyright (C) 2005  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-static struct sys_timer *sys_timers[] = {
-#ifdef CONFIG_SH_TMU
-	&tmu_timer,
-#endif
-#ifdef CONFIG_SH_MTU2
-	&mtu2_timer,
-#endif
-#ifdef CONFIG_SH_CMT
-	&cmt_timer,
-#endif
-	NULL,
-};
-
-static char timer_override[10];
-static int __init timer_setup(char *str)
-{
-	if (str)
-		strlcpy(timer_override, str, sizeof(timer_override));
-	return 1;
-}
-__setup("timer=", timer_setup);
-
-struct sys_timer *get_sys_timer(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(sys_timers); i++) {
-		struct sys_timer *t = sys_timers[i];
-
-		if (unlikely(!t))
-			break;
-		if (unlikely(timer_override[0]))
-			if ((strcmp(timer_override, t->name) != 0))
-				continue;
-		if (likely(t->ops->init() == 0))
-			return t;
-	}
-
-	return NULL;
-}
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 438f1eb..46348ed0 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -22,11 +22,11 @@
 
 int is_valid_bugaddr(unsigned long addr)
 {
-	unsigned short opcode;
+	insn_size_t opcode;
 
 	if (addr < PAGE_OFFSET)
 		return 0;
-	if (probe_kernel_address((u16 *)addr, opcode))
+	if (probe_kernel_address((insn_size_t *)addr, opcode))
 		return 0;
 
 	return opcode == TRAPA_BUG_OPCODE;
@@ -66,7 +66,7 @@
 
 #ifdef CONFIG_BUG
 	if (__kernel_text_address(instruction_pointer(regs))) {
-		opcode_t insn = *(opcode_t *)instruction_pointer(regs);
+		insn_size_t insn = *(insn_size_t *)instruction_pointer(regs);
 		if (insn == TRAPA_BUG_OPCODE)
 			handle_BUG(regs);
 	}
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 30ca9c5..2b77277 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -34,6 +34,7 @@
 # define TRAP_ILLEGAL_SLOT_INST	6
 # define TRAP_ADDRESS_ERROR	9
 # ifdef CONFIG_CPU_SH2A
+#  define TRAP_UBC		12
 #  define TRAP_FPU_ERROR	13
 #  define TRAP_DIVZERO_ERROR	17
 #  define TRAP_DIVOVF_ERROR	18
@@ -176,7 +177,7 @@
  *   (if that instruction is in a branch delay slot)
  * - return 0 if emulation okay, -EFAULT on existential error
  */
-static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
+static int handle_unaligned_ins(insn_size_t instruction, struct pt_regs *regs,
 				struct mem_access *ma)
 {
 	int ret, index, count;
@@ -321,10 +322,10 @@
  * - fetches the instruction from PC+2
  */
 static inline int handle_delayslot(struct pt_regs *regs,
-				   opcode_t old_instruction,
+				   insn_size_t old_instruction,
 				   struct mem_access *ma)
 {
-	opcode_t instruction;
+	insn_size_t instruction;
 	void __user *addr = (void __user *)(regs->pc +
 		instruction_size(old_instruction));
 
@@ -364,7 +365,7 @@
 
 static int handle_unaligned_notify_count = 10;
 
-int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
+int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma)
 {
 	u_int rm;
@@ -522,7 +523,7 @@
 	unsigned long error_code = 0;
 	mm_segment_t oldfs;
 	siginfo_t info;
-	opcode_t instruction;
+	insn_size_t instruction;
 	int tmp;
 
 	/* Intentional ifdef */
@@ -849,6 +850,10 @@
 #endif
 #endif
 
+#ifdef TRAP_UBC
+	set_exception_table_vec(TRAP_UBC, break_point_trap);
+#endif
+
 	/* Setup VBR for boot cpu */
 	per_cpu_trap_init();
 }
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index a85831c..267e5eb 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -370,7 +370,6 @@
 		return -1;
 	}
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	/* Check accessible.  For misaligned access in the kernel, assume the
 	   address is always accessible (and if not, just fault when the
 	   load/store gets done.) */
@@ -380,18 +379,13 @@
 		}
 		/* Do access_ok check later - it depends on whether it's a load or a store. */
 	}
-#endif
 
 	*address = addr;
 	return 0;
 }
 
-/* Default value as for sh */
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 static int user_mode_unaligned_fixup_count = 10;
 static int user_mode_unaligned_fixup_enable = 1;
-#endif
-
 static int kernel_mode_unaligned_fixup_count = 32;
 
 static void misaligned_kernel_word_load(__u64 address, int do_sign_extend, __u64 *result)
@@ -440,7 +434,6 @@
 	}
 
 	destreg = (opcode >> 4) & 0x3f;
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs)) {
 		__u64 buffer;
 
@@ -470,9 +463,7 @@
 				width_shift, (unsigned long) regs->pc);
 			break;
 		}
-	} else
-#endif
-	{
+	} else {
 		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
 		__u64 lo, hi;
 
@@ -519,7 +510,6 @@
 	}
 
 	srcreg = (opcode >> 4) & 0x3f;
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs)) {
 		__u64 buffer;
 
@@ -546,9 +536,7 @@
 		if (__copy_user((void *)(int)address, &buffer, (1 << width_shift)) > 0) {
 			return -1; /* fault */
 		}
-	} else
-#endif
-	{
+	} else {
 		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
 		__u64 val = regs->regs[srcreg];
 
@@ -576,7 +564,6 @@
 
 }
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 /* Never need to fix up misaligned FPU accesses within the kernel since that's a real
    error. */
 static int misaligned_fpu_load(struct pt_regs *regs,
@@ -727,7 +714,6 @@
 		return -1;
 	}
 }
-#endif
 
 static int misaligned_fixup(struct pt_regs *regs)
 {
@@ -735,12 +721,8 @@
 	int error;
 	int major, minor;
 
-#if !defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
-	/* Never fixup user mode misaligned accesses without this option enabled. */
-	return -1;
-#else
-	if (!user_mode_unaligned_fixup_enable) return -1;
-#endif
+	if (!user_mode_unaligned_fixup_enable)
+		return -1;
 
 	error = read_opcode(regs->pc, &opcode, user_mode(regs));
 	if (error < 0) {
@@ -749,15 +731,12 @@
 	major = (opcode >> 26) & 0x3f;
 	minor = (opcode >> 16) & 0xf;
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs) && (user_mode_unaligned_fixup_count > 0)) {
 		--user_mode_unaligned_fixup_count;
 		/* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
 		printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
 		       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
-	} else
-#endif
-	if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
+	} else if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
 		--kernel_mode_unaligned_fixup_count;
 		if (in_interrupt()) {
 			printk("Fixing up unaligned kernelspace access in interrupt pc=0x%08x ins=0x%08lx\n",
@@ -830,7 +809,6 @@
 			}
 			break;
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 		case (0x94>>2): /* FLD.S */
 			error = misaligned_fpu_load(regs, opcode, 1, 2, 0);
 			break;
@@ -881,7 +859,6 @@
 				break;
 			}
 			break;
-#endif
 
 		default:
 			/* Fault */
@@ -907,7 +884,6 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "user_reports",
@@ -923,7 +899,6 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec},
-#endif
 	{}
 };
 
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index d7d4991..f53c76a 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -1,5 +1,178 @@
-#ifdef CONFIG_SUPERH32
-# include "vmlinux_32.lds.S"
+/*
+ * ld script to make SuperH Linux kernel
+ * Written by Niibe Yutaka and Paul Mundt
+ */
+#ifdef CONFIG_SUPERH64
+#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
+OUTPUT_ARCH(sh:sh5)
 #else
-# include "vmlinux_64.lds.S"
+#define LOAD_OFFSET	0
+OUTPUT_ARCH(sh)
 #endif
+
+#include <asm/thread_info.h>
+#include <asm/cache.h>
+#include <asm-generic/vmlinux.lds.h>
+
+ENTRY(_start)
+SECTIONS
+{
+#ifdef CONFIG_PMB_FIXED
+	. = CONFIG_PAGE_OFFSET + (CONFIG_MEMORY_START & 0x1fffffff) +
+	    CONFIG_ZERO_PAGE_OFFSET;
+#elif defined(CONFIG_32BIT)
+	. = CONFIG_PAGE_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
+#else
+	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
+#endif
+
+	_text = .;		/* Text and read-only data */
+
+	.empty_zero_page : AT(ADDR(.empty_zero_page) - LOAD_OFFSET) {
+		*(.empty_zero_page)
+	} = 0
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		HEAD_TEXT
+		TEXT_TEXT
+
+#ifdef CONFIG_SUPERH64
+		*(.text64)
+		*(.text..SHmedia32)
+#endif
+
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		_etext = .;		/* End of text section */
+	} = 0x0009
+
+	. = ALIGN(16);		/* Exception table */
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	NOTES
+	RO_DATA(PAGE_SIZE)
+
+	/*
+	 * Code which must be executed uncached and the associated data
+	 */
+	. = ALIGN(PAGE_SIZE);
+	.uncached : AT(ADDR(.uncached) - LOAD_OFFSET) {
+		__uncached_start = .;
+		*(.uncached.text)
+		*(.uncached.data)
+		__uncached_end = .;
+	}
+
+	. = ALIGN(THREAD_SIZE);
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {		/* Data */
+		*(.data.init_task)
+
+		. = ALIGN(L1_CACHE_BYTES);
+		*(.data.cacheline_aligned)
+
+		. = ALIGN(L1_CACHE_BYTES);
+		*(.data.read_mostly)
+
+		. = ALIGN(PAGE_SIZE);
+		*(.data.page_aligned)
+
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+
+		DATA_DATA
+		CONSTRUCTORS
+	}
+
+	_edata = .;			/* End of data section */
+
+	. = ALIGN(PAGE_SIZE);		/* Init code and data */
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .;
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA }
+
+	. = ALIGN(16);
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	SECURITY_INIT
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
+#endif
+
+	. = ALIGN(4);
+	.machvec.init : AT(ADDR(.machvec.init) - LOAD_OFFSET) {
+		__machvec_start = .;
+		*(.machvec.init)
+		__machvec_end = .;
+	}
+
+	PERCPU(PAGE_SIZE)
+
+	/*
+	 * .exit.text is discarded at runtime, not link time, to deal with
+	 * references from __bug_table
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { EXIT_TEXT }
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA }
+
+	. = ALIGN(PAGE_SIZE);
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		__init_end = .;
+		__bss_start = .;		/* BSS */
+		*(.bss.page_aligned)
+		*(.bss)
+		*(COMMON)
+		. = ALIGN(4);
+		_ebss = .;			/* uClinux MTD sucks */
+		_end = . ;
+	}
+
+	/*
+	 * When something in the kernel is NOT compiled as a module, the
+	 * module cleanup code and data are put into these segments. Both
+	 * can then be thrown away, as cleanup code is never called unless
+	 * it's a module.
+	 */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+	}
+
+	STABS_DEBUG
+	DWARF_DEBUG
+}
diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S
deleted file mode 100644
index dd9b2ee..0000000
--- a/arch/sh/kernel/vmlinux_32.lds.S
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * ld script to make SuperH Linux kernel
- * Written by Niibe Yutaka
- */
-#include <asm/thread_info.h>
-#include <asm/cache.h>
-#include <asm-generic/vmlinux.lds.h>
-
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux")
-#else
-OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux")
-#endif
-OUTPUT_ARCH(sh)
-ENTRY(_start)
-SECTIONS
-{
-#ifdef CONFIG_PMB_FIXED
-	. = CONFIG_PAGE_OFFSET + (CONFIG_MEMORY_START & 0x1fffffff) +
-	    CONFIG_ZERO_PAGE_OFFSET;
-#elif defined(CONFIG_32BIT)
-	. = CONFIG_PAGE_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
-#else
-	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
-#endif
-
-	_text = .;			/* Text and read-only data */
-
-	.empty_zero_page : {
-		*(.empty_zero_page)
-	} = 0
-
-	.text : {
-		HEAD_TEXT
-		TEXT_TEXT
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-	} = 0x0009
-
-	. = ALIGN(16);		/* Exception table */
-	__start___ex_table = .;
-	__ex_table : { *(__ex_table) }
-	__stop___ex_table = .;
-
-	_etext = .;			/* End of text section */
-
-	NOTES
-	RO_DATA(PAGE_SIZE)
-
-	/*
-	 * Code which must be executed uncached and the associated data
-	 */
-	. = ALIGN(PAGE_SIZE);
-	__uncached_start = .;
-	.uncached.text : { *(.uncached.text) }
-	.uncached.data : { *(.uncached.data) }
-	__uncached_end = .;
-
-	. = ALIGN(THREAD_SIZE);
-	.data : {			/* Data */
-		*(.data.init_task)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.cacheline_aligned)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.read_mostly)
-
-		. = ALIGN(PAGE_SIZE);
-		*(.data.page_aligned)
-
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-
-		DATA_DATA
-		CONSTRUCTORS
-	}
-
-	_edata = .;			/* End of data section */
-
-	. = ALIGN(PAGE_SIZE);		/* Init code and data */
-	__init_begin = .;
-	_sinittext = .;
-	.init.text : { INIT_TEXT }
-	_einittext = .;
-	.init.data : { INIT_DATA }
-
-	. = ALIGN(16);
-	__setup_start = .;
-	.init.setup : { *(.init.setup) }
-	__setup_end = .;
-
-	__initcall_start = .;
-	.initcall.init : {
-		INITCALLS
-	}
-	__initcall_end = .;
-	__con_initcall_start = .;
-	.con_initcall.init : { *(.con_initcall.init) }
-	__con_initcall_end = .;
-
-	SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	__initramfs_start = .;
-	.init.ramfs : { *(.init.ramfs) }
-	__initramfs_end = .;
-#endif
-
-	. = ALIGN(4);
-	__machvec_start = .;
-	.machvec.init : { *(.machvec.init) }
-	__machvec_end = .;
-
-	PERCPU(PAGE_SIZE)
-
-	/*
-	 * .exit.text is discarded at runtime, not link time, to deal with
-	 * references from __bug_table
-	 */
-	.exit.text : { EXIT_TEXT }
-	.exit.data : { EXIT_DATA }
-
-	. = ALIGN(PAGE_SIZE);
-	.bss : {
-		__init_end = .;
-		__bss_start = .;		/* BSS */
-		*(.bss.page_aligned)
-		*(.bss)
-		*(COMMON)
-		. = ALIGN(4);
-		_ebss = .;			/* uClinux MTD sucks */
-		_end = . ;
-	}
-
-	/*
-	 * When something in the kernel is NOT compiled as a module, the
-	 * module cleanup code and data are put into these segments. Both
-	 * can then be thrown away, as cleanup code is never called unless
-	 * it's a module.
-	 */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
diff --git a/arch/sh/kernel/vmlinux_64.lds.S b/arch/sh/kernel/vmlinux_64.lds.S
deleted file mode 100644
index 6966446..0000000
--- a/arch/sh/kernel/vmlinux_64.lds.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * ld script to make SH64 Linux kernel
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * benedict.gaster@superh.com:	 2nd May 2002
- *    Add definition of empty_zero_page to be the first page of kernel image.
- *
- * benedict.gaster@superh.com:	 3rd May 2002
- *    Added support for ramdisk, removing statically linked romfs at the
- *    same time.
- *
- * lethal@linux-sh.org:          9th May 2003
- *    Kill off GLOBAL_NAME() usage and other CDC-isms.
- *
- * lethal@linux-sh.org:         19th May 2003
- *    Remove support for ancient toolchains.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <asm/page.h>
-#include <asm/cache.h>
-#include <asm/thread_info.h>
-
-#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
-#include <asm-generic/vmlinux.lds.h>
-
-OUTPUT_ARCH(sh:sh5)
-
-#define C_PHYS(x) AT (ADDR(x) - LOAD_OFFSET)
-
-ENTRY(__start)
-SECTIONS
-{
-	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + PAGE_SIZE;
-	_text = .;			/* Text and read-only data */
-
-	.empty_zero_page : C_PHYS(.empty_zero_page) {
-		*(.empty_zero_page)
-	} = 0
-
-	.text : C_PHYS(.text) {
-		HEAD_TEXT
-		TEXT_TEXT
-		*(.text64)
-		*(.text..SHmedia32)
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	} = 0x6ff0fff0
-#else
-	} = 0xf0fff06f
-#endif
-
-	/* We likely want __ex_table to be Cache Line aligned */
-	. = ALIGN(L1_CACHE_BYTES);		/* Exception table */
-	__start___ex_table = .;
-	__ex_table : C_PHYS(__ex_table) { *(__ex_table) }
-	__stop___ex_table = .;
-
-	_etext = .;			/* End of text section */
-
-	NOTES 
-	RO_DATA(PAGE_SIZE)
-
-	. = ALIGN(THREAD_SIZE);
-	.data : C_PHYS(.data) {			/* Data */
-		*(.data.init_task)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.cacheline_aligned)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.read_mostly)
-
-		. = ALIGN(PAGE_SIZE);
-		*(.data.page_aligned)
-
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-
-		DATA_DATA
-		CONSTRUCTORS
-	}
-
-	_edata = .;			/* End of data section */
-
-	. = ALIGN(PAGE_SIZE);		/* Init code and data */
-	__init_begin = .;
-	_sinittext = .;
-	.init.text : C_PHYS(.init.text) { INIT_TEXT }
-	_einittext = .;
-	.init.data : C_PHYS(.init.data) { INIT_DATA }
-	. = ALIGN(L1_CACHE_BYTES);	/* Better if Cache Line aligned */
-	__setup_start = .;
-	.init.setup : C_PHYS(.init.setup) { *(.init.setup) }
-	__setup_end = .;
-	__initcall_start = .;
-	.initcall.init : C_PHYS(.initcall.init) {
-		INITCALLS
-	}
-	__initcall_end = .;
-	__con_initcall_start = .;
-	.con_initcall.init : C_PHYS(.con_initcall.init) {
-		*(.con_initcall.init)
-	}
-	__con_initcall_end = .;
-
-	SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	__initramfs_start = .;
-	.init.ramfs : C_PHYS(.init.ramfs) { *(.init.ramfs) }
-	__initramfs_end = .;
-#endif
-
-	. = ALIGN(8);
-	__machvec_start = .;
-	.machvec.init : C_PHYS(.machvec.init) { *(.machvec.init) }
-	__machvec_end = .;
-
-	PERCPU(PAGE_SIZE)
-
-	/*
-	 * .exit.text is discarded at runtime, not link time, to deal with
-	 * references from __bug_table
-	 */
-	.exit.text : C_PHYS(.exit.text) { EXIT_TEXT }
-	.exit.data : C_PHYS(.exit.data) { EXIT_DATA }
-
-	. = ALIGN(PAGE_SIZE);
-	.bss : C_PHYS(.bss) {
-		__init_end = .;
-		__bss_start = .;		/* BSS */
-		*(.bss.page_aligned)
-		*(.bss)
-		*(COMMON)
-		. = ALIGN(4);
-		_ebss = .;			/* uClinux MTD sucks */
-		_end = . ;
-	}
-
-	/*
-	 * When something in the kernel is NOT compiled as a module, the
-	 * module cleanup code and data are put into these segments. Both
-	 * can then be thrown away, as cleanup code is never called unless
-	 * it's a module.
-	 */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
diff --git a/arch/sh/lib64/.gitignore b/arch/sh/lib64/.gitignore
deleted file mode 100644
index 3508c2c..0000000
--- a/arch/sh/lib64/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-syscalltab.h
diff --git a/arch/sh/lib64/dbg.c b/arch/sh/lib64/dbg.c
index 2fb8eaf..6152a6a 100644
--- a/arch/sh/lib64/dbg.c
+++ b/arch/sh/lib64/dbg.c
@@ -135,140 +135,6 @@
 	    (" =============================================================\n");
 }
 
-/* ======================================================================= */
-
-#ifdef CONFIG_POOR_MANS_STRACE
-
-#include "syscalltab.h"
-
-struct ring_node {
-	int evt;
-	int ret_addr;
-	int event;
-	int tra;
-	int pid;
-	unsigned long sp;
-	unsigned long pc;
-};
-
-static struct ring_node event_ring[16];
-static int event_ptr = 0;
-
-struct stored_syscall_data {
-	int pid;
-	int syscall_number;
-};
-
-#define N_STORED_SYSCALLS 16
-
-static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS];
-static int syscall_next=0;
-static int syscall_next_print=0;
-
-void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs)
-{
-	int syscallno = tra & 0xff;
-	unsigned long sp;
-	unsigned long stack_bottom;
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	stack_bottom = (unsigned long) task_stack_page(current);
-	asm volatile("ori r15, 0, %0" : "=r" (sp));
-	rr = event_ring + event_ptr;
-	rr->evt = evt;
-	rr->ret_addr = ret_addr;
-	rr->event = event;
-	rr->tra = tra;
-	rr->pid = pid;
-	rr->sp = sp;
-	rr->pc = regs->pc;
-
-	if (sp < stack_bottom + 3092) {
-		int i, j;
-		printk("evt_debug : stack underflow report\n");
-		for (j=0, i = event_ptr; j<16; j++) {
-			rr = event_ring + i;
-			printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n",
-				rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc);
-			i--;
-			i &= 15;
-		}
-		panic("STACK UNDERFLOW\n");
-	}
-
-	event_ptr = (event_ptr + 1) & 15;
-
-	if ((event == 2) && (evt == 0x160)) {
-		if (syscallno < NUM_SYSCALL_INFO_ENTRIES) {
-			/* Store the syscall information to print later.  We
-			 * can't print this now - currently we're running with
-			 * SR.BL=1, so we can't take a tlbmiss (which could occur
-			 * in the console drivers under printk).
-			 *
-			 * Just overwrite old entries on ring overflow - this
-			 * is only for last-hope debugging. */
-			stored_syscalls[syscall_next].pid = current->pid;
-			stored_syscalls[syscall_next].syscall_number = syscallno;
-			syscall_next++;
-			syscall_next &= (N_STORED_SYSCALLS - 1);
-		}
-	}
-}
-
-static void drain_syscalls(void) {
-	while (syscall_next_print != syscall_next) {
-		printk("Task %d: %s()\n",
-			stored_syscalls[syscall_next_print].pid,
-			syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name);
-			syscall_next_print++;
-			syscall_next_print &= (N_STORED_SYSCALLS - 1);
-	}
-}
-
-void evt_debug2(unsigned int ret)
-{
-	drain_syscalls();
-	printk("Task %d: syscall returns %08x\n", current->pid, ret);
-}
-
-void evt_debug_ret_from_irq(struct pt_regs *regs)
-{
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	rr = event_ring + event_ptr;
-	rr->evt = 0xffff;
-	rr->ret_addr = 0;
-	rr->event = 0;
-	rr->tra = 0;
-	rr->pid = pid;
-	rr->pc = regs->pc;
-	event_ptr = (event_ptr + 1) & 15;
-}
-
-void evt_debug_ret_from_exc(struct pt_regs *regs)
-{
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	rr = event_ring + event_ptr;
-	rr->evt = 0xfffe;
-	rr->ret_addr = 0;
-	rr->event = 0;
-	rr->tra = 0;
-	rr->pid = pid;
-	rr->pc = regs->pc;
-	event_ptr = (event_ptr + 1) & 15;
-}
-
-#endif /* CONFIG_POOR_MANS_STRACE */
-
-/* ======================================================================= */
-
 void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs)
 {
 
@@ -380,51 +246,3 @@
 	print_dtlb();
 	print_itlb();
 }
-
-/* ======================================================================= */
-
-/*
-** Depending on <base> scan the MMU, Data or Instruction side
-** looking for a valid mapping matching Eaddr & asid.
-** Return -1 if not found or the TLB id entry otherwise.
-** Note: it works only for 4k pages!
-*/
-static unsigned long
-lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid)
-{
-	regType_t pteH;
-	unsigned long epn;
-	int count;
-
-	epn = Eaddr & 0xfffff000;
-
-	for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) {
-		pteH = getConfigReg(base);
-		if (GET_VALID(pteH))
-			if ((unsigned long) GET_EPN(pteH) == epn)
-				if ((unsigned long) GET_ASID(pteH) == asid)
-					break;
-	}
-	return ((unsigned long) ((count < MAX_TLBs) ? base : -1));
-}
-
-unsigned long lookup_dtlb(unsigned long Eaddr)
-{
-	unsigned long asid = get_asid();
-	return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid));
-}
-
-unsigned long lookup_itlb(unsigned long Eaddr)
-{
-	unsigned long asid = get_asid();
-	return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid));
-}
-
-void print_page(struct page *page)
-{
-	printk("  page[%p] -> index 0x%lx,  count 0x%x,  flags 0x%lx\n",
-	       page, page->index, page_count(page), page->flags);
-	printk("       address_space = %p, pages =%ld\n", page->mapping,
-	       page->mapping->nrpages);
-
-}
diff --git a/arch/sh/lib64/panic.c b/arch/sh/lib64/panic.c
index da32ba7..38c954e 100644
--- a/arch/sh/lib64/panic.c
+++ b/arch/sh/lib64/panic.c
@@ -6,53 +6,10 @@
  * for more details.
  */
 
-#include <linux/kernel.h>
-#include <asm/io.h>
-#include <cpu/registers.h>
-
-/* THIS IS A PHYSICAL ADDRESS */
-#define HDSP2534_ADDR (0x04002100)
-
-#ifdef CONFIG_SH_CAYMAN
-
-static void poor_mans_delay(void)
-{
-	int i;
-	for (i = 0; i < 2500000; i++) {
-	}		/* poor man's delay */
-}
-
-static void show_value(unsigned long x)
-{
-	int i;
-	unsigned nibble;
-	for (i = 0; i < 8; i++) {
-		nibble = ((x >> (i * 4)) & 0xf);
-
-		ctrl_outb(nibble + ((nibble > 9) ? 55 : 48),
-			  HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
-	}
-}
-
-#endif
-
 void
 panic_handler(unsigned long panicPC, unsigned long panicSSR,
 	      unsigned long panicEXPEVT)
 {
-#ifdef CONFIG_SH_CAYMAN
-	while (1) {
-		/* This piece of code displays the PC on the LED display */
-		show_value(panicPC);
-		poor_mans_delay();
-		show_value(panicSSR);
-		poor_mans_delay();
-		show_value(panicEXPEVT);
-		poor_mans_delay();
-	}
-#endif
-
 	/* Never return from the panic handler */
 	for (;;) ;
-
 }
diff --git a/arch/sh/lib64/sdivsi3.S b/arch/sh/lib64/sdivsi3.S
index 6a800c6..1963bbd 100644
--- a/arch/sh/lib64/sdivsi3.S
+++ b/arch/sh/lib64/sdivsi3.S
@@ -1,4 +1,6 @@
 	.global	__sdivsi3
+	.global	__sdivsi3_1
+	.global	__sdivsi3_2
 	.section	.text..SHmedia32,"ax"
 	.align	2
 
@@ -6,13 +8,15 @@
 	/* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
 	/* result in r0 */
 __sdivsi3:
+__sdivsi3_1:
 	ptb __div_table,tr0
+	gettr tr0,r20
 
+__sdivsi3_2:
 	nsb r5, r1
 	shlld r5, r1, r25    /* normalize; [-2 ..1, 1..2) in s2.62 */
 	shari r25, 58, r21   /* extract 5(6) bit index (s2.4 with hole -1..1) */
 	/* bubble */
-	gettr tr0,r20
 	ldx.ub r20, r21, r19 /* u0.8 */
 	shari r25, 32, r25   /* normalize to s2.30 */
 	shlli r21, 1, r21
diff --git a/arch/sh/lib64/udelay.c b/arch/sh/lib64/udelay.c
index d76bd80..f215b06 100644
--- a/arch/sh/lib64/udelay.c
+++ b/arch/sh/lib64/udelay.c
@@ -33,7 +33,7 @@
 			     :"0"(loops));
 }
 
-inline void __const_udelay(unsigned long xloops)
+void __const_udelay(unsigned long xloops)
 {
 	__delay(xloops * (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy));
 }
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index d4079ca..2795618 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -21,6 +21,29 @@
 	default "0x20000000" if MMU && SUPERH64
 	default "0x00000000"
 
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	range 9 64 if PAGE_SIZE_16KB
+	default "9" if PAGE_SIZE_16KB
+	range 7 64 if PAGE_SIZE_64KB
+	default "7" if PAGE_SIZE_64KB
+	range 11 64
+	default "14" if !MMU
+	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  The page size is not necessarily 4KB. Keep this in mind when
+	  choosing a value for this option.
+
 config MEMORY_START
 	hex "Physical memory start address"
 	default "0x08000000"
@@ -201,14 +224,6 @@
 
 endchoice
 
-config ENTRY_OFFSET
-	hex
-	default "0x00001000" if PAGE_SIZE_4KB
-	default "0x00002000" if PAGE_SIZE_8KB
-	default "0x00004000" if PAGE_SIZE_16KB
-	default "0x00010000" if PAGE_SIZE_64KB
-	default "0x00000000"
-
 choice
 	prompt "HugeTLB page size"
 	depends on HUGETLB_PAGE && (CPU_SH4 || CPU_SH5) && MMU
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 9e277ec..8676209 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -60,7 +60,7 @@
 static inline void sh64_icache_inv_all(void)
 {
 	unsigned long long addr, flag, data;
-	unsigned int flags;
+	unsigned long flags;
 
 	addr = ICCR0;
 	flag = ICCR0_ICI;
@@ -172,7 +172,7 @@
 		unsigned long eaddr;
 		unsigned long after_last_page_start;
 		unsigned long mm_asid, current_asid;
-		unsigned long long flags = 0ULL;
+		unsigned long flags = 0;
 
 		mm_asid = cpu_asid(smp_processor_id(), mm);
 		current_asid = get_asid();
@@ -236,7 +236,7 @@
 	unsigned long long eaddr = start;
 	unsigned long long eaddr_end = start + len;
 	unsigned long current_asid, mm_asid;
-	unsigned long long flags;
+	unsigned long flags;
 	unsigned long long epage_start;
 
 	/*
@@ -342,7 +342,7 @@
 			 * alloco is a NOP if the cache is write-through.
 			 */
 			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
-				ctrl_inb(eaddr);
+				__raw_readb((unsigned long)eaddr);
 		}
 	}
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 3edf297..ee8e6bb 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -184,7 +184,6 @@
 }
 
 static struct kcore_list kcore_mem, kcore_vmalloc;
-int after_bootmem = 0;
 
 void __init mem_init(void)
 {
@@ -217,8 +216,6 @@
 	memset(empty_zero_page, 0, PAGE_SIZE);
 	__flush_wback_region(empty_zero_page, PAGE_SIZE);
 
-	after_bootmem = 1;
-
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c
index 60cc486..da2f418 100644
--- a/arch/sh/mm/ioremap_32.c
+++ b/arch/sh/mm/ioremap_32.c
@@ -46,17 +46,15 @@
 		return NULL;
 
 	/*
-	 * If we're on an SH7751 or SH7780 PCI controller, PCI memory is
-	 * mapped at the end of the address space (typically 0xfd000000)
-	 * in a non-translatable area, so mapping through page tables for
-	 * this area is not only pointless, but also fundamentally
-	 * broken. Just return the physical address instead.
+	 * If we're in the fixed PCI memory range, mapping through page
+	 * tables is not only pointless, but also fundamentally broken.
+	 * Just return the physical address instead.
 	 *
 	 * For boards that map a small PCI memory aperture somewhere in
 	 * P1/P2 space, ioremap() will already do the right thing,
 	 * and we'll never get this far.
 	 */
-	if (is_pci_memaddr(phys_addr) && is_pci_memaddr(last_addr))
+	if (is_pci_memory_fixed_range(phys_addr, size))
 		return (void __iomem *)phys_addr;
 
 #if !defined(CONFIG_PMB_FIXED)
@@ -121,7 +119,9 @@
 	unsigned long seg = PXSEG(vaddr);
 	struct vm_struct *p;
 
-	if (seg < P3SEG || vaddr >= P3_ADDR_MAX || is_pci_memaddr(vaddr))
+	if (seg < P3SEG || vaddr >= P3_ADDR_MAX)
+		return;
+	if (is_pci_memory_fixed_range(vaddr, 0))
 		return;
 
 #ifdef CONFIG_PMB
diff --git a/arch/sh/mm/ioremap_64.c b/arch/sh/mm/ioremap_64.c
index 31e1bb5..828c859 100644
--- a/arch/sh/mm/ioremap_64.c
+++ b/arch/sh/mm/ioremap_64.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
+#include <linux/slab.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/addrspace.h>
@@ -27,88 +28,17 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
 
-static void shmedia_mapioaddr(unsigned long, unsigned long);
-static unsigned long shmedia_ioremap(struct resource *, u32, int);
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void *__ioremap(unsigned long phys_addr, unsigned long size,
-		unsigned long flags)
-{
-	void * addr;
-	struct vm_struct * area;
-	unsigned long offset, last_addr;
-	pgprot_t pgprot;
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
-		return NULL;
-
-	pgprot = __pgprot(_PAGE_PRESENT  | _PAGE_READ   |
-			  _PAGE_WRITE    | _PAGE_DIRTY  |
-			  _PAGE_ACCESSED | _PAGE_SHARED | flags);
-
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_vm_area(size, VM_IOREMAP);
-	if (!area)
-		return NULL;
-	pr_debug("Get vm_area returns %p addr %p\n", area, area->addr);
-	area->phys_addr = phys_addr;
-	addr = area->addr;
-	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, pgprot)) {
-		vunmap(addr);
-		return NULL;
-	}
-	return (void *) (offset + (char *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void *addr)
-{
-	struct vm_struct *area;
-
-	vfree((void *) (PAGE_MASK & (unsigned long) addr));
-	area = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr));
-	if (!area) {
-		printk(KERN_ERR "iounmap: bad address %p\n", addr);
-		return;
-	}
-
-	kfree(area);
-}
-EXPORT_SYMBOL(__iounmap);
-
 static struct resource shmedia_iomap = {
 	.name	= "shmedia_iomap",
 	.start	= IOBASE_VADDR + PAGE_SIZE,
 	.end	= IOBASE_END - 1,
 };
 
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va);
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
+			      unsigned long flags);
 static void shmedia_unmapioaddr(unsigned long vaddr);
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz);
+static void __iomem *shmedia_ioremap(struct resource *res, u32 pa,
+				     int sz, unsigned long flags);
 
 /*
  * We have the same problem as the SPARC, so lets have the same comment:
@@ -130,18 +60,18 @@
 
 static struct xresource *xres_alloc(void)
 {
-        struct xresource *xrp;
-        int n;
+	struct xresource *xrp;
+	int n;
 
-        xrp = xresv;
-        for (n = 0; n < XNRES; n++) {
-                if (xrp->xflag == 0) {
-                        xrp->xflag = 1;
-                        return xrp;
-                }
-                xrp++;
-        }
-        return NULL;
+	xrp = xresv;
+	for (n = 0; n < XNRES; n++) {
+		if (xrp->xflag == 0) {
+			xrp->xflag = 1;
+			return xrp;
+		}
+		xrp++;
+	}
+	return NULL;
 }
 
 static void xres_free(struct xresource *xrp)
@@ -161,76 +91,71 @@
 	return NULL;
 }
 
-static unsigned long shmedia_alloc_io(unsigned long phys, unsigned long size,
-				      const char *name)
+static void __iomem *shmedia_alloc_io(unsigned long phys, unsigned long size,
+				      const char *name, unsigned long flags)
 {
-        static int printed_full = 0;
-        struct xresource *xres;
-        struct resource *res;
-        char *tack;
-        int tlen;
+	static int printed_full;
+	struct xresource *xres;
+	struct resource *res;
+	char *tack;
+	int tlen;
 
-        if (name == NULL) name = "???";
+	if (name == NULL)
+		name = "???";
 
-        if ((xres = xres_alloc()) != 0) {
-                tack = xres->xname;
-                res = &xres->xres;
-        } else {
-                if (!printed_full) {
-                        printk("%s: done with statics, switching to kmalloc\n",
-			       __func__);
-                        printed_full = 1;
-                }
-                tlen = strlen(name);
-                tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
-                if (!tack)
-			return -ENOMEM;
-                memset(tack, 0, sizeof(struct resource));
-                res = (struct resource *) tack;
-                tack += sizeof (struct resource);
-        }
+	xres = xres_alloc();
+	if (xres != 0) {
+		tack = xres->xname;
+		res = &xres->xres;
+	} else {
+		if (!printed_full) {
+			printk(KERN_NOTICE "%s: done with statics, "
+			       "switching to kmalloc\n", __func__);
+			printed_full = 1;
+		}
+		tlen = strlen(name);
+		tack = kmalloc(sizeof(struct resource) + tlen + 1, GFP_KERNEL);
+		if (!tack)
+			return NULL;
+		memset(tack, 0, sizeof(struct resource));
+		res = (struct resource *) tack;
+		tack += sizeof(struct resource);
+	}
 
-        strncpy(tack, name, XNMLN);
-        tack[XNMLN] = 0;
-        res->name = tack;
+	strncpy(tack, name, XNMLN);
+	tack[XNMLN] = 0;
+	res->name = tack;
 
-        return shmedia_ioremap(res, phys, size);
+	return shmedia_ioremap(res, phys, size, flags);
 }
 
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz)
+static void __iomem *shmedia_ioremap(struct resource *res, u32 pa, int sz,
+				     unsigned long flags)
 {
-        unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
+	unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
 	unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
-        unsigned long va;
-        unsigned int psz;
+	unsigned long va;
+	unsigned int psz;
 
-        if (allocate_resource(&shmedia_iomap, res, round_sz,
+	if (allocate_resource(&shmedia_iomap, res, round_sz,
 			      shmedia_iomap.start, shmedia_iomap.end,
 			      PAGE_SIZE, NULL, NULL) != 0) {
-                panic("alloc_io_res(%s): cannot occupy\n",
-                    (res->name != NULL)? res->name: "???");
-        }
+		panic("alloc_io_res(%s): cannot occupy\n",
+		      (res->name != NULL) ? res->name : "???");
+	}
 
-        va = res->start;
-        pa &= PAGE_MASK;
+	va = res->start;
+	pa &= PAGE_MASK;
 
 	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
-	/* log at boot time ... */
-	printk("mapioaddr: %6s  [%2d page%s]  va 0x%08lx   pa 0x%08x\n",
-	       ((res->name != NULL) ? res->name : "???"),
-	       psz, psz == 1 ? " " : "s", va, pa);
+	for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
+		shmedia_mapioaddr(pa, va, flags);
+		va += PAGE_SIZE;
+		pa += PAGE_SIZE;
+	}
 
-        for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
-                shmedia_mapioaddr(pa, va);
-                va += PAGE_SIZE;
-                pa += PAGE_SIZE;
-        }
-
-        res->start += offset;
-        res->end = res->start + sz - 1;         /* not strictly necessary.. */
-
-        return res->start;
+	return (void __iomem *)(unsigned long)(res->start + offset);
 }
 
 static void shmedia_free_io(struct resource *res)
@@ -249,14 +174,12 @@
 
 static __init_refok void *sh64_get_page(void)
 {
-	extern int after_bootmem;
 	void *page;
 
-	if (after_bootmem) {
-		page = (void *)get_zeroed_page(GFP_ATOMIC);
-	} else {
+	if (slab_is_available())
+		page = (void *)get_zeroed_page(GFP_KERNEL);
+	else
 		page = alloc_bootmem_pages(PAGE_SIZE);
-	}
 
 	if (!page || ((unsigned long)page & ~PAGE_MASK))
 		panic("sh64_get_page: Out of memory already?\n");
@@ -264,17 +187,20 @@
 	return page;
 }
 
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
+			      unsigned long flags)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep, pte;
 	pgprot_t prot;
-	unsigned long flags = 1; /* 1 = CB0-1 device */
 
 	pr_debug("shmedia_mapiopage pa %08lx va %08lx\n",  pa, va);
 
+	if (!flags)
+		flags = 1; /* 1 = CB0-1 device */
+
 	pgdp = pgd_offset_k(va);
 	if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
 		pudp = (pud_t *)sh64_get_page();
@@ -288,7 +214,7 @@
 	}
 
 	pmdp = pmd_offset(pudp, va);
-	if (pmd_none(*pmdp) || !pmd_present(*pmdp) ) {
+	if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
 		ptep = (pte_t *)sh64_get_page();
 		set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
 	}
@@ -336,17 +262,19 @@
 	pte_clear(&init_mm, vaddr, ptep);
 }
 
-unsigned long onchip_remap(unsigned long phys, unsigned long size, const char *name)
+void __iomem *__ioremap(unsigned long offset, unsigned long size,
+			unsigned long flags)
 {
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
+	char name[14];
 
-	return shmedia_alloc_io(phys, size, name);
+	sprintf(name, "phys_%08x", (u32)offset);
+	return shmedia_alloc_io(offset, size, name, flags);
 }
-EXPORT_SYMBOL(onchip_remap);
+EXPORT_SYMBOL(__ioremap);
 
-void onchip_unmap(unsigned long vaddr)
+void __iounmap(void __iomem *virtual)
 {
+	unsigned long vaddr = (unsigned long)virtual & PAGE_MASK;
 	struct resource *res;
 	unsigned int psz;
 
@@ -357,10 +285,7 @@
 		return;
 	}
 
-        psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
-        printk(KERN_DEBUG "unmapioaddr: %6s  [%2d page%s] freed\n",
-	       res->name, psz, psz == 1 ? " " : "s");
+	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
 	shmedia_free_io(res);
 
@@ -371,9 +296,8 @@
 		kfree(res);
 	}
 }
-EXPORT_SYMBOL(onchip_unmap);
+EXPORT_SYMBOL(__iounmap);
 
-#ifdef CONFIG_PROC_FS
 static int
 ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
 		  void *data)
@@ -385,7 +309,10 @@
 	for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
 		if (p + 32 >= e)        /* Better than nothing */
 			break;
-		if ((nm = r->name) == 0) nm = "???";
+		nm = r->name;
+		if (nm == NULL)
+			nm = "???";
+
 		p += sprintf(p, "%08lx-%08lx: %s\n",
 			     (unsigned long)r->start,
 			     (unsigned long)r->end, nm);
@@ -393,14 +320,11 @@
 
 	return p-buf;
 }
-#endif /* CONFIG_PROC_FS */
 
 static int __init register_proc_onchip(void)
 {
-#ifdef CONFIG_PROC_FS
-	create_proc_read_entry("io_map",0,0, ioremap_proc_info, &shmedia_iomap);
-#endif
+	create_proc_read_entry("io_map", 0, 0, ioremap_proc_info,
+			       &shmedia_iomap);
 	return 0;
 }
-
-__initcall(register_proc_onchip);
+late_initcall(register_proc_onchip);
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 931f4d0..1b5fdfb 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/mm/mmap.c
  *
- * Copyright (C) 2008  Paul Mundt
+ * Copyright (C) 2008 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -21,9 +21,26 @@
 /*
  * To avoid cache aliases, we map the shared page with same color.
  */
-#define COLOUR_ALIGN(addr, pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+static inline unsigned long COLOUR_ALIGN(unsigned long addr,
+					 unsigned long pgoff)
+{
+	unsigned long base = (addr + shm_align_mask) & ~shm_align_mask;
+	unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
+
+	return base + off;
+}
+
+static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
+					      unsigned long pgoff)
+{
+	unsigned long base = addr & ~shm_align_mask;
+	unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
+
+	if (base + off <= addr)
+		return base + off;
+
+	return base - off;
+}
 
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -103,6 +120,117 @@
 			addr = COLOUR_ALIGN(addr, pgoff);
 	}
 }
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+	int do_colour_align;
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (unlikely(len > TASK_SIZE))
+		return -ENOMEM;
+
+	do_colour_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_colour_align = 1;
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_colour_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	/* check if free_area_cache is useful for us */
+	if (len <= mm->cached_hole_size) {
+	        mm->cached_hole_size = 0;
+		mm->free_area_cache = mm->mmap_base;
+	}
+
+	/* either no address requested or can't fit in requested address hole */
+	addr = mm->free_area_cache;
+	if (do_colour_align) {
+		unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+
+		addr = base + len;
+	}
+
+	/* make sure it can fit in the remaining address space */
+	if (likely(addr > len)) {
+		vma = find_vma(mm, addr-len);
+		if (!vma || addr <= vma->vm_start) {
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr-len);
+		}
+	}
+
+	if (unlikely(mm->mmap_base < len))
+		goto bottomup;
+
+	addr = mm->mmap_base-len;
+	if (do_colour_align)
+		addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * else if new region fits below vma->vm_start,
+		 * return with success:
+		 */
+		vma = find_vma(mm, addr);
+		if (likely(!vma || addr+len <= vma->vm_start)) {
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+		}
+
+		/* remember the largest hole we saw so far */
+		if (addr + mm->cached_hole_size < vma->vm_start)
+		        mm->cached_hole_size = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start-len;
+		if (do_colour_align)
+			addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+	} while (likely(len < vma->vm_start));
+
+bottomup:
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->cached_hole_size = ~0UL;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = mm->mmap_base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
 #endif /* CONFIG_MMU */
 
 /*
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index 1b9d430..44f4e31 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -109,6 +109,7 @@
 	case CPU_SH7785:
 	case CPU_SH7786:
 	case CPU_SH7723:
+	case CPU_SH7724:
 	case CPU_SHX3:
 		lmodel = &op_model_sh4a_ops;
 		break;
diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types
index 8477b5d..fec3a53 100644
--- a/arch/sh/tools/mach-types
+++ b/arch/sh/tools/mach-types
@@ -23,6 +23,7 @@
 7619SE			SH_7619_SOLUTION_ENGINE
 7721SE			SH_7721_SOLUTION_ENGINE
 7722SE			SH_7722_SOLUTION_ENGINE
+7724SE			SH_7724_SOLUTION_ENGINE
 7751SE			SH_7751_SOLUTION_ENGINE
 7780SE			SH_7780_SOLUTION_ENGINE
 7751SYSTEMH		SH_7751_SYSTEMH
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 639ac80..6586572 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -102,8 +102,8 @@
 #define TI_KERN_CNTD1	0x00000488
 #define TI_PCR		0x00000490
 #define TI_RESTART_BLOCK 0x00000498
-#define TI_KUNA_REGS	0x000004c0
-#define TI_KUNA_INSN	0x000004c8
+#define TI_KUNA_REGS	0x000004c8
+#define TI_KUNA_INSN	0x000004d0
 #define TI_FPREGS	0x00000500
 
 /* We embed this in the uppermost byte of thread_info->flags */
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 47d5619..8303ac4 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -17,6 +17,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe9..e5e78f9 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
+
+	return 0;
 }
 
 /* Don't do anything.  The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 		       "err(%d)\n", ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
 				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 		       "err(%d)\n",
 		       dev_handle, dev_ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 9027376..0ee642f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -75,8 +75,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index 16cc289..a61c349 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -28,6 +28,10 @@
 	 *	word 3: last insn address + 4 bytes
 	 *	word 4: fixup code address
 	 *
+	 * Deleted entries are encoded as:
+	 *	word 1: unused
+	 *	word 2: -1
+	 *
 	 * See asm/uaccess.h for more details.
 	 */
 
@@ -39,6 +43,10 @@
 			continue;
 		}
 
+		/* A deleted entry; see trim_init_extable */
+		if (walk->fixup == -1)
+			continue;
+
 		if (walk->insn == value)
 			return walk;
 	}
@@ -57,6 +65,27 @@
         return NULL;
 }
 
+#ifdef CONFIG_MODULES
+/* We could memmove them around; easier to mark the trimmed ones. */
+void trim_init_extable(struct module *m)
+{
+	unsigned int i;
+	bool range;
+
+	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
+		range = m->extable[i].fixup == 0;
+
+		if (within_module_init(m->extable[i].insn, m)) {
+			m->extable[i].fixup = -1;
+			if (range)
+				m->extable[i+1].fixup = -1;
+		}
+		if (range)
+			i++;
+	}
+}
+#endif /* CONFIG_MODULES */
+
 /* Special extable search, which handles ranges.  Returns fixup */
 unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
 {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f934225..aa9e926e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -451,23 +451,6 @@
 
 /* Only changed by ubd_init, which is an initcall. */
 static int thread_fd = -1;
-
-static void ubd_end_request(struct request *req, int bytes, int error)
-{
-	blk_end_request(req, error, bytes);
-}
-
-/* Callable only from interrupt context - otherwise you need to do
- * spin_lock_irq()/spin_lock_irqsave() */
-static inline void ubd_finish(struct request *req, int bytes)
-{
-	if(bytes < 0){
-		ubd_end_request(req, 0, -EIO);
-		return;
-	}
-	ubd_end_request(req, bytes, 0);
-}
-
 static LIST_HEAD(restart);
 
 /* XXX - move this inside ubd_intr. */
@@ -475,7 +458,6 @@
 static void ubd_handler(void)
 {
 	struct io_thread_req *req;
-	struct request *rq;
 	struct ubd *ubd;
 	struct list_head *list, *next_ele;
 	unsigned long flags;
@@ -492,10 +474,7 @@
 			return;
 		}
 
-		rq = req->req;
-		rq->nr_sectors -= req->length >> 9;
-		if(rq->nr_sectors == 0)
-			ubd_finish(rq, rq->hard_nr_sectors << 9);
+		blk_end_request(req->req, 0, req->length);
 		kfree(req);
 	}
 	reactivate_fd(thread_fd, UBD_IRQ);
@@ -1243,27 +1222,26 @@
 {
 	struct io_thread_req *io_req;
 	struct request *req;
-	int n, last_sectors;
+	sector_t sector;
+	int n;
 
 	while(1){
 		struct ubd *dev = q->queuedata;
 		if(dev->end_sg == 0){
-			struct request *req = elv_next_request(q);
+			struct request *req = blk_fetch_request(q);
 			if(req == NULL)
 				return;
 
 			dev->request = req;
-			blkdev_dequeue_request(req);
 			dev->start_sg = 0;
 			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
 		}
 
 		req = dev->request;
-		last_sectors = 0;
+		sector = blk_rq_pos(req);
 		while(dev->start_sg < dev->end_sg){
 			struct scatterlist *sg = &dev->sg[dev->start_sg];
 
-			req->sector += last_sectors;
 			io_req = kmalloc(sizeof(struct io_thread_req),
 					 GFP_ATOMIC);
 			if(io_req == NULL){
@@ -1272,10 +1250,10 @@
 				return;
 			}
 			prepare_request(req, io_req,
-					(unsigned long long) req->sector << 9,
+					(unsigned long long)sector << 9,
 					sg->offset, sg->length, sg_page(sg));
 
-			last_sectors = sg->length >> 9;
+			sector += sg->length >> 9;
 			n = os_write_file(thread_fd, &io_req,
 					  sizeof(struct io_thread_req *));
 			if(n != sizeof(struct io_thread_req *)){
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 58da248..9ce3f16 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -53,16 +53,21 @@
 #else
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
+#define __PAGE_KERNEL_EXEC                                              \
+	 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
 
 /*
  * The i386 can't do page protection for execute, and considers that the same
diff --git a/arch/um/include/asm/suspend.h b/arch/um/include/asm/suspend.h
deleted file mode 100644
index f4e8e00..0000000
--- a/arch/um/include/asm/suspend.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __UM_SUSPEND_H
-#define __UM_SUSPEND_H
-
-#endif
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 598b5c1..1b549bc 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -8,7 +8,7 @@
 
 subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 USER_OBJS := bugs.o ptrace_user.o fault.o
 
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index c8b4cce..2201e9c 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -8,10 +8,8 @@
 	setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
 	sysrq.o ksyms.o tls.o
 
-obj-$(CONFIG_MODULES) += um_module.o
-
 subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 ldt-y = ../sys-i386/ldt.o
 
diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c
deleted file mode 100644
index 3dead39..0000000
--- a/arch/um/sys-x86_64/um_module.c
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <linux/vmalloc.h>
-#include <linux/moduleloader.h>
-
-/* Copied from i386 arch/i386/kernel/module.c */
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc_exec(size);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/*
-	 * FIXME: If module_region == mod->init_region, trim exception
-	 * table entries.
-	 */
-}
-
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 0000000..ad8ec35
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,16 @@
+
+obj-$(CONFIG_KVM) += kvm/
+
+# Xen paravirtualization support
+obj-$(CONFIG_XEN) += xen/
+
+# lguest paravirtualization support
+obj-$(CONFIG_LGUEST_GUEST) += lguest/
+
+obj-y += kernel/
+obj-y += mm/
+
+obj-y += crypto/
+obj-y += vdso/
+obj-$(CONFIG_IA32_EMULATION) += ia32/
+
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a..68f5578 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,11 @@
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 
+config OUTPUT_FORMAT
+	string
+	default "elf32-i386" if X86_32
+	default "elf64-x86-64" if X86_64
+
 config ARCH_DEFCONFIG
 	string
 	default "arch/x86/configs/i386_defconfig" if X86_32
@@ -274,15 +279,9 @@
 
 	  If you don't know what to do here, say N.
 
-config NUMA_MIGRATE_IRQ_DESC
-	bool "Move irq desc when changing irq smp_affinity"
+config NUMA_IRQ_DESC
+	def_bool y
 	depends on SPARSE_IRQ && NUMA
-	depends on BROKEN
-	default n
-	---help---
-	  This enables moving irq_desc to cpu/node that irq will use handled.
-
-	  If you don't know what to do here, say N.
 
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
@@ -355,7 +354,7 @@
 	depends on X86_64
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
-	select X86_X2APIC
+	depends on X86_X2APIC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
@@ -740,6 +739,7 @@
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+	select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
 	def_bool y
@@ -1466,9 +1466,7 @@
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-	default "0x1000000" if X86_NUMAQ
-	default "0x200000" if X86_64
-	default "0x100000"
+	default "0x1000000"
 	---help---
 	  This gives the physical address where the kernel is loaded.
 
@@ -1487,15 +1485,15 @@
 	  to be specifically compiled to run from a specific memory area
 	  (normally a reserved region) and this option comes handy.
 
-	  So if you are using bzImage for capturing the crash dump, leave
-	  the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
-	  Otherwise if you plan to use vmlinux for capturing the crash dump
-	  change this value to start of the reserved region (Typically 16MB
-	  0x1000000). In other words, it can be set based on the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel. Typically this parameter is set as
-	  crashkernel=64M@16M. Please take a look at
-	  Documentation/kdump/kdump.txt for more details about crash dumps.
+	  So if you are using bzImage for capturing the crash dump,
+	  leave the value here unchanged to 0x1000000 and set
+	  CONFIG_RELOCATABLE=y.  Otherwise if you plan to use vmlinux
+	  for capturing the crash dump change this value to start of
+	  the reserved region.  In other words, it can be set based on
+	  the "X" value as specified in the "crashkernel=YM@XM"
+	  command line boot parameter passed to the panic-ed
+	  kernel. Please take a look at Documentation/kdump/kdump.txt
+	  for more details about crash dumps.
 
 	  Usage of bzImage for capturing the crash dump is recommended as
 	  one does not have to build two kernels. Same kernel can be used
@@ -1508,8 +1506,8 @@
 	  Don't change this unless you know what you are doing.
 
 config RELOCATABLE
-	bool "Build a relocatable kernel (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "Build a relocatable kernel"
+	default y
 	---help---
 	  This builds a kernel image that retains relocation information
 	  so it can be loaded someplace besides the default 1MB.
@@ -1524,12 +1522,16 @@
 	  it has been loaded at and the compile time physical address
 	  (CONFIG_PHYSICAL_START) is ignored.
 
+# Relocation on x86-32 needs some additional build support
+config X86_NEED_RELOCS
+	def_bool y
+	depends on X86_32 && RELOCATABLE
+
 config PHYSICAL_ALIGN
 	hex
 	prompt "Alignment value to which kernel should be aligned" if X86_32
-	default "0x100000" if X86_32
-	default "0x200000" if X86_64
-	range 0x2000 0x400000
+	default "0x1000000"
+	range 0x2000 0x1000000
 	---help---
 	  This value puts the alignment restrictions on physical address
 	  where kernel is loaded and run from. Kernel is compiled for an
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e7..d105f29 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,14 +159,30 @@
 	  options. See Documentation/x86_64/boot-options.txt for more
 	  details.
 
+config IOMMU_STRESS
+	bool "Enable IOMMU stress-test mode"
+	---help---
+	  This option disables various optimizations in IOMMU related
+	  code to do real stress testing of the IOMMU code. This option
+	  will cause a performance drop and should only be enabled for
+	  testing.
+
 config IOMMU_LEAK
 	bool "IOMMU leak tracing"
-	depends on DEBUG_KERNEL
-	depends on IOMMU_DEBUG
+	depends on IOMMU_DEBUG && DMA_API_DEBUG
 	---help---
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config X86_DS_SELFTEST
+    bool "DS selftest"
+    default y
+    depends on DEBUG_KERNEL
+    depends on X86_DS
+	---help---
+	  Perform Debug Store selftests at boot time.
+	  If in doubt, say "N".
+
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8c86b72..edbd0ca 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,8 +7,6 @@
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
-core-$(CONFIG_KVM) += arch/x86/kvm/
-
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
@@ -118,21 +116,8 @@
 
 libs-y  += arch/x86/lib/
 
-# Sub architecture files that needs linking first
-core-y += $(fcore-y)
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
-
-core-y += arch/x86/kernel/
-core-y += arch/x86/mm/
-
-core-y += arch/x86/crypto/
-core-y += arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+# See arch/x86/Kbuild for content of core part of the kernel
+core-y += arch/x86/
 
 # drivers-y are linked after core-y
 drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 172cf8a..851fe93 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,6 +3,8 @@
 cpustr.h
 mkcpustr
 offsets.h
+voffset.h
+zoffset.h
 setup
 setup.bin
 setup.elf
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e..8d16ada 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,9 +26,10 @@
 targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-		:= compressed
 
-setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y		+= printf.o string.o tty.o video.o video-mode.o version.o
+setup-y		+= printf.o regs.o string.o tty.o video.o video-mode.o
+setup-y		+= version.o
 setup-$(CONFIG_X86_APM_BOOT) += apm.o
 
 # The link order of the video-*.o modules can matter.  In particular,
@@ -86,19 +87,27 @@
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
-sed-offsets := -e 's/^00*/0/' \
-        -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
 
-quiet_cmd_offsets = OFFSETS $@
-      cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+quiet_cmd_voffset = VOFFSET $@
+      cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
 
-$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
-	$(call if_changed,offsets)
+targets += voffset.h
+$(obj)/voffset.h: vmlinux FORCE
+	$(call if_changed,voffset)
 
-targets += offsets.h
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+      cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,zoffset)
+
 
 AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/offsets.h
+$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
 
 LDFLAGS_setup.elf	:= -T
 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 7c19ce8..64a31a6 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,7 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
- *   Copyright 2009 Intel Corporation
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -90,8 +90,11 @@
 
 static void enable_a20_bios(void)
 {
-	asm volatile("pushfl; int $0x15; popfl"
-		     : : "a" ((u16)0x2401));
+	struct biosregs ireg;
+
+	initregs(&ireg);
+	ireg.ax = 0x2401;
+	intcall(0x15, &ireg, NULL);
 }
 
 static void enable_a20_kbc(void)
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index 7aa6033..ee27483 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   Original APM BIOS checking by Stephen Rothwell, May 1994
  *   (sfr@canb.auug.org.au)
@@ -19,75 +20,56 @@
 
 int query_apm_bios(void)
 {
-	u16 ax, bx, cx, dx, di;
-	u32 ebx, esi;
-	u8 err;
+	struct biosregs ireg, oreg;
 
 	/* APM BIOS installation check */
-	ax = 0x5300;
-	bx = cx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-		     : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-		     : : "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x53;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.flags & X86_EFLAGS_CF)
 		return -1;		/* No APM BIOS */
 
-	if (bx != 0x504d)	/* "PM" signature */
+	if (oreg.bx != 0x504d)		/* "PM" signature */
 		return -1;
 
-	if (!(cx & 0x02))		/* 32 bits supported? */
+	if (!(oreg.cx & 0x02))		/* 32 bits supported? */
 		return -1;
 
 	/* Disconnect first, just in case */
-	ax = 0x5304;
-	bx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-		     : "+a" (ax), "+b" (bx)
-		     : : "ecx", "edx", "esi", "edi");
-
-	/* Paranoia */
-	ebx = esi = 0;
-	cx = dx = di = 0;
+	ireg.al = 0x04;
+	intcall(0x15, &ireg, NULL);
 
 	/* 32-bit connect */
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6"
-		     : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx),
-		       "+S" (esi), "+D" (di), "=m" (err)
-		     : "a" (0x5303));
+	ireg.al = 0x03;
+	intcall(0x15, &ireg, &oreg);
 
-	boot_params.apm_bios_info.cseg = ax;
-	boot_params.apm_bios_info.offset = ebx;
-	boot_params.apm_bios_info.cseg_16 = cx;
-	boot_params.apm_bios_info.dseg = dx;
-	boot_params.apm_bios_info.cseg_len = (u16)esi;
-	boot_params.apm_bios_info.cseg_16_len = esi >> 16;
-	boot_params.apm_bios_info.dseg_len = di;
+	boot_params.apm_bios_info.cseg        = oreg.ax;
+	boot_params.apm_bios_info.offset      = oreg.ebx;
+	boot_params.apm_bios_info.cseg_16     = oreg.cx;
+	boot_params.apm_bios_info.dseg        = oreg.dx;
+	boot_params.apm_bios_info.cseg_len    = oreg.si;
+	boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+	boot_params.apm_bios_info.dseg_len    = oreg.di;
 
-	if (err)
+	if (oreg.flags & X86_EFLAGS_CF)
 		return -1;
 
 	/* Redo the installation check as the 32-bit connect;
 	   some BIOSes return different flags this way... */
 
-	ax = 0x5300;
-	bx = cx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-		     : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-		     : : "esi", "edi");
+	ireg.al = 0x00;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err || bx != 0x504d) {
+	if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
 		/* Failure with 32-bit connect, try to disconect and ignore */
-		ax = 0x5304;
-		bx = 0;
-		asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-			     : "+a" (ax), "+b" (bx)
-			     : : "ecx", "edx", "esi", "edi");
+		ireg.al = 0x04;
+		intcall(0x15, &ireg, NULL);
 		return -1;
 	}
 
-	boot_params.apm_bios_info.version = ax;
-	boot_params.apm_bios_info.flags = cx;
+	boot_params.apm_bios_info.version = oreg.ax;
+	boot_params.apm_bios_info.flags   = oreg.cx;
 	return 0;
 }
 
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 0000000..5077937
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls.  Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+	.code16
+	.text
+	.globl	intcall
+	.type	intcall, @function
+intcall:
+	/* Self-modify the INT instruction.  Ugly, but works. */
+	cmpb	%al, 3f
+	je	1f
+	movb	%al, 3f
+	jmp	1f		/* Synchronize pipeline */
+1:
+	/* Save state */
+	pushfl
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Copy input state to stack frame */
+	subw	$44, %sp
+	movw	%dx, %si
+	movw	%sp, %di
+	movw	$11, %cx
+	rep; movsd
+
+	/* Pop full state from the stack */
+	popal
+	popw	%gs
+	popw	%fs
+	popw	%es
+	popw	%ds
+	popfl
+
+	/* Actual INT */
+	.byte	0xcd		/* INT opcode */
+3:	.byte	0
+
+	/* Push full state to the stack */
+	pushfl
+	pushw	%ds
+	pushw	%es
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Re-establish C environment invariants */
+	cld
+	movzwl	%sp, %esp
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+
+	/* Copy output state from stack frame */
+	movw	68(%esp), %di	/* Original %cx == 3rd argument */
+	andw	%di, %di
+	jz	4f
+	movw	%sp, %si
+	movw	$11, %cx
+	rep; movsd
+4:	addw	$44, %sp
+
+	/* Restore state and return */
+	popal
+	popw	%gs
+	popw	%fs
+	popfl
+	retl
+	.size	intcall, .-intcall
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7b2692e..98239d2 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -26,6 +27,7 @@
 #include <asm/setup.h>
 #include "bitops.h"
 #include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -241,6 +243,49 @@
 /* apm.c */
 int query_apm_bios(void);
 
+/* bioscall.c */
+struct biosregs {
+	union {
+		struct {
+			u32 edi;
+			u32 esi;
+			u32 ebp;
+			u32 _esp;
+			u32 ebx;
+			u32 edx;
+			u32 ecx;
+			u32 eax;
+			u32 _fsgs;
+			u32 _dses;
+			u32 eflags;
+		};
+		struct {
+			u16 di, hdi;
+			u16 si, hsi;
+			u16 bp, hbp;
+			u16 _sp, _hsp;
+			u16 bx, hbx;
+			u16 dx, hdx;
+			u16 cx, hcx;
+			u16 ax, hax;
+			u16 gs, fs;
+			u16 es, ds;
+			u16 flags, hflags;
+		};
+		struct {
+			u8 dil, dih, edi2, edi3;
+			u8 sil, sih, esi2, esi3;
+			u8 bpl, bph, ebp2, ebp3;
+			u8 _spl, _sph, _esp2, _esp3;
+			u8 bl, bh, ebx2, ebx3;
+			u8 dl, dh, edx2, edx3;
+			u8 cl, ch, ecx2, ecx3;
+			u8 al, ah, eax2, eax3;
+		};
+	};
+};
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
 int cmdline_find_option_bool(const char *option);
@@ -279,6 +324,9 @@
 int vsprintf(char *buf, const char *fmt, va_list args);
 int printf(const char *fmt, ...);
 
+/* regs.c */
+void initregs(struct biosregs *regs);
+
 /* string.c */
 int strcmp(const char *str1, const char *str2);
 size_t strnlen(const char *s, size_t maxlen);
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 63eff3b..4a46fab 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,6 @@
 relocs
 vmlinux.bin.all
 vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 65551c9..49c8a4c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -19,7 +19,9 @@
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+hostprogs-y	:= mkpiggy
+
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
@@ -29,7 +31,7 @@
 
 
 targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_32) += relocs
+hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
 
 quiet_cmd_relocs = RELOCS  $@
       cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -37,46 +39,22 @@
 	$(call if_changed,relocs)
 
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD   $@
-      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
-	$(call if_changed,relocbin)
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
 
-ifeq ($(CONFIG_X86_32),y)
-
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,lzma)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,lzma)
-endif
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-
-else
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
 
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-endif
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
 
-suffix_$(CONFIG_KERNEL_GZIP)  = gz
-suffix_$(CONFIG_KERNEL_BZIP2) = bz2
-suffix_$(CONFIG_KERNEL_LZMA)  = lzma
+quiet_cmd_mkpiggy = MKPIGGY $@
+      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
 
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
-	$(call if_changed,ld)
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+	$(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3a8a866..75e4f00 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -12,16 +12,16 @@
  * the page directory. [According to comments etc elsewhere on a compressed
  * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
  *
- * Page 0 is deliberately kept safe, since System Management Mode code in 
+ * Page 0 is deliberately kept safe, since System Management Mode code in
  * laptops may need to access the BIOS data stored there.  This is also
- * useful for future device drivers that either access the BIOS via VM86 
+ * useful for future device drivers that either access the BIOS via VM86
  * mode.
  */
 
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.text
+	.text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
@@ -29,161 +29,151 @@
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head","ax",@progbits
+	.section ".text.head","ax",@progbits
 ENTRY(startup_32)
 	cld
-	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
-	 * us to not reload segments */
-	testb $(1<<6), BP_loadflags(%esi)
-	jnz 1f
+	/*
+	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+	 * us to not reload segments
+	 */
+	testb	$(1<<6), BP_loadflags(%esi)
+	jnz	1f
 
 	cli
-	movl $(__BOOT_DS),%eax
-	movl %eax,%ds
-	movl %eax,%es
-	movl %eax,%fs
-	movl %eax,%gs
-	movl %eax,%ss
+	movl	$__BOOT_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %fs
+	movl	%eax, %gs
+	movl	%eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-	leal (0x1e4+4)(%esi), %esp
-	call 1f
-1:	popl %ebp
-	subl $1b, %ebp
+	leal	(BP_scratch+4)(%esi), %esp
+	call	1f
+1:	popl	%ebp
+	subl	$1b, %ebp
 
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+/*
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
 
 #ifdef CONFIG_RELOCATABLE
-	movl 	%ebp, %ebx
-	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
-	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+	movl	%ebp, %ebx
+	movl	BP_kernel_alignment(%esi), %eax
+	decl	%eax
+	addl    %eax, %ebx
+	notl	%eax
+	andl    %eax, %ebx
 #else
-	movl $LOAD_PHYSICAL_ADDR, %ebx
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	subl input_len(%ebp), %ebx
-	movl output_len(%ebp), %eax
-	addl %eax, %ebx
-	/* Add 8 bytes for every 32K input block */
-	shrl $12, %eax
-	addl %eax, %ebx
-	/* Add 32K + 18 bytes of extra slack */
-	addl $(32768 + 18), %ebx
-	/* Align on a 4K boundary */
-	addl $4095, %ebx
-	andl $~4095, %ebx
+	/* Target address to relocate to for decompression */
+	addl	$z_extract_offset, %ebx
 
-/* Copy the compressed kernel to the end of our buffer
+	/* Set up the stack */
+	leal	boot_stack_end(%ebx), %esp
+
+	/* Zero EFLAGS */
+	pushl	$0
+	popfl
+
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	pushl %esi
-	leal _end(%ebp), %esi
-	leal _end(%ebx), %edi
-	movl $(_end - startup_32), %ecx
+	pushl	%esi
+	leal	(_bss-4)(%ebp), %esi
+	leal	(_bss-4)(%ebx), %edi
+	movl	$(_bss - startup_32), %ecx
+	shrl	$2, %ecx
 	std
-	rep
-	movsb
+	rep	movsl
 	cld
-	popl %esi
-
-/* Compute the kernel start address.
- */
-#ifdef CONFIG_RELOCATABLE
-	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
-	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
-#else
-	movl	$LOAD_PHYSICAL_ADDR, %ebp
-#endif
+	popl	%esi
 
 /*
  * Jump to the relocated address.
  */
-	leal relocated(%ebx), %eax
-	jmp *%eax
+	leal	relocated(%ebx), %eax
+	jmp	*%eax
 ENDPROC(startup_32)
 
-.section ".text"
+	.text
 relocated:
 
 /*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
  */
-	xorl %eax,%eax
-	leal _edata(%ebx),%edi
-	leal _end(%ebx), %ecx
-	subl %edi,%ecx
-	cld
-	rep
-	stosb
-
-/*
- * Setup the stack for the decompressor
- */
-	leal boot_stack_end(%ebx), %esp
+	xorl	%eax, %eax
+	leal	_bss(%ebx), %edi
+	leal	_ebss(%ebx), %ecx
+	subl	%edi, %ecx
+	shrl	$2, %ecx
+	rep	stosl
 
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	movl output_len(%ebx), %eax
-	pushl %eax
-			# push arguments for decompress_kernel:
-	pushl %ebp	# output address
-	movl input_len(%ebx), %eax
-	pushl %eax	# input_len
-	leal input_data(%ebx), %eax
-	pushl %eax	# input_data
-	leal boot_heap(%ebx), %eax
-	pushl %eax	# heap area
-	pushl %esi	# real mode pointer
-	call decompress_kernel
-	addl $20, %esp
-	popl %ecx
+	leal	z_extract_offset_negative(%ebx), %ebp
+				/* push arguments for decompress_kernel: */
+	pushl	%ebp		/* output address */
+	pushl	$z_input_len	/* input_len */
+	leal	input_data(%ebx), %eax
+	pushl	%eax		/* input_data */
+	leal	boot_heap(%ebx), %eax
+	pushl	%eax		/* heap area */
+	pushl	%esi		/* real mode pointer */
+	call	decompress_kernel
+	addl	$20, %esp
 
 #if CONFIG_RELOCATABLE
-/* Find the address of the relocations.
+/*
+ * Find the address of the relocations.
  */
-	movl %ebp, %edi
-	addl %ecx, %edi
+	leal	z_output_len(%ebp), %edi
 
-/* Calculate the delta between where vmlinux was compiled to run
+/*
+ * Calculate the delta between where vmlinux was compiled to run
  * and where it was actually loaded.
  */
-	movl %ebp, %ebx
-	subl $LOAD_PHYSICAL_ADDR, %ebx
-	jz   2f		/* Nothing to be done if loaded at compiled addr. */
+	movl	%ebp, %ebx
+	subl	$LOAD_PHYSICAL_ADDR, %ebx
+	jz	2f	/* Nothing to be done if loaded at compiled addr. */
 /*
  * Process relocations.
  */
 
-1:	subl $4, %edi
-	movl 0(%edi), %ecx
-	testl %ecx, %ecx
-	jz 2f
-	addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
-	jmp 1b
+1:	subl	$4, %edi
+	movl	(%edi), %ecx
+	testl	%ecx, %ecx
+	jz	2f
+	addl	%ebx, -__PAGE_OFFSET(%ebx, %ecx)
+	jmp	1b
 2:
 #endif
 
 /*
  * Jump to the decompressed kernel.
  */
-	xorl %ebx,%ebx
-	jmp *%ebp
+	xorl	%ebx, %ebx
+	jmp	*%ebp
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+	.bss
+	.balign 4
 boot_heap:
 	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ed4a829..f62c284 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -21,8 +21,8 @@
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.code32
-.text
+	.code32
+	.text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
@@ -33,12 +33,14 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head"
+	.section ".text.head"
 	.code32
 ENTRY(startup_32)
 	cld
-	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
-	 * us to not reload segments */
+	/*
+	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+	 * us to not reload segments
+	 */
 	testb $(1<<6), BP_loadflags(%esi)
 	jnz 1f
 
@@ -49,14 +51,15 @@
 	movl	%eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-	leal	(0x1e4+4)(%esi), %esp
+	leal	(BP_scratch+4)(%esi), %esp
 	call	1f
 1:	popl	%ebp
 	subl	$1b, %ebp
@@ -70,32 +73,28 @@
 	testl	%eax, %eax
 	jnz	no_longmode
 
-/* Compute the delta between where we were compiled to run at
+/*
+ * Compute the delta between where we were compiled to run at
  * and where the code will actually run at.
- */
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-	addl	$(PMD_PAGE_SIZE -1), %ebx
-	andl	$PMD_PAGE_MASK, %ebx
+	movl	BP_kernel_alignment(%esi), %eax
+	decl	%eax
+	addl	%eax, %ebx
+	notl	%eax
+	andl	%eax, %ebx
 #else
-	movl	$CONFIG_PHYSICAL_START, %ebx
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	subl	input_len(%ebp), %ebx
-	movl	output_len(%ebp), %eax
-	addl	%eax, %ebx
-	/* Add 8 bytes for every 32K input block */
-	shrl	$12, %eax
-	addl	%eax, %ebx
-	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-	addl	$(32768 + 18 + 4095), %ebx
-	andl	$~4095, %ebx
+	/* Target address to relocate to for decompression */
+	addl	$z_extract_offset, %ebx
 
 /*
  * Prepare for entering 64 bit mode
@@ -114,7 +113,7 @@
  /*
   * Build early 4G boot pagetable
   */
-	/* Initialize Page tables to 0*/
+	/* Initialize Page tables to 0 */
 	leal	pgtable(%ebx), %edi
 	xorl	%eax, %eax
 	movl	$((4096*6)/4), %ecx
@@ -155,7 +154,8 @@
 	btsl	$_EFER_LME, %eax
 	wrmsr
 
-	/* Setup for the jump to 64bit mode
+	/*
+	 * Setup for the jump to 64bit mode
 	 *
 	 * When the jump is performend we will be in long mode but
 	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
@@ -184,7 +184,8 @@
 
 #include "../../kernel/verify_cpu_64.S"
 
-	/* Be careful here startup_64 needs to be at a predictable
+	/*
+	 * Be careful here startup_64 needs to be at a predictable
 	 * address so I can export it in an ELF header.  Bootloaders
 	 * should look at the ELF header to find this address, as
 	 * it may change in the future.
@@ -192,7 +193,8 @@
 	.code64
 	.org 0x200
 ENTRY(startup_64)
-	/* We come here either from startup_32 or directly from a
+	/*
+	 * We come here either from startup_32 or directly from a
 	 * 64bit bootloader.  If we come here from a bootloader we depend on
 	 * an identity mapped page table being provied that maps our
 	 * entire text+data+bss and hopefully all of memory.
@@ -209,50 +211,54 @@
 	movl    $0x20, %eax
 	ltr	%ax
 
-	/* Compute the decompressed kernel start address.  It is where
+	/*
+	 * Compute the decompressed kernel start address.  It is where
 	 * we were loaded at aligned to a 2M boundary. %rbp contains the
 	 * decompressed kernel start address.
 	 *
 	 * If it is a relocatable kernel then decompress and run the kernel
 	 * from load address aligned to 2MB addr, otherwise decompress and
-	 * run the kernel from CONFIG_PHYSICAL_START
+	 * run the kernel from LOAD_PHYSICAL_ADDR
+	 *
+	 * We cannot rely on the calculation done in 32-bit mode, since we
+	 * may have been invoked via the 64-bit entry point.
 	 */
 
 	/* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
-	addq	$(PMD_PAGE_SIZE - 1), %rbp
-	andq	$PMD_PAGE_MASK, %rbp
-	movq	%rbp, %rbx
+	movl	BP_kernel_alignment(%rsi), %eax
+	decl	%eax
+	addq	%rax, %rbp
+	notq	%rax
+	andq	%rax, %rbp
 #else
-	movq	$CONFIG_PHYSICAL_START, %rbp
-	movq	%rbp, %rbx
+	movq	$LOAD_PHYSICAL_ADDR, %rbp
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	movl	input_len(%rip), %eax
-	subq	%rax, %rbx
-	movl	output_len(%rip), %eax
-	addq	%rax, %rbx
-	/* Add 8 bytes for every 32K input block */
-	shrq	$12, %rax
-	addq	%rax, %rbx
-	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-	addq	$(32768 + 18 + 4095), %rbx
-	andq	$~4095, %rbx
+	/* Target address to relocate to for decompression */
+	leaq	z_extract_offset(%rbp), %rbx
 
-/* Copy the compressed kernel to the end of our buffer
+	/* Set up the stack */
+	leaq	boot_stack_end(%rbx), %rsp
+
+	/* Zero EFLAGS */
+	pushq	$0
+	popfq
+
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	leaq	_end_before_pgt(%rip), %r8
-	leaq	_end_before_pgt(%rbx), %r9
-	movq	$_end_before_pgt /* - $startup_32 */, %rcx
-1:	subq	$8, %r8
-	subq	$8, %r9
-	movq	0(%r8), %rax
-	movq	%rax, 0(%r9)
-	subq	$8, %rcx
-	jnz	1b
+	pushq	%rsi
+	leaq	(_bss-8)(%rip), %rsi
+	leaq	(_bss-8)(%rbx), %rdi
+	movq	$_bss /* - $startup_32 */, %rcx
+	shrq	$3, %rcx
+	std
+	rep	movsq
+	cld
+	popq	%rsi
 
 /*
  * Jump to the relocated address.
@@ -260,37 +266,28 @@
 	leaq	relocated(%rbx), %rax
 	jmp	*%rax
 
-.section ".text"
+	.text
 relocated:
 
 /*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
  */
-	xorq	%rax, %rax
-	leaq    _edata(%rbx), %rdi
-	leaq    _end_before_pgt(%rbx), %rcx
+	xorl	%eax, %eax
+	leaq    _bss(%rip), %rdi
+	leaq    _ebss(%rip), %rcx
 	subq	%rdi, %rcx
-	cld
-	rep
-	stosb
-
-	/* Setup the stack */
-	leaq	boot_stack_end(%rip), %rsp
-
-	/* zero EFLAGS after setting rsp */
-	pushq	$0
-	popfq
+	shrq	$3, %rcx
+	rep	stosq
 
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	pushq	%rsi			# Save the real mode argument
-	movq	%rsi, %rdi		# real mode address
-	leaq	boot_heap(%rip), %rsi	# malloc area for uncompression
-	leaq	input_data(%rip), %rdx  # input_data
-	movl	input_len(%rip), %eax
-	movq	%rax, %rcx		# input_len
-	movq	%rbp, %r8		# output
+	pushq	%rsi			/* Save the real mode argument */
+	movq	%rsi, %rdi		/* real mode address */
+	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
+	leaq	input_data(%rip), %rdx  /* input_data */
+	movl	$z_input_len, %ecx	/* input_len */
+	movq	%rbp, %r8		/* output target address */
 	call	decompress_kernel
 	popq	%rsi
 
@@ -311,11 +308,21 @@
 	.quad   0x0000000000000000	/* TS continued */
 gdt_end:
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+	.bss
+	.balign 4
 boot_heap:
 	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
 	.fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+	.section ".pgtable","a",@nobits
+	.balign 4096
+pgtable:
+	.fill 6*4096, 1, 0
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index e45be73..842b2a3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,20 +325,18 @@
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
+	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+		error("Destination address inappropriately aligned");
 #ifdef CONFIG_X86_64
-	if ((unsigned long)output & (__KERNEL_ALIGN - 1))
-		error("Destination address not 2M aligned");
-	if ((unsigned long)output >= 0xffffffffffUL)
+	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
-		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
 	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
-#ifndef CONFIG_RELOCATABLE
-	if ((u32)output != LOAD_PHYSICAL_ADDR)
-		error("Wrong destination address");
 #endif
+#ifndef CONFIG_RELOCATABLE
+	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
+		error("Wrong destination address");
 #endif
 
 	if (!quiet)
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 0000000..bcbd36c
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *  Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License version
+ *  2 as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ *  02110-1301, USA.
+ *
+ *  H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Compute the desired load offset from a compressed program; outputs
+ * a small assembly wrapper with the appropriate symbols defined.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+static uint32_t getle32(const void *p)
+{
+	const uint8_t *cp = p;
+
+	return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
+		((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
+}
+
+int main(int argc, char *argv[])
+{
+	uint32_t olen;
+	long ilen;
+	unsigned long offs;
+	FILE *f;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+		return 1;
+	}
+
+	/* Get the information for the compressed kernel image first */
+
+	f = fopen(argv[1], "r");
+	if (!f) {
+		perror(argv[1]);
+		return 1;
+	}
+
+
+	if (fseek(f, -4L, SEEK_END)) {
+		perror(argv[1]);
+	}
+	fread(&olen, sizeof olen, 1, f);
+	ilen = ftell(f);
+	olen = getle32(&olen);
+	fclose(f);
+
+	/*
+	 * Now we have the input (compressed) and output (uncompressed)
+	 * sizes, compute the necessary decompression offset...
+	 */
+
+	offs = (olen > ilen) ? olen - ilen : 0;
+	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
+	offs += 32*1024 + 18;	/* Add 32K + 18 bytes slack */
+	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+
+	printf(".section \".rodata.compressed\",\"a\",@progbits\n");
+	printf(".globl z_input_len\n");
+	printf("z_input_len = %lu\n", ilen);
+	printf(".globl z_output_len\n");
+	printf("z_output_len = %lu\n", (unsigned long)olen);
+	printf(".globl z_extract_offset\n");
+	printf("z_extract_offset = 0x%lx\n", offs);
+	/* z_extract_offset_negative allows simplification of head_32.S */
+	printf(".globl z_extract_offset_negative\n");
+	printf("z_extract_offset_negative = -0x%lx\n", offs);
+
+	printf(".globl input_data, input_data_end\n");
+	printf("input_data:\n");
+	printf(".incbin \"%s\"\n", argv[1]);
+	printf("input_data_end:\n");
+
+	return 0;
+}
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
new file mode 100644
index 0000000..cc353e1
--- /dev/null
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,65 @@
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#undef i386
+
+#include <asm/page_types.h>
+
+#ifdef CONFIG_X86_64
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
+SECTIONS
+{
+	/* Be careful parts of head_64.S assume startup_32 is at
+	 * address 0.
+	 */
+	. = 0;
+	.text.head : {
+		_head = . ;
+		*(.text.head)
+		_ehead = . ;
+	}
+	.rodata.compressed : {
+		*(.rodata.compressed)
+	}
+	.text :	{
+		_text = .; 	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		. = ALIGN(8);	/* For convenience during zeroing */
+		_ebss = .;
+	}
+#ifdef CONFIG_X86_64
+       . = ALIGN(PAGE_SIZE);
+       .pgtable : {
+		_pgtable = . ;
+		*(.pgtable)
+		_epgtable = . ;
+	}
+#endif
+	_end = .;
+}
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr
deleted file mode 100644
index f02382a..0000000
--- a/arch/x86/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
-  .rodata.compressed : {
-	input_len = .;
-	LONG(input_data_end - input_data) input_data = .;
-	*(.data)
-	output_len = . - 4;
-	input_data_end = .;
-	}
-}
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
deleted file mode 100644
index bb3c483..0000000
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ /dev/null
@@ -1,43 +0,0 @@
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(startup_32)
-SECTIONS
-{
-	/* Be careful parts of head_32.S assume startup_32 is at
-	 * address 0.
-	 */
-	. = 0;
-	.text.head : {
-		_head = . ;
-		*(.text.head)
-		_ehead = . ;
-	}
-	.rodata.compressed : {
-		*(.rodata.compressed)
-	}
-	.text :	{
-		_text = .; 	/* Text */
-		*(.text)
-		*(.text.*)
-		_etext = . ;
-	}
-	.rodata : {
-		_rodata = . ;
-		*(.rodata)	 /* read-only data */
-		*(.rodata.*)
-		_erodata = . ;
-	}
-	.data :	{
-		_data = . ;
-		*(.data)
-		*(.data.*)
-		_edata = . ;
-	}
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		_end = . ;
-	}
-}
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
deleted file mode 100644
index bef1ac8..0000000
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ /dev/null
@@ -1,48 +0,0 @@
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(startup_64)
-SECTIONS
-{
-	/* Be careful parts of head_64.S assume startup_32 is at
-	 * address 0.
-	 */
-	. = 0;
-	.text.head : {
-		_head = . ;
-		*(.text.head)
-		_ehead = . ;
-	}
-	.rodata.compressed : {
-		*(.rodata.compressed)
-	}
-	.text :	{
-		_text = .; 	/* Text */
-		*(.text)
-		*(.text.*)
-		_etext = . ;
-	}
-	.rodata : {
-		_rodata = . ;
-		*(.rodata)	 /* read-only data */
-		*(.rodata.*)
-		_erodata = . ;
-	}
-	.data :	{
-		_data = . ;
-		*(.data)
-		*(.data.*)
-		_edata = . ;
-	}
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		. = ALIGN(8);
-		_end_before_pgt = . ;
-		. = ALIGN(4096);
-		pgtable = . ;
-		. = . + 4096 * 6;
-		_ebss = .;
-	}
-}
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 1aae8f3..c501a5b 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,17 +23,17 @@
  */
 static int read_mbr(u8 devno, void *buf)
 {
-	u16 ax, bx, cx, dx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0201;		/* Legacy Read, one sector */
-	cx = 0x0001;		/* Sector 0-0-1 */
-	dx = devno;
-	bx = (size_t)buf;
-	asm volatile("pushfl; stc; int $0x13; setc %%al; popfl"
-		     : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
-		     : : "esi", "edi", "memory");
+	initregs(&ireg);
+	ireg.ax = 0x0201;		/* Legacy Read, one sector */
+	ireg.cx = 0x0001;		/* Sector 0-0-1 */
+	ireg.dl = devno;
+	ireg.bx = (size_t)buf;
 
-	return -(u8)ax;		/* 0 or -1 */
+	intcall(0x13, &ireg, &oreg);
+
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
@@ -72,56 +73,46 @@
 
 static int get_edd_info(u8 devno, struct edd_info *ei)
 {
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	memset(ei, 0, sizeof *ei);
 
 	/* Check Extensions Present */
 
-	ax = 0x4100;
-	bx = EDDMAGIC1;
-	dx = devno;
-	asm("pushfl; stc; int $0x13; setc %%al; popfl"
-	    : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
-	    : : "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x41;
+	ireg.bx = EDDMAGIC1;
+	ireg.dl = devno;
+	intcall(0x13, &ireg, &oreg);
 
-	if ((u8)ax)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No extended information */
 
-	if (bx != EDDMAGIC2)
+	if (oreg.bx != EDDMAGIC2)
 		return -1;
 
 	ei->device  = devno;
-	ei->version = ax >> 8;	/* EDD version number */
-	ei->interface_support = cx; /* EDD functionality subsets */
+	ei->version = oreg.ah;		 /* EDD version number */
+	ei->interface_support = oreg.cx; /* EDD functionality subsets */
 
 	/* Extended Get Device Parameters */
 
 	ei->params.length = sizeof(ei->params);
-	ax = 0x4800;
-	dx = devno;
-	asm("pushfl; int $0x13; popfl"
-	    : "+a" (ax), "+d" (dx), "=m" (ei->params)
-	    : "S" (&ei->params)
-	    : "ebx", "ecx", "edi");
+	ireg.ah = 0x48;
+	ireg.si = (size_t)&ei->params;
+	intcall(0x13, &ireg, &oreg);
 
 	/* Get legacy CHS parameters */
 
 	/* Ralf Brown recommends setting ES:DI to 0:0 */
-	ax = 0x0800;
-	dx = devno;
-	di = 0;
-	asm("pushw %%es; "
-	    "movw %%di,%%es; "
-	    "pushfl; stc; int $0x13; setc %%al; popfl; "
-	    "popw %%es"
-	    : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
-	    : : "esi");
+	ireg.ah = 0x08;
+	ireg.es = 0;
+	intcall(0x13, &ireg, &oreg);
 
-	if ((u8)ax == 0) {
-		ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
-		ei->legacy_max_head = dx >> 8;
-		ei->legacy_sectors_per_track = cx & 0x3f;
+	if (!(oreg.eflags & X86_EFLAGS_CF)) {
+		ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+		ei->legacy_max_head = oreg.dh;
+		ei->legacy_sectors_per_track = oreg.cl & 0x3f;
 	}
 
 	return 0;
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 5d84d1c..b31cc54 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -22,7 +22,8 @@
 #include <asm/page_types.h>
 #include <asm/setup.h>
 #include "boot.h"
-#include "offsets.h"
+#include "voffset.h"
+#include "zoffset.h"
 
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
 SYSSEG		= 0x1000		/* historical load address >> 4 */
@@ -115,7 +116,7 @@
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0209		# header version number (>= 0x0105)
+		.word	0x020a		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -168,7 +169,11 @@
 					# end of setup code can be used by setup
 					# for local heap purposes.
 
-pad1:		.word	0
+ext_loader_ver:
+		.byte	0		# Extended boot loader version
+ext_loader_type:
+		.byte	0		# Extended boot loader type
+
 cmd_line_ptr:	.long	0		# (Header version 0x0202 or later)
 					# If nonzero, a 32-bit pointer
 					# to the kernel command line.
@@ -200,7 +205,7 @@
 #else
 relocatable_kernel:    .byte 0
 #endif
-pad2:			.byte 0
+min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment
 pad3:			.word 0
 
 cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
@@ -212,16 +217,27 @@
 
 hardware_subarch_data:	.quad 0
 
-payload_offset:		.long input_data
-payload_length:		.long input_data_end-input_data
+payload_offset:		.long ZO_input_data
+payload_length:		.long ZO_z_input_len
 
 setup_data:		.quad 0			# 64-bit physical pointer to
 						# single linked list of
 						# struct setup_data
 
+pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
+
+#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#define VO_INIT_SIZE	(VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+#define INIT_SIZE ZO_INIT_SIZE
+#else
+#define INIT_SIZE VO_INIT_SIZE
+#endif
+init_size:		.long INIT_SIZE		# kernel initialization size
+
 # End of setup header #####################################################
 
-	.section ".inittext", "ax"
+	.section ".entrytext", "ax"
 start_of_setup:
 #ifdef SAFE_RESET_DISK_CONTROLLER
 # Reset the disk controller.
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 58f0415..140172b 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -61,11 +62,10 @@
  */
 static void keyboard_set_repeat(void)
 {
-	u16 ax = 0x0305;
-	u16 bx = 0;
-	asm volatile("int $0x16"
-		     : "+a" (ax), "+b" (bx)
-		     : : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg;
+	initregs(&ireg);
+	ireg.ax = 0x0305;
+	intcall(0x16, &ireg, NULL);
 }
 
 /*
@@ -73,18 +73,22 @@
  */
 static void query_ist(void)
 {
+	struct biosregs ireg, oreg;
+
 	/* Some older BIOSes apparently crash on this call, so filter
 	   it from machines too old to have SpeedStep at all. */
 	if (cpu.level < 6)
 		return;
 
-	asm("int $0x15"
-	    : "=a" (boot_params.ist_info.signature),
-	      "=b" (boot_params.ist_info.command),
-	      "=c" (boot_params.ist_info.event),
-	      "=d" (boot_params.ist_info.perf_level)
-	    : "a" (0x0000e980),	 /* IST Support */
-	      "d" (0x47534943)); /* Request value */
+	initregs(&ireg);
+	ireg.ax  = 0xe980;	 /* IST Support */
+	ireg.edx = 0x47534943;	 /* Request value */
+	intcall(0x15, &ireg, &oreg);
+
+	boot_params.ist_info.signature  = oreg.eax;
+	boot_params.ist_info.command    = oreg.ebx;
+	boot_params.ist_info.event      = oreg.ecx;
+	boot_params.ist_info.perf_level = oreg.edx;
 }
 
 /*
@@ -93,13 +97,12 @@
 static void set_bios_mode(void)
 {
 #ifdef CONFIG_X86_64
-	u32 eax, ebx;
+	struct biosregs ireg;
 
-	eax = 0xec00;
-	ebx = 2;
-	asm volatile("int $0x15"
-		     : "+a" (eax), "+b" (ebx)
-		     : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ax = 0xec00;
+	ireg.bx = 2;
+	intcall(0x15, &ireg, NULL);
 #endif
 }
 
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 911eaae..a95a531 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -16,26 +17,22 @@
 
 int query_mca(void)
 {
-	u8 err;
-	u16 es, bx, len;
+	struct biosregs ireg, oreg;
+	u16 len;
 
-	asm("pushw %%es ; "
-	    "int $0x15 ; "
-	    "setc %0 ; "
-	    "movw %%es, %1 ; "
-	    "popw %%es"
-	    : "=acd" (err), "=acdSD" (es), "=b" (bx)
-	    : "a" (0xc000));
+	initregs(&ireg);
+	ireg.ah = 0xc0;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No MCA present */
 
-	set_fs(es);
-	len = rdfs16(bx);
+	set_fs(oreg.es);
+	len = rdfs16(oreg.bx);
 
 	if (len > sizeof(boot_params.sys_desc_table))
 		len = sizeof(boot_params.sys_desc_table);
 
-	copy_from_fs(&boot_params.sys_desc_table, bx, len);
+	copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
 	return 0;
 }
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 74b3d2b..cae3feb 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -20,12 +20,16 @@
 static int detect_memory_e820(void)
 {
 	int count = 0;
-	u32 next = 0;
-	u32 size, id, edi;
-	u8 err;
+	struct biosregs ireg, oreg;
 	struct e820entry *desc = boot_params.e820_map;
 	static struct e820entry buf; /* static so it is zeroed */
 
+	initregs(&ireg);
+	ireg.ax  = 0xe820;
+	ireg.cx  = sizeof buf;
+	ireg.edx = SMAP;
+	ireg.di  = (size_t)&buf;
+
 	/*
 	 * Note: at least one BIOS is known which assumes that the
 	 * buffer pointed to by one e820 call is the same one as
@@ -41,22 +45,13 @@
 	 */
 
 	do {
-		size = sizeof buf;
-
-		/* Important: %edx and %esi are clobbered by some BIOSes,
-		   so they must be either used for the error output
-		   or explicitly marked clobbered.  Given that, assume there
-		   is something out there clobbering %ebp and %edi, too. */
-		asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
-		    : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
-		      "=D" (edi), "+m" (buf)
-		    : "D" (&buf), "d" (SMAP), "a" (0xe820)
-		    : "esi");
+		intcall(0x15, &ireg, &oreg);
+		ireg.ebx = oreg.ebx; /* for next iteration... */
 
 		/* BIOSes which terminate the chain with CF = 1 as opposed
 		   to %ebx = 0 don't always report the SMAP signature on
 		   the final, failing, probe. */
-		if (err)
+		if (oreg.eflags & X86_EFLAGS_CF)
 			break;
 
 		/* Some BIOSes stop returning SMAP in the middle of
@@ -64,60 +59,64 @@
 		   screwed up the map at that point, we might have a
 		   partial map, the full map, or complete garbage, so
 		   just return failure. */
-		if (id != SMAP) {
+		if (oreg.eax != SMAP) {
 			count = 0;
 			break;
 		}
 
 		*desc++ = buf;
 		count++;
-	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
+	} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
 }
 
 static int detect_memory_e801(void)
 {
-	u16 ax, bx, cx, dx;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	bx = cx = dx = 0;
-	ax = 0xe801;
-	asm("stc; int $0x15; setc %0"
-	    : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
+	initregs(&ireg);
+	ireg.ax = 0xe801;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;
 
 	/* Do we really need to do this? */
-	if (cx || dx) {
-		ax = cx;
-		bx = dx;
+	if (oreg.cx || oreg.dx) {
+		oreg.ax = oreg.cx;
+		oreg.bx = oreg.dx;
 	}
 
-	if (ax > 15*1024)
+	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
-
-	/* This ignores memory above 16MB if we have a memory hole
-	   there.  If someone actually finds a machine with a memory
-	   hole at 16MB and no support for 0E820h they should probably
-	   generate a fake e820 map. */
-	boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
+	} else if (oreg.ax == 15*1024) {
+		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+	} else {
+		/*
+		 * This ignores memory above 16MB if we have a memory
+		 * hole there.  If someone actually finds a machine
+		 * with a memory hole at 16MB and no support for
+		 * 0E820h they should probably generate a fake e820
+		 * map.
+		 */
+		boot_params.alt_mem_k = oreg.ax;
+	}
 
 	return 0;
 }
 
 static int detect_memory_88(void)
 {
-	u16 ax;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	ax = 0x8800;
-	asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
+	initregs(&ireg);
+	ireg.ah = 0x88;
+	intcall(0x15, &ireg, &oreg);
 
-	boot_params.screen_info.ext_mem_k = ax;
+	boot_params.screen_info.ext_mem_k = oreg.ax;
 
-	return -err;
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 int detect_memory(void)
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 0000000..958019b
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+
+void initregs(struct biosregs *reg)
+{
+	memset(reg, 0, sizeof *reg);
+	reg->eflags |= X86_EFLAGS_CF;
+	reg->ds = ds();
+	reg->es = ds();
+	reg->fs = fs();
+	reg->gs = gs();
+}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index bb8dc2d..0f6ec455 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -15,8 +15,11 @@
 
 	. = 497;
 	.header		: { *(.header) }
+	.entrytext	: { *(.entrytext) }
 	.inittext	: { *(.inittext) }
 	.initdata	: { *(.initdata) }
+	__end_init = .;
+
 	.text		: { *(.text) }
 	.text32		: { *(.text32) }
 
@@ -52,4 +55,7 @@
 
 	. = ASSERT(_end <= 0x8000, "Setup too big!");
 	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+	/* Necessary for the very-old-loader check to work... */
+	. = ASSERT(__end_init <= 5*512, "init sections too big!");
+
 }
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 7e8e8b2..01ec69c 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,24 +23,23 @@
 
 void __attribute__((section(".inittext"))) putchar(int ch)
 {
-	unsigned char c = ch;
+	struct biosregs ireg;
 
-	if (c == '\n')
+	if (ch == '\n')
 		putchar('\r');	/* \n -> \r\n */
 
-	/* int $0x10 is known to have bugs involving touching registers
-	   it shouldn't.  Be extra conservative... */
-	asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
-		     : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
+	initregs(&ireg);
+	ireg.bx = 0x0007;
+	ireg.cx = 0x0001;
+	ireg.ah = 0x0e;
+	ireg.al = ch;
+	intcall(0x10, &ireg, NULL);
 }
 
 void __attribute__((section(".inittext"))) puts(const char *str)
 {
-	int n = 0;
-	while (*str) {
+	while (*str)
 		putchar(*str++);
-		n++;
-	}
 }
 
 /*
@@ -49,14 +49,13 @@
 
 static u8 gettime(void)
 {
-	u16 ax = 0x0200;
-	u16 cx, dx;
+	struct biosregs ireg, oreg;
 
-	asm volatile("int $0x1a"
-		     : "+a" (ax), "=c" (cx), "=d" (dx)
-		     : : "ebx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;
+	intcall(0x1a, &ireg, &oreg);
 
-	return dx >> 8;
+	return oreg.dh;
 }
 
 /*
@@ -64,19 +63,24 @@
  */
 int getchar(void)
 {
-	u16 ax = 0;
-	asm volatile("int $0x16" : "+a" (ax));
+	struct biosregs ireg, oreg;
 
-	return ax & 0xff;
+	initregs(&ireg);
+	/* ireg.ah = 0x00; */
+	intcall(0x16, &ireg, &oreg);
+
+	return oreg.al;
 }
 
 static int kbd_pending(void)
 {
-	u8 pending;
-	asm volatile("int $0x16; setnz %0"
-		     : "=qm" (pending)
-		     : "a" (0x0100));
-	return pending;
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	ireg.ah = 0x01;
+	intcall(0x16, &ireg, &oreg);
+
+	return !(oreg.eflags & X86_EFLAGS_ZF);
 }
 
 void kbd_flush(void)
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 3fa979c..d660be4 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -29,21 +30,21 @@
 
 static int set_bios_mode(u8 mode)
 {
-	u16 ax;
+	struct biosregs ireg, oreg;
 	u8 new_mode;
 
-	ax = mode;		/* AH=0x00 Set Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.al = mode;		/* AH=0x00 Set Video Mode */
+	intcall(0x10, &ireg, NULL);
 
-	ax = 0x0f00;		/* Get Current Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+
+	ireg.ah = 0x0f;		/* Get Current Video Mode */
+	intcall(0x10, &ireg, &oreg);
 
 	do_restore = 1;		/* Assume video contents were lost */
-	new_mode = ax & 0x7f;	/* Not all BIOSes are clean with the top bit */
+
+	/* Not all BIOSes are clean with the top bit */
+	new_mode = ireg.al & 0x7f;
 
 	if (new_mode == mode)
 		return 0;	/* Mode change OK */
@@ -53,10 +54,8 @@
 		/* Mode setting failed, but we didn't end up where we
 		   started.  That's bad.  Try to revert to the original
 		   video mode. */
-		ax = boot_params.screen_info.orig_video_mode;
-		asm volatile(INT10
-			     : "+a" (ax)
-			     : : "ebx", "ecx", "edx", "esi", "edi");
+		ireg.ax = boot_params.screen_info.orig_video_mode;
+		intcall(0x10, &ireg, NULL);
 	}
 #endif
 	return -1;
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4a58c8c..c700147 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -31,7 +32,7 @@
 static int vesa_probe(void)
 {
 #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
-	u16 ax, cx, di;
+	struct biosregs ireg, oreg;
 	u16 mode;
 	addr_t mode_ptr;
 	struct mode_info *mi;
@@ -39,13 +40,12 @@
 
 	video_vesa.modes = GET_HEAP(struct mode_info, 0);
 
-	ax = 0x4f00;
-	di = (size_t)&vginfo;
-	asm(INT10
-	    : "+a" (ax), "+D" (di), "=m" (vginfo)
-	    : : "ebx", "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f00;
+	ireg.di = (size_t)&vginfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f ||
+	if (ireg.ax != 0x004f ||
 	    vginfo.signature != VESA_MAGIC ||
 	    vginfo.version < 0x0102)
 		return 0;	/* Not present */
@@ -65,14 +65,12 @@
 
 		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-		ax = 0x4f01;
-		cx = mode;
-		di = (size_t)&vminfo;
-		asm(INT10
-		    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-		    : : "ebx", "edx", "esi");
+		ireg.ax = 0x4f01;
+		ireg.cx = mode;
+		ireg.di = (size_t)&vminfo;
+		intcall(0x10, &ireg, &oreg);
 
-		if (ax != 0x004f)
+		if (ireg.ax != 0x004f)
 			continue;
 
 		if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -111,20 +109,19 @@
 
 static int vesa_set_mode(struct mode_info *mode)
 {
-	u16 ax, bx, cx, di;
+	struct biosregs ireg, oreg;
 	int is_graphic;
 	u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
 	memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-	ax = 0x4f01;
-	cx = vesa_mode;
-	di = (size_t)&vminfo;
-	asm(INT10
-	    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-	    : : "ebx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f01;
+	ireg.cx = vesa_mode;
+	ireg.di = (size_t)&vminfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -141,14 +138,12 @@
 	}
 
 
-	ax = 0x4f02;
-	bx = vesa_mode;
-	di = 0;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+D" (di)
-		     : : "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f02;
+	ireg.bx = vesa_mode;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	graphic_mode = is_graphic;
@@ -171,50 +166,45 @@
 /* Switch DAC to 8-bit mode */
 static void vesa_dac_set_8bits(void)
 {
+	struct biosregs ireg, oreg;
 	u8 dac_size = 6;
 
 	/* If possible, switch the DAC to 8-bit mode */
 	if (vginfo.capabilities & 1) {
-		u16 ax, bx;
-
-		ax = 0x4f08;
-		bx = 0x0800;
-		asm volatile(INT10
-			     : "+a" (ax), "+b" (bx)
-			     : : "ecx", "edx", "esi", "edi");
-
-		if (ax == 0x004f)
-			dac_size = bx >> 8;
+		initregs(&ireg);
+		ireg.ax = 0x4f08;
+		ireg.bh = 0x08;
+		intcall(0x10, &ireg, &oreg);
+		if (oreg.ax == 0x004f)
+			dac_size = oreg.bh;
 	}
 
 	/* Set the color sizes to the DAC size, and offsets to 0 */
-	boot_params.screen_info.red_size = dac_size;
+	boot_params.screen_info.red_size   = dac_size;
 	boot_params.screen_info.green_size = dac_size;
-	boot_params.screen_info.blue_size = dac_size;
-	boot_params.screen_info.rsvd_size = dac_size;
+	boot_params.screen_info.blue_size  = dac_size;
+	boot_params.screen_info.rsvd_size  = dac_size;
 
-	boot_params.screen_info.red_pos = 0;
-	boot_params.screen_info.green_pos = 0;
-	boot_params.screen_info.blue_pos = 0;
-	boot_params.screen_info.rsvd_pos = 0;
+	boot_params.screen_info.red_pos    = 0;
+	boot_params.screen_info.green_pos  = 0;
+	boot_params.screen_info.blue_pos   = 0;
+	boot_params.screen_info.rsvd_pos   = 0;
 }
 
 /* Save the VESA protected mode info */
 static void vesa_store_pm_info(void)
 {
-	u16 ax, bx, di, es;
+	struct biosregs ireg, oreg;
 
-	ax = 0x4f0a;
-	bx = di = 0;
-	asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
-	    : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
-	    : : "ecx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f0a;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;
 
-	boot_params.screen_info.vesapm_seg = es;
-	boot_params.screen_info.vesapm_off = di;
+	boot_params.screen_info.vesapm_seg = oreg.es;
+	boot_params.screen_info.vesapm_off = oreg.di;
 }
 
 /*
@@ -252,7 +242,7 @@
 void vesa_store_edid(void)
 {
 #ifdef CONFIG_FIRMWARE_EDID
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	/* Apparently used as a nonsense token... */
 	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
@@ -260,33 +250,26 @@
 	if (vginfo.version < 0x0200)
 		return;		/* EDID requires VBE 2.0+ */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0000;		/* Report DDC capabilities */
-	cx = 0;			/* Controller 0 */
-	di = 0;			/* ES:DI must be 0 by spec */
+	initregs(&ireg);
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	/* ireg.bx = 0x0000; */		/* Report DDC capabilities */
+	/* ireg.cx = 0;	*/		/* Controller 0 */
+	ireg.es = 0;			/* ES:DI must be 0 by spec */
+	intcall(0x10, &ireg, &oreg);
 
-	/* Note: The VBE DDC spec is different from the main VESA spec;
-	   we genuinely have to assume all registers are destroyed here. */
-
-	asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
-	    : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
-	    : : "esi", "edx");
-
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;		/* No EDID */
 
 	/* BH = time in seconds to transfer EDD information */
 	/* BL = DDC level supported */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0001;		/* Read EDID */
-	cx = 0;			/* Controller 0 */
-	dx = 0;			/* EDID block number */
-	di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
-	asm(INT10
-	    : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
-	      "+c" (cx), "+D" (di)
-	    : : "esi");
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	ireg.bx = 0x0001;		/* Read EDID */
+	/* ireg.cx = 0; */		/* Controller 0 */
+	/* ireg.dx = 0;	*/		/* EDID block number */
+	ireg.es = ds();
+	ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+	intcall(0x10, &ireg, &oreg);
 #endif /* CONFIG_FIRMWARE_EDID */
 }
 
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 9e0587a..8f8d827 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -39,30 +40,30 @@
 /* Set basic 80x25 mode */
 static u8 vga_set_basic_mode(void)
 {
+	struct biosregs ireg, oreg;
 	u16 ax;
 	u8 rows;
 	u8 mode;
 
+	initregs(&ireg);
+
 #ifdef CONFIG_VIDEO_400_HACK
 	if (adapter >= ADAPTER_VGA) {
-		asm volatile(INT10
-			     : : "a" (0x1202), "b" (0x0030)
-			     : "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1202;
+		ireg.bx = 0x0030;
+		intcall(0x10, &ireg, NULL);
 	}
 #endif
 
 	ax = 0x0f00;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
-
-	mode = (u8)ax;
+	intcall(0x10, &ireg, &oreg);
+	mode = oreg.al;
 
 	set_fs(0);
 	rows = rdfs8(0x484);	/* rows minus one */
 
 #ifndef CONFIG_VIDEO_400_HACK
-	if ((ax == 0x5003 || ax == 0x5007) &&
+	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
 	    (rows == 0 || rows == 24))
 		return mode;
 #endif
@@ -71,10 +72,8 @@
 		mode = 3;
 
 	/* Set the mode */
-	ax = mode;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	ireg.ax = mode;		/* AH=0: set mode */
+	intcall(0x10, &ireg, NULL);
 	do_restore = 1;
 	return mode;
 }
@@ -82,43 +81,69 @@
 static void vga_set_8font(void)
 {
 	/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 8x8 font */
-	asm volatile(INT10 : : "a" (0x1112), "b" (0));
+	ireg.ax = 0x1112;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Use alternate print screen */
-	asm volatile(INT10 : : "a" (0x1200), "b" (0x20));
+	ireg.ax = 0x1200;
+	ireg.bl = 0x20;
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 6-7 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0607;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_14font(void)
 {
 	/* Set 9x14 font - 80x28 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 9x14 font */
-	asm volatile(INT10 : : "a" (0x1111), "b" (0));
+	ireg.ax = 0x1111;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 11-12 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0b0c;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_80x43(void)
 {
 	/* Set 80x43 mode on VGA (not EGA) */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 350 scans */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x30));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x30;
+	intcall(0x10, &ireg, NULL);
 
 	/* Reset video mode */
-	asm volatile(INT10 : : "a" (0x0003));
+	ireg.ax = 0x0003;
+	intcall(0x10, &ireg, NULL);
 
 	vga_set_8font();
 }
@@ -225,8 +250,6 @@
  */
 static int vga_probe(void)
 {
-	u16 ega_bx;
-
 	static const char *card_name[] = {
 		"CGA/MDA/HGC", "EGA", "VGA"
 	};
@@ -240,26 +263,26 @@
 		sizeof(ega_modes)/sizeof(struct mode_info),
 		sizeof(vga_modes)/sizeof(struct mode_info),
 	};
-	u8 vga_flag;
 
-	asm(INT10
-	    : "=b" (ega_bx)
-	    : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
-	    : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+
+	ireg.ax = 0x1200;
+	ireg.bl = 0x10;		/* Check EGA/VGA */
+	intcall(0x10, &ireg, &oreg);
 
 #ifndef _WAKEUP
-	boot_params.screen_info.orig_video_ega_bx = ega_bx;
+	boot_params.screen_info.orig_video_ega_bx = oreg.bx;
 #endif
 
 	/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
-	if ((u8)ega_bx != 0x10) {
+	if (oreg.bl != 0x10) {
 		/* EGA/VGA */
-		asm(INT10
-		    : "=a" (vga_flag)
-		    : "a" (0x1a00)
-		    : "ebx", "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1a00;
+		intcall(0x10, &ireg, &oreg);
 
-		if (vga_flag == 0x1a) {
+		if (oreg.al == 0x1a) {
 			adapter = ADAPTER_VGA;
 #ifndef _WAKEUP
 			boot_params.screen_info.orig_video_isVGA = 1;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 3bef2c1..bad728b 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -18,33 +19,29 @@
 
 static void store_cursor_position(void)
 {
-	u16 curpos;
-	u16 ax, bx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0300;
-	bx = 0;
-	asm(INT10
-	    : "=d" (curpos), "+a" (ax), "+b" (bx)
-	    : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x03;
+	intcall(0x10, &ireg, &oreg);
 
-	boot_params.screen_info.orig_x = curpos;
-	boot_params.screen_info.orig_y = curpos >> 8;
+	boot_params.screen_info.orig_x = oreg.dl;
+	boot_params.screen_info.orig_y = oreg.dh;
 }
 
 static void store_video_mode(void)
 {
-	u16 ax, page;
+	struct biosregs ireg, oreg;
 
 	/* N.B.: the saving of the video page here is a bit silly,
 	   since we pretty much assume page 0 everywhere. */
-	ax = 0x0f00;
-	asm(INT10
-	    : "+a" (ax), "=b" (page)
-	    : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x0f;
+	intcall(0x10, &ireg, &oreg);
 
 	/* Not all BIOSes are clean with respect to the top bit */
-	boot_params.screen_info.orig_video_mode = ax & 0x7f;
-	boot_params.screen_info.orig_video_page = page >> 8;
+	boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+	boot_params.screen_info.orig_video_page = oreg.bh;
 }
 
 /*
@@ -257,7 +254,7 @@
 	int y;
 	addr_t dst = 0;
 	u16 *src = saved.data;
-	u16 ax, bx, dx;
+	struct biosregs ireg;
 
 	if (graphic_mode)
 		return;		/* Can't restore onto a graphic mode */
@@ -296,12 +293,11 @@
 	}
 
 	/* Restore cursor position */
-	ax = 0x0200;		/* Set cursor position */
-	bx = 0;			/* Page number (<< 8) */
-	dx = (saved.cury << 8)+saved.curx;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+d" (dx)
-		     : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;		/* Set cursor position */
+	ireg.dh = saved.cury;
+	ireg.dl = saved.curx;
+	intcall(0x10, &ireg, NULL);
 }
 #else
 #define save_screen()		((void)0)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ee63f5d..5bb174a 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -112,20 +112,6 @@
 extern int do_restore;		/* Restore screen contents */
 extern int graphic_mode;	/* Graphics mode with linear frame buffer */
 
-/*
- * int $0x10 is notorious for touching registers it shouldn't.
- * gcc doesn't like %ebp being clobbered, so define it as a push/pop
- * sequence here.
- *
- * A number of systems, including the original PC can clobber %bp in
- * certain circumstances, like when scrolling.  There exists at least
- * one Trident video card which could clobber DS under a set of
- * circumstances that we are unlikely to encounter (scrolling when
- * using an extended graphics mode of more than 800x600 pixels), but
- * it's cheap insurance to deal with that here.
- */
-#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
-
 /* Accessing VGA indexed registers */
 static inline u8 in_idx(u16 port, u8 index)
 {
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 235b81d..edb992e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:50:58 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:21:55 2009
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
 # CONFIG_X86_64 is not set
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf32-i386"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -33,6 +34,7 @@
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -40,15 +42,16 @@
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 # CONFIG_AUDIT_ARCH is not set
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_32_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_32_LAZY_GS=y
 CONFIG_KTIME_SCALAR=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
@@ -60,10 +63,17 @@
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -113,23 +123,26 @@
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -139,6 +152,7 @@
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -154,6 +168,8 @@
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +183,6 @@
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -194,12 +209,12 @@
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
+# CONFIG_X86_BIGSMP is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_VSMP is not set
 # CONFIG_X86_RDC321X is not set
+# CONFIG_X86_32_NON_STANDARD is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,8 +245,10 @@
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_X86_XADD=y
 # CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
@@ -247,7 +264,7 @@
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_CYRIX_32=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_CPU_SUP_TRANSMETA_32=y
 CONFIG_CPU_SUP_UMC_32=y
 CONFIG_X86_DS=y
@@ -279,6 +296,7 @@
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 # CONFIG_NOHIGHMEM is not set
 CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
@@ -302,6 +320,8 @@
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_HIGHPTE=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
@@ -312,6 +332,7 @@
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -322,8 +343,9 @@
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -363,7 +385,6 @@
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -425,6 +446,7 @@
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_DOMAINS=y
+# CONFIG_DMAR is not set
 CONFIG_PCIEPORTBUS=y
 # CONFIG_HOTPLUG_PCI_PCIE is not set
 CONFIG_PCIEAER=y
@@ -435,6 +457,7 @@
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
@@ -481,7 +504,6 @@
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +661,7 @@
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +719,7 @@
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +730,10 @@
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -789,6 +811,7 @@
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -842,6 +865,7 @@
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +964,7 @@
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +1002,8 @@
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1053,7 @@
 CONFIG_E1000E=y
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1068,7 @@
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1078,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1088,7 @@
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_IBMLS is not set
@@ -1073,8 +1104,8 @@
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1084,21 +1115,21 @@
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1209,6 +1240,8 @@
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1303,6 +1336,7 @@
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 CONFIG_HW_RANDOM_INTEL=y
 CONFIG_HW_RANDOM_AMD=y
 CONFIG_HW_RANDOM_GEODE=y
@@ -1390,7 +1424,6 @@
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1424,6 +1457,7 @@
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1433,6 +1467,7 @@
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1448,11 +1483,14 @@
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1643,7 +1681,6 @@
 # CONFIG_FB_3DFX is not set
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_VT8623 is not set
-# CONFIG_FB_CYBLA is not set
 # CONFIG_FB_TRIDENT is not set
 # CONFIG_FB_ARK is not set
 # CONFIG_FB_PM3 is not set
@@ -1652,6 +1689,7 @@
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1738,6 +1776,8 @@
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1811,15 +1851,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1885,11 +1927,11 @@
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1931,7 +1973,6 @@
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1947,6 +1988,7 @@
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1958,8 +2000,10 @@
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1969,6 +2013,10 @@
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2037,6 +2085,7 @@
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2071,6 +2120,7 @@
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2101,6 +2151,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -2151,6 +2206,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2164,7 +2220,6 @@
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2251,6 +2306,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2266,6 +2322,7 @@
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
@@ -2289,13 +2346,19 @@
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2305,13 +2368,21 @@
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2321,7 +2392,6 @@
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
@@ -2329,7 +2399,7 @@
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_4KSTACKS is not set
 CONFIG_DOUBLEFAULT=y
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2365,6 +2435,8 @@
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2380,10 +2452,12 @@
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2456,6 +2530,7 @@
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2467,11 +2542,13 @@
 # CONFIG_CRYPTO_DEV_GEODE is not set
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_LGUEST is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2489,7 +2566,10 @@
 # CONFIG_LIBCRC32C is not set
 CONFIG_AUDIT_GENERIC=y
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d21..cee1dd2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:44:16 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:22:00 2009
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
 CONFIG_X86_64=y
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -34,6 +35,7 @@
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -41,14 +43,14 @@
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_64_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
 # CONFIG_KTIME_SCALAR is not set
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -61,10 +63,17 @@
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -114,23 +123,26 @@
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -140,6 +152,7 @@
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -155,6 +168,8 @@
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +182,6 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 CONFIG_BLOCK_COMPAT=y
@@ -195,12 +209,10 @@
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
-# CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_VSMP is not set
+# CONFIG_X86_UV is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,10 +242,10 @@
 # CONFIG_MCORE2 is not set
 CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
@@ -242,7 +254,7 @@
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_64=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_X86_DS=y
 CONFIG_X86_PTRACE_BTS=y
 CONFIG_HPET_TIMER=y
@@ -269,6 +281,7 @@
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
 CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
 # CONFIG_I8K is not set
 CONFIG_MICROCODE=y
 CONFIG_MICROCODE_INTEL=y
@@ -276,6 +289,7 @@
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
 CONFIG_DIRECT_GBPAGES=y
 CONFIG_NUMA=y
@@ -309,6 +323,8 @@
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
 CONFIG_X86_RESERVE_LOW_64K=y
@@ -317,6 +333,7 @@
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -325,9 +342,10 @@
 CONFIG_SCHED_HRTICK=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -370,7 +388,6 @@
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -436,6 +453,7 @@
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 CONFIG_K8_NB=y
 CONFIG_PCCARD=y
@@ -481,7 +499,6 @@
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +656,7 @@
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +714,7 @@
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +725,10 @@
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -788,9 +805,8 @@
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_SGI_XP is not set
 # CONFIG_HP_ILO is not set
-# CONFIG_SGI_GRU is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -844,6 +860,7 @@
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +957,7 @@
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +995,8 @@
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1046,7 @@
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1061,7 @@
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1071,7 @@
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1081,7 @@
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
@@ -1072,8 +1096,8 @@
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1083,21 +1107,21 @@
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1208,6 +1232,8 @@
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1301,6 +1327,7 @@
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 # CONFIG_HW_RANDOM_INTEL is not set
 # CONFIG_HW_RANDOM_AMD is not set
 CONFIG_NVRAM=y
@@ -1382,7 +1409,6 @@
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1416,6 +1442,7 @@
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1425,6 +1452,7 @@
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1440,11 +1468,14 @@
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1635,6 +1666,7 @@
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1720,6 +1752,8 @@
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1792,15 +1826,17 @@
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1866,11 +1902,11 @@
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1912,7 +1948,6 @@
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1928,6 +1963,7 @@
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1939,8 +1975,10 @@
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1950,6 +1988,10 @@
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2018,6 +2060,7 @@
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2051,6 +2094,7 @@
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2082,6 +2126,11 @@
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -2132,6 +2181,7 @@
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2145,7 +2195,6 @@
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2232,6 +2281,7 @@
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2247,6 +2297,7 @@
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
@@ -2269,13 +2320,19 @@
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2285,13 +2342,21 @@
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2301,14 +2366,13 @@
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_IOMMU_DEBUG is not set
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2344,6 +2408,8 @@
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2359,10 +2425,12 @@
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2414,6 +2482,7 @@
 #
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_AES_X86_64 is not set
+# CONFIG_CRYPTO_AES_NI_INTEL is not set
 # CONFIG_CRYPTO_ANUBIS is not set
 CONFIG_CRYPTO_ARC4=y
 # CONFIG_CRYPTO_BLOWFISH is not set
@@ -2435,6 +2504,7 @@
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2444,10 +2514,12 @@
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2464,7 +2536,10 @@
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..e590261 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -825,9 +825,11 @@
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create1
-	.quad sys_dup3			/* 330 */
+	.quad sys_dup3				/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
+	.quad sys_perf_counter_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f6aa18e..1a37bcd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <linux/stringify.h>
 #include <asm/asm.h>
 
 /*
@@ -74,6 +75,22 @@
 
 const unsigned char *const *find_nop_table(void);
 
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+									\
+      "661:\n\t" oldinstr "\n662:\n"					\
+      ".section .altinstructions,\"a\"\n"				\
+      _ASM_ALIGN "\n"							\
+      _ASM_PTR "661b\n"				/* label           */	\
+      _ASM_PTR "663f\n"				/* new instruction */	\
+      "	 .byte " __stringify(feature) "\n"	/* feature bit     */	\
+      "	 .byte 662b-661b\n"			/* sourcelen       */	\
+      "	 .byte 664f-663f\n"			/* replacementlen  */	\
+      ".previous\n"							\
+      ".section .altinstr_replacement, \"ax\"\n"			\
+      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
+      ".previous"
+
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
@@ -87,18 +104,7 @@
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature) : "memory")
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -109,35 +115,16 @@
  * Best is to use constraints that are fixed size (like (%1) ... "r")
  * If you use variable sized constraints like "m" or "g" in the
  * replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		: : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c[feat]\n"	/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-		      ".previous" : output : [feat] "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		: output : "i" (0), ## input)
 
 /*
  * use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344..262e028 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9..0c878ca 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
 #define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
 #define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
 					      domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...)					\
+	do {								\
+		if (amd_iommu_dump)						\
+			printk(KERN_INFO "AMD IOMMU: " format, ## arg);	\
+	} while(0);
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+	list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+	list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT	27	/* 128 MB */
+#define APERTURE_RANGE_SIZE	(1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES	(APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES	32	/* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a)	((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a)	(((a) >> 21) & 0x3fULL)
 
 /*
  * This structure contains generic data for  IOMMU protection domains
@@ -210,6 +231,26 @@
 };
 
 /*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+	/* address allocation bitmap */
+	unsigned long *bitmap;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
+	u64 *pte_pages[64];
+
+	unsigned long offset;
+};
+
+/*
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
@@ -222,18 +263,10 @@
 	unsigned long aperture_size;
 
 	/* address we start to search for free addresses */
-	unsigned long next_bit;
+	unsigned long next_address;
 
-	/* address allocation bitmap */
-	unsigned long *bitmap;
-
-	/*
-	 * Array of PTE pages for the aperture. In this array we save all the
-	 * leaf pages of the domain page table used for the aperture. This way
-	 * we don't need to walk the page table to find a specific PTE. We can
-	 * just calculate its address in constant time.
-	 */
-	u64 **pte_pages;
+	/* address space relevant data */
+	struct aperture_range *aperture[APERTURE_MAX_RANGES];
 
 	/* This will be set to true when TLB needs to be flushed */
 	bool need_flush;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f83..bb7d479 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-#define EIM_8BIT_APIC_ID	0
-#define EIM_32BIT_APIC_ID	1
+extern int x2apic_mode;
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -166,10 +165,9 @@
 	return val;
 }
 
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
 static inline int x2apic_enabled(void)
 {
@@ -183,6 +181,8 @@
 		return 1;
 	return 0;
 }
+
+#define x2apic_supported()	(cpu_has_x2apic)
 #else
 static inline void check_x2apic(void)
 {
@@ -190,28 +190,20 @@
 static inline void enable_x2apic(void)
 {
 }
-static inline void enable_IR_x2apic(void)
-{
-}
 static inline int x2apic_enabled(void)
 {
 	return 0;
 }
 
-#define	x2apic	0
-
+#define	x2apic_preenabled 0
+#define	x2apic_supported()	0
 #endif
 
+extern void enable_IR_x2apic(void);
+
 extern int get_physical_broadcast(void);
 
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
-	/* Docs say use 0 for future compatibility */
-	native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
-
+extern void apic_disable(void);
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC(void);
@@ -252,7 +244,7 @@
 #define local_apic_timer_c2_ok		1
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
@@ -410,7 +402,7 @@
 {
 	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 
-	if (APIC_XAPIC(ver))
+	if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
 		return (x >> 24) & 0xFF;
 	else
 		return (x >> 24) & 0x0F;
@@ -478,6 +470,9 @@
 extern void default_setup_apic_routing(void);
 
 #ifdef CONFIG_X86_32
+
+extern struct apic apic_default;
+
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bc9514f..7ddb36a 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -22,6 +22,7 @@
 #  define	APIC_INTEGRATED(x)	(1)
 #endif
 #define		APIC_XAPIC(x)		((x) >= 0x14)
+#define		APIC_EXT_SPACE(x)	((x) & 0x80000000)
 #define	APIC_TASKPRI	0x80
 #define		APIC_TPRI_MASK		0xFFu
 #define	APIC_ARBPRI	0x90
@@ -116,7 +117,9 @@
 #define		APIC_TDR_DIV_32		0x8
 #define		APIC_TDR_DIV_64		0x9
 #define		APIC_TDR_DIV_128	0xA
-#define	APIC_EILVT0     0x500
+#define	APIC_EFEAT	0x400
+#define	APIC_ECTRL	0x410
+#define APIC_EILVTn(n)	(0x500 + 0x10 * n)
 #define		APIC_EILVT_NR_AMD_K8	1	/* # of extended interrupts */
 #define		APIC_EILVT_NR_AMD_10H	4
 #define		APIC_EILVT_LVTOFF(x)	(((x) >> 4) & 0xF)
@@ -125,9 +128,6 @@
 #define		APIC_EILVT_MSG_NMI	0x4
 #define		APIC_EILVT_MSG_EXT	0x7
 #define		APIC_EILVT_MASKED	(1 << 16)
-#define	APIC_EILVT1     0x510
-#define	APIC_EILVT2     0x520
-#define	APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR	0x800
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fb..aff9f1f 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,241 @@
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+/* An 64bit atomic type */
+
+typedef struct {
+	unsigned long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val)	{ (val) }
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+#define __atomic64_read(ptr)		((ptr)->counter)
+
+static inline unsigned long long
+cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
+{
+	asm volatile(
+
+		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
+
+		     :		"=A" (old)
+
+		     : [ptr]	"D" (ptr),
+				"A" (old),
+				"b" (ll_low(new)),
+				"c" (ll_high(new))
+
+		     : "memory");
+
+	return old;
+}
+
+static inline unsigned long long
+atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
+		 unsigned long long new_val)
+{
+	return cmpxchg8b(&ptr->counter, old_val, new_val);
+}
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ * @old_val:  old value that was there
+ *
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
+ */
+
+static inline unsigned long long
+atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
+{
+	unsigned long long old_val;
+
+	do {
+		old_val = atomic_read(ptr);
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return old_val;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+{
+	atomic64_xchg(ptr, new_val);
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline unsigned long long atomic64_read(atomic64_t *ptr)
+{
+	unsigned long long curr_val;
+
+	do {
+		curr_val = __atomic64_read(ptr);
+	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
+
+	return curr_val;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+static inline unsigned long long
+atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
+{
+	unsigned long long old_val, new_val;
+
+	do {
+		old_val = atomic_read(ptr);
+		new_val = old_val + delta;
+
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return new_val;
+}
+
+static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
+{
+	return atomic64_add_return(-delta, ptr);
+}
+
+static inline long atomic64_inc_return(atomic64_t *ptr)
+{
+	return atomic64_add_return(1, ptr);
+}
+
+static inline long atomic64_dec_return(atomic64_t *ptr)
+{
+	return atomic64_sub_return(1, ptr);
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
+{
+	atomic64_add_return(delta, ptr);
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
+{
+	atomic64_add(-delta, ptr);
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int
+atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
+{
+	unsigned long long old_val = atomic64_sub_return(delta, ptr);
+
+	return old_val == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+static inline void atomic64_inc(atomic64_t *ptr)
+{
+	atomic64_add(1, ptr);
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+static inline void atomic64_dec(atomic64_t *ptr)
+{
+	atomic64_sub(1, ptr);
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(1, ptr);
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(-1, ptr);
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int
+atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
+{
+	long long old_val = atomic64_add_return(delta, ptr);
+
+	return old_val < 0;
+}
+
 #include <asm-generic/atomic.h>
 #endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6ba23dd..418e632 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,11 +8,26 @@
 
 #ifdef __KERNEL__
 
+#include <asm/page_types.h>
+
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
 				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
+/* Minimum kernel alignment, as a power of two */
+#ifdef CONFIG_x86_64
+#define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
+#else
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT+1)
+#endif
+#define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
+
+#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
+	(CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2))
+#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+#endif
+
 #ifdef CONFIG_KERNEL_BZIP2
 #define BOOT_HEAP_SIZE             0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 433adae..1724e8d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -50,7 +50,8 @@
 	__u32	ramdisk_size;
 	__u32	bootsect_kludge;
 	__u16	heap_end_ptr;
-	__u16	_pad1;
+	__u8	ext_loader_ver;
+	__u8	ext_loader_type;
 	__u32	cmd_line_ptr;
 	__u32	initrd_addr_max;
 	__u32	kernel_alignment;
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
index 2228020..d96c1ee 100644
--- a/arch/x86/include/asm/cpu_debug.h
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -86,105 +86,7 @@
 	CPU_VALUE_BIT,				/* value		*/
 };
 
-#define	CPU_FILE_VALUE			(1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel	Processor Families/Processor Number Series
- * --------------------------	------------------------------------------
- * 05_01, 05_02, 05_04		Pentium, Pentium with MMX
- *
- * 06_01			Pentium Pro
- * 06_03, 06_05			Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentum III
- *
- * 06_09, 060D			Pentium M
- *
- * 06_0E			Core Duo, Core Solo
- *
- * 06_0F			Xeon 3000, 3200, 5100, 5300, 7300 series,
- *				Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- *				Pentium dual-core
- * 06_17			Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C			Atom
- *
- * 0F_00, 0F_01, 0F_02		Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04			Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06			Xeon 7100, 5000 Series, Xeon MP,
- *				Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
-	CPU_NONE,
-/* Intel */
-	CPU_INTEL_PENTIUM_BIT,
-	CPU_INTEL_P6_BIT,
-	CPU_INTEL_PENTIUM_M_BIT,
-	CPU_INTEL_CORE_BIT,
-	CPU_INTEL_CORE2_BIT,
-	CPU_INTEL_ATOM_BIT,
-	CPU_INTEL_XEON_P4_BIT,
-	CPU_INTEL_XEON_MP_BIT,
-/* AMD */
-	CPU_AMD_K6_BIT,
-	CPU_AMD_K7_BIT,
-	CPU_AMD_K8_BIT,
-	CPU_AMD_0F_BIT,
-	CPU_AMD_10_BIT,
-	CPU_AMD_11_BIT,
-};
-
-#define	CPU_INTEL_PENTIUM	(1 << CPU_INTEL_PENTIUM_BIT)
-#define	CPU_INTEL_P6		(1 << CPU_INTEL_P6_BIT)
-#define	CPU_INTEL_PENTIUM_M	(1 << CPU_INTEL_PENTIUM_M_BIT)
-#define	CPU_INTEL_CORE		(1 << CPU_INTEL_CORE_BIT)
-#define	CPU_INTEL_CORE2		(1 << CPU_INTEL_CORE2_BIT)
-#define	CPU_INTEL_ATOM		(1 << CPU_INTEL_ATOM_BIT)
-#define	CPU_INTEL_XEON_P4	(1 << CPU_INTEL_XEON_P4_BIT)
-#define	CPU_INTEL_XEON_MP	(1 << CPU_INTEL_XEON_MP_BIT)
-
-#define	CPU_INTEL_PX		(CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define	CPU_INTEL_COREX		(CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define	CPU_INTEL_XEON		(CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define	CPU_CO_AT		(CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define	CPU_C2_AT		(CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define	CPU_CX_AT		(CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define	CPU_CX_XE		(CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define	CPU_P6_XE		(CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define	CPU_PM_CO_AT		(CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define	CPU_C2_AT_XE		(CPU_C2_AT | CPU_INTEL_XEON)
-#define	CPU_CX_AT_XE		(CPU_CX_AT | CPU_INTEL_XEON)
-#define	CPU_P6_CX_AT		(CPU_INTEL_P6 | CPU_CX_AT)
-#define	CPU_P6_CX_XE		(CPU_P6_XE | CPU_INTEL_COREX)
-#define	CPU_P6_CX_AT_XE		(CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define	CPU_PM_CX_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define	CPU_PX_CX_AT		(CPU_INTEL_PX | CPU_CX_AT)
-#define	CPU_PX_CX_AT_XE		(CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all supported Intel CPUs */
-#define	CPU_INTEL_ALL		(CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define	CPU_AMD_K6		(1 << CPU_AMD_K6_BIT)
-#define	CPU_AMD_K7		(1 << CPU_AMD_K7_BIT)
-#define	CPU_AMD_K8		(1 << CPU_AMD_K8_BIT)
-#define	CPU_AMD_0F		(1 << CPU_AMD_0F_BIT)
-#define	CPU_AMD_10		(1 << CPU_AMD_10_BIT)
-#define	CPU_AMD_11		(1 << CPU_AMD_11_BIT)
-
-#define	CPU_K10_PLUS		(CPU_AMD_10 | CPU_AMD_11)
-#define	CPU_K0F_PLUS		(CPU_AMD_0F | CPU_K10_PLUS)
-#define	CPU_K8_PLUS		(CPU_AMD_K8 | CPU_K0F_PLUS)
-#define	CPU_K7_PLUS		(CPU_AMD_K7 | CPU_K8_PLUS)
-
-/* Select all supported AMD CPUs */
-#define	CPU_AMD_ALL		(CPU_AMD_K6 | CPU_K7_PLUS)
-
-/* Select all supported CPUs */
-#define	CPU_ALL			(CPU_INTEL_ALL | CPU_AMD_ALL)
+#define	CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
 
 #define MAX_CPU_FILES		512
 
@@ -220,7 +122,6 @@
 	unsigned		min;		/* Register range min	*/
 	unsigned		max;		/* Register range max	*/
 	unsigned		flag;		/* Supported flags	*/
-	unsigned		model;		/* Supported models	*/
 };
 
 #endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c..4a28d22 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,7 +22,7 @@
 #define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
 #define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Exception */
 #define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
 #define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
 #define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
@@ -94,6 +94,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -115,6 +116,8 @@
 #define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE	(4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT      (4*32+23) /* POPCNT instruction */
 #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
@@ -192,11 +195,11 @@
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
 	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+	set_bit(bit, (unsigned long *)cpu_caps_cleared); \
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
-	clear_bit(bit, (unsigned long *)cleared_cpu_caps);	\
+	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a8f672b..70dac19 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@
  *     -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
-					 void *base, size_t size,
-					 bts_ovfl_callback_t ovfl,
-					 size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-					   void *base, size_t size,
-					   pebs_ovfl_callback_t ovfl,
-					   size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+					      void *base, size_t size,
+					      bts_ovfl_callback_t ovfl,
+					      size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+					     bts_ovfl_callback_t ovfl,
+					     size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+						void *base, size_t size,
+						pebs_ovfl_callback_t ovfl,
+						size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+					       void *base, size_t size,
+					       pebs_ovfl_callback_t ovfl,
+					       size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@
 		} lbr;
 		/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
 		struct {
-			__u64 jiffies;
+			__u64 clock;
 			pid_t pid;
-		} timestamp;
+		} event;
 	} variant;
 };
 
@@ -201,8 +234,12 @@
 struct pebs_trace {
 	struct ds_trace ds;
 
-	/* the PEBS reset value */
-	unsigned long long reset_value;
+	/* the number of valid counters in the below array */
+	unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+	/* the counter reset value */
+	unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -237,9 +274,11 @@
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+			     unsigned int counter, u64 value);
 
 /*
  * Initialization
@@ -252,21 +291,12 @@
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
 				struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-				  struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bed..d750a10 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
 #ifdef CONFIG_PERF_COUNTERS
-BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
 #ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e5..9ebc5c2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -13,6 +13,8 @@
 	unsigned int irq_spurious_count;
 #endif
 	unsigned int generic_irqs;	/* arch dependent */
+	unsigned int apic_perf_irqs;
+	unsigned int apic_pending_irqs;
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea4..6df45f6 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,8 @@
 extern void apic_timer_interrupt(void);
 extern void generic_interrupt(void);
 extern void error_interrupt(void);
+extern void perf_pending_interrupt(void);
+
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
@@ -63,7 +65,26 @@
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
 extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic     = ioapic;
+	irq_attr->ioapic_pin = ioapic_pin;
+	irq_attr->trigger    = trigger;
+	irq_attr->polarity   = polarity;
+}
+
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+					struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
@@ -78,7 +99,11 @@
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
 extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_generic_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_IO_APIC
+extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
+#endif
 #ifdef CONFIG_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 71c9e51..175adf5 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -67,7 +67,7 @@
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "m" (*fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -75,14 +75,6 @@
 	return err;
 }
 
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
-	if (task_thread_info(tsk)->status & TS_XSAVE)
-		return xrstor_checking(&tsk->thread.xstate->xsave);
-	else
-		return fxrstor_checking(&tsk->thread.xstate->fxsave);
-}
-
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
@@ -120,7 +112,7 @@
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "0" (0));
@@ -185,12 +177,9 @@
 	asm volatile("fnclex ; fwait");
 }
 
-static inline void restore_fpu(struct task_struct *tsk)
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-	if (task_thread_info(tsk)->status & TS_XSAVE) {
-		xrstor_checking(&tsk->thread.xstate->xsave);
-		return;
-	}
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -199,7 +188,9 @@
 		"nop ; frstor %1",
 		"fxrstor %1",
 		X86_FEATURE_FXSR,
-		"m" (tsk->thread.xstate->fxsave));
+		"m" (*fx));
+
+	return 0;
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -262,6 +253,14 @@
 
 #endif	/* CONFIG_X86_64 */
 
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
+	else
+		return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
+
 /*
  * Signal frame handlers...
  */
@@ -305,18 +304,18 @@
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context aswell. To prevent these kernel instructions
- * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
  * should use them only in the context of irq_ts_save/restore()
  */
 static inline int irq_ts_save(void)
 {
 	/*
-	 * If we are in process context, we are ok to take a spurious DNA fault.
-	 * Otherwise, doing clts() in process context require pre-emption to
-	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 * If in process context and not atomic, we can take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context requires disabling preemption
+	 * or some heavy lifting like kernel_fpu_begin()
 	 */
-	if (!in_interrupt())
+	if (!in_atomic())
 		return 0;
 
 	if (read_cr0() & X86_CR0_TS) {
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1a99e6c..58d7091 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,8 +60,4 @@
 extern void mask_8259A(void);
 extern void unmask_8259A(void);
 
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
 #endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd06..0000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
-
-#define MSR_ARCH_PERFMON_PERFCTR0		0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1		0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0		0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1		0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE	(1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT	(1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS	(1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR	(1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL	(0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK	(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
-	(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e4..daf866e 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,22 +154,19 @@
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
-				   int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+		 struct io_apic_irq_attr *irq_attr);
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
-#ifdef CONFIG_X86_64
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries);
-#endif
 
 extern void probe_nr_irqs_gsi(void);
 
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 86af260..0e9fe1d 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_IOMAP_H
+#define _ASM_X86_IOMAP_H
+
 /*
  * Copyright © 2008 Ingo Molnar
  *
@@ -31,3 +34,5 @@
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
+
+#endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760..f275e22 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79b..e997be98 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,6 +34,7 @@
 
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR			0x80
+# define IA32_SYSCALL_VECTOR		0x80
 #else
 # define IA32_SYSCALL_VECTOR		0x80
 #endif
@@ -107,16 +108,16 @@
 #define LOCAL_TIMER_VECTOR		0xef
 
 /*
- * Performance monitoring interrupt vector:
- */
-#define LOCAL_PERF_VECTOR		0xee
-
-/*
  * Generic system vector for platform specific use
  */
 #define GENERIC_INTERRUPT_VECTOR	0xed
 
 /*
+ * Performance monitoring pending work vector:
+ */
+#define LOCAL_PENDING_VECTOR		0xec
+
+/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index 54c8cc5..c2d1f3b 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -12,4 +12,17 @@
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
+#ifdef CONFIG_K8_NB
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+}
+#else
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return NULL;
+}
+#endif
+
+
 #endif /* _ASM_X86_K8_H */
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index dc3f6cf..125be8b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -16,6 +16,7 @@
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f0faf58..eabdc1c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -185,6 +185,7 @@
 		unsigned access:3;
 		unsigned invalid:1;
 		unsigned cr4_pge:1;
+		unsigned nxe:1;
 	};
 };
 
@@ -212,7 +213,6 @@
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
-	bool global;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
@@ -261,13 +261,11 @@
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
 	u64 host_tsc;
-	int interrupt_window_open;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	/*
 	 * rip and regs accesses must go through
 	 * kvm_{register,rip}_{read,write} functions.
@@ -286,6 +284,7 @@
 	u64 shadow_efer;
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
+	int32_t apic_arb_prio;
 	int mp_state;
 	int sipi_vector;
 	u64 ia32_misc_enable_msr;
@@ -320,6 +319,8 @@
 	struct kvm_pio_request pio;
 	void *pio_data;
 
+	u8 event_exit_inst_len;
+
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
@@ -329,11 +330,12 @@
 
 	struct kvm_queued_interrupt {
 		bool pending;
+		bool soft;
 		u8 nr;
 	} interrupt;
 
 	struct {
-		int active;
+		int vm86_active;
 		u8 save_iopl;
 		struct kvm_save_segment {
 			u16 selector;
@@ -356,9 +358,9 @@
 	unsigned int time_offset;
 	struct page *time_page;
 
+	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
-	bool nmi_window_open;
 
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
@@ -392,15 +394,14 @@
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
-	struct list_head oos_global_pages;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	struct hlist_head irq_ack_notifier_list;
 	int vapics_in_nmi_mode;
 
-	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
 	struct page *apic_access_page;
 
@@ -423,7 +424,6 @@
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
 	u32 mmu_unsync;
-	u32 mmu_unsync_global;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
@@ -443,7 +443,6 @@
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
-	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
@@ -511,20 +510,22 @@
 	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	int (*get_irq)(struct kvm_vcpu *vcpu);
-	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_irq)(struct kvm_vcpu *vcpu);
+	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	bool (*exception_injected)(struct kvm_vcpu *vcpu);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
-	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
-				       struct kvm_run *run);
-
+	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
-	int (*get_mt_mask_shift)(void);
+	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -538,7 +539,7 @@
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -552,6 +553,7 @@
 			  const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
@@ -563,6 +565,7 @@
 
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
+#define EMULTYPE_SKIP		    (1 << 2)
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
@@ -638,7 +641,6 @@
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
@@ -769,6 +771,8 @@
 #define HF_GIF_MASK		(1 << 0)
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
 
 /*
  * Hardware virtualization extension instructions may fault if a
@@ -791,5 +795,6 @@
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a15973..b7ed2c4 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -143,6 +143,9 @@
 	struct fetch_cache fetch;
 };
 
+#define X86_SHADOW_INT_MOV_SS  1
+#define X86_SHADOW_INT_STI     2
+
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
@@ -152,6 +155,9 @@
 	int mode;
 	u32 cs_base;
 
+	/* interruptibility state, as a result of execution of STI or MOV SS */
+	int interruptibility;
+
 	/* decode cache */
 	struct decode_cache decode;
 };
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 1caf576..313389c 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -17,8 +17,13 @@
 /* Pages for switcher itself, then two pages per cpu */
 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
-/* We map at -4M for ease of mapping into the guest (one PTE page). */
+/* We map at -4M (-2M when PAE is activated) for ease of mapping
+ * into the guest (one PTE page). */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_ADDR 0xFFE00000
+#else
 #define SWITCHER_ADDR 0xFFC00000
+#endif
 
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index faae199..d31c4a6 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -12,11 +12,13 @@
 #define LHCALL_TS		8
 #define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
+#define LHCALL_SET_PMD		13
 #define LHCALL_SET_PTE		14
-#define LHCALL_SET_PMD		15
+#define LHCALL_SET_PGD		15
 #define LHCALL_LOAD_TLS		16
 #define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
+#define LHCALL_SEND_INTERRUPTS	19
 
 #define LGUEST_TRAP_ENTRY 0x1F
 
@@ -32,10 +34,10 @@
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism. Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Seventeen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
- * value makes sense, it's returned in %eax.
+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
+ * If a return value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
@@ -47,8 +49,9 @@
 
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
-	/* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
-	unsigned long arg0, arg1, arg2, arg3;
+	/* These map directly onto eax, ebx, ecx, edx and esi
+	 * in struct lguest_regs */
+	unsigned long arg0, arg1, arg2, arg3, arg4;
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c882664..ef51b50 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -9,20 +9,31 @@
 
 struct device;
 
+enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+
 struct microcode_ops {
-	int  (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
-	int  (*request_microcode_fw) (int cpu, struct device *device);
+	enum ucode_state (*request_microcode_user) (int cpu,
+				const void __user *buf, size_t size);
 
-	void (*apply_microcode) (int cpu);
+	enum ucode_state (*request_microcode_fw) (int cpu,
+				struct device *device);
 
-	int  (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 	void (*microcode_fini_cpu) (int cpu);
+
+	/*
+	 * The generic 'microcode_core' part guarantees that
+	 * the callbacks below run on a target cpu when they
+	 * are being called.
+	 * See also the "Synchronization" section in microcode_core.c.
+	 */
+	int (*apply_microcode) (int cpu);
+	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
 struct ucode_cpu_info {
-	struct cpu_signature cpu_sig;
-	int valid;
-	void *mc;
+	struct cpu_signature	cpu_sig;
+	int			valid;
+	void			*mc;
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7f..e2a1bb6 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -61,9 +61,11 @@
 #ifdef CONFIG_X86_MPPARSE
 extern void find_smp_config(void);
 extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
 #else
 static inline void find_smp_config(void) { }
 static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
 #endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
@@ -72,20 +74,13 @@
 extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+				 int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-				u32 gsi, int triggering, int polarity);
 extern int mp_find_ioapic(int gsi);
 extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-		   u32 gsi, int triggering, int polarity)
-{
-	return 0;
-}
 #endif
 #else /* !CONFIG_ACPI: */
 static inline int acpi_probe_gsi(void)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc1..4d58d04 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,7 +121,6 @@
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_HWCR			0xc0010015
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 638bf62..2260376 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -12,6 +12,17 @@
 
 #include <asm/asm.h>
 #include <asm/errno.h>
+#include <asm/cpumask.h>
+
+struct msr {
+	union {
+		struct {
+			u32 l;
+			u32 h;
+		};
+		u64 q;
+	};
+};
 
 static inline unsigned long long native_read_tscp(unsigned int *aux)
 {
@@ -216,6 +227,8 @@
 #ifdef CONFIG_SMP
 int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
+void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
 #else  /*  CONFIG_SMP  */
@@ -229,6 +242,16 @@
 	wrmsr(msr_no, l, h);
 	return 0;
 }
+static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+				struct msr *msrs)
+{
+       rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+}
+static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+				struct msr *msrs)
+{
+       wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+}
 static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
 				    u32 *l, u32 *h)
 {
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c45a0a5..c972644 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -64,7 +64,7 @@
 	 * but since they are power of two we could use a
 	 * cheaper way --cvg
 	 */
-	return nmi_watchdog & 0x3;
+	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
 }
 #endif
 
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6d..c4ae822 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -27,6 +24,13 @@
 			       unsigned long end);
 
 #ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0f915ae..6f1b733 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -54,10 +54,6 @@
 extern int sysctl_legacy_va_layout;
 
 extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d38c91b..8d382d3 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -32,22 +32,14 @@
  */
 #define __PAGE_OFFSET           _AC(0xffff880000000000, UL)
 
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN		0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
+#define __PHYSICAL_START	((CONFIG_PHYSICAL_START +	 	\
+				  (CONFIG_PHYSICAL_ALIGN - 1)) &	\
+				 ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 #define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
 
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	48
 
@@ -71,12 +63,6 @@
 
 #define vmemmap ((struct page *)VMEMMAP_START)
 
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 826ad37..6473f5c 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -46,6 +46,12 @@
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif	/* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a53da00..4fb37c8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1399,25 +1401,23 @@
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 0000000..876ed97
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,100 @@
+#ifndef _ASM_X86_PERF_COUNTER_H
+#define _ASM_X86_PERF_COUNTER_H
+
+/*
+ * Performance counter hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC					8
+#define X86_PMC_MAX_FIXED					3
+
+#define X86_PMC_IDX_GENERIC				        0
+#define X86_PMC_IDX_FIXED				       32
+#define X86_PMC_IDX_MAX					       64
+
+#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
+
+/*
+ * Includes eventsel and unit mask as well:
+ */
+#define ARCH_PERFMON_EVENT_MASK				    0xffff
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
+
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union cpuid10_edx {
+	struct {
+		unsigned int num_counters_fixed:4;
+		unsigned int reserved:28;
+	} split;
+	unsigned int full;
+};
+
+
+/*
+ * Fixed-purpose performance counters:
+ */
+
+/*
+ * All 3 fixed-mode PMCs are configured via this single MSR:
+ */
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL			0x38d
+
+/*
+ * The counts are available in three separate MSRs:
+ */
+
+/* Instr_Retired.Any: */
+#define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS			(X86_PMC_IDX_FIXED + 0)
+
+/* CPU_CLK_Unhalted.Core: */
+#define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES			(X86_PMC_IDX_FIXED + 1)
+
+/* CPU_CLK_Unhalted.Ref: */
+#define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
+
+extern void set_perf_counter_pending(void);
+
+#define clear_perf_counter_pending()	do { } while (0)
+#define test_perf_counter_pending()	(0)
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(void);
+#else
+static inline void init_hw_perf_counters(void)		{ }
+static inline void perf_counters_lapic_init(void)	{ }
+#endif
+
+#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d1..18ef7eb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
@@ -503,6 +505,8 @@
 
 #ifndef __ASSEMBLY__
 
+extern int direct_gbpages;
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
 {
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 2733fad..5e67c15 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -46,6 +46,10 @@
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
+
 #define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
 
 #endif /* _ASM_X86_PGTABLE_32_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 6b87bc6..abde308 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -25,10 +25,6 @@
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)					\
 	printk("%s:%d: bad pte %p(%016lx).\n",		\
 	       __FILE__, __LINE__, &(e), pte_val(e))
@@ -135,8 +131,6 @@
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern int direct_gbpages;
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index fbf42b8..766ea16 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -51,11 +51,11 @@
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b8238dc..4d258ad 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,7 +273,6 @@
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c2cceae..c776826 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -135,7 +135,8 @@
 extern struct cpuinfo_x86	new_cpu_data;
 
 extern struct tss_struct	doublefault_tss;
-extern __u32			cleared_cpu_caps[NCAPINTS];
+extern __u32			cpu_caps_cleared[NCAPINTS];
+extern __u32			cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -409,9 +410,6 @@
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
 	/* Cached TLS descriptors: */
@@ -427,8 +425,12 @@
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
+#ifdef CONFIG_X86_32
 	unsigned long		ip;
+#endif
+#ifdef CONFIG_X86_64
 	unsigned long		fs;
+#endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
 	unsigned long		debugreg0;
@@ -460,14 +462,8 @@
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -795,6 +791,21 @@
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -804,6 +815,18 @@
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
@@ -814,6 +837,7 @@
 
 /* Boot loader type from the setup header: */
 extern int			bootloader_type;
+extern int			bootloader_version;
 
 extern char			ignore_fpu_irq;
 
@@ -874,7 +898,6 @@
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
-	.fs			= __KERNEL_PERCPU,			  \
 }
 
 /*
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 624f133..0f0d908 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -236,12 +236,11 @@
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-			    unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)	ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index a4737dd..64cf2d2 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
-#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_PGE	0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE) & 31)
+#define NEED_PGE	(1<<(X86_FEATURE_PGE) & 31)
+#endif
+#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bdc2ada..4093d1e 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -33,7 +33,6 @@
 	int (*setup_ioapic_ids)(void);
 };
 
-extern void x86_quirk_pre_intr_init(void);
 extern void x86_quirk_intr_init(void);
 
 extern void x86_quirk_trap_init(void);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19e0d88..6a84ed1 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -180,7 +180,7 @@
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+	return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index e3cc3c0..4517d6b 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
 # define MAX_PHYSADDR_BITS	44
-# define MAX_PHYSMEM_BITS	44 /* Can be max 45 bits */
+# define MAX_PHYSMEM_BITS	46
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 82ada75..85574b7 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -225,6 +225,7 @@
 #define SVM_EVTINJ_VALID_ERR (1 << 11)
 
 #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
 
 #define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
 #define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408..372b76e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,7 +1,7 @@
 /*
  * syscalls.h - Linux syscall interfaces (arch-specific)
  *
- * Copyright (c) 2008 Jaswinder Singh
+ * Copyright (c) 2008 Jaswinder Singh Rajput
  *
  * This file is released under the GPLv2.
  * See the file COPYING for more details.
@@ -12,50 +12,55 @@
 
 #include <linux/compiler.h>
 #include <linux/linkage.h>
-#include <linux/types.h>
 #include <linux/signal.h>
+#include <linux/types.h>
 
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
+/* kernel/signal.c */
+long sys_rt_sigreturn(struct pt_regs *);
+
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
 asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
 #ifdef CONFIG_X86_32
+/* kernel/ioport.c */
+long sys_iopl(struct pt_regs *);
+
 /* kernel/process_32.c */
-int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);
 
-/* kernel/signal_32.c */
+/* kernel/signal.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
+struct mmap_arg_struct;
+struct sel_arg_struct;
+struct oldold_utsname;
+struct old_utsname;
+
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
 			  unsigned long, unsigned long, unsigned long);
-struct mmap_arg_struct;
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-struct sel_arg_struct;
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
-struct old_utsname;
 asmlinkage int sys_uname(struct old_utsname __user *);
-struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
@@ -65,29 +70,27 @@
 #else /* CONFIG_X86_32 */
 
 /* X86_64 only */
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
 /* kernel/process_64.c */
-asmlinkage long sys_fork(struct pt_regs *);
 asmlinkage long sys_clone(unsigned long, unsigned long,
 			  void __user *, void __user *,
 			  struct pt_regs *);
-asmlinkage long sys_vfork(struct pt_regs *);
 asmlinkage long sys_execve(char __user *, char __user * __user *,
 			   char __user * __user *,
 			   struct pt_regs *);
 long sys_arch_prctl(int, unsigned long);
 
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
-/* kernel/signal_64.c */
+/* kernel/signal.c */
 asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
 				struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/sys_x86_64.c */
+struct new_utsname;
+
 asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
 			 unsigned long, unsigned long, unsigned long);
-struct new_utsname;
 asmlinkage long sys_uname(struct new_utsname __user *);
 
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index f729563..c4ee805 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -67,6 +67,7 @@
 	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
+	get_user(termios->c_line, &termio->c_line);
 	return copy_from_user(termios->c_cc, termio->c_cc, NCC);
 }
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8820a73..602c769 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,7 +94,8 @@
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
-#define TIF_SYSCALL_FTRACE	27	/* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
+#define TIF_SYSCALL_FTRACE	28	/* for ftrace syscall instrumentation */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_FTRACE	(1 << TIF_SYSCALL_FTRACE)
 
 /* work to do in syscall_trace_enter() */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 16a5c84..7f3eba0 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-	write_cr3(read_cr3());
+	native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = read_cr4();
+	cr4 = native_read_cr4();
 	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
@@ -172,6 +172,6 @@
 	flush_tlb_all();
 }
 
-extern void zap_low_mappings(void);
+extern void zap_low_mappings(bool early);
 
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f44b49a..066ef59 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -203,7 +203,8 @@
 void x86_pci_root_bus_res_quirks(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()	(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
+#define mc_capable()	((boot_cpu_data.x86_max_cores > 1) && \
+			(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids))
 #define smt_capable()			(smp_num_siblings > 1)
 #endif
 
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d53425..bfd74c0 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_TRAPS_H
 
 #include <asm/debugreg.h>
+#include <asm/siginfo.h>			/* TRAP_TRACE, ... */
 
 #ifdef CONFIG_X86_32
 #define dotraplinkage
@@ -13,6 +14,9 @@
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_stack_segment(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
@@ -74,7 +78,6 @@
 }
 
 extern int panic_on_unrecovered_nmi;
-extern int kstack_depth_to_print;
 
 void math_error(void __user *);
 void math_emulate(struct math_emu_info *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..732a3070 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,8 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
+#define __NR_perf_counter_open	336
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..900e161 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,7 +657,10 @@
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev				296
 __SYSCALL(__NR_pwritev, sys_pwritev)
-
+#define __NR_rt_tgsigqueueinfo			297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
+#define __NR_perf_counter_open			298
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 9b0e61b..bddd44f 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -37,7 +37,7 @@
 #define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE	32
+#define UV_ADP_SIZE			32
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
 #define UV_NET_ENDPOINT_INTD		0x38
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3a98ea..341070f 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -133,6 +133,7 @@
 struct uv_hub_info_s {
 	unsigned long		global_mmr_base;
 	unsigned long		gpa_mask;
+	unsigned int		gnode_extra;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
@@ -159,7 +160,8 @@
  * 		p -  PNODE (local part of nsids, right shifted 1)
  */
 #define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
-#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+#define UV_PNODE_TO_GNODE(p)		((p) |uv_hub_info->gnode_extra)
+#define UV_PNODE_TO_NASID(p)		(UV_PNODE_TO_GNODE(p) << 1)
 
 #define UV_LOCAL_MMR_BASE		0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE		0xf8000000UL
@@ -173,7 +175,7 @@
 #define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
 #define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
-	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+	((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
 
 #define UV_APIC_PNODE_SHIFT	6
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498f944..11be5ad 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -247,6 +247,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MCE_DURING_VMENTRY	 41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EPT_VIOLATION       48
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88d1bfc..f3477bb 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o i8259.o irqinit_$(BITS).o
+obj-y			+= setup.o i8259.o irqinit.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +44,7 @@
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-$(CONFIG_X86_DS)		+= ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)		+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
@@ -72,7 +73,7 @@
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_MODULES)		+= module_$(BITS).o
+obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d..6310861 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -522,7 +523,7 @@
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
 	unsigned int irq;
 	unsigned int plat_gsi = gsi;
@@ -532,14 +533,14 @@
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		if (triggering == ACPI_LEVEL_SENSITIVE)
+		if (trigger == ACPI_LEVEL_SENSITIVE)
 			eisa_set_level_irq(gsi);
 	}
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
 	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+		plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
 	}
 #endif
 	acpi_gsi_to_irq(plat_gsi, &irq);
@@ -903,10 +904,8 @@
 #endif
 
 static struct {
-	int apic_id;
 	int gsi_base;
 	int gsi_end;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
 int mp_find_ioapic(int gsi)
@@ -986,16 +985,12 @@
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
 	mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
 	mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
-	mp_ioapics[idx].apicver = 0;
-#endif
+
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
@@ -1158,26 +1153,52 @@
 	}
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+			int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+	struct mpc_intsrc mp_irq;
+	struct pci_dev *pdev;
+	unsigned char number;
+	unsigned int devfn;
+	int ioapic;
+	u8 pin;
+
+	if (!acpi_ioapic)
+		return 0;
+	if (!dev)
+		return 0;
+	if (dev->bus != &pci_bus_type)
+		return 0;
+
+	pdev = to_pci_dev(dev);
+	number = pdev->bus->number;
+	devfn = pdev->devfn;
+	pin = pdev->pin;
+	/* print the entry should happen on mptable identically */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	mp_irq.dstapic = mp_ioapics[ioapic].apicid;
+	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+	save_mp_irq(&mp_irq);
+#endif
+	return 0;
+}
+
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
 	int ioapic;
 	int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM	4096
-#define IRQ_COMPRESSION_START	64
-
-	static int pci_irq = IRQ_COMPRESSION_START;
-	/*
-	 * Mapping between Global System Interrupts, which
-	 * represent all possible interrupts, and IRQs
-	 * assigned to actual devices.
-	 */
-	static int gsi_to_irq[MAX_GSI_NUM];
-#else
+	struct io_apic_irq_attr irq_attr;
 
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
-#endif
 
 	/* Don't set up the ACPI SCI because it's already set up */
 	if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,95 +1217,24 @@
 		gsi = ioapic_renumber_irq(ioapic, gsi);
 #endif
 
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
 	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
 		printk(KERN_ERR "Invalid reference to IOAPIC pin "
-		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+		       "%d-%d\n", mp_ioapics[ioapic].apicid,
 		       ioapic_pin);
 		return gsi;
 	}
-	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
-		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
-		return gsi;
-#endif
-	}
 
-	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
-	/*
-	 * For GSI >= 64, use IRQ compression
-	 */
-	if ((gsi >= IRQ_COMPRESSION_START)
-	    && (triggering == ACPI_LEVEL_SENSITIVE)) {
-		/*
-		 * For PCI devices assign IRQs in order, avoiding gaps
-		 * due to unused I/O APIC pins.
-		 */
-		int irq = gsi;
-		if (gsi < MAX_GSI_NUM) {
-			/*
-			 * Retain the VIA chipset work-around (gsi > 15), but
-			 * avoid a problem where the 8254 timer (IRQ0) is setup
-			 * via an override (so it's not on pin 0 of the ioapic),
-			 * and at the same time, the pin 0 interrupt is a PCI
-			 * type.  The gsi > 15 test could cause these two pins
-			 * to be shared as IRQ0, and they are not shareable.
-			 * So test for this condition, and if necessary, avoid
-			 * the pin collision.
-			 */
-			gsi = pci_irq++;
-			/*
-			 * Don't assign IRQ used by ACPI SCI
-			 */
-			if (gsi == acpi_gbl_FADT.sci_interrupt)
-				gsi = pci_irq++;
-			gsi_to_irq[irq] = gsi;
-		} else {
-			printk(KERN_ERR "GSI %u is too high\n", gsi);
-			return gsi;
-		}
-	}
-#endif
-	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	if (enable_update_mptable)
+		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
+
+	set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+			     trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+			     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	io_apic_set_pci_routing(dev, gsi, &irq_attr);
+
 	return gsi;
 }
 
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-			u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
-	struct mpc_intsrc mp_irq;
-	int ioapic;
-
-	if (!acpi_ioapic)
-		return 0;
-
-	/* print the entry should happen on mptable identically */
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
-				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.srcbus = number;
-	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
-	ioapic = mp_find_ioapic(gsi);
-	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
-	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
-	save_mp_irq(&mp_irq);
-#endif
-	return 0;
-}
-
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 1c31cc0..167bc16 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -9,7 +9,7 @@
 always		:= wakeup.bin
 targets		:= wakeup.elf wakeup.lds
 
-wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
+wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
 
 # The link order of the video-*.o modules can matter.  In particular,
 # video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
new file mode 100644
index 0000000..f51eb0b
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/bioscall.S
@@ -0,0 +1 @@
+#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
new file mode 100644
index 0000000..6206033
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/regs.c
@@ -0,0 +1 @@
+#include "../../../boot/regs.c"
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 7c243a2..ca93638 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -104,7 +104,7 @@
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
-	saved_magic = 0x123456789abcdef0;
+       saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
 	return 0;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99..1c60554 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,16 @@
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+		      unsigned long address, u64
+		      **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+				      unsigned long start_page,
+				      unsigned int pages);
 
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -213,7 +222,7 @@
 {
 	struct amd_iommu *iommu;
 
-	list_for_each_entry(iommu, &amd_iommu_list, list)
+	for_each_iommu(iommu)
 		iommu_poll_events(iommu);
 
 	return IRQ_HANDLED;
@@ -440,7 +449,7 @@
 	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 				      domid, 1, 1);
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		spin_lock_irqsave(&iommu->lock, flags);
 		__iommu_queue_command(iommu, &cmd);
 		__iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@
 	}
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+	int i;
+
+	for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+		if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+			continue;
+		iommu_flush_domain(i);
+	}
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+	struct amd_iommu *iommu;
+	int i;
+
+	for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+		if (amd_iommu_pd_table[i] == NULL)
+			continue;
+
+		iommu = amd_iommu_rlookup_table[i];
+		if (!iommu)
+			continue;
+
+		iommu_queue_inv_dev_entry(iommu, i);
+		iommu_completion_wait(iommu);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@
 			  unsigned long phys_addr,
 			  int prot)
 {
-	u64 __pte, *pte, *page;
+	u64 __pte, *pte;
 
 	bus_addr  = PAGE_ALIGN(bus_addr);
 	phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@
 	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
 		return -EINVAL;
 
-	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L2_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L1_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+	pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
 
 	if (IOMMU_PTE_PRESENT(*pte))
 		return -EBUSY;
@@ -595,7 +613,8 @@
 		 * as allocated in the aperture
 		 */
 		if (addr < dma_dom->aperture_size)
-			__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+			__set_bit(addr >> PAGE_SHIFT,
+				  dma_dom->aperture[0]->bitmap);
 	}
 
 	return 0;
@@ -632,42 +651,191 @@
  ****************************************************************************/
 
 /*
- * The address allocator core function.
+ * The address allocator core functions.
  *
  * called with domain->lock held
  */
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+		      unsigned long address)
+{
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+	return pte;
+}
+
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+			   struct dma_ops_domain *dma_dom,
+			   bool populate, gfp_t gfp)
+{
+	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+	populate = false;
+#endif
+
+	if (index >= APERTURE_MAX_RANGES)
+		return -ENOMEM;
+
+	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+	if (!dma_dom->aperture[index])
+		return -ENOMEM;
+
+	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+	if (!dma_dom->aperture[index]->bitmap)
+		goto out_free;
+
+	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+	if (populate) {
+		unsigned long address = dma_dom->aperture_size;
+		int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+		u64 *pte, *pte_page;
+
+		for (i = 0; i < num_ptes; ++i) {
+			pte = alloc_pte(&dma_dom->domain, address,
+					&pte_page, gfp);
+			if (!pte)
+				goto out_free;
+
+			dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+			address += APERTURE_RANGE_SIZE / 64;
+		}
+	}
+
+	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+	/* Intialize the exclusion range if necessary */
+	if (iommu->exclusion_start &&
+	    iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+	    iommu->exclusion_start < dma_dom->aperture_size) {
+		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+		int pages = iommu_num_pages(iommu->exclusion_start,
+					    iommu->exclusion_length,
+					    PAGE_SIZE);
+		dma_ops_reserve_addresses(dma_dom, startpage, pages);
+	}
+
+	/*
+	 * Check for areas already mapped as present in the new aperture
+	 * range and mark those pages as reserved in the allocator. Such
+	 * mappings may already exist as a result of requested unity
+	 * mappings for devices.
+	 */
+	for (i = dma_dom->aperture[index]->offset;
+	     i < dma_dom->aperture_size;
+	     i += PAGE_SIZE) {
+		u64 *pte = fetch_pte(&dma_dom->domain, i);
+		if (!pte || !IOMMU_PTE_PRESENT(*pte))
+			continue;
+
+		dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+	}
+
+	return 0;
+
+out_free:
+	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+	kfree(dma_dom->aperture[index]);
+	dma_dom->aperture[index] = NULL;
+
+	return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+					struct dma_ops_domain *dom,
+					unsigned int pages,
+					unsigned long align_mask,
+					u64 dma_mask,
+					unsigned long start)
+{
+	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
+	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i = start >> APERTURE_RANGE_SHIFT;
+	unsigned long boundary_size;
+	unsigned long address = -1;
+	unsigned long limit;
+
+	next_bit >>= PAGE_SHIFT;
+
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+			PAGE_SIZE) >> PAGE_SHIFT;
+
+	for (;i < max_index; ++i) {
+		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+		if (dom->aperture[i]->offset >= dma_mask)
+			break;
+
+		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+					       dma_mask >> PAGE_SHIFT);
+
+		address = iommu_area_alloc(dom->aperture[i]->bitmap,
+					   limit, next_bit, pages, 0,
+					    boundary_size, align_mask);
+		if (address != -1) {
+			address = dom->aperture[i]->offset +
+				  (address << PAGE_SHIFT);
+			dom->next_address = address + (pages << PAGE_SHIFT);
+			break;
+		}
+
+		next_bit = 0;
+	}
+
+	return address;
+}
+
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     struct dma_ops_domain *dom,
 					     unsigned int pages,
 					     unsigned long align_mask,
 					     u64 dma_mask)
 {
-	unsigned long limit;
 	unsigned long address;
-	unsigned long boundary_size;
 
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-			PAGE_SIZE) >> PAGE_SHIFT;
-	limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
-				       dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+	dom->next_address = 0;
+	dom->need_flush = true;
+#endif
 
-	if (dom->next_bit >= limit) {
-		dom->next_bit = 0;
-		dom->need_flush = true;
-	}
+	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+				     dma_mask, dom->next_address);
 
-	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
-				   0 , boundary_size, align_mask);
 	if (address == -1) {
-		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
-				0, boundary_size, align_mask);
+		dom->next_address = 0;
+		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+					     dma_mask, 0);
 		dom->need_flush = true;
 	}
 
-	if (likely(address != -1)) {
-		dom->next_bit = address + pages;
-		address <<= PAGE_SHIFT;
-	} else
+	if (unlikely(address == -1))
 		address = bad_dma_address;
 
 	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@
 				   unsigned long address,
 				   unsigned int pages)
 {
-	address >>= PAGE_SHIFT;
-	iommu_area_free(dom->bitmap, address, pages);
+	unsigned i = address >> APERTURE_RANGE_SHIFT;
+	struct aperture_range *range = dom->aperture[i];
 
-	if (address >= dom->next_bit)
+	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+	if (i < 4)
+		return;
+#endif
+
+	if (address >= dom->next_address)
 		dom->need_flush = true;
+
+	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+	iommu_area_free(range->bitmap, address, pages);
+
 }
 
 /****************************************************************************
@@ -736,12 +916,16 @@
 				      unsigned long start_page,
 				      unsigned int pages)
 {
-	unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
 
 	if (start_page + pages > last_page)
 		pages = last_page - start_page;
 
-	iommu_area_reserve(dom->bitmap, start_page, pages);
+	for (i = start_page; i < start_page + pages; ++i) {
+		int index = i / APERTURE_RANGE_PAGES;
+		int page  = i % APERTURE_RANGE_PAGES;
+		__set_bit(page, dom->aperture[index]->bitmap);
+	}
 }
 
 static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
+	int i;
+
 	if (!dom)
 		return;
 
 	free_pagetable(&dom->domain);
 
-	kfree(dom->pte_pages);
-
-	kfree(dom->bitmap);
+	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+		if (!dom->aperture[i])
+			continue;
+		free_page((unsigned long)dom->aperture[i]->bitmap);
+		kfree(dom->aperture[i]);
+	}
 
 	kfree(dom);
 }
@@ -797,19 +986,9 @@
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-						   unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 {
 	struct dma_ops_domain *dma_dom;
-	unsigned i, num_pte_pages;
-	u64 *l2_pde;
-	u64 address;
-
-	/*
-	 * Currently the DMA aperture must be between 32 MB and 1GB in size
-	 */
-	if ((order < 25) || (order > 30))
-		return NULL;
 
 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
 	if (!dma_dom)
@@ -826,55 +1005,20 @@
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
-	dma_dom->aperture_size = (1ULL << order);
-	dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
-				  GFP_KERNEL);
-	if (!dma_dom->bitmap)
-		goto free_dma_dom;
-	/*
-	 * mark the first page as allocated so we never return 0 as
-	 * a valid dma-address. So we can use 0 as error value
-	 */
-	dma_dom->bitmap[0] = 1;
-	dma_dom->next_bit = 0;
 
 	dma_dom->need_flush = false;
 	dma_dom->target_dev = 0xffff;
 
-	/* Intialize the exclusion range if necessary */
-	if (iommu->exclusion_start &&
-	    iommu->exclusion_start < dma_dom->aperture_size) {
-		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-		int pages = iommu_num_pages(iommu->exclusion_start,
-					    iommu->exclusion_length,
-					    PAGE_SIZE);
-		dma_ops_reserve_addresses(dma_dom, startpage, pages);
-	}
+	if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+		goto free_dma_dom;
 
 	/*
-	 * At the last step, build the page tables so we don't need to
-	 * allocate page table pages in the dma_ops mapping/unmapping
-	 * path.
+	 * mark the first page as allocated so we never return 0 as
+	 * a valid dma-address. So we can use 0 as error value
 	 */
-	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
-	dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
-			GFP_KERNEL);
-	if (!dma_dom->pte_pages)
-		goto free_dma_dom;
+	dma_dom->aperture[0]->bitmap[0] = 1;
+	dma_dom->next_address = 0;
 
-	l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
-	if (l2_pde == NULL)
-		goto free_dma_dom;
-
-	dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
-	for (i = 0; i < num_pte_pages; ++i) {
-		dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!dma_dom->pte_pages[i])
-			goto free_dma_dom;
-		address = virt_to_phys(dma_dom->pte_pages[i]);
-		l2_pde[i] = IOMMU_L1_PDE(address);
-	}
 
 	return dma_dom;
 
@@ -983,7 +1127,6 @@
 	struct protection_domain *domain;
 	struct dma_ops_domain *dma_domain;
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	unsigned long flags;
 
 	if (devid > amd_iommu_last_bdf)
@@ -1002,17 +1145,7 @@
 			  "to a non-dma-ops domain\n", dev_name(dev));
 
 	switch (action) {
-	case BUS_NOTIFY_BOUND_DRIVER:
-		if (domain)
-			goto out;
-		dma_domain = find_protection_domain(devid);
-		if (!dma_domain)
-			dma_domain = iommu->default_dom;
-		attach_device(iommu, &dma_domain->domain, devid);
-		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-		       "device %s\n", dma_domain->domain.id, dev_name(dev));
-		break;
-	case BUS_NOTIFY_UNBIND_DRIVER:
+	case BUS_NOTIFY_UNBOUND_DRIVER:
 		if (!domain)
 			goto out;
 		detach_device(domain, devid);
@@ -1022,7 +1155,7 @@
 		dma_domain = find_protection_domain(devid);
 		if (dma_domain)
 			goto out;
-		dma_domain = dma_ops_domain_alloc(iommu, order);
+		dma_domain = dma_ops_domain_alloc(iommu);
 		if (!dma_domain)
 			goto out;
 		dma_domain->target_dev = devid;
@@ -1133,8 +1266,8 @@
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
 		attach_device(*iommu, *domain, *bdf);
-		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-				"device %s\n", (*domain)->id, dev_name(dev));
+		DUMP_printk("Using protection domain %d for device %s\n",
+			    (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1277,66 @@
 }
 
 /*
+ * If the pte_page is not yet allocated this function is called
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+		      unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+	u64 *pte, *page;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte)) {
+		page = (u64 *)get_zeroed_page(gfp);
+		if (!page)
+			return NULL;
+		*pte = IOMMU_L2_PDE(virt_to_phys(page));
+	}
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte)) {
+		page = (u64 *)get_zeroed_page(gfp);
+		if (!page)
+			return NULL;
+		*pte = IOMMU_L1_PDE(virt_to_phys(page));
+	}
+
+	pte = IOMMU_PTE_PAGE(*pte);
+
+	if (pte_page)
+		*pte_page = pte;
+
+	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+	return pte;
+}
+
+/*
+ * This function fetches the PTE for a given address in the aperture
+ */
+static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
+			    unsigned long address)
+{
+	struct aperture_range *aperture;
+	u64 *pte, *pte_page;
+
+	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+	if (!aperture)
+		return NULL;
+
+	pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+	if (!pte) {
+		pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+		aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+	} else
+		pte += IOMMU_PTE_L0_INDEX(address);
+
+	return pte;
+}
+
+/*
  * This is the generic map function. It maps one 4kb page at paddr to
  * the given address in the DMA address space for the domain.
  */
@@ -1159,8 +1352,9 @@
 
 	paddr &= PAGE_MASK;
 
-	pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
-	pte += IOMMU_PTE_L0_INDEX(address);
+	pte  = dma_ops_get_pte(dom, address);
+	if (!pte)
+		return bad_dma_address;
 
 	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
 
@@ -1185,14 +1379,20 @@
 				 struct dma_ops_domain *dom,
 				 unsigned long address)
 {
+	struct aperture_range *aperture;
 	u64 *pte;
 
 	if (address >= dom->aperture_size)
 		return;
 
-	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+	if (!aperture)
+		return;
 
-	pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+	pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+	if (!pte)
+		return;
+
 	pte += IOMMU_PTE_L0_INDEX(address);
 
 	WARN_ON(!*pte);
@@ -1216,7 +1416,7 @@
 			       u64 dma_mask)
 {
 	dma_addr_t offset = paddr & ~PAGE_MASK;
-	dma_addr_t address, start;
+	dma_addr_t address, start, ret;
 	unsigned int pages;
 	unsigned long align_mask = 0;
 	int i;
@@ -1232,14 +1432,33 @@
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
+retry:
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
 					  dma_mask);
-	if (unlikely(address == bad_dma_address))
-		goto out;
+	if (unlikely(address == bad_dma_address)) {
+		/*
+		 * setting next_address here will let the address
+		 * allocator only scan the new allocated range in the
+		 * first run. This is a small optimization.
+		 */
+		dma_dom->next_address = dma_dom->aperture_size;
+
+		if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+			goto out;
+
+		/*
+		 * aperture was sucessfully enlarged by 128 MB, try
+		 * allocation again
+		 */
+		goto retry;
+	}
 
 	start = address;
 	for (i = 0; i < pages; ++i) {
-		dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+		ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+		if (ret == bad_dma_address)
+			goto out_unmap;
+
 		paddr += PAGE_SIZE;
 		start += PAGE_SIZE;
 	}
@@ -1255,6 +1474,17 @@
 
 out:
 	return address;
+
+out_unmap:
+
+	for (--i; i >= 0; --i) {
+		start -= PAGE_SIZE;
+		dma_ops_domain_unmap(iommu, dma_dom, start);
+	}
+
+	dma_ops_free_addresses(dma_dom, address, pages);
+
+	return bad_dma_address;
 }
 
 /*
@@ -1537,8 +1767,10 @@
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address)
+	if (*dma_addr == bad_dma_address) {
+		spin_unlock_irqrestore(&domain->lock, flags);
 		goto out_free;
+	}
 
 	iommu_completion_wait(iommu);
 
@@ -1625,7 +1857,6 @@
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1869,7 @@
 		iommu = amd_iommu_rlookup_table[devid];
 		if (!iommu)
 			continue;
-		dma_dom = dma_ops_domain_alloc(iommu, order);
+		dma_dom = dma_ops_domain_alloc(iommu);
 		if (!dma_dom)
 			continue;
 		init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1895,6 @@
 int __init amd_iommu_init_dma_ops(void)
 {
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	int ret;
 
 	/*
@@ -1672,8 +1902,8 @@
 	 * found in the system. Devices not assigned to any other
 	 * protection domain will be assigned to the default one.
 	 */
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
-		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+	for_each_iommu(iommu) {
+		iommu->default_dom = dma_ops_domain_alloc(iommu);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
 		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1940,7 @@
 
 free_domains:
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		if (iommu->default_dom)
 			dma_ops_domain_free(iommu->default_dom);
 	}
@@ -1842,7 +2072,7 @@
 
 	old_domain = domain_for_device(devid);
 	if (old_domain)
-		return -EBUSY;
+		detach_device(old_domain, devid);
 
 	attach_device(iommu, domain, devid);
 
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be09..238989e 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@
 	u64 range_length;
 } __attribute__((packed));
 
+bool amd_iommu_dump;
+
 static int __initdata amd_iommu_detected;
 
 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 					   to handle */
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
 bool amd_iommu_isolate = true;		/* if true, device isolation is
 					   enabled */
+#endif
+
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@
 static inline unsigned long tbl_size(int entry_size)
 {
 	unsigned shift = PAGE_SHIFT +
-			 get_order(amd_iommu_last_bdf * entry_size);
+			 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 
 	return 1UL << shift;
 }
@@ -193,7 +199,7 @@
  * This function set the exclusion range in the IOMMU. DMA accesses to the
  * exclusion range are passed through untranslated
  */
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
 	u64 start = iommu->exclusion_start & PAGE_MASK;
 	u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@
 }
 
 /* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 {
 	u32 ctrl;
 
@@ -244,7 +250,7 @@
 }
 
 /* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
 	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void iommu_disable(struct amd_iommu *iommu)
 {
-	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
-	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 }
 
 /*
@@ -413,25 +417,36 @@
 {
 	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 			get_order(CMD_BUFFER_SIZE));
-	u64 entry;
 
 	if (cmd_buf == NULL)
 		return NULL;
 
 	iommu->cmd_buf_size = CMD_BUFFER_SIZE;
 
-	entry = (u64)virt_to_phys(cmd_buf);
+	return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+	u64 entry;
+
+	BUG_ON(iommu->cmd_buf == NULL);
+
+	entry = (u64)virt_to_phys(iommu->cmd_buf);
 	entry |= MMIO_CMD_SIZE_512;
+
 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
-			&entry, sizeof(entry));
+		    &entry, sizeof(entry));
 
 	/* set head and tail to zero manually */
 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 
 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
-	return cmd_buf;
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@
 /* allocates the memory where the IOMMU will log its events to */
 static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-	u64 entry;
 	iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 						get_order(EVT_BUFFER_SIZE));
 
 	if (iommu->evt_buf == NULL)
 		return NULL;
 
+	return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+	u64 entry;
+
+	BUG_ON(iommu->evt_buf == NULL);
+
 	entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 		    &entry, sizeof(entry));
 
-	iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
-	return iommu->evt_buf;
+	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
 static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@
 	p += sizeof(struct ivhd_header);
 	end += h->length;
 
+
 	while (p < end) {
 		e = (struct ivhd_entry *)p;
 		switch (e->type) {
 		case IVHD_DEV_ALL:
+
+			DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+				    " last device %02x:%02x.%x flags: %02x\n",
+				    PCI_BUS(iommu->first_device),
+				    PCI_SLOT(iommu->first_device),
+				    PCI_FUNC(iommu->first_device),
+				    PCI_BUS(iommu->last_device),
+				    PCI_SLOT(iommu->last_device),
+				    PCI_FUNC(iommu->last_device),
+				    e->flags);
+
 			for (dev_i = iommu->first_device;
 					dev_i <= iommu->last_device; ++dev_i)
 				set_dev_entry_from_acpi(iommu, dev_i,
 							e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT:
+
+			DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+				    "flags: %02x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags);
+
 			devid = e->devid;
 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT_RANGE_START:
+
+			DUMP_printk("  DEV_SELECT_RANGE_START\t "
+				    "devid: %02x:%02x.%x flags: %02x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags);
+
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = 0;
 			alias = false;
 			break;
 		case IVHD_DEV_ALIAS:
+
+			DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+				    "flags: %02x devid_to: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags,
+				    PCI_BUS(e->ext >> 8),
+				    PCI_SLOT(e->ext >> 8),
+				    PCI_FUNC(e->ext >> 8));
+
 			devid = e->devid;
 			devid_to = e->ext >> 8;
-			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
 			amd_iommu_alias_table[devid] = devid_to;
 			break;
 		case IVHD_DEV_ALIAS_RANGE:
+
+			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
+				    "devid: %02x:%02x.%x flags: %02x "
+				    "devid_to: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags,
+				    PCI_BUS(e->ext >> 8),
+				    PCI_SLOT(e->ext >> 8),
+				    PCI_FUNC(e->ext >> 8));
+
 			devid_start = e->devid;
 			flags = e->flags;
 			devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@
 			alias = true;
 			break;
 		case IVHD_DEV_EXT_SELECT:
+
+			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+				    "flags: %02x ext: %08x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags, e->ext);
+
 			devid = e->devid;
 			set_dev_entry_from_acpi(iommu, devid, e->flags,
 						e->ext);
 			break;
 		case IVHD_DEV_EXT_SELECT_RANGE:
+
+			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
+				    "%02x:%02x.%x flags: %02x ext: %08x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags, e->ext);
+
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = e->ext;
 			alias = false;
 			break;
 		case IVHD_DEV_RANGE_END:
+
+			DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid));
+
 			devid = e->devid;
 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
 				if (alias)
@@ -679,7 +774,7 @@
 {
 	struct amd_iommu *iommu, *next;
 
-	list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+	for_each_iommu_safe(iommu, next) {
 		list_del(&iommu->list);
 		free_iommu_one(iommu);
 		kfree(iommu);
@@ -710,7 +805,6 @@
 	if (!iommu->mmio_base)
 		return -ENOMEM;
 
-	iommu_set_device_table(iommu);
 	iommu->cmd_buf = alloc_command_buffer(iommu);
 	if (!iommu->cmd_buf)
 		return -ENOMEM;
@@ -746,6 +840,15 @@
 		h = (struct ivhd_header *)p;
 		switch (*p) {
 		case ACPI_IVHD_TYPE:
+
+			DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+				    "seg: %d flags: %01x info %04x\n",
+				    PCI_BUS(h->devid), PCI_SLOT(h->devid),
+				    PCI_FUNC(h->devid), h->cap_ptr,
+				    h->pci_seg, h->flags, h->info);
+			DUMP_printk("       mmio-addr: %016llx\n",
+				    h->mmio_phys);
+
 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
 			if (iommu == NULL)
 				return -ENOMEM;
@@ -773,56 +876,9 @@
  *
  ****************************************************************************/
 
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
-	struct amd_iommu *curr;
-	struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
-	int nvec = 0, i;
-
-	list_for_each_entry(curr, &amd_iommu_list, list) {
-		if (curr->dev == iommu->dev) {
-			entries[nvec].entry = curr->evt_msi_num;
-			entries[nvec].vector = 0;
-			curr->int_enabled = true;
-			nvec++;
-		}
-	}
-
-	if (pci_enable_msix(iommu->dev, entries, nvec)) {
-		pci_disable_msix(iommu->dev);
-		return 1;
-	}
-
-	for (i = 0; i < nvec; ++i) {
-		int r = request_irq(entries->vector, amd_iommu_int_handler,
-				    IRQF_SAMPLE_RANDOM,
-				    "AMD IOMMU",
-				    NULL);
-		if (r)
-			goto out_free;
-	}
-
-	return 0;
-
-out_free:
-	for (i -= 1; i >= 0; --i)
-		free_irq(entries->vector, NULL);
-
-	pci_disable_msix(iommu->dev);
-
-	return 1;
-}
-
 static int __init iommu_setup_msi(struct amd_iommu *iommu)
 {
 	int r;
-	struct amd_iommu *curr;
-
-	list_for_each_entry(curr, &amd_iommu_list, list) {
-		if (curr->dev == iommu->dev)
-			curr->int_enabled = true;
-	}
-
 
 	if (pci_enable_msi(iommu->dev))
 		return 1;
@@ -837,17 +893,18 @@
 		return 1;
 	}
 
+	iommu->int_enabled = true;
+	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
 	return 0;
 }
 
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
 {
 	if (iommu->int_enabled)
 		return 0;
 
-	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
-		return iommu_setup_msix(iommu);
-	else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
 		return iommu_setup_msi(iommu);
 
 	return 1;
@@ -899,6 +956,7 @@
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
 	struct unity_map_entry *e = 0;
+	char *s;
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
 	if (e == NULL)
@@ -906,14 +964,19 @@
 
 	switch (m->type) {
 	default:
+		kfree(e);
+		return 0;
 	case ACPI_IVMD_TYPE:
+		s = "IVMD_TYPEi\t\t\t";
 		e->devid_start = e->devid_end = m->devid;
 		break;
 	case ACPI_IVMD_TYPE_ALL:
+		s = "IVMD_TYPE_ALL\t\t";
 		e->devid_start = 0;
 		e->devid_end = amd_iommu_last_bdf;
 		break;
 	case ACPI_IVMD_TYPE_RANGE:
+		s = "IVMD_TYPE_RANGE\t\t";
 		e->devid_start = m->devid;
 		e->devid_end = m->aux;
 		break;
@@ -922,6 +985,13 @@
 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
 	e->prot = m->flags >> 1;
 
+	DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+		    " range_start: %016llx range_end: %016llx flags: %x\n", s,
+		    PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+		    PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+		    e->address_start, e->address_end, m->flags);
+
 	list_add_tail(&e->list, &amd_iommu_unity_map);
 
 	return 0;
@@ -967,18 +1037,28 @@
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
-static void __init enable_iommus(void)
+static void enable_iommus(void)
 {
 	struct amd_iommu *iommu;
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
+		iommu_set_device_table(iommu);
+		iommu_enable_command_buffer(iommu);
+		iommu_enable_event_buffer(iommu);
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
-		iommu_enable_event_logging(iommu);
 		iommu_enable(iommu);
 	}
 }
 
+static void disable_iommus(void)
+{
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu)
+		iommu_disable(iommu);
+}
+
 /*
  * Suspend/Resume support
  * disable suspend until real resume implemented
@@ -986,12 +1066,31 @@
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
+	/*
+	 * Disable IOMMUs before reprogramming the hardware registers.
+	 * IOMMU is still enabled from the resume kernel.
+	 */
+	disable_iommus();
+
+	/* re-load the hardware */
+	enable_iommus();
+
+	/*
+	 * we have to flush after the IOMMUs are enabled because a
+	 * disabled IOMMU will never execute the commands we send
+	 */
+	amd_iommu_flush_all_domains();
+	amd_iommu_flush_all_devices();
+
 	return 0;
 }
 
 static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 {
-	return -EINVAL;
+	/* disable IOMMUs to go out of the way for BIOS */
+	disable_iommus();
+
+	return 0;
 }
 
 static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@
 
 	enable_iommus();
 
-	printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
-			(1 << (amd_iommu_aperture_order-20)));
-
 	printk(KERN_INFO "AMD IOMMU: device isolation ");
 	if (amd_iommu_isolate)
 		printk("enabled\n");
@@ -1211,6 +1307,13 @@
  *
  ****************************************************************************/
 
+static int __init parse_amd_iommu_dump(char *str)
+{
+	amd_iommu_dump = true;
+
+	return 1;
+}
+
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
@@ -1225,15 +1328,5 @@
 	return 1;
 }
 
-static int __init parse_amd_iommu_size_options(char *str)
-{
-	unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
-	if ((order > 24) && (order < 31))
-		amd_iommu_aperture_order = order;
-
-	return 1;
-}
-
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
 __setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f287092..076d388 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
+#include <linux/perf_counter.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
@@ -34,6 +35,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 
+#include <asm/perf_counter.h>
 #include <asm/pgalloc.h>
 #include <asm/atomic.h>
 #include <asm/mpspec.h>
@@ -98,6 +100,29 @@
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase;
 
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go through APIC */
+	outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go directly to BSP */
+	outb(0x00, 0x23);
+}
 #endif
 
 #ifdef CONFIG_X86_64
@@ -111,13 +136,19 @@
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
-int x2apic;
 /* x2apic enabled before OS handover */
 static int x2apic_preenabled;
 static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
+	if (x2apic_enabled()) {
+		pr_warning("Bios already enabled x2apic, "
+			   "can't enforce nox2apic");
+		return 0;
+	}
+
 	disable_x2apic = 1;
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
@@ -209,6 +240,31 @@
 	return lapic_get_version() >= 0x14;
 }
 
+/*
+ * bare function to substitute write operation
+ * and it's _that_ fast :)
+ */
+static void native_apic_write_dummy(u32 reg, u32 v)
+{
+	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+static u32 native_apic_read_dummy(u32 reg)
+{
+	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	return 0;
+}
+
+/*
+ * right after this call apic->write/read doesn't do anything
+ * note that there is no restore operation it works one way
+ */
+void apic_disable(void)
+{
+	apic->read = native_apic_read_dummy;
+	apic->write = native_apic_write_dummy;
+}
+
 void native_apic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -348,7 +404,7 @@
 
 static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
 {
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+	unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
 	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 
 	apic_write(reg, v);
@@ -815,7 +871,7 @@
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
-	if (!x2apic && !apic_phys)
+	if (!x2apic_mode && !apic_phys)
 		return;
 
 	maxlvt = lapic_get_maxlvt();
@@ -1133,6 +1189,7 @@
 		apic_write(APIC_ESR, 0);
 	}
 #endif
+	perf_counters_lapic_init();
 
 	preempt_disable();
 
@@ -1287,7 +1344,7 @@
 {
 	if (x2apic_enabled()) {
 		pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
+		x2apic_preenabled = x2apic_mode = 1;
 	}
 }
 
@@ -1295,7 +1352,7 @@
 {
 	int msr, msr2;
 
-	if (!x2apic)
+	if (!x2apic_mode)
 		return;
 
 	rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1304,6 +1361,7 @@
 		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 	}
 }
+#endif /* CONFIG_X86_X2APIC */
 
 void __init enable_IR_x2apic(void)
 {
@@ -1312,32 +1370,21 @@
 	unsigned long flags;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of nox2apic\n");
-		return;
-	}
-
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of skipping io-apic setup\n");
-		return;
-	}
-
 	ret = dmar_table_init();
 	if (ret) {
-		pr_info("dmar_table_init() failed with %d:\n", ret);
+		pr_debug("dmar_table_init() failed with %d:\n", ret);
+		goto ir_failed;
+	}
 
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			pr_info("Not enabling x2apic,Intr-remapping\n");
+	if (!intr_remapping_supported()) {
+		pr_debug("intr-remapping not supported\n");
+		goto ir_failed;
+	}
+
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		pr_info("Skipped enabling intr-remap because of skipping "
+			"io-apic setup\n");
 		return;
 	}
 
@@ -1357,19 +1404,16 @@
 	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-	ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
-
+	ret = enable_intr_remapping(x2apic_supported());
 	if (ret)
 		goto end_restore;
 
-	if (!x2apic) {
-		x2apic = 1;
+	pr_info("Enabled Interrupt-remapping\n");
+
+	if (x2apic_supported() && !x2apic_mode) {
+		x2apic_mode = 1;
 		enable_x2apic();
+		pr_info("Enabled x2apic\n");
 	}
 
 end_restore:
@@ -1378,37 +1422,34 @@
 		 * IR enabling failed
 		 */
 		restore_IO_APIC_setup(ioapic_entries);
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
 
 	unmask_8259A();
 	local_irq_restore(flags);
 
 end:
-	if (!ret) {
-		if (!x2apic_preenabled)
-			pr_info("Enabled x2apic and interrupt-remapping\n");
-		else
-			pr_info("Enabled Interrupt-remapping\n");
-	} else
-		pr_err("Failed to enable Interrupt-remapping and x2apic\n");
 	if (ioapic_entries)
 		free_ioapic_entries(ioapic_entries);
+
+	if (!ret)
+		return;
+
+ir_failed:
+	if (x2apic_preenabled)
+		panic("x2apic enabled by bios. But IR enabling failed");
+	else if (cpu_has_x2apic)
+		pr_info("Not enabling x2apic,Intr-remapping\n");
 #else
 	if (!cpu_has_x2apic)
 		return;
 
 	if (x2apic_preenabled)
 		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-		" and x2apic\n");
+		      " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
 #endif
 
 	return;
 }
-#endif /* CONFIG_X86_X2APIC */
+
 
 #ifdef CONFIG_X86_64
 /*
@@ -1425,7 +1466,6 @@
 	}
 
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
 	return 0;
 }
 #else
@@ -1539,32 +1579,49 @@
  */
 void __init init_apic_mappings(void)
 {
-	if (x2apic) {
+	unsigned int new_apicid;
+
+	if (x2apic_mode) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
 
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
+	/* If no local APIC can be found return early */
 	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
+		/* lets NOP'ify apic operations */
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+	} else {
 		apic_phys = mp_lapic_addr;
 
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-				APIC_BASE, apic_phys);
+		/*
+		 * acpi lapic path already maps that address in
+		 * acpi_register_lapic_address()
+		 */
+		if (!acpi_lapic)
+			set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+
+		apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+					APIC_BASE, apic_phys);
+	}
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = read_apic_id();
+	new_apicid = read_apic_id();
+	if (boot_cpu_physical_apicid != new_apicid) {
+		boot_cpu_physical_apicid = new_apicid;
+		/*
+		 * yeah -- we lie about apic_version
+		 * in case if apic was disabled via boot option
+		 * but it's not a problem for SMP compiled kernel
+		 * since smp_sanity_check is prepared for such a case
+		 * and disable smp mode
+		 */
+		apic_version[new_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
 }
 
 /*
@@ -1733,8 +1790,7 @@
 		 */
 		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
 				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
+		imcr_pic_to_apic();
 	}
 #endif
 	if (apic->enable_apic_mode)
@@ -1762,8 +1818,7 @@
 		 */
 		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
 				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
+		imcr_apic_to_pic();
 		return;
 	}
 #endif
@@ -1969,10 +2024,10 @@
 
 	local_irq_save(flags);
 	disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
 	if (intr_remapping_enabled)
 		disable_intr_remapping();
-#endif
+
 	local_irq_restore(flags);
 	return 0;
 }
@@ -1982,42 +2037,34 @@
 	unsigned int l, h;
 	unsigned long flags;
 	int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
-	int ret;
+	int ret = 0;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
 		return 0;
 
 	local_irq_save(flags);
-	if (x2apic) {
+	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto restore;
 		}
 
 		ret = save_IO_APIC_setup(ioapic_entries);
 		if (ret) {
 			WARN(1, "Saving IO-APIC state failed: %d\n", ret);
 			free_ioapic_entries(ioapic_entries);
-			return ret;
+			goto restore;
 		}
 
 		mask_IO_APIC_setup(ioapic_entries);
 		mask_8259A();
-		enable_x2apic();
 	}
-#else
-	if (!apic_pm_state.active)
-		return 0;
 
-	local_irq_save(flags);
-	if (x2apic)
+	if (x2apic_mode)
 		enable_x2apic();
-#endif
-
 	else {
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -2055,21 +2102,16 @@
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled)
-		reenable_intr_remapping(EIM_32BIT_APIC_ID);
-
-	if (x2apic) {
+	if (intr_remapping_enabled) {
+		reenable_intr_remapping(x2apic_mode);
 		unmask_8259A();
 		restore_IO_APIC_setup(ioapic_entries);
 		free_ioapic_entries(ioapic_entries);
 	}
-#endif
-
+restore:
 	local_irq_restore(flags);
 
-
-	return 0;
+	return ret;
 }
 
 /*
@@ -2117,31 +2159,14 @@
 #endif	/* CONFIG_PM */
 
 #ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
+
+static int __cpuinit apic_cluster_num(void)
 {
 	int i, clusters, zeros;
 	unsigned id;
 	u16 *bios_cpu_apicid;
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
@@ -2177,18 +2202,67 @@
 			++zeros;
 	}
 
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
+	return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+	if (multi)
+		return 0;
+	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+	multi = 1;
+	return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+	{
+		.callback = set_multi,
+		.ident = "IBM System Summit2",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+		},
+	},
+	{}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+	if (multi_checked)
+		return;
+
+	dmi_check_system(multi_dmi_table);
+	multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+	dmi_check_multi();
+	if (multi)
 		return 1;
 
+	if (!is_vsmp_box())
+		return 0;
+
 	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
+	 * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
 	 */
-	return (clusters > 2);
+	if (apic_cluster_num() > 1)
+		return 1;
+
+	return 0;
 }
 #endif
 
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 306e5e8..d0c99ab 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -161,7 +161,7 @@
 
 static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 {
-	return hard_smp_processor_id() >> index_msb;
+	return initial_apic_id >> index_msb;
 }
 
 struct apic apic_flat =  {
@@ -235,7 +235,7 @@
 	 * regardless of how many processors are present (x86_64 ES7000
 	 * is an example).
 	 */
-	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
 		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
 		printk(KERN_DEBUG "system APIC only can use physical flat");
 		return 1;
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3029477..69328ac 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -145,7 +145,7 @@
 	return gsi;
 }
 
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 {
 	unsigned long vect = 0, psaival = 0;
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 30da617..ef8d929 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
@@ -129,12 +130,9 @@
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
 	struct irq_pin_list *pin;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -148,9 +146,6 @@
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -182,16 +177,18 @@
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	int count;
+	int node;
 	int i;
 
 	cfg = irq_cfgx;
 	count = ARRAY_SIZE(irq_cfgx);
+	node= cpu_to_node(boot_cpu_id);
 
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_bootmem_cpumask_var(&cfg[i].domain);
-		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
@@ -212,12 +209,9 @@
 	return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
 	struct irq_cfg *cfg;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
@@ -238,13 +232,13 @@
 	return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(cpu);
+		desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
@@ -254,10 +248,9 @@
 	return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
 	struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -266,7 +259,7 @@
 	if (!old_entry)
 		return;
 
-	entry = get_one_free_irq_2_pin(cpu);
+	entry = get_one_free_irq_2_pin(node);
 	if (!entry)
 		return;
 
@@ -276,7 +269,7 @@
 	tail		= entry;
 	old_entry	= old_entry->next;
 	while (old_entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			entry = head;
 			while (entry) {
@@ -316,12 +309,12 @@
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int cpu)
+				 struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 	struct irq_cfg *old_cfg;
 
-	cfg = get_one_free_irq_cfg(cpu);
+	cfg = get_one_free_irq_cfg(node);
 
 	if (!cfg)
 		return;
@@ -332,7 +325,7 @@
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +349,7 @@
 		old_desc->chip_data = NULL;
 	}
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg = desc->chip_data;
-
-	if (!cfg->move_in_progress) {
-		/* it means that domain is not changed */
-		if (!cpumask_intersects(desc->affinity, mask))
-			cfg->move_desc_pending = 1;
-	}
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +359,6 @@
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -518,132 +492,18 @@
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return BAD_APICID;
-
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
-	cpumask_copy(desc->affinity, mask);
-
-	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	dest = set_desc_affinity(desc, mask);
-	if (dest != BAD_APICID) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, cfg);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
 					apic, pin);
@@ -663,7 +523,7 @@
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(cpu);
+	entry->next = get_one_free_irq_2_pin(node);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -672,7 +532,7 @@
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -692,7 +552,7 @@
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -850,7 +710,6 @@
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
 	int apic;
@@ -948,20 +807,6 @@
 	return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries)
-
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic;
@@ -971,7 +816,6 @@
 
 	kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1032,54 +876,6 @@
 	return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-			    mp_irqs[i].dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1298,6 +1094,64 @@
 	return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG,
+		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		    bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE,
+			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL)
+				break;
+
+		if (!test_bit(lbus, mp_bus_not_pci) &&
+		    !mp_irqs[i].irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].srcbusirq & 3)) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				return irq;
+			}
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				best_guess = irq;
+			}
+		}
+	}
+	return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
 	/* Used to the online set of cpus does not change
@@ -1628,58 +1482,70 @@
 	ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
+	int apic_id = 0, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		apic_id = mp_find_ioapic(0);
+		if (apic_id < 0)
+			apic_id = 0;
+	}
+#endif
 
-			idx = find_irq_entry(apic_id, pin, mp_INT);
-			if (idx == -1) {
-				if (!notcon) {
-					notcon = 1;
-					apic_printk(APIC_VERBOSE,
-						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				} else
-					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				continue;
-			}
-			if (notcon) {
+	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+		idx = find_irq_entry(apic_id, pin, mp_INT);
+		if (idx == -1) {
+			if (!notcon) {
+				notcon = 1;
 				apic_printk(APIC_VERBOSE,
-					" (apicid-pin) not connected\n");
-				notcon = 0;
-			}
-
-			irq = pin_2_irq(idx, apic_id, pin);
-
-			/*
-			 * Skip the timer IRQ if there's a quirk handler
-			 * installed and if it returns 1:
-			 */
-			if (apic->multi_timer_check &&
-					apic->multi_timer_check(apic_id, irq))
-				continue;
-
-			desc = irq_to_desc_alloc_cpu(irq, cpu);
-			if (!desc) {
-				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-				continue;
-			}
-			cfg = desc->chip_data;
-			add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
-
-			setup_IO_APIC_irq(apic_id, pin, irq, desc,
-					irq_trigger(idx), irq_polarity(idx));
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			} else
+				apic_printk(APIC_VERBOSE, " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			continue;
 		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				" (apicid-pin) not connected\n");
+			notcon = 0;
+		}
+
+		irq = pin_2_irq(idx, apic_id, pin);
+
+		/*
+		 * Skip the timer IRQ if there's a quirk handler
+		 * installed and if it returns 1:
+		 */
+		if (apic->multi_timer_check &&
+				apic->multi_timer_check(apic_id, irq))
+			continue;
+
+		desc = irq_to_desc_alloc_node(irq, node);
+		if (!desc) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
+		}
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, apic_id, pin);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
+		setup_IO_APIC_irq(apic_id, pin, irq, desc,
+				irq_trigger(idx), irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -1869,7 +1735,7 @@
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-	unsigned int v, ver, maxlvt;
+	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1957,6 +1823,18 @@
 	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 	v = apic_read(APIC_TDCR);
 	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
 	printk("\n");
 }
 
@@ -2005,6 +1883,11 @@
 __apicdebuginit(int) print_all_ICs(void)
 {
 	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic || disable_apic)
+		return 0;
+
 	print_all_local_APICs();
 	print_IO_APIC();
 
@@ -2360,6 +2243,118 @@
  */
 
 #ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return BAD_APICID;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
+		return BAD_APICID;
+
+	cpumask_copy(desc->affinity, mask);
+
+	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+	int ret = -1;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	return set_ioapic_affinity_irq_desc(desc, mask);
+}
 
 #ifdef CONFIG_INTR_REMAP
 
@@ -2374,26 +2369,25 @@
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return ret;
 
 	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return;
+		return ret;
 
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
+		return ret;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2409,27 +2403,30 @@
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(desc->affinity, mask);
+
+	return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	migrate_ioapic_irq_desc(desc, mask);
+	return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	set_ir_ioapic_affinity_irq_desc(desc, mask);
+	return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 						   const struct cpumask *mask)
 {
+	return 0;
 }
 #endif
 
@@ -2491,86 +2488,19 @@
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		if (likely(!cfg->move_desc_pending))
-			return;
-
-		/* domain has not changed, but affinity did */
-		me = smp_processor_id();
-		if (cpumask_test_cpu(me, desc->affinity)) {
-			*descp = desc = move_irq_desc(desc, me);
-			/* get the new one */
-			cfg = desc->chip_data;
-			cfg->move_desc_pending = 0;
-		}
-#endif
+	if (likely(!cfg->move_in_progress))
 		return;
-	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		*descp = desc = move_irq_desc(desc, me);
-		/* get the new one */
-		cfg = desc->chip_data;
-#endif
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
-	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	ack_x2APIC_irq();
-	eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,9 +2564,6 @@
 	 */
 	ack_APIC_irq();
 
-	if (irq_remapped(irq))
-		eoi_ioapic_irq(desc);
-
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2683,22 +2610,50 @@
 }
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+	ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	ack_APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2903,7 +2858,7 @@
 {
 	struct irq_desc *desc = irq_to_desc(0);
 	struct irq_cfg *cfg = desc->chip_data;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -2969,7 +2924,7 @@
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
 			/* for edge trigger, setup_IO_APIC_irq already
@@ -3006,7 +2961,7 @@
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -3218,14 +3173,13 @@
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
 	/* Allocate an unused irq */
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int cpu = boot_cpu_id;
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3234,7 +3188,7 @@
 
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		desc_new = irq_to_desc_alloc_node(new, node);
 		if (!desc_new) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", new);
 			continue;
@@ -3243,6 +3197,9 @@
 
 		if (cfg_new->vector != 0)
 			continue;
+
+		desc_new = move_irq_desc(desc_new, node);
+
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
 		break;
@@ -3260,11 +3217,12 @@
 
 int create_irq(void)
 {
+	int node = cpu_to_node(boot_cpu_id);
 	unsigned int irq_want;
 	int irq;
 
 	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want);
+	irq = create_irq_nr(irq_want, node);
 
 	if (irq == 0)
 		irq = -1;
@@ -3366,7 +3324,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3375,7 +3333,7 @@
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3387,13 +3345,15 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
+
+	return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -3402,11 +3362,11 @@
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
-		return;
+		return -1;
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3423,6 +3383,8 @@
 	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
+
+	return 0;
 }
 
 #endif
@@ -3518,15 +3480,17 @@
 	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
+	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
+	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want);
+		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
@@ -3576,7 +3540,7 @@
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3585,7 +3549,7 @@
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3597,6 +3561,8 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3630,7 +3596,7 @@
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3639,7 +3605,7 @@
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3651,6 +3617,8 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3707,7 +3675,7 @@
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3715,11 +3683,13 @@
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
 	target_ht_irq(irq, dest, cfg->vector);
+
+	return 0;
 }
 
 #endif
@@ -3794,6 +3764,8 @@
 	unsigned long flags;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	cfg = irq_cfg(irq);
 
 	err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3807,15 +3779,13 @@
 
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3833,10 +3803,10 @@
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
 	entry->mask = 1;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3900,6 +3870,71 @@
 }
 #endif
 
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int node;
+	int ioapic, pin;
+	int trigger, polarity;
+
+	ioapic = irq_attr->ioapic;
+	if (!IO_APIC_IRQ(irq)) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	if (dev)
+		node = dev_to_node(dev);
+	else
+		node = cpu_to_node(boot_cpu_id);
+
+	desc = irq_to_desc_alloc_node(irq, node);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
+	pin = irq_attr->ioapic_pin;
+	trigger = irq_attr->trigger;
+	polarity = irq_attr->polarity;
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= NR_IRQS_LEGACY) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
+	}
+
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+	return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int ioapic, pin;
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	ioapic = irq_attr->ioapic;
+	pin = irq_attr->ioapic_pin;
+	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[ioapic].apicid, pin);
+		return 0;
+	}
+	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+	return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3980,6 +4015,7 @@
 
 	return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -3992,39 +4028,6 @@
 
 	return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
-
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc %d\n", irq);
-		return 0;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= NR_IRQS_LEGACY) {
-		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-	}
-
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-	return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4055,51 +4058,44 @@
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-	int pin, ioapic, irq, irq_entry;
+	int pin, ioapic = 0, irq, irq_entry;
 	struct irq_desc *desc;
-	struct irq_cfg *cfg;
 	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			desc = irq_to_desc(irq);
-			cfg = desc->chip_data;
-			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq, desc,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-				continue;
-
-			}
-
-			/*
-			 * Honour affinities which have been set in early boot
-			 */
-			if (desc->status &
-			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
-			else
-				mask = apic->target_cpus();
-
-			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, mask);
-			else
-				set_ioapic_affinity_irq_desc(desc, mask);
-		}
-
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		ioapic = mp_find_ioapic(0);
+		if (ioapic < 0)
+			ioapic = 0;
 	}
+#endif
+
+	for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+		irq = pin_2_irq(irq_entry, ioapic, pin);
+
+		desc = irq_to_desc(irq);
+
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (desc->status &
+		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+			mask = desc->affinity;
+		else
+			mask = apic->target_cpus();
+
+		if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq_desc(desc, mask);
+		else
+			set_ioapic_affinity_irq_desc(desc, mask);
+	}
+
 }
 #endif
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index ce4fbfa..a691302 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -104,7 +104,7 @@
 }
 #endif
 
-static void report_broken_nmi(int cpu, int *prev_nmi_count)
+static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
 {
 	printk(KERN_CONT "\n");
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 01eda2a..440a8bc 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -160,7 +160,6 @@
 extern struct apic apic_bigsmp;
 extern struct apic apic_es7000;
 extern struct apic apic_es7000_cluster;
-extern struct apic apic_default;
 
 struct apic *apic = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652..bc3e880 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@
 void __init default_setup_apic_routing(void)
 {
 #ifdef CONFIG_X86_X2APIC
-	if (x2apic && (apic != &apic_x2apic_phys &&
+	if (x2apic_mode && (apic != &apic_x2apic_phys &&
 #ifdef CONFIG_X86_UV
 		       apic != &apic_x2apic_uv_x &&
 #endif
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9cfe1f4..344eee4 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -173,13 +173,6 @@
 		rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT	4
-#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
 #define SUMMIT_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
 static const struct cpumask *summit_target_cpus(void)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4a903e2..8e4cbb2 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -10,7 +10,7 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2bda693..ef0ae20 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -105,7 +105,7 @@
 	cpumask_set_cpu(cpu, retmask);
 }
 
-static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 #ifdef CONFIG_SMP
 	unsigned long val;
@@ -562,7 +562,7 @@
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
 	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-	int max_pnode = 0;
+	int gnode_extra, max_pnode = 0;
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask;
 
@@ -574,6 +574,13 @@
 	mmr_base =
 	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 	    ~UV_MMR_ENABLE;
+	pnode_mask = (1 << n_val) - 1;
+	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+	gnode_upper = ((unsigned long)gnode_extra  << m_val);
+	printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
+			n_val, m_val, gnode_upper, gnode_extra);
+
 	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
 	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
@@ -583,15 +590,18 @@
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
 	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_blade_info);
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
 	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
 	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_node_to_blade);
 	memset(uv_node_to_blade, 255, bytes);
 
 	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
 	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_cpu_to_blade);
 	memset(uv_cpu_to_blade, 255, bytes);
 
 	blade = 0;
@@ -607,11 +617,6 @@
 		}
 	}
 
-	pnode_mask = (1 << n_val) - 1;
-	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-	gnode_upper = (((unsigned long)node_id.s.node_id) &
-		       ~((1 << n_val) - 1)) << m_val;
-
 	uv_bios_init();
 	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
 			    &sn_coherency_id, &sn_region_size);
@@ -634,6 +639,7 @@
 		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
 		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 49e0939..79302e9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1233,9 +1233,9 @@
 	int err;
 	struct apm_user	*as;
 
-	device_suspend(PMSG_SUSPEND);
+	dpm_suspend_start(PMSG_SUSPEND);
 
-	device_power_down(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1259,9 +1259,9 @@
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1277,7 @@
 {
 	int err;
 
-	device_power_down(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1291,7 +1291,7 @@
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
@@ -1376,7 +1376,7 @@
 			ignore_bounce = 1;
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
-				device_resume(PMSG_RESUME);
+				dpm_resume_end(PMSG_RESUME);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 5a6aa1c..dfdbf64 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -126,6 +126,7 @@
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+	OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
 	OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
 
 	BLANK();
@@ -146,4 +147,5 @@
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72f062..898ecc4 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -125,6 +125,7 @@
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 
 	BLANK();
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9..3efcb2b 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
 #
 
 # Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_X86_MCE)	+= mcheck/
-obj-$(CONFIG_MTRR)	+= mtrr/
-obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
+obj-$(CONFIG_PERF_COUNTERS)		+= perf_counter.o
 
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE)			+= mcheck/
+obj-$(CONFIG_MTRR)			+= mtrr/
+obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459..e5b27d8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/pci-direct.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -272,7 +273,7 @@
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	int cpu = smp_processor_id();
 	int node;
-	unsigned apicid = hard_smp_processor_id();
+	unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	node = c->phys_proc_id;
 	if (apicid_to_node[apicid] != NUMA_NO_NODE)
@@ -351,6 +352,15 @@
 		    (c->x86_model == 8 && c->x86_mask >= 8))
 			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+	/* check CPU config space for extended APIC ID */
+	if (c->x86 >= 0xf) {
+		unsigned int val;
+		val = read_pci_config(0, 24, 0, 0x68);
+		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+	}
+#endif
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 77848d9..3ffdcfa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -299,7 +300,8 @@
 	return NULL;		/* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -768,6 +770,12 @@
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
+	/* Clear/Set all flags overriden by options, after probe */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 #ifdef CONFIG_X86_64
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
@@ -813,6 +821,16 @@
 #endif
 
 	init_hypervisor(c);
+
+	/*
+	 * Clear/Set all flags overriden by options, need do it
+	 * before following smp all cpus cap AND.
+	 */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -825,10 +843,6 @@
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
@@ -861,6 +875,7 @@
 #else
 	vgetcpu_set_mode();
 #endif
+	init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab..6b2a52d 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -32,9 +32,7 @@
 
 static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
 static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
 static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
 
 static DEFINE_MUTEX(cpu_debug_lock);
 
@@ -80,302 +78,102 @@
 	{ "value",	CPU_REG_ALL,	1	},
 };
 
-/* Intel Registers Range */
-static struct cpu_debug_range cpu_intel_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	CPU_INTEL_ALL		},
-	{ 0x00000006, 0x00000007, CPU_MONITOR,	CPU_CX_AT_XE		},
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_INTEL_ALL		},
-	{ 0x00000011, 0x00000013, CPU_PMC,	CPU_INTEL_PENTIUM	},
-	{ 0x00000017, 0x00000017, CPU_PLATFORM,	CPU_PX_CX_AT_XE		},
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_P6_CX_AT_XE		},
+/* CPU Registers Range */
+static struct cpu_debug_range cpu_reg_range[] = {
+	{ 0x00000000, 0x00000001, CPU_MC,	},
+	{ 0x00000006, 0x00000007, CPU_MONITOR,	},
+	{ 0x00000010, 0x00000010, CPU_TIME,	},
+	{ 0x00000011, 0x00000013, CPU_PMC,	},
+	{ 0x00000017, 0x00000017, CPU_PLATFORM,	},
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	},
+	{ 0x0000002A, 0x0000002B, CPU_POWERON,	},
+	{ 0x0000002C, 0x0000002C, CPU_FREQ,	},
+	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	},
+	{ 0x00000040, 0x00000047, CPU_LBRANCH,	},
+	{ 0x00000060, 0x00000067, CPU_LBRANCH,	},
+	{ 0x00000079, 0x00000079, CPU_BIOS,	},
+	{ 0x00000088, 0x0000008A, CPU_CACHE,	},
+	{ 0x0000008B, 0x0000008B, CPU_BIOS,	},
+	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	},
+	{ 0x000000C1, 0x000000C4, CPU_PMC,	},
+	{ 0x000000CD, 0x000000CD, CPU_FREQ,	},
+	{ 0x000000E7, 0x000000E8, CPU_PERF,	},
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	},
 
-	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_PX_CX_AT_XE		},
-	{ 0x0000002B, 0x0000002B, CPU_POWERON,	CPU_INTEL_XEON		},
-	{ 0x0000002C, 0x0000002C, CPU_FREQ,	CPU_INTEL_XEON		},
-	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	CPU_CX_AT_XE		},
+	{ 0x00000116, 0x0000011E, CPU_CACHE,	},
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	},
+	{ 0x00000179, 0x0000017B, CPU_MC,	},
+	{ 0x00000186, 0x00000189, CPU_PMC,	},
+	{ 0x00000198, 0x00000199, CPU_PERF,	},
+	{ 0x0000019A, 0x0000019A, CPU_TIME,	},
+	{ 0x0000019B, 0x0000019D, CPU_THERM,	},
+	{ 0x000001A0, 0x000001A0, CPU_MISC,	},
+	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	},
+	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	},
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	},
+	{ 0x000001DA, 0x000001E0, CPU_LBRANCH,	},
 
-	{ 0x00000040, 0x00000043, CPU_LBRANCH,	CPU_PM_CX_AT_XE		},
-	{ 0x00000044, 0x00000047, CPU_LBRANCH,	CPU_PM_CO_AT		},
-	{ 0x00000060, 0x00000063, CPU_LBRANCH,	CPU_C2_AT		},
-	{ 0x00000064, 0x00000067, CPU_LBRANCH,	CPU_INTEL_ATOM		},
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	},
+	{ 0x00000250, 0x00000250, CPU_MTRR,	},
+	{ 0x00000258, 0x00000259, CPU_MTRR,	},
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	},
+	{ 0x00000277, 0x00000277, CPU_PAT,	},
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	},
 
-	{ 0x00000079, 0x00000079, CPU_BIOS,	CPU_P6_CX_AT_XE		},
-	{ 0x00000088, 0x0000008A, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x0000008B, 0x0000008B, CPU_BIOS,	CPU_P6_CX_AT_XE		},
-	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	CPU_INTEL_XEON		},
+	{ 0x00000300, 0x00000311, CPU_PMC,	},
+	{ 0x00000345, 0x00000345, CPU_PMC,	},
+	{ 0x00000360, 0x00000371, CPU_PMC,	},
+	{ 0x0000038D, 0x00000390, CPU_PMC,	},
+	{ 0x000003A0, 0x000003BE, CPU_PMC,	},
+	{ 0x000003C0, 0x000003CD, CPU_PMC,	},
+	{ 0x000003E0, 0x000003E1, CPU_PMC,	},
+	{ 0x000003F0, 0x000003F2, CPU_PMC,	},
 
-	{ 0x000000C1, 0x000000C2, CPU_PMC,	CPU_P6_CX_AT		},
-	{ 0x000000CD, 0x000000CD, CPU_FREQ,	CPU_CX_AT		},
-	{ 0x000000E7, 0x000000E8, CPU_PERF,	CPU_CX_AT		},
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_P6_CX_XE		},
+	{ 0x00000400, 0x00000417, CPU_MC,	},
+	{ 0x00000480, 0x0000048B, CPU_VMX,	},
 
-	{ 0x00000116, 0x00000116, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x00000118, 0x00000118, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x00000119, 0x00000119, CPU_CACHE,	CPU_INTEL_PX		},
-	{ 0x0000011A, 0x0000011B, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x0000011E, 0x0000011E, CPU_CACHE,	CPU_PX_CX_AT		},
+	{ 0x00000600, 0x00000600, CPU_DEBUG,	},
+	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	},
+	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	},
 
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_P6_CX_AT_XE		},
-	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_PX_CX_AT_XE		},
-	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_P6_XE		},
-	{ 0x00000186, 0x00000187, CPU_PMC,	CPU_P6_CX_AT		},
-	{ 0x00000198, 0x00000199, CPU_PERF,	CPU_PM_CX_AT_XE		},
-	{ 0x0000019A, 0x0000019A, CPU_TIME,	CPU_PM_CX_AT_XE		},
-	{ 0x0000019B, 0x0000019D, CPU_THERM,	CPU_PM_CX_AT_XE		},
-	{ 0x000001A0, 0x000001A0, CPU_MISC,	CPU_PM_CX_AT_XE		},
+	{ 0x000107CC, 0x000107D3, CPU_PMC,	},
 
-	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	CPU_PM_CX_AT		},
-	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_CX_AT_XE		},
-	{ 0x000001DA, 0x000001DA, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000001DB, 0x000001DB, CPU_LBRANCH,	CPU_P6_XE		},
-	{ 0x000001DC, 0x000001DC, CPU_LBRANCH,	CPU_INTEL_P6		},
-	{ 0x000001DD, 0x000001DE, CPU_LBRANCH,	CPU_PX_CX_AT_XE		},
-	{ 0x000001E0, 0x000001E0, CPU_LBRANCH,	CPU_INTEL_P6		},
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	},
+	{ 0xC0000081, 0xC0000084, CPU_CALL,	},
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	},
+	{ 0xC0000103, 0xC0000103, CPU_TIME,	},
 
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_C2_AT_XE		},
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_P6_CX_XE		},
-
-	{ 0x00000300, 0x00000308, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x00000309, 0x0000030B, CPU_PMC,	CPU_C2_AT_XE		},
-	{ 0x0000030C, 0x00000311, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x00000345, 0x00000345, CPU_PMC,	CPU_C2_AT		},
-	{ 0x00000360, 0x00000371, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x0000038D, 0x00000390, CPU_PMC,	CPU_C2_AT		},
-	{ 0x000003A0, 0x000003BE, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003C0, 0x000003CD, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003E0, 0x000003E1, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003F0, 0x000003F0, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003F1, 0x000003F1, CPU_PMC,	CPU_C2_AT_XE		},
-	{ 0x000003F2, 0x000003F2, CPU_PMC,	CPU_INTEL_XEON		},
-
-	{ 0x00000400, 0x00000402, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000403, 0x00000403, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000404, 0x00000406, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000407, 0x00000407, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000408, 0x0000040A, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x0000040B, 0x0000040B, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x0000040C, 0x0000040E, CPU_MC,	CPU_PM_CX_XE		},
-	{ 0x0000040F, 0x0000040F, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000410, 0x00000412, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000413, 0x00000417, CPU_MC,	CPU_CX_AT_XE		},
-	{ 0x00000480, 0x0000048B, CPU_VMX,	CPU_CX_AT_XE		},
-
-	{ 0x00000600, 0x00000600, CPU_DEBUG,	CPU_PM_CX_AT_XE		},
-	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	CPU_INTEL_XEON		},
-
-	{ 0x000107CC, 0x000107D3, CPU_PMC,	CPU_INTEL_XEON_MP	},
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_INTEL_XEON		},
-	{ 0xC0000081, 0xC0000082, CPU_CALL,	CPU_INTEL_XEON		},
-	{ 0xC0000084, 0xC0000084, CPU_CALL,	CPU_INTEL_XEON		},
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_INTEL_XEON		},
+	{ 0xC0010000, 0xC0010007, CPU_PMC,	},
+	{ 0xC0010010, 0xC0010010, CPU_CONF,	},
+	{ 0xC0010015, 0xC0010015, CPU_CONF,	},
+	{ 0xC0010016, 0xC001001A, CPU_MTRR,	},
+	{ 0xC001001D, 0xC001001D, CPU_MTRR,	},
+	{ 0xC001001F, 0xC001001F, CPU_CONF,	},
+	{ 0xC0010030, 0xC0010035, CPU_BIOS,	},
+	{ 0xC0010044, 0xC0010048, CPU_MC,	},
+	{ 0xC0010050, 0xC0010056, CPU_SMM,	},
+	{ 0xC0010058, 0xC0010058, CPU_CONF,	},
+	{ 0xC0010060, 0xC0010060, CPU_CACHE,	},
+	{ 0xC0010061, 0xC0010068, CPU_SMM,	},
+	{ 0xC0010069, 0xC001006B, CPU_SMM,	},
+	{ 0xC0010070, 0xC0010071, CPU_SMM,	},
+	{ 0xC0010111, 0xC0010113, CPU_SMM,	},
+	{ 0xC0010114, 0xC0010118, CPU_SVM,	},
+	{ 0xC0010140, 0xC0010141, CPU_OSVM,	},
+	{ 0xC0011022, 0xC0011023, CPU_CONF,	},
 };
 
-/* AMD Registers Range */
-static struct cpu_debug_range cpu_amd_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	CPU_K10_PLUS,		},
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_K8_PLUS,		},
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_K8_PLUS,		},
-	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_K7_PLUS		},
-	{ 0x0000008B, 0x0000008B, CPU_VER,	CPU_K8_PLUS		},
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_K8_PLUS,		},
-
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_K8_PLUS,		},
-	{ 0x00000179, 0x0000017B, CPU_MC,	CPU_K8_PLUS,		},
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_K8_PLUS,		},
-	{ 0x000001DB, 0x000001DE, CPU_LBRANCH,	CPU_K8_PLUS,		},
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_K8_PLUS,		},
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_K8_PLUS,		},
-
-	{ 0x00000400, 0x00000413, CPU_MC,	CPU_K8_PLUS,		},
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_AMD_ALL,		},
-	{ 0xC0000081, 0xC0000084, CPU_CALL,	CPU_K8_PLUS,		},
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_K8_PLUS,		},
-	{ 0xC0000103, 0xC0000103, CPU_TIME,	CPU_K10_PLUS,		},
-
-	{ 0xC0010000, 0xC0010007, CPU_PMC,	CPU_K8_PLUS,		},
-	{ 0xC0010010, 0xC0010010, CPU_CONF,	CPU_K7_PLUS,		},
-	{ 0xC0010015, 0xC0010015, CPU_CONF,	CPU_K7_PLUS,		},
-	{ 0xC0010016, 0xC001001A, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0xC001001D, 0xC001001D, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0xC001001F, 0xC001001F, CPU_CONF,	CPU_K8_PLUS,		},
-	{ 0xC0010030, 0xC0010035, CPU_BIOS,	CPU_K8_PLUS,		},
-	{ 0xC0010044, 0xC0010048, CPU_MC,	CPU_K8_PLUS,		},
-	{ 0xC0010050, 0xC0010056, CPU_SMM,	CPU_K0F_PLUS,		},
-	{ 0xC0010058, 0xC0010058, CPU_CONF,	CPU_K10_PLUS,		},
-	{ 0xC0010060, 0xC0010060, CPU_CACHE,	CPU_AMD_11,		},
-	{ 0xC0010061, 0xC0010068, CPU_SMM,	CPU_K10_PLUS,		},
-	{ 0xC0010069, 0xC001006B, CPU_SMM,	CPU_AMD_11,		},
-	{ 0xC0010070, 0xC0010071, CPU_SMM,	CPU_K10_PLUS,		},
-	{ 0xC0010111, 0xC0010113, CPU_SMM,	CPU_K8_PLUS,		},
-	{ 0xC0010114, 0xC0010118, CPU_SVM,	CPU_K10_PLUS,		},
-	{ 0xC0010140, 0xC0010141, CPU_OSVM,	CPU_K10_PLUS,		},
-	{ 0xC0011022, 0xC0011023, CPU_CONF,	CPU_K10_PLUS,		},
-};
-
-
-/* Intel */
-static int get_intel_modelflag(unsigned model)
-{
-	int flag;
-
-	switch (model) {
-	case 0x0501:
-	case 0x0502:
-	case 0x0504:
-		flag = CPU_INTEL_PENTIUM;
-		break;
-	case 0x0601:
-	case 0x0603:
-	case 0x0605:
-	case 0x0607:
-	case 0x0608:
-	case 0x060A:
-	case 0x060B:
-		flag = CPU_INTEL_P6;
-		break;
-	case 0x0609:
-	case 0x060D:
-		flag = CPU_INTEL_PENTIUM_M;
-		break;
-	case 0x060E:
-		flag = CPU_INTEL_CORE;
-		break;
-	case 0x060F:
-	case 0x0617:
-		flag = CPU_INTEL_CORE2;
-		break;
-	case 0x061C:
-		flag = CPU_INTEL_ATOM;
-		break;
-	case 0x0F00:
-	case 0x0F01:
-	case 0x0F02:
-	case 0x0F03:
-	case 0x0F04:
-		flag = CPU_INTEL_XEON_P4;
-		break;
-	case 0x0F06:
-		flag = CPU_INTEL_XEON_MP;
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-/* AMD */
-static int get_amd_modelflag(unsigned model)
-{
-	int flag;
-
-	switch (model >> 8) {
-	case 0x6:
-		flag = CPU_AMD_K6;
-		break;
-	case 0x7:
-		flag = CPU_AMD_K7;
-		break;
-	case 0x8:
-		flag = CPU_AMD_K8;
-		break;
-	case 0xf:
-		flag = CPU_AMD_0F;
-		break;
-	case 0x10:
-		flag = CPU_AMD_10;
-		break;
-	case 0x11:
-		flag = CPU_AMD_11;
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-static int get_cpu_modelflag(unsigned cpu)
-{
-	int flag;
-
-	flag = per_cpu(cpu_model, cpu);
-
-	switch (flag >> 16) {
-	case X86_VENDOR_INTEL:
-		flag = get_intel_modelflag(flag);
-		break;
-	case X86_VENDOR_AMD:
-		flag = get_amd_modelflag(flag & 0xffff);
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-static int get_cpu_range_count(unsigned cpu)
-{
-	int index;
-
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		index = ARRAY_SIZE(cpu_intel_range);
-		break;
-	case X86_VENDOR_AMD:
-		index = ARRAY_SIZE(cpu_amd_range);
-		break;
-	default:
-		index = 0;
-		break;
-	}
-
-	return index;
-}
-
 static int is_typeflag_valid(unsigned cpu, unsigned flag)
 {
-	unsigned vendor, modelflag;
-	int i, index;
+	int i;
 
 	/* Standard Registers should be always valid */
 	if (flag >= CPU_TSS)
 		return 1;
 
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	vendor = per_cpu(cpu_model, cpu) >> 16;
-	index = get_cpu_range_count(cpu);
-
-	for (i = 0; i < index; i++) {
-		switch (vendor) {
-		case X86_VENDOR_INTEL:
-			if ((cpu_intel_range[i].model & modelflag) &&
-			    (cpu_intel_range[i].flag & flag))
-				return 1;
-			break;
-		case X86_VENDOR_AMD:
-			if ((cpu_amd_range[i].model & modelflag) &&
-			    (cpu_amd_range[i].flag & flag))
-				return 1;
-			break;
-		}
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
+		if (cpu_reg_range[i].flag == flag)
+			return 1;
 	}
 
 	/* Invalid */
@@ -385,26 +183,11 @@
 static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
 			      int index, unsigned flag)
 {
-	unsigned modelflag;
-
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	*max = 0;
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		if ((cpu_intel_range[index].model & modelflag) &&
-		    (cpu_intel_range[index].flag & flag)) {
-			*min = cpu_intel_range[index].min;
-			*max = cpu_intel_range[index].max;
-		}
-		break;
-	case X86_VENDOR_AMD:
-		if ((cpu_amd_range[index].model & modelflag) &&
-		    (cpu_amd_range[index].flag & flag)) {
-			*min = cpu_amd_range[index].min;
-			*max = cpu_amd_range[index].max;
-		}
-		break;
-	}
+	if (cpu_reg_range[index].flag == flag) {
+		*min = cpu_reg_range[index].min;
+		*max = cpu_reg_range[index].max;
+	} else
+		*max = 0;
 
 	return *max;
 }
@@ -434,7 +217,7 @@
 	unsigned msr, msr_min, msr_max;
 	struct cpu_private *priv;
 	u32 low, high;
-	int i, range;
+	int i;
 
 	if (seq) {
 		priv = seq->private;
@@ -446,9 +229,7 @@
 		}
 	}
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
 			continue;
 
@@ -588,8 +369,20 @@
 	seq_printf(seq, " TMICT\t\t: %08x\n",  apic_read(APIC_TMICT));
 	seq_printf(seq, " TMCCT\t\t: %08x\n",  apic_read(APIC_TMCCT));
 	seq_printf(seq, " TDCR\t\t: %08x\n",  apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		unsigned int i, v, maxeilvt;
 
+		v = apic_read(APIC_EFEAT);
+		maxeilvt = (v >> 16) & 0xff;
+		seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+		seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
+
+		for (i = 0; i < maxeilvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+		}
+	}
+#endif /* CONFIG_X86_LOCAL_APIC */
 	seq_printf(seq, "\n MSR\t:\n");
 }
 
@@ -788,13 +581,11 @@
 {
 	struct dentry *cpu_dentry = NULL;
 	unsigned reg, reg_min, reg_max;
-	int i, range, err = 0;
+	int i, err = 0;
 	char reg_dir[12];
 	u32 low, high;
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
 				   cpu_base[type].flag))
 			continue;
@@ -850,10 +641,6 @@
 		cpui = &cpu_data(cpu);
 		if (!cpu_has(cpui, X86_FEATURE_MSR))
 			continue;
-		per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
-					   (cpui->x86 << 8) |
-					   (cpui->x86_model));
-		per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
 
 		sprintf(cpu_dir, "cpu%d", cpu);
 		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c8398..f138c6c 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -220,11 +220,14 @@
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver"
+	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
 	select CPU_FREQ_TABLE
-	depends on X86_32
+	depends on X86_32 && EXPERIMENTAL
 	help
-	  This adds the CPUFreq driver for VIA C7 processors.
+	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
+	  does not have any safeguards to prevent operating the CPU out of spec
+	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
+	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
 
 	  If in doubt, say N.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 54b6de2..ae9b5032 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -90,11 +90,7 @@
 {
 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
-	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-	    !cpu_has(cpu, X86_FEATURE_EST))
-		return 0;
-
-	return 1;
+	return cpu_has(cpu, X86_FEATURE_EST);
 }
 
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
@@ -550,7 +546,7 @@
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var_node(
+		if (!zalloc_cpumask_var_node(
 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
 			GFP_KERNEL, cpu_to_node(i))) {
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index a8363e5..d47c775 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -322,7 +322,7 @@
 		goto err0;
 	}
 
-	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
 								GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto err05;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f6b32d1..cf52215 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -835,7 +835,7 @@
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val = -ENODEV;
-	acpi_integer space_id;
+	acpi_integer control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -848,12 +848,13 @@
 		goto err_out;
 	}
 
-	space_id = data->acpi_data.control_register.space_id;
-	if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-		(space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	control = data->acpi_data.control_register.space_id;
+	status = data->acpi_data.status_register.space_id;
+
+	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			space_id);
+			control, status);
 		goto err_out;
 	}
 
@@ -886,7 +887,7 @@
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
 		printk(KERN_ERR PFX
 				"unable to alloc powernow_k8_data cpumask\n");
 		ret_val = -ENOMEM;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index c9f1fdc..55c831e 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -471,7 +471,7 @@
 
 	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
 		return -ENOMEM;
-	if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
 		free_cpumask_var(saved_mask);
 		return -ENOMEM;
 	}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7437fa1..daed39b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -229,12 +229,12 @@
 }
 #endif
 
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = hard_smp_processor_id();
+	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
@@ -400,7 +400,7 @@
 	}
 
 	/* Work around errata */
-	srat_detect_node();
+	srat_detect_node(c);
 
 	if (cpu_has(c, X86_FEATURE_VMX))
 		detect_vmx_virtcap(c);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 483eda9..789efe2 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,6 +17,7 @@
 
 #include <asm/processor.h>
 #include <asm/smp.h>
+#include <asm/k8.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -159,14 +160,6 @@
 	unsigned long can_disable;
 };
 
-#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
-static struct pci_device_id k8_nb_id[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
-	{}
-};
-#endif
-
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -207,10 +200,17 @@
 };
 
 static const unsigned short __cpuinitconst assocs[] = {
-	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
-	[8] = 16, [0xa] = 32, [0xb] = 48,
+	[1] = 1,
+	[2] = 2,
+	[4] = 4,
+	[6] = 8,
+	[8] = 16,
+	[0xa] = 32,
+	[0xb] = 48,
 	[0xc] = 64,
-	[0xf] = 0xffff // ??
+	[0xd] = 96,
+	[0xe] = 128,
+	[0xf] = 0xffff /* fully associative - no way to show this currently */
 };
 
 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
@@ -271,7 +271,8 @@
 	eax->split.type = types[leaf];
 	eax->split.level = levels[leaf];
 	if (leaf == 3)
-		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+		eax->split.num_threads_sharing =
+			current_cpu_data.x86_max_cores - 1;
 	else
 		eax->split.num_threads_sharing = 0;
 	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
@@ -291,6 +292,14 @@
 {
 	if (index < 3)
 		return;
+
+	if (boot_cpu_data.x86 == 0x11)
+		return;
+
+	/* see erratum #382 */
+	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+		return;
+
 	this_leaf->can_disable = 1;
 }
 
@@ -696,98 +705,76 @@
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-#ifdef CONFIG_PCI
-static struct pci_dev *get_k8_northbridge(int node)
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
 {
-	struct pci_dev *dev = NULL;
-	int i;
-
-	for (i = 0; i <= node; i++) {
-		do {
-			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
-			if (!dev)
-				break;
-		} while (!pci_match_id(&k8_nb_id[0], dev));
-		if (!dev)
-			break;
-	}
-	return dev;
-}
-#else
-static struct pci_dev *get_k8_northbridge(int node)
-{
-	return NULL;
-}
-#endif
-
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
-{
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	ssize_t ret = 0;
-	int i;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
 
 	if (!this_leaf->can_disable)
-		return sprintf(buf, "Feature not enabled\n");
-
-	dev = get_k8_northbridge(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
 		return -EINVAL;
-	}
 
-	for (i = 0; i < 2; i++) {
-		unsigned int reg;
+	if (!dev)
+		return -EINVAL;
 
-		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
-
-		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
-		ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",  
-			buf,
-			reg & 0x80000000 ? "Disabled" : "Allowed",
-			reg & 0x40000000 ? "Disabled" : "Allowed");
-		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
-			buf, (reg & 0x30000) >> 16, reg & 0xfff);
-	}
-	return ret;
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "%x\n", reg);
 }
 
-static ssize_t
-store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
-		    size_t count)
+#define SHOW_CACHE_DISABLE(index)					\
+static ssize_t								\
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)  	\
+{									\
+	return show_cache_disable(this_leaf, buf, index);		\
+}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
 {
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	unsigned int ret, index, val;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
+	unsigned int scrubber = 0;
 
 	if (!this_leaf->can_disable)
-		return 0;
-
-	if (strlen(buf) > 15)
 		return -EINVAL;
 
-	ret = sscanf(buf, "%x %x", &index, &val);
-	if (ret != 2)
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!dev)
 		return -EINVAL;
-	if (index > 1)
+
+	if (strict_strtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
 	val |= 0xc0000000;
-	dev = get_k8_northbridge(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
-		return -EINVAL;
-	}
+
+	pci_read_config_dword(dev, 0x58, &scrubber);
+	scrubber &= ~0x1f000000;
+	pci_write_config_dword(dev, 0x58, scrubber);
 
 	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
 	wbinvd();
 	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-
-	return 1;
+	return count;
 }
 
+#define STORE_CACHE_DISABLE(index)					\
+static ssize_t								\
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,	     	\
+			    const char *buf, size_t count)		\
+{									\
+	return store_cache_disable(this_leaf, buf, count, index);	\
+}
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
 struct _cache_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -808,7 +795,10 @@
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
 
 static struct attribute * default_attrs[] = {
 	&type.attr,
@@ -820,7 +810,8 @@
 	&size.attr,
 	&shared_cpu_map.attr,
 	&shared_cpu_list.attr,
-	&cache_disable.attr,
+	&cache_disable_0.attr,
+	&cache_disable_1.attr,
 	NULL
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 6fb0b35..289cc48 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -420,6 +420,7 @@
  out2:
 	atomic_dec(&mce_entry);
 }
+EXPORT_SYMBOL_GPL(do_machine_check);
 
 #ifdef CONFIG_X86_MCE_INTEL
 /***
@@ -1163,7 +1164,7 @@
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
 
 	err = mce_init_banks();
 	if (err)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index cef3ee3..65a0fce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -15,7 +15,6 @@
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
-#include <asm/apic.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ce0fe4b..1d584a1 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -808,7 +808,7 @@
 
 	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
@@ -1003,7 +1003,7 @@
 	 */
 	if (!is_cpu(INTEL) || disable_mtrr_trim)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d21d4fb..0543f69 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -20,9 +20,9 @@
 };
 
 static struct fixed_range_block fixed_range_blocks[] = {
-	{ MTRRfix64K_00000_MSR, 1 }, /* one  64k MTRR  */
-	{ MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
-	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
+	{ MSR_MTRRfix64K_00000, 1 }, /* one  64k MTRR  */
+	{ MSR_MTRRfix16K_80000, 2 }, /* two  16k MTRRs */
+	{ MSR_MTRRfix4K_C0000,  8 }, /* eight 4k MTRRs */
 	{}
 };
 
@@ -194,12 +194,12 @@
 
 	k8_check_syscfg_dram_mod_en();
 
-	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+	rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
 
 	for (i = 0; i < 2; i++)
-		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+		rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
 	for (i = 0; i < 8; i++)
-		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+		rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
 void mtrr_save_fixed_ranges(void *info)
@@ -310,7 +310,7 @@
 
 	vrs = mtrr_state.var_ranges;
 
-	rdmsr(MTRRcap_MSR, lo, dummy);
+	rdmsr(MSR_MTRRcap, lo, dummy);
 	mtrr_state.have_fixed = (lo >> 8) & 1;
 
 	for (i = 0; i < num_var_ranges; i++)
@@ -318,7 +318,7 @@
 	if (mtrr_state.have_fixed)
 		get_fixed_ranges(mtrr_state.fixed_ranges);
 
-	rdmsr(MTRRdefType_MSR, lo, dummy);
+	rdmsr(MSR_MTRRdefType, lo, dummy);
 	mtrr_state.def_type = (lo & 0xff);
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 
@@ -583,10 +583,10 @@
 	__flush_tlb();
 
 	/*  Save MTRR state */
-	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
 	/*  Disable MTRRs, and set the default type to uncached  */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
@@ -595,7 +595,7 @@
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 		
 	/*  Enable caches  */
 	write_cr0(read_cr0() & 0xbfffffff);
@@ -707,7 +707,7 @@
 static int generic_have_wrcomb(void)
 {
 	unsigned long config, dummy;
-	rdmsr(MTRRcap_MSR, config, dummy);
+	rdmsr(MSR_MTRRcap, config, dummy);
 	return (config & (1 << 10));
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 03cda01..8fc248b 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -104,7 +104,7 @@
 	unsigned long config = 0, dummy;
 
 	if (use_intel()) {
-		rdmsr(MTRRcap_MSR, config, dummy);
+		rdmsr(MSR_MTRRcap, config, dummy);
 	} else if (is_cpu(AMD))
 		config = 2;
 	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 77f67f7..7538b76 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -5,21 +5,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 
-#define MTRRcap_MSR     0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 7f7e275..1f5fb15 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -35,7 +35,7 @@
 
 		if (use_intel())
 			/*  Save MTRR state */
-			rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+			rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 		else
 			/* Cyrix ARRs - everything else were excluded at the top */
 			ctxt->ccr3 = getCx86(CX86_CCR3);
@@ -46,7 +46,7 @@
 {
 	if (use_intel())
 		/*  Disable MTRRs, and set the default type to uncached  */
-		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+		mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
 		      ctxt->deftype_hi);
 	else if (is_cpu(CYRIX))
 		/* Cyrix ARRs - everything else were excluded at the top */
@@ -64,7 +64,7 @@
 		/*  Restore MTRRdefType  */
 		if (use_intel())
 			/* Intel (P6) standard MTRRs */
-			mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+			mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 		else
 			/* Cyrix ARRs - everything else was excluded at the top */
 			setCx86(CX86_CCR3, ctxt->ccr3);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 0000000..275bc14
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1711 @@
+/*
+ * Performance counter x86 architecture code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+
+static u64 perf_counter_mask __read_mostly;
+
+struct cpu_hw_counters {
+	struct perf_counter	*counters[X86_PMC_IDX_MAX];
+	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		interrupts;
+	int			enabled;
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+	const char	*name;
+	int		version;
+	int		(*handle_irq)(struct pt_regs *);
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
+	void		(*enable)(struct hw_perf_counter *, int);
+	void		(*disable)(struct hw_perf_counter *, int);
+	unsigned	eventsel;
+	unsigned	perfctr;
+	u64		(*event_map)(int);
+	u64		(*raw_event)(u64);
+	int		max_events;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		counter_bits;
+	u64		counter_mask;
+	u64		max_period;
+	u64		intel_ctrl;
+};
+
+static struct x86_pmu x86_pmu __read_mostly;
+
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
+	.enabled = 1,
+};
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+};
+
+static u64 intel_pmu_event_map(int event)
+{
+	return intel_perfmon_event_map[event];
+}
+
+/*
+ * Generalized hw caching related event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'event makes no sense on
+ * this CPU', any other value means the raw event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static u64 __read_mostly hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static const u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(CORE_EVNTSEL_EVENT_MASK |	\
+	 CORE_EVNTSEL_UNIT_MASK  |	\
+	 CORE_EVNTSEL_EDGE_MASK  |	\
+	 CORE_EVNTSEL_INV_MASK  |	\
+	 CORE_EVNTSEL_COUNTER_MASK)
+
+	return event & CORE_EVNTSEL_MASK;
+}
+
+static const u64 amd_0f_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
+		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+};
+
+static u64 amd_pmu_event_map(int event)
+{
+	return amd_perfmon_event_map[event];
+}
+
+static u64 amd_pmu_raw_event(u64 event)
+{
+#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
+#define K7_EVNTSEL_INV_MASK	0x000800000ULL
+#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL
+
+#define K7_EVNTSEL_MASK			\
+	(K7_EVNTSEL_EVENT_MASK |	\
+	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_EDGE_MASK  |	\
+	 K7_EVNTSEL_INV_MASK   |	\
+	 K7_EVNTSEL_COUNTER_MASK)
+
+	return event & K7_EVNTSEL_MASK;
+}
+
+/*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static u64
+x86_perf_counter_update(struct perf_counter *counter,
+			struct hw_perf_counter *hwc, int idx)
+{
+	int shift = 64 - x86_pmu.counter_bits;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous counter value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically:
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static atomic_t active_counters;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+static bool reserve_pmc_hardware(void)
+{
+	int i;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		disable_lapic_nmi_watchdog();
+
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+			goto perfctr_fail;
+	}
+
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+			goto eventsel_fail;
+	}
+
+	return true;
+
+eventsel_fail:
+	for (i--; i >= 0; i--)
+		release_evntsel_nmi(x86_pmu.eventsel + i);
+
+	i = x86_pmu.num_counters;
+
+perfctr_fail:
+	for (i--; i >= 0; i--)
+		release_perfctr_nmi(x86_pmu.perfctr + i);
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+
+	return false;
+}
+
+static void release_pmc_hardware(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		release_perfctr_nmi(x86_pmu.perfctr + i);
+		release_evntsel_nmi(x86_pmu.eventsel + i);
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+}
+
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static inline int x86_pmu_initialized(void)
+{
+	return x86_pmu.handle_irq != NULL;
+}
+
+static inline int
+set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	u64 config, val;
+
+	config = attr->config;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+	if (val == 0)
+		return -ENOENT;
+
+	if (val == -1)
+		return -EINVAL;
+
+	hwc->config |= val;
+
+	return 0;
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __hw_perf_counter_init(struct perf_counter *counter)
+{
+	struct perf_counter_attr *attr = &counter->attr;
+	struct hw_perf_counter *hwc = &counter->hw;
+	int err;
+
+	if (!x86_pmu_initialized())
+		return -ENODEV;
+
+	err = 0;
+	if (!atomic_inc_not_zero(&active_counters)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&active_counters);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to.
+	 */
+	if (!attr->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!attr->exclude_kernel)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period = x86_pmu.max_period;
+		hwc->last_period = hwc->sample_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	counter->destroy = hw_perf_counter_destroy;
+
+	/*
+	 * Raw event type provide the config in the event structure
+	 */
+	if (attr->type == PERF_TYPE_RAW) {
+		hwc->config |= x86_pmu.raw_event(attr->config);
+		return 0;
+	}
+
+	if (attr->type == PERF_TYPE_HW_CACHE)
+		return set_ext_hw_attr(hwc, attr);
+
+	if (attr->config >= x86_pmu.max_events)
+		return -EINVAL;
+	/*
+	 * The generic map:
+	 */
+	hwc->config |= x86_pmu.event_map(attr->config);
+
+	return 0;
+}
+
+static void intel_pmu_disable_all(void)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	int idx;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	/*
+	 * ensure we write the disable before we start disabling the
+	 * counters proper, so that amd_pmu_enable_counter() does the
+	 * right thing.
+	 */
+	barrier();
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		u64 val;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+			continue;
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+	}
+}
+
+void hw_perf_disable(void)
+{
+	if (!x86_pmu_initialized())
+		return;
+	return x86_pmu.disable_all();
+}
+
+static void intel_pmu_enable_all(void)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+}
+
+static void amd_pmu_enable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	int idx;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		u64 val;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			continue;
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+	}
+}
+
+void hw_perf_enable(void)
+{
+	if (!x86_pmu_initialized())
+		return;
+	x86_pmu.enable_all();
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	int err;
+	err = checking_wrmsrl(hwc->config_base + idx,
+			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+}
+
+static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	int err;
+	err = checking_wrmsrl(hwc->config_base + idx,
+			      hwc->config);
+}
+
+static inline void
+intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+	int err;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_counter(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	x86_pmu_disable_counter(hwc, idx);
+}
+
+static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static int
+x86_perf_counter_set_period(struct perf_counter *counter,
+			     struct hw_perf_counter *hwc, int idx)
+{
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int err, ret = 0;
+
+	/*
+	 * If we are way outside a reasoable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+	/*
+	 * Quirk: certain CPUs dont like it if just 1 event is left:
+	 */
+	if (unlikely(left < 2))
+		left = 2;
+
+	if (left > x86_pmu.max_period)
+		left = x86_pmu.max_period;
+
+	per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw counter starts counting from this counter offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)-left);
+
+	err = checking_wrmsrl(hwc->counter_base + idx,
+			     (u64)(-left) & x86_pmu.counter_mask);
+
+	return ret;
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_enable_counter(hwc, idx);
+}
+
+static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	if (cpuc->enabled)
+		x86_pmu_enable_counter(hwc, idx);
+	else
+		x86_pmu_disable_counter(hwc, idx);
+}
+
+static int
+fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
+{
+	unsigned int event;
+
+	if (!x86_pmu.num_counters_fixed)
+		return -1;
+
+	/*
+	 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+					boot_cpu_data.x86_model == 28)
+		return -1;
+
+	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
+		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
+		return X86_PMC_IDX_FIXED_CPU_CYCLES;
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
+		return X86_PMC_IDX_FIXED_BUS_CYCLES;
+
+	return -1;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
+static int x86_pmu_enable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+	int idx;
+
+	idx = fixed_mode_idx(counter, hwc);
+	if (idx >= 0) {
+		/*
+		 * Try to get the fixed counter, if that is already taken
+		 * then try to get a generic counter:
+		 */
+		if (test_and_set_bit(idx, cpuc->used_mask))
+			goto try_generic;
+
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->counter_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+		hwc->idx = idx;
+	} else {
+		idx = hwc->idx;
+		/* Try to get the previous generic counter again */
+		if (test_and_set_bit(idx, cpuc->used_mask)) {
+try_generic:
+			idx = find_first_zero_bit(cpuc->used_mask,
+						  x86_pmu.num_counters);
+			if (idx == x86_pmu.num_counters)
+				return -EAGAIN;
+
+			set_bit(idx, cpuc->used_mask);
+			hwc->idx = idx;
+		}
+		hwc->config_base  = x86_pmu.eventsel;
+		hwc->counter_base = x86_pmu.perfctr;
+	}
+
+	perf_counters_lapic_init();
+
+	x86_pmu.disable(hwc, idx);
+
+	cpuc->counters[idx] = counter;
+	set_bit(idx, cpuc->active_mask);
+
+	x86_perf_counter_set_period(counter, hwc, idx);
+	x86_pmu.enable(hwc, idx);
+
+	return 0;
+}
+
+static void x86_pmu_unthrottle(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+				cpuc->counters[hwc->idx] != counter))
+		return;
+
+	x86_pmu.enable(hwc, hwc->idx);
+}
+
+void perf_counter_print_debug(void)
+{
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	struct cpu_hw_counters *cpuc;
+	unsigned long flags;
+	int cpu, idx;
+
+	if (!x86_pmu.num_counters)
+		return;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+	if (x86_pmu.version >= 2) {
+		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+		pr_info("\n");
+		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
+		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
+		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+	}
+	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
+		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
+
+		prev_left = per_cpu(prev_left[idx], cpu);
+
+		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
+			cpu, idx, pmc_ctrl);
+		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
+			cpu, idx, prev_left);
+	}
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+	}
+	local_irq_restore(flags);
+}
+
+static void x86_pmu_disable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+	int idx = hwc->idx;
+
+	/*
+	 * Must be done before we disable, otherwise the nmi handler
+	 * could reenable again:
+	 */
+	clear_bit(idx, cpuc->active_mask);
+	x86_pmu.disable(hwc, idx);
+
+	/*
+	 * Make sure the cleared pointer becomes visible before we
+	 * (potentially) free the counter:
+	 */
+	barrier();
+
+	/*
+	 * Drain the remaining delta count out of a counter
+	 * that we are disabling:
+	 */
+	x86_perf_counter_update(counter, hwc, idx);
+	cpuc->counters[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+}
+
+/*
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
+ */
+static int intel_pmu_save_and_restart(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	int idx = hwc->idx;
+	int ret;
+
+	x86_perf_counter_update(counter, hwc, idx);
+	ret = x86_perf_counter_set_period(counter, hwc, idx);
+
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		intel_pmu_enable_counter(hwc, idx);
+
+	return ret;
+}
+
+static void intel_pmu_reset(void)
+{
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_counters)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+
+	local_irq_restore(flags);
+}
+
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_counters *cpuc;
+	int bit, cpu, loops;
+	u64 ack, status;
+
+	data.regs = regs;
+	data.addr = 0;
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+	perf_disable();
+	status = intel_pmu_get_status();
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
+
+	loops = 0;
+again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
+		perf_counter_print_debug();
+		intel_pmu_reset();
+		perf_enable();
+		return 1;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+	ack = status;
+	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_counter *counter = cpuc->counters[bit];
+
+		clear_bit(bit, (unsigned long *) &status);
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(counter))
+			continue;
+
+		if (perf_counter_overflow(counter, 1, &data))
+			intel_pmu_disable_counter(&counter->hw, bit);
+	}
+
+	intel_pmu_ack_status(ack);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = intel_pmu_get_status();
+	if (status)
+		goto again;
+
+	perf_enable();
+
+	return 1;
+}
+
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_counters *cpuc;
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	int cpu, idx, handled = 0;
+	u64 val;
+
+	data.regs = regs;
+	data.addr = 0;
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		counter = cpuc->counters[idx];
+		hwc = &counter->hw;
+
+		val = x86_perf_counter_update(counter, hwc, idx);
+		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+			continue;
+
+		/*
+		 * counter overflow
+		 */
+		handled		= 1;
+		data.period	= counter->hw.last_period;
+
+		if (!x86_perf_counter_set_period(counter, hwc, idx))
+			continue;
+
+		if (perf_counter_overflow(counter, 1, &data))
+			amd_pmu_disable_counter(hwc, idx);
+	}
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+void smp_perf_pending_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	ack_APIC_irq();
+	inc_irq_stat(apic_pending_irqs);
+	perf_counter_do_pending();
+	irq_exit();
+}
+
+void set_perf_counter_pending(void)
+{
+	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+}
+
+void perf_counters_lapic_init(void)
+{
+	if (!x86_pmu_initialized())
+		return;
+
+	/*
+	 * Always use NMI for PMU
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static int __kprobes
+perf_counter_nmi_handler(struct notifier_block *self,
+			 unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct pt_regs *regs;
+
+	if (!atomic_read(&active_counters))
+		return NOTIFY_DONE;
+
+	switch (cmd) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	regs = args->regs;
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	/*
+	 * Can't rely on the handled return value to say it was our NMI, two
+	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
+	 *
+	 * If the first NMI handles both, the latter will be empty and daze
+	 * the CPU.
+	 */
+	x86_pmu.handle_irq(regs);
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
+	.notifier_call		= perf_counter_nmi_handler,
+	.next			= NULL,
+	.priority		= 1
+};
+
+static struct x86_pmu intel_pmu = {
+	.name			= "Intel",
+	.handle_irq		= intel_pmu_handle_irq,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
+	.enable			= intel_pmu_enable_counter,
+	.disable		= intel_pmu_disable_counter,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic counter period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+};
+
+static struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= amd_pmu_handle_irq,
+	.disable_all		= amd_pmu_disable_all,
+	.enable_all		= amd_pmu_enable_all,
+	.enable			= amd_pmu_enable_counter,
+	.disable		= amd_pmu_disable_counter,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_counters		= 4,
+	.counter_bits		= 48,
+	.counter_mask		= (1ULL << 48) - 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+};
+
+static int intel_pmu_init(void)
+{
+	union cpuid10_edx edx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+	int version;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return -ENODEV;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Branch Misses Retired Event or not.
+	 */
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return -ENODEV;
+
+	version = eax.split.version_id;
+	if (version < 2)
+		return -ENODEV;
+
+	x86_pmu				= intel_pmu;
+	x86_pmu.version			= version;
+	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.counter_bits		= eax.split.bit_width;
+	x86_pmu.counter_mask		= (1ULL << eax.split.bit_width) - 1;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
+	 * assume at least 3 counters:
+	 */
+	x86_pmu.num_counters_fixed	= max((int)edx.split.num_counters_fixed, 3);
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+	/*
+	 * Install the hw-cache-events table:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Core2 events, ");
+		break;
+	default:
+	case 26:
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Nehalem/Corei7 events, ");
+		break;
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Atom events, ");
+		break;
+	}
+	return 0;
+}
+
+static int amd_pmu_init(void)
+{
+	x86_pmu = amd_pmu;
+
+	switch (boot_cpu_data.x86) {
+	case 0x0f:
+	case 0x10:
+	case 0x11:
+		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("AMD Family 0f/10/11 events, ");
+		break;
+	}
+	return 0;
+}
+
+void __init init_hw_perf_counters(void)
+{
+	int err;
+
+	pr_info("Performance Counters: ");
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		err = intel_pmu_init();
+		break;
+	case X86_VENDOR_AMD:
+		err = amd_pmu_init();
+		break;
+	default:
+		return;
+	}
+	if (err != 0) {
+		pr_cont("no PMU driver, software counters only.\n");
+		return;
+	}
+
+	pr_cont("%s PMU driver.\n", x86_pmu.name);
+
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
+		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+	}
+	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
+	perf_max_counters = x86_pmu.num_counters;
+
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
+		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+	}
+
+	perf_counter_mask |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+
+	perf_counters_lapic_init();
+	register_die_notifier(&perf_counter_nmi_notifier);
+
+	pr_info("... version:                 %d\n",     x86_pmu.version);
+	pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
+	pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
+	pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
+	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
+	pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
+	pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
+}
+
+static inline void x86_pmu_read(struct perf_counter *counter)
+{
+	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
+static const struct pmu pmu = {
+	.enable		= x86_pmu_enable,
+	.disable	= x86_pmu_disable,
+	.read		= x86_pmu_read,
+	.unthrottle	= x86_pmu_unthrottle,
+};
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+{
+	int err;
+
+	err = __hw_perf_counter_init(counter);
+	if (err)
+		return ERR_PTR(err);
+
+	return &pmu;
+}
+
+/*
+ * callchain support
+ */
+
+static inline
+void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+{
+	if (entry->nr < MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
+
+static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+
+
+static void
+backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	/* Ignore warnings */
+}
+
+static void backtrace_warning(void *data, char *msg)
+{
+	/* Ignore warnings */
+}
+
+static int backtrace_stack(void *data, char *name)
+{
+	/* Don't bother with IRQ stacks for now */
+	return -1;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+	struct perf_callchain_entry *entry = data;
+
+	if (reliable)
+		callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+	.warning		= backtrace_warning,
+	.warning_symbol		= backtrace_warning_symbol,
+	.stack			= backtrace_stack,
+	.address		= backtrace_address,
+};
+
+static void
+perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	unsigned long bp;
+	char *stack;
+	int nr = entry->nr;
+
+	callchain_store(entry, instruction_pointer(regs));
+
+	stack = ((char *)regs + sizeof(struct pt_regs));
+#ifdef CONFIG_FRAME_POINTER
+	bp = frame_pointer(regs);
+#else
+	bp = 0;
+#endif
+
+	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+
+	entry->kernel = entry->nr - nr;
+}
+
+
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
+
+	return ret;
+}
+
+static void
+perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	struct stack_frame frame;
+	const void __user *fp;
+	int nr = entry->nr;
+
+	regs = (struct pt_regs *)current->thread.sp0 - 1;
+	fp   = (void __user *)regs->bp;
+
+	callchain_store(entry, regs->ip);
+
+	while (entry->nr < MAX_STACK_DEPTH) {
+		frame.next_fp	     = NULL;
+		frame.return_address = 0;
+
+		if (!copy_stack_frame(fp, &frame))
+			break;
+
+		if ((unsigned long)fp < user_stack_pointer(regs))
+			break;
+
+		callchain_store(entry, frame.return_address);
+		fp = frame.next_fp;
+	}
+
+	entry->user = entry->nr - nr;
+}
+
+static void
+perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	int is_user;
+
+	if (!regs)
+		return;
+
+	is_user = user_mode(regs);
+
+	if (!current || current->pid == 0)
+		return;
+
+	if (is_user && current->state != TASK_RUNNING)
+		return;
+
+	if (!is_user)
+		perf_callchain_kernel(regs, entry);
+
+	if (current->mm)
+		perf_callchain_user(regs, entry);
+}
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	struct perf_callchain_entry *entry;
+
+	if (in_nmi())
+		entry = &__get_cpu_var(nmi_entry);
+	else
+		entry = &__get_cpu_var(irq_entry);
+
+	entry->nr = 0;
+	entry->hv = 0;
+	entry->kernel = 0;
+	entry->user = 0;
+
+	perf_do_callchain(regs, entry);
+
+	return entry;
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f6c70a1..d6f5b9f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,8 +19,8 @@
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 
-#include <asm/genapic.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/apic.h>
+#include <asm/perf_counter.h>
 
 struct nmi_watchdog_ctlblk {
 	unsigned int cccr_msr;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 87b67e3..48bfe13 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -19,45 +19,61 @@
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/trace_clock.h>
 
 #include <asm/ds.h>
 
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-
+#include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-	/* the name of the configuration */
-	const char *name;
-	/* the size of one pointer-typed field in the DS structure and
-	   in the BTS and PEBS buffers in bytes;
-	   this covers the first 8 DS fields related to buffer management. */
-	unsigned char  sizeof_field;
-	/* the size of a BTS/PEBS record in bytes */
-	unsigned char  sizeof_rec[2];
-	/* a series of bit-masks to control various features indexed
-	 * by enum ds_feature */
-	unsigned long ctl[dsf_ctl_max];
+	/* The name of the configuration: */
+	const char		*name;
+
+	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
+	unsigned char		sizeof_ptr_field;
+
+	/* The size of a BTS/PEBS record in bytes: */
+	unsigned char		sizeof_rec[2];
+
+	/* The number of pebs counter reset values in the DS structure. */
+	unsigned char		nr_counter_reset;
+
+	/* Control bit-masks indexed by enum ds_feature: */
+	unsigned long		ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
 
-#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS		0x80
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS		(3 * 8)
 
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT		(1 << 3)
+
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS	8
+
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE	8
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL				  \
+	( ds_cfg.ctl[dsf_bts]			| \
+	  ds_cfg.ctl[dsf_bts_kernel]		| \
+	  ds_cfg.ctl[dsf_bts_user]		| \
+	  ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@
  * to identify tracers.
  */
 struct ds_tracer {
-	/* the DS context (partially) owned by this tracer */
-	struct ds_context *context;
-	/* the buffer provided on ds_request() and its size in bytes */
-	void *buffer;
-	size_t size;
+	/* The DS context (partially) owned by this tracer. */
+	struct ds_context	*context;
+	/* The buffer provided on ds_request() and its size in bytes. */
+	void			*buffer;
+	size_t			size;
 };
 
 struct bts_tracer {
-	/* the common DS part */
-	struct ds_tracer ds;
-	/* the trace including the DS configuration */
-	struct bts_trace trace;
-	/* buffer overflow notification function */
-	bts_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct bts_trace	trace;
+
+	/* Buffer overflow notification function: */
+	bts_ovfl_callback_t	ovfl;
+
+	/* Active flags affecting trace collection. */
+	unsigned int		flags;
 };
 
 struct pebs_tracer {
-	/* the common DS part */
-	struct ds_tracer ds;
-	/* the trace including the DS configuration */
-	struct pebs_trace trace;
-	/* buffer overflow notification function */
-	pebs_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct pebs_trace	trace;
+
+	/* Buffer overflow notification function: */
+	pebs_ovfl_callback_t	ovfl;
 };
 
 /*
@@ -97,6 +120,7 @@
  *
  * The DS configuration consists of the following fields; different
  * architetures vary in the size of those fields.
+ *
  * - double-word aligned base linear address of the BTS buffer
  * - write pointer into the BTS buffer
  * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@
 };
 
 enum ds_qualifier {
-	ds_bts  = 0,
+	ds_bts = 0,
 	ds_pebs
 };
 
-static inline unsigned long ds_get(const unsigned char *base,
-				   enum ds_qualifier qual, enum ds_field field)
+static inline unsigned long
+ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	return *(unsigned long *)base;
 }
 
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
-			  enum ds_field field, unsigned long value)
+static inline void
+ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
+       unsigned long value)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	(*(unsigned long *)base) = value;
 }
 
@@ -159,7 +184,6 @@
  */
 static DEFINE_SPINLOCK(ds_lock);
 
-
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
  * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@
  */
 static atomic_t tracers = ATOMIC_INIT(0);
 
-static inline void get_tracer(struct task_struct *task)
+static inline int get_tracer(struct task_struct *task)
 {
-	if (task)
+	int error;
+
+	spin_lock_irq(&ds_lock);
+
+	if (task) {
+		error = -EPERM;
+		if (atomic_read(&tracers) < 0)
+			goto out;
 		atomic_inc(&tracers);
-	else
+	} else {
+		error = -EPERM;
+		if (atomic_read(&tracers) > 0)
+			goto out;
 		atomic_dec(&tracers);
+	}
+
+	error = 0;
+out:
+	spin_unlock_irq(&ds_lock);
+	return error;
 }
 
 static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@
 		atomic_inc(&tracers);
 }
 
-static inline int check_tracer(struct task_struct *task)
-{
-	return task ?
-		(atomic_read(&tracers) >= 0) :
-		(atomic_read(&tracers) <= 0);
-}
-
-
 /*
  * The DS context is either attached to a thread or to a cpu:
  * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-	unsigned char ds[MAX_SIZEOF_DS];
-	/* the owner of the BTS and PEBS configuration, respectively */
-	struct bts_tracer *bts_master;
-	struct pebs_tracer *pebs_master;
-	/* use count */
-	unsigned long count;
-	/* a pointer to the context location inside the thread_struct
-	 * or the per_cpu context array */
-	struct ds_context **this;
-	/* a pointer to the task owning this context, or NULL, if the
-	 * context is owned by a cpu */
-	struct task_struct *task;
+	/* The DS configuration; goes into MSR_IA32_DS_AREA: */
+	unsigned char		ds[MAX_SIZEOF_DS];
+
+	/* The owner of the BTS and PEBS configuration, respectively: */
+	struct bts_tracer	*bts_master;
+	struct pebs_tracer	*pebs_master;
+
+	/* Use count: */
+	unsigned long		count;
+
+	/* Pointer to the context pointer field: */
+	struct ds_context	**this;
+
+	/* The traced task; NULL for cpu tracing: */
+	struct task_struct	*task;
+
+	/* The traced cpu; only valid if task is NULL: */
+	int			cpu;
 };
 
-static DEFINE_PER_CPU(struct ds_context *, system_context_array);
-
-#define system_context per_cpu(system_context_array, smp_processor_id())
+static DEFINE_PER_CPU(struct ds_context *, cpu_context);
 
 
-static inline struct ds_context *ds_get_context(struct task_struct *task)
+static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &system_context);
+		(task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
 	struct ds_context *context = NULL;
 	struct ds_context *new_context = NULL;
-	unsigned long irq;
 
 	/* Chances are small that we already have a context. */
 	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
 	if (!new_context)
 		return NULL;
 
-	spin_lock_irqsave(&ds_lock, irq);
+	spin_lock_irq(&ds_lock);
 
 	context = *p_context;
-	if (!context) {
+	if (likely(!context)) {
 		context = new_context;
 
 		context->this = p_context;
 		context->task = task;
+		context->cpu = cpu;
 		context->count = 0;
 
-		if (task)
-			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-		if (!task || (task == current))
-			wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
-
 		*p_context = context;
 	}
 
 	context->count++;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
 	if (context != new_context)
 		kfree(new_context);
@@ -275,8 +304,9 @@
 	return context;
 }
 
-static inline void ds_put_context(struct ds_context *context)
+static void ds_put_context(struct ds_context *context)
 {
+	struct task_struct *task;
 	unsigned long irq;
 
 	if (!context)
@@ -291,17 +321,55 @@
 
 	*(context->this) = NULL;
 
-	if (context->task)
-		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+	task = context->task;
 
-	if (!context->task || (context->task == current))
-		wrmsrl(MSR_IA32_DS_AREA, 0);
+	if (task)
+		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+
+	/*
+	 * We leave the (now dangling) pointer to the DS configuration in
+	 * the DS_AREA msr. This is as good or as bad as replacing it with
+	 * NULL - the hardware would crash if we enabled tracing.
+	 *
+	 * This saves us some problems with having to write an msr on a
+	 * different cpu while preventing others from doing the same for the
+	 * next context for that same cpu.
+	 */
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
+	/* The context might still be in use for context switching. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
 	kfree(context);
 }
 
+static void ds_install_ds_area(struct ds_context *context)
+{
+	unsigned long ds;
+
+	ds = (unsigned long)context->ds;
+
+	/*
+	 * There is a race between the bts master and the pebs master.
+	 *
+	 * The thread/cpu access is synchronized via get/put_cpu() for
+	 * task tracing and via wrmsr_on_cpu for cpu tracing.
+	 *
+	 * If bts and pebs are collected for the same task or same cpu,
+	 * the same confiuration is written twice.
+	 */
+	if (context->task) {
+		get_cpu();
+		if (context->task == current)
+			wrmsrl(MSR_IA32_DS_AREA, ds);
+		set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+		put_cpu();
+	} else
+		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
+			     (u32)((u64)ds), (u32)((u64)ds >> 32));
+}
 
 /*
  * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@
  * The remainder of any partially written record is zeroed out.
  *
  * context: the DS context
- * qual: the buffer type
- * record: the data to write
- * size: the size of the data
+ * qual:    the buffer type
+ * record:  the data to write
+ * size:    the size of the data
  */
 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 		    const void *record, size_t size)
@@ -349,14 +417,14 @@
 		unsigned long write_size, adj_write_size;
 
 		/*
-		 * write as much as possible without producing an
+		 * Write as much as possible without producing an
 		 * overflow interrupt.
 		 *
-		 * interrupt_threshold must either be
+		 * Interrupt_threshold must either be
 		 * - bigger than absolute_maximum or
 		 * - point to a record between buffer_base and absolute_maximum
 		 *
-		 * index points to a valid record.
+		 * Index points to a valid record.
 		 */
 		base   = ds_get(context->ds, qual, ds_buffer_base);
 		index  = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@
 
 		write_end = min(end, int_th);
 
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
+		/*
+		 * If we are already beyond the interrupt threshold,
+		 * we fill the entire buffer.
+		 */
 		if (write_end <= index)
 			write_end = end;
 
@@ -383,7 +453,7 @@
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-		/* zero out trailing bytes */
+		/* Zero out trailing bytes. */
 		memset((char *)index + write_size, 0,
 		       adj_write_size - write_size);
 		index += adj_write_size;
@@ -410,7 +480,7 @@
  * Later architectures use 64bit pointers throughout, whereas earlier
  * architectures use 32bit pointers in 32bit mode.
  *
- * We compute the base address for the first 8 fields based on:
+ * We compute the base address for the fields based on:
  * - the field size stored in the DS configuration
  * - the relative field position
  *
@@ -431,23 +501,23 @@
 	bts_to,
 	bts_flags,
 
-	bts_qual = bts_from,
-	bts_jiffies = bts_to,
-	bts_pid = bts_flags,
+	bts_qual		= bts_from,
+	bts_clock		= bts_to,
+	bts_pid			= bts_flags,
 
-	bts_qual_mask = (bts_qual_max - 1),
-	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+	bts_qual_mask		= (bts_qual_max - 1),
+	bts_escape		= ((unsigned long)-1 & ~bts_qual_mask)
 };
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	base += (ds_cfg.sizeof_field * field);
+	base += (ds_cfg.sizeof_ptr_field * field);
 	return *(unsigned long *)base;
 }
 
 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-	base += (ds_cfg.sizeof_field * field);;
+	base += (ds_cfg.sizeof_ptr_field * field);;
 	(*(unsigned long *)base) = val;
 }
 
@@ -463,8 +533,8 @@
  *
  * return: bytes read/written on success; -Eerrno, otherwise
  */
-static int bts_read(struct bts_tracer *tracer, const void *at,
-		    struct bts_struct *out)
+static int
+bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 {
 	if (!tracer)
 		return -EINVAL;
@@ -478,8 +548,8 @@
 	memset(out, 0, sizeof(*out));
 	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
 		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
-		out->variant.timestamp.pid = bts_get(at, bts_pid);
+		out->variant.event.clock = bts_get(at, bts_clock);
+		out->variant.event.pid = bts_get(at, bts_pid);
 	} else {
 		out->qualifier = bts_branch;
 		out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@
 	case bts_task_arrives:
 	case bts_task_departs:
 		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
-		bts_set(raw, bts_pid, in->variant.timestamp.pid);
+		bts_set(raw, bts_clock, in->variant.event.clock);
+		bts_set(raw, bts_pid, in->variant.event.pid);
 		break;
 	default:
 		return -EINVAL;
@@ -555,7 +625,8 @@
 			     unsigned int flags) {
 	unsigned long buffer, adj;
 
-	/* adjust the buffer address and size to meet alignment
+	/*
+	 * Adjust the buffer address and size to meet alignment
 	 * constraints:
 	 * - buffer is double-word aligned
 	 * - size is multiple of record size
@@ -577,9 +648,11 @@
 	trace->begin = (void *)buffer;
 	trace->top = trace->begin;
 	trace->end = (void *)(buffer + size);
-	/* The value for 'no threshold' is -1, which will set the
+	/*
+	 * The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
+	ith *= ds_cfg.sizeof_rec[qual];
 	trace->ith = (void *)(buffer + size - ith);
 
 	trace->flags = flags;
@@ -588,18 +661,27 @@
 
 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 		      enum ds_qualifier qual, struct task_struct *task,
-		      void *base, size_t size, size_t th, unsigned int flags)
+		      int cpu, void *base, size_t size, size_t th)
 {
 	struct ds_context *context;
 	int error;
+	size_t req_size;
+
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.sizeof_rec[qual])
+		goto out;
 
 	error = -EINVAL;
 	if (!base)
 		goto out;
 
-	/* we require some space to do alignment adjustments below */
+	req_size = ds_cfg.sizeof_rec[qual];
+	/* We might need space for alignment adjustments. */
+	if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
+		req_size += DS_ALIGNMENT;
+
 	error = -EINVAL;
-	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+	if (size < req_size)
 		goto out;
 
 	if (th != (size_t)-1) {
@@ -614,182 +696,318 @@
 	tracer->size = size;
 
 	error = -ENOMEM;
-	context = ds_get_context(task);
+	context = ds_get_context(task, cpu);
 	if (!context)
 		goto out;
 	tracer->context = context;
 
-	ds_init_ds_trace(trace, qual, base, size, th, flags);
+	/*
+	 * Defer any tracer-specific initialization work for the context until
+	 * context ownership has been clarified.
+	 */
 
 	error = 0;
  out:
 	return error;
 }
 
-struct bts_tracer *ds_request_bts(struct task_struct *task,
-				  void *base, size_t size,
-				  bts_ovfl_callback_t ovfl, size_t th,
-				  unsigned int flags)
+static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
+					 void *base, size_t size,
+					 bts_ovfl_callback_t ovfl, size_t th,
+					 unsigned int flags)
 {
 	struct bts_tracer *tracer;
-	unsigned long irq;
 	int error;
 
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.ctl[dsf_bts])
-		goto out;
-
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_bts, task, base, size, th, flags);
+			   ds_bts, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
+	/* Claim the bts part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->bts_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->bts_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the bts part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_install_ds_area(tracer->ds.context);
 
 	tracer->trace.read  = bts_read;
 	tracer->trace.write = bts_write;
 
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	/* Start tracing. */
 	ds_resume_bts(tracer);
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
 
-struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-				    void *base, size_t size,
-				    pebs_ovfl_callback_t ovfl, size_t th,
-				    unsigned int flags)
+struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+				       void *base, size_t size,
+				       bts_ovfl_callback_t ovfl,
+				       size_t th, unsigned int flags)
+{
+	return ds_request_bts(task, 0, base, size, ovfl, th, flags);
+}
+
+struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+				      bts_ovfl_callback_t ovfl,
+				      size_t th, unsigned int flags)
+{
+	return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
+					   void *base, size_t size,
+					   pebs_ovfl_callback_t ovfl, size_t th,
+					   unsigned int flags)
 {
 	struct pebs_tracer *tracer;
-	unsigned long irq;
 	int error;
 
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_pebs, task, base, size, th, flags);
+			   ds_pebs, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
+	/* Claim the pebs part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->pebs_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->pebs_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the pebs part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
 	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	ds_install_ds_area(tracer->ds.context);
+
+	/* Start tracing. */
 	ds_resume_pebs(tracer);
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
 
-void ds_release_bts(struct bts_tracer *tracer)
+struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+					 void *base, size_t size,
+					 pebs_ovfl_callback_t ovfl,
+					 size_t th, unsigned int flags)
 {
-	if (!tracer)
-		return;
+	return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
+}
 
-	ds_suspend_bts(tracer);
+struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
+					pebs_ovfl_callback_t ovfl,
+					size_t th, unsigned int flags)
+{
+	return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static void ds_free_bts(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
 	tracer->ds.context->bts_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
+	/* Make sure tracing stopped and the tracer is not in use. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
 
+void ds_release_bts(struct bts_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_bts(tracer);
+	ds_free_bts(tracer);
+}
+
+int ds_release_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_bts_noirq(tracer);
+	ds_free_bts(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void update_task_debugctlmsr(struct task_struct *task,
+				    unsigned long debugctlmsr)
+{
+	task->thread.debugctlmsr = debugctlmsr;
+
+	get_cpu();
+	if (task == current)
+		update_debugctlmsr(debugctlmsr);
+	put_cpu();
+}
+
 void ds_suspend_bts(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
 
 	if (!tracer)
 		return;
 
+	tracer->flags = 0;
+
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
 
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+	WARN_ON(!task && irqs_disabled());
 
-	if (task) {
-		task->thread.debugctlmsr &= ~BTS_CONTROL;
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr &= ~BTS_CONTROL;
 
-		if (!task->thread.debugctlmsr)
-			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
 }
 
-void ds_resume_bts(struct bts_tracer *tracer)
+int ds_suspend_bts_noirq(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
-	unsigned long control;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
 
 	if (!tracer)
-		return;
+		return 0;
+
+	tracer->flags = 0;
 
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr &= ~BTS_CONTROL;
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static unsigned long ds_bts_control(struct bts_tracer *tracer)
+{
+	unsigned long control;
 
 	control = ds_cfg.ctl[dsf_bts];
 	if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@
 	if (!(tracer->trace.ds.flags & BTS_USER))
 		control |= ds_cfg.ctl[dsf_bts_user];
 
-	if (task) {
-		task->thread.debugctlmsr |= control;
-		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
-
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() | control);
+	return control;
 }
 
-void ds_release_pebs(struct pebs_tracer *tracer)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
+	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
+
 	if (!tracer)
 		return;
 
-	ds_suspend_pebs(tracer);
+	tracer->flags = tracer->trace.ds.flags;
+
+	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	WARN_ON(!task && irqs_disabled());
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
+}
+
+int ds_resume_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
+
+	if (!tracer)
+		return 0;
+
+	tracer->flags = tracer->trace.ds.flags;
+
+	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void ds_free_pebs(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
 	tracer->ds.context->pebs_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
 
+void ds_release_pebs(struct pebs_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_pebs(tracer);
+	ds_free_pebs(tracer);
+}
+
+int ds_release_pebs_noirq(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_pebs_noirq(tracer);
+	ds_free_pebs(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
 void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 void ds_resume_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
@@ -847,8 +1173,12 @@
 		return NULL;
 
 	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-	tracer->trace.reset_value =
-		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+
+	tracer->trace.counters = ds_cfg.nr_counter_reset;
+	memcpy(tracer->trace.counter_reset,
+	       tracer->ds.context->ds +
+	       (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
+	       ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
 
 	return &tracer->trace;
 }
@@ -873,18 +1203,24 @@
 
 	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
 	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
 
-int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer,
+		      unsigned int counter, u64 value)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	if (ds_cfg.nr_counter_reset < counter)
+		return -EINVAL;
+
+	*(u64 *)(tracer->ds.context->ds +
+		 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
+		 (counter * PEBS_RESET_FIELD_SIZE)) = value;
 
 	return 0;
 }
@@ -894,73 +1230,117 @@
 	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
 	.ctl[dsf_bts_kernel]	= (1 << 5),
 	.ctl[dsf_bts_user]	= (1 << 6),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
 	.name = "Pentium M",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
 	.name = "Core 2/Atom",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 	.ctl[dsf_bts_kernel]	= (1 << 9),
 	.ctl[dsf_bts_user]	= (1 << 10),
-
-	.sizeof_field		= 8,
-	.sizeof_rec[ds_bts]	= 8 * 3,
-	.sizeof_rec[ds_pebs]	= 8 * 18,
+	.nr_counter_reset	= 1,
+};
+static const struct ds_configuration ds_cfg_core_i7 = {
+	.name = "Core i7",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel]	= (1 << 9),
+	.ctl[dsf_bts_user]	= (1 << 10),
+	.nr_counter_reset	= 4,
 };
 
 static void
-ds_configure(const struct ds_configuration *cfg)
+ds_configure(const struct ds_configuration *cfg,
+	     struct cpuinfo_x86 *cpu)
 {
+	unsigned long nr_pebs_fields = 0;
+
+	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
+
+#ifdef __i386__
+	nr_pebs_fields = 10;
+#else
+	nr_pebs_fields = 18;
+#endif
+
+	/*
+	 * Starting with version 2, architectural performance
+	 * monitoring supports a format specifier.
+	 */
+	if ((cpuid_eax(0xa) & 0xff) > 1) {
+		unsigned long perf_capabilities, format;
+
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+		format = (perf_capabilities >> 8) & 0xf;
+
+		switch (format) {
+		case 0:
+			nr_pebs_fields = 18;
+			break;
+		case 1:
+			nr_pebs_fields = 22;
+			break;
+		default:
+			printk(KERN_INFO
+			       "[ds] unknown PEBS format: %lu\n", format);
+			nr_pebs_fields = 0;
+			break;
+		}
+	}
+
 	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
-	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+	ds_cfg.sizeof_ptr_field =
+		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
 
-	if (!cpu_has_bts) {
-		ds_cfg.ctl[dsf_bts] = 0;
+	ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
+	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
+
+	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
+		ds_cfg.sizeof_rec[ds_bts] = 0;
 		printk(KERN_INFO "[ds] bts not available\n");
 	}
-	if (!cpu_has_pebs)
+	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
+		ds_cfg.sizeof_rec[ds_pebs] = 0;
 		printk(KERN_INFO "[ds] pebs not available\n");
+	}
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
+	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
+	       8 * ds_cfg.sizeof_ptr_field);
+	printk("bts/pebs record: %u/%u bytes\n",
+	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
+
+	WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 {
+	/* Only configure the first cpu. Others are identical. */
+	if (ds_cfg.name)
+		return;
+
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
 		case 0x9:
 		case 0xd: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m);
+			ds_configure(&ds_cfg_pentium_m, c);
 			break;
 		case 0xf:
 		case 0x17: /* Core2 */
 		case 0x1c: /* Atom */
-			ds_configure(&ds_cfg_core2_atom);
+			ds_configure(&ds_cfg_core2_atom, c);
 			break;
-		case 0x1a: /* i7 */
+		case 0x1a: /* Core i7 */
+			ds_configure(&ds_cfg_core_i7, c);
+			break;
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
@@ -969,64 +1349,89 @@
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst);
+			ds_configure(&ds_cfg_netburst, c);
 			break;
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
 	default:
-		/* sorry, don't know about them */
+		/* Sorry, don't know about them. */
 		break;
 	}
 }
 
+static inline void ds_take_timestamp(struct ds_context *context,
+				     enum bts_qualifier qualifier,
+				     struct task_struct *task)
+{
+	struct bts_tracer *tracer = context->bts_master;
+	struct bts_struct ts;
+
+	/* Prevent compilers from reading the tracer pointer twice. */
+	barrier();
+
+	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
+		return;
+
+	memset(&ts, 0, sizeof(ts));
+	ts.qualifier		= qualifier;
+	ts.variant.event.clock	= trace_clock_global();
+	ts.variant.event.pid	= task->pid;
+
+	bts_write(tracer, &ts);
+}
+
 /*
  * Change the DS configuration from tracing prev to tracing next.
  */
 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-	struct ds_context *prev_ctx = prev->thread.ds_ctx;
-	struct ds_context *next_ctx = next->thread.ds_ctx;
+	struct ds_context *prev_ctx	= prev->thread.ds_ctx;
+	struct ds_context *next_ctx	= next->thread.ds_ctx;
+	unsigned long debugctlmsr	= next->thread.debugctlmsr;
+
+	/* Make sure all data is read before we start. */
+	barrier();
 
 	if (prev_ctx) {
 		update_debugctlmsr(0);
 
-		if (prev_ctx->bts_master &&
-		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_departs,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = prev->pid
-			};
-			bts_write(prev_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
 	}
 
 	if (next_ctx) {
-		if (next_ctx->bts_master &&
-		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_arrives,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = next->pid
-			};
-			bts_write(next_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(next_ctx, bts_task_arrives, next);
 
 		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
 	}
 
-	update_debugctlmsr(next->thread.debugctlmsr);
+	update_debugctlmsr(debugctlmsr);
 }
 
-void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+static __init int ds_selftest(void)
 {
-	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
-	tsk->thread.ds_ctx = NULL;
-}
+	if (ds_cfg.sizeof_rec[ds_bts]) {
+		int error;
 
-void ds_exit_thread(struct task_struct *tsk)
-{
+		error = ds_selftest_bts();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling bts.\n");
+			ds_cfg.sizeof_rec[ds_bts] = 0;
+		}
+	}
+
+	if (ds_cfg.sizeof_rec[ds_pebs]) {
+		int error;
+
+		error = ds_selftest_pebs();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling pebs.\n");
+			ds_cfg.sizeof_rec[ds_pebs] = 0;
+		}
+	}
+
+	return 0;
 }
+device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644
index 0000000..6bc7c19
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.c
@@ -0,0 +1,408 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#include "ds_selftest.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/ds.h>
+
+
+#define BUFFER_SIZE		521	/* Intentionally chose an odd size. */
+#define SMALL_BUFFER_SIZE	24	/* A single bts entry. */
+
+struct ds_selftest_bts_conf {
+	struct bts_tracer *tracer;
+	int error;
+	int (*suspend)(struct bts_tracer *);
+	int (*resume)(struct bts_tracer *);
+};
+
+static int ds_selftest_bts_consistency(const struct bts_trace *trace)
+{
+	int error = 0;
+
+	if (!trace) {
+		printk(KERN_CONT "failed to access trace...");
+		/* Bail out. Other tests are pointless. */
+		return -1;
+	}
+
+	if (!trace->read) {
+		printk(KERN_CONT "bts read not available...");
+		error = -1;
+	}
+
+	/* Do some sanity checks on the trace configuration. */
+	if (!trace->ds.n) {
+		printk(KERN_CONT "empty bts buffer...");
+		error = -1;
+	}
+	if (!trace->ds.size) {
+		printk(KERN_CONT "bad bts trace setup...");
+		error = -1;
+	}
+	if (trace->ds.end !=
+	    (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
+		printk(KERN_CONT "bad bts buffer setup...");
+		error = -1;
+	}
+	/*
+	 * We allow top in [begin; end], since its not clear when the
+	 * overflow adjustment happens: after the increment or before the
+	 * write.
+	 */
+	if ((trace->ds.top < trace->ds.begin) ||
+	    (trace->ds.end < trace->ds.top)) {
+		printk(KERN_CONT "bts top out of bounds...");
+		error = -1;
+	}
+
+	return error;
+}
+
+static int ds_selftest_bts_read(struct bts_tracer *tracer,
+				const struct bts_trace *trace,
+				const void *from, const void *to)
+{
+	const unsigned char *at;
+
+	/*
+	 * Check a few things which do not belong to this test.
+	 * They should be covered by other tests.
+	 */
+	if (!trace)
+		return -1;
+
+	if (!trace->read)
+		return -1;
+
+	if (to < from)
+		return -1;
+
+	if (from < trace->ds.begin)
+		return -1;
+
+	if (trace->ds.end < to)
+		return -1;
+
+	if (!trace->ds.size)
+		return -1;
+
+	/* Now to the test itself. */
+	for (at = from; (void *)at < to; at += trace->ds.size) {
+		struct bts_struct bts;
+		unsigned long index;
+		int error;
+
+		if (((void *)at - trace->ds.begin) % trace->ds.size) {
+			printk(KERN_CONT
+			       "read from non-integer index...");
+			return -1;
+		}
+		index = ((void *)at - trace->ds.begin) / trace->ds.size;
+
+		memset(&bts, 0, sizeof(bts));
+		error = trace->read(tracer, at, &bts);
+		if (error < 0) {
+			printk(KERN_CONT
+			       "error reading bts trace at [%lu] (0x%p)...",
+			       index, at);
+			return error;
+		}
+
+		switch (bts.qualifier) {
+		case BTS_BRANCH:
+			break;
+		default:
+			printk(KERN_CONT
+			       "unexpected bts entry %llu at [%lu] (0x%p)...",
+			       bts.qualifier, index, at);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static void ds_selftest_bts_cpu(void *arg)
+{
+	struct ds_selftest_bts_conf *conf = arg;
+	const struct bts_trace *trace;
+	void *top;
+
+	if (IS_ERR(conf->tracer)) {
+		conf->error = PTR_ERR(conf->tracer);
+		conf->tracer = NULL;
+
+		printk(KERN_CONT
+		       "initialization failed (err: %d)...", conf->error);
+		return;
+	}
+
+	/* We should meanwhile have enough trace. */
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	/* Let's see if we can access the trace. */
+	trace = ds_read_bts(conf->tracer);
+
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	/* If everything went well, we should have a few trace entries. */
+	if (trace->ds.top == trace->ds.begin) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning, but continue.
+		 */
+		printk(KERN_CONT "no trace/overflow...");
+	}
+
+	/* Let's try to read the trace we collected. */
+	conf->error =
+		ds_selftest_bts_read(conf->tracer, trace,
+				     trace->ds.begin, trace->ds.top);
+	if (conf->error < 0)
+		return;
+
+	/*
+	 * Let's read the trace again.
+	 * Since we suspended tracing, we should get the same result.
+	 */
+	top = trace->ds.top;
+
+	trace = ds_read_bts(conf->tracer);
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	if (top != trace->ds.top) {
+		printk(KERN_CONT "suspend not working...");
+		conf->error = -1;
+		return;
+	}
+
+	/* Let's collect some more trace - see if resume is working. */
+	conf->error = conf->resume(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	trace = ds_read_bts(conf->tracer);
+
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	if (trace->ds.top == top) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning and check the full trace.
+		 */
+		printk(KERN_CONT
+		       "no resume progress/overflow...");
+
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else if (trace->ds.top < top) {
+		/*
+		 * We had a buffer overflow - the entire buffer should
+		 * contain trace records.
+		 */
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else {
+		/*
+		 * It is quite likely that the buffer did not overflow.
+		 * Let's just check the delta trace.
+		 */
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace, top,
+					     trace->ds.top);
+	}
+	if (conf->error < 0)
+		return;
+
+	conf->error = 0;
+}
+
+static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_suspend_bts(tracer);
+	return 0;
+}
+
+static int ds_resume_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_resume_bts(tracer);
+	return 0;
+}
+
+static void ds_release_bts_noirq_wrap(void *tracer)
+{
+	(void)ds_release_bts_noirq(tracer);
+}
+
+static int ds_selftest_bts_bad_release_noirq(int cpu,
+					     struct bts_tracer *tracer)
+{
+	int error = -EPERM;
+
+	/* Try to release the tracer on the wrong cpu. */
+	get_cpu();
+	if (cpu != smp_processor_id()) {
+		error = ds_release_bts_noirq(tracer);
+		if (error != -EPERM)
+			printk(KERN_CONT "release on wrong cpu...");
+	}
+	put_cpu();
+
+	return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		ds_release_bts(tracer);
+		error = 0;
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "cpu/task tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_task(void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		error = 0;
+		ds_release_bts(tracer);
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "task/cpu tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+	struct ds_selftest_bts_conf conf;
+	unsigned char buffer[BUFFER_SIZE], *small_buffer;
+	unsigned long irq;
+	int cpu;
+
+	printk(KERN_INFO "[ds] bts selftest...");
+	conf.error = 0;
+
+	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		conf.suspend = ds_suspend_bts_wrap;
+		conf.resume = ds_resume_bts_wrap;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		ds_selftest_bts_cpu(&conf);
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		ds_release_bts(conf.tracer);
+		if (conf.error < 0)
+			goto out;
+
+		conf.suspend = ds_suspend_bts_noirq;
+		conf.resume = ds_resume_bts_noirq;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+		if (conf.error >= 0) {
+			conf.error =
+				ds_selftest_bts_bad_release_noirq(cpu,
+								  conf.tracer);
+			/* We must not release the tracer twice. */
+			if (conf.error < 0)
+				conf.tracer = NULL;
+		}
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+					 conf.tracer, 1);
+		if (conf.error < 0)
+			goto out;
+	}
+
+	conf.suspend = ds_suspend_bts_wrap;
+	conf.resume = ds_resume_bts_wrap;
+	conf.tracer =
+		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts(conf.tracer);
+	if (conf.error < 0)
+		goto out;
+
+	conf.suspend = ds_suspend_bts_noirq;
+	conf.resume = ds_resume_bts_noirq;
+	conf.tracer =
+		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
+	local_irq_save(irq);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts_noirq(conf.tracer);
+	local_irq_restore(irq);
+	if (conf.error < 0)
+		goto out;
+
+	conf.error = 0;
+ out:
+	put_online_cpus();
+	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+	return conf.error;
+}
+
+int ds_selftest_pebs(void)
+{
+	return 0;
+}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644
index 0000000..2ba8745
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.h
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index da87590b..81086c2 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -29,7 +29,6 @@
 		unsigned long *sp, unsigned long bp, char *log_lvl);
 
 extern unsigned int code_bytes;
-extern int kstack_depth_to_print;
 
 /* The form of the top of the frame on the stack */
 struct stack_frame {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0062813..7271fa3 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -617,7 +617,7 @@
  */
 __init void e820_setup_gap(void)
 {
-	unsigned long gapstart, gapsize, round;
+	unsigned long gapstart, gapsize;
 	int found;
 
 	gapstart = 0x10000000;
@@ -635,14 +635,9 @@
 #endif
 
 	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
+	 * e820_reserve_resources_late protect stolen RAM already
 	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
+	pci_mem_start = gapstart;
 
 	printk(KERN_INFO
 	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1371,6 +1366,23 @@
 	}
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+	unsigned long mb = pos >> 20;
+
+	/* To 64kB in the first megabyte */
+	if (!mb)
+		return 64*1024;
+
+	/* To 1MB in the first 16MB */
+	if (mb < 16)
+		return 1024*1024;
+
+	/* To 32MB for anything above that */
+	return 32*1024*1024;
+}
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1382,6 +1394,24 @@
 			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
+
+	/*
+	 * Try to bump up RAM regions to reasonable boundaries to
+	 * avoid stolen RAM:
+	 */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *entry = &e820_saved.map[i];
+		resource_size_t start, end;
+
+		if (entry->type != E820_RAM)
+			continue;
+		start = entry->addr + entry->size;
+		end = round_up(start, ram_alignment(start));
+		if (start == end)
+			continue;
+		reserve_region_with_split(&iomem_resource, start,
+						  end - 1, "RAM buffer");
+	}
 }
 
 char *__init default_machine_specific_memory_setup(void)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd9..ebdb85c 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,6 +97,7 @@
 }
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 {
 	u32 d;
@@ -114,6 +115,7 @@
 	d &= 0xff;
 	return d;
 }
+#endif
 
 static void __init ati_bugs(int num, int slot, int func)
 {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6..a4742a3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -147,27 +147,14 @@
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
@@ -1025,6 +1012,11 @@
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PENDING_VECTOR \
+	perf_pending_interrupt smp_perf_pending_interrupt
+#endif
+
 /*
  * Exception entry points.
  */
@@ -1379,6 +1371,11 @@
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
+#ifdef CONFIG_XEN
+zeroentry xen_debug do_debug
+zeroentry xen_int3 do_int3
+errorentry xen_stack_segment do_stack_segment
+#endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 3068388..dc5ed4b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -608,13 +608,6 @@
 ENTRY(initial_code)
 	.long i386_start_kernel
 
-.section .text
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
 /*
  * BSS section
  */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010..38287b5 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
@@ -24,9 +25,9 @@
  */
 void ack_bad_irq(unsigned int irq)
 {
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+	if (printk_ratelimit())
+		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * Currently unexpected vectors happen only on SMP and APIC.
 	 * We _must_ ack these because every local APIC has only N
@@ -36,9 +37,7 @@
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
+	ack_APIC_irq();
 }
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
@@ -63,6 +62,14 @@
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "%*s: ", prec, "CNT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "%*s: ", prec, "PND");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+	seq_printf(p, "  Performance pending work\n");
 #endif
 	if (generic_interrupt_extension) {
 		seq_printf(p, "%*s: ", prec, "PLT");
@@ -166,6 +173,8 @@
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
+	sum += irq_stats(cpu)->apic_perf_irqs;
+	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 	if (generic_interrupt_extension)
 		sum += irq_stats(cpu)->generic_irqs;
@@ -178,7 +187,7 @@
 	sum += irq_stats(cpu)->irq_thermal_count;
 # ifdef CONFIG_X86_64
 	sum += irq_stats(cpu)->irq_threshold_count;
-#endif
+# endif
 #endif
 	return sum;
 }
@@ -213,14 +222,11 @@
 	irq = __get_cpu_var(vector_irq)[vector];
 
 	if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-		if (!disable_apic)
-			ack_APIC_irq();
-#endif
+		ack_APIC_irq();
 
 		if (printk_ratelimit())
-			printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
-			       __func__, smp_processor_id(), vector, irq);
+			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+				__func__, smp_processor_id(), vector, irq);
 	}
 
 	irq_exit();
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c
similarity index 67%
rename from arch/x86/kernel/irqinit_32.c
rename to arch/x86/kernel/irqinit.c
index 368b0a8..267c662 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit.c
@@ -1,20 +1,25 @@
+#include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/delay.h>
 
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/timer.h>
+#include <asm/hw_irq.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
@@ -22,8 +27,24 @@
 #include <asm/i8259.h>
 #include <asm/traps.h>
 
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
 
 /*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+
+#ifdef CONFIG_X86_32
+/*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
  * as the irq is unreliable, and exception 16 works correctly
  * (ie as explained in the intel literature). On a 386, you
@@ -52,30 +73,7 @@
 	.handler = math_error_irq,
 	.name = "fpu",
 };
-
-void __init init_ISA_irqs(void)
-{
-	int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	init_bsp_APIC();
 #endif
-	init_8259A(0);
-
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
 
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
@@ -118,29 +116,37 @@
 	return 0;
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init init_ISA_irqs(void)
 {
 	int i;
 
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
+	init_8259A(0);
 
 	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
+	 * 16 old-style INTA-cycle interrupts:
 	 */
-	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
+					      handle_level_irq, "XT");
 	}
+}
 
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+static void __init smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -160,16 +166,32 @@
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
-	/* IPI for single call function */
+	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-				 call_function_single_interrupt);
+			call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+#endif /* CONFIG_SMP */
+}
 
-#ifdef CONFIG_X86_LOCAL_APIC
+static void __init apic_intr_init(void)
+{
+	smp_intr_init();
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+#ifdef CONFIG_X86_THRESHOLD
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
+#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
+	alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
+#endif
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -179,16 +201,59 @@
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
-	/* thermal monitor LVT interrupt */
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	/* Performance monitoring interrupts: */
+# ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
+
 #endif
+}
+
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+#ifdef CONFIG_X86_32
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+#endif
+	init_ISA_irqs();
+}
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+
+	apic_intr_init();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
+		if (!test_bit(i, used_vectors))
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+	}
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Call quirks after call gates are initialised (usually add in
 	 * the architecture specific gates):
@@ -203,4 +268,5 @@
 		setup_irq(FPU_IRQ, &fpu_irq);
 
 	irq_ctx_init(smp_processor_id());
+#endif
 }
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
deleted file mode 100644
index 8cd1053..0000000
--- a/arch/x86/kernel/irqinit_64.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = {
-	.handler = no_action,
-	.name = "cascade",
-};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-	[0 ... IRQ0_VECTOR - 1] = -1,
-	[IRQ0_VECTOR] = 0,
-	[IRQ1_VECTOR] = 1,
-	[IRQ2_VECTOR] = 2,
-	[IRQ3_VECTOR] = 3,
-	[IRQ4_VECTOR] = 4,
-	[IRQ5_VECTOR] = 5,
-	[IRQ6_VECTOR] = 6,
-	[IRQ7_VECTOR] = 7,
-	[IRQ8_VECTOR] = 8,
-	[IRQ9_VECTOR] = 9,
-	[IRQ10_VECTOR] = 10,
-	[IRQ11_VECTOR] = 11,
-	[IRQ12_VECTOR] = 12,
-	[IRQ13_VECTOR] = 13,
-	[IRQ14_VECTOR] = 14,
-	[IRQ15_VECTOR] = 15,
-	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (per_cpu(vector_irq, cpu)[vector] != -1)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
-	int i;
-
-	init_bsp_APIC();
-	init_8259A(0);
-
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		/*
-		 * 16 old-style INTA-cycle interrupts:
-		 */
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-						      handle_level_irq, "XT");
-	}
-}
-
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for generic single function call */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-			call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-}
-
-static void __init apic_intr_init(void)
-{
-	smp_intr_init();
-
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-
-	/* self generated IPI for local APIC timer */
-	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-	/* generic IPI for platform specific use */
-	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
-	/* IPI vectors for APIC spurious and error interrupts */
-	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-}
-
-void __init native_init_IRQ(void)
-{
-	int i;
-
-	init_ISA_irqs();
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (vector != IA32_SYSCALL_VECTOR)
-			set_intr_gate(vector, interrupt[i]);
-	}
-
-	apic_intr_init();
-
-	if (!acpi_ioapic)
-		setup_irq(2, &irq2);
-}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index b1f4dff..8d82a77 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -142,7 +142,7 @@
 	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
-	gdb_regs[GDB_PC]	= p->thread.ip;
+	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019dd..a78ecad 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hardirq.h>
+#include <asm/timer.h>
 
 #define MMU_QUEUE_SIZE 1024
 
@@ -195,7 +196,7 @@
 	struct kvm_para_state *state = kvm_para_state();
 
 	mmu_queue_flush(state);
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	paravirt_leave_lazy_mmu();
 	state->mode = paravirt_get_lazy_mode();
 }
 
@@ -230,6 +231,9 @@
 		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
 		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
 	}
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 453b579..366baa1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,25 +13,13 @@
  *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/pci.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -79,9 +67,6 @@
 #define UCODE_CONTAINER_SECTION_HDR	8
 #define UCODE_CONTAINER_HEADER_SIZE	12
 
-/* serialize access to the physical write */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static struct equiv_cpu_entry *equiv_cpu_table;
 
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
@@ -144,9 +129,8 @@
 	return 1;
 }
 
-static void apply_microcode_amd(int cpu)
+static int apply_microcode_amd(int cpu)
 {
-	unsigned long flags;
 	u32 rev, dummy;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -156,25 +140,25 @@
 	BUG_ON(cpu_num != cpu);
 
 	if (mc_amd == NULL)
-		return;
+		return 0;
 
-	spin_lock_irqsave(&microcode_update_lock, flags);
 	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
 	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
 	/* check current patch id and patch's id for match */
 	if (rev != mc_amd->hdr.patch_id) {
 		printk(KERN_ERR "microcode: CPU%d: update failed "
 		       "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
-		return;
+		return -1;
 	}
 
 	printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
 	       cpu, rev);
 
 	uci->cpu_sig.rev = rev;
+
+	return 0;
 }
 
 static int get_ucode_data(void *to, const u8 *from, size_t n)
@@ -257,13 +241,12 @@
 
 static void free_equiv_cpu_table(void)
 {
-	if (equiv_cpu_table) {
-		vfree(equiv_cpu_table);
-		equiv_cpu_table = NULL;
-	}
+	vfree(equiv_cpu_table);
+	equiv_cpu_table = NULL;
 }
 
-static int generic_load_microcode(int cpu, const u8 *data, size_t size)
+static enum ucode_state
+generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	const u8 *ucode_ptr = data;
@@ -272,12 +255,13 @@
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover;
 	unsigned long offset;
+	enum ucode_state state = UCODE_OK;
 
 	offset = install_equiv_cpu_table(ucode_ptr);
 	if (!offset) {
 		printk(KERN_ERR "microcode: failed to create "
 		       "equivalent cpu table\n");
-		return -EINVAL;
+		return UCODE_ERROR;
 	}
 
 	ucode_ptr += offset;
@@ -293,8 +277,7 @@
 
 		mc_header = (struct microcode_header_amd *)mc;
 		if (get_matching_microcode(cpu, mc, new_rev)) {
-			if (new_mc)
-				vfree(new_mc);
+			vfree(new_mc);
 			new_rev = mc_header->patch_id;
 			new_mc  = mc;
 		} else
@@ -306,34 +289,32 @@
 
 	if (new_mc) {
 		if (!leftover) {
-			if (uci->mc)
-				vfree(uci->mc);
+			vfree(uci->mc);
 			uci->mc = new_mc;
 			pr_debug("microcode: CPU%d found a matching microcode "
 				 "update with version 0x%x (current=0x%x)\n",
 				 cpu, new_rev, uci->cpu_sig.rev);
-		} else
+		} else {
 			vfree(new_mc);
-	}
+			state = UCODE_ERROR;
+		}
+	} else
+		state = UCODE_NFOUND;
 
 	free_equiv_cpu_table();
 
-	return (int)leftover;
+	return state;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
 	const char *fw_name = "amd-ucode/microcode_amd.bin";
 	const struct firmware *firmware;
-	int ret;
+	enum ucode_state ret;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
-	ret = request_firmware(&firmware, fw_name, device);
-	if (ret) {
+	if (request_firmware(&firmware, fw_name, device)) {
 		printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
-		return ret;
+		return UCODE_NFOUND;
 	}
 
 	ret = generic_load_microcode(cpu, firmware->data, firmware->size);
@@ -343,11 +324,12 @@
 	return ret;
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
 	printk(KERN_INFO "microcode: AMD microcode update via "
 	       "/dev/cpu/microcode not supported\n");
-	return -1;
+	return UCODE_ERROR;
 }
 
 static void microcode_fini_cpu_amd(int cpu)
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 98c470c..9c44615 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -71,27 +71,18 @@
  *		Thanks to Stuart Swales for pointing out this bug.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -101,36 +92,110 @@
 
 static struct microcode_ops	*microcode_ops;
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
+/*
+ * Operations that are run on a target cpu:
+ */
+
+struct cpu_info_ctx {
+	struct cpu_signature	*cpu_sig;
+	int			err;
+};
+
+static void collect_cpu_info_local(void *arg)
+{
+	struct cpu_info_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
+						   ctx->cpu_sig);
+}
+
+static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
+{
+	struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
+static int collect_cpu_info(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int ret;
+
+	memset(uci, 0, sizeof(*uci));
+
+	ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
+	if (!ret)
+		uci->valid = 1;
+
+	return ret;
+}
+
+struct apply_microcode_ctx {
+	int err;
+};
+
+static void apply_microcode_local(void *arg)
+{
+	struct apply_microcode_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+}
+
+static int apply_microcode_on_target(int cpu)
+{
+	struct apply_microcode_ctx ctx = { .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update(const void __user *buf, size_t size)
 {
-	cpumask_t old;
 	int error = 0;
 	int cpu;
 
-	old = current->cpus_allowed;
-
 	for_each_online_cpu(cpu) {
 		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+		enum ucode_state ustate;
 
 		if (!uci->valid)
 			continue;
 
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		error = microcode_ops->request_microcode_user(cpu, buf, size);
-		if (error < 0)
-			goto out;
-		if (!error)
-			microcode_ops->apply_microcode(cpu);
+		ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+		if (ustate == UCODE_ERROR) {
+			error = -1;
+			break;
+		} else if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
 	}
-out:
-	set_cpus_allowed_ptr(current, &old);
+
 	return error;
 }
 
@@ -143,19 +208,17 @@
 static ssize_t microcode_write(struct file *file, const char __user *buf,
 			       size_t len, loff_t *ppos)
 {
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-		       num_physpages);
-		return -EINVAL;
+		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+		return ret;
 	}
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	ret = do_microcode_update(buf, len);
-	if (!ret)
+	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
 	mutex_unlock(&microcode_mutex);
@@ -165,15 +228,15 @@
 }
 
 static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
+	.owner			= THIS_MODULE,
+	.write			= microcode_write,
+	.open			= microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
+	.minor			= MICROCODE_MINOR,
+	.name			= "microcode",
+	.fops			= &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -182,9 +245,7 @@
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
+		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
@@ -205,42 +266,51 @@
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
-static long reload_for_cpu(void *unused)
+static int reload_for_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
 
 	mutex_lock(&microcode_mutex);
 	if (uci->valid) {
-		err = microcode_ops->request_microcode_fw(smp_processor_id(),
-							  &microcode_pdev->dev);
-		if (!err)
-			microcode_ops->apply_microcode(smp_processor_id());
+		enum ucode_state ustate;
+
+		ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+		if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
+		else
+			if (ustate == UCODE_ERROR)
+				err = -EINVAL;
 	}
 	mutex_unlock(&microcode_mutex);
+
 	return err;
 }
 
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
-			    const char *buf, size_t sz)
+			    const char *buf, size_t size)
 {
-	char *end;
-	unsigned long val = simple_strtoul(buf, &end, 0);
-	int err = 0;
+	unsigned long val;
 	int cpu = dev->id;
+	int ret = 0;
+	char *end;
 
+	val = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
+
 	if (val == 1) {
 		get_online_cpus();
 		if (cpu_online(cpu))
-			err = work_on_cpu(cpu, reload_for_cpu, NULL);
+			ret = reload_for_cpu(cpu);
 		put_online_cpus();
 	}
-	if (err)
-		return err;
-	return sz;
+
+	if (!ret)
+		ret = size;
+
+	return ret;
 }
 
 static ssize_t version_show(struct sys_device *dev,
@@ -271,11 +341,11 @@
 };
 
 static struct attribute_group mc_attr_group = {
-	.attrs		= mc_default_attrs,
-	.name		= "microcode",
+	.attrs			= mc_default_attrs,
+	.name			= "microcode",
 };
 
-static void __microcode_fini_cpu(int cpu)
+static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
@@ -283,103 +353,68 @@
 	uci->valid = 0;
 }
 
-static void microcode_fini_cpu(int cpu)
-{
-	mutex_lock(&microcode_mutex);
-	__microcode_fini_cpu(cpu);
-	mutex_unlock(&microcode_mutex);
-}
-
-static void collect_cpu_info(int cpu)
+static enum ucode_state microcode_resume_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	memset(uci, 0, sizeof(*uci));
-	if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
-		uci->valid = 1;
-}
-
-static int microcode_resume_cpu(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct cpu_signature nsig;
-
-	pr_debug("microcode: CPU%d resumed\n", cpu);
-
 	if (!uci->mc)
-		return 1;
+		return UCODE_NFOUND;
 
-	/*
-	 * Let's verify that the 'cached' ucode does belong
-	 * to this cpu (a bit of paranoia):
-	 */
-	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
-				cpu);
-		return -1;
-	}
+	pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+	apply_microcode_on_target(cpu);
 
-	if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
-				cpu);
-		/* Should we look for a new ucode here? */
-		return 1;
-	}
-
-	return 0;
+	return UCODE_OK;
 }
 
-static long microcode_update_cpu(void *unused)
+static enum ucode_state microcode_init_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
-	int err = 0;
+	enum ucode_state ustate;
 
-	/*
-	 * Check if the system resume is in progress (uci->valid != NULL),
-	 * otherwise just request a firmware:
-	 */
-	if (uci->valid) {
-		err = microcode_resume_cpu(smp_processor_id());
-	} else {
-		collect_cpu_info(smp_processor_id());
-		if (uci->valid && system_state == SYSTEM_RUNNING)
-			err = microcode_ops->request_microcode_fw(
-					smp_processor_id(),
-					&microcode_pdev->dev);
+	if (collect_cpu_info(cpu))
+		return UCODE_ERROR;
+
+	/* --dimm. Trigger a delayed update? */
+	if (system_state != SYSTEM_RUNNING)
+		return UCODE_NFOUND;
+
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+
+	if (ustate == UCODE_OK) {
+		pr_debug("microcode: CPU%d updated upon init\n", cpu);
+		apply_microcode_on_target(cpu);
 	}
-	if (!err)
-		microcode_ops->apply_microcode(smp_processor_id());
-	return err;
+
+	return ustate;
 }
 
-static int microcode_init_cpu(int cpu)
+static enum ucode_state microcode_update_cpu(int cpu)
 {
-	int err;
-	mutex_lock(&microcode_mutex);
-	err = work_on_cpu(cpu, microcode_update_cpu, NULL);
-	mutex_unlock(&microcode_mutex);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
 
-	return err;
+	if (uci->valid)
+		ustate = microcode_resume_cpu(cpu);
+	else
+		ustate = microcode_init_cpu(cpu);
+
+	return ustate;
 }
 
 static int mc_sysdev_add(struct sys_device *sys_dev)
 {
 	int err, cpu = sys_dev->id;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("microcode: CPU%d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	err = microcode_init_cpu(cpu);
+	if (microcode_init_cpu(cpu) == UCODE_ERROR)
+		err = -EINVAL;
 
 	return err;
 }
@@ -400,19 +435,30 @@
 static int mc_sysdev_resume(struct sys_device *dev)
 {
 	int cpu = dev->id;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
-	/* only CPU 0 will apply ucode here */
-	microcode_update_cpu(NULL);
+	/*
+	 * All non-bootup cpus are still disabled,
+	 * so only CPU 0 will apply ucode here.
+	 *
+	 * Moreover, there can be no concurrent
+	 * updates from any other places at this point.
+	 */
+	WARN_ON(cpu != 0);
+
+	if (uci->valid && uci->mc)
+		microcode_ops->apply_microcode(cpu);
+
 	return 0;
 }
 
 static struct sysdev_driver mc_sysdev_driver = {
-	.add		= mc_sysdev_add,
-	.remove		= mc_sysdev_remove,
-	.resume		= mc_sysdev_resume,
+	.add			= mc_sysdev_add,
+	.remove			= mc_sysdev_remove,
+	.resume			= mc_sysdev_resume,
 };
 
 static __cpuinit int
@@ -425,15 +471,12 @@
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		if (microcode_init_cpu(cpu))
-			printk(KERN_ERR "microcode: failed to init CPU%d\n",
-			       cpu);
+		microcode_update_cpu(cpu);
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("microcode: CPU%d added\n", cpu);
 		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-			printk(KERN_ERR "microcode: Failed to create the sysfs "
-				"group for CPU%d\n", cpu);
+			pr_err("microcode: Failed to create group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -465,13 +508,10 @@
 		microcode_ops = init_amd_microcode();
 
 	if (!microcode_ops) {
-		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		pr_err("microcode: no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
-	error = microcode_dev_init();
-	if (error)
-		return error;
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
 	if (IS_ERR(microcode_pdev)) {
@@ -480,23 +520,31 @@
 	}
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
+
 	if (error) {
-		microcode_dev_exit();
 		platform_device_unregister(microcode_pdev);
 		return error;
 	}
 
+	error = microcode_dev_init();
+	if (error)
+		return error;
+
 	register_hotcpu_notifier(&mc_cpu_notifier);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 	       " <tigran@aivazian.fsnet.co.uk>,"
 	       " Peter Oruba\n");
 
 	return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
@@ -505,16 +553,17 @@
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
 	platform_device_unregister(microcode_pdev);
 
 	microcode_ops = NULL;
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 149b9ec..0d334dd 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,24 +70,11 @@
  *		Fix sigmatch() macro to handle old CPUs with pf == 0.
  *		Thanks to Stuart Swales for pointing out this bug.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -150,13 +137,9 @@
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-	unsigned long flags;
 	unsigned int val[2];
 
 	memset(csig, 0, sizeof(*csig));
@@ -176,18 +159,14 @@
 		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
-	/* serialize access to the physical write to MSR 0x79 */
-	spin_lock_irqsave(&microcode_update_lock, flags);
-
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	/* see notes above for revision 1.07.  Apparent chip bug */
 	sync_core();
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
-	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-			csig->sig, csig->pf, csig->rev);
+	printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+			cpu_num, csig->sig, csig->pf, csig->rev);
 
 	return 0;
 }
@@ -318,11 +297,10 @@
 	return 0;
 }
 
-static void apply_microcode(int cpu)
+static int apply_microcode(int cpu)
 {
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
-	unsigned long flags;
 	unsigned int val[2];
 	int cpu_num;
 
@@ -334,10 +312,7 @@
 	BUG_ON(cpu_num != cpu);
 
 	if (mc_intel == NULL)
-		return;
-
-	/* serialize access to the physical write to MSR 0x79 */
-	spin_lock_irqsave(&microcode_update_lock, flags);
+		return 0;
 
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
@@ -351,30 +326,32 @@
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 	if (val[1] != mc_intel->hdr.rev) {
-		printk(KERN_ERR "microcode: CPU%d update from revision "
-				"0x%x to 0x%x failed\n",
-			cpu_num, uci->cpu_sig.rev, val[1]);
-		return;
+		printk(KERN_ERR "microcode: CPU%d update "
+				"to revision 0x%x failed\n",
+			cpu_num, mc_intel->hdr.rev);
+		return -1;
 	}
-	printk(KERN_INFO "microcode: CPU%d updated from revision "
-			 "0x%x to 0x%x, date = %04x-%02x-%02x \n",
-		cpu_num, uci->cpu_sig.rev, val[1],
+	printk(KERN_INFO "microcode: CPU%d updated to revision "
+			 "0x%x, date = %04x-%02x-%02x \n",
+		cpu_num, val[1],
 		mc_intel->hdr.date & 0xffff,
 		mc_intel->hdr.date >> 24,
 		(mc_intel->hdr.date >> 16) & 0xff);
 
 	uci->cpu_sig.rev = val[1];
+
+	return 0;
 }
 
-static int generic_load_microcode(int cpu, void *data, size_t size,
-		int (*get_ucode_data)(void *, const void *, size_t))
+static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
+				int (*get_ucode_data)(void *, const void *, size_t))
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
 
 	while (leftover) {
 		struct microcode_header_intel mc_header;
@@ -412,11 +389,15 @@
 		leftover  -= mc_size;
 	}
 
-	if (!new_mc)
-		goto out;
-
 	if (leftover) {
-		vfree(new_mc);
+		if (new_mc)
+			vfree(new_mc);
+		state = UCODE_ERROR;
+		goto out;
+	}
+
+	if (!new_mc) {
+		state = UCODE_NFOUND;
 		goto out;
 	}
 
@@ -427,9 +408,8 @@
 	pr_debug("microcode: CPU%d found a matching microcode update with"
 		 " version 0x%x (current=0x%x)\n",
 			cpu, new_rev, uci->cpu_sig.rev);
-
- out:
-	return (int)leftover;
+out:
+	return state;
 }
 
 static int get_ucode_fw(void *to, const void *from, size_t n)
@@ -438,21 +418,19 @@
 	return 0;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
-	int ret;
+	enum ucode_state ret;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
 	sprintf(name, "intel-ucode/%02x-%02x-%02x",
 		c->x86, c->x86_model, c->x86_mask);
-	ret = request_firmware(&firmware, name, device);
-	if (ret) {
+
+	if (request_firmware(&firmware, name, device)) {
 		pr_debug("microcode: data file %s load failed\n", name);
-		return ret;
+		return UCODE_NFOUND;
 	}
 
 	ret = generic_load_microcode(cpu, (void *)firmware->data,
@@ -468,11 +446,9 @@
 	return copy_from_user(to, from, n);
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
 	return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
 }
 
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module.c
similarity index 74%
rename from arch/x86/kernel/module_64.c
rename to arch/x86/kernel/module.c
index c23880b..89f386f 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module.c
@@ -1,6 +1,5 @@
-/*  Kernel module help for x86-64
+/*  Kernel module help for x86.
     Copyright (C) 2001 Rusty Russell.
-    Copyright (C) 2002,2003 Andi Kleen, SuSE Labs.
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -22,23 +21,18 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/bug.h>
+#include <linux/mm.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
+#if 0
+#define DEBUGP printk
+#else
 #define DEBUGP(fmt...)
-
-#ifndef CONFIG_UML
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
-}
+#endif
 
 void *module_alloc(unsigned long size)
 {
@@ -54,9 +48,15 @@
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
+	return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM,
+					PAGE_KERNEL_EXEC);
 }
-#endif
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+}
 
 /* We don't need anything special. */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
@@ -67,6 +67,58 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_386_32:
+			/* We add the value into the location given */
+			*location += sym->st_value;
+			break;
+		case R_386_PC32:
+			/* Add the value, subtract its postition */
+			*location += sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+#else /*X86_64*/
 int apply_relocate_add(Elf64_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
@@ -147,6 +199,8 @@
 	return -ENOSYS;
 }
 
+#endif
+
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
deleted file mode 100644
index 0edd819..0000000
--- a/arch/x86/kernel/module_32.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*  Kernel module help for i386.
-    Copyright (C) 2001 Rusty Russell.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/bug.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt...)
-#endif
-
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc_exec(size);
-}
-
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
-}
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
-			      Elf_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *mod)
-{
-	return 0;
-}
-
-int apply_relocate(Elf32_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	unsigned int i;
-	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf32_Sym *sym;
-	uint32_t *location;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rel[i].r_info);
-
-		switch (ELF32_R_TYPE(rel[i].r_info)) {
-		case R_386_32:
-			/* We add the value into the location given */
-			*location += sym->st_value;
-			break;
-		case R_386_PC32:
-			/* Add the value, subtract its postition */
-			*location += sym->st_value - (uint32_t)location;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
-	       me->name);
-	return -ENOEXEC;
-}
-
-int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *me)
-{
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
-		*para = NULL;
-	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
-	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
-		if (!strcmp(".text", secstrings + s->sh_name))
-			text = s;
-		if (!strcmp(".altinstructions", secstrings + s->sh_name))
-			alt = s;
-		if (!strcmp(".smp_locks", secstrings + s->sh_name))
-			locks = s;
-		if (!strcmp(".parainstructions", secstrings + s->sh_name))
-			para = s;
-	}
-
-	if (alt) {
-		/* patch .altinstructions */
-		void *aseg = (void *)alt->sh_addr;
-		apply_alternatives(aseg, aseg + alt->sh_size);
-	}
-	if (locks && text) {
-		void *lseg = (void *)locks->sh_addr;
-		void *tseg = (void *)text->sh_addr;
-		alternatives_smp_module_add(me, me->name,
-					    lseg, lseg + locks->sh_size,
-					    tseg, tseg + text->sh_size);
-	}
-
-	if (para) {
-		void *pseg = (void *)para->sh_addr;
-		apply_paravirt(pseg, pseg + para->sh_size);
-	}
-
-	return module_bug_finalize(hdr, sechdrs, me);
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-	alternatives_smp_module_del(mod);
-	module_bug_cleanup(mod);
-}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70fd7e4..651c93b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/pci.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
@@ -870,24 +871,17 @@
 inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
-		      int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	if (!mpc_new_phys) {
-		pr_info("No spare slots, try to append...take your risk, "
-			"new mpc_length %x\n", count);
-	} else {
-		if (count <= mpc_new_length)
-			pr_info("No spare slots, try to append..., "
-				"new mpc_length %x\n", count);
-		else {
-			pr_err("mpc_new_length %lx is too small\n",
-				mpc_new_length);
-			return -1;
-		}
+	int ret = 0;
+
+	if (!mpc_new_phys || count <= mpc_new_length) {
+		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+		return -1;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -946,7 +940,7 @@
 		} else {
 			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 			count += sizeof(struct mpc_intsrc);
-			if (!check_slot(mpc_new_phys, mpc_new_length, count))
+			if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
 				goto out;
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
 			mpc->length = count;
@@ -963,11 +957,14 @@
 	return 0;
 }
 
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
 
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	return 0;
 }
 early_param("update_mptable", update_mptable_setup);
@@ -980,6 +977,9 @@
 static int __init parse_alloc_mptable_opt(char *p)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	alloc_mptable = 1;
 	if (!p)
 		return 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9faf43b..70ec9b9 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -248,18 +248,16 @@
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-	__get_cpu_var(paravirt_lazy_mode) = mode;
+	percpu_write(paravirt_lazy_mode, mode);
 }
 
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
 
-	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+	percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -269,22 +267,36 @@
 
 void paravirt_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
+	BUG_ON(preemptible());
+
+	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
+	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+	BUG_ON(preemptible());
+
+	leave_lazy(PARAVIRT_LAZY_CPU);
+
+	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
+		arch_enter_lazy_mmu_mode();
 }
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
-	return __get_cpu_var(paravirt_lazy_mode);
+	if (in_interrupt())
+		return PARAVIRT_LAZY_NONE;
+
+	return percpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
@@ -292,7 +304,6 @@
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
@@ -300,19 +311,6 @@
 	preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-	preempt_disable();
-
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-		WARN_ON(preempt_count() == 1);
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-
-	preempt_enable();
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -404,10 +402,8 @@
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
-	.lazy_mode = {
-		.enter = paravirt_nop,
-		.leave = paravirt_nop,
-	},
+	.start_context_switch = paravirt_nop,
+	.end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 755c21e..971a3be 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -186,37 +186,6 @@
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-static int debugging = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-	int expected, unsigned long start, unsigned long end)
-{
-	unsigned long idx = start;
-
-	BUG_ON(start >= end);
-
-	while (idx < end) {
-		if (!!test_bit(idx, bitmap) != expected)
-			return idx;
-		++idx;
-	}
-
-	/* all bits have the expected value */
-	return ~0UL;
-}
-#else /* debugging is disabled */
-static int debugging;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-	int expected, unsigned long start, unsigned long end)
-{
-	return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
 static inline int translation_enabled(struct iommu_table *tbl)
 {
 	/* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@
 {
 	unsigned long index;
 	unsigned long end;
-	unsigned long badbit;
 	unsigned long flags;
 
 	index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	badbit = verify_bit_range(tbl->it_map, 0, index, end);
-	if (badbit != ~0UL) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "Calgary: entry already allocated at "
-			       "0x%lx tbl %p dma 0x%lx npages %u\n",
-			       badbit, tbl, start_addr, npages);
-	}
-
 	iommu_area_reserve(tbl->it_map, index, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@
 	unsigned int npages)
 {
 	unsigned long entry;
-	unsigned long badbit;
 	unsigned long badend;
 	unsigned long flags;
 
@@ -346,14 +305,6 @@
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
-	if (badbit != ~0UL) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "Calgary: bit is off at 0x%lx "
-			       "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
-			       badbit, tbl, dma_addr, entry, npages);
-	}
-
 	iommu_area_free(tbl->it_map, entry, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@
 		iommu_detected = 1;
 		calgary_detected = 1;
 		printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
-		printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
-		       "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
-		       debugging ? "enabled" : "disabled");
+		printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
+		       specified_table_size);
 
 		/* swiotlb for devices that aren't behind the Calgary. */
 		if (max_pfn > MAX_DMA32_PFN)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58..cfd9f90 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@
 }
 
 #ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x)							\
-	do {								\
-		if (iommu_leak_tab)					\
-			iommu_leak_tab[x] = __builtin_return_address(0);\
-	} while (0)
-
-#define CLEAR_LEAK(x)							\
-	do {								\
-		if (iommu_leak_tab)					\
-			iommu_leak_tab[x] = NULL;			\
-	} while (0)
-
 /* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
 static int leak_trace;
 static int iommu_leak_pages = 20;
 
 static void dump_leak(void)
 {
-	int i;
 	static int dump;
 
-	if (dump || !iommu_leak_tab)
+	if (dump)
 		return;
 	dump = 1;
-	show_stack(NULL, NULL);
 
-	/* Very crude. dump some from the end of the table too */
-	printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
-	       iommu_leak_pages);
-	for (i = 0; i < iommu_leak_pages; i += 2) {
-		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
-				0);
-		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
-	}
-	printk(KERN_DEBUG "\n");
+	show_stack(NULL, NULL);
+	debug_dma_dump_mappings(NULL);
 }
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
 #endif
 
 static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@
 
 	for (i = 0; i < npages; i++) {
 		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-		SET_LEAK(iommu_page + i);
 		phys_mem += PAGE_SIZE;
 	}
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 	for (i = 0; i < npages; i++) {
 		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-		CLEAR_LEAK(iommu_page + i);
 	}
 	free_iommu(iommu_page, npages);
 }
@@ -377,7 +348,6 @@
 		pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
 		while (pages--) {
 			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
-			SET_LEAK(iommu_page);
 			addr += PAGE_SIZE;
 			iommu_page++;
 		}
@@ -688,8 +658,6 @@
 
 	agp_gatt_table = gatt;
 
-	enable_gart_translations();
-
 	error = sysdev_class_register(&gart_sysdev_class);
 	if (!error)
 		error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@
 
 #ifdef CONFIG_IOMMU_LEAK
 	if (leak_trace) {
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-				  get_order(iommu_pages*sizeof(void *)));
-		if (!iommu_leak_tab)
+		int ret;
+
+		ret = dma_debug_resize_entries(iommu_pages);
+		if (ret)
 			printk(KERN_DEBUG
-			       "PCI-DMA: Cannot allocate leak trace area\n");
+			       "PCI-DMA: Cannot trace all the entries\n");
 	}
 #endif
 
@@ -845,6 +814,14 @@
 	 * the pages as Not-Present:
 	 */
 	wbinvd();
+	
+	/*
+	 * Now all caches are flushed and we can safely enable
+	 * GART hardware.  Doing it early leaves the possibility
+	 * of stale cache entries that can lead to GART PTE
+	 * errors.
+	 */
+	enable_gart_translations();
 
 	/*
 	 * Try to workaround a bug (thanks to BenH):
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a385..a1712f2 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@
 	return paddr;
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ca98915..3bb2be1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,12 +8,15 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/random.h>
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -45,6 +48,8 @@
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+
+	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -83,8 +88,6 @@
 		put_cpu();
 		kfree(bp);
 	}
-
-	ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -613,3 +616,16 @@
 }
 early_param("idle", idle_setup);
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() % 8192;
+	return sp & ~0xf;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84..59f4524 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,8 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -33,7 +31,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
 #include <linux/percpu.h>
@@ -290,7 +287,8 @@
 		p->thread.io_bitmap_max = 0;
 	}
 
-	ds_copy_thread(p, current);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -407,7 +405,7 @@
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
@@ -497,15 +495,3 @@
 	return 0;
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41..ebefb54 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,8 +14,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -32,7 +30,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -335,7 +332,8 @@
 			goto out;
 	}
 
-	ds_copy_thread(p, me);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -428,7 +426,7 @@
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
@@ -660,15 +658,3 @@
 	return do_arch_prctl(current, code, addr);
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 23b7c8f..09ecbde 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -578,17 +579,130 @@
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+	/* The branch trace handle. */
+	struct bts_tracer	*tracer;
+
+	/* The buffer used to store the branch trace and its size. */
+	void			*buffer;
+	unsigned int		size;
+
+	/* The mm that paid for the above buffer. */
+	struct mm_struct	*mm;
+
+	/* The task this context belongs to. */
+	struct task_struct	*task;
+
+	/* The signal to send on a bts buffer overflow. */
+	unsigned int		bts_ovfl_signal;
+
+	/* The work struct to destroy a context. */
+	struct work_struct	work;
+};
+
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
+{
+	void *buffer = NULL;
+	int err = -ENOMEM;
+
+	err = account_locked_memory(current->mm, current->signal->rlim, size);
+	if (err < 0)
+		return err;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		goto out_refund;
+
+	context->buffer = buffer;
+	context->size = size;
+	context->mm = get_task_mm(current);
+
+	return 0;
+
+ out_refund:
+	refund_locked_memory(current->mm, size);
+	return err;
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+	if (!context->buffer)
+		return;
+
+	kfree(context->buffer);
+	context->buffer = NULL;
+
+	refund_locked_memory(context->mm, context->size);
+	context->size = 0;
+
+	mmput(context->mm);
+	context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+	struct bts_context *context;
+
+	context = container_of(w, struct bts_context, work);
+
+	ds_release_bts(context->tracer);
+	put_task_struct(context->task);
+	free_bts_buffer(context);
+	kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+	INIT_WORK(&context->work, free_bts_context_work);
+	schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (context) {
+		context->task = task;
+		task->bts = context;
+
+		get_task_struct(task);
+	}
+
+	return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct bts_struct bts;
 	const unsigned char *at;
 	int error;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	at = trace->ds.top - ((index + 1) * trace->ds.size);
 	if ((void *)at < trace->ds.begin)
@@ -597,7 +711,7 @@
 	if (!trace->read)
 		return -EOPNOTSUPP;
 
-	error = trace->read(child->bts, at, &bts);
+	error = trace->read(context->tracer, at, &bts);
 	if (error < 0)
 		return error;
 
@@ -611,13 +725,18 @@
 			    long size,
 			    struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	const unsigned char *at;
 	int error, drained = 0;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	if (!trace->read)
 		return -EOPNOTSUPP;
@@ -628,9 +747,8 @@
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     out++, drained++, at += trace->ds.size) {
 		struct bts_struct bts;
-		int error;
 
-		error = trace->read(child->bts, at, &bts);
+		error = trace->read(context->tracer, at, &bts);
 		if (error < 0)
 			return error;
 
@@ -640,35 +758,18 @@
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	error = ds_reset_bts(child->bts);
+	error = ds_reset_bts(context->tracer);
 	if (error < 0)
 		return error;
 
 	return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-	child->bts_buffer = alloc_locked_buffer(size);
-	if (!child->bts_buffer)
-		return -ENOMEM;
-
-	child->bts_size = size;
-
-	return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-	free_locked_buffer(child->bts_buffer, child->bts_size);
-	child->bts_buffer = NULL;
-	child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	struct ptrace_bts_config cfg;
 	unsigned int flags = 0;
 
@@ -678,28 +779,33 @@
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 		return -EFAULT;
 
-	if (child->bts) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-	}
+	context = child->bts;
+	if (!context)
+		context = alloc_bts_context(child);
+	if (!context)
+		return -ENOMEM;
 
 	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 		if (!cfg.signal)
 			return -EINVAL;
 
-		child->thread.bts_ovfl_signal = cfg.signal;
 		return -EOPNOTSUPP;
+		context->bts_ovfl_signal = cfg.signal;
 	}
 
-	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-	    (cfg.size != child->bts_size)) {
-		int error;
+	ds_release_bts(context->tracer);
+	context->tracer = NULL;
 
-		ptrace_bts_free_buffer(child);
+	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		int err;
 
-		error = ptrace_bts_allocate_buffer(child, cfg.size);
-		if (error < 0)
-			return error;
+		free_bts_buffer(context);
+		if (!cfg.size)
+			return 0;
+
+		err = alloc_bts_buffer(context, cfg.size);
+		if (err < 0)
+			return err;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -708,15 +814,14 @@
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		flags |= BTS_TIMESTAMPS;
 
-	child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-				    /* ovfl = */ NULL, /* th = */ (size_t)-1,
-				    flags);
-	if (IS_ERR(child->bts)) {
-		int error = PTR_ERR(child->bts);
+	context->tracer =
+		ds_request_bts_task(child, context->buffer, context->size,
+				    NULL, (size_t)-1, flags);
+	if (unlikely(IS_ERR(context->tracer))) {
+		int error = PTR_ERR(context->tracer);
 
-		ptrace_bts_free_buffer(child);
-		child->bts = NULL;
-
+		free_bts_buffer(context);
+		context->tracer = NULL;
 		return error;
 	}
 
@@ -727,20 +832,25 @@
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct ptrace_bts_config cfg;
 
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	trace = ds_read_bts(child->bts);
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(&cfg, 0, sizeof(cfg));
-	cfg.size = trace->ds.end - trace->ds.begin;
-	cfg.signal = child->thread.bts_ovfl_signal;
-	cfg.bts_size = sizeof(struct bts_struct);
+	cfg.size	= trace->ds.end - trace->ds.begin;
+	cfg.signal	= context->bts_ovfl_signal;
+	cfg.bts_size	= sizeof(struct bts_struct);
 
 	if (cfg.signal)
 		cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -759,80 +869,51 @@
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	return ds_reset_bts(child->bts);
+	return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
-{
-	tsk->bts = NULL;
-	tsk->bts_buffer = NULL;
-	tsk->bts_size = 0;
-	tsk->thread.bts_ovfl_signal = 0;
-}
-
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
+		free_bts_context(child->bts);
 		child->bts = NULL;
-
-		/* We cannot update total_vm and locked_vm since
-		   child's mm is already gone. But we can reclaim the
-		   memory. */
-		kfree(child->bts_buffer);
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
 	}
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-	/*
-	 * Ptrace_detach() races with ptrace_untrace() in case
-	 * the child dies and is reaped by another thread.
-	 *
-	 * We only do the memory accounting at this point and
-	 * leave the buffer deallocation and the bts tracer
-	 * release to ptrace_bts_untrace() which will be called
-	 * later on with tasklist_lock held.
-	 */
-	release_locked_buffer(child->bts_buffer, child->bts_size);
-}
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-	ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -844,7 +925,6 @@
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 7563b31..af71d06 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -491,5 +491,42 @@
 		break;
 	}
 }
+#endif
 
+#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
+/* Set correct numa_node information for AMD NB functions */
+static void __init quirk_amd_nb_node(struct pci_dev *dev)
+{
+	struct pci_dev *nb_ht;
+	unsigned int devfn;
+	u32 val;
+
+	devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+	nb_ht = pci_get_slot(dev->bus, devfn);
+	if (!nb_ht)
+		return;
+
+	pci_read_config_dword(nb_ht, 0x60, &val);
+	set_dev_node(&dev->dev, val & 7);
+	pci_dev_put(dev);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
+			quirk_amd_nb_node);
 #endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 667188e..d2d1ce8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -192,6 +192,15 @@
 			DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
 		},
 	},
+	{   /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 360",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
+			DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b415843..be5ae80 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,14 @@
 #define ARCH_SETUP
 #endif
 
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
 RESERVE_BRK(dmi_alloc, 65536);
 
 unsigned int boot_cpu_id __read_mostly;
@@ -214,8 +222,8 @@
 unsigned long mmu_cr4_features = X86_CR4_PAE;
 #endif
 
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
 
 /*
  * Setup options
@@ -293,15 +301,13 @@
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-#ifdef CONFIG_X86_32
-
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
 
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
@@ -357,14 +363,13 @@
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 }
-#endif
 
 static void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -394,14 +399,8 @@
 		return;
 	}
 
-#ifdef CONFIG_X86_32
 	relocate_initrd();
-#else
-	printk(KERN_ERR "initrd extends beyond end of memory "
-	       "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
-	       ramdisk_end, end_of_lowmem);
-	initrd_start = 0;
-#endif
+
 	free_early(ramdisk_image, ramdisk_end);
 }
 #else
@@ -706,6 +705,12 @@
 #endif
 	saved_video_mode = boot_params.hdr.vid_mode;
 	bootloader_type = boot_params.hdr.type_of_loader;
+	if ((bootloader_type >> 4) == 0xe) {
+		bootloader_type &= 0xf;
+		bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+	}
+	bootloader_version  = bootloader_type & 0xf;
+	bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
 
 #ifdef CONFIG_BLK_DEV_RAM
 	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
@@ -854,12 +859,16 @@
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 #endif
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 	setup_bios_corruption_check();
 #endif
 
+	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+			max_pfn_mapped<<PAGE_SHIFT);
+
 	reserve_brk();
 
 	/* max_pfn_mapped is updated here */
@@ -997,24 +1006,6 @@
 #ifdef CONFIG_X86_32
 
 /**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-
-/**
  * x86_quirk_intr_init - post gate setup interrupt initialisation
  *
  * Description:
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 8f0e13b..9c3f082 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -425,6 +425,14 @@
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+	/*
+	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * node that doesn't have mem installed
+	 */
+	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 1442516..0a813b1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
-
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea..28f5fb4 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
@@ -193,19 +196,19 @@
 }
 
 struct smp_ops smp_ops = {
-	.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = native_smp_prepare_cpus,
-	.smp_cpus_done = native_smp_cpus_done,
+	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
+	.smp_prepare_cpus	= native_smp_prepare_cpus,
+	.smp_cpus_done		= native_smp_cpus_done,
 
-	.smp_send_stop = native_smp_send_stop,
-	.smp_send_reschedule = native_smp_send_reschedule,
+	.smp_send_stop		= native_smp_send_stop,
+	.smp_send_reschedule	= native_smp_send_reschedule,
 
-	.cpu_up = native_cpu_up,
-	.cpu_die = native_cpu_die,
-	.cpu_disable = native_cpu_disable,
-	.play_dead = native_play_dead,
+	.cpu_up			= native_cpu_up,
+	.cpu_die		= native_cpu_die,
+	.cpu_disable		= native_cpu_disable,
+	.play_dead		= native_play_dead,
 
-	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_ipi	= native_send_call_func_ipi,
 	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
 EXPORT_SYMBOL_GPL(smp_ops);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58d24ef..2fecda6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -504,7 +504,7 @@
  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  * won't ... remember to clear down the APIC, etc later.
  */
-int __devinit
+int __cpuinit
 wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
@@ -538,7 +538,7 @@
 	return (send_status | accept_status);
 }
 
-int __devinit
+static int __cpuinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
@@ -822,10 +822,12 @@
 	/* mark "stuck" area as not stuck */
 	*((volatile unsigned long *)trampoline_base) = 0;
 
-	/*
-	 * Cleanup possible dangling ends...
-	 */
-	smpboot_restore_warm_reset_vector();
+	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+		/*
+		 * Cleanup possible dangling ends...
+		 */
+		smpboot_restore_warm_reset_vector();
+	}
 
 	return boot_error;
 }
@@ -871,7 +873,7 @@
 
 	err = do_boot_cpu(apicid, cpu);
 
-	zap_low_mappings();
+	zap_low_mappings(false);
 	low_mappings = 0;
 #else
 	err = do_boot_cpu(apicid, cpu);
@@ -990,10 +992,12 @@
 	 */
 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
 	    !cpu_has_apic) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-			boot_cpu_physical_apicid);
-		printk(KERN_ERR "... forcing use of dummy APIC emulation."
+		if (!disable_apic) {
+			pr_err("BIOS bug, local APIC #%d not detected!...\n",
+				boot_cpu_physical_apicid);
+			pr_err("... forcing use of dummy APIC emulation."
 				"(tell your hw vendor)\n");
+		}
 		smpboot_clear_io_apic();
 		arch_disable_smp_support();
 		return -1;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index f7bddc2..4aaf7e4 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -20,7 +20,7 @@
 
 static int save_stack_stack(void *data, char *name)
 {
-	return -1;
+	return 0;
 }
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..d51321d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,5 @@
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
+	.long sys_perf_counter_open
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index ed0c337..124d40c 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -715,7 +715,12 @@
 	struct bau_desc *adp;
 	struct bau_desc *ad2;
 
-	adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
+	/*
+	 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
+	 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
+	 */
+	adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
+		UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
 	BUG_ON(!adp);
 
 	pa = uv_gpa(adp); /* need the real nasid*/
@@ -729,7 +734,13 @@
 				      (n << UV_DESC_BASE_PNODE_SHIFT | m));
 	}
 
-	for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
+	/*
+	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * cpu even though we only use the first one; one descriptor can
+	 * describe a broadcast to 256 nodes.
+	 */
+	for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
+		i++, ad2++) {
 		memset(ad2, 0, sizeof(struct bau_desc));
 		ad2->header.sw_ack_flag = 1;
 		/*
@@ -832,7 +843,7 @@
 		return 0;
 
 	for_each_possible_cpu(cur_cpu)
-		alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+		zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
 				       GFP_KERNEL, cpu_to_node(cur_cpu));
 
 	uv_bau_retry_limit = 1;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d2883..07d60c8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -839,9 +839,6 @@
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
-#ifdef CONFIG_X86_32
-	restore_fpu(tsk);
-#else
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
@@ -850,7 +847,7 @@
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
-#endif
+
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
 }
@@ -945,8 +942,13 @@
 #endif
 	set_intr_gate(19, &simd_coprocessor_error);
 
+	/* Reserve all the builtin and the syscall vector: */
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, used_vectors);
+
 #ifdef CONFIG_IA32_EMULATION
 	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -963,17 +965,9 @@
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
-#endif
-
-	/* Reserve all the builtin and the syscall vector: */
-	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
-		set_bit(i, used_vectors);
-
-#ifdef CONFIG_X86_64
-	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
+
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d57de05..3e1c057 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -384,13 +384,13 @@
 {
 	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
+	unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
-	tsc_khz = get_hypervisor_tsc_freq();
-	if (tsc_khz) {
+	hv_tsc_khz = get_hypervisor_tsc_freq();
+	if (hv_tsc_khz) {
 		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
-		return tsc_khz;
+		return hv_tsc_khz;
 	}
 
 	local_irq_save(flags);
@@ -710,7 +710,16 @@
 #ifdef CONFIG_X86_64
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)vget_cycles();
+	cycle_t ret;
+
+	/*
+	 * Surround the RDTSC by barriers, to make sure it's not
+	 * speculated to outside the seqlock critical section and
+	 * does not cause time warps:
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+	rdtsc_barrier();
 
 	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
 		ret : __vsyscall_gtod_data.clock.cycle_last;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index bf36328..027b5b4 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -34,6 +34,7 @@
  * of a critical section, to be able to prove TSC time-warps:
  */
 static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
 static __cpuinitdata int nr_warps;
@@ -113,13 +114,12 @@
 		return;
 
 	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		printk(KERN_INFO
-		       "Skipping synchronization checks as TSC is reliable.\n");
+		pr_info("Skipping synchronization checks as TSC is reliable.\n");
 		return;
 	}
 
-	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
-			  smp_processor_id(), cpu);
+	pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
+		smp_processor_id(), cpu);
 
 	/*
 	 * Reset it - in case this is a second bootup:
@@ -143,8 +143,8 @@
 
 	if (nr_warps) {
 		printk("\n");
-		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
-				    " turning off TSC clock.\n", max_warp);
+		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
+			   "turning off TSC clock.\n", max_warp);
 		mark_tsc_unstable("check_tsc_sync_source failed");
 	} else {
 		printk(" passed.\n");
@@ -195,5 +195,3 @@
 	while (atomic_read(&stop_count) != cpus)
 		cpu_relax();
 }
-#undef NR_LOOPS
-
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index d7ac84e..9c4e625 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -287,10 +287,9 @@
 	info->regs.pt.ds = 0;
 	info->regs.pt.es = 0;
 	info->regs.pt.fs = 0;
-
-/* we are clearing gs later just before "jmp resume_userspace",
- * because it is not saved/restored.
- */
+#ifndef CONFIG_X86_32_LAZY_GS
+	info->regs.pt.gs = 0;
+#endif
 
 /*
  * The flags register is also special: we cannot trust that the user
@@ -318,9 +317,9 @@
 	}
 
 /*
- * Save old state, set default return value (%ax) to 0
+ * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
  */
-	info->regs32->ax = 0;
+	info->regs32->ax = VM86_SIGNAL;
 	tsk->thread.saved_sp0 = tsk->thread.sp0;
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
@@ -343,7 +342,9 @@
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
 		"movl %1,%%ebp\n\t"
+#ifdef CONFIG_X86_32_LAZY_GS
 		"mov  %2, %%gs\n\t"
+#endif
 		"jmp resume_userspace"
 		: /* no outputs */
 		:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95deb9f2..b263423 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -462,22 +462,28 @@
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-	paravirt_enter_lazy_cpu();
+	paravirt_start_context_switch(prev);
 	vmi_ops.set_lazy_mode(2);
 }
 
+static void vmi_end_context_switch(struct task_struct *next)
+{
+	vmi_ops.set_lazy_mode(0);
+	paravirt_end_context_switch(next);
+}
+
 static void vmi_enter_lazy_mmu(void)
 {
 	paravirt_enter_lazy_mmu();
 	vmi_ops.set_lazy_mode(1);
 }
 
-static void vmi_leave_lazy(void)
+static void vmi_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	vmi_ops.set_lazy_mode(0);
+	paravirt_leave_lazy_mmu();
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
-	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
 
 	/* user and kernel flush are just handled with different flags to FlushTLB */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 849ee61..367e878 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -1,5 +1,433 @@
+/*
+ * ld script for the x86 kernel
+ *
+ * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Modernisation, unification and other changes and fixes:
+ *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>
+ *
+ *
+ * Don't define absolute symbols until and unless you know that symbol
+ * value is should remain constant even if kernel image is relocated
+ * at run time. Absolute symbols are not relocated. If symbol value should
+ * change if kernel is relocated, make the symbol section relative and
+ * put it inside the section definition.
+ */
+
 #ifdef CONFIG_X86_32
-# include "vmlinux_32.lds.S"
+#define LOAD_OFFSET __PAGE_OFFSET
 #else
-# include "vmlinux_64.lds.S"
+#define LOAD_OFFSET __START_KERNEL_map
 #endif
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page_types.h>
+#include <asm/cache.h>
+#include <asm/boot.h>
+
+#undef i386     /* in case the preprocessor is a 32bit one */
+
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#ifdef CONFIG_X86_32
+OUTPUT_ARCH(i386)
+ENTRY(phys_startup_32)
+jiffies = jiffies_64;
+#else
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(phys_startup_64)
+jiffies_64 = jiffies;
+#endif
+
+PHDRS {
+	text PT_LOAD FLAGS(5);          /* R_E */
+	data PT_LOAD FLAGS(7);          /* RWE */
+#ifdef CONFIG_X86_64
+	user PT_LOAD FLAGS(7);          /* RWE */
+	data.init PT_LOAD FLAGS(7);     /* RWE */
+#ifdef CONFIG_SMP
+	percpu PT_LOAD FLAGS(7);        /* RWE */
+#endif
+	data.init2 PT_LOAD FLAGS(7);    /* RWE */
+#endif
+	note PT_NOTE FLAGS(0);          /* ___ */
+}
+
+SECTIONS
+{
+#ifdef CONFIG_X86_32
+        . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+        phys_startup_32 = startup_32 - LOAD_OFFSET;
+#else
+        . = __START_KERNEL;
+        phys_startup_64 = startup_64 - LOAD_OFFSET;
+#endif
+
+	/* Text and read-only data */
+
+	/* bootstrapping code */
+	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
+		_text = .;
+		*(.text.head)
+	} :text = 0x9090
+
+	/* The rest of the text */
+	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_X86_32
+		/* not really needed, already page aligned */
+		. = ALIGN(PAGE_SIZE);
+		*(.text.page_aligned)
+#endif
+		. = ALIGN(8);
+		_stext = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		/* End of text section */
+		_etext = .;
+	} :text = 0x9090
+
+	NOTES :text :note
+
+	/* Exception table */
+	. = ALIGN(16);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	} :text = 0x9090
+
+	RODATA
+
+	/* Data */
+	. = ALIGN(PAGE_SIZE);
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		/* Start of data section */
+		_sdata = .;
+		DATA_DATA
+		CONSTRUCTORS
+
+#ifdef CONFIG_X86_64
+		/* End of data section */
+		_edata = .;
+#endif
+	} :data
+
+#ifdef CONFIG_X86_32
+	/* 32 bit has nosave before _edata */
+	. = ALIGN(PAGE_SIZE);
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	}
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+		*(.data.page_aligned)
+		*(.data.idt)
+	}
+
+#ifdef CONFIG_X86_32
+	. = ALIGN(32);
+#else
+	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+#endif
+	.data.cacheline_aligned :
+		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+		*(.data.cacheline_aligned)
+	}
+
+	/* rarely changed data like cpu maps */
+#ifdef CONFIG_X86_32
+	. = ALIGN(32);
+#else
+	. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
+#endif
+	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+		*(.data.read_mostly)
+
+#ifdef CONFIG_X86_32
+		/* End of data section */
+		_edata = .;
+#endif
+	}
+
+#ifdef CONFIG_X86_64
+
+#define VSYSCALL_ADDR (-10*1024*1024)
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+
+#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
+
+#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+	. = VSYSCALL_ADDR;
+	.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+		*(.vsyscall_0)
+	} :user
+
+	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
+		*(.vsyscall_fn)
+	}
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
+		*(.vsyscall_gtod_data)
+	}
+
+	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
+		*(.vsyscall_clock)
+	}
+	vsyscall_clock = VVIRT(.vsyscall_clock);
+
+
+	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
+		*(.vsyscall_1)
+	}
+	.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
+		*(.vsyscall_2)
+	}
+
+	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
+		*(.vgetcpu_mode)
+	}
+	vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.jiffies : AT(VLOAD(.jiffies)) {
+		*(.jiffies)
+	}
+	jiffies = VVIRT(.jiffies);
+
+	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
+		*(.vsyscall_3)
+	}
+
+	. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+
+#undef VSYSCALL_ADDR
+#undef VSYSCALL_PHYS_ADDR
+#undef VSYSCALL_VIRT_ADDR
+#undef VLOAD_OFFSET
+#undef VLOAD
+#undef VVIRT_OFFSET
+#undef VVIRT
+
+#endif /* CONFIG_X86_64 */
+
+	/* init_task */
+	. = ALIGN(THREAD_SIZE);
+	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+		*(.data.init_task)
+	}
+#ifdef CONFIG_X86_64
+	 :data.init
+#endif
+
+	/*
+	 * smp_locks might be freed after init
+	 * start/end must be page aligned
+	 */
+	. = ALIGN(PAGE_SIZE);
+	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+		__smp_locks = .;
+		*(.smp_locks)
+		__smp_locks_end = .;
+		. = ALIGN(PAGE_SIZE);
+	}
+
+	/* Init code and data - will be freed after init */
+	. = ALIGN(PAGE_SIZE);
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .; /* paired with __init_end */
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+		INIT_DATA
+	}
+
+	. = ALIGN(16);
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+		__x86_cpu_dev_start = .;
+		*(.x86_cpu_dev.init)
+		__x86_cpu_dev_end = .;
+	}
+
+	SECURITY_INIT
+
+	. = ALIGN(8);
+	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+		__parainstructions = .;
+		*(.parainstructions)
+		__parainstructions_end = .;
+	}
+
+	. = ALIGN(8);
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
+	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+		*(.altinstr_replacement)
+	}
+
+	/*
+	 * .exit.text is discard at runtime, not link time, to deal with
+	 *  references from .altinstructions and .eh_frame
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+		EXIT_TEXT
+	}
+
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		EXIT_DATA
+	}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+	/*
+	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
+	 * output PHDR, so the next output section - __data_nosave - should
+	 * start another section data.init2.  Also, pda should be at the head of
+	 * percpu area.  Preallocate it and define the percpu offset symbol
+	 * so that it can be accessed as a percpu variable.
+	 */
+	. = ALIGN(PAGE_SIZE);
+	PERCPU_VADDR(0, :percpu)
+#else
+	PERCPU(PAGE_SIZE)
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+
+	/* freed after init ends here */
+	.init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
+		__init_end = .;
+	}
+
+#ifdef CONFIG_X86_64
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	} :data.init2
+	/* use another section data.init2, see PERCPU_VADDR() above */
+#endif
+
+	/* BSS */
+	. = ALIGN(PAGE_SIZE);
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		__bss_start = .;
+		*(.bss.page_aligned)
+		*(.bss)
+		. = ALIGN(4);
+		__bss_stop = .;
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+		__brk_base = .;
+		. += 64 * 1024;		/* 64k alignment slop space */
+		*(.brk_reservation)	/* areas brk users have reserved */
+		__brk_limit = .;
+	}
+
+	.end : AT(ADDR(.end) - LOAD_OFFSET) {
+		_end = .;
+	}
+
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+		*(.eh_frame)
+		*(.discard)
+	}
+
+        STABS_DEBUG
+        DWARF_DEBUG
+}
+
+
+#ifdef CONFIG_X86_32
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+        "kernel image bigger than KERNEL_IMAGE_SIZE")
+#else
+/*
+ * Per-cpu symbols which need to be offset from __per_cpu_load
+ * for the boot processor.
+ */
+#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+INIT_PER_CPU(gdt_page);
+INIT_PER_CPU(irq_stack_union);
+
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+        "irq_stack_union is not at start of per-cpu area");
+#endif
+
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
+
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
deleted file mode 100644
index 62ad500..0000000
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ /dev/null
@@ -1,229 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- *
- * Don't define absolute symbols until and unless you know that symbol
- * value is should remain constant even if kernel image is relocated
- * at run time. Absolute symbols are not relocated. If symbol value should
- * change if kernel is relocated, make the symbol section relative and
- * put it inside the section definition.
- */
-
-#define LOAD_OFFSET __PAGE_OFFSET
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/thread_info.h>
-#include <asm/page_types.h>
-#include <asm/cache.h>
-#include <asm/boot.h>
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
-jiffies = jiffies_64;
-
-PHDRS {
-	text PT_LOAD FLAGS(5);	/* R_E */
-	data PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(0);	/* ___ */
-}
-SECTIONS
-{
-  . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
-  phys_startup_32 = startup_32 - LOAD_OFFSET;
-
-  .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
-  	_text = .;			/* Text and read-only data */
-	*(.text.head)
-  } :text = 0x9090
-
-  /* read-only */
-  .text : AT(ADDR(.text) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
-	*(.text.page_aligned)
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	IRQENTRY_TEXT
-	*(.fixup)
-	*(.gnu.warning)
-  	_etext = .;			/* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);		/* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-  	__start___ex_table = .;
-	 *(__ex_table)
-  	__stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  /* writeable */
-  . = ALIGN(PAGE_SIZE);
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
-	DATA_DATA
-	CONSTRUCTORS
-	} :data
-
-  . = ALIGN(PAGE_SIZE);
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-  	__nosave_begin = .;
-	*(.data.nosave)
-  	. = ALIGN(PAGE_SIZE);
-  	__nosave_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-	*(.data.page_aligned)
-	*(.data.idt)
-  }
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-	*(.data.cacheline_aligned)
-  }
-
-  /* rarely changed data like cpu maps */
-  . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-	*(.data.read_mostly)
-	_edata = .;		/* End of data section */
-  }
-
-  . = ALIGN(THREAD_SIZE);	/* init_task */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-	*(.data.init_task)
-  }
-
-  /* might get freed after init */
-  . = ALIGN(PAGE_SIZE);
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-  	__smp_locks = .;
-	*(.smp_locks)
-	__smp_locks_end = .;
-  }
-  /* will be freed after init
-   * Following ALIGN() is required to make sure no other data falls on the
-   * same page where __smp_alt_end is pointing as that page might be freed
-   * after boot. Always make sure that ALIGN() directive is present after
-   * the section which contains __smp_alt_end.
-   */
-  . = ALIGN(PAGE_SIZE);
-
-  /* will be freed after init */
-  . = ALIGN(PAGE_SIZE);		/* Init code and data */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-  	__init_begin = .;
-	_sinittext = .;
-	INIT_TEXT
-	_einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-	INIT_DATA
-  }
-  . = ALIGN(16);
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-  	__setup_start = .;
-	*(.init.setup)
-  	__setup_end = .;
-   }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-  	__initcall_start = .;
-	INITCALLS
-  	__initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-  	__con_initcall_start = .;
-	*(.con_initcall.init)
-  	__con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-	__x86_cpu_dev_start = .;
-	*(.x86_cpu_dev.init)
-	__x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-  . = ALIGN(4);
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-  	__alt_instructions = .;
-	*(.altinstructions)
-	__alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-	*(.altinstr_replacement)
-  }
-  . = ALIGN(4);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-  	__parainstructions = .;
-	*(.parainstructions)
-  	__parainstructions_end = .;
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-	EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-	EXIT_DATA
-  }
-#if defined(CONFIG_BLK_DEV_INITRD)
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-	__initramfs_start = .;
-	*(.init.ramfs)
-	__initramfs_end = .;
-  }
-#endif
-  PERCPU(PAGE_SIZE)
-  . = ALIGN(PAGE_SIZE);
-  /* freed after init ends here */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-	__init_end = .;
-	__bss_start = .;		/* BSS */
-	*(.bss.page_aligned)
-	*(.bss)
-	. = ALIGN(4);
-	__bss_stop = .;
-  }
-
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__brk_base = . ;
- 	. += 64 * 1024 ;	/* 64k alignment slop space */
-	*(.brk_reservation)	/* areas brk users have reserved */
-	__brk_limit = . ;
-  }
-
-  .end : AT(ADDR(.end) - LOAD_OFFSET) {
-	_end = . ;
-  }
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	*(.exitcall.exit)
-	*(.discard)
-	}
-
-  STABS_DEBUG
-
-  DWARF_DEBUG
-}
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
-	"kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_KEXEC
-/* Link time checks */
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
deleted file mode 100644
index c874250..0000000
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ /dev/null
@@ -1,298 +0,0 @@
-/* ld script to make x86-64 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-
-#define LOAD_OFFSET __START_KERNEL_map
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/asm-offsets.h>
-#include <asm/page_types.h>
-
-#undef i386	/* in case the preprocessor is a 32bit one */
-
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
-PHDRS {
-	text PT_LOAD FLAGS(5);	/* R_E */
-	data PT_LOAD FLAGS(7);	/* RWE */
-	user PT_LOAD FLAGS(7);	/* RWE */
-	data.init PT_LOAD FLAGS(7);	/* RWE */
-#ifdef CONFIG_SMP
-	percpu PT_LOAD FLAGS(7);	/* RWE */
-#endif
-	data.init2 PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(0);	/* ___ */
-}
-SECTIONS
-{
-  . = __START_KERNEL;
-  phys_startup_64 = startup_64 - LOAD_OFFSET;
-  .text :  AT(ADDR(.text) - LOAD_OFFSET) {
-	_text = .;			/* Text and read-only data */
-	/* First the code that has to be first for bootstrapping */
-	*(.text.head)
-	_stext = .;
-	/* Then the rest */
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	IRQENTRY_TEXT
-	*(.fixup)
-	*(.gnu.warning)
-	_etext = .;		/* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);		/* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-  	__start___ex_table = .;
-	 *(__ex_table)
-  	__stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  . = ALIGN(PAGE_SIZE);		/* Align data segment to page size boundary */
-				/* Data */
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {
-	DATA_DATA
-	CONSTRUCTORS
-	_edata = .;			/* End of data section */
-	} :data
-
-
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	*(.data.cacheline_aligned)
-  }
-  . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-  	*(.data.read_mostly)
-  }
-
-#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
-  . = VSYSCALL_ADDR;
-  .vsyscall_0 :	 AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
-  __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
-		{ *(.vsyscall_gtod_data) }
-  vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-  .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
-		{ *(.vsyscall_clock) }
-  vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
-  .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
-		{ *(.vsyscall_1) }
-  .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
-		{ *(.vsyscall_2) }
-
-  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
-  vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
-  jiffies = VVIRT(.jiffies);
-
-  .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
-		{ *(.vsyscall_3) }
-
-  . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
-
-#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-	. = ALIGN(THREAD_SIZE);	/* init_task */
-	*(.data.init_task)
-  }:data.init
-
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	*(.data.page_aligned)
-  }
-
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-	/* might get freed after init */
-	. = ALIGN(PAGE_SIZE);
-	__smp_alt_begin = .;
-	__smp_locks = .;
-	*(.smp_locks)
-	__smp_locks_end = .;
-	. = ALIGN(PAGE_SIZE);
-	__smp_alt_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);		/* Init code and data */
-  __init_begin = .;	/* paired with __init_end */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-	_sinittext = .;
-	INIT_TEXT
-	_einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-	__initdata_begin = .;
-	INIT_DATA
-	__initdata_end = .;
-   }
-
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-	. = ALIGN(16);
-	__setup_start = .;
-	*(.init.setup)
-	__setup_end = .;
-  }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-	__initcall_start = .;
-	INITCALLS
-	__initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-	__con_initcall_start = .;
-	*(.con_initcall.init)
-	__con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-	__x86_cpu_dev_start = .;
-	*(.x86_cpu_dev.init)
-	__x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-
-  . = ALIGN(8);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-	__parainstructions = .;
-       *(.parainstructions)
-	__parainstructions_end = .;
-  }
-
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-	. = ALIGN(8);
-	__alt_instructions = .;
-	*(.altinstructions)
-	__alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-	*(.altinstr_replacement)
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-	EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-	EXIT_DATA
-  }
-
-#ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-	__initramfs_start = .;
-	*(.init.ramfs)
-	__initramfs_end = .;
-  }
-#endif
-
-#ifdef CONFIG_SMP
-  /*
-   * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-   * output PHDR, so the next output section - __data_nosave - should
-   * start another section data.init2.  Also, pda should be at the head of
-   * percpu area.  Preallocate it and define the percpu offset symbol
-   * so that it can be accessed as a percpu variable.
-   */
-  . = ALIGN(PAGE_SIZE);
-  PERCPU_VADDR(0, :percpu)
-#else
-  PERCPU(PAGE_SIZE)
-#endif
-
-  . = ALIGN(PAGE_SIZE);
-  __init_end = .;
-
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__nosave_begin = .;
-	*(.data.nosave)
-	. = ALIGN(PAGE_SIZE);
-	__nosave_end = .;
-  } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__bss_start = .;		/* BSS */
-	*(.bss.page_aligned)
-	*(.bss)
-	__bss_stop = .;
-  }
-
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__brk_base = . ;
- 	. += 64 * 1024 ;	/* 64k alignment slop space */
-	*(.brk_reservation)	/* areas brk users have reserved */
-	__brk_limit = . ;
-  }
-
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	*(.exitcall.exit)
-	*(.eh_frame)
-	*(.discard)
-	}
-
-  STABS_DEBUG
-
-  DWARF_DEBUG
-}
-
- /*
-  * Per-cpu symbols which need to be offset from __per_cpu_load
-  * for the boot processor.
-  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
-INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
-	"kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_SMP
-ASSERT((per_cpu__irq_stack_union == 0),
-        "irq_stack_union is not at start of per-cpu area");
-#endif
-
-#ifdef CONFIG_KEXEC
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 44153af..25ee06a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -132,15 +132,7 @@
 			return;
 		}
 
-		/*
-		 * Surround the RDTSC by barriers, to make sure it's not
-		 * speculated to outside the seqlock critical section and
-		 * does not cause time warps:
-		 */
-		rdtsc_barrier();
 		now = vread();
-		rdtsc_barrier();
-
 		base = __vsyscall_gtod_data.clock.cycle_last;
 		mask = __vsyscall_gtod_data.clock.mask;
 		mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a58504e..8600a09 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,9 @@
 	  Provides support for KVM on Intel processors equipped with the VT
 	  extensions.
 
+	  To compile this as a module, choose M here: the module
+	  will be called kvm-intel.
+
 config KVM_AMD
 	tristate "KVM for AMD processors support"
 	depends on KVM
@@ -57,6 +60,9 @@
 	  Provides support for KVM on AMD processors equipped with the AMD-V
 	  (SVM) extensions.
 
+	  To compile this as a module, choose M here: the module
+	  will be called kvm-amd.
+
 config KVM_TRACE
 	bool "KVM trace support"
 	depends on KVM && SYSFS
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d3ec292..b43c4ef 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o timer.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c13bb92..4d6f0d2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -98,6 +98,37 @@
 	return kvm->arch.vpit->pit_state.channels[channel].gate;
 }
 
+static s64 __kpit_elapsed(struct kvm *kvm)
+{
+	s64 elapsed;
+	ktime_t remaining;
+	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+	/*
+	 * The Counter does not stop when it reaches zero. In
+	 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
+	 * the highest count, either FFFF hex for binary counting
+	 * or 9999 for BCD counting, and continues counting.
+	 * Modes 2 and 3 are periodic; the Counter reloads
+	 * itself with the initial count and continues counting
+	 * from there.
+	 */
+	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
+	elapsed = mod_64(elapsed, ps->pit_timer.period);
+
+	return elapsed;
+}
+
+static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
+			int channel)
+{
+	if (channel == 0)
+		return __kpit_elapsed(kvm);
+
+	return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+}
+
 static int pit_get_count(struct kvm *kvm, int channel)
 {
 	struct kvm_kpit_channel_state *c =
@@ -107,7 +138,7 @@
 
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
-	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	t = kpit_elapsed(kvm, c, channel);
 	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
 
 	switch (c->mode) {
@@ -137,7 +168,7 @@
 
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
-	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	t = kpit_elapsed(kvm, c, channel);
 	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
 
 	switch (c->mode) {
@@ -193,28 +224,6 @@
 	}
 }
 
-static int __pit_timer_fn(struct kvm_kpit_state *ps)
-{
-	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
-	struct kvm_kpit_timer *pt = &ps->pit_timer;
-
-	if (!atomic_inc_and_test(&pt->pending))
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-
-	if (!pt->reinject)
-		atomic_set(&pt->pending, 1);
-
-	if (vcpu0 && waitqueue_active(&vcpu0->wq))
-		wake_up_interruptible(&vcpu0->wq);
-
-	hrtimer_add_expires_ns(&pt->timer, pt->period);
-	pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
-	if (pt->period)
-		ps->channels[0].count_load_time = ktime_get();
-
-	return (pt->period == 0 ? 0 : 1);
-}
-
 int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -235,21 +244,6 @@
 	spin_unlock(&ps->inject_lock);
 }
 
-static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
-{
-	struct kvm_kpit_state *ps;
-	int restart_timer = 0;
-
-	ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
-
-	restart_timer = __pit_timer_fn(ps);
-
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}
-
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -263,15 +257,26 @@
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+static void destroy_pit_timer(struct kvm_timer *pt)
 {
 	pr_debug("pit: execute del timer!\n");
 	hrtimer_cancel(&pt->timer);
 }
 
+static bool kpit_is_periodic(struct kvm_timer *ktimer)
+{
+	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
+						 pit_timer);
+	return ps->is_periodic;
+}
+
+static struct kvm_timer_ops kpit_ops = {
+	.is_periodic = kpit_is_periodic,
+};
+
 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
-	struct kvm_kpit_timer *pt = &ps->pit_timer;
+	struct kvm_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -280,8 +285,14 @@
 
 	/* TODO The new value only affected after the retriggered */
 	hrtimer_cancel(&pt->timer);
-	pt->period = (is_period == 0) ? 0 : interval;
-	pt->timer.function = pit_timer_fn;
+	pt->period = interval;
+	ps->is_periodic = is_period;
+
+	pt->timer.function = kvm_timer_fn;
+	pt->t_ops = &kpit_ops;
+	pt->kvm = ps->pit->kvm;
+	pt->vcpu_id = 0;
+
 	atomic_set(&pt->pending, 0);
 	ps->irq_ack = 1;
 
@@ -298,23 +309,23 @@
 	pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
 
 	/*
-	 * Though spec said the state of 8254 is undefined after power-up,
-	 * seems some tricky OS like Windows XP depends on IRQ0 interrupt
-	 * when booting up.
-	 * So here setting initialize rate for it, and not a specific number
+	 * The largest possible initial count is 0; this is equivalent
+	 * to 216 for binary counting and 104 for BCD counting.
 	 */
 	if (val == 0)
 		val = 0x10000;
 
-	ps->channels[channel].count_load_time = ktime_get();
 	ps->channels[channel].count = val;
 
-	if (channel != 0)
+	if (channel != 0) {
+		ps->channels[channel].count_load_time = ktime_get();
 		return;
+	}
 
 	/* Two types of timer
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
+	case 0:
 	case 1:
         /* FIXME: enhance mode 4 precision */
 	case 4:
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 6acbe4b..bbd863f 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,15 +3,6 @@
 
 #include "iodev.h"
 
-struct kvm_kpit_timer {
-	struct hrtimer timer;
-	int irq;
-	s64 period; /* unit: ns */
-	s64 scheduled;
-	atomic_t pending;
-	bool reinject;
-};
-
 struct kvm_kpit_channel_state {
 	u32 count; /* can be 65536 */
 	u16 latched_count;
@@ -30,7 +21,8 @@
 
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
-	struct kvm_kpit_timer pit_timer;
+	struct kvm_timer pit_timer;
+	bool is_periodic;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index cf17ed5..96dfbb6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -24,6 +24,7 @@
 
 #include "irq.h"
 #include "i8254.h"
+#include "x86.h"
 
 /*
  * check if there are pending timer events
@@ -48,6 +49,9 @@
 {
 	struct kvm_pic *s;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return v->arch.interrupt.pending;
+
 	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
 			s = pic_irqchip(v->kvm);	/* PIC */
@@ -67,6 +71,9 @@
 	struct kvm_pic *s;
 	int vector;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return v->arch.interrupt.nr;
+
 	vector = kvm_get_apic_interrupt(v);	/* APIC */
 	if (vector == -1) {
 		if (kvm_apic_accept_pic_intr(v)) {
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
new file mode 100644
index 0000000..26bd6ba
--- /dev/null
+++ b/arch/x86/kvm/kvm_timer.h
@@ -0,0 +1,18 @@
+
+struct kvm_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_timer_ops *t_ops;
+	struct kvm *kvm;
+	int vcpu_id;
+};
+
+struct kvm_timer_ops {
+        bool (*is_periodic)(struct kvm_timer *);
+};
+
+
+enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
+
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f0b67f2..ae99d83 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -196,20 +196,15 @@
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+			     int vector, int level, int trig_mode);
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!apic_test_and_set_irr(vec, apic)) {
-		/* a new pending irq is set in IRR */
-		if (trig)
-			apic_set_vector(vec, apic->regs + APIC_TMR);
-		else
-			apic_clear_vector(vec, apic->regs + APIC_TMR);
-		kvm_vcpu_kick(apic->vcpu);
-		return 1;
-	}
-	return 0;
+	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
+			irq->level, irq->trig_mode);
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -250,7 +245,7 @@
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 {
-	return kvm_apic_id(apic) == dest;
+	return dest == 0xff || kvm_apic_id(apic) == dest;
 }
 
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
@@ -279,37 +274,34 @@
 	return result;
 }
 
-static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
 
 	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x",
+		   "dest_mode 0x%x, short_hand 0x%x\n",
 		   target, source, dest, dest_mode, short_hand);
 
 	ASSERT(!target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
-		if (dest_mode == 0) {
+		if (dest_mode == 0)
 			/* Physical mode. */
-			if ((dest == 0xFF) || (dest == kvm_apic_id(target)))
-				result = 1;
-		} else
+			result = kvm_apic_match_physical_addr(target, dest);
+		else
 			/* Logical mode. */
 			result = kvm_apic_match_logical_addr(target, dest);
 		break;
 	case APIC_DEST_SELF:
-		if (target == source)
-			result = 1;
+		result = (target == source);
 		break;
 	case APIC_DEST_ALLINC:
 		result = 1;
 		break;
 	case APIC_DEST_ALLBUT:
-		if (target != source)
-			result = 1;
+		result = (target != source);
 		break;
 	default:
 		printk(KERN_WARNING "Bad dest shorthand value %x\n",
@@ -327,20 +319,22 @@
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode)
 {
-	int orig_irr, result = 0;
+	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
 
 	switch (delivery_mode) {
-	case APIC_DM_FIXED:
 	case APIC_DM_LOWEST:
+		vcpu->arch.apic_arb_prio++;
+	case APIC_DM_FIXED:
 		/* FIXME add logic for vcpu on reset */
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
-		orig_irr = apic_test_and_set_irr(vector, apic);
-		if (orig_irr && trig_mode) {
-			apic_debug("level trig mode repeatedly for vector %d",
-				   vector);
+		result = !apic_test_and_set_irr(vector, apic);
+		if (!result) {
+			if (trig_mode)
+				apic_debug("level trig mode repeatedly for "
+						"vector %d", vector);
 			break;
 		}
 
@@ -349,10 +343,7 @@
 			apic_set_vector(vector, apic->regs + APIC_TMR);
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
-
 		kvm_vcpu_kick(vcpu);
-
-		result = (orig_irr == 0);
 		break;
 
 	case APIC_DM_REMRD:
@@ -364,12 +355,14 @@
 		break;
 
 	case APIC_DM_NMI:
+		result = 1;
 		kvm_inject_nmi(vcpu);
 		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_INIT:
 		if (level) {
+			result = 1;
 			if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 				printk(KERN_DEBUG
 				       "INIT on a runnable vcpu %d\n",
@@ -386,6 +379,7 @@
 		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 			   vcpu->vcpu_id, vector);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+			result = 1;
 			vcpu->arch.sipi_vector = vector;
 			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 			kvm_vcpu_kick(vcpu);
@@ -408,43 +402,9 @@
 	return result;
 }
 
-static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-	int last;
-	int next;
-	struct kvm_lapic *apic = NULL;
-
-	last = kvm->arch.round_robin_prev_vcpu;
-	next = last;
-
-	do {
-		if (++next == KVM_MAX_VCPUS)
-			next = 0;
-		if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
-			continue;
-		apic = kvm->vcpus[next]->arch.apic;
-		if (apic && apic_enabled(apic))
-			break;
-		apic = NULL;
-	} while (next != last);
-	kvm->arch.round_robin_prev_vcpu = next;
-
-	if (!apic)
-		printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-
-	return apic;
-}
-
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-		unsigned long bitmap)
-{
-	struct kvm_lapic *apic;
-
-	apic = kvm_apic_round_robin(kvm, vector, bitmap);
-	if (apic)
-		return apic->vcpu;
-	return NULL;
+	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
 static void apic_set_eoi(struct kvm_lapic *apic)
@@ -472,47 +432,24 @@
 {
 	u32 icr_low = apic_get_reg(apic, APIC_ICR);
 	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+	struct kvm_lapic_irq irq;
 
-	unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
-	unsigned int short_hand = icr_low & APIC_SHORT_MASK;
-	unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
-	unsigned int level = icr_low & APIC_INT_ASSERT;
-	unsigned int dest_mode = icr_low & APIC_DEST_MASK;
-	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
-	unsigned int vector = icr_low & APIC_VECTOR_MASK;
-
-	struct kvm_vcpu *target;
-	struct kvm_vcpu *vcpu;
-	unsigned long lpr_map = 0;
-	int i;
+	irq.vector = icr_low & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr_low & APIC_MODE_MASK;
+	irq.dest_mode = icr_low & APIC_DEST_MASK;
+	irq.level = icr_low & APIC_INT_ASSERT;
+	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
+	irq.shorthand = icr_low & APIC_SHORT_MASK;
+	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
-		   icr_high, icr_low, short_hand, dest,
-		   trig_mode, level, dest_mode, delivery_mode, vector);
+		   icr_high, icr_low, irq.shorthand, irq.dest_id,
+		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
+		   irq.vector);
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = apic->vcpu->kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-
-		if (vcpu->arch.apic &&
-		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
-			if (delivery_mode == APIC_DM_LOWEST)
-				set_bit(vcpu->vcpu_id, &lpr_map);
-			else
-				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
-						  vector, level, trig_mode);
-		}
-	}
-
-	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
-		if (target != NULL)
-			__apic_accept_irq(target->arch.apic, delivery_mode,
-					  vector, level, trig_mode);
-	}
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -527,12 +464,13 @@
 	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	remaining = hrtimer_expires_remaining(&apic->timer.dev);
+	remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 
-	ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
-	tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
+	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
+	tmcct = div64_u64(ns,
+			 (APIC_BUS_CYCLE_NS * apic->divide_count));
 
 	return tmcct;
 }
@@ -619,25 +557,25 @@
 	tdcr = apic_get_reg(apic, APIC_TDCR);
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
-	apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
+	apic->divide_count = 0x1 << (tmp2 & 0x7);
 
 	apic_debug("timer divide count is 0x%x\n",
-				   apic->timer.divide_count);
+				   apic->divide_count);
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
 {
-	ktime_t now = apic->timer.dev.base->get_time();
+	ktime_t now = apic->lapic_timer.timer.base->get_time();
 
-	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
-		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
-	atomic_set(&apic->timer.pending, 0);
+	apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+		    APIC_BUS_CYCLE_NS * apic->divide_count;
+	atomic_set(&apic->lapic_timer.pending, 0);
 
-	if (!apic->timer.period)
+	if (!apic->lapic_timer.period)
 		return;
 
-	hrtimer_start(&apic->timer.dev,
-		      ktime_add_ns(now, apic->timer.period),
+	hrtimer_start(&apic->lapic_timer.timer,
+		      ktime_add_ns(now, apic->lapic_timer.period),
 		      HRTIMER_MODE_ABS);
 
 	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
@@ -646,9 +584,9 @@
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
 			   apic_get_reg(apic, APIC_TMICT),
-			   apic->timer.period,
+			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
-					apic->timer.period)));
+					apic->lapic_timer.period)));
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -730,7 +668,7 @@
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
 			}
-			atomic_set(&apic->timer.pending, 0);
+			atomic_set(&apic->lapic_timer.pending, 0);
 
 		}
 		break;
@@ -762,7 +700,7 @@
 		break;
 
 	case APIC_TMICT:
-		hrtimer_cancel(&apic->timer.dev);
+		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		return;
@@ -802,7 +740,7 @@
 	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&vcpu->arch.apic->timer.dev);
+	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
 
 	if (vcpu->arch.apic->regs_page)
 		__free_page(vcpu->arch.apic->regs_page);
@@ -880,7 +818,7 @@
 	ASSERT(apic != NULL);
 
 	/* Stop the timer in case it's a reset to an active apic */
-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
@@ -905,11 +843,13 @@
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	update_divide_count(apic);
-	atomic_set(&apic->timer.pending, 0);
+	atomic_set(&apic->lapic_timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
+	vcpu->arch.apic_arb_prio = 0;
+
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 		   vcpu, kvm_apic_id(apic),
@@ -917,16 +857,14 @@
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
+bool kvm_apic_present(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
+}
+
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	int ret = 0;
-
-	if (!apic)
-		return 0;
-	ret = apic_enabled(apic);
-
-	return ret;
+	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
@@ -936,22 +874,11 @@
  *----------------------------------------------------------------------
  */
 
-/* TODO: make sure __apic_timer_fn runs in current pCPU */
-static int __apic_timer_fn(struct kvm_lapic *apic)
+static bool lapic_is_periodic(struct kvm_timer *ktimer)
 {
-	int result = 0;
-	wait_queue_head_t *q = &apic->vcpu->wq;
-
-	if(!atomic_inc_and_test(&apic->timer.pending))
-		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (apic_lvtt_period(apic)) {
-		result = 1;
-		hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
-	}
-	return result;
+	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
+					      lapic_timer);
+	return apic_lvtt_period(apic);
 }
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -959,7 +886,7 @@
 	struct kvm_lapic *lapic = vcpu->arch.apic;
 
 	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return atomic_read(&lapic->timer.pending);
+		return atomic_read(&lapic->lapic_timer.pending);
 
 	return 0;
 }
@@ -986,20 +913,9 @@
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
-{
-	struct kvm_lapic *apic;
-	int restart_timer = 0;
-
-	apic = container_of(data, struct kvm_lapic, timer.dev);
-
-	restart_timer = __apic_timer_fn(apic);
-
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}
+static struct kvm_timer_ops lapic_timer_ops = {
+	.is_periodic = lapic_is_periodic,
+};
 
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
@@ -1024,8 +940,13 @@
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
-	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	apic->timer.dev.function = apic_timer_fn;
+	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_ABS);
+	apic->lapic_timer.timer.function = kvm_timer_fn;
+	apic->lapic_timer.t_ops = &lapic_timer_ops;
+	apic->lapic_timer.kvm = vcpu->kvm;
+	apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
+
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
@@ -1078,9 +999,9 @@
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && atomic_read(&apic->timer.pending) > 0) {
+	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
 		if (kvm_apic_local_deliver(apic, APIC_LVTT))
-			atomic_dec(&apic->timer.pending);
+			atomic_dec(&apic->lapic_timer.pending);
 	}
 }
 
@@ -1106,7 +1027,7 @@
 			     MSR_IA32_APICBASE_BASE;
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 	apic_update_ppr(apic);
-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
 	start_apic_timer(apic);
 }
@@ -1119,7 +1040,7 @@
 	if (!apic)
 		return;
 
-	timer = &apic->timer.dev;
+	timer = &apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 45ab6ee..a587f83 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,18 +2,15 @@
 #define __KVM_X86_LAPIC_H
 
 #include "iodev.h"
+#include "kvm_timer.h"
 
 #include <linux/kvm_host.h>
 
 struct kvm_lapic {
 	unsigned long base_address;
 	struct kvm_io_device dev;
-	struct {
-		atomic_t pending;
-		s64 period;	/* unit: ns */
-		u32 divide_count;
-		struct hrtimer dev;
-	} timer;
+	struct kvm_timer lapic_timer;
+	u32 divide_count;
 	struct kvm_vcpu *vcpu;
 	struct page *regs_page;
 	void *regs;
@@ -34,12 +31,13 @@
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 32cf11e..5c3d6e8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -177,7 +178,11 @@
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mt_mask;
+
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -193,14 +198,13 @@
 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
-	shadow_mt_mask = mt_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -219,11 +223,6 @@
 	return vcpu->arch.shadow_efer & EFER_NX;
 }
 
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 static int is_shadow_present_pte(u64 pte)
 {
 	return pte != shadow_trap_nonpresent_pte
@@ -1074,18 +1073,10 @@
 	return NULL;
 }
 
-static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	list_del(&sp->oos_link);
-	--kvm->stat.mmu_unsync_global;
-}
-
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
 	sp->unsync = 0;
-	if (sp->global)
-		kvm_unlink_unsync_global(kvm, sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1248,7 +1239,6 @@
 	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
-	sp->global = 0;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!direct) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
@@ -1616,7 +1606,7 @@
 	return mtrr_state->def_type;
 }
 
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	u8 mtrr;
 
@@ -1626,6 +1616,7 @@
 		mtrr = MTRR_TYPE_WRBACK;
 	return mtrr;
 }
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1646,11 +1637,7 @@
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
 
-	if (sp->global) {
-		list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
-		++vcpu->kvm->stat.mmu_unsync_global;
-	} else
-		kvm_mmu_mark_parents_unsync(vcpu, sp);
+	kvm_mmu_mark_parents_unsync(vcpu, sp);
 
 	mmu_convert_notrap(sp);
 	return 0;
@@ -1677,21 +1664,11 @@
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    int global, gfn_t gfn, pfn_t pfn, bool speculative,
+		    gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
-	u64 mt_mask = shadow_mt_mask;
-	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
-
-	if (!global && sp->global) {
-		sp->global = 0;
-		if (sp->unsync) {
-			kvm_unlink_unsync_global(vcpu->kvm, sp);
-			kvm_mmu_mark_parents_unsync(vcpu, sp);
-		}
-	}
 
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
@@ -1711,16 +1688,9 @@
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
-	if (mt_mask) {
-		if (!kvm_is_mmio_pfn(pfn)) {
-			mt_mask = get_memory_type(vcpu, gfn) <<
-				kvm_x86_ops->get_mt_mask_shift();
-			mt_mask |= VMX_EPT_IGMT_BIT;
-		} else
-			mt_mask = MTRR_TYPE_UNCACHABLE <<
-				kvm_x86_ops->get_mt_mask_shift();
-		spte |= mt_mask;
-	}
+	if (tdp_enabled)
+		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+			kvm_is_mmio_pfn(pfn));
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
@@ -1765,8 +1735,8 @@
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, int global,
-			 gfn_t gfn, pfn_t pfn, bool speculative)
+			 int *ptwrite, int largepage, gfn_t gfn,
+			 pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1795,7 +1765,7 @@
 			was_rmapped = 1;
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, global, gfn, pfn, speculative, true)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1843,7 +1813,7 @@
 		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     largepage, 0, gfn, pfn, false);
+				     largepage, gfn, pfn, false);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
@@ -1942,7 +1912,19 @@
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
+{
+	int ret = 0;
+
+	if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
 	gfn_t root_gfn;
@@ -1957,13 +1939,15 @@
 		ASSERT(!VALID_PAGE(root));
 		if (tdp_enabled)
 			direct = 1;
+		if (mmu_check_root(vcpu, root_gfn))
+			return 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
 				      PT64_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
-		return;
+		return 0;
 	}
 	direct = !is_paging(vcpu);
 	if (tdp_enabled)
@@ -1980,6 +1964,8 @@
 			root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
+		if (mmu_check_root(vcpu, root_gfn))
+			return 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
@@ -1988,6 +1974,7 @@
 		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
 	}
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+	return 0;
 }
 
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -2006,7 +1993,7 @@
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
-		if (root) {
+		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = page_header(root);
 			mmu_sync_children(vcpu, sp);
@@ -2014,15 +2001,6 @@
 	}
 }
 
-static void mmu_sync_global(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_mmu_page *sp, *n;
-
-	list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
-		kvm_sync_page(vcpu, sp);
-}
-
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2030,13 +2008,6 @@
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
-{
-	spin_lock(&vcpu->kvm->mmu_lock);
-	mmu_sync_global(vcpu);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-}
-
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
@@ -2151,6 +2122,14 @@
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2159,6 +2138,59 @@
 #include "paging_tmpl.h"
 #undef PTTYPE
 
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+	switch (level) {
+	case PT32_ROOT_LEVEL:
+		/* no rsvd bits for 2 level 4K page table entries */
+		context->rsvd_bits_mask[0][1] = 0;
+		context->rsvd_bits_mask[0][0] = 0;
+		if (is_cpuid_PSE36())
+			/* 36bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+		else
+			/* 32 bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][2] =
+			rsvd_bits(maxphyaddr, 63) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62);	/* PDE */
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	case PT64_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) |
+			rsvd_bits(13, 20);		/* large page */
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -2179,6 +2211,7 @@
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2186,6 +2219,7 @@
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2201,6 +2235,7 @@
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
@@ -2221,12 +2256,15 @@
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT64_ROOT_LEVEL;
 	} else if (is_pae(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT32E_ROOT_LEVEL;
 	} else {
+		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 		context->gva_to_gpa = paging32_gva_to_gpa;
 		context->root_level = PT32_ROOT_LEVEL;
 	}
@@ -2290,9 +2328,11 @@
 		goto out;
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
-	mmu_alloc_roots(vcpu);
+	r = mmu_alloc_roots(vcpu);
 	mmu_sync_roots(vcpu);
 	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (r)
+		goto out;
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
@@ -2638,14 +2678,6 @@
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu_page *sp;
-
-	while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
-		sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
-				  struct kvm_mmu_page, link);
-		kvm_mmu_zap_page(vcpu->kvm, sp);
-		cond_resched();
-	}
 	free_page((unsigned long)vcpu->arch.mmu.pae_root);
 }
 
@@ -2710,7 +2742,6 @@
 {
 	struct kvm_mmu_page *sp;
 
-	spin_lock(&kvm->mmu_lock);
 	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
 		int i;
 		u64 *pt;
@@ -2725,7 +2756,6 @@
 				pt[i] &= ~PT_WRITABLE_MASK;
 	}
 	kvm_flush_remote_tlbs(kvm);
-	spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
@@ -3007,11 +3037,13 @@
 				       " in nonleaf level: levels %d gva %lx"
 				       " level %d pte %llx\n", audit_msg,
 				       vcpu->arch.mmu.root_level, va, level, ent);
-
-			audit_mappings_page(vcpu, ent, va, level - 1);
+			else
+				audit_mappings_page(vcpu, ent, va, level - 1);
 		} else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-			hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
+			gfn_t gfn = gpa >> PAGE_SHIFT;
+			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
+			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
 
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index eaab214..3494a2f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+static inline int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bd7020..258e459 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -157,6 +158,10 @@
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -209,7 +214,6 @@
 		if (ret)
 			goto walk;
 		pte |= PT_DIRTY_MASK;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
 		walker->ptes[walker->level - 1] = pte;
 	}
 
@@ -233,6 +237,8 @@
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
@@ -262,8 +268,7 @@
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage,
-		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
-		     pfn, true);
+		     gpte_to_gfn(gpte), pfn, true);
 }
 
 /*
@@ -297,7 +302,6 @@
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 				     ptwrite, largepage,
-				     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
 				     gw->gfn, pfn, false);
 			break;
 		}
@@ -380,7 +384,7 @@
 		return r;
 
 	/*
-	 * Look up the shadow pte for the faulting address.
+	 * Look up the guest pte for the faulting address.
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 			     fetch_fault);
@@ -586,7 +590,7 @@
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
+			 is_dirty_pte(gpte), 0, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1f8510c..71510e0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -19,6 +19,7 @@
 #include "irq.h"
 #include "mmu.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -69,7 +70,6 @@
 static int nested = 0;
 module_param(nested, int, S_IRUGO);
 
-static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
 static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
@@ -132,24 +132,6 @@
 	return svm_features & feat;
 }
 
-static inline u8 pop_irq(struct kvm_vcpu *vcpu)
-{
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	return irq;
-}
-
-static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
-	set_bit(irq, vcpu->arch.irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-}
-
 static inline void clgi(void)
 {
 	asm volatile (__ex(SVM_CLGI));
@@ -214,25 +196,41 @@
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
-static bool svm_exception_injected(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
-}
-
 static int is_external_interrupt(u32 info)
 {
 	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
 	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 ret = 0;
+
+	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+		ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+	return ret & mask;
+}
+
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (mask == 0)
+		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+	else
+		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __func__);
+		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+				EMULATE_DONE)
+			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
 	}
 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -240,9 +238,7 @@
 		       __func__, kvm_rip_read(vcpu), svm->next_rip);
 
 	kvm_rip_write(vcpu, svm->next_rip);
-	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-
-	vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
+	svm_set_interrupt_shadow(vcpu, 0);
 }
 
 static int has_svm(void)
@@ -830,6 +826,15 @@
 		if (!var->unusable)
 			var->type |= 0x1;
 		break;
+	case VCPU_SREG_SS:
+		/* On AMD CPUs sometimes the DB bit in the segment
+		 * descriptor is left as 1, although the whole segment has
+		 * been made unusable. Clear it here to pass an Intel VMX
+		 * entry check when cross vendor migrating.
+		 */
+		if (var->unusable)
+			var->db = 0;
+		break;
 	}
 }
 
@@ -960,15 +965,16 @@
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+	if (vcpu->arch.singlestep)
+		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
 		if (vcpu->guest_debug &
 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -979,6 +985,16 @@
 				1 << BP_VECTOR;
 	} else
 		vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	update_db_intercept(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -993,16 +1009,6 @@
 	return 0;
 }
 
-static int svm_get_irq(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-
-	if (is_external_interrupt(exit_int_info))
-		return exit_int_info & SVM_EVTINJ_VEC_MASK;
-	return -1;
-}
-
 static void load_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -1107,17 +1113,8 @@
 
 static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
-	bool event_injection = false;
-
-	if (!irqchip_in_kernel(kvm) &&
-	    is_external_interrupt(exit_int_info)) {
-		event_injection = true;
-		push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
-	}
 
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
@@ -1137,23 +1134,40 @@
 	 */
 	if (npt_enabled)
 		svm_flush_tlb(&svm->vcpu);
-
-	if (!npt_enabled && event_injection)
-		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	else {
+		if (kvm_event_needs_reinjection(&svm->vcpu))
+			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	}
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (!(svm->vcpu.guest_debug &
-	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+		!svm->vcpu.arch.singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
-	kvm_run->exit_reason = KVM_EXIT_DEBUG;
-	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-	kvm_run->debug.arch.exception = DB_VECTOR;
-	return 0;
+
+	if (svm->vcpu.arch.singlestep) {
+		svm->vcpu.arch.singlestep = false;
+		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+			svm->vmcb->save.rflags &=
+				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(&svm->vcpu);
+	}
+
+	if (svm->vcpu.guest_debug &
+	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		kvm_run->debug.arch.pc =
+			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		return 0;
+	}
+
+	return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1842,17 +1856,51 @@
 				    struct kvm_run *kvm_run)
 {
 	u16 tss_selector;
+	int reason;
+	int int_type = svm->vmcb->control.exit_int_info &
+		SVM_EXITINTINFO_TYPE_MASK;
+	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
+	uint32_t type =
+		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
+	uint32_t idt_v =
+		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
 
 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
+
 	if (svm->vmcb->control.exit_info_2 &
 	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_IRET);
-	if (svm->vmcb->control.exit_info_2 &
-	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_JMP);
-	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
+		reason = TASK_SWITCH_IRET;
+	else if (svm->vmcb->control.exit_info_2 &
+		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+		reason = TASK_SWITCH_JMP;
+	else if (idt_v)
+		reason = TASK_SWITCH_GATE;
+	else
+		reason = TASK_SWITCH_CALL;
+
+	if (reason == TASK_SWITCH_GATE) {
+		switch (type) {
+		case SVM_EXITINTINFO_TYPE_NMI:
+			svm->vcpu.arch.nmi_injected = false;
+			break;
+		case SVM_EXITINTINFO_TYPE_EXEPT:
+			kvm_clear_exception_queue(&svm->vcpu);
+			break;
+		case SVM_EXITINTINFO_TYPE_INTR:
+			kvm_clear_interrupt_queue(&svm->vcpu);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (reason != TASK_SWITCH_GATE ||
+	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
+	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+		skip_emulated_instruction(&svm->vcpu);
+
+	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1862,6 +1910,14 @@
 	return 1;
 }
 
+static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	++svm->vcpu.stat.nmi_window_exits;
+	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vcpu.arch.hflags |= HF_IRET_MASK;
+	return 1;
+}
+
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -1879,8 +1935,14 @@
 
 static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
+	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (irqchip_in_kernel(svm->vcpu.kvm)) {
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+		return 1;
+	}
+	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
 	return 0;
@@ -2090,8 +2152,9 @@
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !svm->vcpu.arch.irq_summary) {
+	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
 		++svm->vcpu.stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -2134,6 +2197,7 @@
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
 	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
 	[SVM_EXIT_CPUID]			= cpuid_interception,
+	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
@@ -2194,7 +2258,6 @@
 		}
 	}
 
-	kvm_reput_irq(svm);
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2205,7 +2268,7 @@
 
 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
-	    exit_code != SVM_EXIT_NPF)
+	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
 		       "exit_code 0x%x\n",
 		       __func__, svm->vmcb->control.exit_int_info,
@@ -2242,6 +2305,15 @@
 		new_asid(svm, svm_data);
 }
 
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	vcpu->arch.hflags |= HF_NMI_MASK;
+	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	++vcpu->stat.nmi_injections;
+}
 
 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
@@ -2257,134 +2329,71 @@
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->control.event_inj = nr |
+		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
+}
+
+static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	nested_svm_intr(svm);
 
-	svm_inject_irq(svm, irq);
+	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (irr == -1)
+		return;
+
+	if (tpr >= irr)
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
+
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	int max_irr, tpr;
-
-	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
-		return;
-
-	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
-	if (max_irr == -1)
-		return;
-
-	tpr = kvm_lapic_get_cr8(vcpu) << 4;
-
-	if (tpr >= (max_irr & 0xf0))
-		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
-static void svm_intr_assist(struct kvm_vcpu *vcpu)
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	int intr_vector = -1;
-
-	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
-	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
-		intr_vector = vmcb->control.exit_int_info &
-			      SVM_EVTINJ_VEC_MASK;
-		vmcb->control.exit_int_info = 0;
-		svm_inject_irq(svm, intr_vector);
-		goto out;
-	}
-
-	if (vmcb->control.int_ctl & V_IRQ_MASK)
-		goto out;
-
-	if (!kvm_cpu_has_interrupt(vcpu))
-		goto out;
-
-	if (nested_svm_intr(svm))
-		goto out;
-
-	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
-		goto out;
-
-	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
-	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
-	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
-		/* unable to deliver irq, set pending irq */
-		svm_set_vintr(svm);
-		svm_inject_irq(svm, 0x0);
-		goto out;
-	}
-	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
-	intr_vector = kvm_cpu_get_interrupt(vcpu);
-	svm_inject_irq(svm, intr_vector);
-out:
-	update_cr8_intercept(vcpu);
+	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
+		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
-static void kvm_reput_irq(struct vcpu_svm *svm)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-	struct vmcb_control_area *control = &svm->vmcb->control;
-
-	if ((control->int_ctl & V_IRQ_MASK)
-	    && !irqchip_in_kernel(svm->vcpu.kvm)) {
-		control->int_ctl &= ~V_IRQ_MASK;
-		push_irq(&svm->vcpu, control->int_vector);
-	}
-
-	svm->vcpu.arch.interrupt_window_open =
-		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK);
+	svm_set_vintr(to_svm(vcpu));
+	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
-static void svm_do_inject_vector(struct vcpu_svm *svm)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	svm_inject_irq(svm, irq);
-}
-
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb_control_area *control = &svm->vmcb->control;
 
-	if (nested_svm_intr(svm))
-		return;
+	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+	    == HF_NMI_MASK)
+		return; /* IRET will cause a vm exit */
 
-	svm->vcpu.arch.interrupt_window_open =
-		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
-
-	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
-		/*
-		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
-		 */
-		svm_do_inject_vector(svm);
-
-	/*
-	 * Interrupts blocked.  Wait for unblock.
-	 */
-	if (!svm->vcpu.arch.interrupt_window_open &&
-	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
-		svm_set_vintr(svm);
-	else
-		svm_clear_vintr(svm);
+	/* Something prevents NMI from been injected. Single step over
+	   possible problem (IRET or exception injection or interrupt
+	   shadow) */
+	vcpu->arch.singlestep = true;
+	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2407,7 +2416,7 @@
 
 	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
 		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
-		kvm_lapic_set_tpr(vcpu, cr8);
+		kvm_set_cr8(vcpu, cr8);
 	}
 }
 
@@ -2416,14 +2425,54 @@
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 cr8;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
 	cr8 = kvm_get_cr8(vcpu);
 	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
 }
 
+static void svm_complete_interrupts(struct vcpu_svm *svm)
+{
+	u8 vector;
+	int type;
+	u32 exitintinfo = svm->vmcb->control.exit_int_info;
+
+	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
+	svm->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
+
+	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
+		return;
+
+	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
+	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
+
+	switch (type) {
+	case SVM_EXITINTINFO_TYPE_NMI:
+		svm->vcpu.arch.nmi_injected = true;
+		break;
+	case SVM_EXITINTINFO_TYPE_EXEPT:
+		/* In case of software exception do not reinject an exception
+		   vector, but re-execute and instruction instead */
+		if (kvm_exception_is_soft(vector))
+			break;
+		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
+			u32 err = svm->vmcb->control.exit_int_info_err;
+			kvm_queue_exception_e(&svm->vcpu, vector, err);
+
+		} else
+			kvm_queue_exception(&svm->vcpu, vector);
+		break;
+	case SVM_EXITINTINFO_TYPE_INTR:
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
+		break;
+	default:
+		break;
+	}
+}
+
 #ifdef CONFIG_X86_64
 #define R "r"
 #else
@@ -2552,6 +2601,8 @@
 	sync_cr8_to_lapic(vcpu);
 
 	svm->next_rip = 0;
+
+	svm_complete_interrupts(svm);
 }
 
 #undef R
@@ -2617,7 +2668,7 @@
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	return 0;
 }
@@ -2667,17 +2718,21 @@
 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
+	.set_interrupt_shadow = svm_set_interrupt_shadow,
+	.get_interrupt_shadow = svm_get_interrupt_shadow,
 	.patch_hypercall = svm_patch_hypercall,
-	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
-	.exception_injected = svm_exception_injected,
-	.inject_pending_irq = svm_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.interrupt_allowed = svm_interrupt_allowed,
+	.nmi_allowed = svm_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
-	.get_mt_mask_shift = svm_get_mt_mask_shift,
+	.get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
new file mode 100644
index 0000000..86dbac0
--- /dev/null
+++ b/arch/x86/kvm/timer.c
@@ -0,0 +1,46 @@
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/hrtimer.h>
+#include <asm/atomic.h>
+#include "kvm_timer.h"
+
+static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
+{
+	int restart_timer = 0;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/* FIXME: this code should not know anything about vcpus */
+	if (!atomic_inc_and_test(&ktimer->pending))
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+
+	if (!ktimer->reinject)
+		atomic_set(&ktimer->pending, 1);
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+	if (ktimer->t_ops->is_periodic(ktimer)) {
+		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+		restart_timer = 1;
+	}
+
+	return restart_timer;
+}
+
+enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
+{
+	int restart_timer;
+	struct kvm_vcpu *vcpu;
+	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+
+	vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
+	if (!vcpu)
+		return HRTIMER_NORESTART;
+
+	restart_timer = __kvm_timer_fn(vcpu, ktimer);
+	if (restart_timer)
+		return HRTIMER_RESTART;
+	else
+		return HRTIMER_NORESTART;
+}
+
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bb48133..32d6ae8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -32,26 +32,27 @@
 #include <asm/desc.h>
 #include <asm/vmx.h>
 #include <asm/virtext.h>
+#include <asm/mce.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int bypass_guest_pf = 1;
-module_param(bypass_guest_pf, bool, 0);
+static int __read_mostly bypass_guest_pf = 1;
+module_param(bypass_guest_pf, bool, S_IRUGO);
 
-static int enable_vpid = 1;
-module_param(enable_vpid, bool, 0);
+static int __read_mostly enable_vpid = 1;
+module_param_named(vpid, enable_vpid, bool, 0444);
 
-static int flexpriority_enabled = 1;
-module_param(flexpriority_enabled, bool, 0);
+static int __read_mostly flexpriority_enabled = 1;
+module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 
-static int enable_ept = 1;
-module_param(enable_ept, bool, 0);
+static int __read_mostly enable_ept = 1;
+module_param_named(ept, enable_ept, bool, S_IRUGO);
 
-static int emulate_invalid_guest_state = 0;
-module_param(emulate_invalid_guest_state, bool, 0);
+static int __read_mostly emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
 struct vmcs {
 	u32 revision_id;
@@ -97,6 +98,7 @@
 	int soft_vnmi_blocked;
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
+	u32 exit_reason;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -111,9 +113,10 @@
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 
-static struct page *vmx_io_bitmap_a;
-static struct page *vmx_io_bitmap_b;
-static struct page *vmx_msr_bitmap;
+static unsigned long *vmx_io_bitmap_a;
+static unsigned long *vmx_io_bitmap_b;
+static unsigned long *vmx_msr_bitmap_legacy;
+static unsigned long *vmx_msr_bitmap_longmode;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -213,70 +216,78 @@
 		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static inline int is_machine_check(u32 intr_info)
+{
+	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
+			     INTR_INFO_VALID_MASK)) ==
+		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
+}
+
 static inline int cpu_has_vmx_msr_bitmap(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
+	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
 }
 
 static inline int cpu_has_vmx_tpr_shadow(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
+	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 }
 
 static inline int vm_need_tpr_shadow(struct kvm *kvm)
 {
-	return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
+	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
 }
 
 static inline int cpu_has_secondary_exec_ctrls(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl &
-		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+	return vmcs_config.cpu_based_exec_ctrl &
+		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 }
 
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
-	return flexpriority_enabled
-		&& (vmcs_config.cpu_based_2nd_exec_ctrl &
-		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+}
+
+static inline bool cpu_has_vmx_flexpriority(void)
+{
+	return cpu_has_vmx_tpr_shadow() &&
+		cpu_has_vmx_virtualize_apic_accesses();
 }
 
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
 }
 
 static inline int cpu_has_vmx_invept_context(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT);
 }
 
 static inline int cpu_has_vmx_invept_global(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT);
 }
 
 static inline int cpu_has_vmx_ept(void)
 {
-	return (vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_ENABLE_EPT);
-}
-
-static inline int vm_need_ept(void)
-{
-	return (cpu_has_vmx_ept() && enable_ept);
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_EPT;
 }
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
-		(irqchip_in_kernel(kvm)));
+	return flexpriority_enabled &&
+		(cpu_has_vmx_virtualize_apic_accesses()) &&
+		(irqchip_in_kernel(kvm));
 }
 
 static inline int cpu_has_vmx_vpid(void)
 {
-	return (vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_ENABLE_VPID);
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_VPID;
 }
 
 static inline int cpu_has_virtual_nmis(void)
@@ -284,6 +295,11 @@
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline bool report_flexpriority(void)
+{
+	return flexpriority_enabled;
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -381,7 +397,7 @@
 
 static inline void ept_sync_context(u64 eptp)
 {
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		if (cpu_has_vmx_invept_context())
 			__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
 		else
@@ -391,7 +407,7 @@
 
 static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 {
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		if (cpu_has_vmx_invept_individual_addr())
 			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
 					eptp, gpa);
@@ -478,7 +494,7 @@
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -488,9 +504,9 @@
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			eb |= 1u << BP_VECTOR;
 	}
-	if (vcpu->arch.rmode.active)
+	if (vcpu->arch.rmode.vm86_active)
 		eb = ~0;
-	if (vm_need_ept())
+	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -724,29 +740,50 @@
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	if (vcpu->arch.rmode.active)
+	if (vcpu->arch.rmode.vm86_active)
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
+static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	int ret = 0;
+
+	if (interruptibility & GUEST_INTR_STATE_STI)
+		ret |= X86_SHADOW_INT_STI;
+	if (interruptibility & GUEST_INTR_STATE_MOV_SS)
+		ret |= X86_SHADOW_INT_MOV_SS;
+
+	return ret & mask;
+}
+
+static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	u32 interruptibility = interruptibility_old;
+
+	interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
+
+	if (mask & X86_SHADOW_INT_MOV_SS)
+		interruptibility |= GUEST_INTR_STATE_MOV_SS;
+	if (mask & X86_SHADOW_INT_STI)
+		interruptibility |= GUEST_INTR_STATE_STI;
+
+	if ((interruptibility != interruptibility_old))
+		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rip;
-	u32 interruptibility;
 
 	rip = kvm_rip_read(vcpu);
 	rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	kvm_rip_write(vcpu, rip);
 
-	/*
-	 * We emulated an instruction, so temporary interrupt blocking
-	 * should be removed, if set.
-	 */
-	interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	if (interruptibility & 3)
-		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
-			     interruptibility & ~3);
-	vcpu->arch.interrupt_window_open = 1;
+	/* skipping an emulated instruction also counts */
+	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -760,7 +797,7 @@
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
 	}
 
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = nr;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -773,8 +810,9 @@
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+	if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
 		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
 	} else
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -782,11 +820,6 @@
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
-static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -812,6 +845,7 @@
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs;
+	unsigned long *msr_bitmap;
 
 	vmx_load_host_state(vmx);
 	save_nmsrs = 0;
@@ -847,6 +881,15 @@
 		__find_msr_index(vmx, MSR_KERNEL_GS_BASE);
 #endif
 	vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+
+	if (cpu_has_vmx_msr_bitmap()) {
+		if (is_long_mode(&vmx->vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy;
+
+		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+	}
 }
 
 /*
@@ -1034,13 +1077,6 @@
 	return 0;
 }
 
-static int vmx_get_irq(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->arch.interrupt.pending)
-		return -1;
-	return vcpu->arch.interrupt.nr;
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -1294,6 +1330,18 @@
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+
+	if (!cpu_has_vmx_ept())
+		enable_ept = 0;
+
+	if (!cpu_has_vmx_flexpriority())
+		flexpriority_enabled = 0;
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
 	return alloc_kvm_area();
 }
 
@@ -1324,7 +1372,7 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.active = 0;
+	vcpu->arch.rmode.vm86_active = 0;
 
 	vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
@@ -1386,7 +1434,7 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.active = 1;
+	vcpu->arch.rmode.vm86_active = 1;
 
 	vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1485,7 +1533,7 @@
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	vpid_sync_vcpu_all(to_vmx(vcpu));
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
@@ -1555,10 +1603,10 @@
 
 	vmx_fpu_deactivate(vcpu);
 
-	if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
+	if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
-	if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE))
+	if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE))
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
@@ -1570,7 +1618,7 @@
 	}
 #endif
 
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
 	vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1599,7 +1647,7 @@
 	u64 eptp;
 
 	guest_cr3 = cr3;
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		eptp = construct_eptp(cr3);
 		vmcs_write64(EPT_POINTER, eptp);
 		ept_sync_context(eptp);
@@ -1616,11 +1664,11 @@
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
+	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ?
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
@@ -1699,7 +1747,7 @@
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) {
+	if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vcpu->arch.rmode.tr.selector = var->selector;
 		vcpu->arch.rmode.tr.base = var->base;
 		vcpu->arch.rmode.tr.limit = var->limit;
@@ -1709,7 +1757,7 @@
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->arch.rmode.active && var->s) {
+	if (vcpu->arch.rmode.vm86_active && var->s) {
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -1982,7 +2030,7 @@
 	pfn_t identity_map_pfn;
 	u32 tmp;
 
-	if (!vm_need_ept())
+	if (!enable_ept)
 		return 1;
 	if (unlikely(!kvm->arch.ept_identity_pagetable)) {
 		printk(KERN_ERR "EPT: identity-mapping pagetable "
@@ -2071,7 +2119,7 @@
 	int vpid;
 
 	vmx->vpid = 0;
-	if (!enable_vpid || !cpu_has_vmx_vpid())
+	if (!enable_vpid)
 		return;
 	spin_lock(&vmx_vpid_lock);
 	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
@@ -2082,9 +2130,9 @@
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 {
-	void *va;
+	int f = sizeof(unsigned long);
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -2094,16 +2142,21 @@
 	 * have the write-low and read-high bitmap offsets the wrong way round.
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
-	va = kmap(msr_bitmap);
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, va + 0x000); /* read-low */
-		__clear_bit(msr, va + 0x800); /* write-low */
+		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
+		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, va + 0x400); /* read-high */
-		__clear_bit(msr, va + 0xc00); /* write-high */
+		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
+		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
 	}
-	kunmap(msr_bitmap);
+}
+
+static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+{
+	if (!longmode_only)
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
 }
 
 /*
@@ -2121,11 +2174,11 @@
 	u32 exec_control;
 
 	/* I/O */
-	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
-	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
+	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
+	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
 	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
@@ -2141,7 +2194,7 @@
 				CPU_BASED_CR8_LOAD_EXITING;
 #endif
 	}
-	if (!vm_need_ept())
+	if (!enable_ept)
 		exec_control |= CPU_BASED_CR3_STORE_EXITING |
 				CPU_BASED_CR3_LOAD_EXITING  |
 				CPU_BASED_INVLPG_EXITING;
@@ -2154,7 +2207,7 @@
 				~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		if (vmx->vpid == 0)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
-		if (!vm_need_ept())
+		if (!enable_ept)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
@@ -2273,7 +2326,7 @@
 		goto out;
 	}
 
-	vmx->vcpu.arch.rmode.active = 0;
+	vmx->vcpu.arch.rmode.vm86_active = 0;
 
 	vmx->soft_vnmi_blocked = 0;
 
@@ -2402,14 +2455,16 @@
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
+	int irq = vcpu->arch.interrupt.nr;
 
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
 	++vcpu->stat.irq_injections;
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = irq;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2419,8 +2474,14 @@
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (vcpu->arch.interrupt.soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2441,7 +2502,7 @@
 	}
 
 	++vcpu->stat.nmi_injections;
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = NMI_VECTOR;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2456,76 +2517,21 @@
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
-	vcpu->arch.nmi_window_open =
-		!(guest_intr & (GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_NMI));
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		vcpu->arch.nmi_window_open = 0;
+		return 0;
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 !(guest_intr & (GUEST_INTR_STATE_STI |
-				 GUEST_INTR_STATE_MOV_SS)));
+	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
 }
 
-static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
+static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	kvm_queue_interrupt(vcpu, irq);
-}
-
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
-{
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
-			 || kvm_run->request_interrupt_window)
-			enable_irq_window(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-			kvm_do_inject_irq(vcpu);
-
-		if (vcpu->arch.interrupt.pending)
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	}
-	if (!vcpu->arch.interrupt_window_open &&
-	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		enable_irq_window(vcpu);
+	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2585,6 +2591,31 @@
 	return 0;
 }
 
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+	struct pt_regs regs = {
+		.cs = 3, /* Fake ring 3 no matter what the guest ran on */
+		.flags = X86_EFLAGS_IF,
+	};
+
+	do_machine_check(&regs, 0);
+#endif
+}
+
+static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	/* already handled by vcpu_run */
+	return 1;
+}
+
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2596,17 +2627,14 @@
 	vect_info = vmx->idt_vectoring_info;
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
+	if (is_machine_check(intr_info))
+		return handle_machine_check(vcpu, kvm_run);
+
 	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
 						!is_page_fault(intr_info))
 		printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
 		       "intr info 0x%x\n", __func__, vect_info, intr_info);
 
-	if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
-		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->arch.irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-	}
-
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
@@ -2628,17 +2656,17 @@
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
 		/* EPT won't cause page fault directly */
-		if (vm_need_ept())
+		if (enable_ept)
 			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
-	if (vcpu->arch.rmode.active &&
+	if (vcpu->arch.rmode.vm86_active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
 								error_code)) {
 		if (vcpu->arch.halt_request) {
@@ -2753,13 +2781,18 @@
 			kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
 			skip_emulated_instruction(vcpu);
 			return 1;
-		case 8:
-			kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
-			skip_emulated_instruction(vcpu);
-			if (irqchip_in_kernel(vcpu->kvm))
-				return 1;
-			kvm_run->exit_reason = KVM_EXIT_SET_TPR;
-			return 0;
+		case 8: {
+				u8 cr8_prev = kvm_get_cr8(vcpu);
+				u8 cr8 = kvm_register_read(vcpu, reg);
+				kvm_set_cr8(vcpu, cr8);
+				skip_emulated_instruction(vcpu);
+				if (irqchip_in_kernel(vcpu->kvm))
+					return 1;
+				if (cr8_prev <= cr8)
+					return 1;
+				kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+				return 0;
+			}
 		};
 		break;
 	case 2: /* clts */
@@ -2957,8 +2990,9 @@
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !vcpu->arch.irq_summary) {
+	if (!irqchip_in_kernel(vcpu->kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(vcpu)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
 	}
@@ -2980,7 +3014,7 @@
 
 static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	kvm_mmu_invlpg(vcpu, exit_qualification);
 	skip_emulated_instruction(vcpu);
@@ -2996,11 +3030,11 @@
 
 static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification;
+	unsigned long exit_qualification;
 	enum emulation_result er;
 	unsigned long offset;
 
-	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	offset = exit_qualification & 0xffful;
 
 	er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3019,22 +3053,41 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
-	int reason;
+	int reason, type, idt_v;
+
+	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
+	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
-	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
-	    == INTR_TYPE_NMI_INTR) {
-		vcpu->arch.nmi_injected = false;
-		if (cpu_has_virtual_nmis())
-			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-				      GUEST_INTR_STATE_NMI);
+	if (reason == TASK_SWITCH_GATE && idt_v) {
+		switch (type) {
+		case INTR_TYPE_NMI_INTR:
+			vcpu->arch.nmi_injected = false;
+			if (cpu_has_virtual_nmis())
+				vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+					      GUEST_INTR_STATE_NMI);
+			break;
+		case INTR_TYPE_EXT_INTR:
+		case INTR_TYPE_SOFT_INTR:
+			kvm_clear_interrupt_queue(vcpu);
+			break;
+		case INTR_TYPE_HARD_EXCEPTION:
+		case INTR_TYPE_SOFT_EXCEPTION:
+			kvm_clear_exception_queue(vcpu);
+			break;
+		default:
+			break;
+		}
 	}
 	tss_selector = exit_qualification;
 
+	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
+		       type != INTR_TYPE_EXT_INTR &&
+		       type != INTR_TYPE_NMI_INTR))
+		skip_emulated_instruction(vcpu);
+
 	if (!kvm_task_switch(vcpu, tss_selector, reason))
 		return 0;
 
@@ -3051,11 +3104,11 @@
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification;
+	unsigned long exit_qualification;
 	gpa_t gpa;
 	int gla_validity;
 
-	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	if (exit_qualification & (1 << 6)) {
 		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
@@ -3067,7 +3120,7 @@
 		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
 		printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
 			(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-			(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+			vmcs_readl(GUEST_LINEAR_ADDRESS));
 		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
 			(long unsigned int)exit_qualification);
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -3150,6 +3203,7 @@
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
+	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -3159,10 +3213,10 @@
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
  */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
@@ -3178,7 +3232,7 @@
 
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
-	if (vm_need_ept() && is_paging(vcpu)) {
+	if (enable_ept && is_paging(vcpu)) {
 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 		ept_load_pdptrs(vcpu);
 	}
@@ -3199,9 +3253,8 @@
 		       __func__, vectoring_info, exit_reason);
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (vmx_interrupt_allowed(vcpu)) {
 			vmx->soft_vnmi_blocked = 0;
-			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
 			   vcpu->arch.nmi_pending) {
 			/*
@@ -3214,7 +3267,6 @@
 			       "state on VCPU %d after 1 s timeout\n",
 			       __func__, vcpu->vcpu_id);
 			vmx->soft_vnmi_blocked = 0;
-			vmx->vcpu.arch.nmi_window_open = 1;
 		}
 	}
 
@@ -3228,122 +3280,107 @@
 	return 0;
 }
 
-static void update_tpr_threshold(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
-	int max_irr, tpr;
-
-	if (!vm_need_tpr_shadow(vcpu->kvm))
-		return;
-
-	if (!kvm_lapic_enabled(vcpu) ||
-	    ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
+	if (irr == -1 || tpr < irr) {
 		vmcs_write32(TPR_THRESHOLD, 0);
 		return;
 	}
 
-	tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
-	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
+	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
-	u32 idt_vectoring_info;
+	u32 idt_vectoring_info = vmx->idt_vectoring_info;
 	bool unblock_nmi;
 	u8 vector;
 	int type;
 	bool idtv_info_valid;
-	u32 error;
 
 	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+
+	/* Handle machine checks before interrupts are enabled */
+	if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+	    || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
+		&& is_machine_check(exit_intr_info)))
+		kvm_machine_check();
+
+	/* We need to handle NMIs before interrupts are enabled */
+	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
+		KVMTRACE_0D(NMI, &vmx->vcpu, handler);
+		asm("int $2");
+	}
+
+	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+
 	if (cpu_has_virtual_nmis()) {
 		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
 		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
 		/*
-		 * SDM 3: 25.7.1.2
+		 * SDM 3: 27.7.1.2 (September 2008)
 		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
 		 * a guest IRET fault.
+		 * SDM 3: 23.2.2 (September 2008)
+		 * Bit 12 is undefined in any of the following cases:
+		 *  If the VM exit sets the valid bit in the IDT-vectoring
+		 *   information field.
+		 *  If the VM exit is due to a double fault.
 		 */
-		if (unblock_nmi && vector != DF_VECTOR)
+		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+		    vector != DF_VECTOR && !idtv_info_valid)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
 	} else if (unlikely(vmx->soft_vnmi_blocked))
 		vmx->vnmi_blocked_time +=
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
-	idt_vectoring_info = vmx->idt_vectoring_info;
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+	vmx->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&vmx->vcpu);
+	kvm_clear_interrupt_queue(&vmx->vcpu);
+
+	if (!idtv_info_valid)
+		return;
+
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
-	if (vmx->vcpu.arch.nmi_injected) {
+
+	switch (type) {
+	case INTR_TYPE_NMI_INTR:
+		vmx->vcpu.arch.nmi_injected = true;
 		/*
-		 * SDM 3: 25.7.1.2
-		 * Clear bit "block by NMI" before VM entry if a NMI delivery
-		 * faulted.
+		 * SDM 3: 27.7.1.2 (September 2008)
+		 * Clear bit "block by NMI" before VM entry if a NMI
+		 * delivery faulted.
 		 */
-		if (idtv_info_valid && type == INTR_TYPE_NMI_INTR)
-			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-					GUEST_INTR_STATE_NMI);
-		else
-			vmx->vcpu.arch.nmi_injected = false;
-	}
-	kvm_clear_exception_queue(&vmx->vcpu);
-	if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
-				type == INTR_TYPE_SOFT_EXCEPTION)) {
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_NMI);
+		break;
+	case INTR_TYPE_SOFT_EXCEPTION:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
+	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
-			error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			kvm_queue_exception_e(&vmx->vcpu, vector, error);
+			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
+			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
-		vmx->idt_vectoring_info = 0;
-	}
-	kvm_clear_interrupt_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
-		kvm_queue_interrupt(&vmx->vcpu, vector);
-		vmx->idt_vectoring_info = 0;
-	}
-}
-
-static void vmx_intr_assist(struct kvm_vcpu *vcpu)
-{
-	update_tpr_threshold(vcpu);
-
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-		return;
-	}
-	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-		else
-			enable_irq_window(vcpu);
-	}
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
+		break;
+	case INTR_TYPE_SOFT_INTR:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
+	case INTR_TYPE_EXT_INTR:
+		kvm_queue_interrupt(&vmx->vcpu, vector,
+			type == INTR_TYPE_SOFT_INTR);
+		break;
+	default:
+		break;
 	}
 }
 
@@ -3381,7 +3418,6 @@
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -3505,20 +3541,9 @@
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vmx_update_window_states(vcpu);
-
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
 
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
-	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (intr_info & INTR_INFO_VALID_MASK)) {
-		KVMTRACE_0D(NMI, vcpu, handler);
-		asm("int $2");
-	}
-
 	vmx_complete_interrupts(vmx);
 }
 
@@ -3593,7 +3618,7 @@
 		if (alloc_apic_access_page(kvm) != 0)
 			goto free_vmcs;
 
-	if (vm_need_ept())
+	if (enable_ept)
 		if (alloc_identity_pagetable(kvm) != 0)
 			goto free_vmcs;
 
@@ -3631,9 +3656,32 @@
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
-	return VMX_EPT_MT_EPTE_SHIFT;
+	u64 ret;
+
+	/* For VT-d and EPT combination
+	 * 1. MMIO: always map as UC
+	 * 2. EPT with VT-d:
+	 *   a. VT-d without snooping control feature: can't guarantee the
+	 *	result, try to trust guest.
+	 *   b. VT-d with snooping control feature: snooping control feature of
+	 *	VT-d engine can guarantee the cache correctness. Just set it
+	 *	to WB to keep consistent with host. So the same as item 3.
+	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 *    consistent with host MTRR
+	 */
+	if (is_mmio)
+		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else if (vcpu->kvm->arch.iommu_domain &&
+		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		      VMX_EPT_MT_EPTE_SHIFT;
+	else
+		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+			| VMX_EPT_IGMT_BIT;
+
+	return ret;
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3644,7 +3692,7 @@
 	.check_processor_compatibility = vmx_check_processor_compat,
 	.hardware_enable = hardware_enable,
 	.hardware_disable = hardware_disable,
-	.cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses,
+	.cpu_has_accelerated_tpr = report_flexpriority,
 
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
@@ -3678,78 +3726,82 @@
 	.tlb_flush = vmx_flush_tlb,
 
 	.run = vmx_vcpu_run,
-	.handle_exit = kvm_handle_exit,
+	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
+	.set_interrupt_shadow = vmx_set_interrupt_shadow,
+	.get_interrupt_shadow = vmx_get_interrupt_shadow,
 	.patch_hypercall = vmx_patch_hypercall,
-	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
+	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
-	.exception_injected = vmx_exception_injected,
-	.inject_pending_irq = vmx_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.interrupt_allowed = vmx_interrupt_allowed,
+	.nmi_allowed = vmx_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
-	.get_mt_mask_shift = vmx_get_mt_mask_shift,
+	.get_mt_mask = vmx_get_mt_mask,
 };
 
 static int __init vmx_init(void)
 {
-	void *va;
 	int r;
 
-	vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_io_bitmap_a)
 		return -ENOMEM;
 
-	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_io_bitmap_b) {
 		r = -ENOMEM;
 		goto out;
 	}
 
-	vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-	if (!vmx_msr_bitmap) {
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy) {
 		r = -ENOMEM;
 		goto out1;
 	}
 
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode) {
+		r = -ENOMEM;
+		goto out2;
+	}
+
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
 	 * delays, but the vmexits simply slow things down).
 	 */
-	va = kmap(vmx_io_bitmap_a);
-	memset(va, 0xff, PAGE_SIZE);
-	clear_bit(0x80, va);
-	kunmap(vmx_io_bitmap_a);
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
 
-	va = kmap(vmx_io_bitmap_b);
-	memset(va, 0xff, PAGE_SIZE);
-	kunmap(vmx_io_bitmap_b);
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
-	va = kmap(vmx_msr_bitmap);
-	memset(va, 0xff, PAGE_SIZE);
-	kunmap(vmx_msr_bitmap);
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out2;
+		goto out3;
 
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK,
-				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
@@ -3761,20 +3813,23 @@
 
 	return 0;
 
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
 out2:
-	__free_page(vmx_msr_bitmap);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
 out1:
-	__free_page(vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_b);
 out:
-	__free_page(vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_io_bitmap_a);
 	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	__free_page(vmx_msr_bitmap);
-	__free_page(vmx_io_bitmap_b);
-	__free_page(vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
 
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3944e91..249540f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -91,7 +91,6 @@
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
-	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -108,7 +107,6 @@
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
-	{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -234,7 +232,8 @@
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if (is_present_pte(pdpte[i]) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -321,7 +320,6 @@
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
 
-	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
@@ -370,7 +368,6 @@
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
 	vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
-	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -523,6 +520,9 @@
 	efer |= vcpu->arch.shadow_efer & EFER_LMA;
 
 	vcpu->arch.shadow_efer = efer;
+
+	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
+	kvm_mmu_reset_context(vcpu);
 }
 
 void kvm_enable_efer_bits(u64 mask)
@@ -630,14 +630,17 @@
 	unsigned long flags;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	void *shared_kaddr;
+	unsigned long this_tsc_khz;
 
 	if ((!vcpu->time_page))
 		return;
 
-	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
-		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+	this_tsc_khz = get_cpu_var(cpu_tsc_khz);
+	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = this_tsc_khz;
 	}
+	put_cpu_var(cpu_tsc_khz);
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -893,6 +896,8 @@
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
 	case MSR_VM_HSAVE_PA:
+	case MSR_P6_EVNTSEL0:
+	case MSR_P6_EVNTSEL1:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
@@ -1024,6 +1029,7 @@
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_ASSIGN_DEV_IRQ:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1241,41 +1247,53 @@
 	entry->flags = 0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			 u32 index, int *nent, int maxnent)
 {
-	const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
-		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
-		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
-		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
-		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
-		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
-		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
-		bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
-		bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
-		bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
-	const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
-		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
-		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
-		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
-		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
-		bit(X86_FEATURE_PGE) |
-		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
-		bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
-		bit(X86_FEATURE_SYSCALL) |
-		(is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
+	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
 #ifdef CONFIG_X86_64
-		bit(X86_FEATURE_LM) |
+	unsigned f_lm = F(LM);
+#else
+	unsigned f_lm = 0;
 #endif
-		bit(X86_FEATURE_FXSR_OPT) |
-		bit(X86_FEATURE_MMXEXT) |
-		bit(X86_FEATURE_3DNOWEXT) |
-		bit(X86_FEATURE_3DNOW);
-	const u32 kvm_supported_word3_x86_features =
-		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
+
+	/* cpuid 1.edx */
+	const u32 kvm_supported_word0_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
+		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+		0 /* HTT, TM, Reserved, PBE */;
+	/* cpuid 0x80000001.edx */
+	const u32 kvm_supported_word1_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* Reserved */ |
+		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+		F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
+		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
+	/* cpuid 1.ecx */
+	const u32 kvm_supported_word4_x86_features =
+		F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
+		0 /* DS-CPL, VMX, SMX, EST */ |
+		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
+		0 /* Reserved, DCA */ | F(XMM4_1) |
+		F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
+		0 /* Reserved, XSAVE, OSXSAVE */;
+	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
-		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
-		bit(X86_FEATURE_SVM);
+		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
+		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
+		0 /* SKINIT */ | 0 /* WDT */;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -1288,7 +1306,7 @@
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
-		entry->ecx &= kvm_supported_word3_x86_features;
+		entry->ecx &= kvm_supported_word4_x86_features;
 		break;
 	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
 	 * may return different values. This forces us to get_cpu() before
@@ -1350,6 +1368,8 @@
 	put_cpu();
 }
 
+#undef F
+
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				     struct kvm_cpuid_entry2 __user *entries)
 {
@@ -1421,8 +1441,7 @@
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	set_bit(irq->irq, vcpu->arch.irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+	kvm_queue_interrupt(vcpu, irq->irq, false);
 
 	vcpu_put(vcpu);
 
@@ -1584,8 +1603,7 @@
 		r = -EINVAL;
 	}
 out:
-	if (lapic)
-		kfree(lapic);
+	kfree(lapic);
 	return r;
 }
 
@@ -1606,10 +1624,12 @@
 		return -EINVAL;
 
 	down_write(&kvm->slots_lock);
+	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
+	spin_unlock(&kvm->mmu_lock);
 	up_write(&kvm->slots_lock);
 	return 0;
 }
@@ -1785,7 +1805,9 @@
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
+		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		spin_unlock(&kvm->mmu_lock);
 		kvm_flush_remote_tlbs(kvm);
 		memslot = &kvm->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -2360,7 +2382,7 @@
 			u16 error_code,
 			int emulation_type)
 {
-	int r;
+	int r, shadow_mask;
 	struct decode_cache *c;
 
 	kvm_clear_exception_queue(vcpu);
@@ -2408,7 +2430,16 @@
 		}
 	}
 
+	if (emulation_type & EMULTYPE_SKIP) {
+		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
+		return EMULATE_DONE;
+	}
+
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+	shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
+
+	if (r == 0)
+		kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
 
 	if (vcpu->arch.pio.string)
 		return EMULATE_DO_MMIO;
@@ -2761,7 +2792,7 @@
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
 	for_each_possible_cpu(cpu)
 		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
@@ -3012,6 +3043,16 @@
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 36;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
@@ -3048,10 +3089,9 @@
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->arch.irq_summary &&
+	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		kvm_run->request_interrupt_window &&
-		vcpu->arch.interrupt_window_open &&
-		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
+		kvm_arch_interrupt_allowed(vcpu));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
@@ -3064,8 +3104,9 @@
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
-					(vcpu->arch.interrupt_window_open &&
-					 vcpu->arch.irq_summary == 0);
+			kvm_arch_interrupt_allowed(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu) &&
+			!kvm_event_needs_reinjection(vcpu);
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3094,9 +3135,63 @@
 	up_read(&vcpu->kvm->slots_lock);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+	int max_irr, tpr;
+
+	if (!kvm_x86_ops->update_cr8_intercept)
+		return;
+
+	if (!vcpu->arch.apic->vapic_addr)
+		max_irr = kvm_lapic_find_highest_irr(vcpu);
+	else
+		max_irr = -1;
+
+	if (max_irr != -1)
+		max_irr >>= 4;
+
+	tpr = kvm_lapic_get_cr8(vcpu);
+
+	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+}
+
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
+
+	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		kvm_x86_ops->set_nmi(vcpu);
+		return;
+	}
+
+	if (vcpu->arch.interrupt.pending) {
+		kvm_x86_ops->set_irq(vcpu);
+		return;
+	}
+
+	/* try to inject new event if pending */
+	if (vcpu->arch.nmi_pending) {
+		if (kvm_x86_ops->nmi_allowed(vcpu)) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			kvm_x86_ops->set_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
+		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+					    false);
+			kvm_x86_ops->set_irq(vcpu);
+		}
+	}
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3128,9 +3223,6 @@
 		}
 	}
 
-	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-	kvm_inject_pending_timer_irqs(vcpu);
-
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -3138,6 +3230,9 @@
 
 	local_irq_disable();
 
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+	smp_mb__after_clear_bit();
+
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -3145,21 +3240,21 @@
 		goto out;
 	}
 
-	vcpu->guest_mode = 1;
-	/*
-	 * Make sure that guest_mode assignment won't happen after
-	 * testing the pending IRQ vector bitmap.
-	 */
-	smp_wmb();
-
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
-	else if (irqchip_in_kernel(vcpu->kvm))
-		kvm_x86_ops->inject_pending_irq(vcpu);
 	else
-		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
+		inject_pending_irq(vcpu, kvm_run);
 
-	kvm_lapic_sync_to_vapic(vcpu);
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+
+	if (kvm_lapic_enabled(vcpu)) {
+		update_cr8_intercept(vcpu);
+		kvm_lapic_sync_to_vapic(vcpu);
+	}
 
 	up_read(&vcpu->kvm->slots_lock);
 
@@ -3193,7 +3288,7 @@
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -3220,8 +3315,6 @@
 		profile_hit(KVM_PROFILING, (void *)rip);
 	}
 
-	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
-		vcpu->arch.exception.pending = false;
 
 	kvm_lapic_sync_from_vapic(vcpu);
 
@@ -3230,6 +3323,7 @@
 	return r;
 }
 
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -3256,29 +3350,42 @@
 			kvm_vcpu_block(vcpu);
 			down_read(&vcpu->kvm->slots_lock);
 			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
-				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+			{
+				switch(vcpu->arch.mp_state) {
+				case KVM_MP_STATE_HALTED:
 					vcpu->arch.mp_state =
-							KVM_MP_STATE_RUNNABLE;
-			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-				r = -EINTR;
+						KVM_MP_STATE_RUNNABLE;
+				case KVM_MP_STATE_RUNNABLE:
+					break;
+				case KVM_MP_STATE_SIPI_RECEIVED:
+				default:
+					r = -EINTR;
+					break;
+				}
+			}
 		}
 
-		if (r > 0) {
-			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				++vcpu->stat.request_irq_exits;
-			}
-			if (signal_pending(current)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				++vcpu->stat.signal_exits;
-			}
-			if (need_resched()) {
-				up_read(&vcpu->kvm->slots_lock);
-				kvm_resched(vcpu);
-				down_read(&vcpu->kvm->slots_lock);
-			}
+		if (r <= 0)
+			break;
+
+		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+		if (kvm_cpu_has_pending_timer(vcpu))
+			kvm_inject_pending_timer_irqs(vcpu);
+
+		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.request_irq_exits;
+		}
+		if (signal_pending(current)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.signal_exits;
+		}
+		if (need_resched()) {
+			up_read(&vcpu->kvm->slots_lock);
+			kvm_resched(vcpu);
+			down_read(&vcpu->kvm->slots_lock);
 		}
 	}
 
@@ -3442,7 +3549,6 @@
 				  struct kvm_sregs *sregs)
 {
 	struct descriptor_table dt;
-	int pending_vec;
 
 	vcpu_load(vcpu);
 
@@ -3472,16 +3578,11 @@
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	if (irqchip_in_kernel(vcpu->kvm)) {
-		memset(sregs->interrupt_bitmap, 0,
-		       sizeof sregs->interrupt_bitmap);
-		pending_vec = kvm_x86_ops->get_irq(vcpu);
-		if (pending_vec >= 0)
-			set_bit(pending_vec,
-				(unsigned long *)sregs->interrupt_bitmap);
-	} else
-		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
-		       sizeof sregs->interrupt_bitmap);
+	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
+
+	if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
+		set_bit(vcpu->arch.interrupt.nr,
+			(unsigned long *)sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -3688,7 +3789,6 @@
 	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
 	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
 	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
@@ -3785,8 +3885,8 @@
 }
 
 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
-		       struct desc_struct *nseg_desc)
+			      u16 old_tss_sel, u32 old_tss_base,
+			      struct desc_struct *nseg_desc)
 {
 	struct tss_segment_16 tss_segment_16;
 	int ret = 0;
@@ -3805,6 +3905,16 @@
 			   &tss_segment_16, sizeof tss_segment_16))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_16.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_16.prev_task_link,
+				    sizeof tss_segment_16.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss16(vcpu, &tss_segment_16))
 		goto out;
 
@@ -3814,7 +3924,7 @@
 }
 
 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
+		       u16 old_tss_sel, u32 old_tss_base,
 		       struct desc_struct *nseg_desc)
 {
 	struct tss_segment_32 tss_segment_32;
@@ -3834,6 +3944,16 @@
 			   &tss_segment_32, sizeof tss_segment_32))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_32.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_32.prev_task_link,
+				    sizeof tss_segment_32.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss32(vcpu, &tss_segment_32))
 		goto out;
 
@@ -3887,14 +4007,22 @@
 		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
 	}
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	/* set back link to prev task only if NT bit is set in eflags
+	   note that old_tss_sel is not used afetr this point */
+	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
+		old_tss_sel = 0xffff;
+
+	/* set back link to prev task only if NT bit is set in eflags
+	   note that old_tss_sel is not used afetr this point */
+	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
+		old_tss_sel = 0xffff;
 
 	if (nseg_desc.type & 8)
-		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 	else
-		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 
 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
 		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
@@ -3920,7 +4048,7 @@
 				  struct kvm_sregs *sregs)
 {
 	int mmu_reset_needed = 0;
-	int i, pending_vec, max_bits;
+	int pending_vec, max_bits;
 	struct descriptor_table dt;
 
 	vcpu_load(vcpu);
@@ -3934,7 +4062,13 @@
 
 	vcpu->arch.cr2 = sregs->cr2;
 	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
-	vcpu->arch.cr3 = sregs->cr3;
+
+	down_read(&vcpu->kvm->slots_lock);
+	if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
+		vcpu->arch.cr3 = sregs->cr3;
+	else
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+	up_read(&vcpu->kvm->slots_lock);
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
@@ -3956,25 +4090,14 @@
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	if (!irqchip_in_kernel(vcpu->kvm)) {
-		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
-		       sizeof vcpu->arch.irq_pending);
-		vcpu->arch.irq_summary = 0;
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
-			if (vcpu->arch.irq_pending[i])
-				__set_bit(i, &vcpu->arch.irq_summary);
-	} else {
-		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
-		pending_vec = find_first_bit(
-			(const unsigned long *)sregs->interrupt_bitmap,
-			max_bits);
-		/* Only pending external irq is handled here */
-		if (pending_vec < max_bits) {
-			kvm_x86_ops->set_irq(vcpu, pending_vec);
-			pr_debug("Set back pending irq %d\n",
-				 pending_vec);
-		}
-		kvm_pic_clear_isr_ack(vcpu->kvm);
+	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+	pending_vec = find_first_bit(
+		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
+	if (pending_vec < max_bits) {
+		kvm_queue_interrupt(vcpu, pending_vec, false);
+		pr_debug("Set back pending irq %d\n", pending_vec);
+		if (irqchip_in_kernel(vcpu->kvm))
+			kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
 
 	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@@ -4308,7 +4431,6 @@
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4411,12 +4533,14 @@
 		}
 	}
 
+	spin_lock(&kvm->mmu_lock);
 	if (!kvm->arch.n_requested_mmu_pages) {
 		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
 	}
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 
 	return 0;
@@ -4425,6 +4549,7 @@
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_mmu_zap_all(kvm);
+	kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -4434,28 +4559,24 @@
 	       || vcpu->arch.nmi_pending;
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	/*
-	 * We may be called synchronously with irqs disabled in guest mode,
-	 * So need not to call smp_call_function_single() in that case.
-	 */
-	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
+
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->interrupt_allowed(vcpu);
+}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6a4be78..4c8e10a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@
 	vcpu->arch.exception.pending = false;
 }
 
-static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
+static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
+	bool soft)
 {
 	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
@@ -19,4 +21,14 @@
 	vcpu->arch.interrupt.pending = false;
 }
 
+static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+		vcpu->arch.nmi_injected;
+}
+
+static inline bool kvm_exception_is_soft(unsigned int nr)
+{
+	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
+}
 #endif
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ca91749..c1b6c23 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -59,13 +59,14 @@
 #define SrcImm      (5<<4)	/* Immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
 #define SrcOne      (7<<4)	/* Implied '1' */
-#define SrcMask     (7<<4)
+#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
+#define SrcMask     (0xf<<4)
 /* Generic ModRM decode. */
-#define ModRM       (1<<7)
+#define ModRM       (1<<8)
 /* Destination is only written; never read. */
-#define Mov         (1<<8)
-#define BitOp       (1<<9)
-#define MemAbs      (1<<10)      /* Memory operand is absolute displacement */
+#define Mov         (1<<9)
+#define BitOp       (1<<10)
+#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
 #define String      (1<<12)     /* String instruction (rep capable) */
 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
@@ -76,6 +77,7 @@
 #define Src2CL      (1<<29)
 #define Src2ImmByte (2<<29)
 #define Src2One     (3<<29)
+#define Src2Imm16   (4<<29)
 #define Src2Mask    (7<<29)
 
 enum {
@@ -135,11 +137,11 @@
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
 	/* 0x70 - 0x77 */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
 	/* 0x78 - 0x7F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
 	/* 0x80 - 0x87 */
 	Group | Group1_80, Group | Group1_81,
 	Group | Group1_82, Group | Group1_83,
@@ -153,7 +155,8 @@
 	/* 0x90 - 0x97 */
 	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
 	/* 0x98 - 0x9F */
-	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
+	0, 0, SrcImm | Src2Imm16, 0,
+	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
 	/* 0xA0 - 0xA7 */
 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
 	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
@@ -178,7 +181,8 @@
 	0, ImplicitOps | Stack, 0, 0,
 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
 	/* 0xC8 - 0xCF */
-	0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
+	0, 0, 0, ImplicitOps | Stack,
+	ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
 	/* 0xD0 - 0xD7 */
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -187,11 +191,11 @@
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xE0 - 0xE7 */
 	0, 0, 0, 0,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+	ByteOp | SrcImmUByte, SrcImmUByte,
+	ByteOp | SrcImmUByte, SrcImmUByte,
 	/* 0xE8 - 0xEF */
-	ImplicitOps | Stack, SrcImm | ImplicitOps,
-	ImplicitOps, SrcImmByte | ImplicitOps,
+	SrcImm | Stack, SrcImm | ImplicitOps,
+	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
@@ -230,10 +234,8 @@
 	/* 0x70 - 0x7F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x80 - 0x8F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
+	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
@@ -1044,10 +1046,14 @@
 		}
 		break;
 	case SrcImmByte:
+	case SrcImmUByte:
 		c->src.type = OP_IMM;
 		c->src.ptr = (unsigned long *)c->eip;
 		c->src.bytes = 1;
-		c->src.val = insn_fetch(s8, 1, c->eip);
+		if ((c->d & SrcMask) == SrcImmByte)
+			c->src.val = insn_fetch(s8, 1, c->eip);
+		else
+			c->src.val = insn_fetch(u8, 1, c->eip);
 		break;
 	case SrcOne:
 		c->src.bytes = 1;
@@ -1072,6 +1078,12 @@
 		c->src2.bytes = 1;
 		c->src2.val = insn_fetch(u8, 1, c->eip);
 		break;
+	case Src2Imm16:
+		c->src2.type = OP_IMM;
+		c->src2.ptr = (unsigned long *)c->eip;
+		c->src2.bytes = 2;
+		c->src2.val = insn_fetch(u16, 2, c->eip);
+		break;
 	case Src2One:
 		c->src2.bytes = 1;
 		c->src2.val = 1;
@@ -1349,6 +1361,20 @@
 	return 0;
 }
 
+void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
+{
+	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
+	/*
+	 * an sti; sti; sequence only disable interrupts for the first
+	 * instruction. So, if the last instruction, be it emulated or
+	 * not, left the system with the INT_STI flag enabled, it
+	 * means that the last instruction is an sti. We should not
+	 * leave the flag on in this case. The same goes for mov ss
+	 */
+	if (!(int_shadow & mask))
+		ctxt->interruptibility = mask;
+}
+
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1360,6 +1386,8 @@
 	int io_dir_in;
 	int rc = 0;
 
+	ctxt->interruptibility = 0;
+
 	/* Shadow copy of register state. Committed on successful emulation.
 	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
 	 * modify them.
@@ -1531,13 +1559,10 @@
 			return -1;
 		}
 		return 0;
-	case 0x70 ... 0x7f: /* jcc (short) */ {
-		int rel = insn_fetch(s8, 1, c->eip);
-
+	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(c->b, ctxt->eflags))
-			jmp_rel(c, rel);
+			jmp_rel(c, c->src.val);
 		break;
-	}
 	case 0x80 ... 0x83:	/* Grp1 */
 		switch (c->modrm_reg) {
 		case 0:
@@ -1609,6 +1634,9 @@
 		int err;
 
 		sel = c->src.val;
+		if (c->modrm_reg == VCPU_SREG_SS)
+			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+
 		if (c->modrm_reg <= 5) {
 			type_bits = (c->modrm_reg == 1) ? 9 : 1;
 			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@@ -1769,59 +1797,32 @@
 		break;
 	case 0xe4: 	/* inb */
 	case 0xe5: 	/* in */
-		port = insn_fetch(u8, 1, c->eip);
+		port = c->src.val;
 		io_dir_in = 1;
 		goto do_io;
 	case 0xe6: /* outb */
 	case 0xe7: /* out */
-		port = insn_fetch(u8, 1, c->eip);
+		port = c->src.val;
 		io_dir_in = 0;
 		goto do_io;
 	case 0xe8: /* call (near) */ {
-		long int rel;
-		switch (c->op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, c->eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, c->eip);
-			break;
-		default:
-			DPRINTF("Call: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
+		long int rel = c->src.val;
 		c->src.val = (unsigned long) c->eip;
 		jmp_rel(c, rel);
-		c->op_bytes = c->ad_bytes;
 		emulate_push(ctxt);
 		break;
 	}
 	case 0xe9: /* jmp rel */
 		goto jmp;
-	case 0xea: /* jmp far */ {
-		uint32_t eip;
-		uint16_t sel;
-
-		switch (c->op_bytes) {
-		case 2:
-			eip = insn_fetch(u16, 2, c->eip);
-			break;
-		case 4:
-			eip = insn_fetch(u32, 4, c->eip);
-			break;
-		default:
-			DPRINTF("jmp far: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
-		sel = insn_fetch(u16, 2, c->eip);
-		if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
+	case 0xea: /* jmp far */
+		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
+					VCPU_SREG_CS) < 0) {
 			DPRINTF("jmp far: Failed to load CS descriptor\n");
 			goto cannot_emulate;
 		}
 
-		c->eip = eip;
+		c->eip = c->src.val;
 		break;
-	}
 	case 0xeb:
 	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
@@ -1865,6 +1866,7 @@
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
 	case 0xfb: /* sti */
+		toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
 		ctxt->eflags |= X86_EFLAGS_IF;
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
@@ -2039,28 +2041,11 @@
 		if (!test_cc(c->b, ctxt->eflags))
 			c->dst.type = OP_NONE; /* no writeback */
 		break;
-	case 0x80 ... 0x8f: /* jnz rel, etc*/ {
-		long int rel;
-
-		switch (c->op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, c->eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, c->eip);
-			break;
-		case 8:
-			rel = insn_fetch(s64, 8, c->eip);
-			break;
-		default:
-			DPRINTF("jnz: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
+	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(c->b, ctxt->eflags))
-			jmp_rel(c, rel);
+			jmp_rel(c, c->src.val);
 		c->dst.type = OP_NONE;
 		break;
-	}
 	case 0xa3:
 	      bt:		/* bt */
 		c->dst.type = OP_NONE;
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 8dab8f7..3871804 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,7 +2,6 @@
 	bool "Lguest guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on !X86_PAE
 	select VIRTIO
 	select VIRTIO_RING
 	select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 27f0c9e..94e0e54 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1 +1,2 @@
 obj-y		:= i386_head.o boot.o
+CFLAGS_boot.o	:= $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ca7ec44..7bc65f0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -67,6 +67,7 @@
 #include <asm/mce.h>
 #include <asm/io.h>
 #include <asm/i387.h>
+#include <asm/stackprotector.h>
 #include <asm/reboot.h>		/* for struct machine_ops */
 
 /*G:010 Welcome to the Guest!
@@ -86,7 +87,7 @@
 
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
  * ring buffer of stored hypercalls which the Host will run though next time we
- * do a normal hypercall.  Each entry in the ring has 4 slots for the hypercall
+ * do a normal hypercall.  Each entry in the ring has 5 slots for the hypercall
  * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
  * and 255 once the Host has finished with it.
  *
@@ -95,7 +96,8 @@
  * effect of causing the Host to run all the stored calls in the ring buffer
  * which empties it for next time! */
 static void async_hcall(unsigned long call, unsigned long arg1,
-			unsigned long arg2, unsigned long arg3)
+			unsigned long arg2, unsigned long arg3,
+			unsigned long arg4)
 {
 	/* Note: This code assumes we're uniprocessor. */
 	static unsigned int next_call;
@@ -107,12 +109,13 @@
 	local_irq_save(flags);
 	if (lguest_data.hcall_status[next_call] != 0xFF) {
 		/* Table full, so do normal hcall which will flush table. */
-		kvm_hypercall3(call, arg1, arg2, arg3);
+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
 	} else {
 		lguest_data.hcalls[next_call].arg0 = call;
 		lguest_data.hcalls[next_call].arg1 = arg1;
 		lguest_data.hcalls[next_call].arg2 = arg2;
 		lguest_data.hcalls[next_call].arg3 = arg3;
+		lguest_data.hcalls[next_call].arg4 = arg4;
 		/* Arguments must all be written before we mark it to go */
 		wmb();
 		lguest_data.hcall_status[next_call] = 0;
@@ -140,7 +143,7 @@
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall1(call, arg1);
 	else
-		async_hcall(call, arg1, 0, 0);
+		async_hcall(call, arg1, 0, 0, 0);
 }
 
 static void lazy_hcall2(unsigned long call,
@@ -150,7 +153,7 @@
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall2(call, arg1, arg2);
 	else
-		async_hcall(call, arg1, arg2, 0);
+		async_hcall(call, arg1, arg2, 0, 0);
 }
 
 static void lazy_hcall3(unsigned long call,
@@ -161,18 +164,38 @@
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall3(call, arg1, arg2, arg3);
 	else
-		async_hcall(call, arg1, arg2, arg3);
+		async_hcall(call, arg1, arg2, arg3, 0);
 }
 
+#ifdef CONFIG_X86_PAE
+static void lazy_hcall4(unsigned long call,
+		       unsigned long arg1,
+		       unsigned long arg2,
+		       unsigned long arg3,
+		       unsigned long arg4)
+{
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+	else
+		async_hcall(call, arg1, arg2, arg3, arg4);
+}
+#endif
+
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
-static void lguest_leave_lazy_mode(void)
+static void lguest_leave_lazy_mmu_mode(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+	paravirt_leave_lazy_mmu();
 }
 
-/*G:033
+static void lguest_end_context_switch(struct task_struct *next)
+{
+	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+	paravirt_end_context_switch(next);
+}
+
+/*G:032
  * After that diversion we return to our first native-instruction
  * replacements: four functions for interrupt control.
  *
@@ -192,30 +215,28 @@
 {
 	return lguest_data.irq_enabled;
 }
-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
-
-/* restore_flags() just sets the flags back to the value given. */
-static void restore_fl(unsigned long flags)
-{
-	lguest_data.irq_enabled = flags;
-}
-PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
 /* Interrupts go off... */
 static void irq_disable(void)
 {
 	lguest_data.irq_enabled = 0;
 }
+
+/* Let's pause a moment.  Remember how I said these are called so often?
+ * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
+ * break some rules.  In particular, these functions are assumed to save their
+ * own registers if they need to: normal C functions assume they can trash the
+ * eax register.  To use normal C functions, we use
+ * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
+ * C function, then restores it. */
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
 PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
-
-/* Interrupts go on... */
-static void irq_enable(void)
-{
-	lguest_data.irq_enabled = X86_EFLAGS_IF;
-}
-PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
-
 /*:*/
+
+/* These are in i386_head.S */
+extern void lg_irq_enable(void);
+extern void lg_restore_fl(unsigned long flags);
+
 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
  * them (or when we unmask an interrupt).  This seems to work for the moment,
  * since interrupts are rare and we'll just get the interrupt on the next timer
@@ -361,8 +382,8 @@
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
-		*dx &= 0x07808111;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
+		*dx &= 0x07808151;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -381,6 +402,11 @@
 		if (*ax > 0x80000008)
 			*ax = 0x80000008;
 		break;
+	case 0x80000001:
+		/* Here we should fix nx cap depending on host. */
+		/* For this version of PAE, we just clear NX bit. */
+		*dx &= ~(1 << 20);
+		break;
 	}
 }
 
@@ -514,25 +540,52 @@
 static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
 			       pte_t *ptep)
 {
+#ifdef CONFIG_X86_PAE
+	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
+		    ptep->pte_low, ptep->pte_high);
+#else
 	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
+#endif
 }
 
 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	*ptep = pteval;
+	native_set_pte(ptep, pteval);
 	lguest_pte_update(mm, addr, ptep);
 }
 
-/* The Guest calls this to set a top-level entry.  Again, we set the entry then
- * tell the Host which top-level page we changed, and the index of the entry we
- * changed. */
+/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
+ * to set a middle-level entry when PAE is activated.
+ * Again, we set the entry then tell the Host which page we changed,
+ * and the index of the entry we changed. */
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pud(pud_t *pudp, pud_t pudval)
+{
+	native_set_pud(pudp, pudval);
+
+	/* 32 bytes aligned pdpt address and the index. */
+	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
+		   (__pa(pudp) & 0x1F) / sizeof(pud_t));
+}
+
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
-	*pmdp = pmdval;
+	native_set_pmd(pmdp, pmdval);
 	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / 4);
+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
 }
+#else
+
+/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
+ * activated. */
+static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	native_set_pmd(pmdp, pmdval);
+	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
+}
+#endif
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
  * don't know the top level any more.  This is useless for us, since we don't
@@ -545,11 +598,31 @@
  * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
-	*ptep = pteval;
+	native_set_pte(ptep, pteval);
 	if (cr3_changed)
 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	native_set_pte_atomic(ptep, pte);
+	if (cr3_changed)
+		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
+}
+
+void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	native_pte_clear(mm, addr, ptep);
+	lguest_pte_update(mm, addr, ptep);
+}
+
+void lguest_pmd_clear(pmd_t *pmdp)
+{
+	lguest_set_pmd(pmdp, __pmd(0));
+}
+#endif
+
 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations.  On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
@@ -621,13 +694,12 @@
 {
 	unsigned int i;
 
-	for (i = 0; i < LGUEST_IRQS; i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Lguest has
 		 * a straightforward 1 to 1 mapping, so force that here. */
-		__get_cpu_var(vector_irq)[vector] = i;
-		if (vector != SYSCALL_VECTOR)
-			set_intr_gate(vector, interrupt[i]);
+		__get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR;
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
 	/* This call is required to set up for 4k stacks, where we have
 	 * separate stacks for hard and soft interrupts. */
@@ -636,7 +708,7 @@
 
 void lguest_setup_irq(unsigned int irq)
 {
-	irq_to_desc_alloc_cpu(irq, 0);
+	irq_to_desc_alloc_node(irq, 0);
 	set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
 				      handle_level_irq, "level");
 }
@@ -966,10 +1038,10 @@
  *
  * Our current solution is to allow the paravirt back end to optionally patch
  * over the indirect calls to replace them with something more efficient.  We
- * patch the four most commonly called functions: disable interrupts, enable
- * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10
- * bytes to patch into: the Guest versions of these operations are small enough
- * that we can fit comfortably.
+ * patch two of the simplest of the most commonly called functions: disable
+ * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
+ * into: the Guest versions of these operations are small enough that we can
+ * fit comfortably.
  *
  * First we need assembly templates of each of the patchable Guest operations,
  * and these are in i386_head.S. */
@@ -980,8 +1052,6 @@
 	const char *start, *end;
 } lguest_insns[] = {
 	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
-	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
-	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
 	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
 };
 
@@ -1019,6 +1089,7 @@
 	pv_info.name = "lguest";
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = 1;
+	pv_info.shared_kernel_pmd = 1;
 
 	/* We set up all the lguest overrides for sensitive operations.  These
 	 * are detailed with the operations themselves. */
@@ -1026,9 +1097,9 @@
 	/* interrupt-related operations */
 	pv_irq_ops.init_IRQ = lguest_init_IRQ;
 	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
-	pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
+	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
 	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
-	pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
+	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
 	/* init-time operations */
@@ -1053,8 +1124,8 @@
 	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1064,10 +1135,16 @@
 	pv_mmu_ops.set_pte = lguest_set_pte;
 	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
 	pv_mmu_ops.set_pmd = lguest_set_pmd;
+#ifdef CONFIG_X86_PAE
+	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
+	pv_mmu_ops.pte_clear = lguest_pte_clear;
+	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
+	pv_mmu_ops.set_pud = lguest_set_pud;
+#endif
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
 	pv_mmu_ops.pte_update = lguest_pte_update;
 	pv_mmu_ops.pte_update_defer = lguest_pte_update;
 
@@ -1088,13 +1165,21 @@
 	 * lguest_init() where the rest of the fairly chaotic boot setup
 	 * occurs. */
 
+	/* The stack protector is a weird thing where gcc places a canary
+	 * value on the stack and then checks it on return.  This file is
+	 * compiled with -fno-stack-protector it, so we got this far without
+	 * problems.  The value of the canary is kept at offset 20 from the
+	 * %gs register, so we need to set that up before calling C functions
+	 * in other files. */
+	setup_stack_canary_segment(0);
+	/* We could just call load_stack_canary_segment(), but we might as
+	 * call switch_to_new_gdt() which loads the whole table and sets up
+	 * the per-cpu segment descriptor register %fs as well. */
+	switch_to_new_gdt(0);
+
 	/* As described in head_32.S, we map the first 128M of memory. */
 	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
 
-	/* Load the %fs segment register (the per-cpu segment register) with
-	 * the normal data segment to get through booting. */
-	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
-
 	/* The Host<->Guest Switcher lives at the top of our address space, and
 	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
 	 * it put the answer in lguest_data.reserve_mem  */
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index f795419..a9c8cfe 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -46,10 +46,64 @@
 	.globl lgstart_##name; .globl lgend_##name
 
 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-/*:*/
+
+/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
+ * matter for save_fl and irq_disable later).  If we write our routines
+ * carefully in assembler, we can avoid clobbering any registers and avoid
+ * jumping through the wrapper functions.
+ *
+ * I skipped over our first piece of assembler, but this one is worth studying
+ * in a bit more detail so I'll describe in easy stages.  First, the routine
+ * to enable interrupts: */
+ENTRY(lg_irq_enable)
+	/* The reverse of irq_disable, this sets lguest_data.irq_enabled to
+	 * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
+	movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
+	/* But now we need to check if the Host wants to know: there might have
+	 * been interrupts waiting to be delivered, in which case it will have
+	 * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
+	 * jump to send_interrupts, otherwise we're done. */
+	testl $0, lguest_data+LGUEST_DATA_irq_pending
+	jnz send_interrupts
+	/* One cool thing about x86 is that you can do many things without using
+	 * a register.  In this case, the normal path hasn't needed to save or
+	 * restore any registers at all! */
+	ret
+send_interrupts:
+	/* OK, now we need a register: eax is used for the hypercall number,
+	 * which is LHCALL_SEND_INTERRUPTS.
+	 *
+	 * We used not to bother with this pending detection at all, which was
+	 * much simpler.  Sooner or later the Host would realize it had to
+	 * send us an interrupt.  But that turns out to make performance 7
+	 * times worse on a simple tcp benchmark.  So now we do this the hard
+	 * way. */
+	pushl %eax
+	movl $LHCALL_SEND_INTERRUPTS, %eax
+	/* This is a vmcall instruction (same thing that KVM uses).  Older
+	 * assembler versions might not know the "vmcall" instruction, so we
+	 * create one manually here. */
+	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+	popl %eax
+	ret
+
+/* Finally, the "popf" or "restore flags" routine.  The %eax register holds the
+ * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
+ * enabling interrupts again, if it's 0 we're leaving them off. */
+ENTRY(lg_restore_fl)
+	/* This is just "lguest_data.irq_enabled = flags;" */
+	movl %eax, lguest_data+LGUEST_DATA_irq_enabled
+	/* Now, if the %eax value has enabled interrupts and
+	 * lguest_data.irq_pending is set, we want to tell the Host so it can
+	 * deliver any outstanding interrupts.  Fortunately, both values will
+	 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
+	 * instruction will AND them together for us.  If both are set, we
+	 * jump to send_interrupts. */
+	testl lguest_data+LGUEST_DATA_irq_pending, %eax
+	jnz send_interrupts
+	/* Again, the normal path has used no extra registers.  Clever, huh? */
+	ret
 
 /* These demark the EIP range where host should never deliver interrupts. */
 .global lguest_noirq_start
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 55e11aa..f9d3563 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for x86 specific library files.
 #
 
-obj-$(CONFIG_SMP) := msr-on-cpu.o
+obj-$(CONFIG_SMP) := msr.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
deleted file mode 100644
index 321cf72..0000000
--- a/arch/x86/lib/msr-on-cpu.c
+++ /dev/null
@@ -1,97 +0,0 @@
-#include <linux/module.h>
-#include <linux/preempt.h>
-#include <linux/smp.h>
-#include <asm/msr.h>
-
-struct msr_info {
-	u32 msr_no;
-	u32 l, h;
-	int err;
-};
-
-static void __rdmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rdmsr(rv->msr_no, rv->l, rv->h);
-}
-
-static void __wrmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	wrmsr(rv->msr_no, rv->l, rv->h);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	*l = rv.l;
-	*h = rv.h;
-
-	return err;
-}
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	rv.l = l;
-	rv.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
-	return err;
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
-}
-
-static void __wrmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.l;
-	*h = rv.h;
-
-	return err ? err : rv.err;
-}
-
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	rv.l = l;
-	rv.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
-
-	return err ? err : rv.err;
-}
-
-EXPORT_SYMBOL(rdmsr_on_cpu);
-EXPORT_SYMBOL(wrmsr_on_cpu);
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
-EXPORT_SYMBOL(wrmsr_safe_on_cpu);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
new file mode 100644
index 0000000..1440b9c
--- /dev/null
+++ b/arch/x86/lib/msr.c
@@ -0,0 +1,183 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+
+struct msr_info {
+	u32 msr_no;
+	struct msr reg;
+	struct msr *msrs;
+	int off;
+	int err;
+};
+
+static void __rdmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
+
+	if (rv->msrs)
+		reg = &rv->msrs[this_cpu - rv->off];
+	else
+		reg = &rv->reg;
+
+	rdmsr(rv->msr_no, reg->l, reg->h);
+}
+
+static void __wrmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
+
+	if (rv->msrs)
+		reg = &rv->msrs[this_cpu - rv->off];
+	else
+		reg = &rv->reg;
+
+	wrmsr(rv->msr_no, reg->l, reg->h);
+}
+
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err;
+}
+EXPORT_SYMBOL(rdmsr_on_cpu);
+
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+EXPORT_SYMBOL(wrmsr_on_cpu);
+
+/* rdmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
+{
+	struct msr_info rv;
+	int this_cpu;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.off    = cpumask_first(mask);
+	rv.msrs	  = msrs;
+	rv.msr_no = msr_no;
+
+	preempt_disable();
+	/*
+	 * FIXME: handle the CPU we're executing on separately for now until
+	 * smp_call_function_many has been fixed to not skip it.
+	 */
+	this_cpu = raw_smp_processor_id();
+	smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1);
+
+	smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL(rdmsr_on_cpus);
+
+/*
+ * wrmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
+{
+	struct msr_info rv;
+	int this_cpu;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.off    = cpumask_first(mask);
+	rv.msrs   = msrs;
+	rv.msr_no = msr_no;
+
+	preempt_disable();
+	/*
+	 * FIXME: handle the CPU we're executing on separately for now until
+	 * smp_call_function_many has been fixed to not skip it.
+	 */
+	this_cpu = raw_smp_processor_id();
+	smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1);
+
+	smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL(wrmsr_on_cpus);
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+}
+
+static void __wrmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_on_cpu);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e7277cb..a725b7f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -161,13 +161,14 @@
 		   st->current_address >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
+		int width = sizeof(unsigned long) * 2;
 
 		/*
 		 * Now print the actual finished series
 		 */
-		seq_printf(m, "0x%p-0x%p   ",
-			   (void *)st->start_address,
-			   (void *)st->current_address);
+		seq_printf(m, "0x%0*lx-0x%0*lx   ",
+			   width, st->start_address,
+			   width, st->current_address);
 
 		delta = (st->current_address - st->start_address) >> 10;
 		while (!(delta & 1023) && unit[1]) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b727..c6acc63 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,40 +3,17 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
+#include <linux/magic.h>		/* STACK_END_MAGIC		*/
+#include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
+#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
 
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <asm/traps.h>			/* dotraplinkage, ...		*/
+#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 
 /*
  * Page fault error code bits:
@@ -225,12 +202,10 @@
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
@@ -538,8 +513,6 @@
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	static int once;
-
 	if (address != regs->ip)
 		return 0;
 
@@ -549,10 +522,7 @@
 	address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!once) {
-			printk(errata93_warning);
-			once = 1;
-		}
+		printk_once(errata93_warning);
 		regs->ip = address;
 		return 1;
 	}
@@ -1044,6 +1014,8 @@
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1137,10 +1109,15 @@
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 
 	check_v8086_mode(regs, address, tsk);
 
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 8126e8d..58f621e 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -44,7 +44,6 @@
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -74,7 +73,6 @@
 #endif
 	}
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ae4f7b5..34c1bfb 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 
@@ -10,6 +11,9 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __initdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
@@ -23,6 +27,69 @@
 #endif
 ;
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
@@ -66,12 +133,11 @@
 	 */
 #ifdef CONFIG_X86_32
 	start = 0x7000;
+#else
+	start = 0x8000;
+#endif
 	e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
 					tables, PAGE_SIZE);
-#else /* CONFIG_X86_64 */
-	start = 0x8000;
-	e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
-#endif
 	if (e820_table_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
@@ -159,12 +225,9 @@
 	use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
@@ -175,7 +238,6 @@
 		set_in_cr4(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
-#endif
 
 	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 749559e..9ff3c08 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,12 +49,9 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
@@ -567,7 +564,7 @@
 }
 #endif /* !CONFIG_ACPI_SLEEP */
 
-void zap_low_mappings(void)
+void zap_low_mappings(bool early)
 {
 	int i;
 
@@ -584,64 +581,16 @@
 		set_pgd(swapper_pg_dir+i, __pgd(0));
 #endif
 	}
-	flush_tlb_all();
-}
 
-int nx_enabled;
+	if (early)
+		__flush_tlb();
+	else
+		flush_tlb_all();
+}
 
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str || !strcmp(str, "on")) {
-		if (cpu_has_nx) {
-			__supported_pte_mask |= _PAGE_NX;
-			disable_nx = 0;
-		}
-	} else {
-		if (!strcmp(str, "off")) {
-			disable_nx = 1;
-			__supported_pte_mask &= ~_PAGE_NX;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
@@ -761,15 +710,15 @@
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memory_present(0, 0, highend_pfn);
 	e820_register_active_regions(0, 0, highend_pfn);
+	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memory_present(0, 0, max_low_pfn);
 	e820_register_active_regions(0, 0, max_low_pfn);
+	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
@@ -1011,7 +960,7 @@
 		test_wp_bit();
 
 	save_pg_dir();
-	zap_low_mappings();
+	zap_low_mappings(true);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1753e80..52bb951 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,18 +50,8 @@
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 static unsigned long dma_reserve __initdata;
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static int __init parse_direct_gbpages_off(char *arg)
 {
 	direct_gbpages = 0;
@@ -85,39 +75,6 @@
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on	Enable (default)
- * off	Disable
- */
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
@@ -628,6 +585,7 @@
 	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -638,11 +596,10 @@
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-	memory_present(0, 0, max_pfn);
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
 }
-#endif
 
 /*
  * Memory hotplug specific functions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 8056545..fe6f84c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,6 @@
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 50dc802..16ccbd7 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
 	unsigned long page; /* location of the fault page */
-	bool old_presence; /* page presence prior to arming */
+	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
 	/*
@@ -97,60 +97,62 @@
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
 	struct list_head *head;
-	struct kmmio_fault_page *p;
+	struct kmmio_fault_page *f;
 
 	page &= PAGE_MASK;
 	head = kmmio_page_list(page);
-	list_for_each_entry_rcu(p, head, list) {
-		if (p->page == page)
-			return p;
+	list_for_each_entry_rcu(f, head, list) {
+		if (f->page == page)
+			return f;
 	}
 	return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
 	pmdval_t v = pmd_val(*pmd);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(addr, &level);
+	pte_t *pte = lookup_address(f->page, &level);
 
 	if (!pte) {
-		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+		pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
 		return -1;
 	}
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		set_pmd_presence((pmd_t *)pte, present, old);
+		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
 		break;
 	case PG_LEVEL_4K:
-		set_pte_presence(pte, present, old);
+		clear_pte_presence(pte, clear, &f->old_presence);
 		break;
 	default:
 		pr_err("kmmio: unexpected page level 0x%x.\n", level);
 		return -1;
 	}
 
-	__flush_tlb_one(addr);
+	__flush_tlb_one(f->page);
 	return 0;
 }
 
@@ -171,9 +173,9 @@
 	WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
 	if (f->armed) {
 		pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-					f->page, f->count, f->old_presence);
+					f->page, f->count, !!f->old_presence);
 	}
-	ret = set_page_presence(f->page, false, &f->old_presence);
+	ret = clear_page_presence(f, true);
 	WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
 	f->armed = true;
 	return ret;
@@ -182,8 +184,7 @@
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	bool tmp;
-	int ret = set_page_presence(f->page, f->old_presence, &tmp);
+	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
 			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
 	f->armed = false;
@@ -310,7 +311,12 @@
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
 							smp_processor_id());
 		goto out;
 	}
@@ -439,12 +445,12 @@
 						head,
 						struct kmmio_delayed_release,
 						rcu);
-	struct kmmio_fault_page *p = dr->release_list;
-	while (p) {
-		struct kmmio_fault_page *next = p->release_next;
-		BUG_ON(p->count);
-		kfree(p);
-		p = next;
+	struct kmmio_fault_page *f = dr->release_list;
+	while (f) {
+		struct kmmio_fault_page *next = f->release_next;
+		BUG_ON(f->count);
+		kfree(f);
+		f = next;
 	}
 	kfree(dr);
 }
@@ -453,19 +459,19 @@
 {
 	struct kmmio_delayed_release *dr =
 		container_of(head, struct kmmio_delayed_release, rcu);
-	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page *f = dr->release_list;
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	while (p) {
-		if (!p->count) {
-			list_del_rcu(&p->list);
-			prevp = &p->release_next;
+	while (f) {
+		if (!f->count) {
+			list_del_rcu(&f->list);
+			prevp = &f->release_next;
 		} else {
-			*prevp = p->release_next;
+			*prevp = f->release_next;
 		}
-		p = p->release_next;
+		f = f->release_next;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -528,8 +534,8 @@
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-								void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
 	struct die_args *arg = args;
 
@@ -544,11 +550,23 @@
 	.notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
 	int i;
+
 	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
 	return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+	}
+}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 605c8be..18d244f 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -40,23 +40,22 @@
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 {
-	u64 i, count;
-	u64 *start;
+	u64 *p, *start, *end;
 	u64 start_bad, last_bad;
 	u64 start_phys_aligned;
-	size_t incr;
+	const size_t incr = sizeof(pattern);
 
-	incr = sizeof(pattern);
 	start_phys_aligned = ALIGN(start_phys, incr);
-	count = (size - (start_phys_aligned - start_phys))/incr;
 	start = __va(start_phys_aligned);
+	end = start + (size - (start_phys_aligned - start_phys)) / incr;
 	start_bad = 0;
 	last_bad = 0;
 
-	for (i = 0; i < count; i++)
-		start[i] = pattern;
-	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-		if (*start == pattern)
+	for (p = start; p < end; p++)
+		*p = pattern;
+
+	for (p = start; p < end; p++, start_phys_aligned += incr) {
+		if (*p == pattern)
 			continue;
 		if (start_phys_aligned == last_bad + incr) {
 			last_bad += incr;
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed..132772a 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@
 
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
+	kmmio_init();
 	enter_uniprocessor();
 	spin_lock_irq(&trace_lock);
 	atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
+	kmmio_cleanup();
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d05a12..459913b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end)
+void __init
+setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
 	if (!end)
 		return;
 
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
@@ -272,9 +279,6 @@
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -578,21 +582,6 @@
 	return pages;
 }
 
-void __init paging_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
-	sparse_memory_present_with_active_regions(MAX_NUMNODES);
-	sparse_init();
-
-	free_area_init_nodes(max_zone_pfns);
-}
-
 static __init int numa_setup(char *opt)
 {
 	if (!opt)
@@ -606,8 +595,6 @@
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e17efed..6ce9518 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -839,13 +839,6 @@
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -890,13 +883,6 @@
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 out:
 	return ret;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0176595..2dfcbf9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,17 +31,11 @@
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -66,9 +60,6 @@
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +77,6 @@
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +172,21 @@
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +197,7 @@
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +205,7 @@
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +227,9 @@
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +290,10 @@
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -345,9 +322,9 @@
 			pxmram = 0;
 	}
 
-	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
-	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
-	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+	e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
 		printk(KERN_ERR
 	"SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
 			(pxmram << PAGE_SHIFT) >> 20,
@@ -357,17 +334,6 @@
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -379,18 +345,8 @@
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -423,7 +379,7 @@
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();
@@ -510,26 +466,6 @@
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864a..b07dd8d 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@
 
 	switch (val) {
 	case DIE_NMI:
-		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
-			ret = NOTIFY_STOP;
+	case DIE_NMI_IPI:
+		model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
+		ret = NOTIFY_STOP;
 		break;
 	default:
 		break;
@@ -134,7 +135,7 @@
 static struct notifier_block profile_exceptions_nb = {
 	.notifier_call = profile_exceptions_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = 2
 };
 
 static int nmi_setup(void)
@@ -356,14 +357,11 @@
 #define exit_sysfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-static int p4force;
-module_param(p4force, int, 0);
-
 static int __init p4_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
-	if (!p4force && (cpu_model > 6 || cpu_model == 5))
+	if (cpu_model > 6 || cpu_model == 5)
 		return 0;
 
 #ifndef CONFIG_SMP
@@ -389,10 +387,25 @@
 	return 0;
 }
 
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
+{
+	if (!strcmp(str, "archperfmon")) {
+		force_arch_perfmon = 1;
+		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+	}
+
+	return 0;
+}
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+
 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
+	if (force_arch_perfmon && cpu_has_arch_perfmon)
+		return 0;
+
 	switch (cpu_model) {
 	case 0 ... 2:
 		*cpu_type = "i386/ppro";
@@ -414,6 +427,13 @@
 	case 15: case 23:
 		*cpu_type = "i386/core_2";
 		break;
+	case 26:
+		arch_perfmon_setup_counters();
+		*cpu_type = "i386/core_i7";
+		break;
+	case 28:
+		*cpu_type = "i386/atom";
+		break;
 	default:
 		/* Unknown */
 		return 0;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fb..4da7230 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -136,6 +136,13 @@
 	u64 val;
 	int i;
 
+	/*
+	 * This can happen if perf counters are in use when
+	 * we steal the die notifier NMI.
+	 */
+	if (unlikely(!reset_value))
+		goto out;
+
 	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
@@ -146,6 +153,7 @@
 		}
 	}
 
+out:
 	/* Only P6 based Pentium M need to re-unmask the apic vector but it
 	 * doesn't hurt other P6 variant */
 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fecbce6..0696d50 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -889,6 +889,9 @@
 		return 0;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return 0;
+
 	/* Find IRQ routing entry */
 
 	if (!pirq_table)
@@ -1039,56 +1042,15 @@
 		pirq_penalty[dev->irq]++;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return;
+
 	dev = NULL;
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 		if (!pin)
 			continue;
 
-#ifdef CONFIG_X86_IO_APIC
-		/*
-		 * Recalculate IRQ numbers if we use the I/O APIC.
-		 */
-		if (io_apic_assign_pci_irqs) {
-			int irq;
-
-			/*
-			 * interrupt pins are numbered starting from 1
-			 */
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-				PCI_SLOT(dev->devfn), pin - 1);
-			/*
-			 * Busses behind bridges are typically not listed in the
-			 * MP-table.  In this case we have to look up the IRQ
-			 * based on the parent bus, parent slot, and pin number.
-			 * The SMP code detects such bridged busses itself so we
-			 * should get into this branch reliably.
-			 */
-			if (irq < 0 && dev->bus->parent) {
-				/* go back to the bridge */
-				struct pci_dev *bridge = dev->bus->self;
-				int bus;
-
-				pin = pci_swizzle_interrupt_pin(dev, pin);
-				bus = bridge->bus->number;
-				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn), pin - 1);
-				if (irq >= 0)
-					dev_warn(&dev->dev,
-						"using bridge %s INT %c to "
-							"get IRQ %d\n",
-						 pci_name(bridge),
-						 'A' + pin - 1, irq);
-			}
-			if (irq >= 0) {
-				dev_info(&dev->dev,
-					"PCI->APIC IRQ transform: INT %c "
-						"-> IRQ %d\n",
-					'A' + pin - 1, irq);
-				dev->irq = irq;
-			}
-		}
-#endif
 		/*
 		 * Still no IRQ? Try to lookup one...
 		 */
@@ -1183,6 +1145,19 @@
 	pcibios_enable_irq = pirq_enable_irq;
 
 	pcibios_fixup_irqs();
+
+	if (io_apic_assign_pci_irqs && pci_routeirq) {
+		struct pci_dev *dev = NULL;
+		/*
+		 * PCI IRQ routing is set up by pci_enable_device(), but we
+		 * also do it here in case there are still broken drivers that
+		 * don't use pci_enable_device().
+		 */
+		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		for_each_pci_dev(dev)
+			pirq_enable_irq(dev);
+	}
+
 	return 0;
 }
 
@@ -1213,16 +1188,23 @@
 static int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
-	struct pci_dev *temp_dev;
 
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+	if (pin && !pcibios_lookup_irq(dev, 1)) {
 		char *msg = "";
 
-		if (io_apic_assign_pci_irqs) {
-			int irq;
+		if (!io_apic_assign_pci_irqs && dev->irq)
+			return 0;
 
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+		if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+			struct pci_dev *temp_dev;
+			int irq;
+			struct io_apic_irq_attr irq_attr;
+
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+						PCI_SLOT(dev->devfn),
+						pin - 1, &irq_attr);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1235,7 +1217,8 @@
 
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-						PCI_SLOT(bridge->devfn), pin - 1);
+						PCI_SLOT(bridge->devfn),
+						pin - 1, &irq_attr);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1245,12 +1228,15 @@
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				io_apic_set_pci_routing(&dev->dev, irq,
+							 &irq_attr);
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
-				dev->irq = irq;
 				return 0;
 			} else
 				msg = "; probably buggy MP table";
+#endif
 		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
 			msg = "";
 		else
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 5fa10bb9..8766b0e 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -375,7 +375,7 @@
 		if (!fixmem32)
 			return AE_OK;
 		if ((mcfg_res->start >= fixmem32->address) &&
-		    (mcfg_res->end <= (fixmem32->address +
+		    (mcfg_res->end < (fixmem32->address +
 				      fixmem32->address_length))) {
 			mcfg_res->flags = 1;
 			return AE_CTRL_TERMINATE;
@@ -392,7 +392,7 @@
 		return AE_OK;
 
 	if ((mcfg_res->start >= address.minimum) &&
-	    (mcfg_res->end <= (address.minimum + address.address_length))) {
+	    (mcfg_res->end < (address.minimum + address.address_length))) {
 		mcfg_res->flags = 1;
 		return AE_CTRL_TERMINATE;
 	}
@@ -418,7 +418,7 @@
 	struct resource mcfg_res;
 
 	mcfg_res.start = start;
-	mcfg_res.end = end;
+	mcfg_res.end = end - 1;
 	mcfg_res.flags = 0;
 
 	acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL);
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index 58b32db..de2abbd 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -3,5 +3,5 @@
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_cpu_$(BITS).o	:= $(nostackp)
 
-obj-$(CONFIG_PM_SLEEP)		+= cpu_$(BITS).o
+obj-$(CONFIG_PM_SLEEP)		+= cpu.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu.c
similarity index 62%
rename from arch/x86/power/cpu_64.c
rename to arch/x86/power/cpu.c
index 5343540..d277ef1 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu.c
@@ -1,5 +1,5 @@
 /*
- * Suspend and hibernation support for x86-64
+ * Suspend support specific for i386/x86-64.
  *
  * Distribute under GPLv2
  *
@@ -8,18 +8,28 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
 
-#include <linux/smp.h>
 #include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
+#include <linux/smp.h>
+
 #include <asm/pgtable.h>
+#include <asm/proto.h>
 #include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
 
-static void fix_processor_context(void);
+#ifdef CONFIG_X86_32
+static struct saved_context saved_context;
 
+unsigned long saved_context_ebx;
+unsigned long saved_context_esp, saved_context_ebp;
+unsigned long saved_context_esi, saved_context_edi;
+unsigned long saved_context_eflags;
+#else
+/* CONFIG_X86_64 */
 struct saved_context saved_context;
+#endif
 
 /**
  *	__save_processor_state - save CPU registers before creating a
@@ -38,19 +48,35 @@
  */
 static void __save_processor_state(struct saved_context *ctxt)
 {
+#ifdef CONFIG_X86_32
+	mtrr_save_fixed_ranges(NULL);
+#endif
 	kernel_fpu_begin();
 
 	/*
 	 * descriptor tables
 	 */
+#ifdef CONFIG_X86_32
+	store_gdt(&ctxt->gdt);
+	store_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
 	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
 	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+#endif
 	store_tr(ctxt->tr);
 
 	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
+	savesegment(es, ctxt->es);
+	savesegment(fs, ctxt->fs);
+	savesegment(gs, ctxt->gs);
+	savesegment(ss, ctxt->ss);
+#else
+/* CONFIG_X86_64 */
 	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
 	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
 	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
@@ -62,30 +88,87 @@
 	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 	mtrr_save_fixed_ranges(NULL);
 
+	rdmsrl(MSR_EFER, ctxt->efer);
+#endif
+
 	/*
 	 * control registers
 	 */
-	rdmsrl(MSR_EFER, ctxt->efer);
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
+#ifdef CONFIG_X86_32
+	ctxt->cr4 = read_cr4_safe();
+#else
+/* CONFIG_X86_64 */
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
+#endif
 }
 
+/* Needed by apm.c */
 void save_processor_state(void)
 {
 	__save_processor_state(&saved_context);
 }
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(save_processor_state);
+#endif
 
 static void do_fpu_end(void)
 {
 	/*
-	 * Restore FPU regs if necessary
+	 * Restore FPU regs if necessary.
 	 */
 	kernel_fpu_end();
 }
 
+static void fix_processor_context(void)
+{
+	int cpu = smp_processor_id();
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
+
+	set_tss_desc(cpu, t);	/*
+				 * This just modifies memory; should not be
+				 * necessary. But... This is necessary, because
+				 * 386 hardware has concept of busy TSS or some
+				 * similar stupidity.
+				 */
+
+#ifdef CONFIG_X86_64
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
+
+	syscall_init();				/* This sets MSR_*STAR and related */
+#endif
+	load_TR_desc();				/* This does ltr */
+	load_LDT(&current->active_mm->context);	/* This does lldt */
+
+	/*
+	 * Now maybe reload the debug registers
+	 */
+	if (current->thread.debugreg7) {
+#ifdef CONFIG_X86_32
+		set_debugreg(current->thread.debugreg0, 0);
+		set_debugreg(current->thread.debugreg1, 1);
+		set_debugreg(current->thread.debugreg2, 2);
+		set_debugreg(current->thread.debugreg3, 3);
+		/* no 4 and 5 */
+		set_debugreg(current->thread.debugreg6, 6);
+		set_debugreg(current->thread.debugreg7, 7);
+#else
+		/* CONFIG_X86_64 */
+		loaddebug(&current->thread, 0);
+		loaddebug(&current->thread, 1);
+		loaddebug(&current->thread, 2);
+		loaddebug(&current->thread, 3);
+		/* no 4 and 5 */
+		loaddebug(&current->thread, 6);
+		loaddebug(&current->thread, 7);
+#endif
+	}
+
+}
+
 /**
  *	__restore_processor_state - restore the contents of CPU registers saved
  *		by __save_processor_state()
@@ -96,9 +179,16 @@
 	/*
 	 * control registers
 	 */
+	/* cr4 was introduced in the Pentium CPU */
+#ifdef CONFIG_X86_32
+	if (ctxt->cr4)
+		write_cr4(ctxt->cr4);
+#else
+/* CONFIG X86_64 */
 	wrmsrl(MSR_EFER, ctxt->efer);
 	write_cr8(ctxt->cr8);
 	write_cr4(ctxt->cr4);
+#endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
@@ -107,13 +197,31 @@
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
+#ifdef CONFIG_X86_32
+	load_gdt(&ctxt->gdt);
+	load_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
 	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
 	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-
+#endif
 
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
+	loadsegment(es, ctxt->es);
+	loadsegment(fs, ctxt->fs);
+	loadsegment(gs, ctxt->gs);
+	loadsegment(ss, ctxt->ss);
+
+	/*
+	 * sysenter MSRs
+	 */
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
+#else
+/* CONFIG_X86_64 */
 	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
 	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
 	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
@@ -123,6 +231,7 @@
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
 	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+#endif
 
 	/*
 	 * restore XCR0 for xsave capable cpu's.
@@ -134,41 +243,17 @@
 
 	do_fpu_end();
 	mtrr_ap_init();
+
+#ifdef CONFIG_X86_32
+	mcheck_init(&boot_cpu_data);
+#endif
 }
 
+/* Needed by apm.c */
 void restore_processor_state(void)
 {
 	__restore_processor_state(&saved_context);
 }
-
-static void fix_processor_context(void)
-{
-	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-
-	/*
-	 * This just modifies memory; should not be necessary. But... This
-	 * is necessary, because 386 hardware has concept of busy TSS or some
-	 * similar stupidity.
-	 */
-	set_tss_desc(cpu, t);
-
-	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
-
-	syscall_init();                         /* This sets MSR_*STAR and related */
-	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7){
-                loaddebug(&current->thread, 0);
-                loaddebug(&current->thread, 1);
-                loaddebug(&current->thread, 2);
-                loaddebug(&current->thread, 3);
-                /* no 4 and 5 */
-                loaddebug(&current->thread, 6);
-                loaddebug(&current->thread, 7);
-	}
-}
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(restore_processor_state);
+#endif
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
deleted file mode 100644
index ce702c5..0000000
--- a/arch/x86/power/cpu_32.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Suspend support specific for i386.
- *
- * Distribute under GPLv2
- *
- * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
- * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
- */
-
-#include <linux/module.h>
-#include <linux/suspend.h>
-#include <asm/mtrr.h>
-#include <asm/mce.h>
-#include <asm/xcr.h>
-#include <asm/suspend.h>
-
-static struct saved_context saved_context;
-
-unsigned long saved_context_ebx;
-unsigned long saved_context_esp, saved_context_ebp;
-unsigned long saved_context_esi, saved_context_edi;
-unsigned long saved_context_eflags;
-
-static void __save_processor_state(struct saved_context *ctxt)
-{
-	mtrr_save_fixed_ranges(NULL);
-	kernel_fpu_begin();
-
-	/*
-	 * descriptor tables
-	 */
-	store_gdt(&ctxt->gdt);
-	store_idt(&ctxt->idt);
-	store_tr(ctxt->tr);
-
-	/*
-	 * segment registers
-	 */
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
-	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-
-	/*
-	 * control registers
-	 */
-	ctxt->cr0 = read_cr0();
-	ctxt->cr2 = read_cr2();
-	ctxt->cr3 = read_cr3();
-	ctxt->cr4 = read_cr4_safe();
-}
-
-/* Needed by apm.c */
-void save_processor_state(void)
-{
-	__save_processor_state(&saved_context);
-}
-EXPORT_SYMBOL(save_processor_state);
-
-static void do_fpu_end(void)
-{
-	/*
-	 * Restore FPU regs if necessary.
-	 */
-	kernel_fpu_end();
-}
-
-static void fix_processor_context(void)
-{
-	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-
-	set_tss_desc(cpu, t);	/*
-				 * This just modifies memory; should not be
-				 * necessary. But... This is necessary, because
-				 * 386 hardware has concept of busy TSS or some
-				 * similar stupidity.
-				 */
-
-	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
-
-}
-
-static void __restore_processor_state(struct saved_context *ctxt)
-{
-	/*
-	 * control registers
-	 */
-	/* cr4 was introduced in the Pentium CPU */
-	if (ctxt->cr4)
-		write_cr4(ctxt->cr4);
-	write_cr3(ctxt->cr3);
-	write_cr2(ctxt->cr2);
-	write_cr0(ctxt->cr0);
-
-	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
-	 */
-	load_gdt(&ctxt->gdt);
-	load_idt(&ctxt->idt);
-
-	/*
-	 * segment registers
-	 */
-	loadsegment(es, ctxt->es);
-	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
-
-	/*
-	 * sysenter MSRs
-	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-
-	/*
-	 * restore XCR0 for xsave capable cpu's.
-	 */
-	if (cpu_has_xsave)
-		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
-
-	fix_processor_context();
-	do_fpu_end();
-	mtrr_ap_init();
-	mcheck_init(&boot_cpu_data);
-}
-
-/* Needed by apm.c */
-void restore_processor_state(void)
-{
-	__restore_processor_state(&saved_context);
-}
-EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 1241f11..58bc00f 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -338,6 +338,8 @@
 		}
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	if (compat_uses_vma || !compat) {
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -358,11 +360,13 @@
 			goto up_fail;
 	}
 
-	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
   up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
 	up_write(&mm->mmap_sem);
 
 	return ret;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7133cdf..21e1aeb 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/elf.h>
 #include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -115,15 +116,18 @@
 		goto up_fail;
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	ret = install_special_mapping(mm, addr, vdso_size,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 				      VM_ALWAYSDUMP,
 				      vdso_pages);
-	if (ret)
+	if (ret) {
+		current->mm->context.vdso = NULL;
 		goto up_fail;
+	}
 
-	current->mm->context.vdso = (void *)addr;
 up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09e8c3..0a1700a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/start_kernel.h>
 #include <linux/sched.h>
+#include <linux/kprobes.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -44,6 +45,7 @@
 #include <asm/processor.h>
 #include <asm/proto.h>
 #include <asm/msr-index.h>
+#include <asm/traps.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -240,10 +242,10 @@
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-void xen_leave_lazy(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
+	paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -428,11 +430,44 @@
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
+	unsigned long addr;
+
 	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
-	info->address = gate_offset(*val);
+
+	addr = gate_offset(*val);
+#ifdef CONFIG_X86_64
+	/*
+	 * Look for known traps using IST, and substitute them
+	 * appropriately.  The debugger ones are the only ones we care
+	 * about.  Xen will handle faults like double_fault and
+	 * machine_check, so we should never see them.  Warn if
+	 * there's an unexpected IST-using fault handler.
+	 */
+	if (addr == (unsigned long)debug)
+		addr = (unsigned long)xen_debug;
+	else if (addr == (unsigned long)int3)
+		addr = (unsigned long)xen_int3;
+	else if (addr == (unsigned long)stack_segment)
+		addr = (unsigned long)xen_stack_segment;
+	else if (addr == (unsigned long)double_fault ||
+		 addr == (unsigned long)nmi) {
+		/* Don't need to handle these */
+		return 0;
+#ifdef CONFIG_X86_MCE
+	} else if (addr == (unsigned long)machine_check) {
+		return 0;
+#endif
+	} else {
+		/* Some other trap using IST? */
+		if (WARN_ON(val->ist != 0))
+			return 0;
+	}
+#endif	/* CONFIG_X86_64 */
+	info->address = addr;
+
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
 	/* interrupt gates clear IF */
@@ -623,10 +658,26 @@
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
+
+static unsigned long xen_read_cr0(void)
+{
+	unsigned long cr0 = percpu_read(xen_cr0_value);
+
+	if (unlikely(cr0 == 0)) {
+		cr0 = native_read_cr0();
+		percpu_write(xen_cr0_value, cr0);
+	}
+
+	return cr0;
+}
+
 static void xen_write_cr0(unsigned long cr0)
 {
 	struct multicall_space mcs;
 
+	percpu_write(xen_cr0_value, cr0);
+
 	/* Only pay attention to cr0.TS; everything else is
 	   ignored. */
 	mcs = xen_mc_entry(0);
@@ -812,7 +863,7 @@
 
 	.clts = xen_clts,
 
-	.read_cr0 = native_read_cr0,
+	.read_cr0 = xen_read_cr0,
 	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
@@ -860,10 +911,8 @@
 	/* Xen takes care of %gs when switching to usermode for us */
 	.swapgs = paravirt_nop,
 
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy,
-	},
+	.start_context_switch = paravirt_start_context_switch,
+	.end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fba55b1..4ceb285 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -452,10 +452,6 @@
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
-	/* updates to init_mm may be done without lock */
-	if (mm == &init_mm)
-		preempt_disable();
-
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -476,9 +472,7 @@
 	}
 	xen_set_pte(ptep, pteval);
 
-out:
-	if (mm == &init_mm)
-		preempt_enable();
+out:	return;
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -1152,10 +1146,8 @@
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 		load_cr3(swapper_pg_dir);
-		arch_flush_lazy_cpu_mode();
-	}
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1168,7 +1160,6 @@
 			load_cr3(swapper_pg_dir);
 		else
 			leave_mm(smp_processor_id());
-		arch_flush_lazy_cpu_mode();
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
@@ -1876,6 +1867,14 @@
 	xen_mark_init_mm_pinned();
 }
 
+static void xen_leave_lazy_mmu(void)
+{
+	preempt_disable();
+	xen_mc_flush();
+	paravirt_leave_lazy_mmu();
+	preempt_enable();
+}
+
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pagetable_setup_start = xen_pagetable_setup_start,
 	.pagetable_setup_done = xen_pagetable_setup_done,
@@ -1949,7 +1948,7 @@
 
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
+		.leave = xen_leave_lazy_mmu,
 	},
 
 	.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 15c6c68..ad0047f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -61,9 +61,9 @@
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
 	 */
-	e820_add_region(__pa(xen_start_info->mfn_list),
-			xen_start_info->pt_base - xen_start_info->mfn_list,
-			E820_RESERVED);
+	reserve_early(__pa(xen_start_info->mfn_list),
+		      __pa(xen_start_info->pt_base),
+			"XEN START INFO");
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index ca6596b..22494fd 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_leave_lazy(void);
 void xen_post_allocator_init(void);
 
 char * __init xen_memory_setup(void);
diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c
index 3981a46..c1accea 100644
--- a/arch/xtensa/kernel/module.c
+++ b/arch/xtensa/kernel/module.c
@@ -34,8 +34,6 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
 }
 
 int module_frob_arch_sections(Elf32_Ehdr *hdr,
diff --git a/block/Kconfig b/block/Kconfig
index e7d1278..2c39527 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,7 @@
 config LBD
 	bool "Support for large block devices and files"
 	depends on !64BIT
+	default y
 	help
 	  Enable block devices or files of size 2TB and larger.
 
@@ -38,11 +39,13 @@
 
 	  The ext4 filesystem requires that this feature be enabled in
 	  order to support filesystems that have the huge_file feature
-	  enabled.    Otherwise, it will refuse to mount any filesystems
-	  that use the huge_file feature, which is enabled by default
-	  by mke2fs.ext4.   The GFS2 filesystem also requires this feature.
+	  enabled.  Otherwise, it will refuse to mount in the read-write
+	  mode any filesystems that use the huge_file feature, which is
+	  enabled by default by mke2fs.ext4.
 
-	  If unsure, say N.
+	  The GFS2 filesystem also requires this feature.
+
+	  If unsure, say Y.
 
 config BLK_DEV_BSG
 	bool "Block layer SG support v4 (EXPERIMENTAL)"
diff --git a/block/as-iosched.c b/block/as-iosched.c
index c48fa67..7a12cf6 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -306,8 +306,8 @@
 	data_dir = rq_is_sync(rq1);
 
 	last = ad->last_sector[data_dir];
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	BUG_ON(data_dir != rq_is_sync(rq2));
 
@@ -566,13 +566,15 @@
 			as_update_thinktime(ad, aic, thinktime);
 
 			/* Calculate read -> read seek distance */
-			if (aic->last_request_pos < rq->sector)
-				seek_dist = rq->sector - aic->last_request_pos;
+			if (aic->last_request_pos < blk_rq_pos(rq))
+				seek_dist = blk_rq_pos(rq) -
+					    aic->last_request_pos;
 			else
-				seek_dist = aic->last_request_pos - rq->sector;
+				seek_dist = aic->last_request_pos -
+					    blk_rq_pos(rq);
 			as_update_seekdist(ad, aic, seek_dist);
 		}
-		aic->last_request_pos = rq->sector + rq->nr_sectors;
+		aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 		set_bit(AS_TASK_IOSTARTED, &aic->state);
 		spin_unlock(&aic->lock);
 	}
@@ -587,7 +589,7 @@
 {
 	unsigned long delay;	/* jiffies */
 	sector_t last = ad->last_sector[ad->batch_data_dir];
-	sector_t next = rq->sector;
+	sector_t next = blk_rq_pos(rq);
 	sector_t delta; /* acceptable close offset (in sectors) */
 	sector_t s;
 
@@ -981,7 +983,7 @@
 	 * This has to be set in order to be correctly updated by
 	 * as_find_next_rq
 	 */
-	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
+	ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (data_dir == BLK_RW_SYNC) {
 		struct io_context *ioc = RQ_IOC(rq);
@@ -1312,12 +1314,8 @@
 static void as_work_handler(struct work_struct *work)
 {
 	struct as_data *ad = container_of(work, struct as_data, antic_work);
-	struct request_queue *q = ad->q;
-	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_start_queueing(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	blk_run_queue(ad->q);
 }
 
 static int as_may_queue(struct request_queue *q, int rw)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 20b4111..30022b4 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -106,10 +106,7 @@
 	 */
 	q->ordseq = 0;
 	rq = q->orig_bar_rq;
-
-	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
-		BUG();
-
+	__blk_end_request_all(rq, q->orderr);
 	return true;
 }
 
@@ -166,7 +163,7 @@
 	 * For an empty barrier, there's no actual BAR request, which
 	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
 	 */
-	if (!rq->hard_nr_sectors) {
+	if (!blk_rq_sectors(rq)) {
 		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
 				QUEUE_ORDERED_DO_POSTFLUSH);
 		/*
@@ -183,7 +180,7 @@
 	}
 
 	/* stash away the original request */
-	elv_dequeue_request(q, rq);
+	blk_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = NULL;
 
@@ -221,7 +218,7 @@
 	} else
 		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
 		rq = NULL;
 	else
 		skip |= QUEUE_ORDSEQ_DRAIN;
@@ -251,10 +248,8 @@
 			 * Queue ordering not supported.  Terminate
 			 * with prejudice.
 			 */
-			elv_dequeue_request(q, rq);
-			if (__blk_end_request(rq, -EOPNOTSUPP,
-					      blk_rq_bytes(rq)))
-				BUG();
+			blk_dequeue_request(rq);
+			__blk_end_request_all(rq, -EOPNOTSUPP);
 			*rqp = NULL;
 			return false;
 		}
@@ -329,7 +324,7 @@
 	/*
 	 * The driver must store the error location in ->bi_sector, if
 	 * it supports it. For non-stacked drivers, this should be copied
-	 * from rq->sector.
+	 * from blk_rq_pos(rq).
 	 */
 	if (error_sector)
 		*error_sector = bio->bi_sector;
@@ -393,10 +388,10 @@
 
 		bio->bi_sector = sector;
 
-		if (nr_sects > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			nr_sects -= q->max_hw_sectors;
-			sector += q->max_hw_sectors;
+		if (nr_sects > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			nr_sects -= queue_max_hw_sectors(q);
+			sector += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index c89883b..f6452f6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,22 +28,14 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
@@ -68,11 +60,11 @@
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
+	if (!blk_do_io_stat(rq))
 		return;
 
 	cpu = part_stat_lock();
-	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
 	if (!new_io)
 		part_stat_inc(cpu, part, merges[rw]);
@@ -127,13 +119,14 @@
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->cpu = -1;
 	rq->q = q;
-	rq->sector = rq->hard_sector = (sector_t) -1;
+	rq->__sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
 	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
 	rq->ref_count = 1;
+	rq->start_time = jiffies;
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -184,14 +177,11 @@
 		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
 		rq->cmd_flags);
 
-	printk(KERN_INFO "  sector %llu, nr/cnr %lu/%u\n",
-						(unsigned long long)rq->sector,
-						rq->nr_sectors,
-						rq->current_nr_sectors);
-	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, data %p, len %u\n",
-						rq->bio, rq->biotail,
-						rq->buffer, rq->data,
-						rq->data_len);
+	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
+	       (unsigned long long)blk_rq_pos(rq),
+	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
+	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
+	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
@@ -333,24 +323,6 @@
 }
 EXPORT_SYMBOL(blk_unplug);
 
-static void blk_invoke_request_fn(struct request_queue *q)
-{
-	if (unlikely(blk_queue_stopped(q)))
-		return;
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
-		kblockd_schedule_work(q, &q->unplug_work);
-	}
-}
-
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -365,7 +337,7 @@
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-	blk_invoke_request_fn(q);
+	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -425,12 +397,23 @@
 {
 	blk_remove_plug(q);
 
+	if (unlikely(blk_queue_stopped(q)))
+		return;
+
+	if (elv_queue_empty(q))
+		return;
+
 	/*
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q))
-		blk_invoke_request_fn(q);
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -440,9 +423,7 @@
  *
  * Description:
  *    Invoke request handling on this queue, if it has pending work to do.
- *    May be used to restart queueing when a request has completed. Also
- *    See @blk_start_queueing.
- *
+ *    May be used to restart queueing when a request has completed.
  */
 void blk_run_queue(struct request_queue *q)
 {
@@ -902,26 +883,59 @@
 EXPORT_SYMBOL(blk_get_request);
 
 /**
- * blk_start_queueing - initiate dispatch of requests to device
- * @q:		request queue to kick into gear
+ * blk_make_request - given a bio, allocate a corresponding struct request.
+ * @q: target request queue
+ * @bio:  The bio describing the memory mappings that will be submitted for IO.
+ *        It may be a chained-bio properly constructed by block/bio layer.
+ * @gfp_mask: gfp flags to be used for memory allocation
  *
- * This is basically a helper to remove the need to know whether a queue
- * is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue. Should be used to start queueing on a device outside
- * of ->request_fn() context. Also see @blk_run_queue.
+ * blk_make_request is the parallel of generic_make_request for BLOCK_PC
+ * type commands. Where the struct request needs to be farther initialized by
+ * the caller. It is passed a &struct bio, which describes the memory info of
+ * the I/O transfer.
  *
- * The queue lock must be held with interrupts disabled.
+ * The caller of blk_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffers. That bio_data_dir() will return
+ * the needed direction of the request. (And all bio's in the passed bio-chain
+ * are properly set accordingly)
+ *
+ * If called under none-sleepable conditions, mapped bio buffers must not
+ * need bouncing, by calling the appropriate masked or flagged allocator,
+ * suitable for the target device. Otherwise the call to blk_queue_bounce will
+ * BUG.
+ *
+ * WARNING: When allocating/cloning a bio-chain, careful consideration should be
+ * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
+ * anything but the first bio in the chain. Otherwise you risk waiting for IO
+ * completion of a bio that hasn't been submitted yet, thus resulting in a
+ * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
+ * of bio_alloc(), as that avoids the mempool deadlock.
+ * If possible a big IO should be split into smaller parts when allocation
+ * fails. Partial allocation should not be an error, or you risk a live-lock.
  */
-void blk_start_queueing(struct request_queue *q)
+struct request *blk_make_request(struct request_queue *q, struct bio *bio,
+				 gfp_t gfp_mask)
 {
-	if (!blk_queue_plugged(q)) {
-		if (unlikely(blk_queue_stopped(q)))
-			return;
-		q->request_fn(q);
-	} else
-		__generic_unplug_device(q);
+	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
+
+	if (unlikely(!rq))
+		return ERR_PTR(-ENOMEM);
+
+	for_each_bio(bio) {
+		struct bio *bounce_bio = bio;
+		int ret;
+
+		blk_queue_bounce(q, &bounce_bio);
+		ret = blk_rq_append_bio(q, rq, bounce_bio);
+		if (unlikely(ret)) {
+			blk_put_request(rq);
+			return ERR_PTR(ret);
+		}
+	}
+
+	return rq;
 }
-EXPORT_SYMBOL(blk_start_queueing);
+EXPORT_SYMBOL(blk_make_request);
 
 /**
  * blk_requeue_request - put a request back on queue
@@ -942,6 +956,8 @@
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 
+	BUG_ON(blk_queued_rq(rq));
+
 	elv_requeue_request(q, rq);
 }
 EXPORT_SYMBOL(blk_requeue_request);
@@ -977,7 +993,6 @@
 	 * barrier
 	 */
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
@@ -991,7 +1006,7 @@
 
 	drive_stat_acct(rq, 1);
 	__elv_add_request(q, rq, where, 0);
-	blk_start_queueing(q);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
@@ -1113,16 +1128,13 @@
 	if (bio_failfast_driver(bio))
 		req->cmd_flags |= REQ_FAILFAST_DRIVER;
 
-	/*
-	 * REQ_BARRIER implies no merging, but lets make it explicit
-	 */
 	if (unlikely(bio_discard(bio))) {
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_barrier(bio))
 			req->cmd_flags |= REQ_SOFTBARRIER;
 		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_barrier(bio)))
-		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= REQ_HARDBARRIER;
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
@@ -1132,9 +1144,8 @@
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
-	req->hard_sector = req->sector = bio->bi_sector;
+	req->__sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
-	req->start_time = jiffies;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
@@ -1150,14 +1161,13 @@
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors;
+	int el_ret;
+	unsigned int bytes = bio->bi_size;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
-	nr_sectors = bio_sectors(bio);
-
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1182,7 +1192,7 @@
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1208,10 +1218,8 @@
 		 * not touch req->buffer either...
 		 */
 		req->buffer = bio_data(bio);
-		req->current_nr_sectors = bio_cur_sectors(bio);
-		req->hard_cur_sectors = req->current_nr_sectors;
-		req->sector = req->hard_sector = bio->bi_sector;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->__sector = bio->bi_sector;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1277,7 +1285,7 @@
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev, bio->bi_sector,
+				    bdev->bd_dev,
 				    bio->bi_sector - p->start_sect);
 	}
 }
@@ -1422,11 +1430,11 @@
 			goto end_io;
 		}
 
-		if (unlikely(nr_sectors > q->max_hw_sectors)) {
+		if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-				bdevname(bio->bi_bdev, b),
-				bio_sectors(bio),
-				q->max_hw_sectors);
+			       bdevname(bio->bi_bdev, b),
+			       bio_sectors(bio),
+			       queue_max_hw_sectors(q));
 			goto end_io;
 		}
 
@@ -1446,8 +1454,7 @@
 			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, bio->bi_sector,
-					    old_sector);
+			trace_block_remap(q, bio, old_dev, old_sector);
 
 		trace_block_bio_queue(q, bio);
 
@@ -1593,8 +1600,8 @@
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-	if (rq->nr_sectors > q->max_sectors ||
-	    rq->data_len > q->max_hw_sectors << 9) {
+	if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
+	    blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
@@ -1606,8 +1613,8 @@
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > q->max_phys_segments ||
-	    rq->nr_phys_segments > q->max_hw_segments) {
+	if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
+	    rq->nr_phys_segments > queue_max_hw_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
@@ -1651,40 +1658,15 @@
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
-/**
- * blkdev_dequeue_request - dequeue request and start timeout timer
- * @req: request to dequeue
- *
- * Dequeue @req and start timeout timer on it.  This hands off the
- * request to the driver.
- *
- * Block internal functions which don't want to start timer should
- * call elv_dequeue_request().
- */
-void blkdev_dequeue_request(struct request *req)
-{
-	elv_dequeue_request(req->q, req);
-
-	/*
-	 * We are now handing the request to the hardware, add the
-	 * timeout handler.
-	 */
-	blk_add_timer(req);
-}
-EXPORT_SYMBOL(blkdev_dequeue_request);
-
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	if (!blk_do_io_stat(req))
-		return;
-
-	if (blk_fs_request(req)) {
+	if (blk_do_io_stat(req)) {
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
 		part_stat_unlock();
 	}
@@ -1692,22 +1674,19 @@
 
 static void blk_account_io_done(struct request *req)
 {
-	if (!blk_do_io_stat(req))
-		return;
-
 	/*
 	 * Account IO completion.  bar_rq isn't accounted as a normal
 	 * IO on queueing nor completion.  Accounting the containing
 	 * request is enough.
 	 */
-	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+	if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
@@ -1719,38 +1698,226 @@
 }
 
 /**
- * __end_that_request_first - end I/O on a request
- * @req:      the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
+ * blk_peek_request - peek at the top of a request queue
+ * @q: request queue to peek at
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @req, and sets it up
- *     for the next range of segments (if any) in the cluster.
+ *     Return the request at the top of @q.  The returned request
+ *     should be started using blk_start_request() before LLD starts
+ *     processing it.
  *
  * Return:
- *     %0 - we are done with this request, call end_that_request_last()
- *     %1 - still buffers pending for this request
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_peek_request(struct request_queue *q)
+{
+	struct request *rq;
+	int ret;
+
+	while ((rq = __elv_next_request(q)) != NULL) {
+		if (!(rq->cmd_flags & REQ_STARTED)) {
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing).  Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq))
+				elv_activate_rq(q, rq);
+
+			/*
+			 * just mark as started even if we don't start
+			 * it, a request that has been delayed should
+			 * not be passed by new incoming requests
+			 */
+			rq->cmd_flags |= REQ_STARTED;
+			trace_block_rq_issue(q, rq);
+		}
+
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
+
+		if (rq->cmd_flags & REQ_DONTPREP)
+			break;
+
+		if (q->dma_drain_size && blk_rq_bytes(rq)) {
+			/*
+			 * make sure space for the drain appears we
+			 * know we can do this because max_hw_segments
+			 * has been adjusted to be one fewer than the
+			 * device can handle
+			 */
+			rq->nr_phys_segments++;
+		}
+
+		if (!q->prep_rq_fn)
+			break;
+
+		ret = q->prep_rq_fn(q, rq);
+		if (ret == BLKPREP_OK) {
+			break;
+		} else if (ret == BLKPREP_DEFER) {
+			/*
+			 * the request may have been (partially) prepped.
+			 * we need to keep this request in the front to
+			 * avoid resource deadlock.  REQ_STARTED will
+			 * prevent other fs requests from passing this one.
+			 */
+			if (q->dma_drain_size && blk_rq_bytes(rq) &&
+			    !(rq->cmd_flags & REQ_DONTPREP)) {
+				/*
+				 * remove the space for the drain we added
+				 * so that we don't add it again
+				 */
+				--rq->nr_phys_segments;
+			}
+
+			rq = NULL;
+			break;
+		} else if (ret == BLKPREP_KILL) {
+			rq->cmd_flags |= REQ_QUIET;
+			/*
+			 * Mark this request as started so we don't trigger
+			 * any debug logic in the end I/O path.
+			 */
+			blk_start_request(rq);
+			__blk_end_request_all(rq, -EIO);
+		} else {
+			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
+			break;
+		}
+	}
+
+	return rq;
+}
+EXPORT_SYMBOL(blk_peek_request);
+
+void blk_dequeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+
+	BUG_ON(list_empty(&rq->queuelist));
+	BUG_ON(ELV_ON_HASH(rq));
+
+	list_del_init(&rq->queuelist);
+
+	/*
+	 * the time frame between a request being removed from the lists
+	 * and to it is freed is accounted as io that is in progress at
+	 * the driver side.
+	 */
+	if (blk_account_rq(rq))
+		q->in_flight[rq_is_sync(rq)]++;
+}
+
+/**
+ * blk_start_request - start request processing on the driver
+ * @req: request to dequeue
+ *
+ * Description:
+ *     Dequeue @req and start timeout timer on it.  This hands off the
+ *     request to the driver.
+ *
+ *     Block internal functions which don't want to start timer should
+ *     call blk_dequeue_request().
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+void blk_start_request(struct request *req)
+{
+	blk_dequeue_request(req);
+
+	/*
+	 * We are now handing the request to the hardware, initialize
+	 * resid_len to full count and add the timeout handler.
+	 */
+	req->resid_len = blk_rq_bytes(req);
+	if (unlikely(blk_bidi_rq(req)))
+		req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
+
+	blk_add_timer(req);
+}
+EXPORT_SYMBOL(blk_start_request);
+
+/**
+ * blk_fetch_request - fetch a request from a request queue
+ * @q: request queue to fetch a request from
+ *
+ * Description:
+ *     Return the request at the top of @q.  The request is started on
+ *     return and LLD can start processing it immediately.
+ *
+ * Return:
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_fetch_request(struct request_queue *q)
+{
+	struct request *rq;
+
+	rq = blk_peek_request(q);
+	if (rq)
+		blk_start_request(rq);
+	return rq;
+}
+EXPORT_SYMBOL(blk_fetch_request);
+
+/**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @req:      the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete @req
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @req, but doesn't complete
+ *     the request structure even if @req doesn't have leftover.
+ *     If @req has leftover, sets it up for the next range of segments.
+ *
+ *     This special helper function is only for request stacking drivers
+ *     (e.g. request-based dm) so that they can handle partial completion.
+ *     Actual device drivers should use blk_end_request instead.
+ *
+ *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
+ *     %false return from this function.
+ *
+ * Return:
+ *     %false - this request doesn't have any more data
+ *     %true  - this request has more data
  **/
-static int __end_that_request_first(struct request *req, int error,
-				    int nr_bytes)
+bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	int total_bytes, bio_nbytes, next_idx = 0;
 	struct bio *bio;
 
+	if (!req->bio)
+		return false;
+
 	trace_block_rq_complete(req->q, req);
 
 	/*
-	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-	 * sense key with us all the way through
+	 * For fs requests, rq is just carrier of independent bio's
+	 * and each partial completion should be handled separately.
+	 * Reset per-request error on each partial completion.
+	 *
+	 * TODO: tj: This is too subtle.  It would be better to let
+	 * low level drivers do what they see fit.
 	 */
-	if (!blk_pc_request(req))
+	if (blk_fs_request(req))
 		req->errors = 0;
 
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
 		printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
-				(unsigned long long)req->sector);
+				(unsigned long long)blk_rq_pos(req));
 	}
 
 	blk_account_io_completion(req, nr_bytes);
@@ -1810,8 +1977,15 @@
 	/*
 	 * completely done
 	 */
-	if (!req->bio)
-		return 0;
+	if (!req->bio) {
+		/*
+		 * Reset counters so that the request stacking driver
+		 * can find how many bytes remain in the request
+		 * later.
+		 */
+		req->__data_len = 0;
+		return false;
+	}
 
 	/*
 	 * if the request wasn't completed, update state
@@ -1823,21 +1997,55 @@
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 
-	blk_recalc_rq_sectors(req, total_bytes >> 9);
+	req->__data_len -= total_bytes;
+	req->buffer = bio_data(req->bio);
+
+	/* update sector only for requests with clear definition of sector */
+	if (blk_fs_request(req) || blk_discard_rq(req))
+		req->__sector += total_bytes >> 9;
+
+	/*
+	 * If total number of sectors is less than the first segment
+	 * size, something has gone terribly wrong.
+	 */
+	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+		printk(KERN_ERR "blk: request botched\n");
+		req->__data_len = blk_rq_cur_bytes(req);
+	}
+
+	/* recalculate the number of segments */
 	blk_recalc_rq_segments(req);
-	return 1;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+static bool blk_update_bidi_request(struct request *rq, int error,
+				    unsigned int nr_bytes,
+				    unsigned int bidi_bytes)
+{
+	if (blk_update_request(rq, error, nr_bytes))
+		return true;
+
+	/* Bidi request must be completed as a whole */
+	if (unlikely(blk_bidi_rq(rq)) &&
+	    blk_update_request(rq->next_rq, error, bidi_bytes))
+		return true;
+
+	add_disk_randomness(rq->rq_disk);
+
+	return false;
 }
 
 /*
  * queue lock must be held
  */
-static void end_that_request_last(struct request *req, int error)
+static void blk_finish_request(struct request *req, int error)
 {
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
-	if (blk_queued_rq(req))
-		elv_dequeue_request(req->q, req);
+	BUG_ON(blk_queued_rq(req));
 
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
@@ -1857,117 +2065,62 @@
 }
 
 /**
- * blk_rq_bytes - Returns bytes left to complete in the entire request
- * @rq: the request being processed
- **/
-unsigned int blk_rq_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->hard_nr_sectors << 9;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_bytes);
-
-/**
- * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
- * @rq: the request being processed
- **/
-unsigned int blk_rq_cur_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->current_nr_sectors << 9;
-
-	if (rq->bio)
-		return rq->bio->bi_size;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
-
-/**
- * end_request - end I/O on the current segment of the request
- * @req:	the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
- *
- * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
- **/
-void end_request(struct request *req, int uptodate)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(req, error, req->hard_cur_sectors << 9);
-}
-EXPORT_SYMBOL(end_request);
-
-static int end_that_request_data(struct request *rq, int error,
-				 unsigned int nr_bytes, unsigned int bidi_bytes)
-{
-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
-
-	return 0;
-}
-
-/**
- * blk_end_io - Generic end_io function to complete a request.
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- * @bidi_bytes:   number of bytes to complete @rq->next_rq
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
+ * blk_end_bidi_request - Complete a bidi request
+ * @rq:         the request to complete
+ * @error:      %0 for success, < %0 for error
+ * @nr_bytes:   number of bytes to complete @rq
+ * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *     If @rq has leftover, sets it up for the next range of segments.
+ *     Drivers that supports bidi can safely call this member for any
+ *     type of request, bidi or uni.  In the later case @bidi_bytes is
+ *     just ignored.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet, it still has pending buffers.
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes,
-		      int (drv_callback)(struct request *))
+static bool blk_end_bidi_request(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
-	unsigned long flags = 0UL;
+	unsigned long flags;
 
-	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
-		return 1;
-
-	/* Special feature for tricky drivers */
-	if (drv_callback && drv_callback(rq))
-		return 1;
-
-	add_disk_randomness(rq->rq_disk);
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	end_that_request_last(rq, error);
+	blk_finish_request(rq, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
+	return false;
+}
+
+/**
+ * __blk_end_bidi_request - Complete a bidi request with queue lock held
+ * @rq:         the request to complete
+ * @error:      %0 for success, < %0 for error
+ * @nr_bytes:   number of bytes to complete @rq
+ * @bidi_bytes: number of bytes to complete @rq->next_rq
+ *
+ * Description:
+ *     Identical to blk_end_bidi_request() except that queue lock is
+ *     assumed to be locked on entry and remains so on return.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+static bool __blk_end_bidi_request(struct request *rq, int error,
+				   unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
+
+	blk_finish_request(rq, error);
+
+	return false;
 }
 
 /**
@@ -1981,16 +2134,55 @@
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	return blk_end_io(rq, error, nr_bytes, 0, NULL);
+	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL_GPL(blk_end_request);
 
 /**
+ * blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @error: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.
+ */
+void blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
+}
+EXPORT_SYMBOL_GPL(blk_end_request_all);
+
+/**
+ * blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @error: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool blk_end_request_cur(struct request *rq, int error)
+{
+	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_cur);
+
+/**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
@@ -2000,105 +2192,54 @@
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
-		return 1;
-
-	add_disk_randomness(rq->rq_disk);
-
-	end_that_request_last(rq, error);
-
-	return 0;
+	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL_GPL(__blk_end_request);
 
 /**
- * blk_end_bidi_request - Helper function for drivers to complete bidi request.
- * @rq:         the bidi request being processed
- * @error:      %0 for success, < %0 for error
- * @nr_bytes:   number of bytes to complete @rq
- * @bidi_bytes: number of bytes to complete @rq->next_rq
+ * __blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @error: %0 for success, < %0 for error
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
-			 unsigned int bidi_bytes)
-{
-	return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL);
-}
-EXPORT_SYMBOL_GPL(blk_end_bidi_request);
-
-/**
- * blk_update_request - Special helper function for request stacking drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
- *     the request structure even if @rq doesn't have leftover.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is only for request stacking drivers
- *     (e.g. request-based dm) so that they can handle partial completion.
- *     Actual device drivers should use blk_end_request instead.
+ *     Completely finish @rq.  Must be called with queue lock held.
  */
-void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+void __blk_end_request_all(struct request *rq, int error)
 {
-	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
-		/*
-		 * These members are not updated in end_that_request_data()
-		 * when all bios are completed.
-		 * Update them so that the request stacking driver can find
-		 * how many bytes remain in the request later.
-		 */
-		rq->nr_sectors = rq->hard_nr_sectors = 0;
-		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
-	}
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
 }
-EXPORT_SYMBOL_GPL(blk_update_request);
+EXPORT_SYMBOL_GPL(__blk_end_request_all);
 
 /**
- * blk_end_request_callback - Special helper function for tricky drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
+ * __blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @error: %0 for success, < %0 for error
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is used only for existing tricky drivers.
- *     (e.g. cdrom_newpc_intr() of ide-cd)
- *     This interface will be removed when such drivers are rewritten.
- *     Don't use this interface in other places anymore.
+ *     Complete the current consecutively mapped chunk from @rq.  Must
+ *     be called with queue lock held.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet.
- *          this request still has pending buffers or
- *          the driver doesn't want to finish this request yet.
- **/
-int blk_end_request_callback(struct request *rq, int error,
-			     unsigned int nr_bytes,
-			     int (drv_callback)(struct request *))
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool __blk_end_request_cur(struct request *rq, int error)
 {
-	return blk_end_io(rq, error, nr_bytes, 0, drv_callback);
+	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
-EXPORT_SYMBOL_GPL(blk_end_request_callback);
+EXPORT_SYMBOL_GPL(__blk_end_request_cur);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
@@ -2111,11 +2252,7 @@
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->current_nr_sectors = bio_cur_sectors(bio);
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->data_len = bio->bi_size;
-
+	rq->__data_len = bio->bi_size;
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
@@ -2150,6 +2287,106 @@
 }
 EXPORT_SYMBOL_GPL(blk_lld_busy);
 
+/**
+ * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
+ * @rq: the clone request to be cleaned up
+ *
+ * Description:
+ *     Free all bios in @rq for a cloned request.
+ */
+void blk_rq_unprep_clone(struct request *rq)
+{
+	struct bio *bio;
+
+	while ((bio = rq->bio) != NULL) {
+		rq->bio = bio->bi_next;
+
+		bio_put(bio);
+	}
+}
+EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
+
+/*
+ * Copy attributes of the original request to the clone request.
+ * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ */
+static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+{
+	dst->cpu = src->cpu;
+	dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+	dst->cmd_type = src->cmd_type;
+	dst->__sector = blk_rq_pos(src);
+	dst->__data_len = blk_rq_bytes(src);
+	dst->nr_phys_segments = src->nr_phys_segments;
+	dst->ioprio = src->ioprio;
+	dst->extra_len = src->extra_len;
+}
+
+/**
+ * blk_rq_prep_clone - Helper function to setup clone request
+ * @rq: the request to be setup
+ * @rq_src: original request to be cloned
+ * @bs: bio_set that bios for clone are allocated from
+ * @gfp_mask: memory allocation mask for bio
+ * @bio_ctr: setup function to be called for each clone bio.
+ *           Returns %0 for success, non %0 for failure.
+ * @data: private data to be passed to @bio_ctr
+ *
+ * Description:
+ *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
+ *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ *     are not copied, and copying such parts is the caller's responsibility.
+ *     Also, pages which the original bios are pointing to are not copied
+ *     and the cloned bios just point same pages.
+ *     So cloned bios must be completed before original bios, which means
+ *     the caller must complete @rq before @rq_src.
+ */
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+		      struct bio_set *bs, gfp_t gfp_mask,
+		      int (*bio_ctr)(struct bio *, struct bio *, void *),
+		      void *data)
+{
+	struct bio *bio, *bio_src;
+
+	if (!bs)
+		bs = fs_bio_set;
+
+	blk_rq_init(NULL, rq);
+
+	__rq_for_each_bio(bio_src, rq_src) {
+		bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+		if (!bio)
+			goto free_and_out;
+
+		__bio_clone(bio, bio_src);
+
+		if (bio_integrity(bio_src) &&
+		    bio_integrity_clone(bio, bio_src, gfp_mask))
+			goto free_and_out;
+
+		if (bio_ctr && bio_ctr(bio, bio_src, data))
+			goto free_and_out;
+
+		if (rq->bio) {
+			rq->biotail->bi_next = bio;
+			rq->biotail = bio;
+		} else
+			rq->bio = rq->biotail = bio;
+	}
+
+	__blk_rq_prep_clone(rq, rq_src);
+
+	return 0;
+
+free_and_out:
+	if (bio)
+		bio_free(bio, bs);
+	blk_rq_unprep_clone(rq);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
@@ -2158,6 +2395,9 @@
 
 int __init blk_dev_init(void)
 {
+	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
+			sizeof(((struct request *)0)->cmd_flags));
+
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 6af716d..49557e9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -51,7 +51,6 @@
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
 	rq->rq_disk = bd_disk;
-	rq->cmd_flags |= REQ_NOMERGE;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 91fa8e0..73e28d3 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -340,7 +340,7 @@
 		kobject_uevent(&bi->kobj, KOBJ_ADD);
 
 		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
-		bi->sector_size = disk->queue->hardsect_size;
+		bi->sector_size = queue_logical_block_size(disk->queue);
 		disk->integrity = bi;
 	} else
 		bi = disk->integrity;
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065..d4ed600 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,9 +35,9 @@
 	if (ioc == NULL)
 		return 1;
 
-	BUG_ON(atomic_read(&ioc->refcount) == 0);
+	BUG_ON(atomic_long_read(&ioc->refcount) == 0);
 
-	if (atomic_dec_and_test(&ioc->refcount)) {
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
@@ -90,7 +90,7 @@
 
 	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
-		atomic_set(&ret->refcount, 1);
+		atomic_long_set(&ret->refcount, 1);
 		atomic_set(&ret->nr_tasks, 1);
 		spin_lock_init(&ret->lock);
 		ret->ioprio_changed = 0;
@@ -151,7 +151,7 @@
 		ret = current_io_context(gfp_flags, node);
 		if (unlikely(!ret))
 			break;
-	} while (!atomic_inc_not_zero(&ret->refcount));
+	} while (!atomic_long_inc_not_zero(&ret->refcount));
 
 	return ret;
 }
@@ -163,8 +163,8 @@
 	struct io_context *dst = *pdst;
 
 	if (src) {
-		BUG_ON(atomic_read(&src->refcount) == 0);
-		atomic_inc(&src->refcount);
+		BUG_ON(atomic_long_read(&src->refcount) == 0);
+		atomic_long_inc(&src->refcount);
 		put_io_context(dst);
 		*pdst = src;
 	}
diff --git a/block/blk-map.c b/block/blk-map.c
index f103729..9083cf0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,11 +20,10 @@
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->data_len += bio->bi_size;
+		rq->__data_len += bio->bi_size;
 	}
 	return 0;
 }
-EXPORT_SYMBOL(blk_rq_append_bio);
 
 static int __blk_rq_unmap_user(struct bio *bio)
 {
@@ -116,7 +115,7 @@
 	struct bio *bio = NULL;
 	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len)
 		return -EINVAL;
@@ -156,7 +155,7 @@
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
@@ -235,7 +234,7 @@
 	blk_queue_bounce(q, &bio);
 	bio_get(bio);
 	blk_rq_bio_prep(q, rq, bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -282,7 +281,8 @@
  *
  * Description:
  *    Data will be mapped directly if possible. Otherwise a bounce
- *    buffer is used.
+ *    buffer is used. Can be called multple times to append multple
+ *    buffers.
  */
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
@@ -290,8 +290,9 @@
 	int reading = rq_data_dir(rq) == READ;
 	int do_copy = 0;
 	struct bio *bio;
+	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len || !kbuf)
 		return -EINVAL;
@@ -311,9 +312,15 @@
 	if (do_copy)
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	blk_rq_bio_prep(q, rq, bio);
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (unlikely(ret)) {
+		/* request is too big */
+		bio_put(bio);
+		return ret;
+	}
+
 	blk_queue_bounce(q, &rq->bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 23d2a6f..39ce644 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,35 +9,6 @@
 
 #include "blk.h"
 
-void blk_recalc_rq_sectors(struct request *rq, int nsect)
-{
-	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
-		rq->hard_sector += nsect;
-		rq->hard_nr_sectors -= nsect;
-
-		/*
-		 * Move the I/O submission pointers ahead if required.
-		 */
-		if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
-		    (rq->sector <= rq->hard_sector)) {
-			rq->sector = rq->hard_sector;
-			rq->nr_sectors = rq->hard_nr_sectors;
-			rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
-			rq->current_nr_sectors = rq->hard_cur_sectors;
-			rq->buffer = bio_data(rq->bio);
-		}
-
-		/*
-		 * if total number of sectors is less than the first segment
-		 * size, something has gone terribly wrong
-		 */
-		if (rq->nr_sectors < rq->current_nr_sectors) {
-			printk(KERN_ERR "blk: request botched\n");
-			rq->nr_sectors = rq->current_nr_sectors;
-		}
-	}
-}
-
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
@@ -61,11 +32,12 @@
 			 * never considered part of another segment, since that
 			 * might change with the bounce page.
 			 */
-			high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+			high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
 			if (high || highprv)
 				goto new_segment;
 			if (cluster) {
-				if (seg_size + bv->bv_len > q->max_segment_size)
+				if (seg_size + bv->bv_len
+				    > queue_max_segment_size(q))
 					goto new_segment;
 				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
 					goto new_segment;
@@ -120,7 +92,7 @@
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
-	    q->max_segment_size)
+	    queue_max_segment_size(q))
 		return 0;
 
 	if (!bio_has_data(bio))
@@ -163,7 +135,7 @@
 		int nbytes = bvec->bv_len;
 
 		if (bvprv && cluster) {
-			if (sg->length + nbytes > q->max_segment_size)
+			if (sg->length + nbytes > queue_max_segment_size(q))
 				goto new_segment;
 
 			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
@@ -199,8 +171,9 @@
 
 
 	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
-	    (rq->data_len & q->dma_pad_mask)) {
-		unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1;
+	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
+		unsigned int pad_len =
+			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
 
 		sg->length += pad_len;
 		rq->extra_len += pad_len;
@@ -233,8 +206,8 @@
 {
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
-	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+	if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
+	    req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -255,11 +228,11 @@
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -279,12 +252,12 @@
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -315,7 +288,7 @@
 	/*
 	 * Will it become too large?
 	 */
-	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
+	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -327,10 +300,10 @@
 		total_phys_segments--;
 	}
 
-	if (total_phys_segments > q->max_phys_segments)
+	if (total_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
-	if (total_phys_segments > q->max_hw_segments)
+	if (total_phys_segments > queue_max_hw_segments(q))
 		return 0;
 
 	/* Merge is OK... */
@@ -345,7 +318,7 @@
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
 		part_dec_in_flight(part);
@@ -366,7 +339,7 @@
 	/*
 	 * not contiguous
 	 */
-	if (req->sector + req->nr_sectors != next->sector)
+	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
 		return 0;
 
 	if (rq_data_dir(req) != rq_data_dir(next)
@@ -398,7 +371,7 @@
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
-	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+	req->__data_len += blk_rq_bytes(next);
 
 	elv_merge_requests(q, req, next);
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 57af728..d71cedc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -134,7 +134,7 @@
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_hardsect_size(q, 512);
+	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -179,16 +179,16 @@
 	 */
 	if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
 		dma = 1;
-	q->bounce_pfn = max_low_pfn;
+	q->limits.bounce_pfn = max_low_pfn;
 #else
 	if (b_pfn < blk_max_low_pfn)
 		dma = 1;
-	q->bounce_pfn = b_pfn;
+	q->limits.bounce_pfn = b_pfn;
 #endif
 	if (dma) {
 		init_emergency_isa_pool();
 		q->bounce_gfp = GFP_NOIO | GFP_DMA;
-		q->bounce_pfn = b_pfn;
+		q->limits.bounce_pfn = b_pfn;
 	}
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -211,14 +211,23 @@
 	}
 
 	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->max_hw_sectors = q->max_sectors = max_sectors;
+		q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
 	else {
-		q->max_sectors = BLK_DEF_MAX_SECTORS;
-		q->max_hw_sectors = max_sectors;
+		q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
+		q->limits.max_hw_sectors = max_sectors;
 	}
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
 
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
+{
+	if (BLK_DEF_MAX_SECTORS > max_sectors)
+		q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
+	else
+		q->limits.max_hw_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_hw_sectors);
+
 /**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
@@ -238,7 +247,7 @@
 		       __func__, max_segments);
 	}
 
-	q->max_phys_segments = max_segments;
+	q->limits.max_phys_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_phys_segments);
 
@@ -262,7 +271,7 @@
 		       __func__, max_segments);
 	}
 
-	q->max_hw_segments = max_segments;
+	q->limits.max_hw_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_segments);
 
@@ -283,26 +292,110 @@
 		       __func__, max_size);
 	}
 
-	q->max_segment_size = max_size;
+	q->limits.max_segment_size = max_size;
 }
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
 /**
- * blk_queue_hardsect_size - set hardware sector size for the queue
+ * blk_queue_logical_block_size - set logical block size for the queue
  * @q:  the request queue for the device
- * @size:  the hardware sector size, in bytes
+ * @size:  the logical block size, in bytes
  *
  * Description:
- *   This should typically be set to the lowest possible sector size
- *   that the hardware can operate on (possible without reverting to
- *   even internal read-modify-write operations). Usually the default
- *   of 512 covers most hardware.
+ *   This should be set to the lowest possible block size that the
+ *   storage device can address.  The default of 512 covers most
+ *   hardware.
  **/
-void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
+void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
-	q->hardsect_size = size;
+	q->limits.logical_block_size = size;
+
+	if (q->limits.physical_block_size < size)
+		q->limits.physical_block_size = size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
 }
-EXPORT_SYMBOL(blk_queue_hardsect_size);
+EXPORT_SYMBOL(blk_queue_logical_block_size);
+
+/**
+ * blk_queue_physical_block_size - set physical block size for the queue
+ * @q:  the request queue for the device
+ * @size:  the physical block size, in bytes
+ *
+ * Description:
+ *   This should be set to the lowest possible sector size that the
+ *   hardware can operate on without reverting to read-modify-write
+ *   operations.
+ */
+void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
+{
+	q->limits.physical_block_size = size;
+
+	if (q->limits.physical_block_size < q->limits.logical_block_size)
+		q->limits.physical_block_size = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_physical_block_size);
+
+/**
+ * blk_queue_alignment_offset - set physical block alignment offset
+ * @q:	the request queue for the device
+ * @offset: alignment offset in bytes
+ *
+ * Description:
+ *   Some devices are naturally misaligned to compensate for things like
+ *   the legacy DOS partition table 63-sector offset.  Low-level drivers
+ *   should call this function for devices whose first sector is not
+ *   naturally aligned.
+ */
+void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
+{
+	q->limits.alignment_offset =
+		offset & (q->limits.physical_block_size - 1);
+	q->limits.misaligned = 0;
+}
+EXPORT_SYMBOL(blk_queue_alignment_offset);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:	the request queue for the device
+ * @min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Some devices have an internal block size bigger than the reported
+ *   hardware sector size.  This function can be used to signal the
+ *   smallest I/O the device can perform without incurring a performance
+ *   penalty.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	q->limits.io_min = min;
+
+	if (q->limits.io_min < q->limits.logical_block_size)
+		q->limits.io_min = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_io_min);
+
+/**
+ * blk_queue_io_opt - set optimal request size for the queue
+ * @q:	the request queue for the device
+ * @opt:  optimal request size in bytes
+ *
+ * Description:
+ *   Drivers can call this function to set the preferred I/O request
+ *   size for devices that report such a value.
+ */
+void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
+{
+	q->limits.io_opt = opt;
+}
+EXPORT_SYMBOL(blk_queue_io_opt);
 
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
@@ -317,14 +410,27 @@
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	/* zero is "infinity" */
-	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
-	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
-	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
+	t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
+					     queue_max_sectors(b));
 
-	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
-	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
-	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
-	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
+	t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
+						queue_max_hw_sectors(b));
+
+	t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
+						   queue_segment_boundary(b));
+
+	t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
+						   queue_max_phys_segments(b));
+
+	t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
+						 queue_max_hw_segments(b));
+
+	t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
+						  queue_max_segment_size(b));
+
+	t->limits.logical_block_size = max(queue_logical_block_size(t),
+					   queue_logical_block_size(b));
+
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
@@ -337,6 +443,109 @@
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
 /**
+ * blk_stack_limits - adjust queue_limits for stacked devices
+ * @t:	the stacking driver limits (top)
+ * @b:  the underlying queue limits (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges two queue_limit structs.  Returns 0 if alignment didn't
+ *    change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+		     sector_t offset)
+{
+	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
+
+	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
+					    b->seg_boundary_mask);
+
+	t->max_phys_segments = min_not_zero(t->max_phys_segments,
+					    b->max_phys_segments);
+
+	t->max_hw_segments = min_not_zero(t->max_hw_segments,
+					  b->max_hw_segments);
+
+	t->max_segment_size = min_not_zero(t->max_segment_size,
+					   b->max_segment_size);
+
+	t->logical_block_size = max(t->logical_block_size,
+				    b->logical_block_size);
+
+	t->physical_block_size = max(t->physical_block_size,
+				     b->physical_block_size);
+
+	t->io_min = max(t->io_min, b->io_min);
+	t->no_cluster |= b->no_cluster;
+
+	/* Bottom device offset aligned? */
+	if (offset &&
+	    (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	/* If top has no alignment offset, inherit from bottom */
+	if (!t->alignment_offset)
+		t->alignment_offset =
+			b->alignment_offset & (b->physical_block_size - 1);
+
+	/* Top device aligned on logical block boundary? */
+	if (t->alignment_offset & (t->logical_block_size - 1)) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(blk_stack_limits);
+
+/**
+ * disk_stack_limits - adjust queue limits for stacked drivers
+ * @disk:  MD/DM gendisk (top)
+ * @bdev:  the underlying block device (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges the limits for two queues.  Returns 0 if alignment
+ *    didn't change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+		       sector_t offset)
+{
+	struct request_queue *t = disk->queue;
+	struct request_queue *b = bdev_get_queue(bdev);
+
+	offset += get_start_sect(bdev) << 9;
+
+	if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, top);
+		bdevname(bdev, bottom);
+
+		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
+		       top, bottom);
+	}
+
+	if (!t->queue_lock)
+		WARN_ON_ONCE(1);
+	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(t->queue_lock, flags);
+		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+		spin_unlock_irqrestore(t->queue_lock, flags);
+	}
+}
+EXPORT_SYMBOL(disk_stack_limits);
+
+/**
  * blk_queue_dma_pad - set pad mask
  * @q:     the request queue for the device
  * @mask:  pad mask
@@ -396,11 +605,11 @@
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size)
 {
-	if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+	if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
 		return -EINVAL;
 	/* make room for appending the drain */
-	--q->max_hw_segments;
-	--q->max_phys_segments;
+	blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
+	blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
 	q->dma_drain_needed = dma_drain_needed;
 	q->dma_drain_buffer = buf;
 	q->dma_drain_size = size;
@@ -422,7 +631,7 @@
 		       __func__, mask);
 	}
 
-	q->seg_boundary_mask = mask;
+	q->limits.seg_boundary_mask = mask;
 }
 EXPORT_SYMBOL(blk_queue_segment_boundary);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba..b1cd040 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -95,21 +95,36 @@
 
 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 {
-	int max_sectors_kb = q->max_sectors >> 1;
+	int max_sectors_kb = queue_max_sectors(q) >> 1;
 
 	return queue_var_show(max_sectors_kb, (page));
 }
 
-static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page)
+static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->hardsect_size, page);
+	return queue_var_show(queue_logical_block_size(q), page);
+}
+
+static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_physical_block_size(q), page);
+}
+
+static ssize_t queue_io_min_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_min(q), page);
+}
+
+static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_opt(q), page);
 }
 
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
 	unsigned long max_sectors_kb,
-			max_hw_sectors_kb = q->max_hw_sectors >> 1,
+		max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
 			page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
 	ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
 
@@ -117,7 +132,7 @@
 		return -EINVAL;
 
 	spin_lock_irq(q->queue_lock);
-	q->max_sectors = max_sectors_kb << 1;
+	blk_queue_max_sectors(q, max_sectors_kb << 1);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
@@ -125,7 +140,7 @@
 
 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 {
-	int max_hw_sectors_kb = q->max_hw_sectors >> 1;
+	int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
 
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
@@ -249,7 +264,27 @@
 
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
-	.show = queue_hw_sector_size_show,
+	.show = queue_logical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_logical_block_size_entry = {
+	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.show = queue_logical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_physical_block_size_entry = {
+	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.show = queue_physical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_io_min_entry = {
+	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.show = queue_io_min_show,
+};
+
+static struct queue_sysfs_entry queue_io_opt_entry = {
+	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.show = queue_io_opt_show,
 };
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
@@ -283,6 +318,10 @@
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_logical_block_size_entry.attr,
+	&queue_physical_block_size_entry.attr,
+	&queue_io_min_entry.attr,
+	&queue_io_opt_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
@@ -383,22 +422,26 @@
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
+	struct device *dev = disk_to_dev(disk);
 
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
 		return -ENXIO;
 
-	if (!q->request_fn)
-		return 0;
+	ret = blk_trace_init_sysfs(dev);
+	if (ret)
+		return ret;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
-			  "%s", "queue");
+	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
 	if (ret < 0)
 		return ret;
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
+	if (!q->request_fn)
+		return 0;
+
 	ret = elv_register_queue(q);
 	if (ret) {
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 3c518e3..2e5cfeb 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -336,7 +336,7 @@
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned max_depth, offset;
+	unsigned max_depth;
 	int tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -355,13 +355,16 @@
 	 * to starve sync IO on behalf of flooding async IO.
 	 */
 	max_depth = bqt->max_depth;
-	if (rq_is_sync(rq))
-		offset = 0;
-	else
-		offset = max_depth >> 2;
+	if (!rq_is_sync(rq) && max_depth > 1) {
+		max_depth -= 2;
+		if (!max_depth)
+			max_depth = 1;
+		if (q->in_flight[0] > max_depth)
+			return 1;
+	}
 
 	do {
-		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		tag = find_first_zero_bit(bqt->tag_map, max_depth);
 		if (tag >= max_depth)
 			return 1;
 
@@ -374,7 +377,7 @@
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 	list_add(&rq->queuelist, &q->tag_busy_list);
 	return 0;
 }
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 1ec0d50..1ba7e0a 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -122,10 +122,8 @@
 			if (blk_mark_rq_complete(rq))
 				continue;
 			blk_rq_timed_out(rq);
-		} else {
-			if (!next || time_after(next, rq->deadline))
-				next = rq->deadline;
-		}
+		} else if (!next || time_after(next, rq->deadline))
+			next = rq->deadline;
 	}
 
 	/*
@@ -176,16 +174,14 @@
 	BUG_ON(!list_empty(&req->timeout_list));
 	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
 
-	if (req->timeout)
-		req->deadline = jiffies + req->timeout;
-	else {
-		req->deadline = jiffies + q->rq_timeout;
-		/*
-		 * Some LLDs, like scsi, peek at the timeout to prevent
-		 * a command from being retried forever.
-		 */
+	/*
+	 * Some LLDs, like scsi, peek at the timeout to prevent a
+	 * command from being retried forever.
+	 */
+	if (!req->timeout)
 		req->timeout = q->rq_timeout;
-	}
+
+	req->deadline = jiffies + req->timeout;
 	list_add_tail(&req->timeout_list, &q->timeout_list);
 
 	/*
diff --git a/block/blk.h b/block/blk.h
index 79c85f7..3fae6ad 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,9 @@
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
+int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+		      struct bio *bio);
+void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
 void blk_unplug_work(struct work_struct *work);
@@ -43,6 +46,43 @@
 	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
 }
 
+/*
+ * Internal elevator interface
+ */
+#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+
+static inline struct request *__elv_next_request(struct request_queue *q)
+{
+	struct request *rq;
+
+	while (1) {
+		while (!list_empty(&q->queue_head)) {
+			rq = list_entry_rq(q->queue_head.next);
+			if (blk_do_ordered(q, &rq))
+				return rq;
+		}
+
+		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+			return NULL;
+	}
+}
+
+static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e->ops->elevator_activate_req_fn)
+		e->ops->elevator_activate_req_fn(q, rq);
+}
+
+static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e->ops->elevator_deactivate_req_fn)
+		e->ops->elevator_deactivate_req_fn(q, rq);
+}
+
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 int blk_should_fake_timeout(struct request_queue *);
 ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
@@ -64,7 +104,6 @@
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
 void blk_recalc_rq_segments(struct request *rq);
-void blk_recalc_rq_sectors(struct request *rq, int nsect);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
@@ -112,9 +151,17 @@
 #endif
 }
 
+/*
+ * Contribute to IO statistics IFF:
+ *
+ *	a) it's attached to a gendisk, and
+ *	b) the queue had IO stats enabled when this request was started, and
+ *	c) it's a file system request or a discard request
+ */
 static inline int blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk && blk_rq_io_stat(rq);
+	return rq->rq_disk && blk_rq_io_stat(rq) &&
+	       (blk_fs_request(rq) || blk_discard_rq(rq));
 }
 
 #endif
diff --git a/block/bsg.c b/block/bsg.c
index 206060e..5358f9a 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -315,6 +315,7 @@
 	blk_put_request(rq);
 	if (next_rq) {
 		blk_rq_unmap_user(next_rq->bio);
+		next_rq->bio = NULL;
 		blk_put_request(next_rq);
 	}
 	return ERR_PTR(ret);
@@ -445,14 +446,15 @@
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->data_len;
-		hdr->din_resid = rq->next_rq->data_len;
+		hdr->dout_resid = rq->resid_len;
+		hdr->din_resid = rq->next_rq->resid_len;
 		blk_rq_unmap_user(bidi_bio);
+		rq->next_rq->bio = NULL;
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->data_len;
+		hdr->din_resid = rq->resid_len;
 	else
-		hdr->dout_resid = rq->data_len;
+		hdr->dout_resid = rq->resid_len;
 
 	/*
 	 * If the request generated a negative error number, return it
@@ -466,6 +468,7 @@
 	blk_rq_unmap_user(bio);
 	if (rq->cmd != rq->__cmd)
 		kfree(rq->cmd);
+	rq->bio = NULL;
 	blk_put_request(rq);
 
 	return ret;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a55a9bd..ef2f72d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -349,8 +349,8 @@
 	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
 		return rq2;
 
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	last = cfqd->last_position;
 
@@ -579,9 +579,9 @@
 		 * Sort strictly based on sector.  Smallest to the left,
 		 * largest to the right.
 		 */
-		if (sector > cfqq->next_rq->sector)
+		if (sector > blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_right;
-		else if (sector < cfqq->next_rq->sector)
+		else if (sector < blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_left;
 		else
 			break;
@@ -611,8 +611,8 @@
 		return;
 
 	cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
-	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector,
-					 &parent, &p);
+	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
+				      blk_rq_pos(cfqq->next_rq), &parent, &p);
 	if (!__cfqq) {
 		rb_link_node(&cfqq->p_node, parent, p);
 		rb_insert_color(&cfqq->p_node, cfqq->p_root);
@@ -760,7 +760,7 @@
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
 						cfqd->rq_in_driver);
 
-	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
+	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
 
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
@@ -949,10 +949,10 @@
 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 					  struct request *rq)
 {
-	if (rq->sector >= cfqd->last_position)
-		return rq->sector - cfqd->last_position;
+	if (blk_rq_pos(rq) >= cfqd->last_position)
+		return blk_rq_pos(rq) - cfqd->last_position;
 	else
-		return cfqd->last_position - rq->sector;
+		return cfqd->last_position - blk_rq_pos(rq);
 }
 
 #define CIC_SEEK_THR	8 * 1024
@@ -996,7 +996,7 @@
 	if (cfq_rq_close(cfqd, __cfqq->next_rq))
 		return __cfqq;
 
-	if (__cfqq->next_rq->sector < sector)
+	if (blk_rq_pos(__cfqq->next_rq) < sector)
 		node = rb_next(&__cfqq->p_node);
 	else
 		node = rb_prev(&__cfqq->p_node);
@@ -1282,7 +1282,7 @@
 	if (!cfqd->active_cic) {
 		struct cfq_io_context *cic = RQ_CIC(rq);
 
-		atomic_inc(&cic->ioc->refcount);
+		atomic_long_inc(&cic->ioc->refcount);
 		cfqd->active_cic = cic;
 	}
 }
@@ -1918,10 +1918,10 @@
 
 	if (!cic->last_request_pos)
 		sdist = 0;
-	else if (cic->last_request_pos < rq->sector)
-		sdist = rq->sector - cic->last_request_pos;
+	else if (cic->last_request_pos < blk_rq_pos(rq))
+		sdist = blk_rq_pos(rq) - cic->last_request_pos;
 	else
-		sdist = cic->last_request_pos - rq->sector;
+		sdist = cic->last_request_pos - blk_rq_pos(rq);
 
 	/*
 	 * Don't allow the seek distance to get too large from the
@@ -2071,7 +2071,7 @@
 	cfq_update_io_seektime(cfqd, cic, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
-	cic->last_request_pos = rq->sector + rq->nr_sectors;
+	cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (cfqq == cfqd->active_queue) {
 		/*
@@ -2088,7 +2088,7 @@
 			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
 			    cfqd->busy_queues > 1) {
 				del_timer(&cfqd->idle_slice_timer);
-				blk_start_queueing(cfqd->queue);
+			__blk_run_queue(cfqd->queue);
 			}
 			cfq_mark_cfqq_must_dispatch(cfqq);
 		}
@@ -2100,7 +2100,7 @@
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		blk_start_queueing(cfqd->queue);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -2345,7 +2345,7 @@
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	blk_start_queueing(q);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615d..7865a34 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@
 	memcpy(&buts.name, &cbuts.name, 32);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+	ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
 	mutex_unlock(&bdev->bd_mutex);
 	if (ret)
 		return ret;
@@ -763,10 +763,10 @@
 	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
 		return compat_put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return compat_put_int(arg, bdev_hardsect_size(bdev));
+		return compat_put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
-					 bdev_get_queue(bdev)->max_sectors);
+					 queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET: /* compatible, but no compat_ptr (!) */
 	case BLKFRASET:
 		if (!capable(CAP_SYS_ADMIN))
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c4d991d..b547cbc 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -138,7 +138,7 @@
 
 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {
-			BUG_ON(sector != __rq->sector);
+			BUG_ON(sector != blk_rq_pos(__rq));
 
 			if (elv_rq_merge_ok(__rq, bio)) {
 				ret = ELEVATOR_FRONT_MERGE;
diff --git a/block/elevator.c b/block/elevator.c
index 7073a90..ca86192 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,17 +33,16 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -52,11 +51,7 @@
 #define ELV_HASH_FN(sec)	\
 		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
 #define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
-
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
+#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 /*
  * Query io scheduler to see if the current process issuing bio may be
@@ -120,9 +115,9 @@
 	 * we can merge and sequence is ok, check if it's possible
 	 */
 	if (elv_rq_merge_ok(__rq, bio)) {
-		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
+		if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
 			ret = ELEVATOR_BACK_MERGE;
-		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
+		else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
 			ret = ELEVATOR_FRONT_MERGE;
 	}
 
@@ -310,22 +305,6 @@
 }
 EXPORT_SYMBOL(elevator_exit);
 
-static void elv_activate_rq(struct request_queue *q, struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e->ops->elevator_activate_req_fn)
-		e->ops->elevator_activate_req_fn(q, rq);
-}
-
-static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
 static inline void __elv_rqhash_del(struct request *rq)
 {
 	hlist_del_init(&rq->hash);
@@ -387,9 +366,9 @@
 		parent = *p;
 		__rq = rb_entry(parent, struct request, rb_node);
 
-		if (rq->sector < __rq->sector)
+		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
 			p = &(*p)->rb_left;
-		else if (rq->sector > __rq->sector)
+		else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
 			p = &(*p)->rb_right;
 		else
 			return __rq;
@@ -417,9 +396,9 @@
 	while (n) {
 		rq = rb_entry(n, struct request, rb_node);
 
-		if (sector < rq->sector)
+		if (sector < blk_rq_pos(rq))
 			n = n->rb_left;
-		else if (sector > rq->sector)
+		else if (sector > blk_rq_pos(rq))
 			n = n->rb_right;
 		else
 			return rq;
@@ -458,14 +437,14 @@
 			break;
 		if (pos->cmd_flags & stop_flags)
 			break;
-		if (rq->sector >= boundary) {
-			if (pos->sector < boundary)
+		if (blk_rq_pos(rq) >= boundary) {
+			if (blk_rq_pos(pos) < boundary)
 				continue;
 		} else {
-			if (pos->sector >= boundary)
+			if (blk_rq_pos(pos) >= boundary)
 				break;
 		}
-		if (rq->sector >= pos->sector)
+		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
 			break;
 	}
 
@@ -563,7 +542,7 @@
 	 * in_flight count again
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq))
 			elv_deactivate_rq(q, rq);
 	}
@@ -592,6 +571,9 @@
  */
 void elv_quiesce_start(struct request_queue *q)
 {
+	if (!q->elevator)
+		return;
+
 	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
 	/*
@@ -599,7 +581,7 @@
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -643,8 +625,7 @@
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		blk_remove_plug(q);
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT:
@@ -703,7 +684,7 @@
 
 	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-			- q->in_flight;
+				- queue_in_flight(q);
 
 		if (nrq >= q->unplug_thresh)
 			__generic_unplug_device(q);
@@ -759,117 +740,6 @@
 }
 EXPORT_SYMBOL(elv_add_request);
 
-static inline struct request *__elv_next_request(struct request_queue *q)
-{
-	struct request *rq;
-
-	while (1) {
-		while (!list_empty(&q->queue_head)) {
-			rq = list_entry_rq(q->queue_head.next);
-			if (blk_do_ordered(q, &rq))
-				return rq;
-		}
-
-		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
-			return NULL;
-	}
-}
-
-struct request *elv_next_request(struct request_queue *q)
-{
-	struct request *rq;
-	int ret;
-
-	while ((rq = __elv_next_request(q)) != NULL) {
-		if (!(rq->cmd_flags & REQ_STARTED)) {
-			/*
-			 * This is the first time the device driver
-			 * sees this request (possibly after
-			 * requeueing).  Notify IO scheduler.
-			 */
-			if (blk_sorted_rq(rq))
-				elv_activate_rq(q, rq);
-
-			/*
-			 * just mark as started even if we don't start
-			 * it, a request that has been delayed should
-			 * not be passed by new incoming requests
-			 */
-			rq->cmd_flags |= REQ_STARTED;
-			trace_block_rq_issue(q, rq);
-		}
-
-		if (!q->boundary_rq || q->boundary_rq == rq) {
-			q->end_sector = rq_end_sector(rq);
-			q->boundary_rq = NULL;
-		}
-
-		if (rq->cmd_flags & REQ_DONTPREP)
-			break;
-
-		if (q->dma_drain_size && rq->data_len) {
-			/*
-			 * make sure space for the drain appears we
-			 * know we can do this because max_hw_segments
-			 * has been adjusted to be one fewer than the
-			 * device can handle
-			 */
-			rq->nr_phys_segments++;
-		}
-
-		if (!q->prep_rq_fn)
-			break;
-
-		ret = q->prep_rq_fn(q, rq);
-		if (ret == BLKPREP_OK) {
-			break;
-		} else if (ret == BLKPREP_DEFER) {
-			/*
-			 * the request may have been (partially) prepped.
-			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  REQ_STARTED will
-			 * prevent other fs requests from passing this one.
-			 */
-			if (q->dma_drain_size && rq->data_len &&
-			    !(rq->cmd_flags & REQ_DONTPREP)) {
-				/*
-				 * remove the space for the drain we added
-				 * so that we don't add it again
-				 */
-				--rq->nr_phys_segments;
-			}
-
-			rq = NULL;
-			break;
-		} else if (ret == BLKPREP_KILL) {
-			rq->cmd_flags |= REQ_QUIET;
-			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
-		} else {
-			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
-			break;
-		}
-	}
-
-	return rq;
-}
-EXPORT_SYMBOL(elv_next_request);
-
-void elv_dequeue_request(struct request_queue *q, struct request *rq)
-{
-	BUG_ON(list_empty(&rq->queuelist));
-	BUG_ON(ELV_ON_HASH(rq));
-
-	list_del_init(&rq->queuelist);
-
-	/*
-	 * the time frame between a request being removed from the lists
-	 * and to it is freed is accounted as io that is in progress at
-	 * the driver side.
-	 */
-	if (blk_account_rq(rq))
-		q->in_flight++;
-}
-
 int elv_queue_empty(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
@@ -939,7 +809,12 @@
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
 		trace_block_rq_abort(q, rq);
-		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+		/*
+		 * Mark this request as started so we don't trigger
+		 * any debug logic in the end I/O path.
+		 */
+		blk_start_request(rq);
+		__blk_end_request_all(rq, -EIO);
 	}
 }
 EXPORT_SYMBOL(elv_abort_queue);
@@ -952,7 +827,7 @@
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
@@ -967,11 +842,11 @@
 		if (!list_empty(&q->queue_head))
 			next = list_entry_rq(q->queue_head.next);
 
-		if (!q->in_flight &&
+		if (!queue_in_flight(q) &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			blk_start_queueing(q);
+			__blk_run_queue(q);
 		}
 	}
 }
@@ -1179,6 +1054,9 @@
 	char elevator_name[ELV_NAME_MAX];
 	struct elevator_type *e;
 
+	if (!q->elevator)
+		return count;
+
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	strstrip(elevator_name);
 
@@ -1202,10 +1080,15 @@
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
 	struct elevator_queue *e = q->elevator;
-	struct elevator_type *elv = e->elevator_type;
+	struct elevator_type *elv;
 	struct elevator_type *__e;
 	int len = 0;
 
+	if (!q->elevator)
+		return sprintf(name, "none\n");
+
+	elv = e->elevator_type;
+
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
 		if (!strcmp(elv->elevator_name, __e->elevator_name))
diff --git a/block/genhd.c b/block/genhd.c
index 1a4916e..fe7ccc0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -852,11 +852,21 @@
 	return sprintf(buf, "%x\n", disk->flags);
 }
 
+static ssize_t disk_alignment_offset_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -875,6 +885,7 @@
 	&dev_attr_removable.attr,
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/block/ioctl.c b/block/ioctl.c
index ad474d4..500e4c7 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -152,10 +152,10 @@
 		bio->bi_private = &wait;
 		bio->bi_sector = start;
 
-		if (len > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			len -= q->max_hw_sectors;
-			start += q->max_hw_sectors;
+		if (len > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			len -= queue_max_hw_sectors(q);
+			start += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = len << 9;
 			len = 0;
@@ -311,9 +311,9 @@
 	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return put_int(arg, bdev_hardsect_size(bdev));
+		return put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
-		return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
+		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 82a0ca2..5f8e798 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -75,7 +75,7 @@
 
 static int sg_get_reserved_size(struct request_queue *q, int __user *p)
 {
-	unsigned val = min(q->sg_reserved_size, q->max_sectors << 9);
+	unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9);
 
 	return put_user(val, p);
 }
@@ -89,8 +89,8 @@
 
 	if (size < 0)
 		return -EINVAL;
-	if (size > (q->max_sectors << 9))
-		size = q->max_sectors << 9;
+	if (size > (queue_max_sectors(q) << 9))
+		size = queue_max_sectors(q) << 9;
 
 	q->sg_reserved_size = size;
 	return 0;
@@ -230,7 +230,7 @@
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->data_len;
+	hdr->resid = rq->resid_len;
 	hdr->sb_len_wr = 0;
 
 	if (rq->sense_len && hdr->sbp) {
@@ -264,7 +264,7 @@
 	if (hdr->cmd_len > BLK_MAX_CDB)
 		return -EINVAL;
 
-	if (hdr->dxfer_len > (q->max_hw_sectors << 9))
+	if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
 		return -EIO;
 
 	if (hdr->dxfer_len)
@@ -500,9 +500,6 @@
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	rq->data = NULL;
-	rq->data_len = 0;
-	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 51b9f82..2faa9e2 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -401,7 +401,8 @@
 		/* Interrupt Line values above 0xF are forbidden */
 		if (dev->irq > 0 && (dev->irq <= 0xF)) {
 			printk(" - using IRQ %d\n", dev->irq);
-			acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE,
+			acpi_register_gsi(&dev->dev, dev->irq,
+					  ACPI_LEVEL_SENSITIVE,
 					  ACPI_ACTIVE_LOW);
 			return 0;
 		} else {
@@ -410,7 +411,7 @@
 		}
 	}
 
-	rc = acpi_register_gsi(gsi, triggering, polarity);
+	rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (rc < 0) {
 		dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
 			 pin_name(pin));
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 45ad328..23f0fb8 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -844,7 +844,7 @@
 	if (!pr)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
 		kfree(pr);
 		return -ENOMEM;
 	}
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 08186ec..15a2303 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -77,8 +77,6 @@
 			      size_t size);
 static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
 					ssize_t size);
-#define MAX_SLOTS 8
-#define MAX_RETRY 15
 
 enum {
 	AHCI_PCI_BAR		= 5,
@@ -220,6 +218,7 @@
 	AHCI_HFLAG_NO_HOTPLUG		= (1 << 7), /* ignore PxSERR.DIAG.N */
 	AHCI_HFLAG_SECT255		= (1 << 8), /* max 255 sectors */
 	AHCI_HFLAG_YES_NCQ		= (1 << 9), /* force NCQ cap on */
+	AHCI_HFLAG_NO_SUSPEND		= (1 << 10), /* don't suspend */
 
 	/* ap->flags bits */
 
@@ -230,6 +229,10 @@
 
 	ICH_MAP				= 0x90, /* ICH MAP register */
 
+	/* em constants */
+	EM_MAX_SLOTS			= 8,
+	EM_MAX_RETRY			= 5,
+
 	/* em_ctl bits */
 	EM_CTL_RST			= (1 << 9), /* Reset */
 	EM_CTL_TM			= (1 << 8), /* Transmit Message */
@@ -281,8 +284,8 @@
 	unsigned int		ncq_saw_dmas:1;
 	unsigned int		ncq_saw_sdb:1;
 	u32 			intr_mask;	/* interrupts to enable */
-	struct ahci_em_priv	em_priv[MAX_SLOTS];/* enclosure management info
-					 	 * per PM slot */
+	/* enclosure management info per PM slot */
+	struct ahci_em_priv	em_priv[EM_MAX_SLOTS];
 };
 
 static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
@@ -312,7 +315,6 @@
 static void ahci_post_internal_cmd(struct ata_queued_cmd *qc);
 static int ahci_port_resume(struct ata_port *ap);
 static void ahci_dev_config(struct ata_device *dev);
-static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl);
 static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag,
 			       u32 opts);
 #ifdef CONFIG_PM
@@ -403,14 +405,14 @@
 #define AHCI_HFLAGS(flags)	.private_data	= (void *)(flags)
 
 static const struct ata_port_info ahci_port_info[] = {
-	/* board_ahci */
+	[board_ahci] =
 	{
 		.flags		= AHCI_FLAG_COMMON,
 		.pio_mask	= ATA_PIO4,
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_vt8251 */
+	[board_ahci_vt8251] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP),
 		.flags		= AHCI_FLAG_COMMON,
@@ -418,7 +420,7 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_vt8251_ops,
 	},
-	/* board_ahci_ign_iferr */
+	[board_ahci_ign_iferr] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_IRQ_IF_ERR),
 		.flags		= AHCI_FLAG_COMMON,
@@ -426,17 +428,16 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_sb600 */
+	[board_ahci_sb600] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_SERR_INTERNAL |
-				 AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
-				 AHCI_HFLAG_SECT255),
+				 AHCI_HFLAG_NO_MSI | AHCI_HFLAG_SECT255),
 		.flags		= AHCI_FLAG_COMMON,
 		.pio_mask	= ATA_PIO4,
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_sb600_ops,
 	},
-	/* board_ahci_mv */
+	[board_ahci_mv] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI |
 				 AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP),
@@ -446,7 +447,7 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_sb700, for SB700 and SB800 */
+	[board_ahci_sb700] =	/* for SB700 and SB800 */
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_SERR_INTERNAL),
 		.flags		= AHCI_FLAG_COMMON,
@@ -454,7 +455,7 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_sb600_ops,
 	},
-	/* board_ahci_mcp65 */
+	[board_ahci_mcp65] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_YES_NCQ),
 		.flags		= AHCI_FLAG_COMMON,
@@ -462,7 +463,7 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_nopmp */
+	[board_ahci_nopmp] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_PMP),
 		.flags		= AHCI_FLAG_COMMON,
@@ -1140,12 +1141,12 @@
 			emp = &pp->em_priv[link->pmp];
 
 			/* EM Transmit bit maybe busy during init */
-			for (i = 0; i < MAX_RETRY; i++) {
+			for (i = 0; i < EM_MAX_RETRY; i++) {
 				rc = ahci_transmit_led_message(ap,
 							       emp->led_state,
 							       4);
 				if (rc == -EBUSY)
-					udelay(100);
+					msleep(1);
 				else
 					break;
 			}
@@ -1339,7 +1340,7 @@
 
 	/* get the slot number from the message */
 	pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8;
-	if (pmp < MAX_SLOTS)
+	if (pmp < EM_MAX_SLOTS)
 		emp = &pp->em_priv[pmp];
 	else
 		return -EINVAL;
@@ -1407,7 +1408,7 @@
 
 	/* get the slot number from the message */
 	pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8;
-	if (pmp < MAX_SLOTS)
+	if (pmp < EM_MAX_SLOTS)
 		emp = &pp->em_priv[pmp];
 	else
 		return -EINVAL;
@@ -2316,9 +2317,17 @@
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
 	u32 ctl;
 
+	if (mesg.event & PM_EVENT_SUSPEND &&
+	    hpriv->flags & AHCI_HFLAG_NO_SUSPEND) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+		return -EIO;
+	}
+
 	if (mesg.event & PM_EVENT_SLEEP) {
 		/* AHCI spec rev1.1 section 8.3.3:
 		 * Software must disable interrupts prior to requesting a
@@ -2575,6 +2584,51 @@
 	}
 }
 
+/*
+ * SB600 ahci controller on ASUS M2A-VM can't do 64bit DMA with older
+ * BIOS.  The oldest version known to be broken is 0901 and working is
+ * 1501 which was released on 2007-10-26.  Force 32bit DMA on anything
+ * older than 1501.  Please read bko#9412 for more info.
+ */
+static bool ahci_asus_m2a_vm_32bit_only(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		{
+			.ident = "ASUS M2A-VM",
+			.matches = {
+				DMI_MATCH(DMI_BOARD_VENDOR,
+					  "ASUSTeK Computer INC."),
+				DMI_MATCH(DMI_BOARD_NAME, "M2A-VM"),
+			},
+		},
+		{ }
+	};
+	const char *cutoff_mmdd = "10/26";
+	const char *date;
+	int year;
+
+	if (pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x12, 0) ||
+	    !dmi_check_system(sysids))
+		return false;
+
+	/*
+	 * Argh.... both version and date are free form strings.
+	 * Let's hope they're using the same date format across
+	 * different versions.
+	 */
+	date = dmi_get_system_info(DMI_BIOS_DATE);
+	year = dmi_get_year(DMI_BIOS_DATE);
+	if (date && strlen(date) >= 10 && date[2] == '/' && date[5] == '/' &&
+	    (year > 2007 ||
+	     (year == 2007 && strncmp(date, cutoff_mmdd, 5) >= 0)))
+		return false;
+
+	dev_printk(KERN_WARNING, &pdev->dev, "ASUS M2A-VM: BIOS too old, "
+		   "forcing 32bit DMA, update BIOS\n");
+
+	return true;
+}
+
 static bool ahci_broken_system_poweroff(struct pci_dev *pdev)
 {
 	static const struct dmi_system_id broken_systems[] = {
@@ -2610,6 +2664,63 @@
 	return false;
 }
 
+static bool ahci_broken_suspend(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/*
+		 * On HP dv[4-6] and HDX18 with earlier BIOSen, link
+		 * to the harddisk doesn't become online after
+		 * resuming from STR.  Warn and fail suspend.
+		 */
+		{
+			.ident = "dv4",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv4 Notebook PC"),
+			},
+			.driver_data = "F.30", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv5",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv5 Notebook PC"),
+			},
+			.driver_data = "F.16", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv6",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv6 Notebook PC"),
+			},
+			.driver_data = "F.21",	/* cutoff BIOS version */
+		},
+		{
+			.ident = "HDX18",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP HDX18 Notebook PC"),
+			},
+			.driver_data = "F.23",	/* cutoff BIOS version */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	const char *ver;
+
+	if (!dmi || pdev->bus->number || pdev->devfn != PCI_DEVFN(0x1f, 2))
+		return false;
+
+	ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+	return !ver || strcmp(ver, dmi->driver_data) < 0;
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version;
@@ -2678,6 +2789,10 @@
 	if (board_id == board_ahci_sb700 && pdev->revision >= 0x40)
 		hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL;
 
+	/* apply ASUS M2A_VM quirk */
+	if (ahci_asus_m2a_vm_32bit_only(pdev))
+		hpriv->flags |= AHCI_HFLAG_32BIT_ONLY;
+
 	if (!(hpriv->flags & AHCI_HFLAG_NO_MSI))
 		pci_enable_msi(pdev);
 
@@ -2715,6 +2830,12 @@
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_suspend(pdev)) {
+		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
+		dev_printk(KERN_WARNING, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+	}
+
 	/* CAP.NP sometimes indicate the index of the last enabled
 	 * port, at other times, that of the last possible port, so
 	 * determining the maximum port number requires looking at
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index d51a17c..d0a14cf 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -223,10 +223,8 @@
 	/* ICH8 Mobile PATA Controller */
 	{ 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 },
 
-	/* NOTE: The following PCI ids must be kept in sync with the
-	 * list in drivers/pci/quirks.c.
-	 */
-
+	/* SATA ports */
+	
 	/* 82801EB (ICH5) */
 	{ 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
 	/* 82801EB (ICH5) */
@@ -1455,6 +1453,15 @@
 			/* PCI slot number of the controller */
 			.driver_data = (void *)0x1FUL,
 		},
+		{
+			.ident = "HP Compaq nc6000",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6000"),
+			},
+			/* PCI slot number of the controller */
+			.driver_data = (void *)0x1FUL,
+		},
 
 		{ }	/* terminate list */
 	};
@@ -1500,8 +1507,8 @@
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	/* no hotplugging support (FIXME) */
-	if (!in_module_init)
+	/* no hotplugging support for later devices (FIXME) */
+	if (!in_module_init && ent->driver_data >= ich5_sata)
 		return -ENODEV;
 
 	if (piix_broken_system_poweroff(pdev)) {
@@ -1582,6 +1589,7 @@
 		host->ports[1]->mwdma_mask = 0;
 		host->ports[1]->udma_mask = 0;
 	}
+	host->flags |= ATA_HOST_PARALLEL_SCAN;
 
 	pci_set_master(pdev);
 	return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c924230..ca4d208 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5031,7 +5031,6 @@
 {
 	int nr_done = 0;
 	u32 done_mask;
-	int i;
 
 	done_mask = ap->qc_active ^ qc_active;
 
@@ -5041,16 +5040,16 @@
 		return -EINVAL;
 	}
 
-	for (i = 0; i < ATA_MAX_QUEUE; i++) {
+	while (done_mask) {
 		struct ata_queued_cmd *qc;
+		unsigned int tag = __ffs(done_mask);
 
-		if (!(done_mask & (1 << i)))
-			continue;
-
-		if ((qc = ata_qc_from_tag(ap, i))) {
+		qc = ata_qc_from_tag(ap, tag);
+		if (qc) {
 			ata_qc_complete(qc);
 			nr_done++;
 		}
+		done_mask &= ~(1 << tag);
 	}
 
 	return nr_done;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3423160..d0dfeef 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1084,7 +1084,7 @@
 	if (likely(!blk_pc_request(rq)))
 		return 0;
 
-	if (!rq->data_len || (rq->cmd_flags & REQ_RW))
+	if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_RW))
 		return 0;
 
 	return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index bb18415..bbbb1fa 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -727,17 +727,23 @@
 	else
 		iowrite16_rep(data_addr, buf, words);
 
-	/* Transfer trailing 1 byte, if any. */
+	/* Transfer trailing byte, if any. */
 	if (unlikely(buflen & 0x01)) {
-		__le16 align_buf[1] = { 0 };
-		unsigned char *trailing_buf = buf + buflen - 1;
+		unsigned char pad[2];
 
+		/* Point buf to the tail of buffer */
+		buf += buflen - 1;
+
+		/*
+		 * Use io*16_rep() accessors here as well to avoid pointlessly
+		 * swapping bytes to and fro on the big endian machines...
+		 */
 		if (rw == READ) {
-			align_buf[0] = cpu_to_le16(ioread16(data_addr));
-			memcpy(trailing_buf, align_buf, 1);
+			ioread16_rep(data_addr, pad, 1);
+			*buf = pad[0];
 		} else {
-			memcpy(align_buf, trailing_buf, 1);
-			iowrite16(le16_to_cpu(align_buf[0]), data_addr);
+			pad[0] = *buf;
+			iowrite16_rep(data_addr, pad, 1);
 		}
 		words++;
 	}
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 751b7ea..fc9c5d6 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -497,14 +497,16 @@
 	};
 	/* Revision 0x20 added DMA */
 	static const struct ata_port_info info_20 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.port_ops = &ali_20_port_ops
 	};
 	/* Revision 0x20 with support logic added UDMA */
 	static const struct ata_port_info info_20_udma = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA2,
@@ -512,7 +514,8 @@
 	};
 	/* Revision 0xC2 adds UDMA66 */
 	static const struct ata_port_info info_c2 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -520,7 +523,8 @@
 	};
 	/* Revision 0xC3 is UDMA66 for now */
 	static const struct ata_port_info info_c3 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -528,7 +532,8 @@
 	};
 	/* Revision 0xC4 is UDMA100 */
 	static const struct ata_port_info info_c4 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA5,
@@ -536,7 +541,7 @@
 	};
 	/* Revision 0xC5 is UDMA133 with LBA48 DMA */
 	static const struct ata_port_info info_c5 = {
-		.flags = ATA_FLAG_SLAVE_POSS,
+		.flags = ATA_FLAG_SLAVE_POSS | 	ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index 2085e0a..2a6412f 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c
@@ -22,7 +22,7 @@
 #include <linux/ata.h>
 
 #define DRV_NAME	"pata_efar"
-#define DRV_VERSION	"0.4.4"
+#define DRV_VERSION	"0.4.5"
 
 /**
  *	efar_pre_reset	-	Enable bits
@@ -98,18 +98,17 @@
 			    { 2, 1 },
 			    { 2, 3 }, };
 
-	if (pio > 2)
-		control |= 1;	/* TIME1 enable */
+	if (pio > 1)
+		control |= 1;	/* TIME */
 	if (ata_pio_need_iordy(adev))	/* PIO 3/4 require IORDY */
-		control |= 2;	/* IE enable */
-	/* Intel specifies that the PPE functionality is for disk only */
+		control |= 2;	/* IE */
+	/* Intel specifies that the prefetch/posting is for disk only */
 	if (adev->class == ATA_DEV_ATA)
-		control |= 4;	/* PPE enable */
+		control |= 4;	/* PPE */
 
 	pci_read_config_word(dev, idetm_port, &idetm_data);
 
-	/* Enable PPE, IE and TIME as appropriate */
-
+	/* Set PPE, IE, and TIME as appropriate */
 	if (adev->devno == 0) {
 		idetm_data &= 0xCCF0;
 		idetm_data |= control;
@@ -129,7 +128,7 @@
 		pci_write_config_byte(dev, 0x44, slave_data);
 	}
 
-	idetm_data |= 0x4000;	/* Ensure SITRE is enabled */
+	idetm_data |= 0x4000;	/* Ensure SITRE is set */
 	pci_write_config_word(dev, idetm_port, idetm_data);
 }
 
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index f72c6c5..6932e56 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -48,6 +48,7 @@
  *
  */
 
+#include <linux/async.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -1028,6 +1029,7 @@
 				&legacy_sht);
 	if (ret)
 		goto fail;
+	async_synchronize_full();
 	ld->platform_dev = pdev;
 
 	/* Nothing found means we drop the port as its probably not there */
diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index 9a69809..f0d52f7 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -26,7 +26,7 @@
 	unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
 	/* Firmware forgets to mark words 85-87 valid */
 	if (err_mask == 0)
-		id[ATA_ID_CSF_DEFAULT] |= 0x0400;
+		id[ATA_ID_CSF_DEFAULT] |= 0x4000;
 	return err_mask;
 }
 
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 6cda12b..b2d11f3 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -305,8 +305,8 @@
 static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
 static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
-static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class,
-				   unsigned long deadline);
+static int nv_hardreset(struct ata_link *link, unsigned int *class,
+			unsigned long deadline);
 static void nv_nf2_freeze(struct ata_port *ap);
 static void nv_nf2_thaw(struct ata_port *ap);
 static void nv_ck804_freeze(struct ata_port *ap);
@@ -406,49 +406,82 @@
 	.slave_configure	= nv_swncq_slave_config,
 };
 
-static struct ata_port_operations nv_common_ops = {
+/*
+ * NV SATA controllers have various different problems with hardreset
+ * protocol depending on the specific controller and device.
+ *
+ * GENERIC:
+ *
+ *  bko11195 reports that link doesn't come online after hardreset on
+ *  generic nv's and there have been several other similar reports on
+ *  linux-ide.
+ *
+ *  bko12351#c23 reports that warmplug on MCP61 doesn't work with
+ *  softreset.
+ *
+ * NF2/3:
+ *
+ *  bko3352 reports nf2/3 controllers can't determine device signature
+ *  reliably after hardreset.  The following thread reports detection
+ *  failure on cold boot with the standard debouncing timing.
+ *
+ *  http://thread.gmane.org/gmane.linux.ide/34098
+ *
+ *  bko12176 reports that hardreset fails to bring up the link during
+ *  boot on nf2.
+ *
+ * CK804:
+ *
+ *  For initial probing after boot and hot plugging, hardreset mostly
+ *  works fine on CK804 but curiously, reprobing on the initial port
+ *  by rescanning or rmmod/insmod fails to acquire the initial D2H Reg
+ *  FIS in somewhat undeterministic way.
+ *
+ * SWNCQ:
+ *
+ *  bko12351 reports that when SWNCQ is enabled, for hotplug to work,
+ *  hardreset should be used and hardreset can't report proper
+ *  signature, which suggests that mcp5x is closer to nf2 as long as
+ *  reset quirkiness is concerned.
+ *
+ *  bko12703 reports that boot probing fails for intel SSD with
+ *  hardreset.  Link fails to come online.  Softreset works fine.
+ *
+ * The failures are varied but the following patterns seem true for
+ * all flavors.
+ *
+ * - Softreset during boot always works.
+ *
+ * - Hardreset during boot sometimes fails to bring up the link on
+ *   certain comibnations and device signature acquisition is
+ *   unreliable.
+ *
+ * - Hardreset is often necessary after hotplug.
+ *
+ * So, preferring softreset for boot probing and error handling (as
+ * hardreset might bring down the link) but using hardreset for
+ * post-boot probing should work around the above issues in most
+ * cases.  Define nv_hardreset() which only kicks in for post-boot
+ * probing and use it for all variants.
+ */
+static struct ata_port_operations nv_generic_ops = {
 	.inherits		= &ata_bmdma_port_ops,
 	.lost_interrupt		= ATA_OP_NULL,
 	.scr_read		= nv_scr_read,
 	.scr_write		= nv_scr_write,
+	.hardreset		= nv_hardreset,
 };
 
-/* OSDL bz11195 reports that link doesn't come online after hardreset
- * on generic nv's and there have been several other similar reports
- * on linux-ide.  Disable hardreset for generic nv's.
- */
-static struct ata_port_operations nv_generic_ops = {
-	.inherits		= &nv_common_ops,
-	.hardreset		= ATA_OP_NULL,
-};
-
-/* nf2 is ripe with hardreset related problems.
- *
- * kernel bz#3352 reports nf2/3 controllers can't determine device
- * signature reliably.  The following thread reports detection failure
- * on cold boot with the standard debouncing timing.
- *
- * http://thread.gmane.org/gmane.linux.ide/34098
- *
- * And bz#12176 reports that hardreset simply doesn't work on nf2.
- * Give up on it and just don't do hardreset.
- */
 static struct ata_port_operations nv_nf2_ops = {
 	.inherits		= &nv_generic_ops,
 	.freeze			= nv_nf2_freeze,
 	.thaw			= nv_nf2_thaw,
 };
 
-/* For initial probing after boot and hot plugging, hardreset mostly
- * works fine on CK804 but curiously, reprobing on the initial port by
- * rescanning or rmmod/insmod fails to acquire the initial D2H Reg FIS
- * in somewhat undeterministic way.  Use noclassify hardreset.
- */
 static struct ata_port_operations nv_ck804_ops = {
-	.inherits		= &nv_common_ops,
+	.inherits		= &nv_generic_ops,
 	.freeze			= nv_ck804_freeze,
 	.thaw			= nv_ck804_thaw,
-	.hardreset		= nv_noclassify_hardreset,
 	.host_stop		= nv_ck804_host_stop,
 };
 
@@ -476,19 +509,8 @@
 	.host_stop		= nv_adma_host_stop,
 };
 
-/* Kernel bz#12351 reports that when SWNCQ is enabled, for hotplug to
- * work, hardreset should be used and hardreset can't report proper
- * signature, which suggests that mcp5x is closer to nf2 as long as
- * reset quirkiness is concerned.  Define separate ops for mcp5x with
- * nv_noclassify_hardreset().
- */
-static struct ata_port_operations nv_mcp5x_ops = {
-	.inherits		= &nv_common_ops,
-	.hardreset		= nv_noclassify_hardreset,
-};
-
 static struct ata_port_operations nv_swncq_ops = {
-	.inherits		= &nv_mcp5x_ops,
+	.inherits		= &nv_generic_ops,
 
 	.qc_defer		= ata_std_qc_defer,
 	.qc_prep		= nv_swncq_qc_prep,
@@ -557,7 +579,7 @@
 		.pio_mask	= NV_PIO_MASK,
 		.mwdma_mask	= NV_MWDMA_MASK,
 		.udma_mask	= NV_UDMA_MASK,
-		.port_ops	= &nv_mcp5x_ops,
+		.port_ops	= &nv_generic_ops,
 		.private_data	= NV_PI_PRIV(nv_generic_interrupt, &nv_sht),
 	},
 	/* SWNCQ */
@@ -1559,15 +1581,24 @@
 	return 0;
 }
 
-static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class,
-				   unsigned long deadline)
+static int nv_hardreset(struct ata_link *link, unsigned int *class,
+			unsigned long deadline)
 {
-	bool online;
-	int rc;
+	struct ata_eh_context *ehc = &link->eh_context;
 
-	rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
-				 &online, NULL);
-	return online ? -EAGAIN : rc;
+	/* Do hardreset iff it's post-boot probing, please read the
+	 * comment above port ops for details.
+	 */
+	if (!(link->ap->pflags & ATA_PFLAG_LOADING) &&
+	    !ata_dev_enabled(link->device))
+		sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
+				    NULL, NULL);
+	else if (!(ehc->i.flags & ATA_EHI_QUIET))
+		ata_link_printk(link, KERN_INFO,
+				"nv: skipping hardreset on occupied port\n");
+
+	/* device signature acquisition is unreliable */
+	return -EAGAIN;
 }
 
 static void nv_nf2_freeze(struct ata_port *ap)
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index e67ce8e..030ec07 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -183,7 +183,7 @@
 };
 
 static struct ata_port_operations sil_ops = {
-	.inherits		= &ata_bmdma_port_ops,
+	.inherits		= &ata_bmdma32_port_ops,
 	.dev_config		= sil_dev_config,
 	.set_mode		= sil_set_mode,
 	.bmdma_setup            = sil_bmdma_setup,
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index eb05a3c..bbcf970 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -193,6 +193,7 @@
 					  PDC_TIMER_MASK_INT,
 };
 
+#define ECC_ERASE_BUF_SZ (128 * 1024)
 
 struct pdc_port_priv {
 	u8			dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512];
@@ -1280,7 +1281,6 @@
 {
 	int speed, size, length;
 	u32 addr, spd0, pci_status;
-	u32 tmp = 0;
 	u32 time_period = 0;
 	u32 tcount = 0;
 	u32 ticks = 0;
@@ -1395,14 +1395,17 @@
 	pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
 			  PDC_DIMM_SPD_TYPE, &spd0);
 	if (spd0 == 0x02) {
+		void *buf;
 		VPRINTK("Start ECC initialization\n");
 		addr = 0;
 		length = size * 1024 * 1024;
+		buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL);
 		while (addr < length) {
-			pdc20621_put_to_dimm(host, (void *) &tmp, addr,
-					     sizeof(u32));
-			addr += sizeof(u32);
+			pdc20621_put_to_dimm(host, buf, addr,
+					     ECC_ERASE_BUF_SZ);
+			addr += ECC_ERASE_BUF_SZ;
 		}
+		kfree(buf);
 		VPRINTK("Finish ECC initialization\n");
 	}
 	return 0;
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index d3a59c68..8a267c4 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -17,7 +17,7 @@
 #include <linux/bitops.h>
 #include <linux/mutex.h>
 #include <linux/kthread.h>
-
+#include <linux/highmem.h>
 #include <linux/firmware.h>
 #include "base.h"
 
@@ -45,7 +45,10 @@
 	struct bin_attribute attr_data;
 	struct firmware *fw;
 	unsigned long status;
-	int alloc_size;
+	struct page **pages;
+	int nr_pages;
+	int page_array_size;
+	const char *vdata;
 	struct timer_list timeout;
 };
 
@@ -122,6 +125,10 @@
 	return sprintf(buf, "%d\n", loading);
 }
 
+/* Some architectures don't have PAGE_KERNEL_RO */
+#ifndef PAGE_KERNEL_RO
+#define PAGE_KERNEL_RO PAGE_KERNEL
+#endif
 /**
  * firmware_loading_store - set value in the 'loading' control file
  * @dev: device pointer
@@ -141,6 +148,7 @@
 {
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
 	int loading = simple_strtol(buf, NULL, 10);
+	int i;
 
 	switch (loading) {
 	case 1:
@@ -151,13 +159,30 @@
 		}
 		vfree(fw_priv->fw->data);
 		fw_priv->fw->data = NULL;
+		for (i = 0; i < fw_priv->nr_pages; i++)
+			__free_page(fw_priv->pages[i]);
+		kfree(fw_priv->pages);
+		fw_priv->pages = NULL;
+		fw_priv->page_array_size = 0;
+		fw_priv->nr_pages = 0;
 		fw_priv->fw->size = 0;
-		fw_priv->alloc_size = 0;
 		set_bit(FW_STATUS_LOADING, &fw_priv->status);
 		mutex_unlock(&fw_lock);
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
+			vfree(fw_priv->fw->data);
+			fw_priv->fw->data = vmap(fw_priv->pages,
+						 fw_priv->nr_pages,
+						 0, PAGE_KERNEL_RO);
+			if (!fw_priv->fw->data) {
+				dev_err(dev, "%s: vmap() failed\n", __func__);
+				goto err;
+			}
+			/* Pages will be freed by vfree() */
+			fw_priv->pages = NULL;
+			fw_priv->page_array_size = 0;
+			fw_priv->nr_pages = 0;
 			complete(&fw_priv->completion);
 			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
 			break;
@@ -167,6 +192,7 @@
 		dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading);
 		/* fallthrough */
 	case -1:
+	err:
 		fw_load_abort(fw_priv);
 		break;
 	}
@@ -191,8 +217,28 @@
 		ret_count = -ENODEV;
 		goto out;
 	}
-	ret_count = memory_read_from_buffer(buffer, count, &offset,
-						fw->data, fw->size);
+	if (offset > fw->size)
+		return 0;
+	if (count > fw->size - offset)
+		count = fw->size - offset;
+
+	ret_count = count;
+
+	while (count) {
+		void *page_data;
+		int page_nr = offset >> PAGE_SHIFT;
+		int page_ofs = offset & (PAGE_SIZE-1);
+		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
+
+		page_data = kmap(fw_priv->pages[page_nr]);
+
+		memcpy(buffer, page_data + page_ofs, page_cnt);
+
+		kunmap(fw_priv->pages[page_nr]);
+		buffer += page_cnt;
+		offset += page_cnt;
+		count -= page_cnt;
+	}
 out:
 	mutex_unlock(&fw_lock);
 	return ret_count;
@@ -201,27 +247,39 @@
 static int
 fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 {
-	u8 *new_data;
-	int new_size = fw_priv->alloc_size;
+	int pages_needed = ALIGN(min_size, PAGE_SIZE) >> PAGE_SHIFT;
 
-	if (min_size <= fw_priv->alloc_size)
-		return 0;
+	/* If the array of pages is too small, grow it... */
+	if (fw_priv->page_array_size < pages_needed) {
+		int new_array_size = max(pages_needed,
+					 fw_priv->page_array_size * 2);
+		struct page **new_pages;
 
-	new_size = ALIGN(min_size, PAGE_SIZE);
-	new_data = vmalloc(new_size);
-	if (!new_data) {
-		printk(KERN_ERR "%s: unable to alloc buffer\n", __func__);
-		/* Make sure that we don't keep incomplete data */
-		fw_load_abort(fw_priv);
-		return -ENOMEM;
+		new_pages = kmalloc(new_array_size * sizeof(void *),
+				    GFP_KERNEL);
+		if (!new_pages) {
+			fw_load_abort(fw_priv);
+			return -ENOMEM;
+		}
+		memcpy(new_pages, fw_priv->pages,
+		       fw_priv->page_array_size * sizeof(void *));
+		memset(&new_pages[fw_priv->page_array_size], 0, sizeof(void *) *
+		       (new_array_size - fw_priv->page_array_size));
+		kfree(fw_priv->pages);
+		fw_priv->pages = new_pages;
+		fw_priv->page_array_size = new_array_size;
 	}
-	fw_priv->alloc_size = new_size;
-	if (fw_priv->fw->data) {
-		memcpy(new_data, fw_priv->fw->data, fw_priv->fw->size);
-		vfree(fw_priv->fw->data);
+
+	while (fw_priv->nr_pages < pages_needed) {
+		fw_priv->pages[fw_priv->nr_pages] =
+			alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+
+		if (!fw_priv->pages[fw_priv->nr_pages]) {
+			fw_load_abort(fw_priv);
+			return -ENOMEM;
+		}
+		fw_priv->nr_pages++;
 	}
-	fw_priv->fw->data = new_data;
-	BUG_ON(min_size > fw_priv->alloc_size);
 	return 0;
 }
 
@@ -258,10 +316,25 @@
 	if (retval)
 		goto out;
 
-	memcpy((u8 *)fw->data + offset, buffer, count);
-
-	fw->size = max_t(size_t, offset + count, fw->size);
 	retval = count;
+
+	while (count) {
+		void *page_data;
+		int page_nr = offset >> PAGE_SHIFT;
+		int page_ofs = offset & (PAGE_SIZE - 1);
+		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
+
+		page_data = kmap(fw_priv->pages[page_nr]);
+
+		memcpy(page_data + page_ofs, buffer, page_cnt);
+
+		kunmap(fw_priv->pages[page_nr]);
+		buffer += page_cnt;
+		offset += page_cnt;
+		count -= page_cnt;
+	}
+
+	fw->size = max_t(size_t, offset, fw->size);
 out:
 	mutex_unlock(&fw_lock);
 	return retval;
@@ -277,7 +350,11 @@
 static void fw_dev_release(struct device *dev)
 {
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
+	int i;
 
+	for (i = 0; i < fw_priv->nr_pages; i++)
+		__free_page(fw_priv->pages[i]);
+	kfree(fw_priv->pages);
 	kfree(fw_priv);
 	kfree(dev);
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8b4708e..ead3f64 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -469,22 +469,6 @@
 	drv->shutdown(dev);
 }
 
-static int platform_drv_suspend(struct device *_dev, pm_message_t state)
-{
-	struct platform_driver *drv = to_platform_driver(_dev->driver);
-	struct platform_device *dev = to_platform_device(_dev);
-
-	return drv->suspend(dev, state);
-}
-
-static int platform_drv_resume(struct device *_dev)
-{
-	struct platform_driver *drv = to_platform_driver(_dev->driver);
-	struct platform_device *dev = to_platform_device(_dev);
-
-	return drv->resume(dev);
-}
-
 /**
  * platform_driver_register
  * @drv: platform driver structure
@@ -498,10 +482,10 @@
 		drv->driver.remove = platform_drv_remove;
 	if (drv->shutdown)
 		drv->driver.shutdown = platform_drv_shutdown;
-	if (drv->suspend)
-		drv->driver.suspend = platform_drv_suspend;
-	if (drv->resume)
-		drv->driver.resume = platform_drv_resume;
+	if (drv->suspend || drv->resume)
+		pr_warning("Platform driver '%s' needs updating - please use "
+			"dev_pm_ops\n", drv->driver.name);
+
 	return driver_register(&drv->driver);
 }
 EXPORT_SYMBOL_GPL(platform_driver_register);
@@ -633,10 +617,12 @@
 
 static int platform_legacy_suspend(struct device *dev, pm_message_t mesg)
 {
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = to_platform_device(dev);
 	int ret = 0;
 
-	if (dev->driver && dev->driver->suspend)
-		ret = dev->driver->suspend(dev, mesg);
+	if (dev->driver && pdrv->suspend)
+		ret = pdrv->suspend(pdev, mesg);
 
 	return ret;
 }
@@ -667,10 +653,12 @@
 
 static int platform_legacy_resume(struct device *dev)
 {
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = to_platform_device(dev);
 	int ret = 0;
 
-	if (dev->driver && dev->driver->resume)
-		ret = dev->driver->resume(dev);
+	if (dev->driver && pdrv->resume)
+		ret = pdrv->resume(pdev);
 
 	return ret;
 }
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 3e4bc69..fae7254 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -315,13 +315,13 @@
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device_noirq - Power on one device (early resume).
+ *	device_resume_noirq - Power on one device (early resume).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	Must be called with interrupts disabled.
  */
-static int resume_device_noirq(struct device *dev, pm_message_t state)
+static int device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -334,9 +334,6 @@
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "EARLY ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->resume_early) {
-		pm_dev_dbg(dev, state, "legacy EARLY ");
-		error = dev->bus->resume_early(dev);
 	}
  End:
 	TRACE_RESUME(error);
@@ -344,16 +341,16 @@
 }
 
 /**
- *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *	dpm_resume_noirq - Power on all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Execute the appropriate "noirq resume" callback for all devices marked
- *	as DPM_OFF_IRQ.
+ *	Call the "noirq" resume handlers for all devices marked as
+ *	DPM_OFF_IRQ and enable device drivers to receive interrupts.
  *
  *	Must be called under dpm_list_mtx.  Device drivers should not receive
  *	interrupts while it's being executed.
  */
-static void dpm_power_up(pm_message_t state)
+void dpm_resume_noirq(pm_message_t state)
 {
 	struct device *dev;
 
@@ -363,33 +360,21 @@
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = resume_device_noirq(dev, state);
+			error = device_resume_noirq(dev, state);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
 	mutex_unlock(&dpm_list_mtx);
-}
-
-/**
- *	device_power_up - Turn on all devices that need special attention.
- *	@state: PM transition of the system being carried out.
- *
- *	Call the "early" resume handlers and enable device drivers to receive
- *	interrupts.
- */
-void device_power_up(pm_message_t state)
-{
-	dpm_power_up(state);
 	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(device_power_up);
+EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- *	resume_device - Restore state for one device.
+ *	device_resume - Restore state for one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int resume_device(struct device *dev, pm_message_t state)
+static int device_resume(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -414,9 +399,6 @@
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->resume) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->resume(dev);
 		}
 		if (error)
 			goto End;
@@ -462,7 +444,7 @@
 			dev->power.status = DPM_RESUMING;
 			mutex_unlock(&dpm_list_mtx);
 
-			error = resume_device(dev, state);
+			error = device_resume(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 			if (error)
@@ -480,11 +462,11 @@
 }
 
 /**
- *	complete_device - Complete a PM transition for given device
+ *	device_complete - Complete a PM transition for given device
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static void complete_device(struct device *dev, pm_message_t state)
+static void device_complete(struct device *dev, pm_message_t state)
 {
 	down(&dev->sem);
 
@@ -527,7 +509,7 @@
 			dev->power.status = DPM_ON;
 			mutex_unlock(&dpm_list_mtx);
 
-			complete_device(dev, state);
+			device_complete(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 		}
@@ -540,19 +522,19 @@
 }
 
 /**
- *	device_resume - Restore state of each device in system.
+ *	dpm_resume_end - Restore state of each device in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Resume all the devices, unlock them all, and allow new
  *	devices to be registered once again.
  */
-void device_resume(pm_message_t state)
+void dpm_resume_end(pm_message_t state)
 {
 	might_sleep();
 	dpm_resume(state);
 	dpm_complete(state);
 }
-EXPORT_SYMBOL_GPL(device_resume);
+EXPORT_SYMBOL_GPL(dpm_resume_end);
 
 
 /*------------------------- Suspend routines -------------------------*/
@@ -577,13 +559,13 @@
 }
 
 /**
- *	suspend_device_noirq - Shut down one device (late suspend).
+ *	device_suspend_noirq - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	This is called with interrupts off and only a single CPU running.
  */
-static int suspend_device_noirq(struct device *dev, pm_message_t state)
+static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -593,24 +575,20 @@
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "LATE ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->suspend_late) {
-		pm_dev_dbg(dev, state, "legacy LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
 	}
 	return error;
 }
 
 /**
- *	device_power_down - Shut down special devices.
+ *	dpm_suspend_noirq - Power down all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Prevent device drivers from receiving interrupts and call the "late"
+ *	Prevent device drivers from receiving interrupts and call the "noirq"
  *	suspend handlers.
  *
  *	Must be called under dpm_list_mtx.
  */
-int device_power_down(pm_message_t state)
+int dpm_suspend_noirq(pm_message_t state)
 {
 	struct device *dev;
 	int error = 0;
@@ -618,7 +596,7 @@
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = suspend_device_noirq(dev, state);
+		error = device_suspend_noirq(dev, state);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			break;
@@ -627,17 +605,17 @@
 	}
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
-		device_power_up(resume_event(state));
+		dpm_resume_noirq(resume_event(state));
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
 
 /**
- *	suspend_device - Save state of one device.
+ *	device_suspend - Save state of one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int suspend_device(struct device *dev, pm_message_t state)
+static int device_suspend(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -660,10 +638,6 @@
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->suspend) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->suspend(dev, state);
-			suspend_report_result(dev->type->suspend, error);
 		}
 		if (error)
 			goto End;
@@ -704,7 +678,7 @@
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
 
-		error = suspend_device(dev, state);
+		error = device_suspend(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -723,11 +697,11 @@
 }
 
 /**
- *	prepare_device - Execute the ->prepare() callback(s) for given device.
+ *	device_prepare - Execute the ->prepare() callback(s) for given device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int prepare_device(struct device *dev, pm_message_t state)
+static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -781,7 +755,7 @@
 		dev->power.status = DPM_PREPARING;
 		mutex_unlock(&dpm_list_mtx);
 
-		error = prepare_device(dev, state);
+		error = device_prepare(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -807,12 +781,12 @@
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
+ *	dpm_suspend_start - Save state and stop all devices in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Prepare and suspend all devices.
  */
-int device_suspend(pm_message_t state)
+int dpm_suspend_start(pm_message_t state)
 {
 	int error;
 
@@ -822,7 +796,7 @@
 		error = dpm_suspend(state);
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_suspend);
+EXPORT_SYMBOL_GPL(dpm_suspend_start);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 3236b43..9742a78 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -343,11 +343,15 @@
 	/* First, call the class-specific one */
 	if (cls->resume)
 		cls->resume(dev);
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled after %pF\n", cls->resume);
 
 	/* Call auxillary drivers next. */
 	list_for_each_entry(drv, &cls->drivers, entry) {
 		if (drv->resume)
 			drv->resume(dev);
+		WARN_ONCE(!irqs_disabled(),
+			"Interrupts enabled after %pF\n", drv->resume);
 	}
 }
 
@@ -377,6 +381,9 @@
 	if (ret)
 		return ret;
 
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled while suspending system devices\n");
+
 	pr_debug("Suspending System Devices\n");
 
 	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
@@ -393,6 +400,9 @@
 					if (ret)
 						goto aux_driver;
 				}
+				WARN_ONCE(!irqs_disabled(),
+					"Interrupts enabled after %pF\n",
+					drv->suspend);
 			}
 
 			/* Now call the generic one */
@@ -400,6 +410,9 @@
 				ret = cls->suspend(sysdev, state);
 				if (ret)
 					goto cls_driver;
+				WARN_ONCE(!irqs_disabled(),
+					"Interrupts enabled after %pF\n",
+					cls->suspend);
 			}
 		}
 	}
@@ -452,6 +465,9 @@
 {
 	struct sysdev_class *cls;
 
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled while resuming system devices\n");
+
 	pr_debug("Resuming System Devices\n");
 
 	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f22ed6c..668dc23 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3321,7 +3321,7 @@
 	DAC960_Command_T *Command;
 
    while(1) {
-	Request = elv_next_request(req_q);
+	Request = blk_peek_request(req_q);
 	if (!Request)
 		return 1;
 
@@ -3338,10 +3338,10 @@
 	}
 	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
-	Command->BlockNumber = Request->sector;
-	Command->BlockCount = Request->nr_sectors;
+	Command->BlockNumber = blk_rq_pos(Request);
+	Command->BlockCount = blk_rq_sectors(Request);
 	Command->Request = Request;
-	blkdev_dequeue_request(Request);
+	blk_start_request(Request);
 	Command->SegmentCount = blk_rq_map_sg(req_q,
 		  Command->Request, Command->cmd_sglist);
 	/* pci_map_sg MAY change the value of SegCount */
@@ -3431,7 +3431,7 @@
    * successfully as possible.
    */
   Command->SegmentCount = 1;
-  Command->BlockNumber = Request->sector;
+  Command->BlockNumber = blk_rq_pos(Request);
   Command->BlockCount = 1;
   DAC960_QueueReadWriteCommand(Command);
   return;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index ddea8e4..ac5e05a 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -412,7 +412,7 @@
 
 config MG_DISK
 	tristate "mGine mflash, gflash support"
-	depends on ARM && ATA && GPIOLIB
+	depends on ARM && GPIOLIB
 	help
 	  mGine mFlash(gFlash) block device driver
 
@@ -438,7 +438,7 @@
 
 config XILINX_SYSACE
 	tristate "Xilinx SystemACE support"
-	depends on 4xx
+	depends on 4xx || MICROBLAZE
 	help
 	  Include support for the Xilinx SystemACE CompactFlash interface
 
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8df436f..9c6e5b0 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -112,8 +112,6 @@
 MODULE_LICENSE("GPL");
 
 static struct request_queue *floppy_queue;
-#define QUEUE (floppy_queue)
-#define CURRENT elv_next_request(floppy_queue)
 
 /*
  *  Macros
@@ -1335,64 +1333,60 @@
 
 static void redo_fd_request(void)
 {
+	struct request *rq;
 	unsigned int cnt, block, track, sector;
 	int drive;
 	struct amiga_floppy_struct *floppy;
 	char *data;
 	unsigned long flags;
+	int err;
 
- repeat:
-	if (!CURRENT) {
+next_req:
+	rq = blk_fetch_request(floppy_queue);
+	if (!rq) {
 		/* Nothing left to do */
 		return;
 	}
 
-	floppy = CURRENT->rq_disk->private_data;
+	floppy = rq->rq_disk->private_data;
 	drive = floppy - unit;
 
+next_segment:
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0; cnt < CURRENT->current_nr_sectors; cnt++) { 
+	for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
-		       CURRENT->sector,cnt,
-		       (rq_data_dir(CURRENT) == READ) ? "read" : "write");
+		       blk_rq_pos(rq), cnt,
+		       (rq_data_dir(rq) == READ) ? "read" : "write");
 #endif
-		block = CURRENT->sector + cnt;
+		block = blk_rq_pos(rq) + cnt;
 		if ((int)block > floppy->blocks) {
-			end_request(CURRENT, 0);
-			goto repeat;
+			err = -EIO;
+			break;
 		}
 
 		track = block / (floppy->dtype->sects * floppy->type->sect_mult);
 		sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
-		data = CURRENT->buffer + 512 * cnt;
+		data = rq->buffer + 512 * cnt;
 #ifdef DEBUG
 		printk("access to track %d, sector %d, with buffer at "
 		       "0x%08lx\n", track, sector, data);
 #endif
 
-		if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) {
-			printk(KERN_WARNING "do_fd_request: unknown command\n");
-			end_request(CURRENT, 0);
-			goto repeat;
-		}
 		if (get_track(drive, track) == -1) {
-			end_request(CURRENT, 0);
-			goto repeat;
+			err = -EIO;
+			break;
 		}
 
-		switch (rq_data_dir(CURRENT)) {
-		case READ:
+		if (rq_data_dir(rq) == READ) {
 			memcpy(data, floppy->trackbuf + sector * 512, 512);
-			break;
-
-		case WRITE:
+		} else {
 			memcpy(floppy->trackbuf + sector * 512, data, 512);
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				end_request(CURRENT, 0);
-				goto repeat;
+				err = -EIO;
+				break;
 			}
 			/*
 			 * setup a callback to write the track buffer
@@ -1404,14 +1398,12 @@
 		        /* reset the timer */
 			mod_timer (flush_track_timer + drive, jiffies + 1);
 			local_irq_restore(flags);
-			break;
 		}
 	}
-	CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-	CURRENT->sector += CURRENT->current_nr_sectors;
 
-	end_request(CURRENT, 1);
-	goto repeat;
+	if (__blk_end_request_cur(rq, err))
+		goto next_segment;
+	goto next_req;
 }
 
 static void do_fd_request(struct request_queue * q)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4234c11..f5e7180 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -79,9 +79,7 @@
 #undef DEBUG
 
 static struct request_queue *floppy_queue;
-
-#define QUEUE (floppy_queue)
-#define CURRENT elv_next_request(floppy_queue)
+static struct request *fd_request;
 
 /* Disk types: DD, HD, ED */
 static struct atari_disk_type {
@@ -376,6 +374,12 @@
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
 	
+static void fd_end_request_cur(int err)
+{
+	if (!__blk_end_request_cur(fd_request, err))
+		fd_request = NULL;
+}
+
 static inline void start_motor_off_timer(void)
 {
 	mod_timer(&motor_off_timer, jiffies + FD_MOTOR_OFF_DELAY);
@@ -606,15 +610,15 @@
 		return;
 	}
 
-	if (!CURRENT)
+	if (!fd_request)
 		return;
 
-	CURRENT->errors++;
-	if (CURRENT->errors >= MAX_ERRORS) {
+	fd_request->errors++;
+	if (fd_request->errors >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		end_request(CURRENT, 0);
+		fd_end_request_cur(-EIO);
 	}
-	else if (CURRENT->errors == RECALIBRATE_ERRORS) {
+	else if (fd_request->errors == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
 		if (SelectedDrive != -1)
 			SUD.track = -1;
@@ -725,16 +729,14 @@
 	    if (IS_BUFFERED( drive, ReqSide, ReqTrack )) {
 		if (ReqCmd == READ) {
 		    copy_buffer( SECTOR_BUFFER(ReqSector), ReqData );
-		    if (++ReqCnt < CURRENT->current_nr_sectors) {
+		    if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
 			/* read next sector */
 			setup_req_params( drive );
 			goto repeat;
 		    }
 		    else {
 			/* all sectors finished */
-			CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-			CURRENT->sector += CURRENT->current_nr_sectors;
-			end_request(CURRENT, 1);
+			fd_end_request_cur(0);
 			redo_fd_request();
 			return;
 		    }
@@ -1132,16 +1134,14 @@
 		}
 	}
   
-	if (++ReqCnt < CURRENT->current_nr_sectors) {
+	if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
 		/* read next sector */
 		setup_req_params( SelectedDrive );
 		do_fd_action( SelectedDrive );
 	}
 	else {
 		/* all sectors finished */
-		CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-		CURRENT->sector += CURRENT->current_nr_sectors;
-		end_request(CURRENT, 1);
+		fd_end_request_cur(0);
 		redo_fd_request();
 	}
 	return;
@@ -1382,7 +1382,7 @@
 	ReqData = ReqBuffer + 512 * ReqCnt;
 
 	if (UseTrackbuffer)
-		read_track = (ReqCmd == READ && CURRENT->errors == 0);
+		read_track = (ReqCmd == READ && fd_request->errors == 0);
 	else
 		read_track = 0;
 
@@ -1396,25 +1396,27 @@
 	int drive, type;
 	struct atari_floppy_struct *floppy;
 
-	DPRINT(("redo_fd_request: CURRENT=%p dev=%s CURRENT->sector=%ld\n",
-		CURRENT, CURRENT ? CURRENT->rq_disk->disk_name : "",
-		CURRENT ? CURRENT->sector : 0 ));
+	DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n",
+		fd_request, fd_request ? fd_request->rq_disk->disk_name : "",
+		fd_request ? blk_rq_pos(fd_request) : 0 ));
 
 	IsFormatting = 0;
 
 repeat:
+	if (!fd_request) {
+		fd_request = blk_fetch_request(floppy_queue);
+		if (!fd_request)
+			goto the_end;
+	}
 
-	if (!CURRENT)
-		goto the_end;
-
-	floppy = CURRENT->rq_disk->private_data;
+	floppy = fd_request->rq_disk->private_data;
 	drive = floppy - unit;
 	type = floppy->type;
 	
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		end_request(CURRENT, 0);
+		fd_end_request_cur(-EIO);
 		goto repeat;
 	}
 		
@@ -1430,12 +1432,12 @@
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			end_request(CURRENT, 0);
+			fd_end_request_cur(-EIO);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			end_request(CURRENT, 0);
+			fd_end_request_cur(-EIO);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1444,8 +1446,8 @@
 		UD.autoprobe = 0;
 	}
 	
-	if (CURRENT->sector + 1 > UDT->blocks) {
-		end_request(CURRENT, 0);
+	if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
+		fd_end_request_cur(-EIO);
 		goto repeat;
 	}
 
@@ -1453,9 +1455,9 @@
 	del_timer( &motor_off_timer );
 		
 	ReqCnt = 0;
-	ReqCmd = rq_data_dir(CURRENT);
-	ReqBlock = CURRENT->sector;
-	ReqBuffer = CURRENT->buffer;
+	ReqCmd = rq_data_dir(fd_request);
+	ReqBlock = blk_rq_pos(fd_request);
+	ReqBuffer = fd_request->buffer;
 	setup_req_params( drive );
 	do_fd_action( drive );
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 5f7e64b..4bf8705 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -407,12 +407,7 @@
 	rd_size = simple_strtol(str, NULL, 0);
 	return 1;
 }
-static int __init ramdisk_size2(char *str)
-{
-	return ramdisk_size(str);
-}
-__setup("ramdisk=", ramdisk_size);
-__setup("ramdisk_size=", ramdisk_size2);
+__setup("ramdisk_size=", ramdisk_size);
 #endif
 
 /*
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4d4d5e0..b22cec9 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -180,11 +180,13 @@
 					   __u32);
 static void start_io(ctlr_info_t *h);
 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
-		   unsigned int use_unit_num, unsigned int log_unit,
 		   __u8 page_code, unsigned char *scsi3addr, int cmd_type);
 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
-			   unsigned int use_unit_num, unsigned int log_unit,
-			   __u8 page_code, int cmd_type);
+			__u8 page_code, unsigned char scsi3addr[],
+			int cmd_type);
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+	int attempt_retry);
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
 static void fail_all_cmds(unsigned long ctlr);
 static int scan_thread(void *data);
@@ -437,6 +439,194 @@
 }
 #endif				/* CONFIG_PROC_FS */
 
+#define MAX_PRODUCT_NAME_LEN 19
+
+#define to_hba(n) container_of(n, struct ctlr_info, dev)
+#define to_drv(n) container_of(n, drive_info_struct, dev)
+
+static struct device_type cciss_host_type = {
+	.name		= "cciss_host",
+};
+
+static ssize_t dev_show_unique_id(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	__u8 sn[16];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(sn, drv->serial_no, sizeof(sn));
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, 16 * 2 + 2,
+				"%02X%02X%02X%02X%02X%02X%02X%02X"
+				"%02X%02X%02X%02X%02X%02X%02X%02X\n",
+				sn[0], sn[1], sn[2], sn[3],
+				sn[4], sn[5], sn[6], sn[7],
+				sn[8], sn[9], sn[10], sn[11],
+				sn[12], sn[13], sn[14], sn[15]);
+}
+DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL);
+
+static ssize_t dev_show_vendor(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char vendor[VENDOR_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(vendor) + 1, "%s\n", drv->vendor);
+}
+DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL);
+
+static ssize_t dev_show_model(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char model[MODEL_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(model, drv->model, MODEL_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(model) + 1, "%s\n", drv->model);
+}
+DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL);
+
+static ssize_t dev_show_rev(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char rev[REV_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(rev, drv->rev, REV_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(rev) + 1, "%s\n", drv->rev);
+}
+DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
+
+static struct attribute *cciss_dev_attrs[] = {
+	&dev_attr_unique_id.attr,
+	&dev_attr_model.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_rev.attr,
+	NULL
+};
+
+static struct attribute_group cciss_dev_attr_group = {
+	.attrs = cciss_dev_attrs,
+};
+
+static struct attribute_group *cciss_dev_attr_groups[] = {
+	&cciss_dev_attr_group,
+	NULL
+};
+
+static struct device_type cciss_dev_type = {
+	.name		= "cciss_device",
+	.groups		= cciss_dev_attr_groups,
+};
+
+static struct bus_type cciss_bus_type = {
+	.name		= "cciss",
+};
+
+
+/*
+ * Initialize sysfs entry for each controller.  This sets up and registers
+ * the 'cciss#' directory for each individual controller under
+ * /sys/bus/pci/devices/<dev>/.
+ */
+static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
+{
+	device_initialize(&h->dev);
+	h->dev.type = &cciss_host_type;
+	h->dev.bus = &cciss_bus_type;
+	dev_set_name(&h->dev, "%s", h->devname);
+	h->dev.parent = &h->pdev->dev;
+
+	return device_add(&h->dev);
+}
+
+/*
+ * Remove sysfs entries for an hba.
+ */
+static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
+{
+	device_del(&h->dev);
+}
+
+/*
+ * Initialize sysfs for each logical drive.  This sets up and registers
+ * the 'c#d#' directory for each individual logical drive under
+ * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
+ * /sys/block/cciss!c#d# to this entry.
+ */
+static int cciss_create_ld_sysfs_entry(struct ctlr_info *h,
+				       drive_info_struct *drv,
+				       int drv_index)
+{
+	device_initialize(&drv->dev);
+	drv->dev.type = &cciss_dev_type;
+	drv->dev.bus = &cciss_bus_type;
+	dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index);
+	drv->dev.parent = &h->dev;
+	return device_add(&drv->dev);
+}
+
+/*
+ * Remove sysfs entries for a logical drive.
+ */
+static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv)
+{
+	device_del(&drv->dev);
+}
+
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
@@ -1299,7 +1489,6 @@
 {
 	CommandList_struct *cmd = rq->completion_data;
 	ctlr_info_t *h = hba[cmd->ctlr];
-	unsigned int nr_bytes;
 	unsigned long flags;
 	u64bit temp64;
 	int i, ddir;
@@ -1321,15 +1510,11 @@
 	printk("Done with %p\n", rq);
 #endif				/* CCISS_DEBUG */
 
-	/*
-	 * Store the full size and set the residual count for pc requests
-	 */
-	nr_bytes = blk_rq_bytes(rq);
+	/* set the residual count for pc requests */
 	if (blk_pc_request(rq))
-		rq->data_len = cmd->err_info->ResidualCnt;
+		rq->resid_len = cmd->err_info->ResidualCnt;
 
-	if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, nr_bytes))
-		BUG();
+	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, cmd, 1);
@@ -1337,6 +1522,56 @@
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
+static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[],
+	uint32_t log_unit)
+{
+	log_unit = h->drv[log_unit].LunID & 0x03fff;
+	memset(&scsi3addr[4], 0, 4);
+	memcpy(&scsi3addr[0], &log_unit, 4);
+	scsi3addr[3] |= 0x40;
+}
+
+/* This function gets the SCSI vendor, model, and revision of a logical drive
+ * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
+ * they cannot be read.
+ */
+static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
+				   char *vendor, char *model, char *rev)
+{
+	int rc;
+	InquiryData_struct *inq_buf;
+	unsigned char scsi3addr[8];
+
+	*vendor = '\0';
+	*model = '\0';
+	*rev = '\0';
+
+	inq_buf = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+	if (!inq_buf)
+		return;
+
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
+	if (withirq)
+		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf,
+			     sizeof(InquiryData_struct), 0,
+				scsi3addr, TYPE_CMD);
+	else
+		rc = sendcmd(CISS_INQUIRY, ctlr, inq_buf,
+			     sizeof(InquiryData_struct), 0,
+				scsi3addr, TYPE_CMD);
+	if (rc == IO_OK) {
+		memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
+		vendor[VENDOR_LEN] = '\0';
+		memcpy(model, &inq_buf->data_byte[16], MODEL_LEN);
+		model[MODEL_LEN] = '\0';
+		memcpy(rev, &inq_buf->data_byte[32], REV_LEN);
+		rev[REV_LEN] = '\0';
+	}
+
+	kfree(inq_buf);
+	return;
+}
+
 /* This function gets the serial number of a logical drive via
  * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
  * number cannot be had, for whatever reason, 16 bytes of 0xff
@@ -1348,6 +1583,7 @@
 #define PAGE_83_INQ_BYTES 64
 	int rc;
 	unsigned char *buf;
+	unsigned char scsi3addr[8];
 
 	if (buflen > 16)
 		buflen = 16;
@@ -1356,12 +1592,13 @@
 	if (!buf)
 		return;
 	memset(serial_no, 0, buflen);
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
-			PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
+			PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
 	else
 		rc = sendcmd(CISS_INQUIRY, ctlr, buf,
-			PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
+			PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
 	if (rc == IO_OK)
 		memcpy(serial_no, &buf[8], buflen);
 	kfree(buf);
@@ -1377,7 +1614,7 @@
 	disk->first_minor = drv_index << NWD_SHIFT;
 	disk->fops = &cciss_fops;
 	disk->private_data = &h->drv[drv_index];
-	disk->driverfs_dev = &h->pdev->dev;
+	disk->driverfs_dev = &h->drv[drv_index].dev;
 
 	/* Set up queue information */
 	blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
@@ -1394,8 +1631,8 @@
 
 	disk->queue->queuedata = h;
 
-	blk_queue_hardsect_size(disk->queue,
-				h->drv[drv_index].block_size);
+	blk_queue_logical_block_size(disk->queue,
+				     h->drv[drv_index].block_size);
 
 	/* Make sure all queue data is written out before */
 	/* setting h->drv[drv_index].queue, as setting this */
@@ -1468,6 +1705,8 @@
 	drvinfo->block_size = block_size;
 	drvinfo->nr_blocks = total_size + 1;
 
+	cciss_get_device_descr(ctlr, drv_index, 1, drvinfo->vendor,
+				drvinfo->model, drvinfo->rev);
 	cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
 			sizeof(drvinfo->serial_no));
 
@@ -1517,6 +1756,9 @@
 	h->drv[drv_index].cylinders = drvinfo->cylinders;
 	h->drv[drv_index].raid_level = drvinfo->raid_level;
 	memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
+	memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1);
+	memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1);
+	memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1);
 
 	++h->num_luns;
 	disk = h->gendisk[drv_index];
@@ -1591,6 +1833,8 @@
 		}
 	}
 	h->drv[drv_index].LunID = lunid;
+	if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index))
+		goto err_free_disk;
 
 	/* Don't need to mark this busy because nobody */
 	/* else knows about this disk yet to contend */
@@ -1598,6 +1842,11 @@
 	h->drv[drv_index].busy_configuring = 0;
 	wmb();
 	return drv_index;
+
+err_free_disk:
+	put_disk(h->gendisk[drv_index]);
+	h->gendisk[drv_index] = NULL;
+	return -1;
 }
 
 /* This is for the special case of a controller which
@@ -1668,8 +1917,8 @@
 		goto mem_msg;
 
 	return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
-				      sizeof(ReportLunData_struct), 0,
-				      0, 0, TYPE_CMD);
+				      sizeof(ReportLunData_struct),
+				      0, CTLR_LUNID, TYPE_CMD);
 
 	if (return_code == IO_OK)
 		listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
@@ -1718,6 +1967,7 @@
 			h->drv[i].busy_configuring = 1;
 			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 			return_code = deregister_disk(h, i, 1);
+			cciss_destroy_ld_sysfs_entry(&h->drv[i]);
 			h->drv[i].busy_configuring = 0;
 		}
 	}
@@ -1877,11 +2127,9 @@
 	return 0;
 }
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,	/* 0: address the controller,
-															   1: address logical volume log_unit,
-															   2: periph device address is scsi3addr */
-		    unsigned int log_unit, __u8 page_code,
-		    unsigned char *scsi3addr, int cmd_type)
+static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+		size_t size, __u8 page_code, unsigned char *scsi3addr,
+		int cmd_type)
 {
 	ctlr_info_t *h = hba[ctlr];
 	u64bit buff_dma_handle;
@@ -1897,27 +2145,12 @@
 		c->Header.SGTotal = 0;
 	}
 	c->Header.Tag.lower = c->busaddr;
+	memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
 
 	c->Request.Type.Type = cmd_type;
 	if (cmd_type == TYPE_CMD) {
 		switch (cmd) {
 		case CISS_INQUIRY:
-			/* If the logical unit number is 0 then, this is going
-			   to controller so It's a physical command
-			   mode = 0 target = 0.  So we have nothing to write.
-			   otherwise, if use_unit_num == 1,
-			   mode = 1(volume set addressing) target = LUNID
-			   otherwise, if use_unit_num == 2,
-			   mode = 0(periph dev addr) target = scsi3addr */
-			if (use_unit_num == 1) {
-				c->Header.LUN.LogDev.VolId =
-				    h->drv[log_unit].LunID;
-				c->Header.LUN.LogDev.Mode = 1;
-			} else if (use_unit_num == 2) {
-				memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
-				       8);
-				c->Header.LUN.LogDev.Mode = 0;
-			}
 			/* are we trying to read a vital product page */
 			if (page_code != 0) {
 				c->Request.CDB[1] = 0x01;
@@ -1947,8 +2180,6 @@
 			break;
 
 		case CCISS_READ_CAPACITY:
-			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
-			c->Header.LUN.LogDev.Mode = 1;
 			c->Request.CDBLen = 10;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_READ;
@@ -1956,8 +2187,6 @@
 			c->Request.CDB[0] = cmd;
 			break;
 		case CCISS_READ_CAPACITY_16:
-			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
-			c->Header.LUN.LogDev.Mode = 1;
 			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_READ;
@@ -1979,6 +2208,12 @@
 			c->Request.CDB[0] = BMIC_WRITE;
 			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
 			break;
+		case TEST_UNIT_READY:
+			c->Request.CDBLen = 6;
+			c->Request.Type.Attribute = ATTR_SIMPLE;
+			c->Request.Type.Direction = XFER_NONE;
+			c->Request.Timeout = 0;
+			break;
 		default:
 			printk(KERN_WARNING
 			       "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
@@ -1997,13 +2232,13 @@
 			memcpy(&c->Request.CDB[4], buff, 8);
 			break;
 		case 1:	/* RESET message */
-			c->Request.CDBLen = 12;
+			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_WRITE;
+			c->Request.Type.Direction = XFER_NONE;
 			c->Request.Timeout = 0;
 			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
 			c->Request.CDB[0] = cmd;	/* reset */
-			c->Request.CDB[1] = 0x04;	/* reset a LUN */
+			c->Request.CDB[1] = 0x03;	/* reset a target */
 			break;
 		case 3:	/* No-Op message */
 			c->Request.CDBLen = 1;
@@ -2035,114 +2270,152 @@
 	return status;
 }
 
-static int sendcmd_withirq(__u8 cmd,
-			   int ctlr,
-			   void *buff,
-			   size_t size,
-			   unsigned int use_unit_num,
-			   unsigned int log_unit, __u8 page_code, int cmd_type)
+static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
 {
-	ctlr_info_t *h = hba[ctlr];
-	CommandList_struct *c;
+	switch (c->err_info->ScsiStatus) {
+	case SAM_STAT_GOOD:
+		return IO_OK;
+	case SAM_STAT_CHECK_CONDITION:
+		switch (0xf & c->err_info->SenseInfo[2]) {
+		case 0: return IO_OK; /* no sense */
+		case 1: return IO_OK; /* recovered error */
+		default:
+			printk(KERN_WARNING "cciss%d: cmd 0x%02x "
+				"check condition, sense key = 0x%02x\n",
+				h->ctlr, c->Request.CDB[0],
+				c->err_info->SenseInfo[2]);
+		}
+		break;
+	default:
+		printk(KERN_WARNING "cciss%d: cmd 0x%02x"
+			"scsi status = 0x%02x\n", h->ctlr,
+			c->Request.CDB[0], c->err_info->ScsiStatus);
+		break;
+	}
+	return IO_ERROR;
+}
+
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
+{
+	int return_status = IO_OK;
+
+	if (c->err_info->CommandStatus == CMD_SUCCESS)
+		return IO_OK;
+
+	switch (c->err_info->CommandStatus) {
+	case CMD_TARGET_STATUS:
+		return_status = check_target_status(h, c);
+		break;
+	case CMD_DATA_UNDERRUN:
+	case CMD_DATA_OVERRUN:
+		/* expected for inquiry and report lun commands */
+		break;
+	case CMD_INVALID:
+		printk(KERN_WARNING "cciss: cmd 0x%02x is "
+		       "reported invalid\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_PROTOCOL_ERR:
+		printk(KERN_WARNING "cciss: cmd 0x%02x has "
+		       "protocol error \n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_HARDWARE_ERR:
+		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		       " hardware error\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_CONNECTION_LOST:
+		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		       "connection lost\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_ABORTED:
+		printk(KERN_WARNING "cciss: cmd 0x%02x was "
+		       "aborted\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_ABORT_FAILED:
+		printk(KERN_WARNING "cciss: cmd 0x%02x reports "
+		       "abort failed\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_UNSOLICITED_ABORT:
+		printk(KERN_WARNING
+		       "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
+			c->Request.CDB[0]);
+		return_status = IO_NEEDS_RETRY;
+		break;
+	default:
+		printk(KERN_WARNING "cciss: cmd 0x%02x returned "
+		       "unknown status %x\n", c->Request.CDB[0],
+		       c->err_info->CommandStatus);
+		return_status = IO_ERROR;
+	}
+	return return_status;
+}
+
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+	int attempt_retry)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
 	u64bit buff_dma_handle;
 	unsigned long flags;
-	int return_status;
-	DECLARE_COMPLETION_ONSTACK(wait);
+	int return_status = IO_OK;
 
-	if ((c = cmd_alloc(h, 0)) == NULL)
-		return -ENOMEM;
-	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-				 log_unit, page_code, NULL, cmd_type);
-	if (return_status != IO_OK) {
-		cmd_free(h, c, 0);
-		return return_status;
-	}
-      resend_cmd2:
+resend_cmd2:
 	c->waiting = &wait;
-
 	/* Put the request on the tail of the queue and send it */
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
 	addQ(&h->reqQ, c);
 	h->Qdepth++;
 	start_io(h);
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 
 	wait_for_completion(&wait);
 
-	if (c->err_info->CommandStatus != 0) {	/* an error has occurred */
-		switch (c->err_info->CommandStatus) {
-		case CMD_TARGET_STATUS:
-			printk(KERN_WARNING "cciss: cmd %p has "
-			       " completed with errors\n", c);
-			if (c->err_info->ScsiStatus) {
-				printk(KERN_WARNING "cciss: cmd %p "
-				       "has SCSI Status = %x\n",
-				       c, c->err_info->ScsiStatus);
-			}
+	if (c->err_info->CommandStatus == 0 || !attempt_retry)
+		goto command_done;
 
-			break;
-		case CMD_DATA_UNDERRUN:
-		case CMD_DATA_OVERRUN:
-			/* expected for inquire and report lun commands */
-			break;
-		case CMD_INVALID:
-			printk(KERN_WARNING "cciss: Cmd %p is "
-			       "reported invalid\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_PROTOCOL_ERR:
-			printk(KERN_WARNING "cciss: cmd %p has "
-			       "protocol error \n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_HARDWARE_ERR:
-			printk(KERN_WARNING "cciss: cmd %p had "
-			       " hardware error\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_CONNECTION_LOST:
-			printk(KERN_WARNING "cciss: cmd %p had "
-			       "connection lost\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_ABORTED:
-			printk(KERN_WARNING "cciss: cmd %p was "
-			       "aborted\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_ABORT_FAILED:
-			printk(KERN_WARNING "cciss: cmd %p reports "
-			       "abort failed\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_UNSOLICITED_ABORT:
-			printk(KERN_WARNING
-			       "cciss%d: unsolicited abort %p\n", ctlr, c);
-			if (c->retry_count < MAX_CMD_RETRIES) {
-				printk(KERN_WARNING
-				       "cciss%d: retrying %p\n", ctlr, c);
-				c->retry_count++;
-				/* erase the old error information */
-				memset(c->err_info, 0,
-				       sizeof(ErrorInfo_struct));
-				return_status = IO_OK;
-				INIT_COMPLETION(wait);
-				goto resend_cmd2;
-			}
-			return_status = IO_ERROR;
-			break;
-		default:
-			printk(KERN_WARNING "cciss: cmd %p returned "
-			       "unknown status %x\n", c,
-			       c->err_info->CommandStatus);
-			return_status = IO_ERROR;
-		}
+	return_status = process_sendcmd_error(h, c);
+
+	if (return_status == IO_NEEDS_RETRY &&
+		c->retry_count < MAX_CMD_RETRIES) {
+		printk(KERN_WARNING "cciss%d: retrying 0x%02x\n", h->ctlr,
+			c->Request.CDB[0]);
+		c->retry_count++;
+		/* erase the old error information */
+		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+		return_status = IO_OK;
+		INIT_COMPLETION(wait);
+		goto resend_cmd2;
 	}
+
+command_done:
 	/* unlock the buffers from DMA */
 	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
 	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
 			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
+	return return_status;
+}
+
+static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+			   __u8 page_code, unsigned char scsi3addr[],
+			int cmd_type)
+{
+	ctlr_info_t *h = hba[ctlr];
+	CommandList_struct *c;
+	int return_status;
+
+	c = cmd_alloc(h, 0);
+	if (!c)
+		return -ENOMEM;
+	return_status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+		scsi3addr, cmd_type);
+	if (return_status == IO_OK)
+		return_status = sendcmd_withirq_core(h, c, 1);
+
 	cmd_free(h, c, 0);
 	return return_status;
 }
@@ -2155,15 +2428,17 @@
 {
 	int return_code;
 	unsigned long t;
+	unsigned char scsi3addr[8];
 
 	memset(inq_buff, 0, sizeof(InquiryData_struct));
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
-					      inq_buff, sizeof(*inq_buff), 1,
-					      logvol, 0xC1, TYPE_CMD);
+					      inq_buff, sizeof(*inq_buff),
+					      0xC1, scsi3addr, TYPE_CMD);
 	else
 		return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
-				      sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
+				      sizeof(*inq_buff), 0xC1, scsi3addr,
 				      TYPE_CMD);
 	if (return_code == IO_OK) {
 		if (inq_buff->data_byte[8] == 0xFF) {
@@ -2204,6 +2479,7 @@
 {
 	ReadCapdata_struct *buf;
 	int return_code;
+	unsigned char scsi3addr[8];
 
 	buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
 	if (!buf) {
@@ -2211,14 +2487,15 @@
 		return;
 	}
 
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
 				ctlr, buf, sizeof(ReadCapdata_struct),
-					1, logvol, 0, TYPE_CMD);
+					0, scsi3addr, TYPE_CMD);
 	else
 		return_code = sendcmd(CCISS_READ_CAPACITY,
 				ctlr, buf, sizeof(ReadCapdata_struct),
-					1, logvol, 0, NULL, TYPE_CMD);
+					0, scsi3addr, TYPE_CMD);
 	if (return_code == IO_OK) {
 		*total_size = be32_to_cpu(*(__be32 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
@@ -2238,6 +2515,7 @@
 {
 	ReadCapdata_struct_16 *buf;
 	int return_code;
+	unsigned char scsi3addr[8];
 
 	buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
 	if (!buf) {
@@ -2245,15 +2523,16 @@
 		return;
 	}
 
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq) {
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
 			ctlr, buf, sizeof(ReadCapdata_struct_16),
-				1, logvol, 0, TYPE_CMD);
+				0, scsi3addr, TYPE_CMD);
 	}
 	else {
 		return_code = sendcmd(CCISS_READ_CAPACITY_16,
 			ctlr, buf, sizeof(ReadCapdata_struct_16),
-				1, logvol, 0, NULL, TYPE_CMD);
+				0, scsi3addr, TYPE_CMD);
 	}
 	if (return_code == IO_OK) {
 		*total_size = be64_to_cpu(*(__be64 *) buf->total_size);
@@ -2303,7 +2582,7 @@
 	cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
 			       inq_buff, drv);
 
-	blk_queue_hardsect_size(drv->queue, drv->block_size);
+	blk_queue_logical_block_size(drv->queue, drv->block_size);
 	set_capacity(disk, drv->nr_blocks);
 
 	kfree(inq_buff);
@@ -2333,86 +2612,21 @@
 	return 1;
 }
 
-static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
-{
-	/* We get in here if sendcmd() is polling for completions
-	   and gets some command back that it wasn't expecting --
-	   something other than that which it just sent down.
-	   Ordinarily, that shouldn't happen, but it can happen when
-	   the scsi tape stuff gets into error handling mode, and
-	   starts using sendcmd() to try to abort commands and
-	   reset tape drives.  In that case, sendcmd may pick up
-	   completions of commands that were sent to logical drives
-	   through the block i/o system, or cciss ioctls completing, etc.
-	   In that case, we need to save those completions for later
-	   processing by the interrupt handler.
-	 */
-
-#ifdef CONFIG_CISS_SCSI_TAPE
-	struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
-
-	/* If it's not the scsi tape stuff doing error handling, (abort */
-	/* or reset) then we don't expect anything weird. */
-	if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
-#endif
-		printk(KERN_WARNING "cciss cciss%d: SendCmd "
-		       "Invalid command list address returned! (%lx)\n",
-		       ctlr, complete);
-		/* not much we can do. */
-#ifdef CONFIG_CISS_SCSI_TAPE
-		return 1;
-	}
-
-	/* We've sent down an abort or reset, but something else
-	   has completed */
-	if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
-		/* Uh oh.  No room to save it for later... */
-		printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
-		       "reject list overflow, command lost!\n", ctlr);
-		return 1;
-	}
-	/* Save it for later */
-	srl->complete[srl->ncompletions] = complete;
-	srl->ncompletions++;
-#endif
-	return 0;
-}
-
-/*
- * Send a command to the controller, and wait for it to complete.
- * Only used at init time.
+/* Send command c to controller h and poll for it to complete.
+ * Turns interrupts off on the board.  Used at driver init time
+ * and during SCSI error recovery.
  */
-static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,	/* 0: address the controller,
-												   1: address logical volume log_unit,
-												   2: periph device address is scsi3addr */
-		   unsigned int log_unit,
-		   __u8 page_code, unsigned char *scsi3addr, int cmd_type)
+static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c)
 {
-	CommandList_struct *c;
 	int i;
 	unsigned long complete;
-	ctlr_info_t *info_p = hba[ctlr];
+	int status = IO_ERROR;
 	u64bit buff_dma_handle;
-	int status, done = 0;
 
-	if ((c = cmd_alloc(info_p, 1)) == NULL) {
-		printk(KERN_WARNING "cciss: unable to get memory");
-		return IO_ERROR;
-	}
-	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-			  log_unit, page_code, scsi3addr, cmd_type);
-	if (status != IO_OK) {
-		cmd_free(info_p, c, 1);
-		return status;
-	}
-      resend_cmd1:
-	/*
-	 * Disable interrupt
-	 */
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: turning intr off\n");
-#endif				/* CCISS_DEBUG */
-	info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
+resend_cmd1:
+
+	/* Disable interrupt on the board. */
+	h->access.set_intr_mask(h, CCISS_INTR_OFF);
 
 	/* Make sure there is room in the command FIFO */
 	/* Actually it should be completely empty at this time */
@@ -2420,21 +2634,15 @@
 	/* tape side of the driver. */
 	for (i = 200000; i > 0; i--) {
 		/* if fifo isn't full go */
-		if (!(info_p->access.fifo_full(info_p))) {
-
+		if (!(h->access.fifo_full(h)))
 			break;
-		}
 		udelay(10);
 		printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
-		       " waiting!\n", ctlr);
+		       " waiting!\n", h->ctlr);
 	}
-	/*
-	 * Send the cmd
-	 */
-	info_p->access.submit_command(info_p, c);
-	done = 0;
+	h->access.submit_command(h, c); /* Send the cmd */
 	do {
-		complete = pollcomplete(ctlr);
+		complete = pollcomplete(h->ctlr);
 
 #ifdef CCISS_DEBUG
 		printk(KERN_DEBUG "cciss: command completed\n");
@@ -2443,97 +2651,102 @@
 		if (complete == 1) {
 			printk(KERN_WARNING
 			       "cciss cciss%d: SendCmd Timeout out, "
-			       "No command list address returned!\n", ctlr);
+			       "No command list address returned!\n", h->ctlr);
 			status = IO_ERROR;
-			done = 1;
 			break;
 		}
 
-		/* This will need to change for direct lookup completions */
-		if ((complete & CISS_ERROR_BIT)
-		    && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
-			/* if data overrun or underun on Report command
-			   ignore it
-			 */
-			if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
-			     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
-			     (c->Request.CDB[0] == CISS_INQUIRY)) &&
-			    ((c->err_info->CommandStatus ==
-			      CMD_DATA_OVERRUN) ||
-			     (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
-			    )) {
-				complete = c->busaddr;
-			} else {
-				if (c->err_info->CommandStatus ==
-				    CMD_UNSOLICITED_ABORT) {
-					printk(KERN_WARNING "cciss%d: "
-					       "unsolicited abort %p\n",
-					       ctlr, c);
-					if (c->retry_count < MAX_CMD_RETRIES) {
-						printk(KERN_WARNING
-						       "cciss%d: retrying %p\n",
-						       ctlr, c);
-						c->retry_count++;
-						/* erase the old error */
-						/* information */
-						memset(c->err_info, 0,
-						       sizeof
-						       (ErrorInfo_struct));
-						goto resend_cmd1;
-					} else {
-						printk(KERN_WARNING
-						       "cciss%d: retried %p too "
-						       "many times\n", ctlr, c);
-						status = IO_ERROR;
-						goto cleanup1;
-					}
-				} else if (c->err_info->CommandStatus ==
-					   CMD_UNABORTABLE) {
-					printk(KERN_WARNING
-					       "cciss%d: command could not be aborted.\n",
-					       ctlr);
-					status = IO_ERROR;
-					goto cleanup1;
-				}
-				printk(KERN_WARNING "ciss ciss%d: sendcmd"
-				       " Error %x \n", ctlr,
-				       c->err_info->CommandStatus);
-				printk(KERN_WARNING "ciss ciss%d: sendcmd"
-				       " offensive info\n"
-				       "  size %x\n   num %x   value %x\n",
-				       ctlr,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_size,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_num,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_value);
-				status = IO_ERROR;
-				goto cleanup1;
-			}
-		}
-		/* This will need changing for direct lookup completions */
-		if (complete != c->busaddr) {
-			if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
-				BUG();	/* we are pretty much hosed if we get here. */
-			}
+		/* Make sure it's the command we're expecting. */
+		if ((complete & ~CISS_ERROR_BIT) != c->busaddr) {
+			printk(KERN_WARNING "cciss%d: Unexpected command "
+				"completion.\n", h->ctlr);
 			continue;
-		} else
-			done = 1;
-	} while (!done);
+		}
 
-      cleanup1:
+		/* It is our command.  If no error, we're done. */
+		if (!(complete & CISS_ERROR_BIT)) {
+			status = IO_OK;
+			break;
+		}
+
+		/* There is an error... */
+
+		/* if data overrun or underun on Report command ignore it */
+		if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
+		     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
+		     (c->Request.CDB[0] == CISS_INQUIRY)) &&
+			((c->err_info->CommandStatus == CMD_DATA_OVERRUN) ||
+			 (c->err_info->CommandStatus == CMD_DATA_UNDERRUN))) {
+			complete = c->busaddr;
+			status = IO_OK;
+			break;
+		}
+
+		if (c->err_info->CommandStatus == CMD_UNSOLICITED_ABORT) {
+			printk(KERN_WARNING "cciss%d: unsolicited abort %p\n",
+				h->ctlr, c);
+			if (c->retry_count < MAX_CMD_RETRIES) {
+				printk(KERN_WARNING "cciss%d: retrying %p\n",
+				   h->ctlr, c);
+				c->retry_count++;
+				/* erase the old error information */
+				memset(c->err_info, 0, sizeof(c->err_info));
+				goto resend_cmd1;
+			}
+			printk(KERN_WARNING "cciss%d: retried %p too many "
+				"times\n", h->ctlr, c);
+			status = IO_ERROR;
+			break;
+		}
+
+		if (c->err_info->CommandStatus == CMD_UNABORTABLE) {
+			printk(KERN_WARNING "cciss%d: command could not be "
+				"aborted.\n", h->ctlr);
+			status = IO_ERROR;
+			break;
+		}
+
+		if (c->err_info->CommandStatus == CMD_TARGET_STATUS) {
+			status = check_target_status(h, c);
+			break;
+		}
+
+		printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
+		printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
+			c->Request.CDB[0], c->err_info->CommandStatus);
+		status = IO_ERROR;
+		break;
+
+	} while (1);
+
 	/* unlock the data buffer from DMA */
 	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
-	pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
+	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
 			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	/* if we saved some commands for later, process them now. */
-	if (info_p->scsi_rejects.ncompletions > 0)
-		do_cciss_intr(0, info_p);
-#endif
-	cmd_free(info_p, c, 1);
+	return status;
+}
+
+/*
+ * Send a command to the controller, and wait for it to complete.
+ * Used at init time, and during SCSI error recovery.
+ */
+static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
+	__u8 page_code, unsigned char *scsi3addr, int cmd_type)
+{
+	CommandList_struct *c;
+	int status;
+
+	c = cmd_alloc(hba[ctlr], 1);
+	if (!c) {
+		printk(KERN_WARNING "cciss: unable to get memory");
+		return IO_ERROR;
+	}
+	status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+		scsi3addr, cmd_type);
+	if (status == IO_OK)
+		status = sendcmd_core(hba[ctlr], c);
+	cmd_free(hba[ctlr], c, 1);
 	return status;
 }
 
@@ -2691,7 +2904,7 @@
 			printk(KERN_WARNING "cciss: cmd %p has"
 			       " completed with data underrun "
 			       "reported\n", cmd);
-			cmd->rq->data_len = cmd->err_info->ResidualCnt;
+			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
 		}
 		break;
 	case CMD_DATA_OVERRUN:
@@ -2806,7 +3019,7 @@
 		goto startio;
 
       queue:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -2815,7 +3028,7 @@
 	if ((c = cmd_alloc(h, 1)) == NULL)
 		goto full;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -2840,10 +3053,10 @@
 	c->Request.Timeout = 0;	// Don't time out
 	c->Request.CDB[0] =
 	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
-	start_blk = creq->sector;
+	start_blk = blk_rq_pos(creq);
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector,
-	       (int)creq->nr_sectors);
+	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
+	       (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
 #endif				/* CCISS_DEBUG */
 
 	sg_init_table(tmp_sg, MAXSGENTRIES);
@@ -2869,8 +3082,8 @@
 		h->maxSG = seg;
 
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
-	       creq->nr_sectors, seg);
+	printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
+	       blk_rq_sectors(creq), seg);
 #endif				/* CCISS_DEBUG */
 
 	c->Header.SGList = c->Header.SGTotal = seg;
@@ -2882,8 +3095,8 @@
 			c->Request.CDB[4] = (start_blk >> 8) & 0xff;
 			c->Request.CDB[5] = start_blk & 0xff;
 			c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
-			c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
-			c->Request.CDB[8] = creq->nr_sectors & 0xff;
+			c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
+			c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
 		} else {
 			u32 upper32 = upper_32_bits(start_blk);
@@ -2898,10 +3111,10 @@
 			c->Request.CDB[7]= (start_blk >> 16) & 0xff;
 			c->Request.CDB[8]= (start_blk >>  8) & 0xff;
 			c->Request.CDB[9]= start_blk & 0xff;
-			c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
-			c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
-			c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
-			c->Request.CDB[13]= creq->nr_sectors & 0xff;
+			c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
+			c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
+			c->Request.CDB[12]= (blk_rq_sectors(creq) >>  8) & 0xff;
+			c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[14] = c->Request.CDB[15] = 0;
 		}
 	} else if (blk_pc_request(creq)) {
@@ -2931,44 +3144,18 @@
 
 static inline unsigned long get_next_completion(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	/* Any rejects from sendcmd() lying around? Process them first */
-	if (h->scsi_rejects.ncompletions == 0)
-		return h->access.command_completed(h);
-	else {
-		struct sendcmd_reject_list *srl;
-		int n;
-		srl = &h->scsi_rejects;
-		n = --srl->ncompletions;
-		/* printk("cciss%d: processing saved reject\n", h->ctlr); */
-		printk("p");
-		return srl->complete[n];
-	}
-#else
 	return h->access.command_completed(h);
-#endif
 }
 
 static inline int interrupt_pending(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	return (h->access.intr_pending(h)
-		|| (h->scsi_rejects.ncompletions > 0));
-#else
 	return h->access.intr_pending(h);
-#endif
 }
 
 static inline long interrupt_not_for_us(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	return (((h->access.intr_pending(h) == 0) ||
-		 (h->interrupts_enabled == 0))
-		&& (h->scsi_rejects.ncompletions == 0));
-#else
 	return (((h->access.intr_pending(h) == 0) ||
 		 (h->interrupts_enabled == 0)));
-#endif
 }
 
 static irqreturn_t do_cciss_intr(int irq, void *dev_id)
@@ -3723,12 +3910,15 @@
 	INIT_HLIST_HEAD(&hba[i]->reqQ);
 
 	if (cciss_pci_init(hba[i], pdev) != 0)
-		goto clean1;
+		goto clean0;
 
 	sprintf(hba[i]->devname, "cciss%d", i);
 	hba[i]->ctlr = i;
 	hba[i]->pdev = pdev;
 
+	if (cciss_create_hba_sysfs_entry(hba[i]))
+		goto clean0;
+
 	/* configure PCI DMA stuff */
 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
 		dac = 1;
@@ -3787,15 +3977,6 @@
 		printk(KERN_ERR "cciss: out of memory");
 		goto clean4;
 	}
-#ifdef CONFIG_CISS_SCSI_TAPE
-	hba[i]->scsi_rejects.complete =
-	    kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
-		    (hba[i]->nr_cmds + 5), GFP_KERNEL);
-	if (hba[i]->scsi_rejects.complete == NULL) {
-		printk(KERN_ERR "cciss: out of memory");
-		goto clean4;
-	}
-#endif
 	spin_lock_init(&hba[i]->lock);
 
 	/* Initialize the pdev driver private data.
@@ -3828,7 +4009,7 @@
 	}
 
 	return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
-		sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
+		sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
 	if (return_code == IO_OK) {
 		hba[i]->firm_ver[0] = inq_buff->data_byte[32];
 		hba[i]->firm_ver[1] = inq_buff->data_byte[33];
@@ -3855,9 +4036,6 @@
 
 clean4:
 	kfree(inq_buff);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	kfree(hba[i]->scsi_rejects.complete);
-#endif
 	kfree(hba[i]->cmd_pool_bits);
 	if (hba[i]->cmd_pool)
 		pci_free_consistent(hba[i]->pdev,
@@ -3872,6 +4050,8 @@
 clean2:
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 clean1:
+	cciss_destroy_hba_sysfs_entry(hba[i]);
+clean0:
 	hba[i]->busy_initializing = 0;
 	/* cleanup any queues that may have been initialized */
 	for (j=0; j <= hba[i]->highest_lun; j++){
@@ -3907,8 +4087,8 @@
 	/* sendcmd will turn off interrupt, and send the flush...
 	 * To write all data in the battery backed cache to disks */
 	memset(flush_buf, 0, 4);
-	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
-			      TYPE_CMD);
+	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0,
+		CTLR_LUNID, TYPE_CMD);
 	if (return_code == IO_OK) {
 		printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
 	} else {
@@ -3973,15 +4153,13 @@
 	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
 			    hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
 	kfree(hba[i]->cmd_pool_bits);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	kfree(hba[i]->scsi_rejects.complete);
-#endif
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
 	 */
 	pci_release_regions(pdev);
 	pci_set_drvdata(pdev, NULL);
+	cciss_destroy_hba_sysfs_entry(hba[i]);
 	free_hba(i);
 }
 
@@ -3999,6 +4177,8 @@
  */
 static int __init cciss_init(void)
 {
+	int err;
+
 	/*
 	 * The hardware requires that commands are aligned on a 64-bit
 	 * boundary. Given that we use pci_alloc_consistent() to allocate an
@@ -4008,8 +4188,20 @@
 
 	printk(KERN_INFO DRIVER_NAME "\n");
 
+	err = bus_register(&cciss_bus_type);
+	if (err)
+		return err;
+
 	/* Register for our PCI devices */
-	return pci_register_driver(&cciss_pci_driver);
+	err = pci_register_driver(&cciss_pci_driver);
+	if (err)
+		goto err_bus_register;
+
+	return 0;
+
+err_bus_register:
+	bus_unregister(&cciss_bus_type);
+	return err;
 }
 
 static void __exit cciss_cleanup(void)
@@ -4026,6 +4218,7 @@
 		}
 	}
 	remove_proc_entry("driver/cciss", NULL);
+	bus_unregister(&cciss_bus_type);
 }
 
 static void fail_all_cmds(unsigned long ctlr)
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 703e080..06a5db2 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -11,6 +11,11 @@
 
 #define IO_OK		0
 #define IO_ERROR	1
+#define IO_NEEDS_RETRY  3
+
+#define VENDOR_LEN	8
+#define MODEL_LEN	16
+#define REV_LEN		4
 
 struct ctlr_info;
 typedef struct ctlr_info ctlr_info_t;
@@ -34,23 +39,20 @@
 	int 	cylinders;
 	int	raid_level; /* set to -1 to indicate that
 			     * the drive is not in use/configured
-			    */
-	int	busy_configuring; /*This is set when the drive is being removed
-				   *to prevent it from being opened or it's queue
-				   *from being started.
-				  */
-	__u8 serial_no[16]; /* from inquiry page 0x83, */
-			    /* not necc. null terminated. */
+			     */
+	int	busy_configuring; /* This is set when a drive is being removed
+				   * to prevent it from being opened or it's
+				   * queue from being started.
+				   */
+	struct	device dev;
+	__u8 serial_no[16]; /* from inquiry page 0x83,
+			     * not necc. null terminated.
+			     */
+	char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
+	char model[MODEL_LEN + 1];   /* SCSI model string */
+	char rev[REV_LEN + 1];       /* SCSI revision string */
 } drive_info_struct;
 
-#ifdef CONFIG_CISS_SCSI_TAPE
-
-struct sendcmd_reject_list {
-	int ncompletions;
-	unsigned long *complete; /* array of NR_CMDS tags */
-};
-
-#endif
 struct ctlr_info 
 {
 	int	ctlr;
@@ -118,11 +120,11 @@
 	void *scsi_ctlr; /* ptr to structure containing scsi related stuff */
 	/* list of block side commands the scsi error handling sucked up */
 	/* and saved for later processing */
-	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
 	struct completion *rescan_wait;
 	struct task_struct *cciss_scan_thread;
+	struct device dev;
 };
 
 /*  Defining the diffent access_menthods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 40b1b92..cd665b0 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -217,6 +217,8 @@
   LogDevAddr_struct  LogDev;
 } LUNAddr_struct;
 
+#define CTLR_LUNID "\0\0\0\0\0\0\0\0"
+
 typedef struct _CommandListHeader_struct {
   BYTE              ReplyQueue;
   BYTE              SGList;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index a3fd87b..3315268 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -44,20 +44,13 @@
 #define CCISS_ABORT_MSG 0x00
 #define CCISS_RESET_MSG 0x01
 
-/* some prototypes... */ 
-static int sendcmd(
-	__u8	cmd,
-	int	ctlr,
-	void	*buff,
-	size_t	size,
-	unsigned int use_unit_num, /* 0: address the controller,
-				      1: address logical volume log_unit, 
-				      2: address is in scsi3addr */
-	unsigned int log_unit,
-	__u8	page_code,
-	unsigned char *scsi3addr,
+static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+	size_t size,
+	__u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
+static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
 
 static int cciss_scsi_proc_info(
 		struct Scsi_Host *sh,
@@ -1575,6 +1568,75 @@
 	CPQ_TAPE_UNLOCK(ctlr, flags);
 }
 
+static int wait_for_device_to_become_ready(ctlr_info_t *h,
+	unsigned char lunaddr[])
+{
+	int rc;
+	int count = 0;
+	int waittime = HZ;
+	CommandList_struct *c;
+
+	c = cmd_alloc(h, 1);
+	if (!c) {
+		printk(KERN_WARNING "cciss%d: out of memory in "
+			"wait_for_device_to_become_ready.\n", h->ctlr);
+		return IO_ERROR;
+	}
+
+	/* Send test unit ready until device ready, or give up. */
+	while (count < 20) {
+
+		/* Wait for a bit.  do this first, because if we send
+		 * the TUR right away, the reset will just abort it.
+		 */
+		schedule_timeout_uninterruptible(waittime);
+		count++;
+
+		/* Increase wait time with each try, up to a point. */
+		if (waittime < (HZ * 30))
+			waittime = waittime * 2;
+
+		/* Send the Test Unit Ready */
+		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0,
+			lunaddr, TYPE_CMD);
+		if (rc == 0)
+			rc = sendcmd_withirq_core(h, c, 0);
+
+		(void) process_sendcmd_error(h, c);
+
+		if (rc != 0)
+			goto retry_tur;
+
+		if (c->err_info->CommandStatus == CMD_SUCCESS)
+			break;
+
+		if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION) {
+			if (c->err_info->SenseInfo[2] == NO_SENSE)
+				break;
+			if (c->err_info->SenseInfo[2] == UNIT_ATTENTION) {
+				unsigned char asc;
+				asc = c->err_info->SenseInfo[12];
+				check_for_unit_attention(h, c);
+				if (asc == POWER_OR_RESET)
+					break;
+			}
+		}
+retry_tur:
+		printk(KERN_WARNING "cciss%d: Waiting %d secs "
+			"for device to become ready.\n",
+			h->ctlr, waittime / HZ);
+		rc = 1; /* device not ready. */
+	}
+
+	if (rc)
+		printk("cciss%d: giving up on device.\n", h->ctlr);
+	else
+		printk(KERN_WARNING "cciss%d: device is ready.\n", h->ctlr);
+
+	cmd_free(h, c, 1);
+	return rc;
+}
 
 /* Need at least one of these error handlers to keep ../scsi/hosts.c from 
  * complaining.  Doing a host- or bus-reset can't do anything good here. 
@@ -1591,6 +1653,7 @@
 {
 	int rc;
 	CommandList_struct *cmd_in_trouble;
+	unsigned char lunaddr[8];
 	ctlr_info_t **c;
 	int ctlr;
 
@@ -1600,19 +1663,15 @@
 		return FAILED;
 	ctlr = (*c)->ctlr;
 	printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
-
 	/* find the command that's giving us trouble */
 	cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
-	if (cmd_in_trouble == NULL) { /* paranoia */
+	if (cmd_in_trouble == NULL) /* paranoia */
 		return FAILED;
-	}
+	memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, 
-		(unsigned char *) &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 
+	rc = sendcmd_withirq(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
 		TYPE_MSG);
-	/* sendcmd turned off interrupts on the board, turn 'em back on. */
-	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
-	if (rc == 0)
+	if (rc == 0 && wait_for_device_to_become_ready(*c, lunaddr) == 0)
 		return SUCCESS;
 	printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
 	return FAILED;
@@ -1622,6 +1681,7 @@
 {
 	int rc;
 	CommandList_struct *cmd_to_abort;
+	unsigned char lunaddr[8];
 	ctlr_info_t **c;
 	int ctlr;
 
@@ -1636,12 +1696,9 @@
 	cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
 	if (cmd_to_abort == NULL) /* paranoia */
 		return FAILED;
-	rc = sendcmd(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag, 
-		0, 2, 0, 0, 
-		(unsigned char *) &cmd_to_abort->Header.LUN.LunAddrBytes[0], 
-		TYPE_MSG);
-	/* sendcmd turned off interrupts on the board, turn 'em back on. */
-	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
+	memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
+	rc = sendcmd_withirq(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag,
+		0, 0, lunaddr, TYPE_MSG);
 	if (rc == 0)
 		return SUCCESS;
 	return FAILED;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index ca268ca..44fa201 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -474,7 +474,7 @@
 		disk->fops = &ida_fops;
 		if (j && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(hba[i]->queue, drv->blk_size);
+		blk_queue_logical_block_size(hba[i]->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = hba[i]->queue;
 		disk->private_data = drv;
@@ -903,7 +903,7 @@
 		goto startio;
 
 queue_next:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -912,17 +912,18 @@
 	if ((c = cmd_alloc(h,1)) == NULL)
 		goto startio;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	c->ctlr = h->ctlr;
 	c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv;
 	c->hdr.size = sizeof(rblk_t) >> 2;
 	c->size += sizeof(rblk_t);
 
-	c->req.hdr.blk = creq->sector;
+	c->req.hdr.blk = blk_rq_pos(creq);
 	c->rq = creq;
 DBGPX(
-	printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
+	printk("sector=%d, nr_sectors=%u\n",
+	       blk_rq_pos(creq), blk_rq_sectors(creq));
 );
 	sg_init_table(tmp_sg, SG_MAX);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
@@ -940,9 +941,9 @@
 						 tmp_sg[i].offset,
 						 tmp_sg[i].length, dir);
 	}
-DBGPX(	printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); );
+DBGPX(	printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); );
 	c->req.hdr.sg_cnt = seg;
-	c->req.hdr.blk_cnt = creq->nr_sectors;
+	c->req.hdr.blk_cnt = blk_rq_sectors(creq);
 	c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE;
 	c->type = CMD_RWREQ;
 
@@ -1024,8 +1025,7 @@
 				cmd->req.sg[i].size, ddir);
 
 	DBGPX(printk("Done with %p\n", rq););
-	if (__blk_end_request(rq, error, blk_rq_bytes(rq)))
-		BUG();
+	__blk_end_request_all(rq, error);
 }
 
 /*
@@ -1546,7 +1546,7 @@
 		drv_info_t *drv = &host->drv[i];
 		if (i && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(host->queue, drv->blk_size);
+		blk_queue_logical_block_size(host->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = host->queue;
 		disk->private_data = drv;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 1300df6..862b40c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -931,7 +931,7 @@
 	del_timer(&fd_timeout);
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
-	if (elv_next_request(floppy_queue))
+	if (current_req || blk_peek_request(floppy_queue))
 		do_fd_request(floppy_queue);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	wake_up(&fdc_wait);
@@ -2303,7 +2303,7 @@
 
 	/* current_count_sectors can be zero if transfer failed */
 	if (error)
-		nr_sectors = req->current_nr_sectors;
+		nr_sectors = blk_rq_cur_sectors(req);
 	if (__blk_end_request(req, error, nr_sectors << 9))
 		return;
 
@@ -2332,7 +2332,7 @@
 	if (uptodate) {
 		/* maintain values for invalidation on geometry
 		 * change */
-		block = current_count_sectors + req->sector;
+		block = current_count_sectors + blk_rq_pos(req);
 		INFBOUND(DRS->maxblock, block);
 		if (block > _floppy->sect)
 			DRS->maxtrack = 1;
@@ -2346,10 +2346,10 @@
 			/* record write error information */
 			DRWE->write_errors++;
 			if (DRWE->write_errors == 1) {
-				DRWE->first_error_sector = req->sector;
+				DRWE->first_error_sector = blk_rq_pos(req);
 				DRWE->first_error_generation = DRS->generation;
 			}
-			DRWE->last_error_sector = req->sector;
+			DRWE->last_error_sector = blk_rq_pos(req);
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
@@ -2503,24 +2503,23 @@
 
 	max_sector = transfer_size(ssize,
 				   min(max_sector, max_sector_2),
-				   current_req->nr_sectors);
+				   blk_rq_sectors(current_req));
 
 	if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
-	    buffer_max > fsector_t + current_req->nr_sectors)
+	    buffer_max > fsector_t + blk_rq_sectors(current_req))
 		current_count_sectors = min_t(int, buffer_max - fsector_t,
-					      current_req->nr_sectors);
+					      blk_rq_sectors(current_req));
 
 	remaining = current_count_sectors << 9;
 #ifdef FLOPPY_SANITY_CHECK
-	if ((remaining >> 9) > current_req->nr_sectors &&
-	    CT(COMMAND) == FD_WRITE) {
+	if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) {
 		DPRINT("in copy buffer\n");
 		printk("current_count_sectors=%ld\n", current_count_sectors);
 		printk("remaining=%d\n", remaining >> 9);
-		printk("current_req->nr_sectors=%ld\n",
-		       current_req->nr_sectors);
+		printk("current_req->nr_sectors=%u\n",
+		       blk_rq_sectors(current_req));
 		printk("current_req->current_nr_sectors=%u\n",
-		       current_req->current_nr_sectors);
+		       blk_rq_cur_sectors(current_req));
 		printk("max_sector=%d\n", max_sector);
 		printk("ssize=%d\n", ssize);
 	}
@@ -2530,7 +2529,7 @@
 
 	dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9);
 
-	size = current_req->current_nr_sectors << 9;
+	size = blk_rq_cur_bytes(current_req);
 
 	rq_for_each_segment(bv, current_req, iter) {
 		if (!remaining)
@@ -2648,10 +2647,10 @@
 
 	max_sector = _floppy->sect * _floppy->head;
 
-	TRACK = (int)current_req->sector / max_sector;
-	fsector_t = (int)current_req->sector % max_sector;
+	TRACK = (int)blk_rq_pos(current_req) / max_sector;
+	fsector_t = (int)blk_rq_pos(current_req) % max_sector;
 	if (_floppy->track && TRACK >= _floppy->track) {
-		if (current_req->current_nr_sectors & 1) {
+		if (blk_rq_cur_sectors(current_req) & 1) {
 			current_count_sectors = 1;
 			return 1;
 		} else
@@ -2669,7 +2668,7 @@
 		if (fsector_t >= max_sector) {
 			current_count_sectors =
 			    min_t(int, _floppy->sect - fsector_t,
-				  current_req->nr_sectors);
+				  blk_rq_sectors(current_req));
 			return 1;
 		}
 		SIZECODE = 2;
@@ -2720,7 +2719,7 @@
 
 	in_sector_offset = (fsector_t % _floppy->sect) % ssize;
 	aligned_sector_t = fsector_t - in_sector_offset;
-	max_size = current_req->nr_sectors;
+	max_size = blk_rq_sectors(current_req);
 	if ((raw_cmd->track == buffer_track) &&
 	    (current_drive == buffer_drive) &&
 	    (fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
@@ -2729,10 +2728,10 @@
 			copy_buffer(1, max_sector, buffer_max);
 			return 1;
 		}
-	} else if (in_sector_offset || current_req->nr_sectors < ssize) {
+	} else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
 		if (CT(COMMAND) == FD_WRITE) {
-			if (fsector_t + current_req->nr_sectors > ssize &&
-			    fsector_t + current_req->nr_sectors < ssize + ssize)
+			if (fsector_t + blk_rq_sectors(current_req) > ssize &&
+			    fsector_t + blk_rq_sectors(current_req) < ssize + ssize)
 				max_size = ssize + ssize;
 			else
 				max_size = ssize;
@@ -2776,7 +2775,7 @@
 		    (indirect * 2 > direct * 3 &&
 		     *errors < DP->max_errors.read_track && ((!probing
 		       || (DP->read_track & (1 << DRS->probed_format)))))) {
-			max_size = current_req->nr_sectors;
+			max_size = blk_rq_sectors(current_req);
 		} else {
 			raw_cmd->kernel_data = current_req->buffer;
 			raw_cmd->length = current_count_sectors << 9;
@@ -2801,7 +2800,7 @@
 	    fsector_t > buffer_max ||
 	    fsector_t < buffer_min ||
 	    ((CT(COMMAND) == FD_READ ||
-	      (!in_sector_offset && current_req->nr_sectors >= ssize)) &&
+	      (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
 	     max_sector > 2 * max_buffer_sectors + buffer_min &&
 	     max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)
 	    /* not enough space */
@@ -2879,8 +2878,8 @@
 				printk("write\n");
 			return 0;
 		}
-	} else if (raw_cmd->length > current_req->nr_sectors << 9 ||
-		   current_count_sectors > current_req->nr_sectors) {
+	} else if (raw_cmd->length > blk_rq_bytes(current_req) ||
+		   current_count_sectors > blk_rq_sectors(current_req)) {
 		DPRINT("buffer overrun in direct transfer\n");
 		return 0;
 	} else if (raw_cmd->length < current_count_sectors << 9) {
@@ -2913,7 +2912,7 @@
 			struct request *req;
 
 			spin_lock_irq(floppy_queue->queue_lock);
-			req = elv_next_request(floppy_queue);
+			req = blk_fetch_request(floppy_queue);
 			spin_unlock_irq(floppy_queue->queue_lock);
 			if (!req) {
 				do_floppy = NULL;
@@ -2990,8 +2989,9 @@
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
-		       current_req->cmd_type, current_req->cmd_flags);
+		printk("sect=%ld type=%x flags=%x\n",
+		       (long)blk_rq_pos(current_req), current_req->cmd_type,
+		       current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
@@ -4148,6 +4148,24 @@
 {
 }
 
+static int floppy_resume(struct platform_device *dev)
+{
+	int fdc;
+
+	for (fdc = 0; fdc < N_FDC; fdc++)
+		if (FDCS->address != -1)
+			user_reset_fdc(-1, FD_RESET_ALWAYS, 0);
+
+	return 0;
+}
+
+static struct platform_driver floppy_driver = {
+	.resume = floppy_resume,
+	.driver = {
+		.name = "floppy",
+	},
+};
+
 static struct platform_device floppy_device[N_DRIVE];
 
 static struct kobject *floppy_find(dev_t dev, int *part, void *data)
@@ -4196,10 +4214,14 @@
 	if (err)
 		goto out_put_disk;
 
+	err = platform_driver_register(&floppy_driver);
+	if (err)
+		goto out_unreg_blkdev;
+
 	floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
 	if (!floppy_queue) {
 		err = -ENOMEM;
-		goto out_unreg_blkdev;
+		goto out_unreg_driver;
 	}
 	blk_queue_max_sectors(floppy_queue, 64);
 
@@ -4346,6 +4368,8 @@
 out_unreg_region:
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	blk_cleanup_queue(floppy_queue);
+out_unreg_driver:
+	platform_driver_unregister(&floppy_driver);
 out_unreg_blkdev:
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 out_put_disk:
@@ -4566,6 +4590,7 @@
 
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
+	platform_driver_unregister(&floppy_driver);
 
 	for (drive = 0; drive < N_DRIVE; drive++) {
 		del_timer_sync(&motor_off_timer[drive]);
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index baaa9e4..f65b3f3 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -98,10 +98,9 @@
 
 static DEFINE_SPINLOCK(hd_lock);
 static struct request_queue *hd_queue;
+static struct request *hd_req;
 
 #define MAJOR_NR HD_MAJOR
-#define QUEUE (hd_queue)
-#define CURRENT elv_next_request(hd_queue)
 
 #define TIMEOUT_VALUE	(6*HZ)
 #define	HD_DELAY	0
@@ -195,11 +194,24 @@
 	NR_HD = hdind+1;
 }
 
+static bool hd_end_request(int err, unsigned int bytes)
+{
+	if (__blk_end_request(hd_req, err, bytes))
+		return true;
+	hd_req = NULL;
+	return false;
+}
+
+static bool hd_end_request_cur(int err)
+{
+	return hd_end_request(err, blk_rq_cur_bytes(hd_req));
+}
+
 static void dump_status(const char *msg, unsigned int stat)
 {
 	char *name = "hd?";
-	if (CURRENT)
-		name = CURRENT->rq_disk->disk_name;
+	if (hd_req)
+		name = hd_req->rq_disk->disk_name;
 
 #ifdef VERBOSE_ERRORS
 	printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
@@ -227,8 +239,8 @@
 		if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
 			printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
 				inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
-			if (CURRENT)
-				printk(", sector=%ld", CURRENT->sector);
+			if (hd_req)
+				printk(", sector=%ld", blk_rq_pos(hd_req));
 		}
 		printk("\n");
 	}
@@ -406,11 +418,12 @@
  */
 static void bad_rw_intr(void)
 {
-	struct request *req = CURRENT;
+	struct request *req = hd_req;
+
 	if (req != NULL) {
 		struct hd_i_struct *disk = req->rq_disk->private_data;
 		if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-			end_request(req, 0);
+			hd_end_request_cur(-EIO);
 			disk->special_op = disk->recalibrate = 1;
 		} else if (req->errors % RESET_FREQ == 0)
 			reset = 1;
@@ -452,37 +465,30 @@
 	bad_rw_intr();
 	hd_request();
 	return;
+
 ok_to_read:
-	req = CURRENT;
+	req = hd_req;
 	insw(HD_DATA, req->buffer, 256);
-	req->sector++;
-	req->buffer += 512;
-	req->errors = 0;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
 #ifdef DEBUG
-	printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
-		req->rq_disk->disk_name, req->sector, req->nr_sectors,
-		req->buffer+512);
+	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
+	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
+	       blk_rq_sectors(req) - 1, req->buffer+512);
 #endif
-	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
-	if (i > 0) {
+	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&read_intr);
 		return;
 	}
+
 	(void) inb_p(HD_STATUS);
 #if (HD_DELAY > 0)
 	last_req = read_timer();
 #endif
-	if (elv_next_request(QUEUE))
-		hd_request();
-	return;
+	hd_request();
 }
 
 static void write_intr(void)
 {
-	struct request *req = CURRENT;
+	struct request *req = hd_req;
 	int i;
 	int retries = 100000;
 
@@ -492,30 +498,25 @@
 			continue;
 		if (!OK_STATUS(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & DRQ_STAT))
+		if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
 			goto ok_to_write;
 	} while (--retries > 0);
 	dump_status("write_intr", i);
 	bad_rw_intr();
 	hd_request();
 	return;
+
 ok_to_write:
-	req->sector++;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-	req->buffer += 512;
-	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
-	if (i > 0) {
+	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
-	} else {
-#if (HD_DELAY > 0)
-		last_req = read_timer();
-#endif
-		hd_request();
+		return;
 	}
-	return;
+
+#if (HD_DELAY > 0)
+	last_req = read_timer();
+#endif
+	hd_request();
 }
 
 static void recal_intr(void)
@@ -537,18 +538,18 @@
 
 	do_hd = NULL;
 
-	if (!CURRENT)
+	if (!hd_req)
 		return;
 
 	spin_lock_irq(hd_queue->queue_lock);
 	reset = 1;
-	name = CURRENT->rq_disk->disk_name;
+	name = hd_req->rq_disk->disk_name;
 	printk("%s: timeout\n", name);
-	if (++CURRENT->errors >= MAX_ERRORS) {
+	if (++hd_req->errors >= MAX_ERRORS) {
 #ifdef DEBUG
 		printk("%s: too many errors\n", name);
 #endif
-		end_request(CURRENT, 0);
+		hd_end_request_cur(-EIO);
 	}
 	hd_request();
 	spin_unlock_irq(hd_queue->queue_lock);
@@ -563,7 +564,7 @@
 	}
 	if (disk->head > 16) {
 		printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-		end_request(req, 0);
+		hd_end_request_cur(-EIO);
 	}
 	disk->special_op = 0;
 	return 1;
@@ -590,24 +591,27 @@
 repeat:
 	del_timer(&device_timer);
 
-	req = CURRENT;
-	if (!req) {
-		do_hd = NULL;
-		return;
+	if (!hd_req) {
+		hd_req = blk_fetch_request(hd_queue);
+		if (!hd_req) {
+			do_hd = NULL;
+			return;
+		}
 	}
+	req = hd_req;
 
 	if (reset) {
 		reset_hd();
 		return;
 	}
 	disk = req->rq_disk->private_data;
-	block = req->sector;
-	nsect = req->nr_sectors;
+	block = blk_rq_pos(req);
+	nsect = blk_rq_sectors(req);
 	if (block >= get_capacity(req->rq_disk) ||
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
 			req->rq_disk->disk_name, block, nsect);
-		end_request(req, 0);
+		hd_end_request_cur(-EIO);
 		goto repeat;
 	}
 
@@ -647,7 +651,7 @@
 			break;
 		default:
 			printk("unknown hd-command\n");
-			end_request(req, 0);
+			hd_end_request_cur(-EIO);
 			break;
 		}
 	}
@@ -720,7 +724,7 @@
 	blk_queue_max_sectors(hd_queue, 255);
 	init_timer(&device_timer);
 	device_timer.function = hd_times_out;
-	blk_queue_hardsect_size(hd_queue, 512);
+	blk_queue_logical_block_size(hd_queue, 512);
 
 	if (!NR_HD) {
 		/*
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ddae808..801f4ab 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -511,11 +511,7 @@
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
-	if (lo->lo_biotail) {
-		lo->lo_biotail->bi_next = bio;
-		lo->lo_biotail = bio;
-	} else
-		lo->lo_bio = lo->lo_biotail = bio;
+	bio_list_add(&lo->lo_bio_list, bio);
 }
 
 /*
@@ -523,16 +519,7 @@
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
-	struct bio *bio;
-
-	if ((bio = lo->lo_bio)) {
-		if (bio == lo->lo_biotail)
-			lo->lo_biotail = NULL;
-		lo->lo_bio = bio->bi_next;
-		bio->bi_next = NULL;
-	}
-
-	return bio;
+	return bio_list_pop(&lo->lo_bio_list);
 }
 
 static int loop_make_request(struct request_queue *q, struct bio *old_bio)
@@ -609,12 +596,13 @@
 
 	set_user_nice(current, -20);
 
-	while (!kthread_should_stop() || lo->lo_bio) {
+	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
 
 		wait_event_interruptible(lo->lo_event,
-				lo->lo_bio || kthread_should_stop());
+				!bio_list_empty(&lo->lo_bio_list) ||
+				kthread_should_stop());
 
-		if (!lo->lo_bio)
+		if (bio_list_empty(&lo->lo_bio_list))
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
@@ -721,10 +709,6 @@
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 		goto out_putf;
 
-	/* new backing store needs to support loop (eg splice_read) */
-	if (!inode->i_fop->splice_read)
-		goto out_putf;
-
 	/* size of the new backing store needs to be the same */
 	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 		goto out_putf;
@@ -800,12 +784,7 @@
 	error = -EINVAL;
 	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 		const struct address_space_operations *aops = mapping->a_ops;
-		/*
-		 * If we can't read - sorry. If we only can't write - well,
-		 * it's going to be read-only.
-		 */
-		if (!file->f_op->splice_read)
-			goto out_putf;
+
 		if (aops->write_begin)
 			lo_flags |= LO_FLAGS_USE_AOPS;
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
@@ -841,7 +820,7 @@
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-	lo->lo_bio = lo->lo_biotail = NULL;
+	bio_list_init(&lo->lo_bio_list);
 
 	/*
 	 * set queue make_request_fn, and add limits based on lower level
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index f389835..60de5a0 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -17,71 +17,220 @@
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
-#include <linux/libata.h>
+#include <linux/ata.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
-#include <linux/mg_disk.h>
 
 #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
 
+/* name for block device */
+#define MG_DISK_NAME "mgd"
+/* name for platform device */
+#define MG_DEV_NAME "mg_disk"
+
+#define MG_DISK_MAJ 0
+#define MG_DISK_MAX_PART 16
+#define MG_SECTOR_SIZE 512
+#define MG_MAX_SECTS 256
+
+/* Register offsets */
+#define MG_BUFF_OFFSET			0x8000
+#define MG_STORAGE_BUFFER_SIZE		0x200
+#define MG_REG_OFFSET			0xC000
+#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
+#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
+#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
+#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
+#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
+#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
+#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
+#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
+#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
+#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
+#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
+
+/* handy status */
+#define MG_STAT_READY	(ATA_DRDY | ATA_DSC)
+#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \
+				 ATA_ERR))) == MG_STAT_READY)
+
+/* error code for others */
+#define MG_ERR_NONE		0
+#define MG_ERR_TIMEOUT		0x100
+#define MG_ERR_INIT_STAT	0x101
+#define MG_ERR_TRANSLATION	0x102
+#define MG_ERR_CTRL_RST		0x103
+#define MG_ERR_INV_STAT		0x104
+#define MG_ERR_RSTOUT		0x105
+
+#define MG_MAX_ERRORS	6	/* Max read/write errors */
+
+/* command */
+#define MG_CMD_RD 0x20
+#define MG_CMD_WR 0x30
+#define MG_CMD_SLEEP 0x99
+#define MG_CMD_WAKEUP 0xC3
+#define MG_CMD_ID 0xEC
+#define MG_CMD_WR_CONF 0x3C
+#define MG_CMD_RD_CONF 0x40
+
+/* operation mode */
+#define MG_OP_CASCADE (1 << 0)
+#define MG_OP_CASCADE_SYNC_RD (1 << 1)
+#define MG_OP_CASCADE_SYNC_WR (1 << 2)
+#define MG_OP_INTERLEAVE (1 << 3)
+
+/* synchronous */
+#define MG_BURST_LAT_4 (3 << 4)
+#define MG_BURST_LAT_5 (4 << 4)
+#define MG_BURST_LAT_6 (5 << 4)
+#define MG_BURST_LAT_7 (6 << 4)
+#define MG_BURST_LAT_8 (7 << 4)
+#define MG_BURST_LEN_4 (1 << 1)
+#define MG_BURST_LEN_8 (2 << 1)
+#define MG_BURST_LEN_16 (3 << 1)
+#define MG_BURST_LEN_32 (4 << 1)
+#define MG_BURST_LEN_CONT (0 << 1)
+
+/* timeout value (unit: ms) */
+#define MG_TMAX_CONF_TO_CMD	1
+#define MG_TMAX_WAIT_RD_DRQ	10
+#define MG_TMAX_WAIT_WR_DRQ	500
+#define MG_TMAX_RST_TO_BUSY	10
+#define MG_TMAX_HDRST_TO_RDY	500
+#define MG_TMAX_SWRST_TO_RDY	500
+#define MG_TMAX_RSTOUT		3000
+
+/* device attribution */
+/* use mflash as boot device */
+#define MG_BOOT_DEV		(1 << 0)
+/* use mflash as storage device */
+#define MG_STORAGE_DEV		(1 << 1)
+/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
+#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
+
+#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
+
+/* names of GPIO resource */
+#define MG_RST_PIN	"mg_rst"
+/* except MG_BOOT_DEV, reset-out pin should be assigned */
+#define MG_RSTOUT_PIN	"mg_rstout"
+
+/* private driver data */
+struct mg_drv_data {
+	/* disk resource */
+	u32 use_polling;
+
+	/* device attribution */
+	u32 dev_attr;
+
+	/* internally used */
+	struct mg_host *host;
+};
+
+/* main structure for mflash driver */
+struct mg_host {
+	struct device *dev;
+
+	struct request_queue *breq;
+	struct request *req;
+	spinlock_t lock;
+	struct gendisk *gd;
+
+	struct timer_list timer;
+	void (*mg_do_intr) (struct mg_host *);
+
+	u16 id[ATA_ID_WORDS];
+
+	u16 cyls;
+	u16 heads;
+	u16 sectors;
+	u32 n_sectors;
+	u32 nres_sectors;
+
+	void __iomem *dev_base;
+	unsigned int irq;
+	unsigned int rst;
+	unsigned int rstout;
+
+	u32 major;
+	u32 error;
+};
+
+/*
+ * Debugging macro and defines
+ */
+#undef DO_MG_DEBUG
+#ifdef DO_MG_DEBUG
+#  define MG_DBG(fmt, args...) \
+	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
+#else /* CONFIG_MG_DEBUG */
+#  define MG_DBG(fmt, args...) do { } while (0)
+#endif /* CONFIG_MG_DEBUG */
+
 static void mg_request(struct request_queue *);
 
+static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes)
+{
+	if (__blk_end_request(host->req, err, nr_bytes))
+		return true;
+
+	host->req = NULL;
+	return false;
+}
+
+static bool mg_end_request_cur(struct mg_host *host, int err)
+{
+	return mg_end_request(host, err, blk_rq_cur_bytes(host->req));
+}
+
 static void mg_dump_status(const char *msg, unsigned int stat,
 		struct mg_host *host)
 {
 	char *name = MG_DISK_NAME;
-	struct request *req;
 
-	if (host->breq) {
-		req = elv_next_request(host->breq);
-		if (req)
-			name = req->rq_disk->disk_name;
-	}
+	if (host->req)
+		name = host->req->rq_disk->disk_name;
 
 	printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-	if (stat & MG_REG_STATUS_BIT_BUSY)
+	if (stat & ATA_BUSY)
 		printk("Busy ");
-	if (stat & MG_REG_STATUS_BIT_READY)
+	if (stat & ATA_DRDY)
 		printk("DriveReady ");
-	if (stat & MG_REG_STATUS_BIT_WRITE_FAULT)
+	if (stat & ATA_DF)
 		printk("WriteFault ");
-	if (stat & MG_REG_STATUS_BIT_SEEK_DONE)
+	if (stat & ATA_DSC)
 		printk("SeekComplete ");
-	if (stat & MG_REG_STATUS_BIT_DATA_REQ)
+	if (stat & ATA_DRQ)
 		printk("DataRequest ");
-	if (stat & MG_REG_STATUS_BIT_CORRECTED_ERROR)
+	if (stat & ATA_CORR)
 		printk("CorrectedError ");
-	if (stat & MG_REG_STATUS_BIT_ERROR)
+	if (stat & ATA_ERR)
 		printk("Error ");
 	printk("}\n");
-	if ((stat & MG_REG_STATUS_BIT_ERROR) == 0) {
+	if ((stat & ATA_ERR) == 0) {
 		host->error = 0;
 	} else {
 		host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR);
 		printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg,
 				host->error & 0xff);
-		if (host->error & MG_REG_ERR_BBK)
+		if (host->error & ATA_BBK)
 			printk("BadSector ");
-		if (host->error & MG_REG_ERR_UNC)
+		if (host->error & ATA_UNC)
 			printk("UncorrectableError ");
-		if (host->error & MG_REG_ERR_IDNF)
+		if (host->error & ATA_IDNF)
 			printk("SectorIdNotFound ");
-		if (host->error & MG_REG_ERR_ABRT)
+		if (host->error & ATA_ABORTED)
 			printk("DriveStatusError ");
-		if (host->error & MG_REG_ERR_AMNF)
+		if (host->error & ATA_AMNF)
 			printk("AddrMarkNotFound ");
 		printk("}");
-		if (host->error &
-				(MG_REG_ERR_BBK | MG_REG_ERR_UNC |
-				 MG_REG_ERR_IDNF | MG_REG_ERR_AMNF)) {
-			if (host->breq) {
-				req = elv_next_request(host->breq);
-				if (req)
-					printk(", sector=%u", (u32)req->sector);
-			}
-
+		if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
+			if (host->req)
+				printk(", sector=%u",
+				       (unsigned int)blk_rq_pos(host->req));
 		}
 		printk("\n");
 	}
@@ -100,12 +249,12 @@
 
 	do {
 		cur_jiffies = jiffies;
-		if (status & MG_REG_STATUS_BIT_BUSY) {
-			if (expect == MG_REG_STATUS_BIT_BUSY)
+		if (status & ATA_BUSY) {
+			if (expect == ATA_BUSY)
 				break;
 		} else {
 			/* Check the error condition! */
-			if (status & MG_REG_STATUS_BIT_ERROR) {
+			if (status & ATA_ERR) {
 				mg_dump_status("mg_wait", status, host);
 				break;
 			}
@@ -114,8 +263,8 @@
 				if (MG_READY_OK(status))
 					break;
 
-			if (expect == MG_REG_STATUS_BIT_DATA_REQ)
-				if (status & MG_REG_STATUS_BIT_DATA_REQ)
+			if (expect == ATA_DRQ)
+				if (status & ATA_DRQ)
 					break;
 		}
 		if (!msec) {
@@ -173,6 +322,42 @@
 	return IRQ_HANDLED;
 }
 
+/* local copy of ata_id_string() */
+static void mg_id_string(const u16 *id, unsigned char *s,
+			 unsigned int ofs, unsigned int len)
+{
+	unsigned int c;
+
+	BUG_ON(len & 1);
+
+	while (len > 0) {
+		c = id[ofs] >> 8;
+		*s = c;
+		s++;
+
+		c = id[ofs] & 0xff;
+		*s = c;
+		s++;
+
+		ofs++;
+		len -= 2;
+	}
+}
+
+/* local copy of ata_id_c_string() */
+static void mg_id_c_string(const u16 *id, unsigned char *s,
+			   unsigned int ofs, unsigned int len)
+{
+	unsigned char *p;
+
+	mg_id_string(id, s, ofs, len - 1);
+
+	p = s + strnlen(s, len - 1);
+	while (p > s && p[-1] == ' ')
+		p--;
+	*p = '\0';
+}
+
 static int mg_get_disk_id(struct mg_host *host)
 {
 	u32 i;
@@ -184,12 +369,10 @@
 	char serial[ATA_ID_SERNO_LEN + 1];
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	err = mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_RD_DRQ);
+	err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ);
 	if (err)
 		return err;
 
@@ -219,9 +402,9 @@
 		host->n_sectors -= host->nres_sectors;
 	}
 
-	ata_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
-	ata_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
-	ata_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
+	mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
+	mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
+	mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
 	printk(KERN_INFO "mg_disk: model: %s\n", model);
 	printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev);
 	printk(KERN_INFO "mg_disk: serial: %s\n", serial);
@@ -229,8 +412,7 @@
 			host->n_sectors, host->nres_sectors);
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	return err;
 }
@@ -244,7 +426,7 @@
 
 	/* hdd rst low */
 	gpio_set_value(host->rst, 0);
-	err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY);
+	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
 	if (err)
 		return err;
 
@@ -255,17 +437,14 @@
 		return err;
 
 	/* soft reset on */
-	outb(MG_REG_CTRL_RESET |
-			(prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE :
-			 MG_REG_CTRL_INTR_ENABLE),
+	outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0),
 			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-	err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY);
+	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
 	if (err)
 		return err;
 
 	/* soft reset off */
-	outb(prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE :
-			MG_REG_CTRL_INTR_ENABLE,
+	outb(prv_data->use_polling ? ATA_NIEN : 0,
 			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 	err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY);
 	if (err)
@@ -281,11 +460,10 @@
 
 static void mg_bad_rw_intr(struct mg_host *host)
 {
-	struct request *req = elv_next_request(host->breq);
-	if (req != NULL)
-		if (++req->errors >= MG_MAX_ERRORS ||
-				host->error == MG_ERR_TIMEOUT)
-			end_request(req, 0);
+	if (host->req)
+		if (++host->req->errors >= MG_MAX_ERRORS ||
+		    host->error == MG_ERR_TIMEOUT)
+			mg_end_request_cur(host, -EIO);
 }
 
 static unsigned int mg_out(struct mg_host *host,
@@ -311,7 +489,7 @@
 			MG_REG_CYL_LOW);
 	outb((u8)(sect_num >> 16), (unsigned long)host->dev_base +
 			MG_REG_CYL_HIGH);
-	outb((u8)((sect_num >> 24) | MG_REG_HEAD_LBA_MODE),
+	outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS),
 			(unsigned long)host->dev_base + MG_REG_DRV_HEAD);
 	outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND);
 	return MG_ERR_NONE;
@@ -319,105 +497,77 @@
 
 static void mg_read(struct request *req)
 {
-	u32 remains, j;
+	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	remains = req->nr_sectors;
-
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) !=
-			MG_ERR_NONE)
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_RD, NULL) != MG_ERR_NONE)
 		mg_bad_rw_intr(host);
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-			remains, req->sector, req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
 
-	while (remains) {
-		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
-					MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
+	do {
+		u16 *buff = (u16 *)req->buffer;
+
+		if (mg_wait(host, ATA_DRQ,
+			    MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
-		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) {
-			*(u16 *)req->buffer =
-				inw((unsigned long)host->dev_base +
-						MG_BUFF_OFFSET + (j << 1));
-			req->buffer += 2;
-		}
-
-		req->sector++;
-		req->errors = 0;
-		remains = --req->nr_sectors;
-		--req->current_nr_sectors;
-
-		if (req->current_nr_sectors <= 0) {
-			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
-			if (remains > 0)
-				req = elv_next_request(host->breq);
-		}
+		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++)
+			*buff++ = inw((unsigned long)host->dev_base +
+				      MG_BUFF_OFFSET + (j << 1));
 
 		outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	}
+	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_write(struct request *req)
 {
-	u32 remains, j;
+	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	remains = req->nr_sectors;
-
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) !=
-			MG_ERR_NONE) {
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_WR, NULL) != MG_ERR_NONE) {
 		mg_bad_rw_intr(host);
 		return;
 	}
 
-
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-			remains, req->sector, req->buffer);
-	while (remains) {
-		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
-					MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
+
+	do {
+		u16 *buff = (u16 *)req->buffer;
+
+	if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
-		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) {
-			outw(*(u16 *)req->buffer,
-					(unsigned long)host->dev_base +
-					MG_BUFF_OFFSET + (j << 1));
-			req->buffer += 2;
-		}
-		req->sector++;
-		remains = --req->nr_sectors;
-		--req->current_nr_sectors;
-
-		if (req->current_nr_sectors <= 0) {
-			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
-			if (remains > 0)
-				req = elv_next_request(host->breq);
-		}
+		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++)
+			outw(*buff++, (unsigned long)host->dev_base +
+				      MG_BUFF_OFFSET + (j << 1));
 
 		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	}
+	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_read_intr(struct mg_host *host)
 {
+	struct request *req = host->req;
 	u32 i;
-	struct request *req;
+	u16 *buff;
 
 	/* check status */
 	do {
 		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & MG_REG_STATUS_BIT_BUSY)
+		if (i & ATA_BUSY)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if (i & MG_REG_STATUS_BIT_DATA_REQ)
+		if (i & ATA_DRQ)
 			goto ok_to_read;
 	} while (0);
 	mg_dump_status("mg_read_intr", i, host);
@@ -427,60 +577,42 @@
 
 ok_to_read:
 	/* get current segment of request */
-	req = elv_next_request(host->breq);
+	buff = (u16 *)req->buffer;
 
 	/* read 1 sector */
-	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) {
-		*(u16 *)req->buffer =
-			inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
-					(i << 1));
-		req->buffer += 2;
-	}
+	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
+		*buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
+			      (i << 1));
 
-	/* manipulate request */
 	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-			req->sector, req->nr_sectors - 1, req->buffer);
-
-	req->sector++;
-	req->errors = 0;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-
-	/* let know if current segment done */
-	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
-
-	/* set handler if read remains */
-	if (i > 0) {
-		host->mg_do_intr = mg_read_intr;
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-	}
+	       blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer);
 
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
 
-	/* goto next request */
-	if (!i)
+	if (mg_end_request(host, 0, MG_SECTOR_SIZE)) {
+		/* set handler if read remains */
+		host->mg_do_intr = mg_read_intr;
+		mod_timer(&host->timer, jiffies + 3 * HZ);
+	} else /* goto next request */
 		mg_request(host->breq);
 }
 
 static void mg_write_intr(struct mg_host *host)
 {
+	struct request *req = host->req;
 	u32 i, j;
 	u16 *buff;
-	struct request *req;
-
-	/* get current segment of request */
-	req = elv_next_request(host->breq);
+	bool rem;
 
 	/* check status */
 	do {
 		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & MG_REG_STATUS_BIT_BUSY)
+		if (i & ATA_BUSY)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & MG_REG_STATUS_BIT_DATA_REQ))
+		if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
 			goto ok_to_write;
 	} while (0);
 	mg_dump_status("mg_write_intr", i, host);
@@ -489,18 +621,8 @@
 	return;
 
 ok_to_write:
-	/* manipulate request */
-	req->sector++;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-	req->buffer += MG_SECTOR_SIZE;
-
-	/* let know if current segment or all done */
-	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
-
-	/* write 1 sector and set handler if remains */
-	if (i > 0) {
+	if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) {
+		/* write 1 sector and set handler if remains */
 		buff = (u16 *)req->buffer;
 		for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) {
 			outw(*buff, (unsigned long)host->dev_base +
@@ -508,7 +630,7 @@
 			buff++;
 		}
 		MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-				req->sector, req->nr_sectors, req->buffer);
+		       blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
 		host->mg_do_intr = mg_write_intr;
 		mod_timer(&host->timer, jiffies + 3 * HZ);
 	}
@@ -516,7 +638,7 @@
 	/* send write confirm */
 	outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
 
-	if (!i)
+	if (!rem)
 		mg_request(host->breq);
 }
 
@@ -524,49 +646,45 @@
 {
 	struct mg_host *host = (struct mg_host *)data;
 	char *name;
-	struct request *req;
 
 	spin_lock_irq(&host->lock);
 
-	req = elv_next_request(host->breq);
-	if (!req)
+	if (!host->req)
 		goto out_unlock;
 
 	host->mg_do_intr = NULL;
 
-	name = req->rq_disk->disk_name;
+	name = host->req->rq_disk->disk_name;
 	printk(KERN_DEBUG "%s: timeout\n", name);
 
 	host->error = MG_ERR_TIMEOUT;
 	mg_bad_rw_intr(host);
 
-	mg_request(host->breq);
 out_unlock:
+	mg_request(host->breq);
 	spin_unlock_irq(&host->lock);
 }
 
 static void mg_request_poll(struct request_queue *q)
 {
-	struct request *req;
-	struct mg_host *host;
+	struct mg_host *host = q->queuedata;
 
-	while ((req = elv_next_request(q)) != NULL) {
-		host = req->rq_disk->private_data;
-		if (blk_fs_request(req)) {
-			switch (rq_data_dir(req)) {
-			case READ:
-				mg_read(req);
+	while (1) {
+		if (!host->req) {
+			host->req = blk_fetch_request(q);
+			if (!host->req)
 				break;
-			case WRITE:
-				mg_write(req);
-				break;
-			default:
-				printk(KERN_WARNING "%s:%d unknown command\n",
-						__func__, __LINE__);
-				end_request(req, 0);
-				break;
-			}
 		}
+
+		if (unlikely(!blk_fs_request(host->req))) {
+			mg_end_request_cur(host, -EIO);
+			continue;
+		}
+
+		if (rq_data_dir(host->req) == READ)
+			mg_read(host->req);
+		else
+			mg_write(host->req);
 	}
 }
 
@@ -588,18 +706,15 @@
 		break;
 	case WRITE:
 		/* TODO : handler */
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
 				!= MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return host->error;
 		}
 		del_timer(&host->timer);
-		mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_WR_DRQ);
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		if (host->error) {
 			mg_bad_rw_intr(host);
 			return host->error;
@@ -614,11 +729,6 @@
 		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
 		break;
-	default:
-		printk(KERN_WARNING "%s:%d unknown command\n",
-				__func__, __LINE__);
-		end_request(req, 0);
-		break;
 	}
 	return MG_ERR_NONE;
 }
@@ -626,16 +736,17 @@
 /* This function also called from IRQ context */
 static void mg_request(struct request_queue *q)
 {
+	struct mg_host *host = q->queuedata;
 	struct request *req;
-	struct mg_host *host;
 	u32 sect_num, sect_cnt;
 
 	while (1) {
-		req = elv_next_request(q);
-		if (!req)
-			return;
-
-		host = req->rq_disk->private_data;
+		if (!host->req) {
+			host->req = blk_fetch_request(q);
+			if (!host->req)
+				break;
+		}
+		req = host->req;
 
 		/* check unwanted request call */
 		if (host->mg_do_intr)
@@ -643,9 +754,9 @@
 
 		del_timer(&host->timer);
 
-		sect_num = req->sector;
+		sect_num = blk_rq_pos(req);
 		/* deal whole segments */
-		sect_cnt = req->nr_sectors;
+		sect_cnt = blk_rq_sectors(req);
 
 		/* sanity check */
 		if (sect_num >= get_capacity(req->rq_disk) ||
@@ -655,12 +766,14 @@
 					"%s: bad access: sector=%d, count=%d\n",
 					req->rq_disk->disk_name,
 					sect_num, sect_cnt);
-			end_request(req, 0);
+			mg_end_request_cur(host, -EIO);
 			continue;
 		}
 
-		if (!blk_fs_request(req))
-			return;
+		if (unlikely(!blk_fs_request(req))) {
+			mg_end_request_cur(host, -EIO);
+			continue;
+		}
 
 		if (!mg_issue_req(req, host, sect_num, sect_cnt))
 			return;
@@ -690,9 +803,7 @@
 		return -EIO;
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND);
 	/* wait until mflash deep sleep */
@@ -700,9 +811,7 @@
 
 	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) {
 		if (!prv_data->use_polling)
-			outb(MG_REG_CTRL_INTR_ENABLE,
-					(unsigned long)host->dev_base +
-					MG_REG_DRV_CTRL);
+			outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		return -EIO;
 	}
 
@@ -725,8 +834,7 @@
 		return -EIO;
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	return 0;
 }
@@ -877,6 +985,7 @@
 				__func__, __LINE__);
 		goto probe_err_5;
 	}
+	host->breq->queuedata = host;
 
 	/* mflash is random device, thanx for the noop */
 	elevator_exit(host->breq->elevator);
@@ -887,7 +996,7 @@
 		goto probe_err_6;
 	}
 	blk_queue_max_sectors(host->breq, MG_MAX_SECTS);
-	blk_queue_hardsect_size(host->breq, MG_SECTOR_SIZE);
+	blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
 
 	init_timer(&host->timer);
 	host->timer.function = mg_times_out;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4d6de4f..5d23ffa 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -110,7 +110,7 @@
 			req, error ? "failed" : "done");
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request(req, error, req->nr_sectors << 9);
+	__blk_end_request_all(req, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -231,19 +231,19 @@
 {
 	int result, flags;
 	struct nbd_request request;
-	unsigned long size = req->nr_sectors << 9;
+	unsigned long size = blk_rq_bytes(req);
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
 	request.type = htonl(nbd_cmd(req));
-	request.from = cpu_to_be64((u64) req->sector << 9);
+	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 	request.len = htonl(size);
 	memcpy(request.handle, &req, sizeof(req));
 
-	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
+	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
 			lo->disk->disk_name, req,
 			nbdcmd_to_ascii(nbd_cmd(req)),
-			(unsigned long long)req->sector << 9,
-			req->nr_sectors << 9);
+			(unsigned long long)blk_rq_pos(req) << 9,
+			blk_rq_bytes(req));
 	result = sock_xmit(lo, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
@@ -533,11 +533,9 @@
 {
 	struct request *req;
 	
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_fetch_request(q)) != NULL) {
 		struct nbd_device *lo;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
@@ -580,13 +578,6 @@
 		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
-		/*
-		 * Set these to sane values in case server implementation
-		 * fails to check the request type first and also to keep
-		 * debugging output cleaner.
-		 */
-		sreq.sector = 0;
-		sreq.nr_sectors = 0;
 		if (!lo->sock)
 			return -EINVAL;
 		nbd_send_req(lo, &sreq);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index e91d4b4..911dfd9 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -719,32 +719,37 @@
 	if (pcd_busy)
 		return;
 	while (1) {
-		pcd_req = elv_next_request(q);
-		if (!pcd_req)
-			return;
+		if (!pcd_req) {
+			pcd_req = blk_fetch_request(q);
+			if (!pcd_req)
+				return;
+		}
 
 		if (rq_data_dir(pcd_req) == READ) {
 			struct pcd_unit *cd = pcd_req->rq_disk->private_data;
 			if (cd != pcd_current)
 				pcd_bufblk = -1;
 			pcd_current = cd;
-			pcd_sector = pcd_req->sector;
-			pcd_count = pcd_req->current_nr_sectors;
+			pcd_sector = blk_rq_pos(pcd_req);
+			pcd_count = blk_rq_cur_sectors(pcd_req);
 			pcd_buf = pcd_req->buffer;
 			pcd_busy = 1;
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
-		} else
-			end_request(pcd_req, 0);
+		} else {
+			__blk_end_request_all(pcd_req, -EIO);
+			pcd_req = NULL;
+		}
 	}
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pcd_lock, saved_flags);
-	end_request(pcd_req, success);
+	if (!__blk_end_request_cur(pcd_req, err))
+		pcd_req = NULL;
 	pcd_busy = 0;
 	do_pcd_request(pcd_queue);
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
@@ -781,7 +786,7 @@
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -796,7 +801,7 @@
 	pcd_retries = 0;
 	pcd_transfer();
 	if (!pcd_count) {
-		next_request(1);
+		next_request(0);
 		return;
 	}
 
@@ -815,7 +820,7 @@
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9299455..bf5955b 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -410,10 +410,12 @@
 				pd_claimed = 0;
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
-				end_request(pd_req, res);
-				pd_req = elv_next_request(pd_queue);
-				if (!pd_req)
-					stop = 1;
+				if (!__blk_end_request_cur(pd_req,
+						res == Ok ? 0 : -EIO)) {
+					pd_req = blk_fetch_request(pd_queue);
+					if (!pd_req)
+						stop = 1;
+				}
 				spin_unlock_irqrestore(&pd_lock, saved_flags);
 				if (stop)
 					return;
@@ -443,11 +445,11 @@
 
 	pd_cmd = rq_data_dir(pd_req);
 	if (pd_cmd == READ || pd_cmd == WRITE) {
-		pd_block = pd_req->sector;
-		pd_count = pd_req->current_nr_sectors;
+		pd_block = blk_rq_pos(pd_req);
+		pd_count = blk_rq_cur_sectors(pd_req);
 		if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
 			return Fail;
-		pd_run = pd_req->nr_sectors;
+		pd_run = blk_rq_sectors(pd_req);
 		pd_buf = pd_req->buffer;
 		pd_retries = 0;
 		if (pd_cmd == READ)
@@ -477,8 +479,8 @@
 	if (pd_count)
 		return 0;
 	spin_lock_irqsave(&pd_lock, saved_flags);
-	end_request(pd_req, 1);
-	pd_count = pd_req->current_nr_sectors;
+	__blk_end_request_cur(pd_req, 0);
+	pd_count = blk_rq_cur_sectors(pd_req);
 	pd_buf = pd_req->buffer;
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
 	return 0;
@@ -702,7 +704,7 @@
 {
 	if (pd_req)
 		return;
-	pd_req = elv_next_request(q);
+	pd_req = blk_fetch_request(q);
 	if (!pd_req)
 		return;
 
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index bef3b99..68a90834 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -750,12 +750,10 @@
 
 static struct request_queue *pf_queue;
 
-static void pf_end_request(int uptodate)
+static void pf_end_request(int err)
 {
-	if (pf_req) {
-		end_request(pf_req, uptodate);
+	if (pf_req && !__blk_end_request_cur(pf_req, err))
 		pf_req = NULL;
-	}
 }
 
 static void do_pf_request(struct request_queue * q)
@@ -763,17 +761,19 @@
 	if (pf_busy)
 		return;
 repeat:
-	pf_req = elv_next_request(q);
-	if (!pf_req)
-		return;
+	if (!pf_req) {
+		pf_req = blk_fetch_request(q);
+		if (!pf_req)
+			return;
+	}
 
 	pf_current = pf_req->rq_disk->private_data;
-	pf_block = pf_req->sector;
-	pf_run = pf_req->nr_sectors;
-	pf_count = pf_req->current_nr_sectors;
+	pf_block = blk_rq_pos(pf_req);
+	pf_run = blk_rq_sectors(pf_req);
+	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 
@@ -788,7 +788,7 @@
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 }
@@ -805,23 +805,22 @@
 		return 1;
 	if (!pf_count) {
 		spin_lock_irqsave(&pf_spin_lock, saved_flags);
-		pf_end_request(1);
-		pf_req = elv_next_request(pf_queue);
+		pf_end_request(0);
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
 			return 1;
-		pf_count = pf_req->current_nr_sectors;
+		pf_count = blk_rq_cur_sectors(pf_req);
 		pf_buf = pf_req->buffer;
 	}
 	return 0;
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pf_spin_lock, saved_flags);
-	pf_end_request(success);
+	pf_end_request(err);
 	pf_busy = 0;
 	do_pf_request(pf_queue);
 	spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
@@ -844,7 +843,7 @@
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -863,7 +862,7 @@
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -871,7 +870,7 @@
 			break;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static void do_pf_write(void)
@@ -890,7 +889,7 @@
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -903,7 +902,7 @@
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -923,11 +922,11 @@
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static int __init pf_init(void)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index dc7a8c3..d57f117 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -991,13 +991,15 @@
  */
 static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
 {
-	if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) {
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_phys_segments(q)) {
 		/*
 		 * The cdrom device can handle one segment/frame
 		 */
 		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
 		return 0;
-	} else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) {
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_phys_segments(q)) {
 		/*
 		 * We can handle this case at the expense of some extra memory
 		 * copies during write operations
@@ -2657,7 +2659,7 @@
 	struct request_queue *q = pd->disk->queue;
 
 	blk_queue_make_request(q, pkt_make_request);
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 	blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
 	blk_queue_merge_bvec(q, pkt_merge_bvec);
 	q->queuedata = pd;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index bccc42b..aaeeb54 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -134,13 +134,12 @@
 	rq_for_each_segment(bv, req, iter)
 		n++;
 	dev_dbg(&dev->sbd.core,
-		"%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n",
-		__func__, __LINE__, op, n, req->nr_sectors,
-		req->hard_nr_sectors);
+		"%s:%u: %s req has %u bvecs for %u sectors\n",
+		__func__, __LINE__, op, n, blk_rq_sectors(req));
 #endif
 
-	start_sector = req->sector * priv->blocking_factor;
-	sectors = req->nr_sectors * priv->blocking_factor;
+	start_sector = blk_rq_pos(req) * priv->blocking_factor;
+	sectors = blk_rq_sectors(req) * priv->blocking_factor;
 	dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n",
 		__func__, __LINE__, op, sectors, start_sector);
 
@@ -158,7 +157,7 @@
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		end_request(req, 0);
+		__blk_end_request_all(req, -EIO);
 		return 0;
 	}
 
@@ -180,7 +179,7 @@
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		end_request(req, 0);
+		__blk_end_request_all(req, -EIO);
 		return 0;
 	}
 
@@ -195,7 +194,7 @@
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	while ((req = elv_next_request(q))) {
+	while ((req = blk_fetch_request(q))) {
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
@@ -205,7 +204,7 @@
 				break;
 		} else {
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			end_request(req, 0);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 	}
@@ -231,7 +230,6 @@
 	struct request *req;
 	int res, read, error;
 	u64 tag, status;
-	unsigned long num_sectors;
 	const char *op;
 
 	res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status);
@@ -261,11 +259,9 @@
 	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
 	    req->cmd[0] == REQ_LB_OP_FLUSH) {
 		read = 0;
-		num_sectors = req->hard_cur_sectors;
 		op = "flush";
 	} else {
 		read = !rq_data_dir(req);
-		num_sectors = req->nr_sectors;
 		op = read ? "read" : "write";
 	}
 	if (status) {
@@ -281,7 +277,7 @@
 	}
 
 	spin_lock(&priv->lock);
-	__blk_end_request(req, error, num_sectors << 9);
+	__blk_end_request_all(req, error);
 	priv->req = NULL;
 	ps3disk_do_request(dev, priv->queue);
 	spin_unlock(&priv->lock);
@@ -481,7 +477,7 @@
 	blk_queue_max_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
-	blk_queue_hardsect_size(queue, dev->blk_size);
+	blk_queue_logical_block_size(queue, dev->blk_size);
 
 	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
 			  ps3disk_prepare_flush);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5861e33..cbfd9c0 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -212,11 +212,6 @@
 	vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
 }
 
-static void vdc_end_request(struct request *req, int error, int num_sectors)
-{
-	__blk_end_request(req, error, num_sectors << 9);
-}
-
 static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 			unsigned int index)
 {
@@ -239,7 +234,7 @@
 
 	rqe->req = NULL;
 
-	vdc_end_request(req, (desc->status ? -EIO : 0), desc->size >> 9);
+	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
 	if (blk_queue_stopped(port->disk->queue))
 		blk_start_queue(port->disk->queue);
@@ -421,7 +416,7 @@
 		desc->slice = 0;
 	}
 	desc->status = ~0;
-	desc->offset = (req->sector << 9) / port->vdisk_block_size;
+	desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
 	desc->size = len;
 	desc->ncookies = err;
 
@@ -446,14 +441,13 @@
 static void do_vdc_request(struct request_queue *q)
 {
 	while (1) {
-		struct request *req = elv_next_request(q);
+		struct request *req = blk_fetch_request(q);
 
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
 		if (__send_request(req) < 0)
-			vdc_end_request(req, -EIO, req->hard_nr_sectors);
+			__blk_end_request_all(req, -EIO);
 	}
 }
 
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index d22cc38..cf7877f 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -514,7 +514,7 @@
 			ret = swim_read_sector(fs, side, track, sector,
 						buffer);
 			if (try-- == 0)
-				return -1;
+				return -EIO;
 		} while (ret != 512);
 
 		buffer += ret;
@@ -528,45 +528,31 @@
 	struct request *req;
 	struct floppy_state *fs;
 
-	while ((req = elv_next_request(q))) {
+	req = blk_fetch_request(q);
+	while (req) {
+		int err = -EIO;
 
 		fs = req->rq_disk->private_data;
-		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
-			continue;
-		}
-		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
-			continue;
-		}
-		if (!fs->disk_in) {
-			end_request(req, 0);
-			continue;
-		}
-		if (rq_data_dir(req) == WRITE) {
-			if (fs->write_protected) {
-				end_request(req, 0);
-				continue;
-			}
-		}
+		if (blk_rq_pos(req) >= fs->total_secs)
+			goto done;
+		if (!fs->disk_in)
+			goto done;
+		if (rq_data_dir(req) == WRITE && fs->write_protected)
+			goto done;
+
 		switch (rq_data_dir(req)) {
 		case WRITE:
 			/* NOT IMPLEMENTED */
-			end_request(req, 0);
 			break;
 		case READ:
-			if (floppy_read_sectors(fs, req->sector,
-						req->current_nr_sectors,
-						req->buffer)) {
-				end_request(req, 0);
-				continue;
-			}
-			req->nr_sectors -= req->current_nr_sectors;
-			req->sector += req->current_nr_sectors;
-			req->buffer += req->current_nr_sectors * 512;
-			end_request(req, 1);
+			err = floppy_read_sectors(fs, blk_rq_pos(req),
+						  blk_rq_cur_sectors(req),
+						  req->buffer);
 			break;
 		}
+	done:
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 61296530..80df93e 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -251,6 +251,20 @@
 static int floppy_check_change(struct gendisk *disk);
 static int floppy_revalidate(struct gendisk *disk);
 
+static bool swim3_end_request(int err, unsigned int nr_bytes)
+{
+	if (__blk_end_request(fd_req, err, nr_bytes))
+		return true;
+
+	fd_req = NULL;
+	return false;
+}
+
+static bool swim3_end_request_cur(int err)
+{
+	return swim3_end_request(err, blk_rq_cur_bytes(fd_req));
+}
+
 static void swim3_select(struct floppy_state *fs, int sel)
 {
 	struct swim3 __iomem *sw = fs->swim3;
@@ -310,25 +324,27 @@
 		wake_up(&fs->wait);
 		return;
 	}
-	while (fs->state == idle && (req = elv_next_request(swim3_queue))) {
+	while (fs->state == idle) {
+		if (!fd_req) {
+			fd_req = blk_fetch_request(swim3_queue);
+			if (!fd_req)
+				break;
+		}
+		req = fd_req;
 #if 0
-		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
+		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
-		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           errors=%d current_nr_sectors=%ld\n",
-		       req->errors, req->current_nr_sectors);
+		       (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+		printk("           errors=%d current_nr_sectors=%u\n",
+		       req->errors, blk_rq_cur_sectors(req));
 #endif
 
-		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
-			continue;
-		}
-		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
+		if (blk_rq_pos(req) >= fs->total_secs) {
+			swim3_end_request_cur(-EIO);
 			continue;
 		}
 		if (fs->ejected) {
-			end_request(req, 0);
+			swim3_end_request_cur(-EIO);
 			continue;
 		}
 
@@ -336,18 +352,19 @@
 			if (fs->write_prot < 0)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
-				end_request(req, 0);
+				swim3_end_request_cur(-EIO);
 				continue;
 			}
 		}
 
-		/* Do not remove the cast. req->sector is now a sector_t and
-		 * can be 64 bits, but it will never go past 32 bits for this
-		 * driver anyway, so we can safely cast it down and not have
-		 * to do a 64/32 division
+		/* Do not remove the cast. blk_rq_pos(req) is now a
+		 * sector_t and can be 64 bits, but it will never go
+		 * past 32 bits for this driver anyway, so we can
+		 * safely cast it down and not have to do a 64/32
+		 * division
 		 */
-		fs->req_cyl = ((long)req->sector) / fs->secpercyl;
-		x = ((long)req->sector) % fs->secpercyl;
+		fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
+		x = ((long)blk_rq_pos(req)) % fs->secpercyl;
 		fs->head = x / fs->secpertrack;
 		fs->req_sector = x % fs->secpertrack + 1;
 		fd_req = req;
@@ -424,7 +441,7 @@
 	struct dbdma_cmd *cp = fs->dma_cmd;
 	struct dbdma_regs __iomem *dr = fs->dma;
 
-	if (fd_req->current_nr_sectors <= 0) {
+	if (blk_rq_cur_sectors(fd_req) <= 0) {
 		printk(KERN_ERR "swim3: transfer 0 sectors?\n");
 		return;
 	}
@@ -432,8 +449,8 @@
 		n = 1;
 	else {
 		n = fs->secpertrack - fs->req_sector + 1;
-		if (n > fd_req->current_nr_sectors)
-			n = fd_req->current_nr_sectors;
+		if (n > blk_rq_cur_sectors(fd_req))
+			n = blk_rq_cur_sectors(fd_req);
 	}
 	fs->scount = n;
 	swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0);
@@ -508,7 +525,7 @@
 		case do_transfer:
 			if (fs->cur_cyl != fs->req_cyl) {
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					swim3_end_request_cur(-EIO);
 					fs->state = idle;
 					return;
 				}
@@ -540,7 +557,7 @@
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		end_request(fd_req, 0);
+		swim3_end_request_cur(-EIO);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -559,7 +576,7 @@
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	printk(KERN_ERR "swim3: seek timeout\n");
-	end_request(fd_req, 0);
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -583,7 +600,7 @@
 		return;
 	}
 	printk(KERN_ERR "swim3: seek settle timeout\n");
-	end_request(fd_req, 0);
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -593,8 +610,6 @@
 	struct floppy_state *fs = (struct floppy_state *) data;
 	struct swim3 __iomem *sw = fs->swim3;
 	struct dbdma_regs __iomem *dr = fs->dma;
-	struct dbdma_cmd *cp = fs->dma_cmd;
-	unsigned long s;
 	int n;
 
 	fs->timeout_pending = 0;
@@ -605,17 +620,10 @@
 	out_8(&sw->intr_enable, 0);
 	out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
 	out_8(&sw->select, RELAX);
-	if (rq_data_dir(fd_req) == WRITE)
-		++cp;
-	if (ld_le16(&cp->xfer_status) != 0)
-		s = fs->scount - ((ld_le16(&cp->res_count) + 511) >> 9);
-	else
-		s = 0;
-	fd_req->sector += s;
-	fd_req->current_nr_sectors -= s;
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
-	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
-	end_request(fd_req, 0);
+	       (rq_data_dir(fd_req)==WRITE? "writ": "read"),
+	       (long)blk_rq_pos(fd_req));
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -646,7 +654,7 @@
 				printk(KERN_ERR "swim3: seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					swim3_end_request_cur(-EIO);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -719,9 +727,7 @@
 		if (intr & ERROR_INTR) {
 			n = fs->scount - 1 - resid / 512;
 			if (n > 0) {
-				fd_req->sector += n;
-				fd_req->current_nr_sectors -= n;
-				fd_req->buffer += n * 512;
+				blk_update_request(fd_req, 0, n << 9);
 				fs->req_sector += n;
 			}
 			if (fs->retries < 5) {
@@ -730,8 +736,8 @@
 			} else {
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
-				       (long)fd_req->sector, err);
-				end_request(fd_req, 0);
+				       (long)blk_rq_pos(fd_req), err);
+				swim3_end_request_cur(-EIO);
 				fs->state = idle;
 			}
 		} else {
@@ -740,18 +746,12 @@
 				printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid);
 				printk(KERN_ERR "  state=%d, dir=%x, intr=%x, err=%x\n",
 				       fs->state, rq_data_dir(fd_req), intr, err);
-				end_request(fd_req, 0);
+				swim3_end_request_cur(-EIO);
 				fs->state = idle;
 				start_request(fs);
 				break;
 			}
-			fd_req->sector += fs->scount;
-			fd_req->current_nr_sectors -= fs->scount;
-			fd_req->buffer += fs->scount * 512;
-			if (fd_req->current_nr_sectors <= 0) {
-				end_request(fd_req, 1);
-				fs->state = idle;
-			} else {
+			if (swim3_end_request(0, fs->scount << 9)) {
 				fs->req_sector += fs->scount;
 				if (fs->req_sector > fs->secpertrack) {
 					fs->req_sector -= fs->secpertrack;
@@ -761,7 +761,8 @@
 					}
 				}
 				act(fs);
-			}
+			} else
+				fs->state = idle;
 		}
 		if (fs->state == idle)
 			start_request(fs);
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index ff0448e..da403b6 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -749,8 +749,7 @@
 	struct request *req = crq->rq;
 	int rc;
 
-	rc = __blk_end_request(req, error, blk_rq_bytes(req));
-	assert(rc == 0);
+	__blk_end_request_all(req, error);
 
 	rc = carm_put_request(host, crq);
 	assert(rc == 0);
@@ -811,12 +810,10 @@
 
 	while (1) {
 		DPRINTK("get req\n");
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		if (!rq)
 			break;
 
-		blkdev_dequeue_request(rq);
-
 		crq = rq->special;
 		assert(crq != NULL);
 		assert(crq->rq == rq);
@@ -847,7 +844,7 @@
 
 queue_one_request:
 	VPRINTK("get req\n");
-	rq = elv_next_request(q);
+	rq = blk_peek_request(q);
 	if (!rq)
 		return;
 
@@ -858,7 +855,7 @@
 	}
 	crq->rq = rq;
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	if (rq_data_dir(rq) == WRITE) {
 		writing = 1;
@@ -904,10 +901,10 @@
 	msg->sg_count	= n_elem;
 	msg->sg_type	= SGT_32BIT;
 	msg->handle	= cpu_to_le32(TAG_ENCODE(crq->tag));
-	msg->lba	= cpu_to_le32(rq->sector & 0xffffffff);
-	tmp		= (rq->sector >> 16) >> 16;
+	msg->lba	= cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
+	tmp		= (blk_rq_pos(rq) >> 16) >> 16;
 	msg->lba_high	= cpu_to_le16( (u16) tmp );
-	msg->lba_count	= cpu_to_le16(rq->nr_sectors);
+	msg->lba_count	= cpu_to_le16(blk_rq_sectors(rq));
 
 	msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
 	for (i = 0; i < n_elem; i++) {
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 689cd27..cc54473 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -360,8 +360,7 @@
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
     struct ub_scsi_cmd *cmd, struct ub_request *urq);
 static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
-static void ub_end_rq(struct request *rq, unsigned int status,
-    unsigned int cmd_len);
+static void ub_end_rq(struct request *rq, unsigned int status);
 static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
     struct ub_request *urq, struct ub_scsi_cmd *cmd);
 static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
@@ -627,7 +626,7 @@
 	struct ub_lun *lun = q->queuedata;
 	struct request *rq;
 
-	while ((rq = elv_next_request(q)) != NULL) {
+	while ((rq = blk_peek_request(q)) != NULL) {
 		if (ub_request_fn_1(lun, rq) != 0) {
 			blk_stop_queue(q);
 			break;
@@ -643,14 +642,14 @@
 	int n_elem;
 
 	if (atomic_read(&sc->poison)) {
-		blkdev_dequeue_request(rq);
-		ub_end_rq(rq, DID_NO_CONNECT << 16, blk_rq_bytes(rq));
+		blk_start_request(rq);
+		ub_end_rq(rq, DID_NO_CONNECT << 16);
 		return 0;
 	}
 
 	if (lun->changed && !blk_pc_request(rq)) {
-		blkdev_dequeue_request(rq);
-		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION, blk_rq_bytes(rq));
+		blk_start_request(rq);
+		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION);
 		return 0;
 	}
 
@@ -660,7 +659,7 @@
 		return -1;
 	memset(cmd, 0, sizeof(struct ub_scsi_cmd));
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	urq = &lun->urq;
 	memset(urq, 0, sizeof(struct ub_request));
@@ -702,7 +701,7 @@
 
 drop:
 	ub_put_cmd(lun, cmd);
-	ub_end_rq(rq, DID_ERROR << 16, blk_rq_bytes(rq));
+	ub_end_rq(rq, DID_ERROR << 16);
 	return 0;
 }
 
@@ -723,11 +722,11 @@
 	/*
 	 * build the command
 	 *
-	 * The call to blk_queue_hardsect_size() guarantees that request
+	 * The call to blk_queue_logical_block_size() guarantees that request
 	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
-	block = rq->sector >> lun->capacity.bshift;
-	nblks = rq->nr_sectors >> lun->capacity.bshift;
+	block = blk_rq_pos(rq) >> lun->capacity.bshift;
+	nblks = blk_rq_sectors(rq) >> lun->capacity.bshift;
 
 	cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10;
 	/* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */
@@ -739,7 +738,7 @@
 	cmd->cdb[8] = nblks;
 	cmd->cdb_len = 10;
 
-	cmd->len = rq->nr_sectors * 512;
+	cmd->len = blk_rq_bytes(rq);
 }
 
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
@@ -747,7 +746,7 @@
 {
 	struct request *rq = urq->rq;
 
-	if (rq->data_len == 0) {
+	if (blk_rq_bytes(rq) == 0) {
 		cmd->dir = UB_DIR_NONE;
 	} else {
 		if (rq_data_dir(rq) == WRITE)
@@ -762,7 +761,7 @@
 	memcpy(&cmd->cdb, rq->cmd, rq->cmd_len);
 	cmd->cdb_len = rq->cmd_len;
 
-	cmd->len = rq->data_len;
+	cmd->len = blk_rq_bytes(rq);
 
 	/*
 	 * To reapply this to every URB is not as incorrect as it looks.
@@ -777,16 +776,15 @@
 	struct ub_request *urq = cmd->back;
 	struct request *rq;
 	unsigned int scsi_status;
-	unsigned int cmd_len;
 
 	rq = urq->rq;
 
 	if (cmd->error == 0) {
 		if (blk_pc_request(rq)) {
-			if (cmd->act_len >= rq->data_len)
-				rq->data_len = 0;
+			if (cmd->act_len >= rq->resid_len)
+				rq->resid_len = 0;
 			else
-				rq->data_len -= cmd->act_len;
+				rq->resid_len -= cmd->act_len;
 			scsi_status = 0;
 		} else {
 			if (cmd->act_len != cmd->len) {
@@ -818,17 +816,14 @@
 
 	urq->rq = NULL;
 
-	cmd_len = cmd->len;
 	ub_put_cmd(lun, cmd);
-	ub_end_rq(rq, scsi_status, cmd_len);
+	ub_end_rq(rq, scsi_status);
 	blk_start_queue(lun->disk->queue);
 }
 
-static void ub_end_rq(struct request *rq, unsigned int scsi_status,
-    unsigned int cmd_len)
+static void ub_end_rq(struct request *rq, unsigned int scsi_status)
 {
 	int error;
-	long rqlen;
 
 	if (scsi_status == 0) {
 		error = 0;
@@ -836,12 +831,7 @@
 		error = -EIO;
 		rq->errors = scsi_status;
 	}
-	rqlen = blk_rq_bytes(rq);    /* Oddly enough, this is the residue. */
-	if (__blk_end_request(rq, error, cmd_len)) {
-		printk(KERN_WARNING DRV_NAME
-		    ": __blk_end_request blew, %s-cmd total %u rqlen %ld\n",
-		    blk_pc_request(rq)? "pc": "fs", cmd_len, rqlen);
-	}
+	__blk_end_request_all(rq, error);
 }
 
 static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
@@ -1759,7 +1749,7 @@
 	ub_revalidate(lun->udev, lun);
 
 	/* XXX Support sector size switching like in sr.c */
-	blk_queue_hardsect_size(disk->queue, lun->capacity.bsize);
+	blk_queue_logical_block_size(disk->queue, lun->capacity.bsize);
 	set_capacity(disk, lun->capacity.nsec);
 	// set_disk_ro(sdkp->disk, lun->readonly);
 
@@ -2334,7 +2324,7 @@
 	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
 	blk_queue_segment_boundary(q, 0xffffffff);	/* Dubious. */
 	blk_queue_max_sectors(q, UB_MAX_SECTORS);
-	blk_queue_hardsect_size(q, lun->capacity.bsize);
+	blk_queue_logical_block_size(q, lun->capacity.bsize);
 
 	lun->disk = disk;
 	q->queuedata = lun;
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index ecccf65..390d69b 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -252,7 +252,7 @@
 	struct viodasd_device *d;
 	unsigned long flags;
 
-	start = (u64)req->sector << 9;
+	start = (u64)blk_rq_pos(req) << 9;
 
 	if (rq_data_dir(req) == READ) {
 		direction = DMA_FROM_DEVICE;
@@ -361,19 +361,17 @@
 	 * back later.
 	 */
 	while (num_req_outstanding < VIOMAXREQ) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (req == NULL)
 			return;
-		/* dequeue the current request from the queue */
-		blkdev_dequeue_request(req);
 		/* check that request contains a valid command */
 		if (!blk_fs_request(req)) {
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 			continue;
 		}
 		/* Try sending the request */
 		if (send_request(req) != 0)
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 	}
 }
 
@@ -590,7 +588,7 @@
 		err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
 		printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n",
 				event->xRc, bevent->sub_result, err->msg);
-		num_sect = req->hard_nr_sectors;
+		num_sect = blk_rq_sectors(req);
 	}
 	qlock = req->q->queue_lock;
 	spin_lock_irqsave(qlock, irq_flags);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5d34764..43db3ea 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -37,6 +37,7 @@
 	struct list_head list;
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
+	struct virtio_scsi_inhdr in_hdr;
 	u8 status;
 };
 
@@ -50,6 +51,7 @@
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
 		int error;
+
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
 			error = 0;
@@ -62,7 +64,13 @@
 			break;
 		}
 
-		__blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
+		if (blk_pc_request(vbr->req)) {
+			vbr->req->resid_len = vbr->in_hdr.residual;
+			vbr->req->sense_len = vbr->in_hdr.sense_len;
+			vbr->req->errors = vbr->in_hdr.errors;
+		}
+
+		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
@@ -74,7 +82,7 @@
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		   struct request *req)
 {
-	unsigned long num, out, in;
+	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
 	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
@@ -85,7 +93,7 @@
 	vbr->req = req;
 	if (blk_fs_request(vbr->req)) {
 		vbr->out_hdr.type = 0;
-		vbr->out_hdr.sector = vbr->req->sector;
+		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else if (blk_pc_request(vbr->req)) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
@@ -99,18 +107,36 @@
 	if (blk_barrier_rq(vbr->req))
 		vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
 
-	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
-	sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status));
+	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
 
-	if (rq_data_dir(vbr->req) == WRITE) {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		out = 1 + num;
-		in = 1;
-	} else {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-		out = 1;
-		in = 1 + num;
+	/*
+	 * If this is a packet command we need a couple of additional headers.
+	 * Behind the normal outhdr we put a segment with the scsi command
+	 * block, and before the normal inhdr we put the sense data and the
+	 * inhdr with additional status information before the normal inhdr.
+	 */
+	if (blk_pc_request(vbr->req))
+		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
+
+	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
+
+	if (blk_pc_request(vbr->req)) {
+		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
+			   sizeof(vbr->in_hdr));
+	}
+
+	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
+		   sizeof(vbr->status));
+
+	if (num) {
+		if (rq_data_dir(vbr->req) == WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
+		}
 	}
 
 	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
@@ -124,12 +150,11 @@
 
 static void do_virtblk_request(struct request_queue *q)
 {
-	struct virtio_blk *vblk = NULL;
+	struct virtio_blk *vblk = q->queuedata;
 	struct request *req;
 	unsigned int issued = 0;
 
-	while ((req = elv_next_request(q)) != NULL) {
-		vblk = req->rq_disk->private_data;
+	while ((req = blk_peek_request(q)) != NULL) {
 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 		/* If this request fails, stop queue and wait for something to
@@ -138,7 +163,7 @@
 			blk_stop_queue(q);
 			break;
 		}
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		issued++;
 	}
 
@@ -146,12 +171,51 @@
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
+/* return ATA identify data
+ */
+static int virtblk_identify(struct gendisk *disk, void *argp)
+{
+	struct virtio_blk *vblk = disk->private_data;
+	void *opaque;
+	int err = -ENOMEM;
+
+	opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
+	if (!opaque)
+		goto out;
+
+	err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY,
+		offsetof(struct virtio_blk_config, identify), opaque,
+		VIRTIO_BLK_ID_BYTES);
+
+	if (err)
+		goto out_kfree;
+
+	if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES))
+		err = -EFAULT;
+
+out_kfree:
+	kfree(opaque);
+out:
+	return err;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
-	return scsi_cmd_ioctl(bdev->bd_disk->queue,
-			      bdev->bd_disk, mode, cmd,
-			      (void __user *)data);
+	struct gendisk *disk = bdev->bd_disk;
+	struct virtio_blk *vblk = disk->private_data;
+	void __user *argp = (void __user *)data;
+
+	if (cmd == HDIO_GET_IDENTITY)
+		return virtblk_identify(disk, argp);
+
+	/*
+	 * Only allow the generic SCSI ioctls if the host can support it.
+	 */
+	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
+		return -ENOIOCTLCMD;
+
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -190,7 +254,7 @@
 	return index << PART_BITS;
 }
 
-static int virtblk_probe(struct virtio_device *vdev)
+static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	int err;
@@ -224,7 +288,7 @@
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
+	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
@@ -249,6 +313,7 @@
 		goto out_put_disk;
 	}
 
+	vblk->disk->queue->queuedata = vblk;
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);
 
 	if (index < 26) {
@@ -313,7 +378,7 @@
 				offsetof(struct virtio_blk_config, blk_size),
 				&blk_size);
 	if (!err)
-		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+		blk_queue_logical_block_size(vblk->disk->queue, blk_size);
 
 	add_disk(vblk->disk);
 	return 0;
@@ -323,14 +388,14 @@
 out_mempool:
 	mempool_destroy(vblk->pool);
 out_free_vq:
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 out_free_vblk:
 	kfree(vblk);
 out:
 	return err;
 }
 
-static void virtblk_remove(struct virtio_device *vdev)
+static void __devexit virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
 
@@ -344,7 +409,7 @@
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	mempool_destroy(vblk->pool);
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 }
 
@@ -356,6 +421,7 @@
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 64b496f..ce24292 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -305,30 +305,25 @@
 	if (xdc_busy)
 		return;
 
-	while ((req = elv_next_request(q)) != NULL) {
-		unsigned block = req->sector;
-		unsigned count = req->nr_sectors;
-		int rw = rq_data_dir(req);
+	req = blk_fetch_request(q);
+	while (req) {
+		unsigned block = blk_rq_pos(req);
+		unsigned count = blk_rq_cur_sectors(req);
 		XD_INFO *disk = req->rq_disk->private_data;
-		int res = 0;
+		int res = -EIO;
 		int retry;
 
-		if (!blk_fs_request(req)) {
-			end_request(req, 0);
-			continue;
-		}
-		if (block + count > get_capacity(req->rq_disk)) {
-			end_request(req, 0);
-			continue;
-		}
-		if (rw != READ && rw != WRITE) {
-			printk("do_xd_request: unknown request\n");
-			end_request(req, 0);
-			continue;
-		}
+		if (!blk_fs_request(req))
+			goto done;
+		if (block + count > get_capacity(req->rq_disk))
+			goto done;
 		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
-			res = xd_readwrite(rw, disk, req->buffer, block, count);
-		end_request(req, res);	/* wrap up, 0 = fail, 1 = success */
+			res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
+					   block, count);
+	done:
+		/* wrap up, 0 = success, -errno = fail */
+		if (!__blk_end_request_cur(req, res))
+			req = blk_fetch_request(q);
 	}
 }
 
@@ -418,7 +413,7 @@
 				printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
 				xd_recalibrate(drive);
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 			case 2:
 				if (sense[0] & 0x30) {
 					printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
@@ -439,7 +434,7 @@
 				else
 					printk(" - no valid disk address\n");
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 		}
 		if (xd_dma_buffer)
 			for (i=0; i < (temp * 0x200); i++)
@@ -448,7 +443,7 @@
 		count -= temp, buffer += temp * 0x200, block += temp;
 	}
 	spin_lock_irq(&xd_lock);
-	return (1);
+	return 0;
 }
 
 /* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a6cbf7b..c199682 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -122,7 +122,7 @@
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
-	BUG_ON(free > BLK_RING_SIZE);
+	BUG_ON(free >= BLK_RING_SIZE);
 	info->shadow_free = info->shadow[free].req.id;
 	info->shadow[free].req.id = 0x0fffffee; /* debug */
 	return free;
@@ -231,7 +231,7 @@
 	info->shadow[id].request = (unsigned long)req;
 
 	ring_req->id = id;
-	ring_req->sector_number = (blkif_sector_t)req->sector;
+	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
@@ -299,25 +299,25 @@
 
 	queued = 0;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
-		if (!blk_fs_request(req)) {
-			end_request(req, 0);
-			continue;
-		}
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
+		blk_start_request(req);
+
+		if (!blk_fs_request(req)) {
+			__blk_end_request_all(req, -EIO);
+			continue;
+		}
+
 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%li) buffer:%p [%s]\n",
-			 req, req->cmd, (unsigned long)req->sector,
-			 req->current_nr_sectors,
-			 req->nr_sectors, req->buffer,
-			 rq_data_dir(req) ? "write" : "read");
+			 "(%u/%u) buffer:%p [%s]\n",
+			 req, req->cmd, (unsigned long)blk_rq_pos(req),
+			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
+			 req->buffer, rq_data_dir(req) ? "write" : "read");
 
-
-		blkdev_dequeue_request(req);
 		if (blkif_queue_request(req)) {
 			blk_requeue_request(rq, req);
 wait:
@@ -344,7 +344,7 @@
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, sector_size);
+	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
@@ -551,7 +551,6 @@
 
 	for (i = info->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
-		int ret;
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		id   = bret->id;
@@ -578,8 +577,7 @@
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			ret = __blk_end_request(req, error, blk_rq_bytes(req));
-			BUG_ON(ret);
+			__blk_end_request_all(req, error);
 			break;
 		default:
 			BUG();
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 4aecf5d..f08491a 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -463,10 +463,11 @@
 {
 	struct request *req;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_peek_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		end_request(req, 0);
+		blk_start_request(req);
+		__blk_end_request_all(req, -EIO);
 	}
 	return req;
 }
@@ -492,9 +493,13 @@
 		set_capacity(ace->gd, 0);
 		dev_info(ace->dev, "No CF in slot\n");
 
-		/* Drop all pending requests */
-		while ((req = elv_next_request(ace->queue)) != NULL)
-			end_request(req, 0);
+		/* Drop all in-flight and pending requests */
+		if (ace->req) {
+			__blk_end_request_all(ace->req, -EIO);
+			ace->req = NULL;
+		}
+		while ((req = blk_fetch_request(ace->queue)) != NULL)
+			__blk_end_request_all(req, -EIO);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -642,19 +647,21 @@
 			ace->fsm_state = ACE_FSM_STATE_IDLE;
 			break;
 		}
+		blk_start_request(req);
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
-			"request: sec=%llx hcnt=%lx, ccnt=%x, dir=%i\n",
-			(unsigned long long) req->sector, req->hard_nr_sectors,
-			req->current_nr_sectors, rq_data_dir(req));
+			"request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
+			(unsigned long long)blk_rq_pos(req),
+			blk_rq_sectors(req), blk_rq_cur_sectors(req),
+			rq_data_dir(req));
 
 		ace->req = req;
 		ace->data_ptr = req->buffer;
-		ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR;
-		ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF);
+		ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
+		ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
 
-		count = req->hard_nr_sectors;
+		count = blk_rq_sectors(req);
 		if (rq_data_dir(req)) {
 			/* Kick off write request */
 			dev_dbg(ace->dev, "write data\n");
@@ -688,7 +695,7 @@
 			dev_dbg(ace->dev,
 				"CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yield(ace);	/* need to poll CFBSY bit */
 			break;
@@ -697,7 +704,7 @@
 			dev_dbg(ace->dev,
 				"DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yieldirq(ace);
 			break;
@@ -717,14 +724,13 @@
 		}
 
 		/* bio finished; is there another one? */
-		if (__blk_end_request(ace->req, 0,
-					blk_rq_cur_bytes(ace->req))) {
-			/* dev_dbg(ace->dev, "next block; h=%li c=%i\n",
-			 *      ace->req->hard_nr_sectors,
-			 *      ace->req->current_nr_sectors);
+		if (__blk_end_request_cur(ace->req, 0)) {
+			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
+			 *      blk_rq_sectors(ace->req),
+			 *      blk_rq_cur_sectors(ace->req));
 			 */
 			ace->data_ptr = ace->req->buffer;
-			ace->data_count = ace->req->current_nr_sectors * 16;
+			ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
 			ace_fsm_yieldirq(ace);
 			break;
 		}
@@ -978,7 +984,7 @@
 	ace->queue = blk_init_queue(ace_request, &ace->lock);
 	if (ace->queue == NULL)
 		goto err_blk_initq;
-	blk_queue_hardsect_size(ace->queue, 512);
+	blk_queue_logical_block_size(ace->queue, 512);
 
 	/*
 	 * Allocate and initialize GD structure
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 80754cd..4575171 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -70,15 +70,18 @@
 static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
-	while ((req = elv_next_request(q)) != NULL) {
-		unsigned long start = req->sector << 9;
-		unsigned long len  = req->current_nr_sectors << 9;
+
+	req = blk_fetch_request(q);
+	while (req) {
+		unsigned long start = blk_rq_pos(req) << 9;
+		unsigned long len  = blk_rq_cur_bytes(req);
+		int err = 0;
 
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
-				req->sector, req->current_nr_sectors);
-			end_request(req, 0);
-			continue;
+				blk_rq_pos(req), blk_rq_cur_sectors(req));
+			err = -EIO;
+			goto done;
 		}
 		while (len) {
 			unsigned long addr = start & Z2RAM_CHUNKMASK;
@@ -93,7 +96,9 @@
 			start += size;
 			len -= size;
 		}
-		end_request(req, 1);
+	done:
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index af761dc..4895f0e 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -277,8 +277,8 @@
 	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
 	   open path is before the ldisc is referencable */
 
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty_driver_flush_buffer(tty);
 
 	return 0;
@@ -463,7 +463,6 @@
 				clear_bit(HCI_UART_PROTO_SET, &hu->flags);
 				return err;
 			}
-			tty->low_latency = 1;
 		} else
 			return -EBUSY;
 		break;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index cceace6..71d1b9b 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2101,8 +2101,8 @@
 		nr = nframes;
 		if (cdi->cdda_method == CDDA_BPC_SINGLE)
 			nr = 1;
-		if (nr * CD_FRAMESIZE_RAW > (q->max_sectors << 9))
-			nr = (q->max_sectors << 9) / CD_FRAMESIZE_RAW;
+		if (nr * CD_FRAMESIZE_RAW > (queue_max_sectors(q) << 9))
+			nr = (queue_max_sectors(q) << 9) / CD_FRAMESIZE_RAW;
 
 		len = nr * CD_FRAMESIZE_RAW;
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 2eecb77..b5621f2 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -584,8 +584,8 @@
 	list_for_each_safe(elem, next, &gdrom_deferred) {
 		req = list_entry(elem, struct request, queuelist);
 		spin_unlock(&gdrom_lock);
-		block = req->sector/GD_TO_BLK + GD_SESSION_OFFSET;
-		block_cnt = req->nr_sectors/GD_TO_BLK;
+		block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
+		block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
 		ctrl_outl(PHYSADDR(req->buffer), GDROM_DMA_STARTADDR_REG);
 		ctrl_outl(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
 		ctrl_outl(1, GDROM_DMA_DIRECTION_REG);
@@ -632,39 +632,35 @@
 		* before handling ending the request */
 		spin_lock(&gdrom_lock);
 		list_del_init(&req->queuelist);
-		__blk_end_request(req, err, blk_rq_bytes(req));
+		__blk_end_request_all(req, err);
 	}
 	spin_unlock(&gdrom_lock);
 	kfree(read_command);
 }
 
-static void gdrom_request_handler_dma(struct request *req)
-{
-	/* dequeue, add to list of deferred work
-	* and then schedule workqueue */
-	blkdev_dequeue_request(req);
-	list_add_tail(&req->queuelist, &gdrom_deferred);
-	schedule_work(&work);
-}
-
 static void gdrom_request(struct request_queue *rq)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_fetch_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_all(req, -EIO);
+			continue;
 		}
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_NOTICE "GDROM: Read only device -");
 			printk(" write request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_all(req, -EIO);
+			continue;
 		}
-		if (req->nr_sectors)
-			gdrom_request_handler_dma(req);
-		else
-			end_request(req, 0);
+
+		/*
+		 * Add to list of deferred work and then schedule
+		 * workqueue.
+		 */
+		list_add_tail(&req->queuelist, &gdrom_deferred);
+		schedule_work(&work);
 	}
 }
 
@@ -743,7 +739,7 @@
 
 static int __devinit probe_gdrom_setupqueue(void)
 {
-	blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
+	blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
 	/* using DMA so memory will need to be contiguous */
 	blk_queue_max_hw_segments(gd.gdrom_rq, 1);
 	/* set a large max size to get most from DMA */
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 9b1624e..0fff646 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -282,7 +282,7 @@
 			viopath_targetinst(viopath_hostLp),
 			(u64)req, VIOVERSION << 16,
 			((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr,
-			(u64)req->sector * 512, len, 0);
+			(u64)blk_rq_pos(req) * 512, len, 0);
 	if (hvrc != HvLpEvent_Rc_Good) {
 		printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc);
 		return -1;
@@ -291,36 +291,19 @@
 	return 0;
 }
 
-static void viocd_end_request(struct request *req, int error)
-{
-	int nsectors = req->hard_nr_sectors;
-
-	/*
-	 * Make sure it's fully ended, and ensure that we process
-	 * at least one sector.
-	 */
-	if (blk_pc_request(req))
-		nsectors = (req->data_len + 511) >> 9;
-	if (!nsectors)
-		nsectors = 1;
-
-	if (__blk_end_request(req, error, nsectors << 9))
-		BUG();
-}
-
 static int rwreq;
 
 static void do_viocd_request(struct request_queue *q)
 {
 	struct request *req;
 
-	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
+	while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) {
 		if (!blk_fs_request(req))
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
 			printk(VIOCD_KERN_WARNING
 					"unable to send message to OS/400!");
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
 			rwreq++;
 	}
@@ -486,8 +469,8 @@
 	case viocdopen:
 		if (event->xRc == 0) {
 			di = &viocd_diskinfo[bevent->disk];
-			blk_queue_hardsect_size(di->viocd_disk->queue,
-					bevent->block_size);
+			blk_queue_logical_block_size(di->viocd_disk->queue,
+						     bevent->block_size);
 			set_capacity(di->viocd_disk,
 					bevent->media_size *
 					bevent->block_size / 512);
@@ -531,9 +514,9 @@
 					"with rc %d:0x%04X: %s\n",
 					req, event->xRc,
 					bevent->sub_result, err->msg);
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
-			viocd_end_request(req, 0);
+			__blk_end_request_all(req, 0);
 
 		/* restart handling of incoming requests */
 		spin_unlock_irqrestore(&viocd_reqlock, flags);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 735bbe2..b1e9652 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -97,6 +97,19 @@
 	  kind of kernel debugging operations.
 	  When in doubt, say "N".
 
+config BFIN_JTAG_COMM
+	tristate "Blackfin JTAG Communication"
+	depends on BLACKFIN
+	help
+	  Add support for emulating a TTY device over the Blackfin JTAG.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bfin_jtag_comm.
+
+config BFIN_JTAG_COMM_CONSOLE
+	bool "Console on Blackfin JTAG"
+	depends on BFIN_JTAG_COMM=y
+
 config SERIAL_NONSTANDARD
 	bool "Non-standard serial port support"
 	depends on HAS_IOMEM
@@ -893,7 +906,7 @@
 
 config XILINX_HWICAP
 	tristate "Xilinx HWICAP Support"
-	depends on XILINX_VIRTEX
+	depends on XILINX_VIRTEX || MICROBLAZE
 	help
 	  This option enables support for Xilinx Internal Configuration
 	  Access Port (ICAP) driver.  The ICAP is used on Xilinx Virtex
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5..189efcf 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -13,6 +13,7 @@
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-y				+= misc.o
 obj-$(CONFIG_VT)		+= vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_BFIN_JTAG_COMM)	+= bfin_jtag_comm.o
 obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)	+= vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644
index 0000000..44c113d
--- /dev/null
+++ b/drivers/char/bfin_jtag_comm.c
@@ -0,0 +1,365 @@
+/*
+ * TTY over Blackfin JTAG Communication
+ *
+ * Copyright 2008-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <asm/atomic.h>
+
+/* See the Debug/Emulation chapter in the HRM */
+#define EMUDOF   0x00000001	/* EMUDAT_OUT full & valid */
+#define EMUDIF   0x00000002	/* EMUDAT_IN full & valid */
+#define EMUDOOVF 0x00000004	/* EMUDAT_OUT overflow */
+#define EMUDIOVF 0x00000008	/* EMUDAT_IN overflow */
+
+#define DRV_NAME "bfin-jtag-comm"
+#define DEV_NAME "ttyBFJC"
+
+#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
+#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
+
+static inline uint32_t bfin_write_emudat(uint32_t emudat)
+{
+	__asm__ __volatile__("emudat = %0;" : : "d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_read_emudat(void)
+{
+	uint32_t emudat;
+	__asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
+{
+	return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24));
+}
+
+#define CIRC_SIZE 2048	/* see comment in tty_io.c:do_tty_write() */
+#define CIRC_MASK (CIRC_SIZE - 1)
+#define circ_empty(circ)     ((circ)->head == (circ)->tail)
+#define circ_free(circ)      CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_cnt(circ)       CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
+
+static struct tty_driver *bfin_jc_driver;
+static struct task_struct *bfin_jc_kthread;
+static struct tty_struct * volatile bfin_jc_tty;
+static unsigned long bfin_jc_count;
+static DEFINE_MUTEX(bfin_jc_tty_mutex);
+static volatile struct circ_buf bfin_jc_write_buf;
+
+static int
+bfin_jc_emudat_manager(void *arg)
+{
+	uint32_t inbound_len = 0, outbound_len = 0;
+
+	while (!kthread_should_stop()) {
+		/* no one left to give data to, so sleep */
+		if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for readers\n");
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+
+		/* no data available, so just chill */
+		if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for data (in_len = %i) (circ: %i %i)\n",
+				inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
+			if (inbound_len)
+				schedule();
+			else
+				schedule_timeout_interruptible(HZ);
+			continue;
+		}
+
+		/* if incoming data is ready, eat it */
+		if (bfin_read_DBGSTAT() & EMUDIF) {
+			struct tty_struct *tty;
+			mutex_lock(&bfin_jc_tty_mutex);
+			tty = (struct tty_struct *)bfin_jc_tty;
+			if (tty != NULL) {
+				uint32_t emudat = bfin_read_emudat();
+				if (inbound_len == 0) {
+					debug("incoming length: 0x%08x\n", emudat);
+					inbound_len = emudat;
+				} else {
+					size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+					debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+					inbound_len -= num_chars;
+					tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+					tty_flip_buffer_push(tty);
+				}
+			}
+			mutex_unlock(&bfin_jc_tty_mutex);
+		}
+
+		/* if outgoing data is ready, post it */
+		if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
+			if (outbound_len == 0) {
+				outbound_len = circ_cnt(&bfin_jc_write_buf);
+				bfin_write_emudat(outbound_len);
+				debug("outgoing length: 0x%08x\n", outbound_len);
+			} else {
+				struct tty_struct *tty;
+				int tail = bfin_jc_write_buf.tail;
+				size_t ate = (4 <= outbound_len ? 4 : outbound_len);
+				uint32_t emudat =
+				bfin_write_emudat_chars(
+					circ_byte(&bfin_jc_write_buf, tail + 0),
+					circ_byte(&bfin_jc_write_buf, tail + 1),
+					circ_byte(&bfin_jc_write_buf, tail + 2),
+					circ_byte(&bfin_jc_write_buf, tail + 3)
+				);
+				bfin_jc_write_buf.tail += ate;
+				outbound_len -= ate;
+				mutex_lock(&bfin_jc_tty_mutex);
+				tty = (struct tty_struct *)bfin_jc_tty;
+				if (tty)
+					tty_wakeup(tty);
+				mutex_unlock(&bfin_jc_tty_mutex);
+				debug("  outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
+			}
+		}
+	}
+
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static int
+bfin_jc_open(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("open %lu\n", bfin_jc_count);
+	++bfin_jc_count;
+	bfin_jc_tty = tty;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+	return 0;
+}
+
+static void
+bfin_jc_close(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("close %lu\n", bfin_jc_count);
+	if (--bfin_jc_count == 0)
+		bfin_jc_tty = NULL;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+}
+
+/* XXX: we dont handle the put_char() case where we must handle count = 1 */
+static int
+bfin_jc_circ_write(const unsigned char *buf, int count)
+{
+	int i;
+	count = min(count, circ_free(&bfin_jc_write_buf));
+	debug("going to write chunk of %i bytes\n", count);
+	for (i = 0; i < count; ++i)
+		circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
+	bfin_jc_write_buf.head += i;
+	return i;
+}
+
+#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
+# define acquire_console_sem()
+# define release_console_sem()
+#endif
+static int
+bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+	int i;
+	acquire_console_sem();
+	i = bfin_jc_circ_write(buf, count);
+	release_console_sem();
+	wake_up_process(bfin_jc_kthread);
+	return i;
+}
+
+static void
+bfin_jc_flush_chars(struct tty_struct *tty)
+{
+	wake_up_process(bfin_jc_kthread);
+}
+
+static int
+bfin_jc_write_room(struct tty_struct *tty)
+{
+	return circ_free(&bfin_jc_write_buf);
+}
+
+static int
+bfin_jc_chars_in_buffer(struct tty_struct *tty)
+{
+	return circ_cnt(&bfin_jc_write_buf);
+}
+
+static void
+bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	unsigned long expire = jiffies + timeout;
+	while (!circ_empty(&bfin_jc_write_buf)) {
+		if (signal_pending(current))
+			break;
+		if (time_after(jiffies, expire))
+			break;
+	}
+}
+
+static struct tty_operations bfin_jc_ops = {
+	.open            = bfin_jc_open,
+	.close           = bfin_jc_close,
+	.write           = bfin_jc_write,
+	/*.put_char        = bfin_jc_put_char,*/
+	.flush_chars     = bfin_jc_flush_chars,
+	.write_room      = bfin_jc_write_room,
+	.chars_in_buffer = bfin_jc_chars_in_buffer,
+	.wait_until_sent = bfin_jc_wait_until_sent,
+};
+
+static int __init bfin_jc_init(void)
+{
+	int ret;
+
+	bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
+	if (IS_ERR(bfin_jc_kthread))
+		return PTR_ERR(bfin_jc_kthread);
+
+	ret = -ENOMEM;
+
+	bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
+	bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
+	if (!bfin_jc_write_buf.buf)
+		goto err;
+
+	bfin_jc_driver = alloc_tty_driver(1);
+	if (!bfin_jc_driver)
+		goto err;
+
+	bfin_jc_driver->owner        = THIS_MODULE;
+	bfin_jc_driver->driver_name  = DRV_NAME;
+	bfin_jc_driver->name         = DEV_NAME;
+	bfin_jc_driver->type         = TTY_DRIVER_TYPE_SERIAL;
+	bfin_jc_driver->subtype      = SERIAL_TYPE_NORMAL;
+	bfin_jc_driver->init_termios = tty_std_termios;
+	tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
+
+	ret = tty_register_driver(bfin_jc_driver);
+	if (ret)
+		goto err;
+
+	pr_init(KERN_INFO DRV_NAME ": initialized\n");
+
+	return 0;
+
+ err:
+	put_tty_driver(bfin_jc_driver);
+	kfree(bfin_jc_write_buf.buf);
+	kthread_stop(bfin_jc_kthread);
+	return ret;
+}
+module_init(bfin_jc_init);
+
+static void __exit bfin_jc_exit(void)
+{
+	kthread_stop(bfin_jc_kthread);
+	kfree(bfin_jc_write_buf.buf);
+	tty_unregister_driver(bfin_jc_driver);
+	put_tty_driver(bfin_jc_driver);
+}
+module_exit(bfin_jc_exit);
+
+#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
+static void
+bfin_jc_straight_buffer_write(const char *buf, unsigned count)
+{
+	unsigned ate = 0;
+	while (bfin_read_DBGSTAT() & EMUDOF)
+		continue;
+	bfin_write_emudat(count);
+	while (ate < count) {
+		while (bfin_read_DBGSTAT() & EMUDOF)
+			continue;
+		bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]);
+		ate += 4;
+	}
+}
+#endif
+
+#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
+static void
+bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
+{
+	if (bfin_jc_kthread == NULL)
+		bfin_jc_straight_buffer_write(buf, count);
+	else
+		bfin_jc_circ_write(buf, count);
+}
+
+static struct tty_driver *
+bfin_jc_console_device(struct console *co, int *index)
+{
+	*index = co->index;
+	return bfin_jc_driver;
+}
+
+static struct console bfin_jc_console = {
+	.name    = DEV_NAME,
+	.write   = bfin_jc_console_write,
+	.device  = bfin_jc_console_device,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+static int __init bfin_jc_console_init(void)
+{
+	register_console(&bfin_jc_console);
+	return 0;
+}
+console_initcall(bfin_jc_console_init);
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+static void __init
+bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
+{
+	bfin_jc_straight_buffer_write(buf, count);
+}
+
+static struct __initdata console bfin_jc_early_console = {
+	.name   = "early_BFJC",
+	.write   = bfin_jc_early_write,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+struct console * __init
+bfin_jc_early_init(unsigned int port, unsigned int cflag)
+{
+	return &bfin_jc_early_console;
+}
+#endif
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 1fdb9f6..f3366d3 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -604,7 +604,6 @@
 
 #define NR_PORTS	256
 
-#define ZE_V1_NPORTS	64
 #define ZO_V1	0
 #define ZO_V2	1
 #define ZE_V1	2
@@ -663,18 +662,6 @@
 static void cy_throttle(struct tty_struct *tty);
 static void cy_send_xchar(struct tty_struct *tty, char ch);
 
-#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
-
-#define Z_FPGA_CHECK(card) \
-	((readl(&((struct RUNTIME_9060 __iomem *) \
-		((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
-
-#define ISZLOADED(card)	(((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-			((card).ctl_addr))->mail_box_0)) || \
-			Z_FPGA_CHECK(card)) && \
-			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
-			((card).base_addr+ID_ADDRESS))->signature)))
-
 #ifndef SERIAL_XMIT_SIZE
 #define	SERIAL_XMIT_SIZE	(min(PAGE_SIZE, 4096))
 #endif
@@ -687,8 +674,6 @@
 #define DRIVER_VERSION	0x02010203
 #define RAM_SIZE 0x80000
 
-#define Z_FPGA_LOADED(X)	((readl(&(X)->init_ctrl) & (1<<17)) != 0)
-
 enum zblock_type {
 	ZBLOCK_PRG = 0,
 	ZBLOCK_FPGA = 1
@@ -883,6 +868,29 @@
 static struct timer_list cyz_rx_full_timer[NR_PORTS];
 #endif				/* CONFIG_CYZ_INTR */
 
+static inline bool cy_is_Z(struct cyclades_card *card)
+{
+	return card->num_chips == (unsigned int)-1;
+}
+
+static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
+{
+	return readl(&ctl_addr->init_ctrl) & (1 << 17);
+}
+
+static inline bool cyz_fpga_loaded(struct cyclades_card *card)
+{
+	return __cyz_fpga_loaded(card->ctl_addr.p9060);
+}
+
+static inline bool cyz_is_loaded(struct cyclades_card *card)
+{
+	struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
+
+	return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
+			readl(&fw_id->signature) == ZFIRM_ID;
+}
+
 static inline int serial_paranoia_check(struct cyclades_port *info,
 		char *name, const char *routine)
 {
@@ -1395,19 +1403,15 @@
 	unsigned long loc_doorbell;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
-		return -1;
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
-	loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-				  (cinfo->ctl_addr))->loc_doorbell);
+	loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
 	if (loc_doorbell) {
 		*cmd = (char)(0xff & loc_doorbell);
 		*channel = readl(&board_ctrl->fwcmd_channel);
 		*param = (__u32) readl(&board_ctrl->fwcmd_param);
-		cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			  loc_doorbell, 0xffffffff);
+		cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
 		return 1;
 	}
 	return 0;
@@ -1424,15 +1428,14 @@
 	unsigned int index;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
+	if (!cyz_is_loaded(cinfo))
 		return -1;
 
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
 	index = 0;
-	pci_doorbell =
-	    &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
+	pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
 	while ((readl(pci_doorbell) & 0xff) != 0) {
 		if (index++ == 1000)
 			return (int)(readl(pci_doorbell) & 0xff);
@@ -1624,10 +1627,8 @@
 	static struct BOARD_CTRL __iomem *board_ctrl;
 	static struct CH_CTRL __iomem *ch_ctrl;
 	static struct BUF_CTRL __iomem *buf_ctrl;
-	__u32 channel;
+	__u32 channel, param, fw_ver;
 	__u8 cmd;
-	__u32 param;
-	__u32 hw_ver, fw_ver;
 	int special_count;
 	int delta_count;
 
@@ -1635,8 +1636,6 @@
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -1737,15 +1736,7 @@
 {
 	struct cyclades_card *cinfo = dev_id;
 
-	if (unlikely(cinfo == NULL)) {
-#ifdef CY_DEBUG_INTERRUPTS
-		printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n",
-									irq);
-#endif
-		return IRQ_NONE;	/* spurious interrupt */
-	}
-
-	if (unlikely(!ISZLOADED(*cinfo))) {
+	if (unlikely(!cyz_is_loaded(cinfo))) {
 #ifdef CY_DEBUG_INTERRUPTS
 		printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
 				"(IRQ%d).\n", irq);
@@ -1785,7 +1776,6 @@
 	struct tty_struct *tty;
 	struct FIRM_ID __iomem *firm_id;
 	struct ZFW_CTRL __iomem *zfw_ctrl;
-	struct BOARD_CTRL __iomem *board_ctrl;
 	struct BUF_CTRL __iomem *buf_ctrl;
 	unsigned long expires = jiffies + HZ;
 	unsigned int port, card;
@@ -1793,19 +1783,17 @@
 	for (card = 0; card < NR_CARDS; card++) {
 		cinfo = &cy_card[card];
 
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 			continue;
-		if (!ISZLOADED(*cinfo))
+		if (!cyz_is_loaded(cinfo))
 			continue;
 
 		firm_id = cinfo->base_addr + ID_ADDRESS;
 		zfw_ctrl = cinfo->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &(zfw_ctrl->board_ctrl);
 
 	/* Skip first polling cycle to avoid racing conditions with the FW */
 		if (!cinfo->intr_enabled) {
-			cinfo->nports = (int)readl(&board_ctrl->n_channel);
 			cinfo->intr_enabled = 1;
 			continue;
 		}
@@ -1874,7 +1862,7 @@
 
 	set_line_char(info);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -1931,7 +1919,7 @@
 		base_addr = card->base_addr;
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return -ENODEV;
 
 		zfw_ctrl = card->base_addr +
@@ -2026,7 +2014,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2070,7 +2058,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2126,7 +2114,7 @@
 #endif
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
@@ -2233,7 +2221,7 @@
 #endif
 	info->port.blocked_open++;
 
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = cinfo->bus_index;
@@ -2296,7 +2284,7 @@
 
 		base_addr = cinfo->base_addr;
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*cinfo)) {
+		if (!cyz_is_loaded(cinfo)) {
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&info->port.open_wait, &wait);
 			return -EINVAL;
@@ -2397,16 +2385,14 @@
 	   treat it as absent from the system.  This
 	   will make the user pay attention.
 	 */
-	if (IS_CYC_Z(*info->card)) {
+	if (cy_is_Z(info->card)) {
 		struct cyclades_card *cinfo = info->card;
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
-		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *)
-					 (cinfo->ctl_addr))->mail_box_0)) &&
-					Z_FPGA_CHECK(*cinfo)) &&
-					(ZFIRM_HLT == readl(
-						&firm_id->signature))) {
+		if (!cyz_is_loaded(cinfo)) {
+			if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) &&
+					readl(&firm_id->signature) ==
+					ZFIRM_HLT) {
 				printk(KERN_ERR "cyc:Cyclades-Z Error: you "
 					"need an external power supply for "
 					"this number of ports.\nFirmware "
@@ -2423,18 +2409,13 @@
 		   interrupts should be enabled as soon as the first open
 		   happens to one of its ports. */
 			if (!cinfo->intr_enabled) {
-				struct ZFW_CTRL __iomem *zfw_ctrl;
-				struct BOARD_CTRL __iomem *board_ctrl;
-
-				zfw_ctrl = cinfo->base_addr +
-					(readl(&firm_id->zfwctrl_addr) &
-					 0xfffff);
-
-				board_ctrl = &zfw_ctrl->board_ctrl;
+				u16 intr;
 
 				/* Enable interrupts on the PLX chip */
-				cy_writew(cinfo->ctl_addr + 0x68,
-					readw(cinfo->ctl_addr + 0x68) | 0x0900);
+				intr = readw(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat) | 0x0900;
+				cy_writew(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat, intr);
 				/* Enable interrupts on the FW */
 				retval = cyz_issue_cmd(cinfo, 0,
 						C_CM_IRQ_ENBL, 0L);
@@ -2442,8 +2423,6 @@
 					printk(KERN_ERR "cyc:IRQ enable retval "
 						"was %x\n", retval);
 				}
-				cinfo->nports =
-					(int)readl(&board_ctrl->n_channel);
 				cinfo->intr_enabled = 1;
 			}
 		}
@@ -2556,7 +2535,7 @@
 #endif
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2601,7 +2580,7 @@
 	info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
-	if (IS_CYC_Z(*card)) {	/* If it is a Z card, flush the on-board
+	if (cy_is_Z(card)) {	/* If it is a Z card, flush the on-board
 					   buffers as well */
 		spin_lock_irqsave(&card->card_lock, flags);
 		retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L);
@@ -2682,7 +2661,7 @@
 
 	spin_lock_irqsave(&card->card_lock, flags);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		int channel = info->line - card->first_line;
 		int index = card->bus_index;
 		void __iomem *base_addr = card->base_addr +
@@ -2902,7 +2881,7 @@
 	channel = (info->line) - (card->first_line);
 
 #ifdef Z_EXT_CHARS_IN_BUFFER
-	if (!IS_CYC_Z(cy_card[card])) {
+	if (!cy_is_Z(card)) {
 #endif				/* Z_EXT_CHARS_IN_BUFFER */
 #ifdef CY_DEBUG_IO
 		printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n",
@@ -2984,7 +2963,6 @@
 	void __iomem *base_addr;
 	int chip, channel, index;
 	unsigned cflag, iflag;
-	unsigned short chip_number;
 	int baud, baud_rate = 0;
 	int i;
 
@@ -3013,9 +2991,8 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	chip_number = channel / 4;
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 
 		index = card->bus_index;
 
@@ -3233,21 +3210,17 @@
 	} else {
 		struct FIRM_ID __iomem *firm_id;
 		struct ZFW_CTRL __iomem *zfw_ctrl;
-		struct BOARD_CTRL __iomem *board_ctrl;
 		struct CH_CTRL __iomem *ch_ctrl;
-		struct BUF_CTRL __iomem *buf_ctrl;
 		__u32 sw_flow;
 		int retval;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
 			(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &zfw_ctrl->board_ctrl;
 		ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]);
-		buf_ctrl = &zfw_ctrl->buf_ctrl[channel];
 
 		/* baud rate */
 		baud = tty_get_baud_rate(info->port.tty);
@@ -3457,7 +3430,7 @@
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3497,7 +3470,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3523,7 +3496,7 @@
 	} else {
 		base_addr = card->base_addr;
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3566,7 +3539,7 @@
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3641,7 +3614,7 @@
 		base_addr = card->base_addr;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3713,7 +3686,7 @@
 	card = info->card;
 
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		/* Let the transmit ISR take care of this (since it
 		   requires stuffing characters into the output stream).
 		 */
@@ -3782,7 +3755,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3810,7 +3783,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3844,7 +3817,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3867,7 +3840,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -4121,7 +4094,7 @@
 	card = info->card;
 	channel = info->line - card->first_line;
 
-	if (IS_CYC_Z(*card)) {
+	if (cy_is_Z(card)) {
 		if (ch == STOP_CHAR(tty))
 			cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L);
 		else if (ch == START_CHAR(tty))
@@ -4154,7 +4127,7 @@
 	card = info->card;
 
 	if (I_IXOFF(tty)) {
-		if (!IS_CYC_Z(*card))
+		if (!cy_is_Z(card))
 			cy_send_xchar(tty, STOP_CHAR(tty));
 		else
 			info->throttle = 1;
@@ -4162,7 +4135,7 @@
 
 	if (tty->termios->c_cflag & CRTSCTS) {
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4219,7 +4192,7 @@
 	if (tty->termios->c_cflag & CRTSCTS) {
 		card = info->card;
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4263,7 +4236,7 @@
 
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		index = cinfo->bus_index;
 		chip = channel >> 2;
 		channel &= 0x03;
@@ -4296,7 +4269,7 @@
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
 	index = cinfo->bus_index;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index);
@@ -4347,33 +4320,20 @@
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
 	struct cyclades_port *info;
-	u32 uninitialized_var(mailbox);
-	unsigned int nports, port;
+	unsigned int port;
 	unsigned short chip_number;
-	int uninitialized_var(index);
 
 	spin_lock_init(&cinfo->card_lock);
+	cinfo->intr_enabled = 0;
 
-	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&((struct RUNTIME_9060 __iomem *)
-				     cinfo->ctl_addr)->mail_box_0);
-		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
-		cinfo->intr_enabled = 0;
-		cinfo->nports = 0;	/* Will be correctly set later, after
-					   Z FW is loaded */
-	} else {
-		index = cinfo->bus_index;
-		nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips;
-	}
-
-	cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL);
+	cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports),
+			GFP_KERNEL);
 	if (cinfo->ports == NULL) {
 		printk(KERN_ERR "Cyclades: cannot allocate ports\n");
-		cinfo->nports = 0;
 		return -ENOMEM;
 	}
 
-	for (port = cinfo->first_line; port < cinfo->first_line + nports;
+	for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports;
 			port++) {
 		info = &cinfo->ports[port - cinfo->first_line];
 		tty_port_init(&info->port);
@@ -4387,9 +4347,9 @@
 		init_completion(&info->shutdown_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 
-		if (IS_CYC_Z(*cinfo)) {
+		if (cy_is_Z(cinfo)) {
 			info->type = PORT_STARTECH;
-			if (mailbox == ZO_V1)
+			if (cinfo->hw_ver == ZO_V1)
 				info->xmit_fifo_size = CYZ_FIFO_SIZE;
 			else
 				info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4398,6 +4358,7 @@
 				cyz_rx_restart, (unsigned long)info);
 #endif
 		} else {
+			int index = cinfo->bus_index;
 			info->type = PORT_CIRRUS;
 			info->xmit_fifo_size = CyMAX_CHAR_FIFO;
 			info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS;
@@ -4430,7 +4391,7 @@
 	}
 
 #ifndef CONFIG_CYZ_INTR
-	if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) {
+	if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) {
 		mod_timer(&cyz_timerlist, jiffies + 1);
 #ifdef CY_PCI_DEBUG
 		printk(KERN_DEBUG "Cyclades-Z polling initialized\n");
@@ -4621,11 +4582,12 @@
 
 		/* set cy_card */
 		cy_card[j].base_addr = cy_isa_address;
-		cy_card[j].ctl_addr = NULL;
+		cy_card[j].ctl_addr.p9050 = NULL;
 		cy_card[j].irq = (int)cy_isa_irq;
 		cy_card[j].bus_index = 0;
 		cy_card[j].first_line = cy_next_channel;
-		cy_card[j].num_chips = cy_isa_nchan / 4;
+		cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP;
+		cy_card[j].nports = cy_isa_nchan;
 		if (cy_init_card(&cy_card[j])) {
 			cy_card[j].base_addr = NULL;
 			free_irq(cy_isa_irq, &cy_card[j]);
@@ -4781,7 +4743,7 @@
 	struct CUSTOM_REG __iomem *cust = base_addr;
 	struct ZFW_CTRL __iomem *pt_zfwctrl;
 	void __iomem *tmp;
-	u32 mailbox, status;
+	u32 mailbox, status, nchan;
 	unsigned int i;
 	int retval;
 
@@ -4793,7 +4755,7 @@
 
 	/* Check whether the firmware is already loaded and running. If
 	   positive, skip this board */
-	if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
+	if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
 		u32 cntval = readl(base_addr + 0x190);
 
 		udelay(100);
@@ -4812,7 +4774,7 @@
 
 	mailbox = readl(&ctl_addr->mail_box_0);
 
-	if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) {
+	if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) {
 		/* stops CPU and set window to beginning of RAM */
 		cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
 		cy_writel(&cust->cpu_stop, 0);
@@ -4828,7 +4790,7 @@
 				base_addr);
 		if (retval)
 			goto err_rel;
-		if (!Z_FPGA_LOADED(ctl_addr)) {
+		if (!__cyz_fpga_loaded(ctl_addr)) {
 			dev_err(&pdev->dev, "fw upload successful, but fw is "
 					"not loaded\n");
 			goto err_rel;
@@ -4887,7 +4849,7 @@
 				"system before loading the new FW to the "
 				"Cyclades-Z.\n");
 
-			if (Z_FPGA_LOADED(ctl_addr))
+			if (__cyz_fpga_loaded(ctl_addr))
 				plx_init(pdev, irq, ctl_addr);
 
 			retval = -EIO;
@@ -4902,16 +4864,16 @@
 			base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr),
 			base_addr + readl(&fid->zfwctrl_addr));
 
+	nchan = readl(&pt_zfwctrl->board_ctrl.n_channel);
 	dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n",
-		readl(&pt_zfwctrl->board_ctrl.fw_version),
-		readl(&pt_zfwctrl->board_ctrl.n_channel));
+		readl(&pt_zfwctrl->board_ctrl.fw_version), nchan);
 
-	if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) {
+	if (nchan == 0) {
 		dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please "
 			"check the connection between the Z host card and the "
 			"serial expanders.\n");
 
-		if (Z_FPGA_LOADED(ctl_addr))
+		if (__cyz_fpga_loaded(ctl_addr))
 			plx_init(pdev, irq, ctl_addr);
 
 		dev_info(&pdev->dev, "Null number of ports detected. Board "
@@ -4932,9 +4894,7 @@
 	cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
 			0x00030800UL);
 
-	plx_init(pdev, irq, ctl_addr);
-
-	return 0;
+	return nchan;
 err_rel:
 	release_firmware(fw);
 err:
@@ -4946,7 +4906,7 @@
 {
 	void __iomem *addr0 = NULL, *addr2 = NULL;
 	char *card_name = NULL;
-	u32 mailbox;
+	u32 uninitialized_var(mailbox);
 	unsigned int device_id, nchan = 0, card_no, i;
 	unsigned char plx_ver;
 	int retval, irq;
@@ -5023,11 +4983,12 @@
 		}
 
 		/* Disable interrupts on the PLX before resetting it */
-		cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900);
+		cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
 
 		plx_init(pdev, irq, addr0);
 
-		mailbox = (u32)readl(&ctl_addr->mail_box_0);
+		mailbox = readl(&ctl_addr->mail_box_0);
 
 		addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
 				mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5038,12 +4999,8 @@
 
 		if (mailbox == ZE_V1) {
 			card_name = "Cyclades-Ze";
-
-			readl(&ctl_addr->mail_box_0);
-			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
-
 #ifdef CY_PCI_DEBUG
 			if (mailbox == ZO_V1) {
 				cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
@@ -5065,15 +5022,12 @@
 			 */
 			if ((mailbox == ZO_V1) || (mailbox == ZO_V2))
 				cy_writel(addr2 + ID_ADDRESS, 0L);
-
-			retval = cyz_load_fw(pdev, addr2, addr0, irq);
-			if (retval)
-				goto err_unmap;
-			/* This must be a Cyclades-8Zo/PCI.  The extendable
-			   version will have a different device_id and will
-			   be allocated its maximum number of ports. */
-			nchan = 8;
 		}
+
+		retval = cyz_load_fw(pdev, addr2, addr0, irq);
+		if (retval <= 0)
+			goto err_unmap;
+		nchan = retval;
 	}
 
 	if ((cy_next_channel + nchan) > NR_PORTS) {
@@ -5103,8 +5057,10 @@
 			dev_err(&pdev->dev, "could not allocate IRQ\n");
 			goto err_unmap;
 		}
-		cy_card[card_no].num_chips = nchan / 4;
+		cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP;
 	} else {
+		cy_card[card_no].hw_ver = mailbox;
+		cy_card[card_no].num_chips = (unsigned int)-1;
 #ifdef CONFIG_CYZ_INTR
 		/* allocate IRQ only if board has an IRQ */
 		if (irq != 0 && irq != 255) {
@@ -5117,15 +5073,15 @@
 			}
 		}
 #endif				/* CONFIG_CYZ_INTR */
-		cy_card[card_no].num_chips = (unsigned int)-1;
 	}
 
 	/* set cy_card */
 	cy_card[card_no].base_addr = addr2;
-	cy_card[card_no].ctl_addr = addr0;
+	cy_card[card_no].ctl_addr.p9050 = addr0;
 	cy_card[card_no].irq = irq;
 	cy_card[card_no].bus_index = 1;
 	cy_card[card_no].first_line = cy_next_channel;
+	cy_card[card_no].nports = nchan;
 	retval = cy_init_card(&cy_card[card_no]);
 	if (retval)
 		goto err_null;
@@ -5138,17 +5094,20 @@
 		plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
 		switch (plx_ver) {
 		case PLX_9050:
-
 			cy_writeb(addr0 + 0x4c, 0x43);
 			break;
 
 		case PLX_9060:
 		case PLX_9080:
 		default:	/* Old boards, use PLX_9060 */
-			plx_init(pdev, irq, addr0);
-			cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900);
+		{
+			struct RUNTIME_9060 __iomem *ctl_addr = addr0;
+			plx_init(pdev, irq, ctl_addr);
+			cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
 			break;
 		}
+		}
 	}
 
 	dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5179,22 +5138,23 @@
 	unsigned int i;
 
 	/* non-Z with old PLX */
-	if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
+	if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
 			PLX_9050)
-		cy_writeb(cinfo->ctl_addr + 0x4c, 0);
+		cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
 	else
 #ifndef CONFIG_CYZ_INTR
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 #endif
-		cy_writew(cinfo->ctl_addr + 0x68,
-				readw(cinfo->ctl_addr + 0x68) & ~0x0900);
+		cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
+			readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
+			~0x0900);
 
 	iounmap(cinfo->base_addr);
-	if (cinfo->ctl_addr)
-		iounmap(cinfo->ctl_addr);
+	if (cinfo->ctl_addr.p9050)
+		iounmap(cinfo->ctl_addr.p9050);
 	if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
-		&& !IS_CYC_Z(*cinfo)
+		&& !cy_is_Z(cinfo)
 #endif /* CONFIG_CYZ_INTR */
 		)
 		free_irq(cinfo->irq, cinfo);
@@ -5240,7 +5200,7 @@
 					(cur_jifs - info->idle_stats.recv_idle)/
 					HZ, info->idle_stats.overruns,
 					/* FIXME: double check locking */
-					(long)info->port.tty->ldisc.ops->num);
+					(long)info->port.tty->ldisc->ops->num);
 			else
 				seq_printf(m, "%3d %8lu %10lu %8lu "
 					"%10lu %8lu %9lu %6ld\n",
@@ -5386,11 +5346,11 @@
 			/* clear interrupt */
 			cy_writeb(card->base_addr + Cy_ClrIntr, 0);
 			iounmap(card->base_addr);
-			if (card->ctl_addr)
-				iounmap(card->ctl_addr);
+			if (card->ctl_addr.p9050)
+				iounmap(card->ctl_addr.p9050);
 			if (card->irq
 #ifndef CONFIG_CYZ_INTR
-				&& !IS_CYC_Z(*card)
+				&& !cy_is_Z(card)
 #endif /* CONFIG_CYZ_INTR */
 				)
 				free_irq(card->irq, card);
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index af7c13c..abef1f7 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -745,7 +745,7 @@
 	return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port)
+static void epca_dtr_rts(struct tty_port *port, int onoff)
 {
 }
 
@@ -925,7 +925,7 @@
 
 static const struct tty_port_operations epca_port_ops = {
 	.carrier_raised = epca_carrier_raised,
-	.raise_dtr_rts = epca_raise_dtr_rts,
+	.dtr_rts = epca_dtr_rts,
 };
 
 static int info_open(struct tty_struct *tty, struct file *filp)
@@ -1518,7 +1518,7 @@
 		if (event & MODEMCHG_IND) {
 			/* A modem signal change has been indicated */
 			ch->imodem = mstat;
-			if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
+			if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) {
 				/* We are now receiving dcd */
 				if (mstat & ch->dcd)
 					wake_up_interruptible(&ch->port.open_wait);
@@ -1765,9 +1765,9 @@
 		 * that the driver will wait on carrier detect.
 		 */
 		if (ts->c_cflag & CLOCAL)
-			clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			clear_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		else
-			set_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			set_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		mval = ch->m_dtr | ch->m_rts;
 	} /* End CBAUD not detected */
 	iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2114,8 +2114,8 @@
 			tty_wait_until_sent(tty, 0);
 		} else {
 			/* ldisc lock already held in ioctl */
-			if (tty->ldisc.ops->flush_buffer)
-				tty->ldisc.ops->flush_buffer(tty);
+			if (tty->ldisc->ops->flush_buffer)
+				tty->ldisc->ops->flush_buffer(tty);
 		}
 		unlock_kernel();
 		/* Fall Thru */
@@ -2244,7 +2244,8 @@
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
 				tty_hangup(tty);
 				wake_up_interruptible(&ch->port.open_wait);
-				clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
+				clear_bit(ASYNCB_NORMAL_ACTIVE,
+						&ch->port.flags);
 			}
 		}
 		tty_kref_put(tty);
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 340ba4f..4a9f349 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -224,7 +224,7 @@
 			break;
 		}
 
-		gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
+		gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
 					ACPI_ACTIVE_LOW);
 		if (gsi > 0)
 			break;
@@ -939,7 +939,7 @@
 		irqp = &res->data.extended_irq;
 
 		for (i = 0; i < irqp->interrupt_count; i++) {
-			irq = acpi_register_gsi(irqp->interrupts[i],
+			irq = acpi_register_gsi(NULL, irqp->interrupts[i],
 				      irqp->triggering, irqp->polarity);
 			if (irq < 0)
 				return AE_ERROR;
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 86e83f8..32216b6 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -35,13 +35,13 @@
 
 static void random_recv_done(struct virtqueue *vq)
 {
-	int len;
+	unsigned int len;
 
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
 	if (!vq->vq_ops->get_buf(vq, &len))
 		return;
 
-	data_left = len / sizeof(random_data[0]);
+	data_left += len;
 	complete(&have_data);
 }
 
@@ -49,7 +49,7 @@
 {
 	struct scatterlist sg;
 
-	sg_init_one(&sg, random_data, RANDOM_DATA_SIZE);
+	sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
 	/* There should always be room for one buffer. */
 	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
 		BUG();
@@ -59,24 +59,32 @@
 /* At least we don't udelay() in a loop like some other drivers. */
 static int virtio_data_present(struct hwrng *rng, int wait)
 {
-	if (data_left)
+	if (data_left >= sizeof(u32))
 		return 1;
 
+again:
 	if (!wait)
 		return 0;
 
 	wait_for_completion(&have_data);
+
+	/* Not enough?  Re-register. */
+	if (unlikely(data_left < sizeof(u32))) {
+		register_buffer();
+		goto again;
+	}
+
 	return 1;
 }
 
 /* virtio_data_present() must have succeeded before this is called. */
 static int virtio_data_read(struct hwrng *rng, u32 *data)
 {
-	BUG_ON(!data_left);
+	BUG_ON(data_left < sizeof(u32));
+	data_left -= sizeof(u32);
+	*data = random_data[data_left / 4];
 
-	*data = random_data[--data_left];
-
-	if (!data_left) {
+	if (data_left < sizeof(u32)) {
 		init_completion(&have_data);
 		register_buffer();
 	}
@@ -94,13 +102,13 @@
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = vdev->config->find_vq(vdev, 0, random_recv_done);
+	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
 	err = hwrng_register(&virtio_hwrng);
 	if (err) {
-		vdev->config->del_vq(vq);
+		vdev->config->del_vqs(vdev);
 		return err;
 	}
 
@@ -112,7 +120,7 @@
 {
 	vdev->config->reset(vdev);
 	hwrng_unregister(&virtio_hwrng);
-	vdev->config->del_vq(vq);
+	vdev->config->del_vqs(vdev);
 }
 
 static struct virtio_device_id id_table[] = {
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 0061e18..0d10b89 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@
 		amountToMove = count;
 	}
 	// Move the first block
-	pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+	pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
 	// If we needed to wrap, do the second data move
 	if (count > amountToMove) {
-		pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+		pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 pCh->Ibuf, NULL, count - amountToMove );
 	}
 	// Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index afd9247..517271c 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1315,8 +1315,8 @@
 	if (tty->pgrp)
 		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
-		if ( tty->ldisc.ops->flush_buffer )  
-			tty->ldisc.ops->flush_buffer(tty);
+		if ( tty->ldisc->ops->flush_buffer )  
+			tty->ldisc->ops->flush_buffer(tty);
 		i2InputFlush( tty->driver_data );
 	}
 }
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a59eac5..4d745a8 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -329,7 +329,7 @@
 
 /* card->lock MUST NOT be held */
 
-static void isicom_raise_dtr_rts(struct tty_port *port)
+static void isicom_dtr_rts(struct tty_port *port, int on)
 {
 	struct isi_port *ip = container_of(port, struct isi_port, port);
 	struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@
 	if (!lock_card(card))
 		return;
 
-	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
-	outw(0x0f04, base);
-	InterruptTheCard(base);
-	ip->status |= (ISI_DTR | ISI_RTS);
+	if (on) {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0f04, base);
+		InterruptTheCard(base);
+		ip->status |= (ISI_DTR | ISI_RTS);
+	} else {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0C04, base);
+		InterruptTheCard(base);
+		ip->status &= ~(ISI_DTR | ISI_RTS);
+	}
 	unlock_card(card);
 }
 
@@ -1339,7 +1346,7 @@
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
-	.raise_dtr_rts		= isicom_raise_dtr_rts,
+	.dtr_rts		= isicom_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fff19f7..e18800c 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -1140,14 +1140,14 @@
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stli_raise_dtr_rts(struct tty_port *port)
+static void stli_dtr_rts(struct tty_port *port, int on)
 {
 	struct stliport *portp = container_of(port, struct stliport, port);
 	struct stlibrd *brdp = stli_brds[portp->brdnr];
-	stli_mkasysigs(&portp->asig, 1, 1);
+	stli_mkasysigs(&portp->asig, on, on);
 	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
 		sizeof(asysigs_t), 0) < 0)
-			printk(KERN_WARNING "istallion: dtr raise failed.\n");
+			printk(KERN_WARNING "istallion: dtr set failed.\n");
 }
 
 
@@ -4417,7 +4417,7 @@
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
-	.raise_dtr_rts = stli_raise_dtr_rts,
+	.dtr_rts = stli_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 8f05c38..f96d0be 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -694,6 +694,8 @@
 		written += chunk - unwritten;
 		if (unwritten)
 			break;
+		if (signal_pending(current))
+			return written ? written : -ERESTARTSYS;
 		buf += chunk;
 		count -= chunk;
 		cond_resched();
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4a4cab7..65b6ff2 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -1184,6 +1184,11 @@
 		return -ENODEV;
 	}
 
+	if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) {
+		mutex_unlock(&moxa_openlock);
+		return -ENODEV;
+	}
+
 	ch = &brd->ports[port % MAX_PORTS_PER_BOARD];
 	ch->port.count++;
 	tty->driver_data = ch;
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index a420e8d..9533f43 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,14 +547,18 @@
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
-static void mxser_raise_dtr_rts(struct tty_port *port)
+static void mxser_dtr_rts(struct tty_port *port, int on)
 {
 	struct mxser_port *mp = container_of(port, struct mxser_port, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&mp->slock, flags);
-	outb(inb(mp->ioaddr + UART_MCR) |
-		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	if (on)
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	else
+		outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
+			mp->ioaddr + UART_MCR);
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
@@ -2356,7 +2360,7 @@
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
-	.raise_dtr_rts = mxser_raise_dtr_rts,
+	.dtr_rts = mxser_dtr_rts,
 };
 
 /*
@@ -2711,7 +2715,7 @@
 			continue;
 
 		brd = &mxser_boards[m];
-		retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+		retval = mxser_get_ISA_conf(ioaddr[b], brd);
 		if (retval <= 0) {
 			brd->info = NULL;
 			continue;
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index bacb3e2..461ece5 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -342,8 +342,8 @@
 #endif
 	
 	/* Flush any pending characters in the driver and discipline. */
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 
 	tty_driver_flush_buffer(tty);
 		
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index f6f0e4e..94a5d50 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -73,24 +73,6 @@
 #define ECHO_OP_SET_CANON_COL 0x81
 #define ECHO_OP_ERASE_TAB 0x82
 
-static inline unsigned char *alloc_buf(void)
-{
-	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
-
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		return kmalloc(N_TTY_BUF_SIZE, prio);
-	else
-		return (unsigned char *)__get_free_page(prio);
-}
-
-static inline void free_buf(unsigned char *buf)
-{
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf);
-	else
-		free_page((unsigned long) buf);
-}
-
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 			       unsigned char __user *ptr)
 {
@@ -1558,11 +1540,11 @@
 {
 	n_tty_flush_buffer(tty);
 	if (tty->read_buf) {
-		free_buf(tty->read_buf);
+		kfree(tty->read_buf);
 		tty->read_buf = NULL;
 	}
 	if (tty->echo_buf) {
-		free_buf(tty->echo_buf);
+		kfree(tty->echo_buf);
 		tty->echo_buf = NULL;
 	}
 }
@@ -1584,17 +1566,16 @@
 
 	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
-		tty->read_buf = alloc_buf();
+		tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
 	if (!tty->echo_buf) {
-		tty->echo_buf = alloc_buf();
+		tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+
 		if (!tty->echo_buf)
 			return -ENOMEM;
 	}
-	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
-	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 19d79fc..77b3648 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -383,7 +383,7 @@
 static void tx_timeout(unsigned long context);
 
 static int carrier_raised(struct tty_port *port);
-static void raise_dtr_rts(struct tty_port *port);
+static void dtr_rts(struct tty_port *port, int onoff);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@
 
 static const struct tty_port_operations mgslpc_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts
+	.dtr_rts = dtr_rts
 };
 
 static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,16 @@
 	return 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int onoff)
 {
 	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (onoff)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR;
 	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 31038a0..5acd29e 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -30,7 +30,6 @@
 
 #include <asm/system.h>
 
-/* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 static struct tty_driver *ptm_driver;
 static struct tty_driver *pts_driver;
@@ -111,7 +110,7 @@
 	c = to->receive_room;
 	if (c > count)
 		c = count;
-	to->ldisc.ops->receive_buf(to, buf, NULL, c);
+	to->ldisc->ops->receive_buf(to, buf, NULL, c);
 
 	return c;
 }
@@ -149,11 +148,11 @@
 	int count;
 
 	/* We should get the line discipline lock for "tty->link" */
-	if (!to || !to->ldisc.ops->chars_in_buffer)
+	if (!to || !to->ldisc->ops->chars_in_buffer)
 		return 0;
 
 	/* The ldisc must report 0 if no characters available to be read */
-	count = to->ldisc.ops->chars_in_buffer(to);
+	count = to->ldisc->ops->chars_in_buffer(to);
 
 	if (tty->driver->subtype == PTY_TYPE_SLAVE)
 		return count;
@@ -187,8 +186,8 @@
 	if (!to)
 		return;
 
-	if (to->ldisc.ops->flush_buffer)
-		to->ldisc.ops->flush_buffer(to);
+	if (to->ldisc->ops->flush_buffer)
+		to->ldisc->ops->flush_buffer(to);
 
 	if (to->packet) {
 		spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 20d90e6..db32f0e 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -71,7 +71,7 @@
 	err = bd_claim(bdev, raw_open);
 	if (err)
 		goto out1;
-	err = set_blocksize(bdev, bdev_hardsect_size(bdev));
+	err = set_blocksize(bdev, bdev_logical_block_size(bdev));
 	if (err)
 		goto out2;
 	filp->f_flags |= O_DIRECT;
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f59fc5c..63d5b62 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -872,11 +872,16 @@
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct r_port *info = container_of(port, struct r_port, port);
-	sSetDTR(&info->channel);
-	sSetRTS(&info->channel);
+	if (on) {
+		sSetDTR(&info->channel);
+		sSetRTS(&info->channel);
+	} else {
+		sClrDTR(&info->channel);
+		sClrRTS(&info->channel);
+	}
 }
 
 /*
@@ -934,7 +939,7 @@
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
+	if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -958,7 +963,7 @@
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		set_bit(ASYNC_INITIALIZED, &info->port.flags);
+		set_bit(ASYNCB_INITIALIZED, &info->port.flags);
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1641,7 +1646,7 @@
 	/*  Write remaining data into the port's xmit_buf */
 	while (1) {
 		/* Hung up ? */
-		if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
+		if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags))
 			goto end;
 		c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
 		c = min(c, XMIT_BUF_SIZE - info->xmit_head);
@@ -2250,7 +2255,7 @@
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index cb8ca56..f97b9e8 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,7 @@
 		}
 		count = sel_buffer_lth - pasted;
 		count = min(count, tty->receive_room);
-		tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+		tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
 								NULL, count);
 		pasted += count;
 	}
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 2ad813a..53e504f 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -772,11 +772,11 @@
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stl_raise_dtr_rts(struct tty_port *port)
+static void stl_dtr_rts(struct tty_port *port, int on)
 {
 	struct stlport *portp = container_of(port, struct stlport, port);
 	/* Takes brd_lock internally */
-	stl_setsignals(portp, 1, 1);
+	stl_setsignals(portp, on, on);
 }
 
 /*****************************************************************************/
@@ -2547,7 +2547,7 @@
 
 static const struct tty_port_operations stl_port_ops = {
 	.carrier_raised = stl_carrier_raised,
-	.raise_dtr_rts = stl_raise_dtr_rts,
+	.dtr_rts = stl_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index afd0b26..afded3a 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3247,13 +3247,16 @@
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->irq_spinlock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	usc_set_serial_signals(info);
 	spin_unlock_irqrestore(&info->irq_spinlock,flags);
 }
@@ -4258,7 +4261,7 @@
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 5e25649..1386625 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -214,6 +214,7 @@
 #define set_desc_next(a,b) (a).next   = cpu_to_le32((unsigned int)(b))
 #define set_desc_count(a,b)(a).count  = cpu_to_le16((unsigned short)(b))
 #define set_desc_eof(a,b)  (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
+#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b))
 #define desc_count(a)      (le16_to_cpu((a).count))
 #define desc_status(a)     (le16_to_cpu((a).status))
 #define desc_complete(a)   (le16_to_cpu((a).status) & BIT15)
@@ -297,6 +298,7 @@
 	u32 max_frame_size;       /* as set by device config */
 
 	unsigned int rbuf_fill_level;
+	unsigned int rx_pio;
 	unsigned int if_mode;
 	unsigned int base_clock;
 
@@ -331,6 +333,8 @@
 	struct slgt_desc *rbufs;
 	unsigned int rbuf_current;
 	unsigned int rbuf_index;
+	unsigned int rbuf_fill_index;
+	unsigned short rbuf_fill_count;
 
 	unsigned int tbuf_count;
 	struct slgt_desc *tbufs;
@@ -2110,6 +2114,40 @@
 	info->pending_bh |= BH_STATUS;
 }
 
+static void isr_rxdata(struct slgt_info *info)
+{
+	unsigned int count = info->rbuf_fill_count;
+	unsigned int i = info->rbuf_fill_index;
+	unsigned short reg;
+
+	while (rd_reg16(info, SSR) & IRQ_RXDATA) {
+		reg = rd_reg16(info, RDR);
+		DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg));
+		if (desc_complete(info->rbufs[i])) {
+			/* all buffers full */
+			rx_stop(info);
+			info->rx_restart = 1;
+			continue;
+		}
+		info->rbufs[i].buf[count++] = (unsigned char)reg;
+		/* async mode saves status byte to buffer for each data byte */
+		if (info->params.mode == MGSL_MODE_ASYNC)
+			info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8);
+		if (count == info->rbuf_fill_level || (reg & BIT10)) {
+			/* buffer full or end of frame */
+			set_desc_count(info->rbufs[i], count);
+			set_desc_status(info->rbufs[i], BIT15 | (reg >> 8));
+			info->rbuf_fill_count = count = 0;
+			if (++i == info->rbuf_count)
+				i = 0;
+			info->pending_bh |= BH_RECEIVE;
+		}
+	}
+
+	info->rbuf_fill_index = i;
+	info->rbuf_fill_count = count;
+}
+
 static void isr_serial(struct slgt_info *info)
 {
 	unsigned short status = rd_reg16(info, SSR);
@@ -2125,6 +2163,8 @@
 			if (info->tx_count)
 				isr_txeom(info, status);
 		}
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
 			info->icount.brk++;
 			/* process break detection if tty control allows */
@@ -2141,7 +2181,8 @@
 	} else {
 		if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
 			isr_txeom(info, status);
-
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if (status & IRQ_RXIDLE) {
 			if (status & RXIDLE)
 				info->icount.rxidle++;
@@ -2642,6 +2683,10 @@
 			return -EINVAL;
 		}
 		info->rbuf_fill_level = rbuf_fill_level;
+		if (rbuf_fill_level < 128)
+			info->rx_pio = 1; /* PIO mode */
+		else
+			info->rx_pio = 0; /* DMA mode */
 		rx_stop(info); /* restart receiver to use new fill level */
 	}
 
@@ -3099,13 +3144,16 @@
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	unsigned long flags;
 	struct slgt_info *info = container_of(port, struct slgt_info, port);
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3419,7 +3467,7 @@
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
@@ -3841,15 +3889,27 @@
 	rdma_reset(info);
 	reset_rbufs(info);
 
-	/* set 1st descriptor address */
-	wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
-
-	if (info->params.mode != MGSL_MODE_ASYNC) {
-		/* enable rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT2 + BIT0));
+	if (info->rx_pio) {
+		/* rx request when rx FIFO not empty */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14));
+		slgt_irq_on(info, IRQ_RXDATA);
+		if (info->params.mode == MGSL_MODE_ASYNC) {
+			/* enable saving of rx status */
+			wr_reg32(info, RDCSR, BIT6);
+		}
 	} else {
-		/* enable saving of rx status, rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		/* rx request when rx FIFO half full */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14));
+		/* set 1st descriptor address */
+		wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
+
+		if (info->params.mode != MGSL_MODE_ASYNC) {
+			/* enable rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT2 + BIT0));
+		} else {
+			/* enable saving of rx status, rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		}
 	}
 
 	slgt_irq_on(info, IRQ_RXOVER);
@@ -4467,6 +4527,8 @@
 static void reset_rbufs(struct slgt_info *info)
 {
 	free_rbufs(info, 0, info->rbuf_count - 1);
+	info->rbuf_fill_index = 0;
+	info->rbuf_fill_count = 0;
 }
 
 /*
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 26de60e..6f727e3 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3277,13 +3277,16 @@
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3746,7 +3749,7 @@
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index d6a807f..39a05b5 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -25,6 +25,7 @@
 #include <linux/kbd_kern.h>
 #include <linux/proc_fs.h>
 #include <linux/quotaops.h>
+#include <linux/perf_counter.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
@@ -243,6 +244,7 @@
 	struct pt_regs *regs = get_irq_regs();
 	if (regs)
 		show_regs(regs);
+	perf_counter_print_debug();
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 55ba6f1..ac16fbe 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -29,10 +29,7 @@
 	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
 	if (!buf)
 		goto err;
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-	else
-		buf->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+	buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 	if (!buf->data)
 		goto err_buf;
 	atomic_set(&buf->count, 1);
@@ -52,10 +49,7 @@
 static void tty_audit_buf_free(struct tty_audit_buf *buf)
 {
 	WARN_ON(buf->valid != 0);
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf->data);
-	else
-		free_page((unsigned long)buf->data);
+	kfree(buf->data);
 	kfree(buf);
 }
 
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 66b99a2..939e198 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -295,7 +295,7 @@
 	struct tty_driver *p, *res = NULL;
 	int tty_line = 0;
 	int len;
-	char *str;
+	char *str, *stp;
 
 	for (str = name; *str; str++)
 		if ((*str >= '0' && *str <= '9') || *str == ',')
@@ -311,13 +311,14 @@
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		if (strncmp(name, p->name, len) != 0)
 			continue;
-		if (*str == ',')
-			str++;
-		if (*str == '\0')
-			str = NULL;
+		stp = str;
+		if (*stp == ',')
+			stp++;
+		if (*stp == '\0')
+			stp = NULL;
 
 		if (tty_line >= 0 && tty_line <= p->num && p->ops &&
-		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) {
+		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
 			res = tty_driver_kref_get(p);
 			*line = tty_line;
 			break;
@@ -470,43 +471,6 @@
 EXPORT_SYMBOL_GPL(tty_wakeup);
 
 /**
- *	tty_ldisc_flush	-	flush line discipline queue
- *	@tty: tty
- *
- *	Flush the line discipline queue (if any) for this tty. If there
- *	is no line discipline active this is a no-op.
- */
-
-void tty_ldisc_flush(struct tty_struct *tty)
-{
-	struct tty_ldisc *ld = tty_ldisc_ref(tty);
-	if (ld) {
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_ldisc_deref(ld);
-	}
-	tty_buffer_flush(tty);
-}
-
-EXPORT_SYMBOL_GPL(tty_ldisc_flush);
-
-/**
- *	tty_reset_termios	-	reset terminal state
- *	@tty: tty to reset
- *
- *	Restore a terminal to the driver default state
- */
-
-static void tty_reset_termios(struct tty_struct *tty)
-{
-	mutex_lock(&tty->termios_mutex);
-	*tty->termios = tty->driver->init_termios;
-	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
-	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
-	mutex_unlock(&tty->termios_mutex);
-}
-
-/**
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
  *
@@ -535,7 +499,6 @@
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
-	struct tty_ldisc *ld;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -566,40 +529,8 @@
 		filp->f_op = &hung_up_tty_fops;
 	}
 	file_list_unlock();
-	/*
-	 * FIXME! What are the locking issues here? This may me overdoing
-	 * things... This question is especially important now that we've
-	 * removed the irqlock.
-	 */
-	ld = tty_ldisc_ref(tty);
-	if (ld != NULL) {
-		/* We may have no line discipline at this point */
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_driver_flush_buffer(tty);
-		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-		    ld->ops->write_wakeup)
-			ld->ops->write_wakeup(tty);
-		if (ld->ops->hangup)
-			ld->ops->hangup(tty);
-	}
-	/*
-	 * FIXME: Once we trust the LDISC code better we can wait here for
-	 * ldisc completion and fix the driver call race
-	 */
-	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
-	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
-	/*
-	 * Shutdown the current line discipline, and reset it to
-	 * N_TTY.
-	 */
-	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
-		tty_reset_termios(tty);
-	/* Defer ldisc switch */
-	/* tty_deferred_ldisc_switch(N_TTY);
 
-	  This should get done automatically when the port closes and
-	  tty_release is called */
+	tty_ldisc_hangup(tty);
 
 	read_lock(&tasklist_lock);
 	if (tty->session) {
@@ -628,12 +559,15 @@
 	read_unlock(&tasklist_lock);
 
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	tty->flags = 0;
+	clear_bit(TTY_THROTTLED, &tty->flags);
+	clear_bit(TTY_PUSH, &tty->flags);
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	put_pid(tty->session);
 	put_pid(tty->pgrp);
 	tty->session = NULL;
 	tty->pgrp = NULL;
 	tty->ctrl_status = 0;
+	set_bit(TTY_HUPPED, &tty->flags);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	/* Account for the p->signal references we killed */
@@ -659,10 +593,7 @@
 	 * can't yet guarantee all that.
 	 */
 	set_bit(TTY_HUPPED, &tty->flags);
-	if (ld) {
-		tty_ldisc_enable(tty);
-		tty_ldisc_deref(ld);
-	}
+	tty_ldisc_enable(tty);
 	unlock_kernel();
 	if (f)
 		fput(f);
@@ -2480,6 +2411,24 @@
 	return tty->ops->tiocmset(tty, file, set, clear);
 }
 
+struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+		tty = tty->link;
+	return tty;
+}
+EXPORT_SYMBOL(tty_pair_get_tty);
+
+struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+	    return tty;
+	return tty->link;
+}
+EXPORT_SYMBOL(tty_pair_get_pty);
+
 /*
  * Split this up, as gcc can choke on it otherwise..
  */
@@ -2495,11 +2444,7 @@
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
-	real_tty = tty;
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-
+	real_tty = tty_pair_get_tty(tty);
 
 	/*
 	 * Factor out some common prep work
@@ -2555,7 +2500,7 @@
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc.ops->num, (int __user *)p);
+		return put_user(tty->ldisc->ops->num, (int __user *)p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 	/*
@@ -2770,6 +2715,7 @@
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
 	mutex_init(&tty->termios_mutex);
+	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 6f4c7d0..8116bb1c 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -97,14 +97,19 @@
  *	@tty: terminal
  *
  *	Indicate that a tty should stop transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
  */
 
 void tty_throttle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	/* check TTY_THROTTLED first so it indicates our state */
 	if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->throttle)
 		tty->ops->throttle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_throttle);
 
@@ -113,13 +118,21 @@
  *	@tty: terminal
  *
  *	Indicate that a tty may continue transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
+ *
+ *	Drivers should however remember that the stack can issue a throttle,
+ *	then change flow control method, then unthrottle.
  */
 
 void tty_unthrottle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->unthrottle)
 		tty->ops->unthrottle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_unthrottle);
 
@@ -613,9 +626,25 @@
 	return 0;
 }
 
+static void copy_termios(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
+static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios_locked, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
 static int get_termio(struct tty_struct *tty, struct termio __user *termio)
 {
-	if (kernel_termios_to_user_termio(termio, tty->termios))
+	struct ktermios kterm;
+	copy_termios(tty, &kterm);
+	if (kernel_termios_to_user_termio(termio, &kterm))
 		return -EFAULT;
 	return 0;
 }
@@ -917,6 +946,8 @@
 	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int ret = 0;
+	struct ktermios kterm;
+	struct termiox ktermx;
 
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -952,23 +983,20 @@
 		return set_termios(real_tty, p, TERMIOS_OLD);
 #ifndef TCGETS2
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #else
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCGETS2:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCSETSF2:
 		return set_termios(real_tty, p,  TERMIOS_FLUSH | TERMIOS_WAIT);
@@ -987,34 +1015,36 @@
 		return set_termios(real_tty, p, TERMIOS_TERMIO);
 #ifndef TCGETS2
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios(real_tty->termios_locked,
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
-		return ret;
+		return 0;
 #else
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
-			ret = -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios_1(real_tty->termios_locked,
+			return -EPERM;
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios_1(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #endif
@@ -1023,9 +1053,10 @@
 		if (real_tty->termiox == NULL)
 			return -EINVAL;
 		mutex_lock(&real_tty->termios_mutex);
-		if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox)))
-			ret = -EFAULT;
+		memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox));
 		mutex_unlock(&real_tty->termios_mutex);
+		if (copy_to_user(p, &ktermx, sizeof(struct termiox)))
+			ret = -EFAULT;
 		return ret;
 	case TCSETX:
 		return set_termiox(real_tty, p, 0);
@@ -1035,10 +1066,9 @@
 		return set_termiox(real_tty, p, TERMIOS_FLUSH);
 #endif		
 	case TIOCGSOFTCAR:
-		mutex_lock(&real_tty->termios_mutex);
-		ret = put_user(C_CLOCAL(real_tty) ? 1 : 0,
+		copy_termios(real_tty, &kterm);
+		ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0,
 						(int __user *)arg);
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSSOFTCAR:
 		if (get_user(arg, (unsigned int __user *) arg))
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f78f5b0..39c8f86 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -115,19 +115,22 @@
 /**
  *	tty_ldisc_try_get	-	try and reference an ldisc
  *	@disc: ldisc number
- *	@ld: tty ldisc structure to complete
  *
  *	Attempt to open and lock a line discipline into place. Return
- *	the line discipline refcounted and assigned in ld. On an error
- *	report the error code back
+ *	the line discipline refcounted or an error.
  */
 
-static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_try_get(int disc)
 {
 	unsigned long flags;
+	struct tty_ldisc *ld;
 	struct tty_ldisc_ops *ldops;
 	int err = -EINVAL;
-	
+
+	ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
+	if (ld == NULL)
+		return ERR_PTR(-ENOMEM);
+
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	ld->ops = NULL;
 	ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@
 			/* lock it */
 			ldops->refcount++;
 			ld->ops = ldops;
+			ld->refcount = 0;
 			err = 0;
 		}
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	return err;
+	if (err)
+		return ERR_PTR(err);
+	return ld;
 }
 
 /**
  *	tty_ldisc_get		-	take a reference to an ldisc
  *	@disc: ldisc number
- *	@ld: tty line discipline structure to use
  *
  *	Takes a reference to a line discipline. Deals with refcounts and
  *	module locking counts. Returns NULL if the discipline is not available.
@@ -161,52 +166,54 @@
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_get(int disc)
 {
-	int err;
+	struct tty_ldisc *ld;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
-		return -EINVAL;
-	err = tty_ldisc_try_get(disc, ld);
-	if (err < 0) {
+		return ERR_PTR(-EINVAL);
+	ld = tty_ldisc_try_get(disc);
+	if (IS_ERR(ld)) {
 		request_module("tty-ldisc-%d", disc);
-		err = tty_ldisc_try_get(disc, ld);
+		ld = tty_ldisc_try_get(disc);
 	}
-	return err;
+	return ld;
 }
 
 /**
  *	tty_ldisc_put		-	drop ldisc reference
- *	@disc: ldisc number
+ *	@ld: ldisc
  *
  *	Drop a reference to a line discipline. Manage refcounts and
- *	module usage counts
+ *	module usage counts. Free the ldisc once the recount hits zero.
  *
  *	Locking:
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static void tty_ldisc_put(struct tty_ldisc_ops *ld)
+static void tty_ldisc_put(struct tty_ldisc *ld)
 {
 	unsigned long flags;
-	int disc = ld->num;
+	int disc = ld->ops->num;
+	struct tty_ldisc_ops *ldo;
 
 	BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = tty_ldiscs[disc];
-	BUG_ON(ld->refcount == 0);
-	ld->refcount--;
-	module_put(ld->owner);
+	ldo = tty_ldiscs[disc];
+	BUG_ON(ldo->refcount == 0);
+	ldo->refcount--;
+	module_put(ldo->owner);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	kfree(ld);
 }
 
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
+static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
 {
 	return (*pos < NR_LDISCS) ? pos : NULL;
 }
 
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	(*pos)++;
 	return (*pos < NR_LDISCS) ? pos : NULL;
@@ -219,12 +226,13 @@
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
-	struct tty_ldisc ld;
-	
-	if (tty_ldisc_get(i, &ld) < 0)
+	struct tty_ldisc *ld;
+
+	ld = tty_ldisc_try_get(i);
+	if (IS_ERR(ld))
 		return 0;
-	seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
-	tty_ldisc_put(ld.ops);
+	seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
+	tty_ldisc_put(ld);
 	return 0;
 }
 
@@ -263,8 +271,7 @@
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-	ld->refcount = 0;
-	tty->ldisc = *ld;
+	tty->ldisc = ld;
 }
 
 /**
@@ -286,7 +293,7 @@
 	int ret = 0;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = &tty->ldisc;
+	ld = tty->ldisc;
 	if (test_bit(TTY_LDISC, &tty->flags)) {
 		ld->refcount++;
 		ret = 1;
@@ -315,10 +322,9 @@
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	WARN_ON(tty->ldisc.refcount == 0);
-	return &tty->ldisc;
+	WARN_ON(tty->ldisc->refcount == 0);
+	return tty->ldisc;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 
 /**
@@ -335,10 +341,9 @@
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 {
 	if (tty_ldisc_try(tty))
-		return &tty->ldisc;
+		return tty->ldisc;
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref);
 
 /**
@@ -366,7 +371,6 @@
 		wake_up(&tty_ldisc_wait);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_deref);
 
 /**
@@ -389,6 +393,26 @@
 }
 
 /**
+ *	tty_ldisc_flush	-	flush line discipline queue
+ *	@tty: tty
+ *
+ *	Flush the line discipline queue (if any) for this tty. If there
+ *	is no line discipline active this is a no-op.
+ */
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld = tty_ldisc_ref(tty);
+	if (ld) {
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_ldisc_deref(ld);
+	}
+	tty_buffer_flush(tty);
+}
+EXPORT_SYMBOL_GPL(tty_ldisc_flush);
+
+/**
  *	tty_set_termios_ldisc		-	set ldisc field
  *	@tty: tty structure
  *	@num: line discipline number
@@ -407,6 +431,39 @@
 	mutex_unlock(&tty->termios_mutex);
 }
 
+/**
+ *	tty_ldisc_open		-	open a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to open
+ *
+ *	A helper opening method. Also a convenient debugging and check
+ *	point.
+ */
+
+static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
+	if (ld->ops->open)
+		return ld->ops->open(tty);
+	return 0;
+}
+
+/**
+ *	tty_ldisc_close		-	close a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to close
+ *
+ *	A helper close method. Also a convenient debugging and check
+ *	point.
+ */
+
+static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
+	clear_bit(TTY_LDISC_OPEN, &tty->flags);
+	if (ld->ops->close)
+		ld->ops->close(tty);
+}
 
 /**
  *	tty_ldisc_restore	-	helper for tty ldisc change
@@ -420,66 +477,136 @@
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
 	char buf[64];
-	struct tty_ldisc new_ldisc;
+	struct tty_ldisc *new_ldisc;
+	int r;
 
 	/* There is an outstanding reference here so this is safe */
-	tty_ldisc_get(old->ops->num, old);
+	old = tty_ldisc_get(old->ops->num);
+	WARN_ON(IS_ERR(old));
 	tty_ldisc_assign(tty, old);
 	tty_set_termios_ldisc(tty, old->ops->num);
-	if (old->ops->open && (old->ops->open(tty) < 0)) {
-		tty_ldisc_put(old->ops);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
 		/* This driver is always present */
-		if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+		new_ldisc = tty_ldisc_get(N_TTY);
+		if (IS_ERR(new_ldisc))
 			panic("n_tty: get");
-		tty_ldisc_assign(tty, &new_ldisc);
+		tty_ldisc_assign(tty, new_ldisc);
 		tty_set_termios_ldisc(tty, N_TTY);
-		if (new_ldisc.ops->open) {
-			int r = new_ldisc.ops->open(tty);
-				if (r < 0)
-				panic("Couldn't open N_TTY ldisc for "
-				      "%s --- error %d.",
-				      tty_name(tty, buf), r);
-		}
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty, buf), r);
 	}
 }
 
 /**
+ *	tty_ldisc_halt		-	shut down the line discipline
+ *	@tty: tty device
+ *
+ *	Shut down the line discipline and work queue for this tty device.
+ *	The TTY_LDISC flag being cleared ensures no further references can
+ *	be obtained while the delayed work queue halt ensures that no more
+ *	data is fed to the ldisc.
+ *
+ *	In order to wait for any existing references to complete see
+ *	tty_ldisc_wait_idle.
+ */
+
+static int tty_ldisc_halt(struct tty_struct *tty)
+{
+	clear_bit(TTY_LDISC, &tty->flags);
+	return cancel_delayed_work(&tty->buf.work);
+}
+
+/**
+ *	tty_ldisc_wait_idle	-	wait for the ldisc to become idle
+ *	@tty: tty to wait for
+ *
+ *	Wait for the line discipline to become idle. The discipline must
+ *	have been halted for this to guarantee it remains idle.
+ *
+ *	tty_ldisc_lock protects the ref counts currently.
+ */
+
+static int tty_ldisc_wait_idle(struct tty_struct *tty)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	while (tty->ldisc->refcount) {
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		if (wait_event_timeout(tty_ldisc_wait,
+				tty->ldisc->refcount == 0, 5 * HZ) == 0)
+			return -EBUSY;
+		spin_lock_irqsave(&tty_ldisc_lock, flags);
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return 0;
+}
+
+/**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
  *	@ldisc: the line discipline
  *
  *	Set the discipline of a tty line. Must be called from a process
- *	context.
+ *	context. The ldisc change logic has to protect itself against any
+ *	overlapping ldisc change (including on the other end of pty pairs),
+ *	the close of one side of a tty/pty pair, and eventually hangup.
  *
- *	Locking: takes tty_ldisc_lock.
- *		 called functions take termios_mutex
+ *	Locking: takes tty_ldisc_lock, termios_mutex
  */
 
 int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
 	int retval;
-	struct tty_ldisc o_ldisc, new_ldisc;
-	int work;
-	unsigned long flags;
+	struct tty_ldisc *o_ldisc, *new_ldisc;
+	int work, o_work = 0;
 	struct tty_struct *o_tty;
 
-restart:
-	/* This is a bit ugly for now but means we can break the 'ldisc
-	   is part of the tty struct' assumption later */
-	retval = tty_ldisc_get(ldisc, &new_ldisc);
-	if (retval)
-		return retval;
+	new_ldisc = tty_ldisc_get(ldisc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
+
+	/*
+	 *	We need to look at the tty locking here for pty/tty pairs
+	 *	when both sides try to change in parallel.
+	 */
+
+	o_tty = tty->link;	/* o_tty is the pty side or NULL */
+
+
+	/*
+	 *	Check the no-op case
+	 */
+
+	if (tty->ldisc->ops->num == ldisc) {
+		tty_ldisc_put(new_ldisc);
+		return 0;
+	}
 
 	/*
 	 *	Problem: What do we do if this blocks ?
+	 *	We could deadlock here
 	 */
 
 	tty_wait_until_sent(tty, 0);
 
-	if (tty->ldisc.ops->num == ldisc) {
-		tty_ldisc_put(new_ldisc.ops);
-		return 0;
+	mutex_lock(&tty->ldisc_mutex);
+
+	/*
+	 *	We could be midstream of another ldisc change which has
+	 *	dropped the lock during processing. If so we need to wait.
+	 */
+
+	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
+		mutex_unlock(&tty->ldisc_mutex);
+		wait_event(tty_ldisc_wait,
+			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
+		mutex_lock(&tty->ldisc_mutex);
 	}
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 
 	/*
 	 *	No more input please, we are switching. The new ldisc
@@ -489,8 +616,6 @@
 	tty->receive_room = 0;
 
 	o_ldisc = tty->ldisc;
-	o_tty = tty->link;
-
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -501,108 +626,181 @@
 	 *	with a userspace app continually trying to use the tty in
 	 *	parallel to the change and re-referencing the tty.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
 
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
-		if (tty->ldisc.refcount) {
-			/* Free the new ldisc we grabbed. Must drop the lock
-			   first. */
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_ldisc.ops);
-			/*
-			 * There are several reasons we may be busy, including
-			 * random momentary I/O traffic. We must therefore
-			 * retry. We could distinguish between blocking ops
-			 * and retries if we made tty_ldisc_wait() smarter.
-			 * That is up for discussion.
-			 */
-			if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-		if (o_tty && o_tty->ldisc.refcount) {
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_tty->ldisc.ops);
-			if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-	}
-	/*
-	 *	If the TTY_LDISC bit is set, then we are racing against
-	 *	another ldisc change
-	 */
-	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
-		struct tty_ldisc *ld;
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		tty_ldisc_put(new_ldisc.ops);
-		ld = tty_ldisc_ref_wait(tty);
-		tty_ldisc_deref(ld);
-		goto restart;
-	}
-	/*
-	 *	This flag is used to avoid two parallel ldisc changes. Once
-	 *	open and close are fine grained locked this may work better
-	 *	as a mutex shared with the open/close/hup paths
-	 */
-	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+	work = tty_ldisc_halt(tty);
 	if (o_tty)
-		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	
+		o_work = tty_ldisc_halt(o_tty);
+
 	/*
-	 *	From this point on we know nobody has an ldisc
-	 *	usage reference, nor can they obtain one until
-	 *	we say so later on.
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate.
+	 * We must drop the mutex here in case a hangup is also in process.
 	 */
 
-	work = cancel_delayed_work(&tty->buf.work);
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 * MUST NOT hold locks here.
-	 */
+	mutex_unlock(&tty->ldisc_mutex);
+
 	flush_scheduled_work();
+
+	/* Let any existing reference holders finish */
+	retval = tty_ldisc_wait_idle(tty);
+	if (retval < 0) {
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		tty_ldisc_put(new_ldisc);
+		return retval;
+	}
+
+	mutex_lock(&tty->ldisc_mutex);
+	if (test_bit(TTY_HUPPED, &tty->flags)) {
+		/* We were raced by the hangup method. It will have stomped
+		   the ldisc data and closed the ldisc down */
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_ldisc_put(new_ldisc);
+		return -EIO;
+	}
+
 	/* Shutdown the current discipline. */
-	if (o_ldisc.ops->close)
-		(o_ldisc.ops->close)(tty);
+	tty_ldisc_close(tty, o_ldisc);
 
 	/* Now set up the new line discipline. */
-	tty_ldisc_assign(tty, &new_ldisc);
+	tty_ldisc_assign(tty, new_ldisc);
 	tty_set_termios_ldisc(tty, ldisc);
-	if (new_ldisc.ops->open)
-		retval = (new_ldisc.ops->open)(tty);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
-		tty_ldisc_put(new_ldisc.ops);
-		tty_ldisc_restore(tty, &o_ldisc);
+		/* Back to the old one or N_TTY if we can't */
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, o_ldisc);
 	}
+
 	/* At this point we hold a reference to the new ldisc and a
 	   a reference to the old ldisc. If we ended up flipping back
 	   to the existing ldisc we have two references to it */
 
-	if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
+	if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
 		tty->ops->set_ldisc(tty);
 
-	tty_ldisc_put(o_ldisc.ops);
+	tty_ldisc_put(o_ldisc);
 
 	/*
-	 *	Allow ldisc referencing to occur as soon as the driver
-	 *	ldisc callback completes.
+	 *	Allow ldisc referencing to occur again
 	 */
 
 	tty_ldisc_enable(tty);
 	if (o_tty)
 		tty_ldisc_enable(o_tty);
 
-	/* Restart it in case no characters kick it off. Safe if
+	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
 	if (work)
 		schedule_delayed_work(&tty->buf.work, 1);
+	if (o_work)
+		schedule_delayed_work(&o_tty->buf.work, 1);
+	mutex_unlock(&tty->ldisc_mutex);
 	return retval;
 }
 
+/**
+ *	tty_reset_termios	-	reset terminal state
+ *	@tty: tty to reset
+ *
+ *	Restore a terminal to the driver default state.
+ */
+
+static void tty_reset_termios(struct tty_struct *tty)
+{
+	mutex_lock(&tty->termios_mutex);
+	*tty->termios = tty->driver->init_termios;
+	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+	mutex_unlock(&tty->termios_mutex);
+}
+
+
+/**
+ *	tty_ldisc_reinit	-	reinitialise the tty ldisc
+ *	@tty: tty to reinit
+ *
+ *	Switch the tty back to N_TTY line discipline and leave the
+ *	ldisc state closed
+ */
+
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	tty->ldisc = NULL;
+	/*
+	 *	Switch the line discipline back
+	 */
+	ld = tty_ldisc_get(N_TTY);
+	BUG_ON(IS_ERR(ld));
+	tty_ldisc_assign(tty, ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
+/**
+ *	tty_ldisc_hangup		-	hangup ldisc reset
+ *	@tty: tty being hung up
+ *
+ *	Some tty devices reset their termios when they receive a hangup
+ *	event. In that situation we must also switch back to N_TTY properly
+ *	before we reset the termios data.
+ *
+ *	Locking: We can take the ldisc mutex as the rest of the code is
+ *	careful to allow for this.
+ *
+ *	In the pty pair case this occurs in the close() path of the
+ *	tty itself so we must be careful about locking rules.
+ */
+
+void tty_ldisc_hangup(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	/*
+	 * FIXME! What are the locking issues here? This may me overdoing
+	 * things... This question is especially important now that we've
+	 * removed the irqlock.
+	 */
+	ld = tty_ldisc_ref(tty);
+	if (ld != NULL) {
+		/* We may have no line discipline at this point */
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_driver_flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+		    ld->ops->write_wakeup)
+			ld->ops->write_wakeup(tty);
+		if (ld->ops->hangup)
+			ld->ops->hangup(tty);
+		tty_ldisc_deref(ld);
+	}
+	/*
+	 * FIXME: Once we trust the LDISC code better we can wait here for
+	 * ldisc completion and fix the driver call race
+	 */
+	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+		/* Avoid racing set_ldisc */
+		mutex_lock(&tty->ldisc_mutex);
+		/* Switch back to N_TTY */
+		tty_ldisc_reinit(tty);
+		/* At this point we have a closed ldisc and we want to
+		   reopen it. We could defer this to the next open but
+		   it means auditing a lot of other paths so this is a FIXME */
+		WARN_ON(tty_ldisc_open(tty, tty->ldisc));
+		tty_ldisc_enable(tty);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_reset_termios(tty);
+	}
+}
 
 /**
  *	tty_ldisc_setup			-	open line discipline
@@ -610,24 +808,23 @@
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the initial open of a tty/pty pair in order to set up the
- *	line discplines and bind them to the tty.
+ *	line disciplines and bind them to the tty. This has no locking issues
+ *	as the device isn't yet active.
  */
 
 int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	struct tty_ldisc *ld = &tty->ldisc;
+	struct tty_ldisc *ld = tty->ldisc;
 	int retval;
 
-	if (ld->ops->open) {
-		retval = (ld->ops->open)(tty);
-		if (retval)
-			return retval;
-	}
-	if (o_tty && o_tty->ldisc.ops->open) {
-		retval = (o_tty->ldisc.ops->open)(o_tty);
+	retval = tty_ldisc_open(tty, ld);
+	if (retval)
+		return retval;
+
+	if (o_tty) {
+		retval = tty_ldisc_open(o_tty, o_tty->ldisc);
 		if (retval) {
-			if (ld->ops->close)
-				(ld->ops->close)(tty);
+			tty_ldisc_close(tty, ld);
 			return retval;
 		}
 		tty_ldisc_enable(o_tty);
@@ -635,32 +832,25 @@
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
  *	@o_tty: pair tty for pty/tty pairs
  *
- *	Called during the final close of a tty/pty pair in order to shut down the
- *	line discpline layer.
+ *	Called during the final close of a tty/pty pair in order to shut down
+ *	the line discpline layer. On exit the ldisc assigned is N_TTY and the
+ *	ldisc has not been opened.
  */
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	unsigned long flags;
-	struct tty_ldisc ld;
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
 	 * race with the set_ldisc code path.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
 
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-
+	tty_ldisc_halt(tty);
 	flush_scheduled_work();
 
 	/*
@@ -668,38 +858,19 @@
 	 * side waiters as the file is closing so user count on the file
 	 * side is zero.
 	 */
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
-		spin_lock_irqsave(&tty_ldisc_lock, flags);
-	}
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	tty_ldisc_wait_idle(tty);
+
 	/*
 	 * Shutdown the current line discipline, and reset it to N_TTY.
 	 *
 	 * FIXME: this MUST get fixed for the new reflocking
 	 */
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
 
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
-	if (o_tty) {
-		/* FIXME: could o_tty be in setldisc here ? */
-		clear_bit(TTY_LDISC, &o_tty->flags);
-		if (o_tty->ldisc.ops->close)
-			(o_tty->ldisc.ops->close)(o_tty);
-		tty_ldisc_put(o_tty->ldisc.ops);
-		WARN_ON(tty_ldisc_get(N_TTY, &ld));
-		tty_ldisc_assign(o_tty, &ld);
-		tty_set_termios_ldisc(o_tty, N_TTY);
-	}
+	tty_ldisc_reinit(tty);
+	/* This will need doing differently if we need to lock */
+	if (o_tty)
+		tty_ldisc_release(o_tty, NULL);
 }
 
 /**
@@ -712,10 +883,10 @@
 
 void tty_ldisc_init(struct tty_struct *tty)
 {
-	struct tty_ldisc ld;
-	if (tty_ldisc_get(N_TTY, &ld) < 0)
+	struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
+	if (IS_ERR(ld))
 		panic("n_tty: init_tty");
-	tty_ldisc_assign(tty, &ld);
+	tty_ldisc_assign(tty, ld);
 }
 
 void tty_ldisc_begin(void)
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9b8004c..62dadfc 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -137,7 +137,7 @@
 EXPORT_SYMBOL(tty_port_carrier_raised);
 
 /**
- *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	tty_port_raise_dtr_rts	-	Raise DTR/RTS
  *	@port: tty port
  *
  *	Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,12 +147,28 @@
 
 void tty_port_raise_dtr_rts(struct tty_port *port)
 {
-	if (port->ops->raise_dtr_rts)
-		port->ops->raise_dtr_rts(port);
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 1);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
 
 /**
+ *	tty_port_lower_dtr_rts	-	Lower DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_lower_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 0);
+}
+EXPORT_SYMBOL(tty_port_lower_dtr_rts);
+
+/**
  *	tty_port_block_til_ready	-	Waiting logic for tty open
  *	@port: the tty port being opened
  *	@tty: the tty device being bound
@@ -167,7 +183,7 @@
  *		- port flags and counts
  *
  *	The passed tty_port must implement the carrier_raised method if it can
- *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	do carrier detect and the dtr_rts method if it supports software
  *	management of these lines. Note that the dtr/rts raise is done each
  *	iteration as a hangup may have previously dropped them while we wait.
  */
@@ -182,7 +198,8 @@
 
 	/* block if port is in the process of being closed */
 	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
+		wait_event_interruptible(port->close_wait,
+				!(port->flags & ASYNC_CLOSING));
 		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
@@ -205,7 +222,6 @@
 	   before the next open may complete */
 
 	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
 
 	/* The port lock protects the port counts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -219,7 +235,7 @@
 		if (tty->termios->c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 
-		set_current_state(TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		/* Check for a hangup or uninitialised port. Return accordingly */
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
 			if (port->flags & ASYNC_HUP_NOTIFY)
@@ -240,8 +256,7 @@
 		}
 		schedule();
 	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	/* Update counts. A parallel hangup will have set count to zero and
 	   we must not mess that up further */
@@ -292,6 +307,17 @@
 	if (port->flags & ASYNC_INITIALIZED &&
 			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
+	if (port->drain_delay) {
+		unsigned int bps = tty_get_baud_rate(tty);
+		long timeout;
+
+		if (bps > 1200)
+			timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
+								HZ / 10);
+		else
+			timeout = 2 * HZ;
+		schedule_timeout_interruptible(timeout);
+	}
 	return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
@@ -302,6 +328,9 @@
 
 	tty_ldisc_flush(tty);
 
+	if (tty->termios->c_cflag & HUPCL)
+		tty_port_lower_dtr_rts(port);
+
 	spin_lock_irqsave(&port->lock, flags);
 	tty->closing = 0;
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index ff6f5a4..c74dacf 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -188,6 +188,9 @@
  * Finally we put our input buffer in the input queue, ready to receive. */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
+	vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
+	const char *names[] = { "input", "output" };
+	struct virtqueue *vqs[2];
 	int err;
 
 	vdev = dev;
@@ -199,20 +202,15 @@
 		goto fail;
 	}
 
-	/* Find the input queue. */
+	/* Find the queues. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
-	if (IS_ERR(in_vq)) {
-		err = PTR_ERR(in_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto free;
-	}
 
-	out_vq = vdev->config->find_vq(vdev, 1, NULL);
-	if (IS_ERR(out_vq)) {
-		err = PTR_ERR(out_vq);
-		goto free_in_vq;
-	}
+	in_vq = vqs[0];
+	out_vq = vqs[1];
 
 	/* Start using the new console output. */
 	virtio_cons.get_chars = get_chars;
@@ -233,17 +231,15 @@
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
-		goto free_out_vq;
+		goto free_vqs;
 	}
 
 	/* Register the input buffer the first time. */
 	add_inbuf();
 	return 0;
 
-free_out_vq:
-	vdev->config->del_vq(out_vq);
-free_in_vq:
-	vdev->config->del_vq(in_vq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	kfree(inbuf);
 fail:
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 08151d4..c796a86 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -95,7 +95,6 @@
 #include <linux/timer.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
 #include <linux/pm.h>
 #include <linux/font.h>
 #include <linux/bitops.h>
@@ -2875,14 +2874,11 @@
 		mod_timer(&console_timer, jiffies + blankinterval);
 	}
 
-	/*
-	 * kmalloc is not running yet - we use the bootmem allocator.
-	 */
 	for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
-		vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data));
+		vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT);
 		INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
 		visual_init(vc, currcons, 1);
-		vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size);
+		vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT);
 		vc->vc_kmalloced = 0;
 		vc_init(vc, vc->vc_rows, vc->vc_cols,
 			currcons || !vc->vc_sw->con_save_screen);
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 1efb287..eef216f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,3 +3,5 @@
 obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)	+= scx200_hrt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
+obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
+obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 1c92c39..cf56a2a 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -18,7 +18,6 @@
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
@@ -29,7 +28,7 @@
 #include <linux/err.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 
 struct sh_cmt_priv {
 	void __iomem *mapbase;
@@ -47,6 +46,7 @@
 	unsigned long rate;
 	spinlock_t lock;
 	struct clock_event_device ced;
+	struct clocksource cs;
 	unsigned long total_cycles;
 };
 
@@ -59,7 +59,7 @@
 
 static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -83,7 +83,7 @@
 static inline void sh_cmt_write(struct sh_cmt_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -110,23 +110,28 @@
 					int *has_wrapped)
 {
 	unsigned long v1, v2, v3;
+	int o1, o2;
+
+	o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit;
 
 	/* Make sure the timer value is stable. Stolen from acpi_pm.c */
 	do {
+		o2 = o1;
 		v1 = sh_cmt_read(p, CMCNT);
 		v2 = sh_cmt_read(p, CMCNT);
 		v3 = sh_cmt_read(p, CMCNT);
-	} while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
-			  || (v3 > v1 && v3 < v2)));
+		o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit;
+	} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
+			  || (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
 
-	*has_wrapped = sh_cmt_read(p, CMCSR) & p->overflow_bit;
+	*has_wrapped = o1;
 	return v2;
 }
 
 
 static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -144,7 +149,7 @@
 
 static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -153,16 +158,18 @@
 		pr_err("sh_cmt: cannot enable clock \"%s\"\n", cfg->clk);
 		return ret;
 	}
-	*rate = clk_get_rate(p->clk) / 8;
 
 	/* make sure channel is disabled */
 	sh_cmt_start_stop_ch(p, 0);
 
 	/* configure channel, periodic mode and maximum timeout */
-	if (p->width == 16)
-		sh_cmt_write(p, CMCSR, 0);
-	else
+	if (p->width == 16) {
+		*rate = clk_get_rate(p->clk) / 512;
+		sh_cmt_write(p, CMCSR, 0x43);
+	} else {
+		*rate = clk_get_rate(p->clk) / 8;
 		sh_cmt_write(p, CMCSR, 0x01a4);
+	}
 
 	sh_cmt_write(p, CMCOR, 0xffffffff);
 	sh_cmt_write(p, CMCNT, 0);
@@ -376,6 +383,68 @@
 	spin_unlock_irqrestore(&p->lock, flags);
 }
 
+static struct sh_cmt_priv *cs_to_sh_cmt(struct clocksource *cs)
+{
+	return container_of(cs, struct sh_cmt_priv, cs);
+}
+
+static cycle_t sh_cmt_clocksource_read(struct clocksource *cs)
+{
+	struct sh_cmt_priv *p = cs_to_sh_cmt(cs);
+	unsigned long flags, raw;
+	unsigned long value;
+	int has_wrapped;
+
+	spin_lock_irqsave(&p->lock, flags);
+	value = p->total_cycles;
+	raw = sh_cmt_get_counter(p, &has_wrapped);
+
+	if (unlikely(has_wrapped))
+		raw += p->match_value;
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return value + raw;
+}
+
+static int sh_cmt_clocksource_enable(struct clocksource *cs)
+{
+	struct sh_cmt_priv *p = cs_to_sh_cmt(cs);
+	int ret;
+
+	p->total_cycles = 0;
+
+	ret = sh_cmt_start(p, FLAG_CLOCKSOURCE);
+	if (ret)
+		return ret;
+
+	/* TODO: calculate good shift from rate and counter bit width */
+	cs->shift = 0;
+	cs->mult = clocksource_hz2mult(p->rate, cs->shift);
+	return 0;
+}
+
+static void sh_cmt_clocksource_disable(struct clocksource *cs)
+{
+	sh_cmt_stop(cs_to_sh_cmt(cs), FLAG_CLOCKSOURCE);
+}
+
+static int sh_cmt_register_clocksource(struct sh_cmt_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clocksource *cs = &p->cs;
+
+	cs->name = name;
+	cs->rating = rating;
+	cs->read = sh_cmt_clocksource_read;
+	cs->enable = sh_cmt_clocksource_enable;
+	cs->disable = sh_cmt_clocksource_disable;
+	cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	pr_info("sh_cmt: %s used as clock source\n", cs->name);
+	clocksource_register(cs);
+	return 0;
+}
+
 static struct sh_cmt_priv *ced_to_sh_cmt(struct clock_event_device *ced)
 {
 	return container_of(ced, struct sh_cmt_priv, ced);
@@ -468,9 +537,9 @@
 	clockevents_register_device(ced);
 }
 
-int sh_cmt_register(struct sh_cmt_priv *p, char *name,
-		    unsigned long clockevent_rating,
-		    unsigned long clocksource_rating)
+static int sh_cmt_register(struct sh_cmt_priv *p, char *name,
+			   unsigned long clockevent_rating,
+			   unsigned long clocksource_rating)
 {
 	if (p->width == (sizeof(p->max_match_value) * 8))
 		p->max_match_value = ~0;
@@ -483,12 +552,15 @@
 	if (clockevent_rating)
 		sh_cmt_register_clockevent(p, name, clockevent_rating);
 
+	if (clocksource_rating)
+		sh_cmt_register_clocksource(p, name, clocksource_rating);
+
 	return 0;
 }
 
 static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 {
-	struct sh_cmt_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -545,7 +617,7 @@
 	if (resource_size(res) == 6) {
 		p->width = 16;
 		p->overflow_bit = 0x80;
-		p->clear_bits = ~0xc0;
+		p->clear_bits = ~0x80;
 	} else {
 		p->width = 32;
 		p->overflow_bit = 0x8000;
@@ -566,8 +638,14 @@
 static int __devinit sh_cmt_probe(struct platform_device *pdev)
 {
 	struct sh_cmt_priv *p = platform_get_drvdata(pdev);
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
+	if (p) {
+		pr_info("sh_cmt: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
 	p = kmalloc(sizeof(*p), GFP_KERNEL);
 	if (p == NULL) {
 		dev_err(&pdev->dev, "failed to allocate driver data\n");
@@ -577,7 +655,6 @@
 	ret = sh_cmt_setup(p, pdev);
 	if (ret) {
 		kfree(p);
-
 		platform_set_drvdata(pdev, NULL);
 	}
 	return ret;
@@ -606,6 +683,7 @@
 	platform_driver_unregister(&sh_cmt_device_driver);
 }
 
+early_platform_init("earlytimer", &sh_cmt_device_driver);
 module_init(sh_cmt_init);
 module_exit(sh_cmt_exit);
 
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
new file mode 100644
index 0000000..d1ae754
--- /dev/null
+++ b/drivers/clocksource/sh_mtu2.c
@@ -0,0 +1,357 @@
+/*
+ * SuperH Timer Support - MTU2
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clockchips.h>
+#include <linux/sh_timer.h>
+
+struct sh_mtu2_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+};
+
+static DEFINE_SPINLOCK(sh_mtu2_lock);
+
+#define TSTR -1 /* shared register */
+#define TCR  0 /* channel register */
+#define TMDR 1 /* channel register */
+#define TIOR 2 /* channel register */
+#define TIER 3 /* channel register */
+#define TSR  4 /* channel register */
+#define TCNT 5 /* channel register */
+#define TGR  6 /* channel register */
+
+static unsigned long mtu2_reg_offs[] = {
+	[TCR] = 0,
+	[TMDR] = 1,
+	[TIOR] = 2,
+	[TIER] = 4,
+	[TSR] = 5,
+	[TCNT] = 6,
+	[TGR] = 8,
+};
+
+static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base + cfg->channel_offset);
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		return ioread16(base + offs);
+	else
+		return ioread8(base + offs);
+}
+
+static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base + cfg->channel_offset);
+		return;
+	}
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		iowrite16(value, base + offs);
+	else
+		iowrite8(value, base + offs);
+}
+
+static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_mtu2_lock, flags);
+	value = sh_mtu2_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_mtu2_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_mtu2_lock, flags);
+}
+
+static int sh_mtu2_enable(struct sh_mtu2_priv *p)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_mtu2: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	p->rate = clk_get_rate(p->clk) / 64;
+	p->periodic = (p->rate + HZ/2) / HZ;
+
+	/* "Periodic Counter Operation" */
+	sh_mtu2_write(p, TCR, 0x23); /* TGRA clear, divide clock by 64 */
+	sh_mtu2_write(p, TIOR, 0);
+	sh_mtu2_write(p, TGR, p->periodic);
+	sh_mtu2_write(p, TCNT, 0);
+	sh_mtu2_write(p, TMDR, 0);
+	sh_mtu2_write(p, TIER, 0x01);
+
+	/* enable channel */
+	sh_mtu2_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_mtu2_disable(struct sh_mtu2_priv *p)
+{
+	/* disable channel */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static irqreturn_t sh_mtu2_interrupt(int irq, void *dev_id)
+{
+	struct sh_mtu2_priv *p = dev_id;
+
+	/* acknowledge interrupt */
+	sh_mtu2_read(p, TSR);
+	sh_mtu2_write(p, TSR, 0xfe);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_mtu2_priv *ced_to_sh_mtu2(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_mtu2_priv, ced);
+}
+
+static void sh_mtu2_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_mtu2_priv *p = ced_to_sh_mtu2(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		sh_mtu2_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_mtu2: %s used for periodic clock events\n",
+			ced->name);
+		sh_mtu2_enable(p);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_mtu2_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static void sh_mtu2_register_clockevent(struct sh_mtu2_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_mode = sh_mtu2_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_mtu2: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_mtu2: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+static int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
+			    unsigned long clockevent_rating)
+{
+	if (clockevent_rating)
+		sh_mtu2_register_clockevent(p, name, clockevent_rating);
+
+	return 0;
+}
+
+static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
+{
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_mtu2: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_mtu2_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_mtu2: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_mtu2_register(p, cfg->name, cfg->clockevent_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_mtu2_probe(struct platform_device *pdev)
+{
+	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_mtu2: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_mtu2_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_mtu2_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent */
+}
+
+static struct platform_driver sh_mtu2_device_driver = {
+	.probe		= sh_mtu2_probe,
+	.remove		= __devexit_p(sh_mtu2_remove),
+	.driver		= {
+		.name	= "sh_mtu2",
+	}
+};
+
+static int __init sh_mtu2_init(void)
+{
+	return platform_driver_register(&sh_mtu2_device_driver);
+}
+
+static void __exit sh_mtu2_exit(void)
+{
+	platform_driver_unregister(&sh_mtu2_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_mtu2_device_driver);
+module_init(sh_mtu2_init);
+module_exit(sh_mtu2_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH MTU2 Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
new file mode 100644
index 0000000..d6ea439
--- /dev/null
+++ b/drivers/clocksource/sh_tmu.c
@@ -0,0 +1,461 @@
+/*
+ * SuperH Timer Support - TMU
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/sh_timer.h>
+
+struct sh_tmu_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+	struct clocksource cs;
+};
+
+static DEFINE_SPINLOCK(sh_tmu_lock);
+
+#define TSTR -1 /* shared register */
+#define TCOR  0 /* channel register */
+#define TCNT 1 /* channel register */
+#define TCR 2 /* channel register */
+
+static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base - cfg->channel_offset);
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		return ioread16(base + offs);
+	else
+		return ioread32(base + offs);
+}
+
+static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base - cfg->channel_offset);
+		return;
+	}
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		iowrite16(value, base + offs);
+	else
+		iowrite32(value, base + offs);
+}
+
+static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_tmu_lock, flags);
+	value = sh_tmu_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_tmu_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_tmu_lock, flags);
+}
+
+static int sh_tmu_enable(struct sh_tmu_priv *p)
+{
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_tmu: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* maximum timeout */
+	sh_tmu_write(p, TCOR, 0xffffffff);
+	sh_tmu_write(p, TCNT, 0xffffffff);
+
+	/* configure channel to parent clock / 4, irq off */
+	p->rate = clk_get_rate(p->clk) / 4;
+	sh_tmu_write(p, TCR, 0x0000);
+
+	/* enable channel */
+	sh_tmu_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_tmu_disable(struct sh_tmu_priv *p)
+{
+	/* disable channel */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta,
+			    int periodic)
+{
+	/* stop timer */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* acknowledge interrupt */
+	sh_tmu_read(p, TCR);
+
+	/* enable interrupt */
+	sh_tmu_write(p, TCR, 0x0020);
+
+	/* reload delta value in case of periodic timer */
+	if (periodic)
+		sh_tmu_write(p, TCOR, delta);
+	else
+		sh_tmu_write(p, TCOR, 0);
+
+	sh_tmu_write(p, TCNT, delta);
+
+	/* start timer */
+	sh_tmu_start_stop_ch(p, 1);
+}
+
+static irqreturn_t sh_tmu_interrupt(int irq, void *dev_id)
+{
+	struct sh_tmu_priv *p = dev_id;
+
+	/* disable or acknowledge interrupt */
+	if (p->ced.mode == CLOCK_EVT_MODE_ONESHOT)
+		sh_tmu_write(p, TCR, 0x0000);
+	else
+		sh_tmu_write(p, TCR, 0x0020);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_tmu_priv *cs_to_sh_tmu(struct clocksource *cs)
+{
+	return container_of(cs, struct sh_tmu_priv, cs);
+}
+
+static cycle_t sh_tmu_clocksource_read(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+
+	return sh_tmu_read(p, TCNT) ^ 0xffffffff;
+}
+
+static int sh_tmu_clocksource_enable(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+	int ret;
+
+	ret = sh_tmu_enable(p);
+	if (ret)
+		return ret;
+
+	/* TODO: calculate good shift from rate and counter bit width */
+	cs->shift = 10;
+	cs->mult = clocksource_hz2mult(p->rate, cs->shift);
+	return 0;
+}
+
+static void sh_tmu_clocksource_disable(struct clocksource *cs)
+{
+	sh_tmu_disable(cs_to_sh_tmu(cs));
+}
+
+static int sh_tmu_register_clocksource(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clocksource *cs = &p->cs;
+
+	cs->name = name;
+	cs->rating = rating;
+	cs->read = sh_tmu_clocksource_read;
+	cs->enable = sh_tmu_clocksource_enable;
+	cs->disable = sh_tmu_clocksource_disable;
+	cs->mask = CLOCKSOURCE_MASK(32);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	pr_info("sh_tmu: %s used as clock source\n", cs->name);
+	clocksource_register(cs);
+	return 0;
+}
+
+static struct sh_tmu_priv *ced_to_sh_tmu(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_tmu_priv, ced);
+}
+
+static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic)
+{
+	struct clock_event_device *ced = &p->ced;
+
+	sh_tmu_enable(p);
+
+	/* TODO: calculate good shift from rate and counter bit width */
+
+	ced->shift = 32;
+	ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift);
+	ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced);
+	ced->min_delta_ns = 5000;
+
+	if (periodic) {
+		p->periodic = (p->rate + HZ/2) / HZ;
+		sh_tmu_set_next(p, p->periodic, 1);
+	}
+}
+
+static void sh_tmu_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		sh_tmu_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_tmu: %s used for periodic clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 1);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		pr_info("sh_tmu: %s used for oneshot clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 0);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_tmu_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static int sh_tmu_clock_event_next(unsigned long delta,
+				   struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+
+	BUG_ON(ced->mode != CLOCK_EVT_MODE_ONESHOT);
+
+	/* program new delta value */
+	sh_tmu_set_next(p, delta, 0);
+	return 0;
+}
+
+static void sh_tmu_register_clockevent(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->features |= CLOCK_EVT_FEAT_ONESHOT;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_next_event = sh_tmu_clock_event_next;
+	ced->set_mode = sh_tmu_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_tmu: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_tmu: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+static int sh_tmu_register(struct sh_tmu_priv *p, char *name,
+		    unsigned long clockevent_rating,
+		    unsigned long clocksource_rating)
+{
+	if (clockevent_rating)
+		sh_tmu_register_clockevent(p, name, clockevent_rating);
+	else if (clocksource_rating)
+		sh_tmu_register_clocksource(p, name, clocksource_rating);
+
+	return 0;
+}
+
+static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
+{
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_tmu: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_tmu_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_tmu: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_tmu_register(p, cfg->name,
+			       cfg->clockevent_rating,
+			       cfg->clocksource_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_tmu_probe(struct platform_device *pdev)
+{
+	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_tmu: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_tmu_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_tmu_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent and clocksource */
+}
+
+static struct platform_driver sh_tmu_device_driver = {
+	.probe		= sh_tmu_probe,
+	.remove		= __devexit_p(sh_tmu_remove),
+	.driver		= {
+		.name	= "sh_tmu",
+	}
+};
+
+static int __init sh_tmu_init(void)
+{
+	return platform_driver_register(&sh_tmu_device_driver);
+}
+
+static void __exit sh_tmu_exit(void)
+{
+	platform_driver_unregister(&sh_tmu_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_tmu_device_driver);
+module_init(sh_tmu_init);
+module_exit(sh_tmu_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH TMU Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 47d2ad0..6e2ec0b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -808,7 +808,7 @@
 		ret = -ENOMEM;
 		goto nomem_out;
 	}
-	if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
 		free_cpumask_var(policy->cpus);
 		kfree(policy);
 		ret = -ENOMEM;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 956982f..ab4f359 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -49,7 +49,6 @@
 
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
-	default y
 	help
 	  Some systems are able to detect and correct errors in main
 	  memory.  EDAC can report statistics on memory error
@@ -58,6 +57,31 @@
 	  occurred so that a particular failing memory module can be
 	  replaced.  If unsure, select 'Y'.
 
+config EDAC_AMD64
+	tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
+	depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI
+	help
+	  Support for error detection and correction on the AMD 64
+	  Families of Memory Controllers (K8, F10h and F11h)
+
+config EDAC_AMD64_ERROR_INJECTION
+	bool "Sysfs Error Injection facilities"
+	depends on EDAC_AMD64
+	help
+	  Recent Opterons (Family 10h and later) provide for Memory Error
+	  Injection into the ECC detection circuits. The amd64_edac module
+	  allows the operator/user to inject Uncorrectable and Correctable
+	  errors into DRAM.
+
+	  When enabled, in each of the respective memory controller directories
+	  (/sys/devices/system/edac/mc/mcX), there are 3 input files:
+
+	  - inject_section (0..3, 16-byte section of 64-byte cacheline),
+	  - inject_word (0..8, 16-bit word of 16-byte section),
+	  - inject_ecc_vector (hex ecc vector: select bits of inject word)
+
+	  In addition, there are two control files, inject_read and inject_write,
+	  which trigger the DRAM ECC Read and Write respectively.
 
 config EDAC_AMD76X
 	tristate "AMD 76x (760, 762, 768)"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 5907681..633dc56 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -30,6 +30,13 @@
 obj-$(CONFIG_EDAC_X38)			+= x38_edac.o
 obj-$(CONFIG_EDAC_I82860)		+= i82860_edac.o
 obj-$(CONFIG_EDAC_R82600)		+= r82600_edac.o
+
+amd64_edac_mod-y :=  amd64_edac_err_types.o amd64_edac.o
+amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
+amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
+
+obj-$(CONFIG_EDAC_AMD64)		+= amd64_edac_mod.o
+
 obj-$(CONFIG_EDAC_PASEMI)		+= pasemi_edac.o
 obj-$(CONFIG_EDAC_MPC85XX)		+= mpc85xx_edac.o
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
new file mode 100644
index 0000000..c36bf40
--- /dev/null
+++ b/drivers/edac/amd64_edac.c
@@ -0,0 +1,3354 @@
+#include "amd64_edac.h"
+#include <asm/k8.h>
+
+static struct edac_pci_ctl_info *amd64_ctl_pci;
+
+static int report_gart_errors;
+module_param(report_gart_errors, int, 0644);
+
+/*
+ * Set by command line parameter. If BIOS has enabled the ECC, this override is
+ * cleared to prevent re-enabling the hardware by this driver.
+ */
+static int ecc_enable_override;
+module_param(ecc_enable_override, int, 0644);
+
+/* Lookup table for all possible MC control instances */
+struct amd64_pvt;
+static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
+static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
+
+/*
+ * Memory scrubber control interface. For K8, memory scrubbing is handled by
+ * hardware and can involve L2 cache, dcache as well as the main memory. With
+ * F10, this is extended to L3 cache scrubbing on CPU models sporting that
+ * functionality.
+ *
+ * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
+ * (dram) over to cache lines. This is nasty, so we will use bandwidth in
+ * bytes/sec for the setting.
+ *
+ * Currently, we only do dram scrubbing. If the scrubbing is done in software on
+ * other archs, we might not have access to the caches directly.
+ */
+
+/*
+ * scan the scrub rate mapping table for a close or matching bandwidth value to
+ * issue. If requested is too big, then use last maximum value found.
+ */
+static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
+				       u32 min_scrubrate)
+{
+	u32 scrubval;
+	int i;
+
+	/*
+	 * map the configured rate (new_bw) to a value specific to the AMD64
+	 * memory controller and apply to register. Search for the first
+	 * bandwidth entry that is greater or equal than the setting requested
+	 * and program that. If at last entry, turn off DRAM scrubbing.
+	 */
+	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
+		/*
+		 * skip scrub rates which aren't recommended
+		 * (see F10 BKDG, F3x58)
+		 */
+		if (scrubrates[i].scrubval < min_scrubrate)
+			continue;
+
+		if (scrubrates[i].bandwidth <= new_bw)
+			break;
+
+		/*
+		 * if no suitable bandwidth found, turn off DRAM scrubbing
+		 * entirely by falling back to the last element in the
+		 * scrubrates array.
+		 */
+	}
+
+	scrubval = scrubrates[i].scrubval;
+	if (scrubval)
+		edac_printk(KERN_DEBUG, EDAC_MC,
+			    "Setting scrub rate bandwidth: %u\n",
+			    scrubrates[i].bandwidth);
+	else
+		edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n");
+
+	pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
+
+	return 0;
+}
+
+static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 min_scrubrate = 0x0;
+
+	switch (boot_cpu_data.x86) {
+	case 0xf:
+		min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
+		break;
+	case 0x10:
+		min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
+		break;
+	case 0x11:
+		min_scrubrate = F11_MIN_SCRUB_RATE_BITS;
+		break;
+
+	default:
+		amd64_printk(KERN_ERR, "Unsupported family!\n");
+		break;
+	}
+	return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth,
+			min_scrubrate);
+}
+
+static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 scrubval = 0;
+	int status = -1, i, ret = 0;
+
+	ret = pci_read_config_dword(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval);
+	if (ret)
+		debugf0("Reading K8_SCRCTRL failed\n");
+
+	scrubval = scrubval & 0x001F;
+
+	edac_printk(KERN_DEBUG, EDAC_MC,
+		    "pci-read, sdram scrub control value: %d \n", scrubval);
+
+	for (i = 0; ARRAY_SIZE(scrubrates); i++) {
+		if (scrubrates[i].scrubval == scrubval) {
+			*bw = scrubrates[i].bandwidth;
+			status = 0;
+			break;
+		}
+	}
+
+	return status;
+}
+
+/* Map from a CSROW entry to the mask entry that operates on it */
+static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
+{
+	return csrow >> (pvt->num_dcsm >> 3);
+}
+
+/* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
+static u32 amd64_get_dct_base(struct amd64_pvt *pvt, int dct, int csrow)
+{
+	if (dct == 0)
+		return pvt->dcsb0[csrow];
+	else
+		return pvt->dcsb1[csrow];
+}
+
+/*
+ * Return the 'mask' address the i'th CS entry. This function is needed because
+ * there number of DCSM registers on Rev E and prior vs Rev F and later is
+ * different.
+ */
+static u32 amd64_get_dct_mask(struct amd64_pvt *pvt, int dct, int csrow)
+{
+	if (dct == 0)
+		return pvt->dcsm0[amd64_map_to_dcs_mask(pvt, csrow)];
+	else
+		return pvt->dcsm1[amd64_map_to_dcs_mask(pvt, csrow)];
+}
+
+
+/*
+ * In *base and *limit, pass back the full 40-bit base and limit physical
+ * addresses for the node given by node_id.  This information is obtained from
+ * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
+ * base and limit addresses are of type SysAddr, as defined at the start of
+ * section 3.4.4 (p. 70).  They are the lowest and highest physical addresses
+ * in the address range they represent.
+ */
+static void amd64_get_base_and_limit(struct amd64_pvt *pvt, int node_id,
+			       u64 *base, u64 *limit)
+{
+	*base = pvt->dram_base[node_id];
+	*limit = pvt->dram_limit[node_id];
+}
+
+/*
+ * Return 1 if the SysAddr given by sys_addr matches the base/limit associated
+ * with node_id
+ */
+static int amd64_base_limit_match(struct amd64_pvt *pvt,
+					u64 sys_addr, int node_id)
+{
+	u64 base, limit, addr;
+
+	amd64_get_base_and_limit(pvt, node_id, &base, &limit);
+
+	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
+	 * all ones if the most significant implemented address bit is 1.
+	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
+	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
+	 * Application Programming.
+	 */
+	addr = sys_addr & 0x000000ffffffffffull;
+
+	return (addr >= base) && (addr <= limit);
+}
+
+/*
+ * Attempt to map a SysAddr to a node. On success, return a pointer to the
+ * mem_ctl_info structure for the node that the SysAddr maps to.
+ *
+ * On failure, return NULL.
+ */
+static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
+						u64 sys_addr)
+{
+	struct amd64_pvt *pvt;
+	int node_id;
+	u32 intlv_en, bits;
+
+	/*
+	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
+	 * 3.4.4.2) registers to map the SysAddr to a node ID.
+	 */
+	pvt = mci->pvt_info;
+
+	/*
+	 * The value of this field should be the same for all DRAM Base
+	 * registers.  Therefore we arbitrarily choose to read it from the
+	 * register for node 0.
+	 */
+	intlv_en = pvt->dram_IntlvEn[0];
+
+	if (intlv_en == 0) {
+		for (node_id = 0; ; ) {
+			if (amd64_base_limit_match(pvt, sys_addr, node_id))
+				break;
+
+			if (++node_id >= DRAM_REG_COUNT)
+				goto err_no_match;
+		}
+		goto found;
+	}
+
+	if (unlikely((intlv_en != (0x01 << 8)) &&
+		     (intlv_en != (0x03 << 8)) &&
+		     (intlv_en != (0x07 << 8)))) {
+		amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
+			     "IntlvEn field of DRAM Base Register for node 0: "
+			     "This probably indicates a BIOS bug.\n", intlv_en);
+		return NULL;
+	}
+
+	bits = (((u32) sys_addr) >> 12) & intlv_en;
+
+	for (node_id = 0; ; ) {
+		if ((pvt->dram_limit[node_id] & intlv_en) == bits)
+			break;	/* intlv_sel field matches */
+
+		if (++node_id >= DRAM_REG_COUNT)
+			goto err_no_match;
+	}
+
+	/* sanity test for sys_addr */
+	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
+		amd64_printk(KERN_WARNING,
+			  "%s(): sys_addr 0x%lx falls outside base/limit "
+			  "address range for node %d with node interleaving "
+			  "enabled.\n", __func__, (unsigned long)sys_addr,
+			  node_id);
+		return NULL;
+	}
+
+found:
+	return edac_mc_find(node_id);
+
+err_no_match:
+	debugf2("sys_addr 0x%lx doesn't match any node\n",
+		(unsigned long)sys_addr);
+
+	return NULL;
+}
+
+/*
+ * Extract the DRAM CS base address from selected csrow register.
+ */
+static u64 base_from_dct_base(struct amd64_pvt *pvt, int csrow)
+{
+	return ((u64) (amd64_get_dct_base(pvt, 0, csrow) & pvt->dcsb_base)) <<
+				pvt->dcs_shift;
+}
+
+/*
+ * Extract the mask from the dcsb0[csrow] entry in a CPU revision-specific way.
+ */
+static u64 mask_from_dct_mask(struct amd64_pvt *pvt, int csrow)
+{
+	u64 dcsm_bits, other_bits;
+	u64 mask;
+
+	/* Extract bits from DRAM CS Mask. */
+	dcsm_bits = amd64_get_dct_mask(pvt, 0, csrow) & pvt->dcsm_mask;
+
+	other_bits = pvt->dcsm_mask;
+	other_bits = ~(other_bits << pvt->dcs_shift);
+
+	/*
+	 * The extracted bits from DCSM belong in the spaces represented by
+	 * the cleared bits in other_bits.
+	 */
+	mask = (dcsm_bits << pvt->dcs_shift) | other_bits;
+
+	return mask;
+}
+
+/*
+ * @input_addr is an InputAddr associated with the node given by mci. Return the
+ * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
+ */
+static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
+{
+	struct amd64_pvt *pvt;
+	int csrow;
+	u64 base, mask;
+
+	pvt = mci->pvt_info;
+
+	/*
+	 * Here we use the DRAM CS Base and DRAM CS Mask registers. For each CS
+	 * base/mask register pair, test the condition shown near the start of
+	 * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
+	 */
+	for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+
+		/* This DRAM chip select is disabled on this node */
+		if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
+			continue;
+
+		base = base_from_dct_base(pvt, csrow);
+		mask = ~mask_from_dct_mask(pvt, csrow);
+
+		if ((input_addr & mask) == (base & mask)) {
+			debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n",
+				(unsigned long)input_addr, csrow,
+				pvt->mc_node_id);
+
+			return csrow;
+		}
+	}
+
+	debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n",
+		(unsigned long)input_addr, pvt->mc_node_id);
+
+	return -1;
+}
+
+/*
+ * Return the base value defined by the DRAM Base register for the node
+ * represented by mci.  This function returns the full 40-bit value despite the
+ * fact that the register only stores bits 39-24 of the value. See section
+ * 3.4.4.1 (BKDG #26094, K8, revA-E)
+ */
+static inline u64 get_dram_base(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return pvt->dram_base[pvt->mc_node_id];
+}
+
+/*
+ * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
+ * for the node represented by mci. Info is passed back in *hole_base,
+ * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
+ * info is invalid. Info may be invalid for either of the following reasons:
+ *
+ * - The revision of the node is not E or greater.  In this case, the DRAM Hole
+ *   Address Register does not exist.
+ *
+ * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
+ *   indicating that its contents are not valid.
+ *
+ * The values passed back in *hole_base, *hole_offset, and *hole_size are
+ * complete 32-bit values despite the fact that the bitfields in the DHAR
+ * only represent bits 31-24 of the base and offset values.
+ */
+int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
+			     u64 *hole_offset, u64 *hole_size)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 base;
+
+	/* only revE and later have the DRAM Hole Address Register */
+	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_E) {
+		debugf1("  revision %d for node %d does not support DHAR\n",
+			pvt->ext_model, pvt->mc_node_id);
+		return 1;
+	}
+
+	/* only valid for Fam10h */
+	if (boot_cpu_data.x86 == 0x10 &&
+	    (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) == 0) {
+		debugf1("  Dram Memory Hoisting is DISABLED on this system\n");
+		return 1;
+	}
+
+	if ((pvt->dhar & DHAR_VALID) == 0) {
+		debugf1("  Dram Memory Hoisting is DISABLED on this node %d\n",
+			pvt->mc_node_id);
+		return 1;
+	}
+
+	/* This node has Memory Hoisting */
+
+	/* +------------------+--------------------+--------------------+-----
+	 * | memory           | DRAM hole          | relocated          |
+	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
+	 * |                  |                    | DRAM hole          |
+	 * |                  |                    | [0x100000000,      |
+	 * |                  |                    |  (0x100000000+     |
+	 * |                  |                    |   (0xffffffff-x))] |
+	 * +------------------+--------------------+--------------------+-----
+	 *
+	 * Above is a diagram of physical memory showing the DRAM hole and the
+	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
+	 * starts at address x (the base address) and extends through address
+	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
+	 * addresses in the hole so that they start at 0x100000000.
+	 */
+
+	base = dhar_base(pvt->dhar);
+
+	*hole_base = base;
+	*hole_size = (0x1ull << 32) - base;
+
+	if (boot_cpu_data.x86 > 0xf)
+		*hole_offset = f10_dhar_offset(pvt->dhar);
+	else
+		*hole_offset = k8_dhar_offset(pvt->dhar);
+
+	debugf1("  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
+		pvt->mc_node_id, (unsigned long)*hole_base,
+		(unsigned long)*hole_offset, (unsigned long)*hole_size);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
+
+/*
+ * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
+ * assumed that sys_addr maps to the node given by mci.
+ *
+ * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
+ * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
+ * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
+ * then it is also involved in translating a SysAddr to a DramAddr. Sections
+ * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
+ * These parts of the documentation are unclear. I interpret them as follows:
+ *
+ * When node n receives a SysAddr, it processes the SysAddr as follows:
+ *
+ * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
+ *    Limit registers for node n. If the SysAddr is not within the range
+ *    specified by the base and limit values, then node n ignores the Sysaddr
+ *    (since it does not map to node n). Otherwise continue to step 2 below.
+ *
+ * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
+ *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
+ *    the range of relocated addresses (starting at 0x100000000) from the DRAM
+ *    hole. If not, skip to step 3 below. Else get the value of the
+ *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
+ *    offset defined by this value from the SysAddr.
+ *
+ * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
+ *    Base register for node n. To obtain the DramAddr, subtract the base
+ *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
+ */
+static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
+	int ret = 0;
+
+	dram_base = get_dram_base(mci);
+
+	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
+				      &hole_size);
+	if (!ret) {
+		if ((sys_addr >= (1ull << 32)) &&
+		    (sys_addr < ((1ull << 32) + hole_size))) {
+			/* use DHAR to translate SysAddr to DramAddr */
+			dram_addr = sys_addr - hole_offset;
+
+			debugf2("using DHAR to translate SysAddr 0x%lx to "
+				"DramAddr 0x%lx\n",
+				(unsigned long)sys_addr,
+				(unsigned long)dram_addr);
+
+			return dram_addr;
+		}
+	}
+
+	/*
+	 * Translate the SysAddr to a DramAddr as shown near the start of
+	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
+	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
+	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
+	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
+	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
+	 * Programmer's Manual Volume 1 Application Programming.
+	 */
+	dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
+
+	debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
+		"DramAddr 0x%lx\n", (unsigned long)sys_addr,
+		(unsigned long)dram_addr);
+	return dram_addr;
+}
+
+/*
+ * @intlv_en is the value of the IntlvEn field from a DRAM Base register
+ * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
+ * for node interleaving.
+ */
+static int num_node_interleave_bits(unsigned intlv_en)
+{
+	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
+	int n;
+
+	BUG_ON(intlv_en > 7);
+	n = intlv_shift_table[intlv_en];
+	return n;
+}
+
+/* Translate the DramAddr given by @dram_addr to an InputAddr. */
+static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
+{
+	struct amd64_pvt *pvt;
+	int intlv_shift;
+	u64 input_addr;
+
+	pvt = mci->pvt_info;
+
+	/*
+	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
+	 * concerning translating a DramAddr to an InputAddr.
+	 */
+	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
+	input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
+	    (dram_addr & 0xfff);
+
+	debugf2("  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
+		intlv_shift, (unsigned long)dram_addr,
+		(unsigned long)input_addr);
+
+	return input_addr;
+}
+
+/*
+ * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
+ * assumed that @sys_addr maps to the node given by mci.
+ */
+static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	u64 input_addr;
+
+	input_addr =
+	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
+
+	debugf2("SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
+		(unsigned long)sys_addr, (unsigned long)input_addr);
+
+	return input_addr;
+}
+
+
+/*
+ * @input_addr is an InputAddr associated with the node represented by mci.
+ * Translate @input_addr to a DramAddr and return the result.
+ */
+static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
+{
+	struct amd64_pvt *pvt;
+	int node_id, intlv_shift;
+	u64 bits, dram_addr;
+	u32 intlv_sel;
+
+	/*
+	 * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
+	 * shows how to translate a DramAddr to an InputAddr. Here we reverse
+	 * this procedure. When translating from a DramAddr to an InputAddr, the
+	 * bits used for node interleaving are discarded.  Here we recover these
+	 * bits from the IntlvSel field of the DRAM Limit register (section
+	 * 3.4.4.2) for the node that input_addr is associated with.
+	 */
+	pvt = mci->pvt_info;
+	node_id = pvt->mc_node_id;
+	BUG_ON((node_id < 0) || (node_id > 7));
+
+	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
+
+	if (intlv_shift == 0) {
+		debugf1("    InputAddr 0x%lx translates to DramAddr of "
+			"same value\n",	(unsigned long)input_addr);
+
+		return input_addr;
+	}
+
+	bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
+	    (input_addr & 0xfff);
+
+	intlv_sel = pvt->dram_IntlvSel[node_id] & ((1 << intlv_shift) - 1);
+	dram_addr = bits + (intlv_sel << 12);
+
+	debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
+		"(%d node interleave bits)\n", (unsigned long)input_addr,
+		(unsigned long)dram_addr, intlv_shift);
+
+	return dram_addr;
+}
+
+/*
+ * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
+ * @dram_addr to a SysAddr.
+ */
+static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
+	int ret = 0;
+
+	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
+				      &hole_size);
+	if (!ret) {
+		if ((dram_addr >= hole_base) &&
+		    (dram_addr < (hole_base + hole_size))) {
+			sys_addr = dram_addr + hole_offset;
+
+			debugf1("using DHAR to translate DramAddr 0x%lx to "
+				"SysAddr 0x%lx\n", (unsigned long)dram_addr,
+				(unsigned long)sys_addr);
+
+			return sys_addr;
+		}
+	}
+
+	amd64_get_base_and_limit(pvt, pvt->mc_node_id, &base, &limit);
+	sys_addr = dram_addr + base;
+
+	/*
+	 * The sys_addr we have computed up to this point is a 40-bit value
+	 * because the k8 deals with 40-bit values.  However, the value we are
+	 * supposed to return is a full 64-bit physical address.  The AMD
+	 * x86-64 architecture specifies that the most significant implemented
+	 * address bit through bit 63 of a physical address must be either all
+	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
+	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
+	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
+	 * Programming.
+	 */
+	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
+
+	debugf1("    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
+		pvt->mc_node_id, (unsigned long)dram_addr,
+		(unsigned long)sys_addr);
+
+	return sys_addr;
+}
+
+/*
+ * @input_addr is an InputAddr associated with the node given by mci. Translate
+ * @input_addr to a SysAddr.
+ */
+static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
+					 u64 input_addr)
+{
+	return dram_addr_to_sys_addr(mci,
+				     input_addr_to_dram_addr(mci, input_addr));
+}
+
+/*
+ * Find the minimum and maximum InputAddr values that map to the given @csrow.
+ * Pass back these values in *input_addr_min and *input_addr_max.
+ */
+static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
+			      u64 *input_addr_min, u64 *input_addr_max)
+{
+	struct amd64_pvt *pvt;
+	u64 base, mask;
+
+	pvt = mci->pvt_info;
+	BUG_ON((csrow < 0) || (csrow >= CHIPSELECT_COUNT));
+
+	base = base_from_dct_base(pvt, csrow);
+	mask = mask_from_dct_mask(pvt, csrow);
+
+	*input_addr_min = base & ~mask;
+	*input_addr_max = base | mask | pvt->dcs_mask_notused;
+}
+
+/*
+ * Extract error address from MCA NB Address Low (section 3.6.4.5) and MCA NB
+ * Address High (section 3.6.4.6) register values and return the result. Address
+ * is located in the info structure (nbeah and nbeal), the encoding is device
+ * specific.
+ */
+static u64 extract_error_address(struct mem_ctl_info *mci,
+				 struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return pvt->ops->get_error_address(mci, info);
+}
+
+
+/* Map the Error address to a PAGE and PAGE OFFSET. */
+static inline void error_address_to_page_and_offset(u64 error_address,
+						    u32 *page, u32 *offset)
+{
+	*page = (u32) (error_address >> PAGE_SHIFT);
+	*offset = ((u32) error_address) & ~PAGE_MASK;
+}
+
+/*
+ * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
+ * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
+ * of a node that detected an ECC memory error.  mci represents the node that
+ * the error address maps to (possibly different from the node that detected
+ * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
+ * error.
+ */
+static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	int csrow;
+
+	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
+
+	if (csrow == -1)
+		amd64_mc_printk(mci, KERN_ERR,
+			     "Failed to translate InputAddr to csrow for "
+			     "address 0x%lx\n", (unsigned long)sys_addr);
+	return csrow;
+}
+
+static int get_channel_from_ecc_syndrome(unsigned short syndrome);
+
+static void amd64_cpu_display_info(struct amd64_pvt *pvt)
+{
+	if (boot_cpu_data.x86 == 0x11)
+		edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n");
+	else if (boot_cpu_data.x86 == 0x10)
+		edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n");
+	else if (boot_cpu_data.x86 == 0xf)
+		edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n",
+			(pvt->ext_model >= OPTERON_CPU_REV_F) ?
+			"Rev F or later" : "Rev E or earlier");
+	else
+		/* we'll hardly ever ever get here */
+		edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n");
+}
+
+/*
+ * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
+ * are ECC capable.
+ */
+static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
+{
+	int bit;
+	enum dev_type edac_cap = EDAC_NONE;
+
+	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F)
+		? 19
+		: 17;
+
+	if (pvt->dclr0 >> BIT(bit))
+		edac_cap = EDAC_FLAG_SECDED;
+
+	return edac_cap;
+}
+
+
+static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
+					 int ganged);
+
+/* Display and decode various NB registers for debug purposes. */
+static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
+{
+	int ganged;
+
+	debugf1("  nbcap:0x%8.08x DctDualCap=%s DualNode=%s 8-Node=%s\n",
+		pvt->nbcap,
+		(pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_DUAL_NODE) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_8_NODE) ? "True" : "False");
+	debugf1("    ECC Capable=%s   ChipKill Capable=%s\n",
+		(pvt->nbcap & K8_NBCAP_SECDED) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_CHIPKILL) ? "True" : "False");
+	debugf1("  DramCfg0-low=0x%08x DIMM-ECC=%s Parity=%s Width=%s\n",
+		pvt->dclr0,
+		(pvt->dclr0 & BIT(19)) ?  "Enabled" : "Disabled",
+		(pvt->dclr0 & BIT(8)) ?  "Enabled" : "Disabled",
+		(pvt->dclr0 & BIT(11)) ?  "128b" : "64b");
+	debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  DIMM Type=%s\n",
+		(pvt->dclr0 & BIT(12)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(13)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(14)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(15)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(16)) ?  "UN-Buffered" : "Buffered");
+
+
+	debugf1("  online-spare: 0x%8.08x\n", pvt->online_spare);
+
+	if (boot_cpu_data.x86 == 0xf) {
+		debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
+			pvt->dhar, dhar_base(pvt->dhar),
+			k8_dhar_offset(pvt->dhar));
+		debugf1("      DramHoleValid=%s\n",
+			(pvt->dhar & DHAR_VALID) ?  "True" : "False");
+
+		debugf1("  dbam-dkt: 0x%8.08x\n", pvt->dbam0);
+
+		/* everything below this point is Fam10h and above */
+		return;
+
+	} else {
+		debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
+			pvt->dhar, dhar_base(pvt->dhar),
+			f10_dhar_offset(pvt->dhar));
+		debugf1("    DramMemHoistValid=%s DramHoleValid=%s\n",
+			(pvt->dhar & F10_DRAM_MEM_HOIST_VALID) ?
+			"True" : "False",
+			(pvt->dhar & DHAR_VALID) ?
+			"True" : "False");
+	}
+
+	/* Only if NOT ganged does dcl1 have valid info */
+	if (!dct_ganging_enabled(pvt)) {
+		debugf1("  DramCfg1-low=0x%08x DIMM-ECC=%s Parity=%s "
+			"Width=%s\n", pvt->dclr1,
+			(pvt->dclr1 & BIT(19)) ?  "Enabled" : "Disabled",
+			(pvt->dclr1 & BIT(8)) ?  "Enabled" : "Disabled",
+			(pvt->dclr1 & BIT(11)) ?  "128b" : "64b");
+		debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  "
+			"DIMM Type=%s\n",
+			(pvt->dclr1 & BIT(12)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(13)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(14)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(15)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(16)) ?  "UN-Buffered" : "Buffered");
+	}
+
+	/*
+	 * Determine if ganged and then dump memory sizes for first controller,
+	 * and if NOT ganged dump info for 2nd controller.
+	 */
+	ganged = dct_ganging_enabled(pvt);
+
+	f10_debug_display_dimm_sizes(0, pvt, ganged);
+
+	if (!ganged)
+		f10_debug_display_dimm_sizes(1, pvt, ganged);
+}
+
+/* Read in both of DBAM registers */
+static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
+{
+	int err = 0;
+	unsigned int reg;
+
+	reg = DBAM0;
+	err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam0);
+	if (err)
+		goto err_reg;
+
+	if (boot_cpu_data.x86 >= 0x10) {
+		reg = DBAM1;
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam1);
+
+		if (err)
+			goto err_reg;
+	}
+
+err_reg:
+	debugf0("Error reading F2x%03x.\n", reg);
+}
+
+/*
+ * NOTE: CPU Revision Dependent code: Rev E and Rev F
+ *
+ * Set the DCSB and DCSM mask values depending on the CPU revision value. Also
+ * set the shift factor for the DCSB and DCSM values.
+ *
+ * ->dcs_mask_notused, RevE:
+ *
+ * To find the max InputAddr for the csrow, start with the base address and set
+ * all bits that are "don't care" bits in the test at the start of section
+ * 3.5.4 (p. 84).
+ *
+ * The "don't care" bits are all set bits in the mask and all bits in the gaps
+ * between bit ranges [35:25] and [19:13]. The value REV_E_DCS_NOTUSED_BITS
+ * represents bits [24:20] and [12:0], which are all bits in the above-mentioned
+ * gaps.
+ *
+ * ->dcs_mask_notused, RevF and later:
+ *
+ * To find the max InputAddr for the csrow, start with the base address and set
+ * all bits that are "don't care" bits in the test at the start of NPT section
+ * 4.5.4 (p. 87).
+ *
+ * The "don't care" bits are all set bits in the mask and all bits in the gaps
+ * between bit ranges [36:27] and [21:13].
+ *
+ * The value REV_F_F1Xh_DCS_NOTUSED_BITS represents bits [26:22] and [12:0],
+ * which are all bits in the above-mentioned gaps.
+ */
+static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
+{
+	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+		pvt->dcsb_base		= REV_F_F1Xh_DCSB_BASE_BITS;
+		pvt->dcsm_mask		= REV_F_F1Xh_DCSM_MASK_BITS;
+		pvt->dcs_mask_notused	= REV_F_F1Xh_DCS_NOTUSED_BITS;
+		pvt->dcs_shift		= REV_F_F1Xh_DCS_SHIFT;
+
+		switch (boot_cpu_data.x86) {
+		case 0xf:
+			pvt->num_dcsm = REV_F_DCSM_COUNT;
+			break;
+
+		case 0x10:
+			pvt->num_dcsm = F10_DCSM_COUNT;
+			break;
+
+		case 0x11:
+			pvt->num_dcsm = F11_DCSM_COUNT;
+			break;
+
+		default:
+			amd64_printk(KERN_ERR, "Unsupported family!\n");
+			break;
+		}
+	} else {
+		pvt->dcsb_base		= REV_E_DCSB_BASE_BITS;
+		pvt->dcsm_mask		= REV_E_DCSM_MASK_BITS;
+		pvt->dcs_mask_notused	= REV_E_DCS_NOTUSED_BITS;
+		pvt->dcs_shift		= REV_E_DCS_SHIFT;
+		pvt->num_dcsm		= REV_E_DCSM_COUNT;
+	}
+}
+
+/*
+ * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask hw registers
+ */
+static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
+{
+	int cs, reg, err = 0;
+
+	amd64_set_dct_base_and_mask(pvt);
+
+	for (cs = 0; cs < CHIPSELECT_COUNT; cs++) {
+		reg = K8_DCSB0 + (cs * 4);
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+						&pvt->dcsb0[cs]);
+		if (unlikely(err))
+			debugf0("Reading K8_DCSB0[%d] failed\n", cs);
+		else
+			debugf0("  DCSB0[%d]=0x%08x reg: F2x%x\n",
+				cs, pvt->dcsb0[cs], reg);
+
+		/* If DCT are NOT ganged, then read in DCT1's base */
+		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
+			reg = F10_DCSB1 + (cs * 4);
+			err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+							&pvt->dcsb1[cs]);
+			if (unlikely(err))
+				debugf0("Reading F10_DCSB1[%d] failed\n", cs);
+			else
+				debugf0("  DCSB1[%d]=0x%08x reg: F2x%x\n",
+					cs, pvt->dcsb1[cs], reg);
+		} else {
+			pvt->dcsb1[cs] = 0;
+		}
+	}
+
+	for (cs = 0; cs < pvt->num_dcsm; cs++) {
+		reg = K8_DCSB0 + (cs * 4);
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+					&pvt->dcsm0[cs]);
+		if (unlikely(err))
+			debugf0("Reading K8_DCSM0 failed\n");
+		else
+			debugf0("    DCSM0[%d]=0x%08x reg: F2x%x\n",
+				cs, pvt->dcsm0[cs], reg);
+
+		/* If DCT are NOT ganged, then read in DCT1's mask */
+		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
+			reg = F10_DCSM1 + (cs * 4);
+			err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+					&pvt->dcsm1[cs]);
+			if (unlikely(err))
+				debugf0("Reading F10_DCSM1[%d] failed\n", cs);
+			else
+				debugf0("    DCSM1[%d]=0x%08x reg: F2x%x\n",
+					cs, pvt->dcsm1[cs], reg);
+		} else
+			pvt->dcsm1[cs] = 0;
+	}
+}
+
+static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
+{
+	enum mem_type type;
+
+	if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= OPTERON_CPU_REV_F) {
+		/* Rev F and later */
+		type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
+	} else {
+		/* Rev E and earlier */
+		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
+	}
+
+	debugf1("  Memory type is: %s\n",
+		(type == MEM_DDR2) ? "MEM_DDR2" :
+		(type == MEM_RDDR2) ? "MEM_RDDR2" :
+		(type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR");
+
+	return type;
+}
+
+/*
+ * Read the DRAM Configuration Low register. It differs between CG, D & E revs
+ * and the later RevF memory controllers (DDR vs DDR2)
+ *
+ * Return:
+ *      number of memory channels in operation
+ * Pass back:
+ *      contents of the DCL0_LOW register
+ */
+static int k8_early_channel_count(struct amd64_pvt *pvt)
+{
+	int flag, err = 0;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		return err;
+
+	if ((boot_cpu_data.x86_model >> 4) >= OPTERON_CPU_REV_F) {
+		/* RevF (NPT) and later */
+		flag = pvt->dclr0 & F10_WIDTH_128;
+	} else {
+		/* RevE and earlier */
+		flag = pvt->dclr0 & REVE_WIDTH_128;
+	}
+
+	/* not used */
+	pvt->dclr1 = 0;
+
+	return (flag) ? 2 : 1;
+}
+
+/* extract the ERROR ADDRESS for the K8 CPUs */
+static u64 k8_get_error_address(struct mem_ctl_info *mci,
+				struct amd64_error_info_regs *info)
+{
+	return (((u64) (info->nbeah & 0xff)) << 32) +
+			(info->nbeal & ~0x03);
+}
+
+/*
+ * Read the Base and Limit registers for K8 based Memory controllers; extract
+ * fields from the 'raw' reg into separate data fields
+ *
+ * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN
+ */
+static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+{
+	u32 low;
+	u32 off = dram << 3;	/* 8 bytes between DRAM entries */
+	int err;
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl,
+				    K8_DRAM_BASE_LOW + off, &low);
+	if (err)
+		debugf0("Reading K8_DRAM_BASE_LOW failed\n");
+
+	/* Extract parts into separate data entries */
+	pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
+	pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
+	pvt->dram_rw_en[dram] = (low & 0x3);
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl,
+				    K8_DRAM_LIMIT_LOW + off, &low);
+	if (err)
+		debugf0("Reading K8_DRAM_LIMIT_LOW failed\n");
+
+	/*
+	 * Extract parts into separate data entries. Limit is the HIGHEST memory
+	 * location of the region, so lower 24 bits need to be all ones
+	 */
+	pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
+	pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
+	pvt->dram_DstNode[dram] = (low & 0x7);
+}
+
+static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
+					struct amd64_error_info_regs *info,
+					u64 SystemAddress)
+{
+	struct mem_ctl_info *src_mci;
+	unsigned short syndrome;
+	int channel, csrow;
+	u32 page, offset;
+
+	/* Extract the syndrome parts and form a 16-bit syndrome */
+	syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
+	syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+
+	/* CHIPKILL enabled */
+	if (info->nbcfg & K8_NBCFG_CHIPKILL) {
+		channel = get_channel_from_ecc_syndrome(syndrome);
+		if (channel < 0) {
+			/*
+			 * Syndrome didn't map, so we don't know which of the
+			 * 2 DIMMs is in error. So we need to ID 'both' of them
+			 * as suspect.
+			 */
+			amd64_mc_printk(mci, KERN_WARNING,
+				       "unknown syndrome 0x%x - possible error "
+				       "reporting race\n", syndrome);
+			edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+			return;
+		}
+	} else {
+		/*
+		 * non-chipkill ecc mode
+		 *
+		 * The k8 documentation is unclear about how to determine the
+		 * channel number when using non-chipkill memory.  This method
+		 * was obtained from email communication with someone at AMD.
+		 * (Wish the email was placed in this comment - norsk)
+		 */
+		channel = ((SystemAddress & BIT(3)) != 0);
+	}
+
+	/*
+	 * Find out which node the error address belongs to. This may be
+	 * different from the node that detected the error.
+	 */
+	src_mci = find_mc_by_sys_addr(mci, SystemAddress);
+	if (src_mci) {
+		amd64_mc_printk(mci, KERN_ERR,
+			     "failed to map error address 0x%lx to a node\n",
+			     (unsigned long)SystemAddress);
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+		return;
+	}
+
+	/* Now map the SystemAddress to a CSROW */
+	csrow = sys_addr_to_csrow(src_mci, SystemAddress);
+	if (csrow < 0) {
+		edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
+	} else {
+		error_address_to_page_and_offset(SystemAddress, &page, &offset);
+
+		edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
+				  channel, EDAC_MOD_STR);
+	}
+}
+
+/*
+ * determrine the number of PAGES in for this DIMM's size based on its DRAM
+ * Address Mapping.
+ *
+ * First step is to calc the number of bits to shift a value of 1 left to
+ * indicate show many pages. Start with the DBAM value as the starting bits,
+ * then proceed to adjust those shift bits, based on CPU rev and the table.
+ * See BKDG on the DBAM
+ */
+static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
+{
+	int nr_pages;
+
+	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+		nr_pages = 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
+	} else {
+		/*
+		 * RevE and less section; this line is tricky. It collapses the
+		 * table used by RevD and later to one that matches revisions CG
+		 * and earlier.
+		 */
+		dram_map -= (pvt->ext_model >= OPTERON_CPU_REV_D) ?
+				(dram_map > 8 ? 4 : (dram_map > 5 ?
+				3 : (dram_map > 2 ? 1 : 0))) : 0;
+
+		/* 25 shift is 32MiB minimum DIMM size in RevE and prior */
+		nr_pages = 1 << (dram_map + 25 - PAGE_SHIFT);
+	}
+
+	return nr_pages;
+}
+
+/*
+ * Get the number of DCT channels in use.
+ *
+ * Return:
+ *	number of Memory Channels in operation
+ * Pass back:
+ *	contents of the DCL0_LOW register
+ */
+static int f10_early_channel_count(struct amd64_pvt *pvt)
+{
+	int err = 0, channels = 0;
+	u32 dbam;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
+	if (err)
+		goto err_reg;
+
+	/* If we are in 128 bit mode, then we are using 2 channels */
+	if (pvt->dclr0 & F10_WIDTH_128) {
+		debugf0("Data WIDTH is 128 bits - 2 channels\n");
+		channels = 2;
+		return channels;
+	}
+
+	/*
+	 * Need to check if in UN-ganged mode: In such, there are 2 channels,
+	 * but they are NOT in 128 bit mode and thus the above 'dcl0' status bit
+	 * will be OFF.
+	 *
+	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
+	 * their CSEnable bit on. If so, then SINGLE DIMM case.
+	 */
+	debugf0("Data WIDTH is NOT 128 bits - need more decoding\n");
+
+	/*
+	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
+	 * is more than just one DIMM present in unganged mode. Need to check
+	 * both controllers since DIMMs can be placed in either one.
+	 */
+	channels = 0;
+	err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam);
+	if (err)
+		goto err_reg;
+
+	if (DBAM_DIMM(0, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(1, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(2, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(3, dbam) > 0)
+		channels++;
+
+	/* If more than 2 DIMMs are present, then we have 2 channels */
+	if (channels > 2)
+		channels = 2;
+	else if (channels == 0) {
+		/* No DIMMs on DCT0, so look at DCT1 */
+		err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
+		if (err)
+			goto err_reg;
+
+		if (DBAM_DIMM(0, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(1, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(2, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(3, dbam) > 0)
+			channels++;
+
+		if (channels > 2)
+			channels = 2;
+	}
+
+	/* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
+	if (channels == 0)
+		channels = 1;
+
+	debugf0("DIMM count= %d\n", channels);
+
+	return channels;
+
+err_reg:
+	return -1;
+
+}
+
+static int f10_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
+{
+	return 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
+}
+
+/* Enable extended configuration access via 0xCF8 feature */
+static void amd64_setup(struct amd64_pvt *pvt)
+{
+	u32 reg;
+
+	pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+
+	pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG);
+	reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+}
+
+/* Restore the extended configuration access via 0xCF8 feature */
+static void amd64_teardown(struct amd64_pvt *pvt)
+{
+	u32 reg;
+
+	pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+
+	reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	if (pvt->flags.cf8_extcfg)
+		reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+}
+
+static u64 f10_get_error_address(struct mem_ctl_info *mci,
+			struct amd64_error_info_regs *info)
+{
+	return (((u64) (info->nbeah & 0xffff)) << 32) +
+			(info->nbeal & ~0x01);
+}
+
+/*
+ * Read the Base and Limit registers for F10 based Memory controllers. Extract
+ * fields from the 'raw' reg into separate data fields.
+ *
+ * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN.
+ */
+static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+{
+	u32 high_offset, low_offset, high_base, low_base, high_limit, low_limit;
+
+	low_offset = K8_DRAM_BASE_LOW + (dram << 3);
+	high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
+
+	/* read the 'raw' DRAM BASE Address register */
+	pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_base);
+
+	/* Read from the ECS data register */
+	pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_base);
+
+	/* Extract parts into separate data entries */
+	pvt->dram_rw_en[dram] = (low_base & 0x3);
+
+	if (pvt->dram_rw_en[dram] == 0)
+		return;
+
+	pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
+
+	pvt->dram_base[dram] = (((((u64) high_base & 0x000000FF) << 32) |
+				((u64) low_base & 0xFFFF0000))) << 8;
+
+	low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
+	high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
+
+	/* read the 'raw' LIMIT registers */
+	pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_limit);
+
+	/* Read from the ECS data register for the HIGH portion */
+	pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_limit);
+
+	debugf0("  HW Regs: BASE=0x%08x-%08x      LIMIT=  0x%08x-%08x\n",
+		high_base, low_base, high_limit, low_limit);
+
+	pvt->dram_DstNode[dram] = (low_limit & 0x7);
+	pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
+
+	/*
+	 * Extract address values and form a LIMIT address. Limit is the HIGHEST
+	 * memory location of the region, so low 24 bits need to be all ones.
+	 */
+	low_limit |= 0x0000FFFF;
+	pvt->dram_limit[dram] =
+		((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF);
+}
+
+static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
+{
+	int err = 0;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW,
+				    &pvt->dram_ctl_select_low);
+	if (err) {
+		debugf0("Reading F10_DCTL_SEL_LOW failed\n");
+	} else {
+		debugf0("DRAM_DCTL_SEL_LOW=0x%x  DctSelBaseAddr=0x%x\n",
+			pvt->dram_ctl_select_low, dct_sel_baseaddr(pvt));
+
+		debugf0("  DRAM DCTs are=%s DRAM Is=%s DRAM-Ctl-"
+				"sel-hi-range=%s\n",
+			(dct_ganging_enabled(pvt) ? "GANGED" : "NOT GANGED"),
+			(dct_dram_enabled(pvt) ? "Enabled"   : "Disabled"),
+			(dct_high_range_enabled(pvt) ? "Enabled" : "Disabled"));
+
+		debugf0("  DctDatIntLv=%s MemCleared=%s DctSelIntLvAddr=0x%x\n",
+			(dct_data_intlv_enabled(pvt) ? "Enabled" : "Disabled"),
+			(dct_memory_cleared(pvt) ? "True " : "False "),
+			dct_sel_interleave_addr(pvt));
+	}
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH,
+				    &pvt->dram_ctl_select_high);
+	if (err)
+		debugf0("Reading F10_DCTL_SEL_HIGH failed\n");
+}
+
+/*
+ * determine channel based on the interleaving mode: F10h BKDG, 2.8.9 Memory
+ * Interleaving Modes.
+ */
+static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
+				int hi_range_sel, u32 intlv_en)
+{
+	u32 cs, temp, dct_sel_high = (pvt->dram_ctl_select_low >> 1) & 1;
+
+	if (dct_ganging_enabled(pvt))
+		cs = 0;
+	else if (hi_range_sel)
+		cs = dct_sel_high;
+	else if (dct_interleave_enabled(pvt)) {
+		/*
+		 * see F2x110[DctSelIntLvAddr] - channel interleave mode
+		 */
+		if (dct_sel_interleave_addr(pvt) == 0)
+			cs = sys_addr >> 6 & 1;
+		else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
+			temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
+
+			if (dct_sel_interleave_addr(pvt) & 1)
+				cs = (sys_addr >> 9 & 1) ^ temp;
+			else
+				cs = (sys_addr >> 6 & 1) ^ temp;
+		} else if (intlv_en & 4)
+			cs = sys_addr >> 15 & 1;
+		else if (intlv_en & 2)
+			cs = sys_addr >> 14 & 1;
+		else if (intlv_en & 1)
+			cs = sys_addr >> 13 & 1;
+		else
+			cs = sys_addr >> 12 & 1;
+	} else if (dct_high_range_enabled(pvt) && !dct_ganging_enabled(pvt))
+		cs = ~dct_sel_high & 1;
+	else
+		cs = 0;
+
+	return cs;
+}
+
+static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
+{
+	if (intlv_en == 1)
+		return 1;
+	else if (intlv_en == 3)
+		return 2;
+	else if (intlv_en == 7)
+		return 3;
+
+	return 0;
+}
+
+/* See F10h BKDG, 2.8.10.2 DctSelBaseOffset Programming */
+static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
+						 u32 dct_sel_base_addr,
+						 u64 dct_sel_base_off,
+						 u32 hole_valid, u32 hole_off,
+						 u64 dram_base)
+{
+	u64 chan_off;
+
+	if (hi_range_sel) {
+		if (!(dct_sel_base_addr & 0xFFFFF800) &&
+		   hole_valid && (sys_addr >= 0x100000000ULL))
+			chan_off = hole_off << 16;
+		else
+			chan_off = dct_sel_base_off;
+	} else {
+		if (hole_valid && (sys_addr >= 0x100000000ULL))
+			chan_off = hole_off << 16;
+		else
+			chan_off = dram_base & 0xFFFFF8000000ULL;
+	}
+
+	return (sys_addr & 0x0000FFFFFFFFFFC0ULL) -
+			(chan_off & 0x0000FFFFFF800000ULL);
+}
+
+/* Hack for the time being - Can we get this from BIOS?? */
+#define	CH0SPARE_RANK	0
+#define	CH1SPARE_RANK	1
+
+/*
+ * checks if the csrow passed in is marked as SPARED, if so returns the new
+ * spare row
+ */
+static inline int f10_process_possible_spare(int csrow,
+				u32 cs, struct amd64_pvt *pvt)
+{
+	u32 swap_done;
+	u32 bad_dram_cs;
+
+	/* Depending on channel, isolate respective SPARING info */
+	if (cs) {
+		swap_done = F10_ONLINE_SPARE_SWAPDONE1(pvt->online_spare);
+		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS1(pvt->online_spare);
+		if (swap_done && (csrow == bad_dram_cs))
+			csrow = CH1SPARE_RANK;
+	} else {
+		swap_done = F10_ONLINE_SPARE_SWAPDONE0(pvt->online_spare);
+		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS0(pvt->online_spare);
+		if (swap_done && (csrow == bad_dram_cs))
+			csrow = CH0SPARE_RANK;
+	}
+	return csrow;
+}
+
+/*
+ * Iterate over the DRAM DCT "base" and "mask" registers looking for a
+ * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
+ *
+ * Return:
+ *	-EINVAL:  NOT FOUND
+ *	0..csrow = Chip-Select Row
+ */
+static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+	u32 cs_base, cs_mask;
+	int cs_found = -EINVAL;
+	int csrow;
+
+	mci = mci_lookup[nid];
+	if (!mci)
+		return cs_found;
+
+	pvt = mci->pvt_info;
+
+	debugf1("InputAddr=0x%x  channelselect=%d\n", in_addr, cs);
+
+	for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+
+		cs_base = amd64_get_dct_base(pvt, cs, csrow);
+		if (!(cs_base & K8_DCSB_CS_ENABLE))
+			continue;
+
+		/*
+		 * We have an ENABLED CSROW, Isolate just the MASK bits of the
+		 * target: [28:19] and [13:5], which map to [36:27] and [21:13]
+		 * of the actual address.
+		 */
+		cs_base &= REV_F_F1Xh_DCSB_BASE_BITS;
+
+		/*
+		 * Get the DCT Mask, and ENABLE the reserved bits: [18:16] and
+		 * [4:0] to become ON. Then mask off bits [28:0] ([36:8])
+		 */
+		cs_mask = amd64_get_dct_mask(pvt, cs, csrow);
+
+		debugf1("    CSROW=%d CSBase=0x%x RAW CSMask=0x%x\n",
+				csrow, cs_base, cs_mask);
+
+		cs_mask = (cs_mask | 0x0007C01F) & 0x1FFFFFFF;
+
+		debugf1("              Final CSMask=0x%x\n", cs_mask);
+		debugf1("    (InputAddr & ~CSMask)=0x%x "
+				"(CSBase & ~CSMask)=0x%x\n",
+				(in_addr & ~cs_mask), (cs_base & ~cs_mask));
+
+		if ((in_addr & ~cs_mask) == (cs_base & ~cs_mask)) {
+			cs_found = f10_process_possible_spare(csrow, cs, pvt);
+
+			debugf1(" MATCH csrow=%d\n", cs_found);
+			break;
+		}
+	}
+	return cs_found;
+}
+
+/* For a given @dram_range, check if @sys_addr falls within it. */
+static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
+				  u64 sys_addr, int *nid, int *chan_sel)
+{
+	int node_id, cs_found = -EINVAL, high_range = 0;
+	u32 intlv_en, intlv_sel, intlv_shift, hole_off;
+	u32 hole_valid, tmp, dct_sel_base, channel;
+	u64 dram_base, chan_addr, dct_sel_base_off;
+
+	dram_base = pvt->dram_base[dram_range];
+	intlv_en = pvt->dram_IntlvEn[dram_range];
+
+	node_id = pvt->dram_DstNode[dram_range];
+	intlv_sel = pvt->dram_IntlvSel[dram_range];
+
+	debugf1("(dram=%d) Base=0x%llx SystemAddr= 0x%llx Limit=0x%llx\n",
+		dram_range, dram_base, sys_addr, pvt->dram_limit[dram_range]);
+
+	/*
+	 * This assumes that one node's DHAR is the same as all the other
+	 * nodes' DHAR.
+	 */
+	hole_off = (pvt->dhar & 0x0000FF80);
+	hole_valid = (pvt->dhar & 0x1);
+	dct_sel_base_off = (pvt->dram_ctl_select_high & 0xFFFFFC00) << 16;
+
+	debugf1("   HoleOffset=0x%x  HoleValid=0x%x IntlvSel=0x%x\n",
+			hole_off, hole_valid, intlv_sel);
+
+	if (intlv_en ||
+	    (intlv_sel != ((sys_addr >> 12) & intlv_en)))
+		return -EINVAL;
+
+	dct_sel_base = dct_sel_baseaddr(pvt);
+
+	/*
+	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
+	 * select between DCT0 and DCT1.
+	 */
+	if (dct_high_range_enabled(pvt) &&
+	   !dct_ganging_enabled(pvt) &&
+	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
+		high_range = 1;
+
+	channel = f10_determine_channel(pvt, sys_addr, high_range, intlv_en);
+
+	chan_addr = f10_get_base_addr_offset(sys_addr, high_range, dct_sel_base,
+					     dct_sel_base_off, hole_valid,
+					     hole_off, dram_base);
+
+	intlv_shift = f10_map_intlv_en_to_shift(intlv_en);
+
+	/* remove Node ID (in case of memory interleaving) */
+	tmp = chan_addr & 0xFC0;
+
+	chan_addr = ((chan_addr >> intlv_shift) & 0xFFFFFFFFF000ULL) | tmp;
+
+	/* remove channel interleave and hash */
+	if (dct_interleave_enabled(pvt) &&
+	   !dct_high_range_enabled(pvt) &&
+	   !dct_ganging_enabled(pvt)) {
+		if (dct_sel_interleave_addr(pvt) != 1)
+			chan_addr = (chan_addr >> 1) & 0xFFFFFFFFFFFFFFC0ULL;
+		else {
+			tmp = chan_addr & 0xFC0;
+			chan_addr = ((chan_addr & 0xFFFFFFFFFFFFC000ULL) >> 1)
+					| tmp;
+		}
+	}
+
+	debugf1("   (ChannelAddrLong=0x%llx) >> 8 becomes InputAddr=0x%x\n",
+		chan_addr, (u32)(chan_addr >> 8));
+
+	cs_found = f10_lookup_addr_in_dct(chan_addr >> 8, node_id, channel);
+
+	if (cs_found >= 0) {
+		*nid = node_id;
+		*chan_sel = channel;
+	}
+	return cs_found;
+}
+
+static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
+				       int *node, int *chan_sel)
+{
+	int dram_range, cs_found = -EINVAL;
+	u64 dram_base, dram_limit;
+
+	for (dram_range = 0; dram_range < DRAM_REG_COUNT; dram_range++) {
+
+		if (!pvt->dram_rw_en[dram_range])
+			continue;
+
+		dram_base = pvt->dram_base[dram_range];
+		dram_limit = pvt->dram_limit[dram_range];
+
+		if ((dram_base <= sys_addr) && (sys_addr <= dram_limit)) {
+
+			cs_found = f10_match_to_this_node(pvt, dram_range,
+							  sys_addr, node,
+							  chan_sel);
+			if (cs_found >= 0)
+				break;
+		}
+	}
+	return cs_found;
+}
+
+/*
+ * This the F10h reference code from AMD to map a @sys_addr to NodeID,
+ * CSROW, Channel.
+ *
+ * The @sys_addr is usually an error address received from the hardware.
+ */
+static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
+				     struct amd64_error_info_regs *info,
+				     u64 sys_addr)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 page, offset;
+	unsigned short syndrome;
+	int nid, csrow, chan = 0;
+
+	csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
+
+	if (csrow >= 0) {
+		error_address_to_page_and_offset(sys_addr, &page, &offset);
+
+		syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
+		syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+
+		/*
+		 * Is CHIPKILL on? If so, then we can attempt to use the
+		 * syndrome to isolate which channel the error was on.
+		 */
+		if (pvt->nbcfg & K8_NBCFG_CHIPKILL)
+			chan = get_channel_from_ecc_syndrome(syndrome);
+
+		if (chan >= 0) {
+			edac_mc_handle_ce(mci, page, offset, syndrome,
+					csrow, chan, EDAC_MOD_STR);
+		} else {
+			/*
+			 * Channel unknown, report all channels on this
+			 * CSROW as failed.
+			 */
+			for (chan = 0; chan < mci->csrows[csrow].nr_channels;
+								chan++) {
+					edac_mc_handle_ce(mci, page, offset,
+							syndrome,
+							csrow, chan,
+							EDAC_MOD_STR);
+			}
+		}
+
+	} else {
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+	}
+}
+
+/*
+ * Input (@index) is the DBAM DIMM value (1 of 4) used as an index into a shift
+ * table (revf_quad_ddr2_shift) which starts at 128MB DIMM size. Index of 0
+ * indicates an empty DIMM slot, as reported by Hardware on empty slots.
+ *
+ * Normalize to 128MB by subracting 27 bit shift.
+ */
+static int map_dbam_to_csrow_size(int index)
+{
+	int mega_bytes = 0;
+
+	if (index > 0 && index <= DBAM_MAX_VALUE)
+		mega_bytes = ((128 << (revf_quad_ddr2_shift[index]-27)));
+
+	return mega_bytes;
+}
+
+/*
+ * debug routine to display the memory sizes of a DIMM (ganged or not) and it
+ * CSROWs as well
+ */
+static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
+					 int ganged)
+{
+	int dimm, size0, size1;
+	u32 dbam;
+	u32 *dcsb;
+
+	debugf1("  dbam%d: 0x%8.08x  CSROW is %s\n", ctrl,
+			ctrl ? pvt->dbam1 : pvt->dbam0,
+			ganged ? "GANGED - dbam1 not used" : "NON-GANGED");
+
+	dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
+	dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0;
+
+	/* Dump memory sizes for DIMM and its CSROWs */
+	for (dimm = 0; dimm < 4; dimm++) {
+
+		size0 = 0;
+		if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE)
+			size0 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
+
+		size1 = 0;
+		if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
+			size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
+
+		debugf1("     CTRL-%d DIMM-%d=%5dMB   CSROW-%d=%5dMB "
+				"CSROW-%d=%5dMB\n",
+				ctrl,
+				dimm,
+				size0 + size1,
+				dimm * 2,
+				size0,
+				dimm * 2 + 1,
+				size1);
+	}
+}
+
+/*
+ * Very early hardware probe on pci_probe thread to determine if this module
+ * supports the hardware.
+ *
+ * Return:
+ *      0 for OK
+ *      1 for error
+ */
+static int f10_probe_valid_hardware(struct amd64_pvt *pvt)
+{
+	int ret = 0;
+
+	/*
+	 * If we are on a DDR3 machine, we don't know yet if
+	 * we support that properly at this time
+	 */
+	if ((pvt->dchr0 & F10_DCHR_Ddr3Mode) ||
+	    (pvt->dchr1 & F10_DCHR_Ddr3Mode)) {
+
+		amd64_printk(KERN_WARNING,
+			"%s() This machine is running with DDR3 memory. "
+			"This is not currently supported. "
+			"DCHR0=0x%x DCHR1=0x%x\n",
+			__func__, pvt->dchr0, pvt->dchr1);
+
+		amd64_printk(KERN_WARNING,
+			"   Contact '%s' module MAINTAINER to help add"
+			" support.\n",
+			EDAC_MOD_STR);
+
+		ret = 1;
+
+	}
+	return ret;
+}
+
+/*
+ * There currently are 3 types type of MC devices for AMD Athlon/Opterons
+ * (as per PCI DEVICE_IDs):
+ *
+ * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI
+ * DEVICE ID, even though there is differences between the different Revisions
+ * (CG,D,E,F).
+ *
+ * Family F10h and F11h.
+ *
+ */
+static struct amd64_family_type amd64_family_types[] = {
+	[K8_CPUS] = {
+		.ctl_name = "RevF",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+		.ops = {
+			.early_channel_count = k8_early_channel_count,
+			.get_error_address = k8_get_error_address,
+			.read_dram_base_limit = k8_read_dram_base_limit,
+			.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = k8_dbam_map_to_pages,
+		}
+	},
+	[F10_CPUS] = {
+		.ctl_name = "Family 10h",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+		.ops = {
+			.probe_valid_hardware = f10_probe_valid_hardware,
+			.early_channel_count = f10_early_channel_count,
+			.get_error_address = f10_get_error_address,
+			.read_dram_base_limit = f10_read_dram_base_limit,
+			.read_dram_ctl_register = f10_read_dram_ctl_register,
+			.map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = f10_dbam_map_to_pages,
+		}
+	},
+	[F11_CPUS] = {
+		.ctl_name = "Family 11h",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC,
+		.ops = {
+			.probe_valid_hardware = f10_probe_valid_hardware,
+			.early_channel_count = f10_early_channel_count,
+			.get_error_address = f10_get_error_address,
+			.read_dram_base_limit = f10_read_dram_base_limit,
+			.read_dram_ctl_register = f10_read_dram_ctl_register,
+			.map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = f10_dbam_map_to_pages,
+		}
+	},
+};
+
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+						unsigned int device,
+						struct pci_dev *related)
+{
+	struct pci_dev *dev = NULL;
+
+	dev = pci_get_device(vendor, device, dev);
+	while (dev) {
+		if ((dev->bus->number == related->bus->number) &&
+		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+			break;
+		dev = pci_get_device(vendor, device, dev);
+	}
+
+	return dev;
+}
+
+/*
+ * syndrome mapping table for ECC ChipKill devices
+ *
+ * The comment in each row is the token (nibble) number that is in error.
+ * The least significant nibble of the syndrome is the mask for the bits
+ * that are in error (need to be toggled) for the particular nibble.
+ *
+ * Each row contains 16 entries.
+ * The first entry (0th) is the channel number for that row of syndromes.
+ * The remaining 15 entries are the syndromes for the respective Error
+ * bit mask index.
+ *
+ * 1st index entry is 0x0001 mask, indicating that the rightmost bit is the
+ * bit in error.
+ * The 2nd index entry is 0x0010 that the second bit is damaged.
+ * The 3rd index entry is 0x0011 indicating that the rightmost 2 bits
+ * are damaged.
+ * Thus so on until index 15, 0x1111, whose entry has the syndrome
+ * indicating that all 4 bits are damaged.
+ *
+ * A search is performed on this table looking for a given syndrome.
+ *
+ * See the AMD documentation for ECC syndromes. This ECC table is valid
+ * across all the versions of the AMD64 processors.
+ *
+ * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a
+ * COLUMN index, then search all ROWS of that column, looking for a match
+ * with the input syndrome. The ROW value will be the token number.
+ *
+ * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this
+ * error.
+ */
+#define NUMBER_ECC_ROWS  36
+static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = {
+	/* Channel 0 syndromes */
+	{/*0*/  0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
+	   0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df },
+	{/*1*/  0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
+	   0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f },
+	{/*2*/  0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+	   0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f },
+	{/*3*/  0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
+	   0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df },
+	{/*4*/  0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
+	   0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f },
+	{/*5*/  0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
+	   0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf },
+	{/*6*/  0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
+	   0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f },
+	{/*7*/  0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
+	   0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff },
+	{/*8*/  0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
+	   0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f },
+	{/*9*/  0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
+	   0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f },
+	{/*a*/  0, 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
+	   0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f },
+	{/*b*/  0, 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
+	   0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f },
+	{/*c*/  0, 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
+	   0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f },
+	{/*d*/  0, 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
+	   0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f },
+	{/*e*/  0, 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
+	   0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f },
+	{/*f*/  0, 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
+	   0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f },
+
+	/* Channel 1 syndromes */
+	{/*10*/ 1, 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
+	   0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f },
+	{/*11*/ 1, 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
+	   0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff },
+	{/*12*/ 1, 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
+	   0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f },
+	{/*13*/ 1, 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97,
+	   0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f },
+	{/*14*/ 1, 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
+	   0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f },
+	{/*15*/ 1, 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
+	   0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f },
+	{/*16*/ 1, 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
+	   0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f },
+	{/*17*/ 1, 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
+	   0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf },
+	{/*18*/ 1, 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
+	   0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f },
+	{/*19*/ 1, 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
+	   0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f },
+	{/*1a*/ 1, 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
+	   0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf },
+	{/*1b*/ 1, 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527,
+	   0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff },
+	{/*1c*/ 1, 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
+	   0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef },
+	{/*1d*/ 1, 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
+	   0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def },
+	{/*1e*/ 1, 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
+	   0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f },
+	{/*1f*/ 1, 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
+	   0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f },
+
+	/* ECC bits are also in the set of tokens and they too can go bad
+	 * first 2 cover channel 0, while the second 2 cover channel 1
+	 */
+	{/*20*/ 0, 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
+	   0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f },
+	{/*21*/ 0, 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
+	   0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f },
+	{/*22*/ 1, 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
+	   0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f },
+	{/*23*/ 1, 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
+	   0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df }
+};
+
+/*
+ * Given the syndrome argument, scan each of the channel tables for a syndrome
+ * match. Depending on which table it is found, return the channel number.
+ */
+static int get_channel_from_ecc_syndrome(unsigned short syndrome)
+{
+	int row;
+	int column;
+
+	/* Determine column to scan */
+	column = syndrome & 0xF;
+
+	/* Scan all rows, looking for syndrome, or end of table */
+	for (row = 0; row < NUMBER_ECC_ROWS; row++) {
+		if (ecc_chipkill_syndromes[row][column] == syndrome)
+			return ecc_chipkill_syndromes[row][0];
+	}
+
+	debugf0("syndrome(%x) not found\n", syndrome);
+	return -1;
+}
+
+/*
+ * Check for valid error in the NB Status High register. If so, proceed to read
+ * NB Status Low, NB Address Low and NB Address High registers and store data
+ * into error structure.
+ *
+ * Returns:
+ *	- 1: if hardware regs contains valid error info
+ *	- 0: if no valid error is indicated
+ */
+static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
+				     struct amd64_error_info_regs *regs)
+{
+	struct amd64_pvt *pvt;
+	struct pci_dev *misc_f3_ctl;
+	int err = 0;
+
+	pvt = mci->pvt_info;
+	misc_f3_ctl = pvt->misc_f3_ctl;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBSH, &regs->nbsh);
+	if (err)
+		goto err_reg;
+
+	if (!(regs->nbsh & K8_NBSH_VALID_BIT))
+		return 0;
+
+	/* valid error, read remaining error information registers */
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBSL, &regs->nbsl);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBEAL, &regs->nbeal);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBEAH, &regs->nbeah);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBCFG, &regs->nbcfg);
+	if (err)
+		goto err_reg;
+
+	return 1;
+
+err_reg:
+	debugf0("Reading error info register failed\n");
+	return 0;
+}
+
+/*
+ * This function is called to retrieve the error data from hardware and store it
+ * in the info structure.
+ *
+ * Returns:
+ *	- 1: if a valid error is found
+ *	- 0: if no error is found
+ */
+static int amd64_get_error_info(struct mem_ctl_info *mci,
+				struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt;
+	struct amd64_error_info_regs regs;
+
+	pvt = mci->pvt_info;
+
+	if (!amd64_get_error_info_regs(mci, info))
+		return 0;
+
+	/*
+	 * Here's the problem with the K8's EDAC reporting: There are four
+	 * registers which report pieces of error information. They are shared
+	 * between CEs and UEs. Furthermore, contrary to what is stated in the
+	 * BKDG, the overflow bit is never used! Every error always updates the
+	 * reporting registers.
+	 *
+	 * Can you see the race condition? All four error reporting registers
+	 * must be read before a new error updates them! There is no way to read
+	 * all four registers atomically. The best than can be done is to detect
+	 * that a race has occured and then report the error without any kind of
+	 * precision.
+	 *
+	 * What is still positive is that errors are still reported and thus
+	 * problems can still be detected - just not localized because the
+	 * syndrome and address are spread out across registers.
+	 *
+	 * Grrrrr!!!!!  Here's hoping that AMD fixes this in some future K8 rev.
+	 * UEs and CEs should have separate register sets with proper overflow
+	 * bits that are used! At very least the problem can be fixed by
+	 * honoring the ErrValid bit in 'nbsh' and not updating registers - just
+	 * set the overflow bit - unless the current error is CE and the new
+	 * error is UE which would be the only situation for overwriting the
+	 * current values.
+	 */
+
+	regs = *info;
+
+	/* Use info from the second read - most current */
+	if (unlikely(!amd64_get_error_info_regs(mci, info)))
+		return 0;
+
+	/* clear the error bits in hardware */
+	pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);
+
+	/* Check for the possible race condition */
+	if ((regs.nbsh != info->nbsh) ||
+	     (regs.nbsl != info->nbsl) ||
+	     (regs.nbeah != info->nbeah) ||
+	     (regs.nbeal != info->nbeal)) {
+		amd64_mc_printk(mci, KERN_WARNING,
+				"hardware STATUS read access race condition "
+				"detected!\n");
+		return 0;
+	}
+	return 1;
+}
+
+static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
+					 struct amd64_error_info_regs *info)
+{
+	u32 err_code;
+	u32 ec_tt;		/* error code transaction type (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_tt = EXTRACT_TT_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		     "GART TLB event: transaction type(%s), "
+		     "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
+}
+
+static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
+				      struct amd64_error_info_regs *info)
+{
+	u32 err_code;
+	u32 ec_rrrr;		/* error code memory transaction (4b) */
+	u32 ec_tt;		/* error code transaction type (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_tt = EXTRACT_TT_CODE(err_code);
+	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		     "cache hierarchy error: memory transaction type(%s), "
+		     "transaction type(%s), cache level(%s)\n",
+		     rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
+}
+
+
+/*
+ * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
+ * ADDRESS and process.
+ */
+static void amd64_handle_ce(struct mem_ctl_info *mci,
+			    struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 SystemAddress;
+
+	/* Ensure that the Error Address is VALID */
+	if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
+		amd64_mc_printk(mci, KERN_ERR,
+			"HW has no ERROR_ADDRESS available\n");
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+		return;
+	}
+
+	SystemAddress = extract_error_address(mci, info);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		"CE ERROR_ADDRESS= 0x%llx\n", SystemAddress);
+
+	pvt->ops->map_sysaddr_to_csrow(mci, info, SystemAddress);
+}
+
+/* Handle any Un-correctable Errors (UEs) */
+static void amd64_handle_ue(struct mem_ctl_info *mci,
+			    struct amd64_error_info_regs *info)
+{
+	int csrow;
+	u64 SystemAddress;
+	u32 page, offset;
+	struct mem_ctl_info *log_mci, *src_mci = NULL;
+
+	log_mci = mci;
+
+	if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"HW has no ERROR_ADDRESS available\n");
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+		return;
+	}
+
+	SystemAddress = extract_error_address(mci, info);
+
+	/*
+	 * Find out which node the error address belongs to. This may be
+	 * different from the node that detected the error.
+	 */
+	src_mci = find_mc_by_sys_addr(mci, SystemAddress);
+	if (!src_mci) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n",
+			(unsigned long)SystemAddress);
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+		return;
+	}
+
+	log_mci = src_mci;
+
+	csrow = sys_addr_to_csrow(log_mci, SystemAddress);
+	if (csrow < 0) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n",
+			(unsigned long)SystemAddress);
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+	} else {
+		error_address_to_page_and_offset(SystemAddress, &page, &offset);
+		edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
+	}
+}
+
+static void amd64_decode_bus_error(struct mem_ctl_info *mci,
+				   struct amd64_error_info_regs *info)
+{
+	u32 err_code, ext_ec;
+	u32 ec_pp;		/* error code participating processor (2p) */
+	u32 ec_to;		/* error code timed out (1b) */
+	u32 ec_rrrr;		/* error code memory transaction (4b) */
+	u32 ec_ii;		/* error code memory or I/O (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_ii = EXTRACT_II_CODE(err_code);
+	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
+	ec_to = EXTRACT_TO_CODE(err_code);
+	ec_pp = EXTRACT_PP_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		"BUS ERROR:\n"
+		"  time-out(%s) mem or i/o(%s)\n"
+		"  participating processor(%s)\n"
+		"  memory transaction type(%s)\n"
+		"  cache level(%s) Error Found by: %s\n",
+		to_msgs[ec_to],
+		ii_msgs[ec_ii],
+		pp_msgs[ec_pp],
+		rrrr_msgs[ec_rrrr],
+		ll_msgs[ec_ll],
+		(info->nbsh & K8_NBSH_ERR_SCRUBER) ?
+			"Scrubber" : "Normal Operation");
+
+	/* If this was an 'observed' error, early out */
+	if (ec_pp == K8_NBSL_PP_OBS)
+		return;		/* We aren't the node involved */
+
+	/* Parse out the extended error code for ECC events */
+	switch (ext_ec) {
+	/* F10 changed to one Extended ECC error code */
+	case F10_NBSL_EXT_ERR_RES:		/* Reserved field */
+	case F10_NBSL_EXT_ERR_ECC:		/* F10 ECC ext err code */
+		break;
+
+	default:
+		amd64_mc_printk(mci, KERN_ERR, "NOT ECC: no special error "
+					       "handling for this error\n");
+		return;
+	}
+
+	if (info->nbsh & K8_NBSH_CECC)
+		amd64_handle_ce(mci, info);
+	else if (info->nbsh & K8_NBSH_UECC)
+		amd64_handle_ue(mci, info);
+
+	/*
+	 * If main error is CE then overflow must be CE.  If main error is UE
+	 * then overflow is unknown.  We'll call the overflow a CE - if
+	 * panic_on_ue is set then we're already panic'ed and won't arrive
+	 * here. Else, then apparently someone doesn't think that UE's are
+	 * catastrophic.
+	 */
+	if (info->nbsh & K8_NBSH_OVERFLOW)
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR
+					  "Error Overflow set");
+}
+
+int amd64_process_error_info(struct mem_ctl_info *mci,
+			     struct amd64_error_info_regs *info,
+			     int handle_errors)
+{
+	struct amd64_pvt *pvt;
+	struct amd64_error_info_regs *regs;
+	u32 err_code, ext_ec;
+	int gart_tlb_error = 0;
+
+	pvt = mci->pvt_info;
+
+	/* If caller doesn't want us to process the error, return */
+	if (!handle_errors)
+		return 1;
+
+	regs = info;
+
+	debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
+	debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
+		pvt->mc_node_id, regs->nbeah, regs->nbeal);
+	debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
+		regs->nbsh, regs->nbsl);
+	debugf1("  Valid Error=%s Overflow=%s\n",
+		(regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
+	debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
+		(regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_ERR_ENABLE) ?
+			"True" : "False");
+	debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
+		(regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_PCC) ?
+			"True" : "False");
+	debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
+		(regs->nbsh & K8_NBSH_CECC) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_UECC) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
+			"True" : "False");
+	debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
+		(regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
+
+
+	err_code = EXTRACT_ERROR_CODE(regs->nbsl);
+
+	/* Determine which error type:
+	 *	1) GART errors - non-fatal, developmental events
+	 *	2) MEMORY errors
+	 *	3) BUS errors
+	 *	4) Unknown error
+	 */
+	if (TEST_TLB_ERROR(err_code)) {
+		/*
+		 * GART errors are intended to help graphics driver developers
+		 * to detect bad GART PTEs. It is recommended by AMD to disable
+		 * GART table walk error reporting by default[1] (currently
+		 * being disabled in mce_cpu_quirks()) and according to the
+		 * comment in mce_cpu_quirks(), such GART errors can be
+		 * incorrectly triggered. We may see these errors anyway and
+		 * unless requested by the user, they won't be reported.
+		 *
+		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
+		 *     AMD NPT family 0Fh processors
+		 */
+		if (report_gart_errors == 0)
+			return 1;
+
+		/*
+		 * Only if GART error reporting is requested should we generate
+		 * any logs.
+		 */
+		gart_tlb_error = 1;
+
+		debugf1("GART TLB error\n");
+		amd64_decode_gart_tlb_error(mci, info);
+	} else if (TEST_MEM_ERROR(err_code)) {
+		debugf1("Memory/Cache error\n");
+		amd64_decode_mem_cache_error(mci, info);
+	} else if (TEST_BUS_ERROR(err_code)) {
+		debugf1("Bus (Link/DRAM) error\n");
+		amd64_decode_bus_error(mci, info);
+	} else {
+		/* shouldn't reach here! */
+		amd64_mc_printk(mci, KERN_WARNING,
+			     "%s(): unknown MCE error 0x%x\n", __func__,
+			     err_code);
+	}
+
+	ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl);
+	amd64_mc_printk(mci, KERN_ERR,
+		"ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
+
+	if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
+			ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
+			(ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
+			EXTRACT_LDT_LINK(info->nbsh)) {
+
+		amd64_mc_printk(mci, KERN_ERR,
+			"Error on hypertransport link: %s\n",
+			htlink_msgs[
+			EXTRACT_LDT_LINK(info->nbsh)]);
+	}
+
+	/*
+	 * Check the UE bit of the NB status high register, if set generate some
+	 * logs. If NOT a GART error, then process the event as a NO-INFO event.
+	 * If it was a GART error, skip that process.
+	 */
+	if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
+		amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
+		if (!gart_tlb_error)
+			edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
+	}
+
+	if (regs->nbsh & K8_NBSH_PCC)
+		amd64_mc_printk(mci, KERN_CRIT,
+			"PCC (processor context corrupt) set\n");
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(amd64_process_error_info);
+
+/*
+ * The main polling 'check' function, called FROM the edac core to perform the
+ * error checking and if an error is encountered, error processing.
+ */
+static void amd64_check(struct mem_ctl_info *mci)
+{
+	struct amd64_error_info_regs info;
+
+	if (amd64_get_error_info(mci, &info))
+		amd64_process_error_info(mci, &info, 1);
+}
+
+/*
+ * Input:
+ *	1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
+ *	2) AMD Family index value
+ *
+ * Ouput:
+ *	Upon return of 0, the following filled in:
+ *
+ *		struct pvt->addr_f1_ctl
+ *		struct pvt->misc_f3_ctl
+ *
+ *	Filled in with related device funcitions of 'dram_f2_ctl'
+ *	These devices are "reserved" via the pci_get_device()
+ *
+ *	Upon return of 1 (error status):
+ *
+ *		Nothing reserved
+ */
+static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
+{
+	const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
+
+	/* Reserve the ADDRESS MAP Device */
+	pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+						    amd64_dev->addr_f1_ctl,
+						    pvt->dram_f2_ctl);
+
+	if (!pvt->addr_f1_ctl) {
+		amd64_printk(KERN_ERR, "error address map device not found: "
+			     "vendor %x device 0x%x (broken BIOS?)\n",
+			     PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
+		return 1;
+	}
+
+	/* Reserve the MISC Device */
+	pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+						    amd64_dev->misc_f3_ctl,
+						    pvt->dram_f2_ctl);
+
+	if (!pvt->misc_f3_ctl) {
+		pci_dev_put(pvt->addr_f1_ctl);
+		pvt->addr_f1_ctl = NULL;
+
+		amd64_printk(KERN_ERR, "error miscellaneous device not found: "
+			     "vendor %x device 0x%x (broken BIOS?)\n",
+			     PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
+		return 1;
+	}
+
+	debugf1("    Addr Map device PCI Bus ID:\t%s\n",
+		pci_name(pvt->addr_f1_ctl));
+	debugf1("    DRAM MEM-CTL PCI Bus ID:\t%s\n",
+		pci_name(pvt->dram_f2_ctl));
+	debugf1("    Misc device PCI Bus ID:\t%s\n",
+		pci_name(pvt->misc_f3_ctl));
+
+	return 0;
+}
+
+static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
+{
+	pci_dev_put(pvt->addr_f1_ctl);
+	pci_dev_put(pvt->misc_f3_ctl);
+}
+
+/*
+ * Retrieve the hardware registers of the memory controller (this includes the
+ * 'Address Map' and 'Misc' device regs)
+ */
+static void amd64_read_mc_registers(struct amd64_pvt *pvt)
+{
+	u64 msr_val;
+	int dram, err = 0;
+
+	/*
+	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
+	 * those are Read-As-Zero
+	 */
+	rdmsrl(MSR_K8_TOP_MEM1, msr_val);
+	pvt->top_mem = msr_val >> 23;
+	debugf0("  TOP_MEM=0x%08llx\n", pvt->top_mem);
+
+	/* check first whether TOP_MEM2 is enabled */
+	rdmsrl(MSR_K8_SYSCFG, msr_val);
+	if (msr_val & (1U << 21)) {
+		rdmsrl(MSR_K8_TOP_MEM2, msr_val);
+		pvt->top_mem2 = msr_val >> 23;
+		debugf0("  TOP_MEM2=0x%08llx\n", pvt->top_mem2);
+	} else
+		debugf0("  TOP_MEM2 disabled.\n");
+
+	amd64_cpu_display_info(pvt);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
+	if (err)
+		goto err_reg;
+
+	if (pvt->ops->read_dram_ctl_register)
+		pvt->ops->read_dram_ctl_register(pvt);
+
+	for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
+		/*
+		 * Call CPU specific READ function to get the DRAM Base and
+		 * Limit values from the DCT.
+		 */
+		pvt->ops->read_dram_base_limit(pvt, dram);
+
+		/*
+		 * Only print out debug info on rows with both R and W Enabled.
+		 * Normal processing, compiler should optimize this whole 'if'
+		 * debug output block away.
+		 */
+		if (pvt->dram_rw_en[dram] != 0) {
+			debugf1("  DRAM_BASE[%d]: 0x%8.08x-%8.08x "
+				"DRAM_LIMIT:  0x%8.08x-%8.08x\n",
+				dram,
+				(u32)(pvt->dram_base[dram] >> 32),
+				(u32)(pvt->dram_base[dram] & 0xFFFFFFFF),
+				(u32)(pvt->dram_limit[dram] >> 32),
+				(u32)(pvt->dram_limit[dram] & 0xFFFFFFFF));
+			debugf1("        IntlvEn=%s %s %s "
+				"IntlvSel=%d DstNode=%d\n",
+				pvt->dram_IntlvEn[dram] ?
+					"Enabled" : "Disabled",
+				(pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
+				(pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
+				pvt->dram_IntlvSel[dram],
+				pvt->dram_DstNode[dram]);
+		}
+	}
+
+	amd64_read_dct_base_mask(pvt);
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
+	if (err)
+		goto err_reg;
+
+	amd64_read_dbam_reg(pvt);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl,
+				F10_ONLINE_SPARE, &pvt->online_spare);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
+	if (err)
+		goto err_reg;
+
+	if (!dct_ganging_enabled(pvt)) {
+		err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1,
+						&pvt->dclr1);
+		if (err)
+			goto err_reg;
+
+		err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_1,
+						&pvt->dchr1);
+		if (err)
+			goto err_reg;
+	}
+
+	amd64_dump_misc_regs(pvt);
+
+err_reg:
+	debugf0("Reading an MC register failed\n");
+
+}
+
+/*
+ * NOTE: CPU Revision Dependent code
+ *
+ * Input:
+ *	@csrow_nr ChipSelect Row Number (0..CHIPSELECT_COUNT-1)
+ *	k8 private pointer to -->
+ *			DRAM Bank Address mapping register
+ *			node_id
+ *			DCL register where dual_channel_active is
+ *
+ * The DBAM register consists of 4 sets of 4 bits each definitions:
+ *
+ * Bits:	CSROWs
+ * 0-3		CSROWs 0 and 1
+ * 4-7		CSROWs 2 and 3
+ * 8-11		CSROWs 4 and 5
+ * 12-15	CSROWs 6 and 7
+ *
+ * Values range from: 0 to 15
+ * The meaning of the values depends on CPU revision and dual-channel state,
+ * see relevant BKDG more info.
+ *
+ * The memory controller provides for total of only 8 CSROWs in its current
+ * architecture. Each "pair" of CSROWs normally represents just one DIMM in
+ * single channel or two (2) DIMMs in dual channel mode.
+ *
+ * The following code logic collapses the various tables for CSROW based on CPU
+ * revision.
+ *
+ * Returns:
+ *	The number of PAGE_SIZE pages on the specified CSROW number it
+ *	encompasses
+ *
+ */
+static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
+{
+	u32 dram_map, nr_pages;
+
+	/*
+	 * The math on this doesn't look right on the surface because x/2*4 can
+	 * be simplified to x*2 but this expression makes use of the fact that
+	 * it is integral math where 1/2=0. This intermediate value becomes the
+	 * number of bits to shift the DBAM register to extract the proper CSROW
+	 * field.
+	 */
+	dram_map = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
+
+	nr_pages = pvt->ops->dbam_map_to_pages(pvt, dram_map);
+
+	/*
+	 * If dual channel then double the memory size of single channel.
+	 * Channel count is 1 or 2
+	 */
+	nr_pages <<= (pvt->channel_count - 1);
+
+	debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, dram_map);
+	debugf0("    nr_pages= %u  channel-count = %d\n",
+		nr_pages, pvt->channel_count);
+
+	return nr_pages;
+}
+
+/*
+ * Initialize the array of csrow attribute instances, based on the values
+ * from pci config hardware registers.
+ */
+static int amd64_init_csrows(struct mem_ctl_info *mci)
+{
+	struct csrow_info *csrow;
+	struct amd64_pvt *pvt;
+	u64 input_addr_min, input_addr_max, sys_addr;
+	int i, err = 0, empty = 1;
+
+	pvt = mci->pvt_info;
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
+	if (err)
+		debugf0("Reading K8_NBCFG failed\n");
+
+	debugf0("NBCFG= 0x%x  CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
+		(pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
+		);
+
+	for (i = 0; i < CHIPSELECT_COUNT; i++) {
+		csrow = &mci->csrows[i];
+
+		if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
+			debugf1("----CSROW %d EMPTY for node %d\n", i,
+				pvt->mc_node_id);
+			continue;
+		}
+
+		debugf1("----CSROW %d VALID for MC node %d\n",
+			i, pvt->mc_node_id);
+
+		empty = 0;
+		csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
+		find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
+		sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
+		csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
+		sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
+		csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
+		csrow->page_mask = ~mask_from_dct_mask(pvt, i);
+		/* 8 bytes of resolution */
+
+		csrow->mtype = amd64_determine_memory_type(pvt);
+
+		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
+		debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
+			(unsigned long)input_addr_min,
+			(unsigned long)input_addr_max);
+		debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
+			(unsigned long)sys_addr, csrow->page_mask);
+		debugf1("    nr_pages: %u  first_page: 0x%lx "
+			"last_page: 0x%lx\n",
+			(unsigned)csrow->nr_pages,
+			csrow->first_page, csrow->last_page);
+
+		/*
+		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
+		 */
+		if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
+			csrow->edac_mode =
+			    (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
+			    EDAC_S4ECD4ED : EDAC_SECDED;
+		else
+			csrow->edac_mode = EDAC_NONE;
+	}
+
+	return empty;
+}
+
+/*
+ * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
+ * enable it.
+ */
+static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int cpu, idx = 0, err = 0;
+	struct msr msrs[cpumask_weight(cpumask)];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+	if (!ecc_enable_override)
+		return;
+
+	memset(msrs, 0, sizeof(msrs));
+
+	amd64_printk(KERN_WARNING,
+		"'ecc_enable_override' parameter is active, "
+		"Enabling AMD ECC hardware now: CAUTION\n");
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+
+	/* turn on UECCn and CECCEn bits */
+	pvt->old_nbctl = value & mask;
+	pvt->nbctl_mcgctl_saved = 1;
+
+	value |= mask;
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	for_each_cpu(cpu, cpumask) {
+		if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
+			set_bit(idx, &pvt->old_mcgctl);
+
+		msrs[idx].l |= K8_MSR_MCGCTL_NBE;
+		idx++;
+	}
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (err)
+		debugf0("Reading K8_NBCFG failed\n");
+
+	debugf0("NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+		(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+	if (!(value & K8_NBCFG_ECC_ENABLE)) {
+		amd64_printk(KERN_WARNING,
+			"This node reports that DRAM ECC is "
+			"currently Disabled; ENABLING now\n");
+
+		/* Attempt to turn on DRAM ECC Enable */
+		value |= K8_NBCFG_ECC_ENABLE;
+		pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+
+		err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+		if (err)
+			debugf0("Reading K8_NBCFG failed\n");
+
+		if (!(value & K8_NBCFG_ECC_ENABLE)) {
+			amd64_printk(KERN_WARNING,
+				"Hardware rejects Enabling DRAM ECC checking\n"
+				"Check memory DIMM configuration\n");
+		} else {
+			amd64_printk(KERN_DEBUG,
+				"Hardware accepted DRAM ECC Enable\n");
+		}
+	}
+	debugf0("NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+		(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+	pvt->ctl_error_info.nbcfg = value;
+}
+
+static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+{
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int cpu, idx = 0, err = 0;
+	struct msr msrs[cpumask_weight(cpumask)];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+	if (!pvt->nbctl_mcgctl_saved)
+		return;
+
+	memset(msrs, 0, sizeof(msrs));
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+	value &= ~mask;
+	value |= pvt->old_nbctl;
+
+	/* restore the NB Enable MCGCTL bit */
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	for_each_cpu(cpu, cpumask) {
+		msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
+		msrs[idx].l |=
+			test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
+		idx++;
+	}
+
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+}
+
+static void check_mcg_ctl(void *ret)
+{
+	u64 msr_val = 0;
+	u8 nbe;
+
+	rdmsrl(MSR_IA32_MCG_CTL, msr_val);
+	nbe = msr_val & K8_MSR_MCGCTL_NBE;
+
+	debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+		raw_smp_processor_id(), msr_val,
+		(nbe ? "enabled" : "disabled"));
+
+	if (!nbe)
+		*(int *)ret = 0;
+}
+
+/* check MCG_CTL on all the cpus on this node */
+static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
+{
+	int ret = 1;
+	preempt_disable();
+	smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
+	preempt_enable();
+
+	return ret;
+}
+
+/*
+ * EDAC requires that the BIOS have ECC enabled before taking over the
+ * processing of ECC errors. This is because the BIOS can properly initialize
+ * the memory system completely. A command line option allows to force-enable
+ * hardware ECC later in amd64_enable_ecc_error_reporting().
+ */
+static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+{
+	u32 value;
+	int err = 0, ret = 0;
+	u8 ecc_enabled = 0;
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+
+	ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
+
+	ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
+
+	debugf0("K8_NBCFG=0x%x,  DRAM ECC is %s\n", value,
+			(value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
+
+	if (!ecc_enabled || !ret) {
+		if (!ecc_enabled) {
+			amd64_printk(KERN_WARNING, "This node reports that "
+						   "Memory ECC is currently "
+						   "disabled.\n");
+
+			amd64_printk(KERN_WARNING, "bit 0x%lx in register "
+				"F3x%x of the MISC_CONTROL device (%s) "
+				"should be enabled\n", K8_NBCFG_ECC_ENABLE,
+				K8_NBCFG, pci_name(pvt->misc_f3_ctl));
+		}
+		if (!ret) {
+			amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
+					"of node %d should be enabled\n",
+					K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
+					pvt->mc_node_id);
+		}
+		if (!ecc_enable_override) {
+			amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
+				"currently enabled by the BIOS. Module "
+				"will NOT be loaded.\n"
+				"    Either Enable ECC in the BIOS, "
+				"or use the 'ecc_enable_override' "
+				"parameter.\n"
+				"    Might be a BIOS bug, if BIOS says "
+				"ECC is enabled\n"
+				"    Use of the override can cause "
+				"unknown side effects.\n");
+			ret = -ENODEV;
+		}
+	} else {
+		amd64_printk(KERN_INFO,
+			"ECC is enabled by BIOS, Proceeding "
+			"with EDAC module initialization\n");
+
+		/* CLEAR the override, since BIOS controlled it */
+		ecc_enable_override = 0;
+	}
+
+	return ret;
+}
+
+struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
+					  ARRAY_SIZE(amd64_inj_attrs) +
+					  1];
+
+struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
+
+static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+{
+	unsigned int i = 0, j = 0;
+
+	for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
+		sysfs_attrs[i] = amd64_dbg_attrs[i];
+
+	for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
+		sysfs_attrs[i] = amd64_inj_attrs[j];
+
+	sysfs_attrs[i] = terminator;
+
+	mci->mc_driver_sysfs_attributes = sysfs_attrs;
+}
+
+static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
+	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
+	mci->edac_cap		= EDAC_FLAG_NONE;
+
+	if (pvt->nbcap & K8_NBCAP_SECDED)
+		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+
+	if (pvt->nbcap & K8_NBCAP_CHIPKILL)
+		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+
+	mci->edac_cap		= amd64_determine_edac_cap(pvt);
+	mci->mod_name		= EDAC_MOD_STR;
+	mci->mod_ver		= EDAC_AMD64_VERSION;
+	mci->ctl_name		= get_amd_family_name(pvt->mc_type_index);
+	mci->dev_name		= pci_name(pvt->dram_f2_ctl);
+	mci->ctl_page_to_phys	= NULL;
+
+	/* IMPORTANT: Set the polling 'check' function in this module */
+	mci->edac_check		= amd64_check;
+
+	/* memory scrubber interface */
+	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
+	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+}
+
+/*
+ * Init stuff for this DRAM Controller device.
+ *
+ * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
+ * Space feature MUST be enabled on ALL Processors prior to actually reading
+ * from the ECS registers. Since the loading of the module can occur on any
+ * 'core', and cores don't 'see' all the other processors ECS data when the
+ * others are NOT enabled. Our solution is to first enable ECS access in this
+ * routine on all processors, gather some data in a amd64_pvt structure and
+ * later come back in a finish-setup function to perform that final
+ * initialization. See also amd64_init_2nd_stage() for that.
+ */
+static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
+				    int mc_type_index)
+{
+	struct amd64_pvt *pvt = NULL;
+	int err = 0, ret;
+
+	ret = -ENOMEM;
+	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+	if (!pvt)
+		goto err_exit;
+
+	pvt->mc_node_id = get_mc_node_id_from_pdev(dram_f2_ctl);
+
+	pvt->dram_f2_ctl	= dram_f2_ctl;
+	pvt->ext_model		= boot_cpu_data.x86_model >> 4;
+	pvt->mc_type_index	= mc_type_index;
+	pvt->ops		= family_ops(mc_type_index);
+	pvt->old_mcgctl		= 0;
+
+	/*
+	 * We have the dram_f2_ctl device as an argument, now go reserve its
+	 * sibling devices from the PCI system.
+	 */
+	ret = -ENODEV;
+	err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
+	if (err)
+		goto err_free;
+
+	ret = -EINVAL;
+	err = amd64_check_ecc_enabled(pvt);
+	if (err)
+		goto err_put;
+
+	/*
+	 * Key operation here: setup of HW prior to performing ops on it. Some
+	 * setup is required to access ECS data. After this is performed, the
+	 * 'teardown' function must be called upon error and normal exit paths.
+	 */
+	if (boot_cpu_data.x86 >= 0x10)
+		amd64_setup(pvt);
+
+	/*
+	 * Save the pointer to the private data for use in 2nd initialization
+	 * stage
+	 */
+	pvt_lookup[pvt->mc_node_id] = pvt;
+
+	return 0;
+
+err_put:
+	amd64_free_mc_sibling_devices(pvt);
+
+err_free:
+	kfree(pvt);
+
+err_exit:
+	return ret;
+}
+
+/*
+ * This is the finishing stage of the init code. Needs to be performed after all
+ * MCs' hardware have been prepped for accessing extended config space.
+ */
+static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
+{
+	int node_id = pvt->mc_node_id;
+	struct mem_ctl_info *mci;
+	int ret, err = 0;
+
+	amd64_read_mc_registers(pvt);
+
+	ret = -ENODEV;
+	if (pvt->ops->probe_valid_hardware) {
+		err = pvt->ops->probe_valid_hardware(pvt);
+		if (err)
+			goto err_exit;
+	}
+
+	/*
+	 * We need to determine how many memory channels there are. Then use
+	 * that information for calculating the size of the dynamic instance
+	 * tables in the 'mci' structure
+	 */
+	pvt->channel_count = pvt->ops->early_channel_count(pvt);
+	if (pvt->channel_count < 0)
+		goto err_exit;
+
+	ret = -ENOMEM;
+	mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
+	if (!mci)
+		goto err_exit;
+
+	mci->pvt_info = pvt;
+
+	mci->dev = &pvt->dram_f2_ctl->dev;
+	amd64_setup_mci_misc_attributes(mci);
+
+	if (amd64_init_csrows(mci))
+		mci->edac_cap = EDAC_FLAG_NONE;
+
+	amd64_enable_ecc_error_reporting(mci);
+	amd64_set_mc_sysfs_attributes(mci);
+
+	ret = -ENODEV;
+	if (edac_mc_add_mc(mci)) {
+		debugf1("failed edac_mc_add_mc()\n");
+		goto err_add_mc;
+	}
+
+	mci_lookup[node_id] = mci;
+	pvt_lookup[node_id] = NULL;
+	return 0;
+
+err_add_mc:
+	edac_mc_free(mci);
+
+err_exit:
+	debugf0("failure to init 2nd stage: ret=%d\n", ret);
+
+	amd64_restore_ecc_error_reporting(pvt);
+
+	if (boot_cpu_data.x86 > 0xf)
+		amd64_teardown(pvt);
+
+	amd64_free_mc_sibling_devices(pvt);
+
+	kfree(pvt_lookup[pvt->mc_node_id]);
+	pvt_lookup[node_id] = NULL;
+
+	return ret;
+}
+
+
+static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
+				 const struct pci_device_id *mc_type)
+{
+	int ret = 0;
+
+	debugf0("(MC node=%d,mc_type='%s')\n",
+		get_mc_node_id_from_pdev(pdev),
+		get_amd_family_name(mc_type->driver_data));
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		ret = -EIO;
+	else
+		ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
+
+	if (ret < 0)
+		debugf0("ret=%d\n", ret);
+
+	return ret;
+}
+
+static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+
+	/* Remove from EDAC CORE tracking list */
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (!mci)
+		return;
+
+	pvt = mci->pvt_info;
+
+	amd64_restore_ecc_error_reporting(pvt);
+
+	if (boot_cpu_data.x86 > 0xf)
+		amd64_teardown(pvt);
+
+	amd64_free_mc_sibling_devices(pvt);
+
+	kfree(pvt);
+	mci->pvt_info = NULL;
+
+	mci_lookup[pvt->mc_node_id] = NULL;
+
+	/* Free the EDAC CORE resources */
+	edac_mc_free(mci);
+}
+
+/*
+ * This table is part of the interface for loading drivers for PCI devices. The
+ * PCI core identifies what devices are on a system during boot, and then
+ * inquiry this table to see if this driver is for a given device found.
+ */
+static const struct pci_device_id amd64_pci_table[] __devinitdata = {
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= K8_CPUS
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= F10_CPUS
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_11H_NB_DRAM,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= F11_CPUS
+	},
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, amd64_pci_table);
+
+static struct pci_driver amd64_pci_driver = {
+	.name		= EDAC_MOD_STR,
+	.probe		= amd64_init_one_instance,
+	.remove		= __devexit_p(amd64_remove_one_instance),
+	.id_table	= amd64_pci_table,
+};
+
+static void amd64_setup_pci_device(void)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+
+	if (amd64_ctl_pci)
+		return;
+
+	mci = mci_lookup[0];
+	if (mci) {
+
+		pvt = mci->pvt_info;
+		amd64_ctl_pci =
+			edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
+						    EDAC_MOD_STR);
+
+		if (!amd64_ctl_pci) {
+			pr_warning("%s(): Unable to create PCI control\n",
+				   __func__);
+
+			pr_warning("%s(): PCI error report via EDAC not set\n",
+				   __func__);
+			}
+	}
+}
+
+static int __init amd64_edac_init(void)
+{
+	int nb, err = -ENODEV;
+
+	edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
+
+	opstate_init();
+
+	if (cache_k8_northbridges() < 0)
+		goto err_exit;
+
+	err = pci_register_driver(&amd64_pci_driver);
+	if (err)
+		return err;
+
+	/*
+	 * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
+	 * amd64_pvt structs. These will be used in the 2nd stage init function
+	 * to finish initialization of the MC instances.
+	 */
+	for (nb = 0; nb < num_k8_northbridges; nb++) {
+		if (!pvt_lookup[nb])
+			continue;
+
+		err = amd64_init_2nd_stage(pvt_lookup[nb]);
+		if (err)
+			goto err_exit;
+	}
+
+	amd64_setup_pci_device();
+
+	return 0;
+
+err_exit:
+	debugf0("'finish_setup' stage failed\n");
+	pci_unregister_driver(&amd64_pci_driver);
+
+	return err;
+}
+
+static void __exit amd64_edac_exit(void)
+{
+	if (amd64_ctl_pci)
+		edac_pci_release_generic_ctl(amd64_ctl_pci);
+
+	pci_unregister_driver(&amd64_pci_driver);
+}
+
+module_init(amd64_edac_init);
+module_exit(amd64_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
+		"Dave Peterson, Thayne Harbaugh");
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
+		EDAC_AMD64_VERSION);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
new file mode 100644
index 0000000..a159957
--- /dev/null
+++ b/drivers/edac/amd64_edac.h
@@ -0,0 +1,644 @@
+/*
+ * AMD64 class Memory Controller kernel module
+ *
+ * Copyright (c) 2009 SoftwareBitMaker.
+ * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ *	Originally Written by Thayne Harbaugh
+ *
+ *      Changes by Douglas "norsk" Thompson  <dougthompson@xmission.com>:
+ *		- K8 CPU Revision D and greater support
+ *
+ *      Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>:
+ *		- Module largely rewritten, with new (and hopefully correct)
+ *		code for dealing with node and chip select interleaving,
+ *		various code cleanup, and bug fixes
+ *		- Added support for memory hoisting using DRAM hole address
+ *		register
+ *
+ *	Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
+ *		-K8 Rev (1207) revision support added, required Revision
+ *		specific mini-driver code to support Rev F as well as
+ *		prior revisions
+ *
+ *	Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
+ *		-Family 10h revision support added. New PCI Device IDs,
+ *		indicating new changes. Actual registers modified
+ *		were slight, less than the Rev E to Rev F transition
+ *		but changing the PCI Device ID was the proper thing to
+ *		do, as it provides for almost automactic family
+ *		detection. The mods to Rev F required more family
+ *		information detection.
+ *
+ *	Changes/Fixes by Borislav Petkov <borislav.petkov@amd.com>:
+ *		- misc fixes and code cleanups
+ *
+ * This module is based on the following documents
+ * (available from http://www.amd.com/):
+ *
+ *	Title:	BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
+ *		Opteron Processors
+ *	AMD publication #: 26094
+ *`	Revision: 3.26
+ *
+ *	Title:	BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
+ *		Processors
+ *	AMD publication #: 32559
+ *	Revision: 3.00
+ *	Issue Date: May 2006
+ *
+ *	Title:	BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
+ *		Processors
+ *	AMD publication #: 31116
+ *	Revision: 3.00
+ *	Issue Date: September 07, 2007
+ *
+ * Sections in the first 2 documents are no longer in sync with each other.
+ * The Family 10h BKDG was totally re-written from scratch with a new
+ * presentation model.
+ * Therefore, comments that refer to a Document section might be off.
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/edac.h>
+#include <asm/msr.h>
+#include "edac_core.h"
+
+#define amd64_printk(level, fmt, arg...) \
+	edac_printk(level, "amd64", fmt, ##arg)
+
+#define amd64_mc_printk(mci, level, fmt, arg...) \
+	edac_mc_chipset_printk(mci, level, "amd64", fmt, ##arg)
+
+/*
+ * Throughout the comments in this code, the following terms are used:
+ *
+ *	SysAddr, DramAddr, and InputAddr
+ *
+ *  These terms come directly from the amd64 documentation
+ * (AMD publication #26094).  They are defined as follows:
+ *
+ *     SysAddr:
+ *         This is a physical address generated by a CPU core or a device
+ *         doing DMA.  If generated by a CPU core, a SysAddr is the result of
+ *         a virtual to physical address translation by the CPU core's address
+ *         translation mechanism (MMU).
+ *
+ *     DramAddr:
+ *         A DramAddr is derived from a SysAddr by subtracting an offset that
+ *         depends on which node the SysAddr maps to and whether the SysAddr
+ *         is within a range affected by memory hoisting.  The DRAM Base
+ *         (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers
+ *         determine which node a SysAddr maps to.
+ *
+ *         If the DRAM Hole Address Register (DHAR) is enabled and the SysAddr
+ *         is within the range of addresses specified by this register, then
+ *         a value x from the DHAR is subtracted from the SysAddr to produce a
+ *         DramAddr.  Here, x represents the base address for the node that
+ *         the SysAddr maps to plus an offset due to memory hoisting.  See
+ *         section 3.4.8 and the comments in amd64_get_dram_hole_info() and
+ *         sys_addr_to_dram_addr() below for more information.
+ *
+ *         If the SysAddr is not affected by the DHAR then a value y is
+ *         subtracted from the SysAddr to produce a DramAddr.  Here, y is the
+ *         base address for the node that the SysAddr maps to.  See section
+ *         3.4.4 and the comments in sys_addr_to_dram_addr() below for more
+ *         information.
+ *
+ *     InputAddr:
+ *         A DramAddr is translated to an InputAddr before being passed to the
+ *         memory controller for the node that the DramAddr is associated
+ *         with.  The memory controller then maps the InputAddr to a csrow.
+ *         If node interleaving is not in use, then the InputAddr has the same
+ *         value as the DramAddr.  Otherwise, the InputAddr is produced by
+ *         discarding the bits used for node interleaving from the DramAddr.
+ *         See section 3.4.4 for more information.
+ *
+ *         The memory controller for a given node uses its DRAM CS Base and
+ *         DRAM CS Mask registers to map an InputAddr to a csrow.  See
+ *         sections 3.5.4 and 3.5.5 for more information.
+ */
+
+#define EDAC_AMD64_VERSION		" Ver: 3.2.0 " __DATE__
+#define EDAC_MOD_STR			"amd64_edac"
+
+/* Extended Model from CPUID, for CPU Revision numbers */
+#define OPTERON_CPU_LE_REV_C		0
+#define OPTERON_CPU_REV_D		1
+#define OPTERON_CPU_REV_E		2
+
+/* NPT processors have the following Extended Models */
+#define OPTERON_CPU_REV_F		4
+#define OPTERON_CPU_REV_FA		5
+
+/* Hardware limit on ChipSelect rows per MC and processors per system */
+#define CHIPSELECT_COUNT		8
+#define DRAM_REG_COUNT			8
+
+
+/*
+ * PCI-defined configuration space registers
+ */
+
+
+/*
+ * Function 1 - Address Map
+ */
+#define K8_DRAM_BASE_LOW		0x40
+#define K8_DRAM_LIMIT_LOW		0x44
+#define K8_DHAR				0xf0
+
+#define DHAR_VALID			BIT(0)
+#define F10_DRAM_MEM_HOIST_VALID	BIT(1)
+
+#define DHAR_BASE_MASK			0xff000000
+#define dhar_base(dhar)			(dhar & DHAR_BASE_MASK)
+
+#define K8_DHAR_OFFSET_MASK		0x0000ff00
+#define k8_dhar_offset(dhar)		((dhar & K8_DHAR_OFFSET_MASK) << 16)
+
+#define F10_DHAR_OFFSET_MASK		0x0000ff80
+					/* NOTE: Extra mask bit vs K8 */
+#define f10_dhar_offset(dhar)		((dhar & F10_DHAR_OFFSET_MASK) << 16)
+
+
+/* F10 High BASE/LIMIT registers */
+#define F10_DRAM_BASE_HIGH		0x140
+#define F10_DRAM_LIMIT_HIGH		0x144
+
+
+/*
+ * Function 2 - DRAM controller
+ */
+#define K8_DCSB0			0x40
+#define F10_DCSB1			0x140
+
+#define K8_DCSB_CS_ENABLE		BIT(0)
+#define K8_DCSB_NPT_SPARE		BIT(1)
+#define K8_DCSB_NPT_TESTFAIL		BIT(2)
+
+/*
+ * REV E: select [31:21] and [15:9] from DCSB and the shift amount to form
+ * the address
+ */
+#define REV_E_DCSB_BASE_BITS		(0xFFE0FE00ULL)
+#define REV_E_DCS_SHIFT			4
+#define REV_E_DCSM_COUNT		8
+
+#define REV_F_F1Xh_DCSB_BASE_BITS	(0x1FF83FE0ULL)
+#define REV_F_F1Xh_DCS_SHIFT		8
+
+/*
+ * REV F and later: selects [28:19] and [13:5] from DCSB and the shift amount
+ * to form the address
+ */
+#define REV_F_DCSB_BASE_BITS		(0x1FF83FE0ULL)
+#define REV_F_DCS_SHIFT			8
+#define REV_F_DCSM_COUNT		4
+#define F10_DCSM_COUNT			4
+#define F11_DCSM_COUNT			2
+
+/* DRAM CS Mask Registers */
+#define K8_DCSM0			0x60
+#define F10_DCSM1			0x160
+
+/* REV E: select [29:21] and [15:9] from DCSM */
+#define REV_E_DCSM_MASK_BITS		0x3FE0FE00
+
+/* unused bits [24:20] and [12:0] */
+#define REV_E_DCS_NOTUSED_BITS		0x01F01FFF
+
+/* REV F and later: select [28:19] and [13:5] from DCSM */
+#define REV_F_F1Xh_DCSM_MASK_BITS	0x1FF83FE0
+
+/* unused bits [26:22] and [12:0] */
+#define REV_F_F1Xh_DCS_NOTUSED_BITS	0x07C01FFF
+
+#define DBAM0				0x80
+#define DBAM1				0x180
+
+/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
+#define DBAM_DIMM(i, reg)		((((reg) >> (4*i))) & 0xF)
+
+#define DBAM_MAX_VALUE			11
+
+
+#define F10_DCLR_0			0x90
+#define F10_DCLR_1			0x190
+#define REVE_WIDTH_128			BIT(16)
+#define F10_WIDTH_128			BIT(11)
+
+
+#define F10_DCHR_0			0x94
+#define F10_DCHR_1			0x194
+
+#define F10_DCHR_FOUR_RANK_DIMM		BIT(18)
+#define F10_DCHR_Ddr3Mode		BIT(8)
+#define F10_DCHR_MblMode		BIT(6)
+
+
+#define F10_DCTL_SEL_LOW		0x110
+
+#define dct_sel_baseaddr(pvt)    \
+	((pvt->dram_ctl_select_low) & 0xFFFFF800)
+
+#define dct_sel_interleave_addr(pvt)    \
+	(((pvt->dram_ctl_select_low) >> 6) & 0x3)
+
+enum {
+	F10_DCTL_SEL_LOW_DctSelHiRngEn	= BIT(0),
+	F10_DCTL_SEL_LOW_DctSelIntLvEn	= BIT(2),
+	F10_DCTL_SEL_LOW_DctGangEn	= BIT(4),
+	F10_DCTL_SEL_LOW_DctDatIntLv	= BIT(5),
+	F10_DCTL_SEL_LOW_DramEnable	= BIT(8),
+	F10_DCTL_SEL_LOW_MemCleared	= BIT(10),
+};
+
+#define    dct_high_range_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelHiRngEn)
+
+#define dct_interleave_enabled(pvt)	   \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelIntLvEn)
+
+#define dct_ganging_enabled(pvt)        \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctGangEn)
+
+#define dct_data_intlv_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctDatIntLv)
+
+#define dct_dram_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DramEnable)
+
+#define dct_memory_cleared(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_MemCleared)
+
+
+#define F10_DCTL_SEL_HIGH		0x114
+
+
+/*
+ * Function 3 - Misc Control
+ */
+#define K8_NBCTL			0x40
+
+/* Correctable ECC error reporting enable */
+#define K8_NBCTL_CECCEn			BIT(0)
+
+/* UnCorrectable ECC error reporting enable */
+#define K8_NBCTL_UECCEn			BIT(1)
+
+#define K8_NBCFG			0x44
+#define K8_NBCFG_CHIPKILL		BIT(23)
+#define K8_NBCFG_ECC_ENABLE		BIT(22)
+
+#define K8_NBSL				0x48
+
+
+#define EXTRACT_HIGH_SYNDROME(x)	(((x) >> 24) & 0xff)
+#define EXTRACT_EXT_ERROR_CODE(x)	(((x) >> 16) & 0x1f)
+
+/* Family F10h: Normalized Extended Error Codes */
+#define F10_NBSL_EXT_ERR_RES		0x0
+#define F10_NBSL_EXT_ERR_CRC		0x1
+#define F10_NBSL_EXT_ERR_SYNC		0x2
+#define F10_NBSL_EXT_ERR_MST		0x3
+#define F10_NBSL_EXT_ERR_TGT		0x4
+#define F10_NBSL_EXT_ERR_GART		0x5
+#define F10_NBSL_EXT_ERR_RMW		0x6
+#define F10_NBSL_EXT_ERR_WDT		0x7
+#define F10_NBSL_EXT_ERR_ECC		0x8
+#define F10_NBSL_EXT_ERR_DEV		0x9
+#define F10_NBSL_EXT_ERR_LINK_DATA	0xA
+
+/* Next two are overloaded values */
+#define F10_NBSL_EXT_ERR_LINK_PROTO	0xB
+#define F10_NBSL_EXT_ERR_L3_PROTO	0xB
+
+#define F10_NBSL_EXT_ERR_NB_ARRAY	0xC
+#define F10_NBSL_EXT_ERR_DRAM_PARITY	0xD
+#define F10_NBSL_EXT_ERR_LINK_RETRY	0xE
+
+/* Next two are overloaded values */
+#define F10_NBSL_EXT_ERR_GART_WALK	0xF
+#define F10_NBSL_EXT_ERR_DEV_WALK	0xF
+
+/* 0x10 to 0x1B: Reserved */
+#define F10_NBSL_EXT_ERR_L3_DATA	0x1C
+#define F10_NBSL_EXT_ERR_L3_TAG		0x1D
+#define F10_NBSL_EXT_ERR_L3_LRU		0x1E
+
+/* K8: Normalized Extended Error Codes */
+#define K8_NBSL_EXT_ERR_ECC		0x0
+#define K8_NBSL_EXT_ERR_CRC		0x1
+#define K8_NBSL_EXT_ERR_SYNC		0x2
+#define K8_NBSL_EXT_ERR_MST		0x3
+#define K8_NBSL_EXT_ERR_TGT		0x4
+#define K8_NBSL_EXT_ERR_GART		0x5
+#define K8_NBSL_EXT_ERR_RMW		0x6
+#define K8_NBSL_EXT_ERR_WDT		0x7
+#define K8_NBSL_EXT_ERR_CHIPKILL_ECC	0x8
+#define K8_NBSL_EXT_ERR_DRAM_PARITY	0xD
+
+#define EXTRACT_ERROR_CODE(x)		((x) & 0xffff)
+#define	TEST_TLB_ERROR(x)		(((x) & 0xFFF0) == 0x0010)
+#define	TEST_MEM_ERROR(x)		(((x) & 0xFF00) == 0x0100)
+#define	TEST_BUS_ERROR(x)		(((x) & 0xF800) == 0x0800)
+#define	EXTRACT_TT_CODE(x)		(((x) >> 2) & 0x3)
+#define	EXTRACT_II_CODE(x)		(((x) >> 2) & 0x3)
+#define	EXTRACT_LL_CODE(x)		(((x) >> 0) & 0x3)
+#define	EXTRACT_RRRR_CODE(x)		(((x) >> 4) & 0xf)
+#define	EXTRACT_TO_CODE(x)		(((x) >> 8) & 0x1)
+#define	EXTRACT_PP_CODE(x)		(((x) >> 9) & 0x3)
+
+/*
+ * The following are for BUS type errors AFTER values have been normalized by
+ * shifting right
+ */
+#define K8_NBSL_PP_SRC			0x0
+#define K8_NBSL_PP_RES			0x1
+#define K8_NBSL_PP_OBS			0x2
+#define K8_NBSL_PP_GENERIC		0x3
+
+
+#define K8_NBSH				0x4C
+
+#define K8_NBSH_VALID_BIT		BIT(31)
+#define K8_NBSH_OVERFLOW		BIT(30)
+#define K8_NBSH_UNCORRECTED_ERR		BIT(29)
+#define K8_NBSH_ERR_ENABLE		BIT(28)
+#define K8_NBSH_MISC_ERR_VALID		BIT(27)
+#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
+#define K8_NBSH_PCC			BIT(25)
+#define K8_NBSH_CECC			BIT(14)
+#define K8_NBSH_UECC			BIT(13)
+#define K8_NBSH_ERR_SCRUBER		BIT(8)
+#define K8_NBSH_CORE3			BIT(3)
+#define K8_NBSH_CORE2			BIT(2)
+#define K8_NBSH_CORE1			BIT(1)
+#define K8_NBSH_CORE0			BIT(0)
+
+#define EXTRACT_LDT_LINK(x)		(((x) >> 4) & 0x7)
+#define EXTRACT_ERR_CPU_MAP(x)		((x) & 0xF)
+#define EXTRACT_LOW_SYNDROME(x)		(((x) >> 15) & 0xff)
+
+
+#define K8_NBEAL			0x50
+#define K8_NBEAH			0x54
+#define K8_SCRCTRL			0x58
+
+#define F10_NB_CFG_LOW			0x88
+#define	F10_NB_CFG_LOW_ENABLE_EXT_CFG	BIT(14)
+
+#define F10_NB_CFG_HIGH			0x8C
+
+#define F10_ONLINE_SPARE		0xB0
+#define F10_ONLINE_SPARE_SWAPDONE0(x)	((x) & BIT(1))
+#define F10_ONLINE_SPARE_SWAPDONE1(x)	((x) & BIT(3))
+#define F10_ONLINE_SPARE_BADDRAM_CS0(x) (((x) >> 4) & 0x00000007)
+#define F10_ONLINE_SPARE_BADDRAM_CS1(x) (((x) >> 8) & 0x00000007)
+
+#define F10_NB_ARRAY_ADDR		0xB8
+
+#define F10_NB_ARRAY_DRAM_ECC		0x80000000
+
+/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline  */
+#define SET_NB_ARRAY_ADDRESS(section)	(((section) & 0x3) << 1)
+
+#define F10_NB_ARRAY_DATA		0xBC
+
+#define SET_NB_DRAM_INJECTION_WRITE(word, bits)  \
+					(BIT(((word) & 0xF) + 20) | \
+					BIT(17) |  \
+					((bits) & 0xF))
+
+#define SET_NB_DRAM_INJECTION_READ(word, bits)  \
+					(BIT(((word) & 0xF) + 20) | \
+					BIT(16) |  \
+					((bits) & 0xF))
+
+#define K8_NBCAP			0xE8
+#define K8_NBCAP_CORES			(BIT(12)|BIT(13))
+#define K8_NBCAP_CHIPKILL		BIT(4)
+#define K8_NBCAP_SECDED			BIT(3)
+#define K8_NBCAP_8_NODE			BIT(2)
+#define K8_NBCAP_DUAL_NODE		BIT(1)
+#define K8_NBCAP_DCT_DUAL		BIT(0)
+
+/*
+ * MSR Regs
+ */
+#define K8_MSR_MCGCTL			0x017b
+#define K8_MSR_MCGCTL_NBE		BIT(4)
+
+#define K8_MSR_MC4CTL			0x0410
+#define K8_MSR_MC4STAT			0x0411
+#define K8_MSR_MC4ADDR			0x0412
+
+/* AMD sets the first MC device at device ID 0x18. */
+static inline int get_mc_node_id_from_pdev(struct pci_dev *pdev)
+{
+	return PCI_SLOT(pdev->devfn) - 0x18;
+}
+
+enum amd64_chipset_families {
+	K8_CPUS = 0,
+	F10_CPUS,
+	F11_CPUS,
+};
+
+/*
+ * Structure to hold:
+ *
+ * 1) dynamically read status and error address HW registers
+ * 2) sysfs entered values
+ * 3) MCE values
+ *
+ * Depends on entry into the modules
+ */
+struct amd64_error_info_regs {
+	u32 nbcfg;
+	u32 nbsh;
+	u32 nbsl;
+	u32 nbeah;
+	u32 nbeal;
+};
+
+/* Error injection control structure */
+struct error_injection {
+	u32	section;
+	u32	word;
+	u32	bit_map;
+};
+
+struct amd64_pvt {
+	/* pci_device handles which we utilize */
+	struct pci_dev *addr_f1_ctl;
+	struct pci_dev *dram_f2_ctl;
+	struct pci_dev *misc_f3_ctl;
+
+	int mc_node_id;		/* MC index of this MC node */
+	int ext_model;		/* extended model value of this node */
+
+	struct low_ops *ops;	/* pointer to per PCI Device ID func table */
+
+	int channel_count;
+
+	/* Raw registers */
+	u32 dclr0;		/* DRAM Configuration Low DCT0 reg */
+	u32 dclr1;		/* DRAM Configuration Low DCT1 reg */
+	u32 dchr0;		/* DRAM Configuration High DCT0 reg */
+	u32 dchr1;		/* DRAM Configuration High DCT1 reg */
+	u32 nbcap;		/* North Bridge Capabilities */
+	u32 nbcfg;		/* F10 North Bridge Configuration */
+	u32 ext_nbcfg;		/* Extended F10 North Bridge Configuration */
+	u32 dhar;		/* DRAM Hoist reg */
+	u32 dbam0;		/* DRAM Base Address Mapping reg for DCT0 */
+	u32 dbam1;		/* DRAM Base Address Mapping reg for DCT1 */
+
+	/* DRAM CS Base Address Registers F2x[1,0][5C:40] */
+	u32 dcsb0[CHIPSELECT_COUNT];
+	u32 dcsb1[CHIPSELECT_COUNT];
+
+	/* DRAM CS Mask Registers F2x[1,0][6C:60] */
+	u32 dcsm0[CHIPSELECT_COUNT];
+	u32 dcsm1[CHIPSELECT_COUNT];
+
+	/*
+	 * Decoded parts of DRAM BASE and LIMIT Registers
+	 * F1x[78,70,68,60,58,50,48,40]
+	 */
+	u64 dram_base[DRAM_REG_COUNT];
+	u64 dram_limit[DRAM_REG_COUNT];
+	u8  dram_IntlvSel[DRAM_REG_COUNT];
+	u8  dram_IntlvEn[DRAM_REG_COUNT];
+	u8  dram_DstNode[DRAM_REG_COUNT];
+	u8  dram_rw_en[DRAM_REG_COUNT];
+
+	/*
+	 * The following fields are set at (load) run time, after CPU revision
+	 * has been determined, since the dct_base and dct_mask registers vary
+	 * based on revision
+	 */
+	u32 dcsb_base;		/* DCSB base bits */
+	u32 dcsm_mask;		/* DCSM mask bits */
+	u32 num_dcsm;		/* Number of DCSM registers */
+	u32 dcs_mask_notused;	/* DCSM notused mask bits */
+	u32 dcs_shift;		/* DCSB and DCSM shift value */
+
+	u64 top_mem;		/* top of memory below 4GB */
+	u64 top_mem2;		/* top of memory above 4GB */
+
+	u32 dram_ctl_select_low;	/* DRAM Controller Select Low Reg */
+	u32 dram_ctl_select_high;	/* DRAM Controller Select High Reg */
+	u32 online_spare;               /* On-Line spare Reg */
+
+	/* temp storage for when input is received from sysfs */
+	struct amd64_error_info_regs ctl_error_info;
+
+	/* place to store error injection parameters prior to issue */
+	struct error_injection injection;
+
+	/* Save old hw registers' values before we modified them */
+	u32 nbctl_mcgctl_saved;		/* When true, following 2 are valid */
+	u32 old_nbctl;
+	unsigned long old_mcgctl;	/* per core on this node */
+
+	/* MC Type Index value: socket F vs Family 10h */
+	u32 mc_type_index;
+
+	/* misc settings */
+	struct flags {
+		unsigned long cf8_extcfg:1;
+	} flags;
+};
+
+struct scrubrate {
+       u32 scrubval;           /* bit pattern for scrub rate */
+       u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
+};
+
+extern struct scrubrate scrubrates[23];
+extern u32 revf_quad_ddr2_shift[16];
+extern const char *tt_msgs[4];
+extern const char *ll_msgs[4];
+extern const char *rrrr_msgs[16];
+extern const char *to_msgs[2];
+extern const char *pp_msgs[4];
+extern const char *ii_msgs[4];
+extern const char *ext_msgs[32];
+extern const char *htlink_msgs[8];
+
+#ifdef CONFIG_EDAC_DEBUG
+#define NUM_DBG_ATTRS 9
+#else
+#define NUM_DBG_ATTRS 0
+#endif
+
+#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
+#define NUM_INJ_ATTRS 5
+#else
+#define NUM_INJ_ATTRS 0
+#endif
+
+extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS],
+				     amd64_inj_attrs[NUM_INJ_ATTRS];
+
+/*
+ * Each of the PCI Device IDs types have their own set of hardware accessor
+ * functions and per device encoding/decoding logic.
+ */
+struct low_ops {
+	int (*probe_valid_hardware)(struct amd64_pvt *pvt);
+	int (*early_channel_count)(struct amd64_pvt *pvt);
+
+	u64 (*get_error_address)(struct mem_ctl_info *mci,
+			struct amd64_error_info_regs *info);
+	void (*read_dram_base_limit)(struct amd64_pvt *pvt, int dram);
+	void (*read_dram_ctl_register)(struct amd64_pvt *pvt);
+	void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci,
+					struct amd64_error_info_regs *info,
+					u64 SystemAddr);
+	int (*dbam_map_to_pages)(struct amd64_pvt *pvt, int dram_map);
+};
+
+struct amd64_family_type {
+	const char *ctl_name;
+	u16 addr_f1_ctl;
+	u16 misc_f3_ctl;
+	struct low_ops ops;
+};
+
+static struct amd64_family_type amd64_family_types[];
+
+static inline const char *get_amd_family_name(int index)
+{
+	return amd64_family_types[index].ctl_name;
+}
+
+static inline struct low_ops *family_ops(int index)
+{
+	return &amd64_family_types[index].ops;
+}
+
+/*
+ * For future CPU versions, verify the following as new 'slow' rates appear and
+ * modify the necessary skip values for the supported CPU.
+ */
+#define K8_MIN_SCRUB_RATE_BITS	0x0
+#define F10_MIN_SCRUB_RATE_BITS	0x5
+#define F11_MIN_SCRUB_RATE_BITS	0x6
+
+int amd64_process_error_info(struct mem_ctl_info *mci,
+			     struct amd64_error_info_regs *info,
+			     int handle_errors);
+int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
+			     u64 *hole_offset, u64 *hole_size);
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
new file mode 100644
index 0000000..0a41b24
--- /dev/null
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -0,0 +1,255 @@
+#include "amd64_edac.h"
+
+/*
+ * accept a hex value and store it into the virtual error register file, field:
+ * nbeal and nbeah. Assume virtual error values have already been set for: NBSL,
+ * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and
+ * CHANNEL
+ */
+static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long long value;
+	int ret = 0;
+
+	ret = strict_strtoull(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBEA= 0x%llx\n", value);
+
+		/* place the value into the virtual error packet */
+		pvt->ctl_error_info.nbeal = (u32) value;
+		value >>= 32;
+		pvt->ctl_error_info.nbeah = (u32) value;
+
+		/* Process the Mapping request */
+		/* TODO: Add race prevention */
+		amd64_process_error_info(mci, &pvt->ctl_error_info, 1);
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
+static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 value;
+
+	value = pvt->ctl_error_info.nbeah;
+	value <<= 32;
+	value |= pvt->ctl_error_info.nbeal;
+
+	return sprintf(data, "%llx\n", value);
+}
+
+/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
+static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBSL= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbsl = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBSL value written */
+static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 value;
+
+	value = pvt->ctl_error_info.nbsl;
+
+	return sprintf(data, "%x\n", value);
+}
+
+/* store the NBSH (MCA NB Status High) value user desires */
+static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBSH= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbsh = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBSH value written */
+static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 value;
+
+	value = pvt->ctl_error_info.nbsh;
+
+	return sprintf(data, "%x\n", value);
+}
+
+/* accept and store the NBCFG (MCA NB Configuration) value user desires */
+static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBCFG= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbcfg = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* various show routines for the controls of a MCI */
+static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
+}
+
+
+static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->dhar);
+}
+
+
+static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->dbam0);
+}
+
+
+static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%llx\n", pvt->top_mem);
+}
+
+
+static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%llx\n", pvt->top_mem2);
+}
+
+static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
+{
+	u64 hole_base = 0;
+	u64 hole_offset = 0;
+	u64 hole_size = 0;
+
+	amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size);
+
+	return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset,
+						 hole_size);
+}
+
+/*
+ * update NUM_DBG_ATTRS in case you add new members
+ */
+struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
+
+	{
+		.attr = {
+			.name = "nbea_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbea_show,
+		.store = amd64_nbea_store,
+	},
+	{
+		.attr = {
+			.name = "nbsl_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbsl_show,
+		.store = amd64_nbsl_store,
+	},
+	{
+		.attr = {
+			.name = "nbsh_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbsh_show,
+		.store = amd64_nbsh_store,
+	},
+	{
+		.attr = {
+			.name = "nbcfg_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbcfg_show,
+		.store = amd64_nbcfg_store,
+	},
+	{
+		.attr = {
+			.name = "dhar",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_dhar_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "dbam",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_dbam_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "topmem",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_topmem_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "topmem2",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_topmem2_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "dram_hole",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_hole_show,
+		.store = NULL,
+	},
+};
diff --git a/drivers/edac/amd64_edac_err_types.c b/drivers/edac/amd64_edac_err_types.c
new file mode 100644
index 0000000..f212ff1
--- /dev/null
+++ b/drivers/edac/amd64_edac_err_types.c
@@ -0,0 +1,161 @@
+#include "amd64_edac.h"
+
+/*
+ * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
+ * for DDR2 DRAM mapping.
+ */
+u32 revf_quad_ddr2_shift[] = {
+	0,	/* 0000b NULL DIMM (128mb) */
+	28,	/* 0001b 256mb */
+	29,	/* 0010b 512mb */
+	29,	/* 0011b 512mb */
+	29,	/* 0100b 512mb */
+	30,	/* 0101b 1gb */
+	30,	/* 0110b 1gb */
+	31,	/* 0111b 2gb */
+	31,	/* 1000b 2gb */
+	32,	/* 1001b 4gb */
+	32,	/* 1010b 4gb */
+	33,	/* 1011b 8gb */
+	0,	/* 1100b future */
+	0,	/* 1101b future */
+	0,	/* 1110b future */
+	0	/* 1111b future */
+};
+
+/*
+ * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
+ * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
+ * or higher value'.
+ *
+ *FIXME: Produce a better mapping/linearisation.
+ */
+
+struct scrubrate scrubrates[] = {
+	{ 0x01, 1600000000UL},
+	{ 0x02, 800000000UL},
+	{ 0x03, 400000000UL},
+	{ 0x04, 200000000UL},
+	{ 0x05, 100000000UL},
+	{ 0x06, 50000000UL},
+	{ 0x07, 25000000UL},
+	{ 0x08, 12284069UL},
+	{ 0x09, 6274509UL},
+	{ 0x0A, 3121951UL},
+	{ 0x0B, 1560975UL},
+	{ 0x0C, 781440UL},
+	{ 0x0D, 390720UL},
+	{ 0x0E, 195300UL},
+	{ 0x0F, 97650UL},
+	{ 0x10, 48854UL},
+	{ 0x11, 24427UL},
+	{ 0x12, 12213UL},
+	{ 0x13, 6101UL},
+	{ 0x14, 3051UL},
+	{ 0x15, 1523UL},
+	{ 0x16, 761UL},
+	{ 0x00, 0UL},        /* scrubbing off */
+};
+
+/*
+ * string representation for the different MCA reported error types, see F3x48
+ * or MSR0000_0411.
+ */
+const char *tt_msgs[] = {        /* transaction type */
+	"instruction",
+	"data",
+	"generic",
+	"reserved"
+};
+
+const char *ll_msgs[] = {	/* cache level */
+	"L0",
+	"L1",
+	"L2",
+	"L3/generic"
+};
+
+const char *rrrr_msgs[] = {
+	"generic",
+	"generic read",
+	"generic write",
+	"data read",
+	"data write",
+	"inst fetch",
+	"prefetch",
+	"evict",
+	"snoop",
+	"reserved RRRR= 9",
+	"reserved RRRR= 10",
+	"reserved RRRR= 11",
+	"reserved RRRR= 12",
+	"reserved RRRR= 13",
+	"reserved RRRR= 14",
+	"reserved RRRR= 15"
+};
+
+const char *pp_msgs[] = {	/* participating processor */
+	"local node originated (SRC)",
+	"local node responded to request (RES)",
+	"local node observed as 3rd party (OBS)",
+	"generic"
+};
+
+const char *to_msgs[] = {
+	"no timeout",
+	"timed out"
+};
+
+const char *ii_msgs[] = {	/* memory or i/o */
+	"mem access",
+	"reserved",
+	"i/o access",
+	"generic"
+};
+
+/* Map the 5 bits of Extended Error code to the string table. */
+const char *ext_msgs[] = {	/* extended error */
+	"K8 ECC error/F10 reserved",	/* 0_0000b */
+	"CRC error",			/* 0_0001b */
+	"sync error",			/* 0_0010b */
+	"mst abort",			/* 0_0011b */
+	"tgt abort",			/* 0_0100b */
+	"GART error",			/* 0_0101b */
+	"RMW error",			/* 0_0110b */
+	"Wdog timer error",		/* 0_0111b */
+	"F10-ECC/K8-Chipkill error",	/* 0_1000b */
+	"DEV Error",			/* 0_1001b */
+	"Link Data error",		/* 0_1010b */
+	"Link or L3 Protocol error",	/* 0_1011b */
+	"NB Array error",		/* 0_1100b */
+	"DRAM Parity error",		/* 0_1101b */
+	"Link Retry/GART Table Walk/DEV Table Walk error", /* 0_1110b */
+	"Res 0x0ff error",		/* 0_1111b */
+	"Res 0x100 error",		/* 1_0000b */
+	"Res 0x101 error",		/* 1_0001b */
+	"Res 0x102 error",		/* 1_0010b */
+	"Res 0x103 error",		/* 1_0011b */
+	"Res 0x104 error",		/* 1_0100b */
+	"Res 0x105 error",		/* 1_0101b */
+	"Res 0x106 error",		/* 1_0110b */
+	"Res 0x107 error",		/* 1_0111b */
+	"Res 0x108 error",		/* 1_1000b */
+	"Res 0x109 error",		/* 1_1001b */
+	"Res 0x10A error",		/* 1_1010b */
+	"Res 0x10B error",		/* 1_1011b */
+	"L3 Cache Data error",		/* 1_1100b */
+	"L3 CacheTag error",		/* 1_1101b */
+	"L3 Cache LRU error",		/* 1_1110b */
+	"Res 0x1FF error"		/* 1_1111b */
+};
+
+const char *htlink_msgs[] = {
+	"none",
+	"1",
+	"2",
+	"1 2",
+	"3",
+	"1 3",
+	"2 3",
+	"1 2 3"
+};
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
new file mode 100644
index 0000000..d3675b7
--- /dev/null
+++ b/drivers/edac/amd64_edac_inj.c
@@ -0,0 +1,185 @@
+#include "amd64_edac.h"
+
+/*
+ * store error injection section value which refers to one of 4 16-byte sections
+ * within a 64-byte cacheline
+ *
+ * range: 0..3
+ */
+static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci,
+					  const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+		pvt->injection.section = (u32) value;
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * store error injection word value which refers to one of 9 16-bit word of the
+ * 16-byte (128-bit + ECC bits) section
+ *
+ * range: 0..8
+ */
+static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		value = (value <= 8) ? value : 0;
+		pvt->injection.word = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * store 16 bit error injection vector which enables injecting errors to the
+ * corresponding bit within the error injection word above. When used during a
+ * DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
+ */
+static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci,
+					     const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+
+		pvt->injection.bit_map = (u32) value & 0xFFFF;
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * Do a DRAM ECC read. Assemble staged values in the pvt area, format into
+ * fields needed by the injection registers and read the NB Array Data Port.
+ */
+static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	u32 section, word_bits;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		/* Form value to choose 16-byte section of cacheline */
+		section = F10_NB_ARRAY_DRAM_ECC |
+				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_ADDR, section);
+
+		word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
+						pvt->injection.bit_map);
+
+		/* Issue 'word' and 'bit' along with the READ request */
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_DATA, word_bits);
+
+		debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * Do a DRAM ECC write. Assemble staged values in the pvt area and format into
+ * fields needed by the injection registers.
+ */
+static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	u32 section, word_bits;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		/* Form value to choose 16-byte section of cacheline */
+		section = F10_NB_ARRAY_DRAM_ECC |
+				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_ADDR, section);
+
+		word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
+						pvt->injection.bit_map);
+
+		/* Issue 'word' and 'bit' along with the READ request */
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_DATA, word_bits);
+
+		debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * update NUM_INJ_ATTRS in case you add new members
+ */
+struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
+
+	{
+		.attr = {
+			.name = "inject_section",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_section_store,
+	},
+	{
+		.attr = {
+			.name = "inject_word",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_word_store,
+	},
+	{
+		.attr = {
+			.name = "inject_ecc_vector",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_ecc_vector_store,
+	},
+	{
+		.attr = {
+			.name = "inject_write",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_write_store,
+	},
+	{
+		.attr = {
+			.name = "inject_read",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_read_store,
+	},
+};
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 6ad95c8..48d3b14 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -76,10 +76,11 @@
 extern int edac_debug_level;
 
 #ifndef CONFIG_EDAC_DEBUG_VERBOSE
-#define edac_debug_printk(level, fmt, arg...)                            \
-	do {                                                             \
-		if (level <= edac_debug_level)                           \
-			edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \
+#define edac_debug_printk(level, fmt, arg...)                           \
+	do {                                                            \
+		if (level <= edac_debug_level)                          \
+			edac_printk(KERN_DEBUG, EDAC_DEBUG,		\
+				    "%s: " fmt, __func__, ##arg);	\
 	} while (0)
 #else  /* CONFIG_EDAC_DEBUG_VERBOSE */
 #define edac_debug_printk(level, fmt, arg...)                            \
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 5f1b540..24c84ae 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -596,6 +596,7 @@
 
 	return year;
 }
+EXPORT_SYMBOL(dmi_get_year);
 
 /**
  *	dmi_walk - Walk the DMI table and get called back for every record
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index edb0253..11f3739 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -69,7 +69,7 @@
 
 config GPIO_XILINX
 	bool "Xilinx GPIO support"
-	depends on PPC_OF
+	depends on PPC_OF || MICROBLAZE
 	help
 	  Say yes here to support the Xilinx FPGA GPIO device
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 94a7688..8fab789 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -2294,7 +2294,12 @@
 		}
 	}
 
-	if (connector->funcs->set_property)
+	/* Do DPMS ourselves */
+	if (property == connector->dev->mode_config.dpms_property) {
+		if (connector->funcs->dpms)
+			(*connector->funcs->dpms)(connector, (int) out_resp->value);
+		ret = 0;
+	} else if (connector->funcs->set_property)
 		ret = connector->funcs->set_property(connector, property, out_resp->value);
 
 	/* store the property value if succesful */
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 4589044..a6f73f1 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -199,6 +199,29 @@
 }
 
 /**
+ * drm_helper_encoder_in_use - check if a given encoder is in use
+ * @encoder: encoder to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk @encoders's DRM device's mode_config and see if it's in use.
+ *
+ * RETURNS:
+ * True if @encoder is part of the mode_config, false otherwise.
+ */
+bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(drm_helper_encoder_in_use);
+
+/**
  * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
  * @crtc: CRTC to check
  *
@@ -216,7 +239,7 @@
 	struct drm_device *dev = crtc->dev;
 	/* FIXME: Locking around list access? */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
-		if (encoder->crtc == crtc)
+		if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
 			return true;
 	return false;
 }
@@ -240,7 +263,7 @@
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		encoder_funcs = encoder->helper_private;
-		if (!encoder->crtc)
+		if (!drm_helper_encoder_in_use(encoder))
 			(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
 	}
 
@@ -935,6 +958,88 @@
 }
 EXPORT_SYMBOL(drm_helper_initial_config);
 
+static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = crtc->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder && connector->encoder->crtc == crtc)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+/**
+ * drm_helper_connector_dpms
+ * @connector affected connector
+ * @mode DPMS mode
+ *
+ * Calls the low-level connector DPMS function, then
+ * calls appropriate encoder and crtc DPMS functions as well
+ */
+void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
+{
+	struct drm_encoder *encoder = connector->encoder;
+	struct drm_crtc *crtc = encoder ? encoder->crtc : NULL;
+	int old_dpms;
+
+	if (mode == connector->dpms)
+		return;
+
+	old_dpms = connector->dpms;
+	connector->dpms = mode;
+
+	/* from off to on, do crtc then encoder */
+	if (mode < old_dpms) {
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+	}
+
+	/* from on to off, do encoder then crtc */
+	if (mode > old_dpms) {
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+	}
+
+	return;
+}
+EXPORT_SYMBOL(drm_helper_connector_dpms);
+
 /**
  * drm_hotplug_stage_two
  * @dev DRM device
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index c4d9b33..801a0d0 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -289,6 +289,11 @@
 	struct drm_display_mode *mode;
 	struct detailed_pixel_timing *pt = &timing->data.pixel_data;
 
+	/* ignore tiny modes */
+	if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 ||
+	    ((pt->vactive_hi << 8) | pt->hactive_lo) < 64)
+		return NULL;
+
 	if (pt->stereo) {
 		printk(KERN_WARNING "stereo mode not supported\n");
 		return NULL;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 93e677a..fc8e5ac 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -196,6 +196,7 @@
 {
 	int ret = 0;
 	unsigned long sh_flags = 0;
+	char *irqname;
 
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
@@ -227,8 +228,13 @@
 	if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
 		sh_flags = IRQF_SHARED;
 
+	if (dev->devname)
+		irqname = dev->devname;
+	else
+		irqname = dev->driver->name;
+
 	ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
-			  sh_flags, dev->devname, dev);
+			  sh_flags, irqname, dev);
 
 	if (ret < 0) {
 		mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 8f93729..9987ab8 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -147,7 +147,7 @@
 	enum drm_connector_status status;
 
 	status = connector->funcs->detect(connector);
-	return snprintf(buf, PAGE_SIZE, "%s",
+	return snprintf(buf, PAGE_SIZE, "%s\n",
 			drm_get_connector_status_name(status));
 }
 
@@ -166,7 +166,7 @@
 	if (ret)
 		return 0;
 
-	return snprintf(buf, PAGE_SIZE, "%s",
+	return snprintf(buf, PAGE_SIZE, "%s\n",
 			drm_get_dpms_name((int)dpms_status));
 }
 
@@ -176,7 +176,7 @@
 {
 	struct drm_connector *connector = to_drm_connector(device);
 
-	return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" :
+	return snprintf(buf, PAGE_SIZE, "%s\n", connector->encoder ? "enabled" :
 			"disabled");
 }
 
@@ -317,6 +317,7 @@
 
 static struct bin_attribute edid_attr = {
 	.attr.name = "edid",
+	.attr.mode = 0444,
 	.size = 128,
 	.read = edid_show,
 };
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 054576d..1a60626f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -996,12 +996,6 @@
 	int fb_bar = IS_I9XX(dev) ? 2 : 0;
 	int ret = 0;
 
-	dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL);
-	if (!dev->devname) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
 		0xff000000;
 
@@ -1015,7 +1009,7 @@
 
 	ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
 	if (ret)
-		goto kfree_devname;
+		goto out;
 
 	/* Basic memrange allocator for stolen space (aka vram) */
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
@@ -1033,7 +1027,7 @@
 
 	ret = i915_gem_init_ringbuffer(dev);
 	if (ret)
-		goto kfree_devname;
+		goto out;
 
 	/* Allow hardware batchbuffers unless told otherwise.
 	 */
@@ -1065,8 +1059,6 @@
 
 destroy_ringbuffer:
 	i915_gem_cleanup_ringbuffer(dev);
-kfree_devname:
-	kfree(dev->devname);
 out:
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index ff9bcca..6de97fc 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -436,11 +436,6 @@
 				  struct drm_property *property,
 				  uint64_t value)
 {
-	struct drm_device *dev = connector->dev;
-
-	if (property == dev->mode_config.dpms_property && connector->encoder)
-		intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
 	return 0;
 }
 
@@ -457,6 +452,7 @@
 };
 
 static const struct drm_connector_funcs intel_crt_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.detect = intel_crt_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = intel_crt_destroy,
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 8b8d6e6..1ee3007 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -316,6 +316,7 @@
 };
 
 static const struct drm_connector_funcs intel_dvo_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_dvo_save,
 	.restore = intel_dvo_restore,
 	.detect = intel_dvo_detect,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d874b0c..4ea2a65 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -239,6 +239,7 @@
 };
 
 static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_hdmi_save,
 	.restore = intel_hdmi_restore,
 	.detect = intel_hdmi_detect,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index f22e6ef..f073ed8 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -406,11 +406,6 @@
 				   struct drm_property *property,
 				   uint64_t value)
 {
-	struct drm_device *dev = connector->dev;
-
-	if (property == dev->mode_config.dpms_property && connector->encoder)
-		intel_lvds_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
 	return 0;
 }
 
@@ -429,6 +424,7 @@
 };
 
 static const struct drm_connector_funcs intel_lvds_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_lvds_save,
 	.restore = intel_lvds_restore,
 	.detect = intel_lvds_detect,
@@ -455,7 +451,7 @@
 }
 
 /* These systems claim to have LVDS, but really don't */
-static const struct dmi_system_id __initdata intel_no_lvds[] = {
+static const struct dmi_system_id intel_no_lvds[] = {
 	{
 		.callback = intel_no_lvds_dmi_callback,
 		.ident = "Apple Mac Mini (Core series)",
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 7cb9ddf..9a00adb 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1617,6 +1617,7 @@
 };
 
 static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_sdvo_save,
 	.restore = intel_sdvo_restore,
 	.detect = intel_sdvo_detect,
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index c7d9ef0..50d7ed7 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1629,6 +1629,7 @@
 };
 
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_tv_save,
 	.restore = intel_tv_restore,
 	.detect = intel_tv_detect,
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 521ef6a..89c4c44 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2185,9 +2185,9 @@
 
 	/* check if the ring is padded out to 16-dword alignment */
 
-	tail_aligned = dev_priv->ring.tail & 0xf;
+	tail_aligned = dev_priv->ring.tail & (RADEON_RING_ALIGN-1);
 	if (tail_aligned) {
-		int num_p2 = 16 - tail_aligned;
+		int num_p2 = RADEON_RING_ALIGN - tail_aligned;
 
 		ring = dev_priv->ring.start;
 		/* pad with some CP_PACKET2 */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index e266e5f..127d045 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -1965,11 +1965,14 @@
 
 #define RING_LOCALS	int write, _nr, _align_nr; unsigned int mask; u32 *ring;
 
+#define RADEON_RING_ALIGN 16
+
 #define BEGIN_RING( n ) do {						\
 	if ( RADEON_VERBOSE ) {						\
 		DRM_INFO( "BEGIN_RING( %d )\n", (n));			\
 	}								\
-	_align_nr = (n + 0xf) & ~0xf;					\
+	_align_nr = RADEON_RING_ALIGN - ((dev_priv->ring.tail + n) & (RADEON_RING_ALIGN-1));	\
+	_align_nr += n;							\
 	if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) {	\
                 COMMIT_RING();						\
 		radeon_wait_ring( dev_priv, _align_nr * sizeof(u32));	\
diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
index baa28b7..b9680f5 100644
--- a/drivers/i2c/busses/i2c-sh7760.c
+++ b/drivers/i2c/busses/i2c-sh7760.c
@@ -396,7 +396,7 @@
 	signed char cdf, cdfm;
 	int scgd, scgdm, scgds;
 
-	mclk = clk_get(NULL, "module_clk");
+	mclk = clk_get(NULL, "peripheral_clk");
 	if (IS_ERR(mclk)) {
 		return PTR_ERR(mclk);
 	} else {
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 537da1c..e59b6de 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -402,27 +402,23 @@
 	return cbl;
 }
 
-#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC)
+#ifndef CONFIG_SPARC64
 /**
  *	init_hwif_ali15x3	-	Initialize the ALI IDE x86 stuff
  *	@hwif: interface to configure
  *
  *	Obtain the IRQ tables for an ALi based IDE solution on the PC
  *	class platforms. This part of the code isn't applicable to the
- *	Sparc and PowerPC systems.
+ *	Sparc systems.
  */
 
 static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 {
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 ideic, inmir;
 	s8 irq_routing_table[] = { -1,  9, 3, 10, 4,  5, 7,  6,
 				      1, 11, 0, 12, 0, 14, 0, 15 };
 	int irq = -1;
 
-	if (dev->device == PCI_DEVICE_ID_AL_M5229)
-		hwif->irq = hwif->channel ? 15 : 14;
-
 	if (isa_dev) {
 		/*
 		 * read IDE interface control
@@ -455,7 +451,7 @@
 }
 #else
 #define init_hwif_ali15x3 NULL
-#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */
+#endif /* CONFIG_SPARC64 */
 
 /**
  *	init_dma_ali15x3	-	set up DMA on ALi15x3
diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 403d0e4..fc0949a 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -216,6 +216,7 @@
 	.host_flags 	= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE |
 			  IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS,
 	.pio_mask 	= ATA_PIO6,
+	.chipset	= ide_generic,
 };
 
 /*
@@ -246,8 +247,7 @@
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
@@ -304,10 +304,9 @@
 		ide_std_init_ports(&hw, tf_base, ctl_base + 6);
 
 	hw.irq = board->irq_pin;
-	hw.chipset = ide_generic;
 	hw.dev = &pdev->dev;
 
-	host = ide_host_alloc(&at91_ide_port_info, hws);
+	host = ide_host_alloc(&at91_ide_port_info, hws, 1);
 	if (!host) {
 		perr("failed to allocate ide host\n");
 		return -ENOMEM;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 4601364..58121bd 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -449,7 +449,7 @@
 }
 #endif
 
-static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif)
+static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif)
 {
 	int i;
 	unsigned long *ata_regs = hw->io_ports_array;
@@ -499,6 +499,7 @@
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	.mwdma_mask		= ATA_MWDMA2,
 #endif
+	.chipset		= ide_au1xxx,
 };
 
 static int au_ide_probe(struct platform_device *dev)
@@ -507,7 +508,7 @@
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -548,9 +549,8 @@
 	auide_setup_ports(&hw, ahwif);
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_au1xxx;
 
-	ret = ide_host_add(&au1xxx_port_info, hws, &host);
+	ret = ide_host_add(&au1xxx_port_info, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index d028f88..e3c6a59 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -121,7 +121,7 @@
     return 1;
 }
 
-static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
 				      unsigned long ctl, unsigned long irq_port,
 				      ide_ack_intr_t *ack_intr)
 {
@@ -139,13 +139,12 @@
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info buddha_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
@@ -161,7 +160,7 @@
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+		struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
@@ -225,7 +224,7 @@
 			hws[i] = &hw[i];
 		}
 
-		ide_host_add(&buddha_port_info, hws, NULL);
+		ide_host_add(&buddha_port_info, hws, i, NULL);
 	}
 
 	return 0;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 8890276..1683ed5 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	struct ide_hw hw[2], *hws[2];
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
@@ -762,11 +762,9 @@
 
 	ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
 	hw[0].irq = 14;
-	hw[0].chipset = ide_cmd640;
 
 	ide_std_init_ports(&hw[1], 0x170, 0x376);
 	hw[1].irq = 15;
-	hw[1].chipset = ide_cmd640;
 
 	printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
 			 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
@@ -824,7 +822,8 @@
 	cmd640_dump_regs();
 #endif
 
-	return ide_host_add(&cmd640_port_info, hws, NULL);
+	return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
+			    NULL);
 }
 
 module_param_named(probe_vlb, cmd640_vlb, bool, 0);
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 87987a7..bd066bb 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -136,7 +136,7 @@
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 	hw[0].irq = 14;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 
 static const struct pci_device_id cs5520_pci_tbl[] = {
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index f153b95..1e10eba 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -68,6 +68,7 @@
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
 	.init_chipset		= delkin_cb_init_chipset,
+	.chipset		= ide_pci,
 };
 
 static int __devinit
@@ -76,7 +77,7 @@
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
@@ -97,9 +98,8 @@
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
-	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
+	rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
 	if (rc)
 		goto out_disable;
 
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 0e2df67..22fa273 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -111,9 +111,10 @@
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
-static void __init falconide_setup_ports(hw_regs_t *hw)
+static void __init falconide_setup_ports(struct ide_hw *hw)
 {
 	int i;
 
@@ -128,8 +129,6 @@
 
 	hw->irq = IRQ_MFP_IDE;
 	hw->ack_intr = NULL;
-
-	hw->chipset = ide_generic;
 }
 
     /*
@@ -139,7 +138,7 @@
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
@@ -154,7 +153,7 @@
 
 	falconide_setup_ports(&hw);
 
-	host = ide_host_alloc(&falconide_port_info, hws);
+	host = ide_host_alloc(&falconide_port_info, hws, 1);
 	if (host == NULL) {
 		rc = -ENOMEM;
 		goto err;
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index c711951..4451a6a 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -88,7 +88,7 @@
     return 1;
 }
 
-static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
 				     unsigned long ctl, unsigned long irq_port,
 				     ide_ack_intr_t *ack_intr)
 {
@@ -106,14 +106,13 @@
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info gayle_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
@@ -126,7 +125,7 @@
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
@@ -171,7 +170,7 @@
 	hws[i] = &hw[i];
     }
 
-    rc = ide_host_add(&gayle_port_info, hws, NULL);
+    rc = ide_host_add(&gayle_port_info, hws, i, NULL);
     if (rc)
 	release_mem_region(res_start, res_n);
 
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 0feb66c..7ce68ef 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -138,14 +138,6 @@
 #undef	HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
 
-static const char *quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static const char *bad_ata100_5[] = {
 	"IBM-DTLA-307075",
 	"IBM-DTLA-307060",
@@ -729,27 +721,13 @@
 	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
 }
 
-static void hpt3xx_quirkproc(ide_drive_t *drive)
-{
-	char *m			= (char *)&drive->id[ATA_ID_PROD];
-	const  char **list	= quirk_drives;
-
-	while (*list)
-		if (strstr(m, *list++)) {
-			drive->quirk_list = 1;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
 	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list == 0)
+	if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 		return;
 
 	if (info->chip_type >= HPT370) {
@@ -1404,7 +1382,6 @@
 static const struct ide_port_ops hpt3xx_port_ops = {
 	.set_pio_mode		= hpt3xx_set_pio_mode,
 	.set_dma_mode		= hpt3xx_set_mode,
-	.quirkproc		= hpt3xx_quirkproc,
 	.maskproc		= hpt3xx_maskproc,
 	.mdma_filter		= hpt3xx_mdma_filter,
 	.udma_filter		= hpt3xx_udma_filter,
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 36da913..5af3d0f 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -65,8 +65,6 @@
 };
 
 struct icside_state {
-	unsigned int channel;
-	unsigned int enabled;
 	void __iomem *irq_port;
 	void __iomem *ioc_base;
 	unsigned int sel;
@@ -116,18 +114,11 @@
 	struct icside_state *state = ec->irq_data;
 	void __iomem *base = state->irq_port;
 
-	state->enabled = 1;
+	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
+	readb(base + ICS_ARCIN_V6_INTROFFSET_2);
 
-	switch (state->channel) {
-	case 0:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_2);
-		break;
-	case 1:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_1);
-		break;
-	}
+	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
+	readb(base + ICS_ARCIN_V6_INTROFFSET_1);
 }
 
 /* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
@@ -137,8 +128,6 @@
 {
 	struct icside_state *state = ec->irq_data;
 
-	state->enabled = 0;
-
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
 }
@@ -160,44 +149,6 @@
 	.irqpending	= icside_irqpending_arcin_v6,
 };
 
-/*
- * Handle routing of interrupts.  This is called before
- * we write the command to the drive.
- */
-static void icside_maskproc(ide_drive_t *drive, int mask)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-	struct icside_state *state = ecard_get_drvdata(ec);
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	state->channel = hwif->channel;
-
-	if (state->enabled && !mask) {
-		switch (hwif->channel) {
-		case 0:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			break;
-		case 1:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			break;
-		}
-	} else {
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-	}
-
-	local_irq_restore(flags);
-}
-
-static const struct ide_port_ops icside_v6_no_dma_port_ops = {
-	.maskproc		= icside_maskproc,
-};
-
 #ifdef CONFIG_BLK_DEV_IDEDMA_ICS
 /*
  * SG-DMA support.
@@ -275,7 +226,6 @@
 
 static const struct ide_port_ops icside_v6_port_ops = {
 	.set_dma_mode		= icside_set_dma_mode,
-	.maskproc		= icside_maskproc,
 };
 
 static void icside_dma_host_set(ide_drive_t *drive, int on)
@@ -320,11 +270,6 @@
 	BUG_ON(dma_channel_active(ec->dma));
 
 	/*
-	 * Ensure that we have the right interrupt routed.
-	 */
-	icside_maskproc(drive, 0);
-
-	/*
 	 * Route the DMA signals to the correct interface.
 	 */
 	writeb(state->sel | hwif->channel, state->ioc_base);
@@ -381,7 +326,7 @@
 	return -EOPNOTSUPP;
 }
 
-static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
@@ -398,11 +343,11 @@
 
 	hw->irq = ec->irq;
 	hw->dev = &ec->dev;
-	hw->chipset = ide_acorn;
 }
 
 static const struct ide_port_info icside_v5_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -410,7 +355,7 @@
 {
 	void __iomem *base;
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -431,7 +376,7 @@
 
 	icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
 
-	host = ide_host_alloc(&icside_v5_port_info, hws);
+	host = ide_host_alloc(&icside_v5_port_info, hws, 1);
 	if (host == NULL)
 		return -ENODEV;
 
@@ -452,11 +397,11 @@
 
 static const struct ide_port_info icside_v6_port_info __initdata = {
 	.init_dma		= icside_dma_off_init,
-	.port_ops		= &icside_v6_no_dma_port_ops,
 	.dma_ops		= &icside_v6_dma_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
 	.swdma_mask		= ATA_SWDMA2,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -466,7 +411,7 @@
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
@@ -506,7 +451,7 @@
 	icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
 	icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 2);
 	if (host == NULL)
 		return -ENODEV;
 
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 78aca75..979d342 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -25,12 +25,13 @@
 	.port_ops		= &ide_4drives_port_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
 				  IDE_HFLAG_4DRIVES,
+	.chipset		= ide_4drives,
 };
 
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw, &hw };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
@@ -52,9 +53,8 @@
 
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
-	hw.chipset = ide_4drives;
 
-	return ide_host_add(&ide_4drives_port_info, hws, NULL);
+	return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
 }
 
 module_init(ide_4drives_init);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7201b17..bbdd254 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -80,34 +80,6 @@
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
 /*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->buffer = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
-/*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
  */
@@ -119,19 +91,21 @@
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -191,20 +165,113 @@
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+	int err;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *failed_rq = drive->hwif->rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
+	pc->req_xfer = blk_rq_bytes(sense_rq);
+
 	if (drive->media == ide_tape)
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
+
+	/*
+	 * Push back the failed request and put request sense on top
+	 * of it.  The failed command will be retried after sense data
+	 * is acquired.
+	 */
+	blk_requeue_request(failed_rq->q, failed_rq);
+	drive->hwif->rq = NULL;
+	if (ide_queue_sense_rq(drive, pc)) {
+		blk_start_request(failed_rq);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+	}
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -246,7 +313,7 @@
 		return 32768;
 	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 			 rq->cmd_type == REQ_TYPE_ATA_PC)
-		return rq->data_len;
+		return blk_rq_bytes(rq);
 	else
 		return 0;
 }
@@ -276,7 +343,6 @@
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -303,18 +369,14 @@
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
 		int uptodate, error;
-		unsigned int done;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  pc->xferred);
@@ -343,7 +405,7 @@
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
@@ -353,6 +415,12 @@
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
+		/*
+		 * ->pc_callback() might change rq->data_len for
+		 * residual count, cache total length.
+		 */
+		done = blk_rq_bytes(rq);
+
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -361,7 +429,6 @@
 
 		if (blk_special_request(rq)) {
 			rq->errors = 0;
-			done = blk_rq_bytes(rq);
 			error = 0;
 		} else {
 
@@ -370,15 +437,10 @@
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
-				done = ide_rq_bytes(rq); /* FIXME */
-			else
-				done = blk_rq_bytes(rq);
-
 			error = uptodate ? 0 : -EIO;
 		}
 
-		ide_complete_rq(drive, error, done);
+		ide_complete_rq(drive, error, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
@@ -407,21 +469,11 @@
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	if (drive->media == ide_floppy && pc->buf == NULL) {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
-	}
-
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -599,7 +651,6 @@
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 925eb9e..424140c 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -182,7 +182,7 @@
 				 (sense->information[2] <<  8) |
 				 (sense->information[3]);
 
-			if (drive->queue->hardsect_size == 2048)
+			if (queue_logical_block_size(drive->queue) == 2048)
 				/* device sector size is 2K */
 				sector <<= 2;
 
@@ -206,54 +206,25 @@
 	ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
-				      struct request *failed_command)
-{
-	struct cdrom_info *info		= drive->driver_data;
-	struct request *rq		= &drive->request_sense_rq;
-
-	ide_debug_log(IDE_DBG_SENSE, "enter");
-
-	if (sense == NULL)
-		sense = &info->sense_data;
-
-	/* stuff the sense request in front of our current request */
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_ATA_PC;
-	rq->rq_disk = info->disk;
-
-	rq->data = sense;
-	rq->cmd[0] = GPCMD_REQUEST_SENSE;
-	rq->cmd[4] = 18;
-	rq->data_len = 18;
-
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-
-	/* NOTE! Save the failed command in "rq->buffer" */
-	rq->buffer = (void *) failed_command;
-
-	if (failed_command)
-		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
-					     failed_command->cmd[0]);
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
-	 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
-	 * failed request
+	 * For REQ_TYPE_SENSE, "rq->special" points to the original
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
-	struct request *failed = (struct request *)rq->buffer;
-	struct cdrom_info *info = drive->driver_data;
-	void *sense = &info->sense_data;
+	struct request *failed = (struct request *)rq->special;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
+			/*
+			 * Sense is always read into drive->sense_data.
+			 * Copy back if the failed request has its
+			 * sense pointer set.
+			 */
+			memcpy(failed->sense, sense, 18);
 			sense = failed->sense;
 			failed->sense_len = rq->sense_len;
 		}
@@ -428,22 +399,13 @@
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		cdrom_queue_request_sense(drive, NULL, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
 	if (stat & ATA_ERR) {
-		struct request_queue *q = drive->queue;
-		unsigned long flags;
-
-		spin_lock_irqsave(q->queue_lock, flags);
-		blkdev_dequeue_request(rq);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-
 		hwif->rq = NULL;
-
-		cdrom_queue_request_sense(drive, rq->sense, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -503,14 +465,8 @@
 	 * and some drives don't send them.  Sigh.
 	 */
 	if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
-	    cmd->nleft > 0 && cmd->nleft <= 5) {
-		unsigned int ofs = cmd->nbytes - cmd->nleft;
-
-		while (cmd->nleft > 0) {
-			*((u8 *)rq->data + ofs++) = 0;
-			cmd->nleft--;
-		}
-	}
+	    cmd->nleft > 0 && cmd->nleft <= 5)
+		cmd->nleft = 0;
 }
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
@@ -543,14 +499,18 @@
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
 
 		if (buffer)
-			*bufflen = rq->data_len;
+			*bufflen = rq->resid_len;
 
 		flags = rq->cmd_flags;
 		blk_put_request(rq);
@@ -608,7 +568,7 @@
 	struct request *rq = hwif->rq;
 	ide_expiry_t *expiry = NULL;
 	int dma_error = 0, dma, thislen, uptodate = 0;
-	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0, nsectors;
+	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
 	int sense = blk_sense_request(rq);
 	unsigned int timeout;
 	u16 len;
@@ -738,13 +698,8 @@
 
 out_end:
 	if (blk_pc_request(rq) && rc == 0) {
-		unsigned int dlen = rq->data_len;
-
-		rq->data_len = 0;
-
-		if (blk_end_request(rq, 0, dlen))
-			BUG();
-
+		rq->resid_len = 0;
+		blk_end_request_all(rq, 0);
 		hwif->rq = NULL;
 	} else {
 		if (sense && uptodate)
@@ -762,21 +717,13 @@
 			ide_cd_error_cmd(drive, cmd);
 
 		/* make sure it's fully ended */
-		if (blk_pc_request(rq))
-			nsectors = (rq->data_len + 511) >> 9;
-		else
-			nsectors = rq->hard_nr_sectors;
-
-		if (nsectors == 0)
-			nsectors = 1;
-
 		if (blk_fs_request(rq) == 0) {
-			rq->data_len -= (cmd->nbytes - cmd->nleft);
+			rq->resid_len -= cmd->nbytes - cmd->nleft;
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
-				rq->data_len += cmd->last_xfer_len;
+				rq->resid_len += cmd->last_xfer_len;
 		}
 
-		ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
 
 		if (sense && rc == 2)
 			ide_error(drive, "request sense failure", stat);
@@ -790,7 +737,7 @@
 	struct request_queue *q = drive->queue;
 	int write = rq_data_dir(rq) == WRITE;
 	unsigned short sectors_per_frame =
-		queue_hardsect_size(q) >> SECTOR_BITS;
+		queue_logical_block_size(q) >> SECTOR_BITS;
 
 	ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
 				  "secs_per_frame: %u",
@@ -809,8 +756,8 @@
 	}
 
 	/* fs requests *must* be hardware frame aligned */
-	if ((rq->nr_sectors & (sectors_per_frame - 1)) ||
-	    (rq->sector & (sectors_per_frame - 1)))
+	if ((blk_rq_sectors(rq) & (sectors_per_frame - 1)) ||
+	    (blk_rq_pos(rq) & (sectors_per_frame - 1)))
 		return ide_stopped;
 
 	/* use DMA, if possible */
@@ -838,15 +785,10 @@
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
@@ -858,7 +800,7 @@
 		 */
 		alignment = queue_dma_alignment(q) | q->dma_pad_mask;
 		if ((unsigned long)buf & alignment
-		    || rq->data_len & q->dma_pad_mask
+		    || blk_rq_bytes(rq) & q->dma_pad_mask
 		    || object_is_on_stack(buf))
 			drive->dma = 0;
 	}
@@ -896,6 +838,9 @@
 		goto out_end;
 	}
 
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
@@ -903,15 +848,14 @@
 
 	cmd.rq = rq;
 
-	if (blk_fs_request(rq) || rq->data_len) {
-		ide_init_sg_cmd(&cmd, blk_fs_request(rq) ? (rq->nr_sectors << 9)
-							 : rq->data_len);
+	if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
 		ide_map_sg(drive, &cmd);
 	}
 
 	return ide_issue_pc(drive, &cmd);
 out_end:
-	nsectors = rq->hard_nr_sectors;
+	nsectors = blk_rq_sectors(rq);
 
 	if (nsectors == 0)
 		nsectors = 1;
@@ -1077,8 +1021,8 @@
 	/* save a private copy of the TOC capacity for error handling */
 	drive->probed_capacity = toc->capacity * sectors_per_frame;
 
-	blk_queue_hardsect_size(drive->queue,
-				sectors_per_frame << SECTOR_BITS);
+	blk_queue_logical_block_size(drive->queue,
+				     sectors_per_frame << SECTOR_BITS);
 
 	/* first read just the header, so we know how long the TOC is */
 	stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
@@ -1394,9 +1338,9 @@
 /* standard prep_rq_fn that builds 10 byte cmds */
 static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 {
-	int hard_sect = queue_hardsect_size(q);
-	long block = (long)rq->hard_sector / (hard_sect >> 9);
-	unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9);
+	int hard_sect = queue_logical_block_size(q);
+	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
+	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
 
 	memset(rq->cmd, 0, BLK_MAX_CDB);
 
@@ -1599,7 +1543,7 @@
 
 	nslots = ide_cdrom_probe_capabilities(drive);
 
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 
 	if (ide_cdrom_register(drive, nslots)) {
 		printk(KERN_ERR PFX "%s: %s failed to register device with the"
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1d97101..93a3cf1b 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -87,10 +87,6 @@
 
 	struct atapi_toc *toc;
 
-	/* The result of the last successful request sense command
-	   on this device. */
-	struct request_sense sense_data;
-
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
 
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 9e47f35..527908f 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -155,6 +155,7 @@
 	.port_ops		= &idecs_port_ops,
 	.host_flags		= IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_pci,
 };
 
 static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
@@ -163,7 +164,7 @@
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+    struct ide_hw hw, *hws[] = { &hw };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
@@ -181,10 +182,9 @@
     memset(&hw, 0, sizeof(hw));
     ide_std_init_ports(&hw, io, ctl);
     hw.irq = irq;
-    hw.chipset = ide_pci;
     hw.dev = &handle->dev;
 
-    rc = ide_host_add(&idecs_port_info, hws, &host);
+    rc = ide_host_add(&idecs_port_info, hws, 1, &host);
     if (rc)
 	goto out_release;
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c..6a1de21 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -82,7 +82,7 @@
 					sector_t block)
 {
 	ide_hwif_t *hwif	= drive->hwif;
-	u16 nsectors		= (u16)rq->nr_sectors;
+	u16 nsectors		= (u16)blk_rq_sectors(rq);
 	u8 lba48		= !!(drive->dev_flags & IDE_DFLAG_LBA48);
 	u8 dma			= !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 	struct ide_cmd		cmd;
@@ -90,7 +90,7 @@
 	ide_startstop_t		rc;
 
 	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) {
-		if (block + rq->nr_sectors > 1ULL << 28)
+		if (block + blk_rq_sectors(rq) > 1ULL << 28)
 			dma = 0;
 		else
 			lba48 = 0;
@@ -195,9 +195,9 @@
 
 	ledtrig_ide_activity();
 
-	pr_debug("%s: %sing: block=%llu, sectors=%lu, buffer=0x%08lx\n",
+	pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n",
 		 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
-		 (unsigned long long)block, rq->nr_sectors,
+		 (unsigned long long)block, blk_rq_sectors(rq),
 		 (unsigned long)rq->buffer);
 
 	if (hwif->rw_disk)
@@ -302,14 +302,12 @@
 	{ NULL,		NULL }
 };
 
-static void idedisk_check_hpa(ide_drive_t *drive)
+static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
 {
-	unsigned long long capacity, set_max;
-	int lba48 = ata_id_lba48_enabled(drive->id);
+	u64 capacity, set_max;
 
 	capacity = drive->capacity64;
-
-	set_max = idedisk_read_native_max_address(drive, lba48);
+	set_max  = idedisk_read_native_max_address(drive, lba48);
 
 	if (ide_in_drive_list(drive->id, hpa_list)) {
 		/*
@@ -320,9 +318,31 @@
 			set_max--;
 	}
 
+	return set_max;
+}
+
+static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
+{
+	set_max = idedisk_set_max_address(drive, set_max, lba48);
+	if (set_max)
+		drive->capacity64 = set_max;
+
+	return set_max;
+}
+
+static void idedisk_check_hpa(ide_drive_t *drive)
+{
+	u64 capacity, set_max;
+	int lba48 = ata_id_lba48_enabled(drive->id);
+
+	capacity = drive->capacity64;
+	set_max  = ide_disk_hpa_get_native_capacity(drive, lba48);
+
 	if (set_max <= capacity)
 		return;
 
+	drive->probed_capacity = set_max;
+
 	printk(KERN_INFO "%s: Host Protected Area detected.\n"
 			 "\tcurrent capacity is %llu sectors (%llu MB)\n"
 			 "\tnative  capacity is %llu sectors (%llu MB)\n",
@@ -330,13 +350,13 @@
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
-	set_max = idedisk_set_max_address(drive, set_max, lba48);
+	if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
+		return;
 
-	if (set_max) {
-		drive->capacity64 = set_max;
+	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
+	if (set_max)
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
 				 drive->name);
-	}
 }
 
 static int ide_disk_get_capacity(ide_drive_t *drive)
@@ -358,6 +378,8 @@
 		drive->capacity64 = drive->cyl * drive->head * drive->sect;
 	}
 
+	drive->probed_capacity = drive->capacity64;
+
 	if (lba) {
 		drive->dev_flags |= IDE_DFLAG_LBA;
 
@@ -376,7 +398,7 @@
 		       "%llu sectors (%llu MB)\n",
 		       drive->name, (unsigned long long)drive->capacity64,
 		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
+		drive->probed_capacity = drive->capacity64 = 1ULL << 28;
 	}
 
 	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
@@ -392,6 +414,34 @@
 	return 0;
 }
 
+static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
+{
+	u64 set = min(capacity, drive->probed_capacity);
+	u16 *id = drive->id;
+	int lba48 = ata_id_lba48_enabled(id);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
+	    ata_id_hpa_enabled(id) == 0)
+		goto out;
+
+	/*
+	 * according to the spec the SET MAX ADDRESS command shall be
+	 * immediately preceded by a READ NATIVE MAX ADDRESS command
+	 */
+	capacity = ide_disk_hpa_get_native_capacity(drive, lba48);
+	if (capacity == 0)
+		goto out;
+
+	set = ide_disk_hpa_set_capacity(drive, set, lba48);
+	if (set) {
+		/* needed for ->resume to disable HPA */
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
+		return set;
+	}
+out:
+	return drive->capacity64;
+}
+
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -411,7 +461,6 @@
 	cmd->protocol = ATA_PROT_NODATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->special = cmd;
 }
 
@@ -429,14 +478,14 @@
 	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
 		return -EINVAL;
 
-	if (drive->special.b.set_multmode)
+	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
-	drive->special.b.set_multmode = 1;
+	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
 	blk_put_request(rq);
 
@@ -640,7 +689,7 @@
 	}
 
 	printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name,
-		q->max_sectors / 2);
+	       queue_max_sectors(q) / 2);
 
 	if (ata_id_is_ssd(id))
 		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
@@ -741,6 +790,7 @@
 
 const struct ide_disk_ops ide_ata_disk_ops = {
 	.check		= ide_disk_check,
+	.set_capacity	= ide_disk_set_capacity,
 	.get_capacity	= ide_disk_get_capacity,
 	.setup		= ide_disk_setup,
 	.flush		= ide_disk_flush,
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a0b8cab1..219e6fb 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -103,7 +103,7 @@
 				ide_finish_cmd(drive, cmd, stat);
 			else
 				ide_complete_rq(drive, 0,
-						cmd->rq->nr_sectors << 9);
+						blk_rq_sectors(cmd->rq) << 9);
 			return ide_stopped;
 		}
 		printk(KERN_ERR "%s: %s: bad DMA status (0x%02x)\n",
@@ -347,7 +347,6 @@
 
 	return mode;
 }
-EXPORT_SYMBOL_GPL(ide_find_dma_mode);
 
 static int ide_tune_dma(ide_drive_t *drive)
 {
@@ -510,23 +509,11 @@
 	/*
 	 * un-busy drive etc and make sure request is sane
 	 */
-
 	rq = hwif->rq;
-	if (!rq)
-		goto out;
-
-	hwif->rq = NULL;
-
-	rq->errors = 0;
-
-	if (!rq->bio)
-		goto out;
-
-	rq->sector = rq->bio->bi_sector;
-	rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->buffer = bio_data(rq->bio);
-out:
+	if (rq) {
+		hwif->rq = NULL;
+		rq->errors = 0;
+	}
 	return ret;
 }
 
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 5d5fb96..2b91419 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -52,7 +52,7 @@
 	}
 
 	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
-		drive->special.b.recalibrate = 1;
+		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 
 	++rq->errors;
 
@@ -268,9 +268,8 @@
 {
 	int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
 
-	drive->special.all = 0;
-	drive->special.b.set_geometry = legacy;
-	drive->special.b.recalibrate  = legacy;
+	drive->special_flags =
+		legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
 
 	drive->mult_count = 0;
 	drive->dev_flags &= ~IDE_DFLAG_PARKED;
@@ -280,7 +279,7 @@
 		drive->mult_req = 0;
 
 	if (drive->mult_req != drive->mult_count)
-		drive->special.b.set_multmode = 1;
+		drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 }
 
 static void pre_reset(ide_drive_t *drive)
@@ -408,8 +407,9 @@
 	/* more than enough time */
 	udelay(10);
 	/* clear SRST, leave nIEN (unless device is on the quirk list) */
-	tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) |
-			     ATA_DEVCTL_OBS);
+	tp_ops->write_devctl(hwif,
+		((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
+		 ATA_DEVCTL_OBS);
 	/* more than enough time */
 	udelay(10);
 	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2b4868d..6509817 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -134,13 +134,17 @@
 	drive->pc = pc;
 
 	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
 			ide_floppy_report_error(floppy, pc);
+
 		/* Giving up */
 		pc->error = IDE_DRV_ERROR_GENERAL;
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 
@@ -190,7 +194,7 @@
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
 	int block = sector / floppy->bs_factor;
-	int blocks = rq->nr_sectors / floppy->bs_factor;
+	int blocks = blk_rq_sectors(rq) / floppy->bs_factor;
 	int cmd = rq_data_dir(rq);
 
 	ide_debug_log(IDE_DBG_FUNC, "block: %d, blocks: %d", block, blocks);
@@ -216,16 +220,14 @@
 	ide_init_pc(pc);
 	memcpy(pc->c, rq->cmd, sizeof(pc->c));
 	pc->rq = rq;
-	if (rq->data_len && rq_data_dir(rq) == WRITE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->buf = rq->data;
-	if (rq->bio)
+	if (blk_rq_bytes(rq)) {
 		pc->flags |= PC_FLAG_DMA_OK;
-	/*
-	 * possibly problematic, doesn't look like ide-floppy correctly
-	 * handled scattered requests if dma fails...
-	 */
-	pc->req_xfer = pc->buf_size = rq->data_len;
+		if (rq_data_dir(rq) == WRITE)
+			pc->flags |= PC_FLAG_WRITING;
+	}
+	/* pio will be performed by ide_pio_bytes() which handles sg fine */
+	pc->buf = NULL;
+	pc->req_xfer = pc->buf_size = blk_rq_bytes(rq);
 }
 
 static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
@@ -257,16 +259,16 @@
 			goto out_end;
 	}
 	if (blk_fs_request(rq)) {
-		if (((long)rq->sector % floppy->bs_factor) ||
-		    (rq->nr_sectors % floppy->bs_factor)) {
+		if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
+		    (blk_rq_sectors(rq) % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
 				drive->name);
 			goto out_end;
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
+		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
 		idefloppy_blockpc_cmd(floppy, pc, rq);
@@ -275,6 +277,8 @@
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 4b6b71e..2141190 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -287,6 +287,19 @@
 	return ret;
 }
 
+static unsigned long long ide_gd_set_capacity(struct gendisk *disk,
+					      unsigned long long capacity)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	const struct ide_disk_ops *disk_ops = drive->disk_ops;
+
+	if (disk_ops->set_capacity)
+		return disk_ops->set_capacity(drive, capacity);
+
+	return drive->capacity64;
+}
+
 static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -315,6 +328,7 @@
 	.locked_ioctl		= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
+	.set_capacity		= ide_gd_set_capacity,
 	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 7812ca0..54d7c46 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -29,6 +29,7 @@
 
 static const struct ide_port_info ide_generic_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 #ifdef CONFIG_ARM
@@ -85,7 +86,7 @@
 
 static int __init ide_generic_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
@@ -132,9 +133,7 @@
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			hw.chipset = ide_generic;
-
-			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
+			rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
 				release_region(io_addr, 8);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index c06ebdc..520f42c 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -64,26 +64,26 @@
 
 #define H8300_IDE_GAP (2)
 
-static inline void hw_setup(hw_regs_t *hw)
+static inline void hw_setup(struct ide_hw *hw)
 {
 	int i;
 
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info h8300_port_info = {
 	.tp_ops			= &h8300_tp_ops,
 	.host_flags		= IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __init h8300_ide_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
@@ -96,7 +96,7 @@
 
 	hw_setup(&hw);
 
-	return ide_host_add(&h8300_port_info, hws, NULL);
+	return ide_host_add(&h8300_port_info, hws, 1, NULL);
 
 out_busy:
 	printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n");
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6415a2e..272cc38 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -116,9 +116,9 @@
 unsigned int ide_rq_bytes(struct request *rq)
 {
 	if (blk_pc_request(rq))
-		return rq->data_len;
+		return blk_rq_bytes(rq);
 	else
-		return rq->hard_cur_sectors << 9;
+		return blk_rq_cur_sectors(rq) << 9;
 }
 EXPORT_SYMBOL_GPL(ide_rq_bytes);
 
@@ -133,7 +133,7 @@
 	 * and complete the whole request right now
 	 */
 	if (blk_noretry_request(rq) && error <= 0)
-		nr_bytes = rq->hard_nr_sectors << 9;
+		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
 	if (rc == 0)
@@ -184,29 +184,42 @@
 	tf->command = ATA_CMD_SET_MULTI;
 }
 
-static ide_startstop_t ide_disk_special(ide_drive_t *drive)
+/**
+ *	do_special		-	issue some special commands
+ *	@drive: drive the command is for
+ *
+ *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
+ *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
+ */
+
+static ide_startstop_t do_special(ide_drive_t *drive)
 {
-	special_t *s = &drive->special;
 	struct ide_cmd cmd;
 
+#ifdef DEBUG
+	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
+		drive->special_flags);
+#endif
+	if (drive->media != ide_disk) {
+		drive->special_flags = 0;
+		drive->mult_req = 0;
+		return ide_stopped;
+	}
+
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.protocol = ATA_PROT_NODATA;
 
-	if (s->b.set_geometry) {
-		s->b.set_geometry = 0;
+	if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
+		drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
 		ide_tf_set_specify_cmd(drive, &cmd.tf);
-	} else if (s->b.recalibrate) {
-		s->b.recalibrate = 0;
+	} else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
+		drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
 		ide_tf_set_restore_cmd(drive, &cmd.tf);
-	} else if (s->b.set_multmode) {
-		s->b.set_multmode = 0;
+	} else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
+		drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
 		ide_tf_set_setmult_cmd(drive, &cmd.tf);
-	} else if (s->all) {
-		int special = s->all;
-		s->all = 0;
-		printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
-		return ide_stopped;
-	}
+	} else
+		BUG();
 
 	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
 	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
@@ -217,45 +230,13 @@
 	return ide_started;
 }
 
-/**
- *	do_special		-	issue some special commands
- *	@drive: drive the command is for
- *
- *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
- *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
- *
- *	It used to do much more, but has been scaled back.
- */
-
-static ide_startstop_t do_special (ide_drive_t *drive)
-{
-	special_t *s = &drive->special;
-
-#ifdef DEBUG
-	printk("%s: do_special: 0x%02x\n", drive->name, s->all);
-#endif
-	if (drive->media == ide_disk)
-		return ide_disk_special(drive);
-
-	s->all = 0;
-	drive->mult_req = 0;
-	return ide_stopped;
-}
-
 void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
-		cmd->sg_nents = 1;
-	} else if (!rq->bio) {
-		sg_init_one(sg, rq->data, rq->data_len);
-		cmd->sg_nents = 1;
-	} else
-		cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+	cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 }
 EXPORT_SYMBOL_GPL(ide_map_sg);
 
@@ -286,7 +267,7 @@
 
 	if (cmd) {
 		if (cmd->protocol == ATA_PROT_PIO) {
-			ide_init_sg_cmd(cmd, rq->nr_sectors << 9);
+			ide_init_sg_cmd(cmd, blk_rq_sectors(rq) << 9);
 			ide_map_sg(drive, cmd);
 		}
 
@@ -358,7 +339,8 @@
 		printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
 		return startstop;
 	}
-	if (!drive->special.all) {
+
+	if (drive->special_flags == 0) {
 		struct ide_driver *drv;
 
 		/*
@@ -371,7 +353,7 @@
 		if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->data;
+			struct request_pm_state *pm = rq->special;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, pm->pm_step);
@@ -394,7 +376,7 @@
 
 		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 
-		return drv->do_request(drive, rq, rq->sector);
+		return drv->do_request(drive, rq, blk_rq_pos(rq));
 	}
 	return do_special(drive);
 kill_rq:
@@ -484,6 +466,9 @@
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
@@ -491,10 +476,10 @@
 
 	if (!ide_lock_port(hwif)) {
 		ide_hwif_t *prev_port;
+
+		WARN_ON_ONCE(hwif->rq);
 repeat:
 		prev_port = hwif->host->cur_port;
-		hwif->rq = NULL;
-
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
 		    time_after(drive->sleep, jiffies)) {
 			ide_unlock_port(hwif);
@@ -503,11 +488,15 @@
 
 		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
 		    hwif != prev_port) {
+			ide_drive_t *cur_dev =
+				prev_port ? prev_port->cur_dev : NULL;
+
 			/*
 			 * set nIEN for previous port, drives in the
-			 * quirk_list may not like intr setups/cleanups
+			 * quirk list may not like intr setups/cleanups
 			 */
-			if (prev_port && prev_port->cur_dev->quirk_list == 0)
+			if (cur_dev &&
+			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 				prev_port->tp_ops->write_devctl(prev_port,
 								ATA_NIEN |
 								ATA_DEVCTL_OBS);
@@ -523,7 +512,9 @@
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
-		rq = elv_next_request(drive->queue);
+		if (!rq)
+			rq = blk_fetch_request(drive->queue);
+
 		spin_unlock_irq(q->queue_lock);
 		spin_lock_irq(&hwif->lock);
 
@@ -535,7 +526,7 @@
 		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
-		 * blk_stop_queue() doesn't prevent the elv_next_request()
+		 * blk_stop_queue() doesn't prevent the blk_fetch_request()
 		 * above to return us whatever is in the queue. Since we call
 		 * ide_do_request() ourselves, we end up taking requests while
 		 * the queue is blocked...
@@ -559,8 +550,11 @@
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwif->lock);
 
-		if (startstop == ide_stopped)
+		if (startstop == ide_stopped) {
+			rq = hwif->rq;
+			hwif->rq = NULL;
 			goto repeat;
+		}
 	} else
 		goto plug_device;
 out:
@@ -576,18 +570,24 @@
 plug_device_2:
 	spin_lock_irq(q->queue_lock);
 
+	if (rq)
+		blk_requeue_request(q, rq);
 	if (!elv_queue_empty(q))
 		blk_plug_device(q);
 }
 
-static void ide_plug_device(ide_drive_t *drive)
+static void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (rq)
+		blk_requeue_request(q, rq);
 	if (!elv_queue_empty(q))
 		blk_plug_device(q);
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -636,6 +636,7 @@
 	unsigned long	flags;
 	int		wait = -1;
 	int		plug_device = 0;
+	struct request	*uninitialized_var(rq_in_flight);
 
 	spin_lock_irqsave(&hwif->lock, flags);
 
@@ -697,6 +698,8 @@
 		spin_lock_irq(&hwif->lock);
 		enable_irq(hwif->irq);
 		if (startstop == ide_stopped && hwif->polling == 0) {
+			rq_in_flight = hwif->rq;
+			hwif->rq = NULL;
 			ide_unlock_port(hwif);
 			plug_device = 1;
 		}
@@ -705,7 +708,7 @@
 
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
-		ide_plug_device(drive);
+		ide_requeue_and_plug(drive, rq_in_flight);
 	}
 }
 
@@ -791,6 +794,7 @@
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 	int plug_device = 0;
+	struct request *uninitialized_var(rq_in_flight);
 
 	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
 		if (hwif != host->cur_port)
@@ -870,6 +874,8 @@
 	 */
 	if (startstop == ide_stopped && hwif->polling == 0) {
 		BUG_ON(hwif->handler);
+		rq_in_flight = hwif->rq;
+		hwif->rq = NULL;
 		ide_unlock_port(hwif);
 		plug_device = 1;
 	}
@@ -879,7 +885,7 @@
 out_early:
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
-		ide_plug_device(drive);
+		ide_requeue_and_plug(drive, rq_in_flight);
 	}
 
 	return irq_ret;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index c1c25eb..5991b23 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -231,7 +231,6 @@
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_len = 1;
 	rq->cmd[0] = REQ_DRIVE_RESET;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	if (blk_execute_rq(drive->queue, NULL, rq, 1))
 		ret = rq->errors;
 	blk_put_request(rq);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 06fe002..fa04715 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -282,6 +282,29 @@
 	return 0;
 }
 
+static const char *nien_quirk_list[] = {
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL
+};
+
+void ide_check_nien_quirk_list(ide_drive_t *drive)
+{
+	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
+
+	for (list = nien_quirk_list; *list != NULL; list++)
+		if (strstr(m, *list) != NULL) {
+			drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
+			return;
+		}
+}
+
 int ide_driveid_update(ide_drive_t *drive)
 {
 	u16 *id;
@@ -311,7 +334,6 @@
 
 	return 1;
 out_err:
-	SELECT_MASK(drive, 0);
 	if (rc == 2)
 		printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__);
 	kfree(id);
@@ -365,7 +387,7 @@
 
 	tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
 
-	if (drive->quirk_list == 2)
+	if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
 
 	error = __ide_wait_stat(drive, drive->ready_stat,
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 8c5dcbf..b9654a7 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -1,7 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 
-static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
+static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
 				u8 port_no, const struct ide_port_info *d,
 				unsigned long config)
 {
@@ -33,7 +33,6 @@
 
 	ide_std_init_ports(hw, base, ctl);
 	hw->irq = irq;
-	hw->chipset = d->chipset;
 	hw->config = config;
 
 	hws[port_no] = hw;
@@ -41,7 +40,7 @@
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
@@ -53,6 +52,6 @@
 	    (d->host_flags & IDE_HFLAG_SINGLE))
 		return -ENOENT;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 2148df8..e386a32 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -96,7 +96,7 @@
 
 		if (rq)
 			printk(KERN_CONT ", sector=%llu",
-			       (unsigned long long)rq->sector);
+			       (unsigned long long)blk_rq_pos(rq));
 	}
 	printk(KERN_CONT "\n");
 }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 310d03f..a914023 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -24,11 +24,8 @@
 			start_queue = 1;
 		spin_unlock_irq(&hwif->lock);
 
-		if (start_queue) {
-			spin_lock_irq(q->queue_lock);
-			blk_start_queueing(q);
-			spin_unlock_irq(q->queue_lock);
-		}
+		if (start_queue)
+			blk_run_queue(q);
 		return;
 	}
 	spin_unlock_irq(&hwif->lock);
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0d8a151..ba1488b 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -7,7 +7,6 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int ret;
 
 	/* call ACPI _GTM only once */
@@ -15,11 +14,9 @@
 		ide_acpi_get_timing(hwif);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int err;
 
 	/* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@
 	ide_acpi_exec_tfs(drive);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_RESUME;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
 	rqpm.pm_state = PM_EVENT_ON;
 
@@ -77,7 +71,7 @@
 
 void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 #ifdef DEBUG_PM
 	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@
 
 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
-	struct ide_cmd *cmd = rq->special;
-
-	memset(cmd, 0, sizeof(*cmd));
+	struct request_pm_state *pm = rq->special;
+	struct ide_cmd cmd = { };
 
 	switch (pm->pm_step) {
 	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@
 			return ide_stopped;
 		}
 		if (ata_id_flush_ext_enabled(drive->id))
-			cmd->tf.command = ATA_CMD_FLUSH_EXT;
+			cmd.tf.command = ATA_CMD_FLUSH_EXT;
 		else
-			cmd->tf.command = ATA_CMD_FLUSH;
+			cmd.tf.command = ATA_CMD_FLUSH;
 		goto out_do_tf;
 	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		cmd->tf.command = ATA_CMD_STANDBYNOW1;
+		cmd.tf.command = ATA_CMD_STANDBYNOW1;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
 		ide_set_max_pio(drive);
@@ -141,7 +133,7 @@
 			ide_complete_power_step(drive, rq);
 		return ide_stopped;
 	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		cmd->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
 		/*
@@ -163,11 +155,11 @@
 	return ide_stopped;
 
 out_do_tf:
-	cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd->valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd->protocol = ATA_PROT_NODATA;
+	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
+	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
+	cmd.protocol = ATA_PROT_NODATA;
 
-	return do_rw_taskfile(drive, cmd);
+	return do_rw_taskfile(drive, &cmd);
 }
 
 /**
@@ -181,7 +173,7 @@
 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 	unsigned long flags;
 
 	ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@
 
 void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == IDE_PM_START_SUSPEND)
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6e80b77..017b1df 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -29,6 +29,7 @@
 
 static const struct ide_port_info ide_pnp_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
@@ -36,7 +37,7 @@
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
@@ -62,9 +63,8 @@
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
-	hw.chipset = ide_generic;
 
-	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
+	rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c895ed5..f371b0d 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -97,7 +97,7 @@
 		drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
 
 		if (drive->mult_req)
-			drive->special.b.set_multmode = 1;
+			drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	}
 }
 
@@ -465,23 +465,8 @@
 	int rc;
 	u8 cmd;
 
-	/*
-	 *	In order to keep things simple we have an id
-	 *	block for all drives at all times. If the device
-	 *	is pre ATA or refuses ATA/ATAPI identify we
-	 *	will add faked data to this.
-	 *
-	 *	Also note that 0 everywhere means "can't do X"
-	 */
- 
 	drive->dev_flags &= ~IDE_DFLAG_ID_READ;
 
-	drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL);
-	if (drive->id == NULL) {
-		printk(KERN_ERR "ide: out of memory for id data.\n");
-		return 0;
-	}
-
 	m = (char *)&drive->id[ATA_ID_PROD];
 	strcpy(m, "UNKNOWN");
 
@@ -497,7 +482,7 @@
 		}
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-			goto out_free;
+			return 0;
 
 		/* identification failed? */
 		if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -521,7 +506,7 @@
 	}
 
 	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		goto out_free;
+		return 0;
 
 	/* The drive wasn't being helpful. Add generic info only */
 	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -535,9 +520,6 @@
 	}
 
 	return 1;
-out_free:
-	kfree(drive->id);
-	return 0;
 }
 
 static void hwif_release_dev(struct device *dev)
@@ -702,8 +684,14 @@
 	if (irqd)
 		disable_irq(hwif->irq);
 
-	if (ide_port_wait_ready(hwif) == -EBUSY)
-		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
+	rc = ide_port_wait_ready(hwif);
+	if (rc == -ENODEV) {
+		printk(KERN_INFO "%s: no devices on the port\n", hwif->name);
+		goto out;
+	} else if (rc == -EBUSY)
+		printk(KERN_ERR "%s: not ready before the probe\n", hwif->name);
+	else
+		rc = -ENODEV;
 
 	/*
 	 * Second drive should only exist if first drive was found,
@@ -714,7 +702,7 @@
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			rc = 0;
 	}
-
+out:
 	/*
 	 * Use cached IRQ number. It might be (and is...) changed by probe
 	 * code above
@@ -732,6 +720,8 @@
 	int i;
 
 	ide_port_for_each_present_dev(i, drive, hwif) {
+		ide_check_nien_quirk_list(drive);
+
 		if (port_ops && port_ops->quirkproc)
 			port_ops->quirkproc(drive);
 	}
@@ -817,8 +807,6 @@
 		if (ide_init_queue(drive)) {
 			printk(KERN_ERR "ide: failed to init %s\n",
 					drive->name);
-			kfree(drive->id);
-			drive->id = NULL;
 			drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 			continue;
 		}
@@ -947,9 +935,6 @@
 	blk_cleanup_queue(drive->queue);
 	drive->queue = NULL;
 
-	kfree(drive->id);
-	drive->id = NULL;
-
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 
 	complete(&drive->gendev_rel_comp);
@@ -1035,6 +1020,15 @@
 		if (port_ops && port_ops->init_dev)
 			port_ops->init_dev(drive);
 	}
+
+	ide_port_for_each_dev(i, drive, hwif) {
+		/*
+		 * default to PIO Mode 0 before we figure out
+		 * the most suited mode for the attached device
+		 */
+		if (port_ops && port_ops->set_pio_mode)
+			port_ops->set_pio_mode(drive, 0);
+	}
 }
 
 static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
@@ -1042,8 +1036,7 @@
 {
 	hwif->channel = port;
 
-	if (d->chipset)
-		hwif->chipset = d->chipset;
+	hwif->chipset = d->chipset ? d->chipset : ide_pci;
 
 	if (d->init_iops)
 		d->init_iops(hwif);
@@ -1124,16 +1117,19 @@
 
 	ide_port_for_each_dev(i, drive, hwif) {
 		u8 j = (hwif->index * MAX_DRIVES) + i;
+		u16 *saved_id = drive->id;
 
 		memset(drive, 0, sizeof(*drive));
+		memset(saved_id, 0, SECTOR_SIZE);
+		drive->id = saved_id;
 
 		drive->media			= ide_disk;
 		drive->select			= (i << 4) | ATA_DEVICE_OBS;
 		drive->hwif			= hwif;
 		drive->ready_stat		= ATA_DRDY;
 		drive->bad_wstat		= BAD_W_STAT;
-		drive->special.b.recalibrate	= 1;
-		drive->special.b.set_geometry	= 1;
+		drive->special_flags		= IDE_SFLAG_RECALIBRATE |
+						  IDE_SFLAG_SET_GEOMETRY;
 		drive->name[0]			= 'h';
 		drive->name[1]			= 'd';
 		drive->name[2]			= 'a' + j;
@@ -1168,11 +1164,10 @@
 	ide_port_init_devices_data(hwif);
 }
 
-static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
+static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
-	hwif->chipset = hw->chipset;
 	hwif->dev = hw->dev;
 	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
 	hwif->ack_intr = hw->ack_intr;
@@ -1233,8 +1228,10 @@
 	ide_drive_t *drive;
 	int i;
 
-	ide_port_for_each_dev(i, drive, hwif)
+	ide_port_for_each_dev(i, drive, hwif) {
+		kfree(drive->id);
 		kfree(drive);
+	}
 }
 
 static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
@@ -1248,6 +1245,18 @@
 		if (drive == NULL)
 			goto out_nomem;
 
+		/*
+		 * In order to keep things simple we have an id
+		 * block for all drives at all times. If the device
+		 * is pre ATA or refuses ATA/ATAPI identify we
+		 * will add faked data to this.
+		 *
+		 * Also note that 0 everywhere means "can't do X"
+		 */
+		drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
+		if (drive->id == NULL)
+			goto out_nomem;
+
 		hwif->devices[i] = drive;
 	}
 	return 0;
@@ -1257,7 +1266,8 @@
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d,
+				struct ide_hw **hws, unsigned int n_ports)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1268,7 +1278,7 @@
 	if (host == NULL)
 		return NULL;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
+	for (i = 0; i < n_ports; i++) {
 		ide_hwif_t *hwif;
 		int idx;
 
@@ -1288,6 +1298,7 @@
 		if (idx < 0) {
 			printk(KERN_ERR "%s: no free slot for interface\n",
 					d ? d->name : "ide");
+			ide_port_free_devices(hwif);
 			kfree(hwif);
 			continue;
 		}
@@ -1344,7 +1355,7 @@
 }
 
 int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
-		      hw_regs_t **hws)
+		      struct ide_hw **hws)
 {
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
@@ -1438,13 +1449,13 @@
 }
 EXPORT_SYMBOL_GPL(ide_host_register);
 
-int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
-		 struct ide_host **hostp)
+int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
+		 unsigned int n_ports, struct ide_host **hostp)
 {
 	struct ide_host *host;
 	int rc;
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, n_ports);
 	if (host == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3a53e08..4b447a8 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -131,13 +131,6 @@
 	IDETAPE_DIR_WRITE = (1 << 2),
 };
 
-struct idetape_bh {
-	u32 b_size;
-	atomic_t b_count;
-	struct idetape_bh *b_reqnext;
-	char *b_data;
-};
-
 /* Tape door status */
 #define DOOR_UNLOCKED			0
 #define DOOR_LOCKED			1
@@ -219,18 +212,12 @@
 
 	/* Data buffer size chosen based on the tape's recommendation */
 	int buffer_size;
-	/* merge buffer */
-	struct idetape_bh *merge_bh;
-	/* size of the merge buffer */
-	int merge_bh_size;
-	/* pointer to current buffer head within the merge buffer */
-	struct idetape_bh *bh;
-	char *b_data;
-	int b_count;
-
-	int pages_per_buffer;
-	/* Wasted space in each stage */
-	int excess_bh_size;
+	/* Staging buffer of buffer_size bytes */
+	void *buf;
+	/* The read/write cursor */
+	void *cur;
+	/* The number of valid bytes in buf */
+	size_t valid;
 
 	/* Measures average tape speed */
 	unsigned long avg_time;
@@ -253,18 +240,27 @@
 
 static void ide_tape_release(struct device *);
 
-static struct ide_tape_obj *ide_tape_get(struct gendisk *disk)
+static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
+
+static struct ide_tape_obj *ide_tape_get(struct gendisk *disk, bool cdev,
+					 unsigned int i)
 {
 	struct ide_tape_obj *tape = NULL;
 
 	mutex_lock(&idetape_ref_mutex);
-	tape = ide_drv_g(disk, ide_tape_obj);
+
+	if (cdev)
+		tape = idetape_devs[i];
+	else
+		tape = ide_drv_g(disk, ide_tape_obj);
+
 	if (tape) {
 		if (ide_device_get(tape->drive))
 			tape = NULL;
 		else
 			get_device(&tape->dev);
 	}
+
 	mutex_unlock(&idetape_ref_mutex);
 	return tape;
 }
@@ -280,102 +276,6 @@
 }
 
 /*
- * The variables below are used for the character device interface. Additional
- * state variables are defined in our ide_drive_t structure.
- */
-static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
-
-static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
-{
-	struct ide_tape_obj *tape = NULL;
-
-	mutex_lock(&idetape_ref_mutex);
-	tape = idetape_devs[i];
-	if (tape)
-		get_device(&tape->dev);
-	mutex_unlock(&idetape_ref_mutex);
-	return tape;
-}
-
-static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				  unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	while (bcount) {
-		if (bh == NULL)
-			break;
-		count = min(
-			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
-			bcount);
-		drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
-					atomic_read(&bh->b_count), count);
-		bcount -= count;
-		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
-	}
-
-	pc->bh = bh;
-
-	return bcount;
-}
-
-static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				   unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	while (bcount) {
-		if (bh == NULL)
-			break;
-		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
-		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
-		bcount -= count;
-		pc->b_data += count;
-		pc->b_count -= count;
-		if (!pc->b_count) {
-			bh = bh->b_reqnext;
-			pc->bh = bh;
-			if (bh) {
-				pc->b_data = bh->b_data;
-				pc->b_count = atomic_read(&bh->b_count);
-			}
-		}
-	}
-
-	return bcount;
-}
-
-static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-	unsigned int bcount = pc->xferred;
-
-	if (pc->flags & PC_FLAG_WRITING)
-		return;
-	while (bcount) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return;
-		}
-		count = min((unsigned int)bh->b_size, (unsigned int)bcount);
-		atomic_set(&bh->b_count, count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			bh = bh->b_reqnext;
-		bcount -= count;
-	}
-	pc->bh = bh;
-}
-
-/*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
  */
@@ -392,12 +292,10 @@
 		 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
 	/* Correct pc->xferred by asking the tape.	 */
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
+	if (pc->flags & PC_FLAG_DMA_ERROR)
 		pc->xferred = pc->req_xfer -
 			tape->blk_size *
 			get_unaligned_be32(&sense[3]);
-		idetape_update_buffers(drive, pc);
-	}
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -436,29 +334,6 @@
 	}
 }
 
-/* Free data buffers completely. */
-static void ide_tape_kfree_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *prev_bh, *bh = tape->merge_bh;
-
-	while (bh) {
-		u32 size = bh->b_size;
-
-		while (size) {
-			unsigned int order = fls(size >> PAGE_SHIFT)-1;
-
-			if (bh->b_data)
-				free_pages((unsigned long)bh->b_data, order);
-
-			size &= (order-1);
-			bh->b_data += (1 << order) * PAGE_SIZE;
-		}
-		prev_bh = bh;
-		bh = bh->b_reqnext;
-		kfree(prev_bh);
-	}
-}
-
 static void ide_tape_handle_dsc(ide_drive_t *);
 
 static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -496,7 +371,7 @@
 		}
 
 		tape->first_frame += blocks;
-		rq->current_nr_sectors -= blocks;
+		rq->resid_len -= blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -513,7 +388,8 @@
 		if (readpos[0] & 0x4) {
 			printk(KERN_INFO "ide-tape: Block location is unknown"
 					 "to the tape\n");
-			clear_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
+				  &drive->atapi_flags);
 			uptodate = 0;
 			err = IDE_DRV_ERROR_GENERAL;
 		} else {
@@ -522,7 +398,8 @@
 
 			tape->partition = readpos[1];
 			tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]);
-			set_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
+			set_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
+				&drive->atapi_flags);
 		}
 	}
 
@@ -558,19 +435,6 @@
 	idetape_postpone_request(drive);
 }
 
-static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				unsigned int bcount, int write)
-{
-	unsigned int bleft;
-
-	if (write)
-		bleft = idetape_output_buffers(drive, pc, bcount);
-	else
-		bleft = idetape_input_buffers(drive, pc, bcount);
-
-	return bcount - bleft;
-}
-
 /*
  * Packet Command Interface
  *
@@ -622,6 +486,8 @@
 
 	if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
 		(pc->flags & PC_FLAG_ABORT)) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		/*
 		 * We will "abort" retrying a packet command in case legitimate
 		 * error code was received (crossing a filemark, or end of the
@@ -641,8 +507,10 @@
 			/* Giving up */
 			pc->error = IDE_DRV_ERROR_GENERAL;
 		}
+
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
@@ -695,7 +563,7 @@
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -711,27 +579,22 @@
 				   struct ide_atapi_pc *pc, struct request *rq,
 				   u8 opcode)
 {
-	struct idetape_bh *bh = (struct idetape_bh *)rq->special;
-	unsigned int length = rq->current_nr_sectors;
+	unsigned int length = blk_rq_sectors(rq);
 
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = bh;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
 	if (pc->req_xfer == tape->buffer_size)
 		pc->flags |= PC_FLAG_DMA_OK;
 
-	if (opcode == READ_6) {
+	if (opcode == READ_6)
 		pc->c[0] = READ_6;
-		atomic_set(&bh->b_count, 0);
-	} else if (opcode == WRITE_6) {
+	else if (opcode == WRITE_6) {
 		pc->c[0] = WRITE_6;
 		pc->flags |= PC_FLAG_WRITING;
-		pc->b_data = bh->b_data;
-		pc->b_count = atomic_read(&bh->b_count);
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
@@ -747,12 +610,10 @@
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
-			" current_nr_sectors: %u\n",
-			(unsigned long long)rq->sector, rq->nr_sectors,
-			rq->current_nr_sectors);
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %u\n"
+		  (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq));
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -788,15 +649,15 @@
 
 	if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
 	    (rq->cmd[13] & REQ_IDETAPE_PC2) == 0)
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 
 	if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 		drive->dev_flags &= ~IDE_DFLAG_POST_RESET;
 	}
 
-	if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) &&
-	    (stat & ATA_DSC) == 0) {
+	if (!(drive->atapi_flags & IDE_AFLAG_IGNORE_DSC) &&
+	    !(stat & ATA_DSC)) {
 		if (postponed_rq == NULL) {
 			tape->dsc_polling_start = jiffies;
 			tape->dsc_poll_freq = tape->best_dsc_rw_freq;
@@ -816,7 +677,9 @@
 			tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW;
 		idetape_postpone_request(drive);
 		return ide_stopped;
-	}
+	} else
+		drive->atapi_flags &= ~IDE_AFLAG_IGNORE_DSC;
+
 	if (rq->cmd[13] & REQ_IDETAPE_READ) {
 		pc = &tape->queued_pc;
 		ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
@@ -828,7 +691,7 @@
 		goto out;
 	}
 	if (rq->cmd[13] & REQ_IDETAPE_PC1) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 		rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
 		rq->cmd[13] |= REQ_IDETAPE_PC2;
 		goto out;
@@ -840,6 +703,9 @@
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
@@ -847,170 +713,13 @@
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
 /*
- * The function below uses __get_free_pages to allocate a data buffer of size
- * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
- * much as possible.
- *
- * It returns a pointer to the newly allocated buffer, or NULL in case of
- * failure.
- */
-static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full, int clear)
-{
-	struct idetape_bh *prev_bh, *bh, *merge_bh;
-	int pages = tape->pages_per_buffer;
-	unsigned int order, b_allocd;
-	char *b_data = NULL;
-
-	merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	bh = merge_bh;
-	if (bh == NULL)
-		goto abort;
-
-	order = fls(pages) - 1;
-	bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-	if (!bh->b_data)
-		goto abort;
-	b_allocd = (1 << order) * PAGE_SIZE;
-	pages &= (order-1);
-
-	if (clear)
-		memset(bh->b_data, 0, b_allocd);
-	bh->b_reqnext = NULL;
-	bh->b_size = b_allocd;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
-
-	while (pages) {
-		order = fls(pages) - 1;
-		b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-		if (!b_data)
-			goto abort;
-		b_allocd = (1 << order) * PAGE_SIZE;
-
-		if (clear)
-			memset(b_data, 0, b_allocd);
-
-		/* newly allocated page frames below buffer header or ...*/
-		if (bh->b_data == b_data + b_allocd) {
-			bh->b_size += b_allocd;
-			bh->b_data -= b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		/* they are above the header */
-		if (b_data == bh->b_data + bh->b_size) {
-			bh->b_size += b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		prev_bh = bh;
-		bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-		if (!bh) {
-			free_pages((unsigned long) b_data, order);
-			goto abort;
-		}
-		bh->b_reqnext = NULL;
-		bh->b_data = b_data;
-		bh->b_size = b_allocd;
-		atomic_set(&bh->b_count, full ? bh->b_size : 0);
-		prev_bh->b_reqnext = bh;
-
-		pages &= (order-1);
-	}
-
-	bh->b_size -= tape->excess_bh_size;
-	if (full)
-		atomic_sub(tape->excess_bh_size, &bh->b_count);
-	return merge_bh;
-abort:
-	ide_tape_kfree_buffer(tape);
-	return NULL;
-}
-
-static int idetape_copy_stage_from_user(idetape_tape_t *tape,
-					const char __user *buf, int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int count;
-	int ret = 0;
-
-	while (n) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		count = min((unsigned int)
-				(bh->b_size - atomic_read(&bh->b_count)),
-				(unsigned int)n);
-		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				count))
-			ret = 1;
-		n -= count;
-		atomic_add(count, &bh->b_count);
-		buf += count;
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
-	}
-	tape->bh = bh;
-	return ret;
-}
-
-static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
-				      int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int count;
-	int ret = 0;
-
-	while (n) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		count = min(tape->b_count, n);
-		if  (copy_to_user(buf, tape->b_data, count))
-			ret = 1;
-		n -= count;
-		tape->b_data += count;
-		tape->b_count -= count;
-		buf += count;
-		if (!tape->b_count) {
-			bh = bh->b_reqnext;
-			tape->bh = bh;
-			if (bh) {
-				tape->b_data = bh->b_data;
-				tape->b_count = atomic_read(&bh->b_count);
-			}
-		}
-	}
-	return ret;
-}
-
-static void idetape_init_merge_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-	tape->bh = tape->merge_bh;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-		atomic_set(&bh->b_count, 0);
-	else {
-		tape->b_data = bh->b_data;
-		tape->b_count = atomic_read(&bh->b_count);
-	}
-}
-
-/*
  * Write a filemark if write_filemark=1. Flush the device buffers without
  * writing a filemark otherwise.
  */
@@ -1030,7 +739,7 @@
 	int load_attempted = 0;
 
 	/* Wait for the tape to become ready */
-	set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
+	set_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT), &drive->atapi_flags);
 	timeout += jiffies;
 	while (time_before(jiffies, timeout)) {
 		if (ide_do_test_unit_ready(drive, disk) == 0)
@@ -1106,11 +815,11 @@
 	if (tape->chrdev_dir != IDETAPE_DIR_READ)
 		return;
 
-	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
-	tape->merge_bh_size = 0;
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	clear_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags);
+	tape->valid = 0;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -1164,36 +873,44 @@
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
-				 struct idetape_bh *bh)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	struct request *rq;
-	int ret, errors;
+	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
+	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
+	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
-	rq->special = (void *)bh;
-	rq->sector = tape->first_frame;
-	rq->nr_sectors = blocks;
-	rq->current_nr_sectors = blocks;
+	rq->__sector = tape->first_frame;
+
+	if (size) {
+		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
+				      __GFP_WAIT);
+		if (ret)
+			goto out_put;
+	}
+
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	errors = rq->errors;
-	ret = tape->blk_size * (blocks - rq->current_nr_sectors);
+	/* calculate the number of transferred bytes and update buffer state */
+	size -= rq->resid_len;
+	tape->cur = tape->buf;
+	if (cmd == REQ_IDETAPE_READ)
+		tape->valid = size;
+	else
+		tape->valid = 0;
+
+	ret = size;
+	if (rq->errors == IDE_DRV_ERROR_GENERAL)
+		ret = -EIO;
+out_put:
 	blk_put_request(rq);
-
-	if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
-		return 0;
-
-	if (tape->merge_bh)
-		idetape_init_merge_buffer(tape);
-	if (errors == IDE_DRV_ERROR_GENERAL)
-		return -EIO;
 	return ret;
 }
 
@@ -1230,153 +947,87 @@
 	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
 }
 
-/* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				     blocks, tape->merge_bh);
-}
-
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks, min;
-	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
 				" but we are not writing.\n");
 		return;
 	}
-	if (tape->merge_bh_size > tape->buffer_size) {
-		printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n");
-		tape->merge_bh_size = tape->buffer_size;
-	}
-	if (tape->merge_bh_size) {
-		blocks = tape->merge_bh_size / tape->blk_size;
-		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i;
+	if (tape->buf) {
+		size_t aligned = roundup(tape->valid, tape->blk_size);
 
-			blocks++;
-			i = tape->blk_size - tape->merge_bh_size %
-				tape->blk_size;
-			bh = tape->bh->b_reqnext;
-			while (bh) {
-				atomic_set(&bh->b_count, 0);
-				bh = bh->b_reqnext;
-			}
-			bh = tape->bh;
-			while (i) {
-				if (bh == NULL) {
-					printk(KERN_INFO "ide-tape: bug,"
-							 " bh NULL\n");
-					break;
-				}
-				min = min(i, (unsigned int)(bh->b_size -
-						atomic_read(&bh->b_count)));
-				memset(bh->b_data + atomic_read(&bh->b_count),
-						0, min);
-				atomic_add(min, &bh->b_count);
-				i -= min;
-				bh = bh->b_reqnext;
-			}
-		}
-		(void) idetape_add_chrdev_write_request(drive, blocks);
-		tape->merge_bh_size = 0;
-	}
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		memset(tape->cur, 0, aligned - tape->valid);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
 
-static int idetape_init_read(ide_drive_t *drive)
+static int idetape_init_rw(ide_drive_t *drive, int dir)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int bytes_read;
+	int rc;
 
-	/* Initialize read operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-			ide_tape_flush_merge_buffer(drive);
-			idetape_flush_tape_buffers(drive);
-		}
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size should be"
-					 " 0 now\n");
-			tape->merge_bh_size = 0;
-		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-		if (!tape->merge_bh)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_READ;
+	BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
 
-		/*
-		 * Issue a read 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode.
-		 * No point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0,
-							tape->merge_bh);
-			if (bytes_read < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return bytes_read;
-			}
+	if (tape->chrdev_dir == dir)
+		return 0;
+
+	if (tape->chrdev_dir == IDETAPE_DIR_READ)
+		ide_tape_discard_merge_buffer(drive, 1);
+	else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
+		ide_tape_flush_merge_buffer(drive);
+		idetape_flush_tape_buffers(drive);
+	}
+
+	if (tape->buf || tape->valid) {
+		printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+		tape->valid = 0;
+	}
+
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!tape->buf)
+		return -ENOMEM;
+	tape->chrdev_dir = dir;
+	tape->cur = tape->buf;
+
+	/*
+	 * Issue a 0 rw command to ensure that DSC handshake is
+	 * switched from completion mode to buffer available mode.  No
+	 * point in issuing this if DSC overlap isn't supported, some
+	 * drives (Seagate STT3401A) will return an error.
+	 */
+	if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
+		int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
+						  : REQ_IDETAPE_WRITE;
+
+		rc = idetape_queue_rw_tail(drive, cmd, 0);
+		if (rc < 0) {
+			kfree(tape->buf);
+			tape->buf = NULL;
+			tape->chrdev_dir = IDETAPE_DIR_NONE;
+			return rc;
 		}
 	}
 
 	return 0;
 }
 
-/* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
-
-	/* If we are at a filemark, return a read length of 0 */
-	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
-		return 0;
-
-	idetape_init_read(drive);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks,
-				     tape->merge_bh);
-}
-
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh;
-	int blocks;
+
+	memset(tape->buf, 0, tape->buffer_size);
 
 	while (bcount) {
-		unsigned int count;
+		unsigned int count = min(tape->buffer_size, bcount);
 
-		bh = tape->merge_bh;
-		count = min(tape->buffer_size, bcount);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
 		bcount -= count;
-		blocks = count / tape->blk_size;
-		while (count) {
-			atomic_set(&bh->b_count,
-				   min(count, (unsigned int)bh->b_size));
-			memset(bh->b_data, 0, atomic_read(&bh->b_count));
-			count -= atomic_read(&bh->b_count);
-			bh = bh->b_reqnext;
-		}
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
-				      tape->merge_bh);
 	}
 }
 
@@ -1456,8 +1107,9 @@
 	}
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		tape->merge_bh_size = 0;
-		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+		tape->valid = 0;
+		if (test_and_clear_bit(ilog2(IDE_AFLAG_FILEMARK),
+				       &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
 	}
@@ -1505,61 +1157,56 @@
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t bytes_read, temp, actually_read = 0, rc;
+	size_t done = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (test_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags))
+		if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
 			if (count > tape->blk_size &&
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
-	rc = idetape_init_read(drive);
+
+	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->merge_bh_size) {
-		actually_read = min((unsigned int)(tape->merge_bh_size),
-				    (unsigned int)count);
-		if (idetape_copy_stage_to_user(tape, buf, actually_read))
+
+	while (done < count) {
+		size_t todo;
+
+		/* refill if staging buffer is empty */
+		if (!tape->valid) {
+			/* If we are at a filemark, nothing more to read */
+			if (test_bit(ilog2(IDE_AFLAG_FILEMARK),
+				     &drive->atapi_flags))
+				break;
+			/* read */
+			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
+						  tape->buffer_size) <= 0)
+				break;
+		}
+
+		/* copy out */
+		todo = min_t(size_t, count - done, tape->valid);
+		if (copy_to_user(buf + done, tape->cur, todo))
 			ret = -EFAULT;
-		buf += actually_read;
-		tape->merge_bh_size -= actually_read;
-		count -= actually_read;
+
+		tape->cur += todo;
+		tape->valid -= todo;
+		done += todo;
 	}
-	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-		if (bytes_read <= 0)
-			goto finish;
-		if (idetape_copy_stage_to_user(tape, buf, bytes_read))
-			ret = -EFAULT;
-		buf += bytes_read;
-		count -= bytes_read;
-		actually_read += bytes_read;
-	}
-	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-		if (bytes_read <= 0)
-			goto finish;
-		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (idetape_copy_stage_to_user(tape, buf, temp))
-			ret = -EFAULT;
-		actually_read += temp;
-		tape->merge_bh_size = bytes_read-temp;
-	}
-finish:
-	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+
+	if (!done && test_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
 		return 0;
 	}
 
-	return ret ? ret : actually_read;
+	return ret ? ret : done;
 }
 
 static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1567,9 +1214,9 @@
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t actually_written = 0;
+	size_t done = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	/* The drive is write protected. */
 	if (tape->write_prot)
@@ -1578,80 +1225,31 @@
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	/* Initialize write operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-		if (tape->chrdev_dir == IDETAPE_DIR_READ)
-			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size "
-				"should be 0 now\n");
-			tape->merge_bh_size = 0;
-		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-		if (!tape->merge_bh)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		idetape_init_merge_buffer(tape);
+	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
+	if (rc < 0)
+		return rc;
 
-		/*
-		 * Issue a write 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode. No
-		 * point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0,
-							tape->merge_bh);
-			if (retval < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return retval;
-			}
-		}
-	}
-	if (count == 0)
-		return (0);
-	if (tape->merge_bh_size) {
-		if (tape->merge_bh_size >= tape->buffer_size) {
-			printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
-			tape->merge_bh_size = 0;
-		}
-		actually_written = min((unsigned int)
-				(tape->buffer_size - tape->merge_bh_size),
-				(unsigned int)count);
-		if (idetape_copy_stage_from_user(tape, buf, actually_written))
-				ret = -EFAULT;
-		buf += actually_written;
-		tape->merge_bh_size += actually_written;
-		count -= actually_written;
+	while (done < count) {
+		size_t todo;
 
-		if (tape->merge_bh_size == tape->buffer_size) {
-			ssize_t retval;
-			tape->merge_bh_size = 0;
-			retval = idetape_add_chrdev_write_request(drive, ctl);
-			if (retval <= 0)
-				return (retval);
-		}
-	}
-	while (count >= tape->buffer_size) {
-		ssize_t retval;
-		if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
+		/* flush if staging buffer is full */
+		if (tape->valid == tape->buffer_size &&
+		    idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+					  tape->buffer_size) <= 0)
+			return rc;
+
+		/* copy in */
+		todo = min_t(size_t, count - done,
+			     tape->buffer_size - tape->valid);
+		if (copy_from_user(tape->cur, buf + done, todo))
 			ret = -EFAULT;
-		buf += tape->buffer_size;
-		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive, ctl);
-		actually_written += tape->buffer_size;
-		if (retval <= 0)
-			return (retval);
+
+		tape->cur += todo;
+		tape->valid += todo;
+		done += todo;
 	}
-	if (count) {
-		actually_written += count;
-		if (idetape_copy_stage_from_user(tape, buf, count))
-			ret = -EFAULT;
-		tape->merge_bh_size += count;
-	}
-	return ret ? ret : actually_written;
+
+	return ret ? ret : done;
 }
 
 static int idetape_write_filemark(ide_drive_t *drive)
@@ -1735,7 +1333,8 @@
 		ide_tape_discard_merge_buffer(drive, 0);
 		retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
 		if (!retval)
-			clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
+				  &drive->atapi_flags);
 		return retval;
 	case MTNOP:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1757,9 +1356,11 @@
 			    mt_count % tape->blk_size)
 				return -EIO;
 			tape->user_bs_factor = mt_count / tape->blk_size;
-			clear_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_DETECT_BS),
+				  &drive->atapi_flags);
 		} else
-			set_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
+			set_bit(ilog2(IDE_AFLAG_DETECT_BS),
+				&drive->atapi_flags);
 		return 0;
 	case MTSEEK:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1812,7 +1413,7 @@
 		idetape_flush_tape_buffers(drive);
 	}
 	if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-		block_offset = tape->merge_bh_size /
+		block_offset = tape->valid /
 			(tape->blk_size * tape->user_bs_factor);
 		position = idetape_read_position(drive);
 		if (position < 0)
@@ -1885,7 +1486,7 @@
 		return -ENXIO;
 
 	lock_kernel();
-	tape = ide_tape_chrdev_get(i);
+	tape = ide_tape_get(NULL, true, i);
 	if (!tape) {
 		unlock_kernel();
 		return -ENXIO;
@@ -1904,20 +1505,20 @@
 
 	filp->private_data = tape;
 
-	if (test_and_set_bit(IDE_AFLAG_BUSY, &drive->atapi_flags)) {
+	if (test_and_set_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags)) {
 		retval = -EBUSY;
 		goto out_put_tape;
 	}
 
 	retval = idetape_wait_ready(drive, 60 * HZ);
 	if (retval) {
-		clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+		clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 		printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name);
 		goto out_put_tape;
 	}
 
 	idetape_read_position(drive);
-	if (!test_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags))
+	if (!test_bit(ilog2(IDE_AFLAG_ADDRESS_VALID), &drive->atapi_flags))
 		(void)idetape_rewind_tape(drive);
 
 	/* Read block size and write protect status from drive. */
@@ -1933,7 +1534,7 @@
 	if (tape->write_prot) {
 		if ((filp->f_flags & O_ACCMODE) == O_WRONLY ||
 		    (filp->f_flags & O_ACCMODE) == O_RDWR) {
-			clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 			retval = -EROFS;
 			goto out_put_tape;
 		}
@@ -1960,12 +1561,12 @@
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0);
-	if (tape->merge_bh != NULL) {
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (tape->buf != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	idetape_write_filemark(drive);
 	idetape_flush_tape_buffers(drive);
@@ -1990,15 +1591,17 @@
 			ide_tape_discard_merge_buffer(drive, 1);
 	}
 
-	if (minor < 128 && test_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags))
+	if (minor < 128 && test_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
+				    &drive->atapi_flags))
 		(void) idetape_rewind_tape(drive);
+
 	if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
 		if (tape->door_locked == DOOR_LOCKED) {
 			if (!ide_set_media_lock(drive, tape->disk, 0))
 				tape->door_locked = DOOR_UNLOCKED;
 		}
 	}
-	clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+	clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 	ide_tape_put(tape);
 	unlock_kernel();
 	return 0;
@@ -2159,8 +1762,6 @@
 	u16 *ctl = (u16 *)&tape->caps[12];
 
 	drive->pc_callback	 = ide_tape_callback;
-	drive->pc_update_buffers = idetape_update_buffers;
-	drive->pc_io_buffers	 = ide_tape_io_buffers;
 
 	drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
 
@@ -2191,11 +1792,6 @@
 		tape->buffer_size = *ctl * tape->blk_size;
 	}
 	buffer_size = tape->buffer_size;
-	tape->pages_per_buffer = buffer_size / PAGE_SIZE;
-	if (buffer_size % PAGE_SIZE) {
-		tape->pages_per_buffer++;
-		tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
-	}
 
 	/* select the "best" DSC read/write polling freq */
 	speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
@@ -2238,7 +1834,7 @@
 	ide_drive_t *drive = tape->drive;
 	struct gendisk *g = tape->disk;
 
-	BUG_ON(tape->merge_bh_size);
+	BUG_ON(tape->valid);
 
 	drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
 	drive->driver_data = NULL;
@@ -2311,7 +1907,7 @@
 
 static int idetape_open(struct block_device *bdev, fmode_t mode)
 {
-	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk);
+	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
 
 	if (!tape)
 		return -ENXIO;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4aa6223..75b85a8 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -98,7 +98,6 @@
 	if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
 		ide_tf_dump(drive->name, cmd);
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-		SELECT_MASK(drive, 0);
 
 		if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
 			u8 data[2] = { cmd->tf.data, cmd->hob.data };
@@ -166,7 +165,7 @@
 	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
 		if (custom && tf->command == ATA_CMD_SET_MULTI) {
 			drive->mult_req = drive->mult_count = 0;
-			drive->special.b.recalibrate = 1;
+			drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 			(void)ide_dump_status(drive, __func__, stat);
 			return ide_stopped;
 		} else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
@@ -385,7 +384,7 @@
 	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 		ide_finish_cmd(drive, cmd, stat);
 	else
-		ide_complete_rq(drive, 0, cmd->rq->nr_sectors << 9);
+		ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
 	return ide_stopped;
 out_err:
 	ide_error_cmd(drive, cmd);
@@ -424,7 +423,9 @@
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->buffer = buf;
+
+	if (cmd->tf_flags & IDE_TFLAG_WRITE)
+		rq->cmd_flags |= REQ_RW;
 
 	/*
 	 * (ks) We transfer currently only whole sectors.
@@ -432,18 +433,20 @@
 	 * if we would find a solution to transfer any size.
 	 * To support special commands like READ LONG.
 	 */
-	rq->hard_nr_sectors = rq->nr_sectors = nsect;
-	rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		rq->cmd_flags |= REQ_RW;
+	if (nsect) {
+		error = blk_rq_map_kern(drive->queue, rq, buf,
+					nsect * SECTOR_SIZE, __GFP_WAIT);
+		if (error)
+			goto put_req;
+	}
 
 	rq->special = cmd;
 	cmd->rq = rq;
 
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
 
+put_req:
+	blk_put_request(rq);
 	return error;
 }
 
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 92c9b90..16d0569 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -211,6 +211,11 @@
 module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
 MODULE_PARM_DESC(noflush, "disable flush requests for a device");
 
+static unsigned int ide_nohpa;
+
+module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
+MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
+
 static unsigned int ide_noprobe;
 
 module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
@@ -281,6 +286,11 @@
 				 drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOFLUSH;
 	}
+	if (ide_nohpa & (1 << i)) {
+		printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
+				 drive->name);
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
+	}
 	if (ide_noprobe & (1 << i)) {
 		printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOPROBE;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 051b4ab..ee9b55e 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -21,7 +21,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
-static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
+static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
 					   void __iomem *base,
 					   void __iomem *ctrl,
 					   struct pata_platform_info *pdata,
@@ -40,12 +40,11 @@
 	hw->io_ports.ctl_addr = (unsigned long)ctrl;
 
 	hw->irq = irq;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info platform_ide_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __devinit plat_ide_probe(struct platform_device *pdev)
@@ -55,7 +54,7 @@
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
@@ -99,7 +98,7 @@
 	if (mmio)
 		d.host_flags |= IDE_HFLAG_MMIO;
 
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 4b1718e..1447c8c 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -62,7 +62,7 @@
 	return 0;
 }
 
-static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
 				      int irq, ide_ack_intr_t *ack_intr)
 {
 	int i;
@@ -76,13 +76,12 @@
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info macide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 static const char *mac_ide_name[] =
@@ -97,7 +96,7 @@
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
@@ -127,7 +126,7 @@
 
 	macide_setup_ports(&hw, base, irq, ack_intr);
 
-	return ide_host_add(&macide_port_info, hws, NULL);
+	return ide_host_add(&macide_port_info, hws, 1, NULL);
 }
 
 module_init(macide_init);
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 09d813d..3c1dc01 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -306,6 +306,7 @@
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
+	.chipset		= ide_palm3710,
 };
 
 static int __init palm_bk3710_probe(struct platform_device *pdev)
@@ -315,7 +316,7 @@
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
@@ -363,13 +364,12 @@
 			(base + IDE_PALM_ATA_PRI_CTL_OFFSET);
 	hw.irq = irq->start;
 	hw.dev = &pdev->dev;
-	hw.chipset = ide_palm3710;
 
 	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
 							     ATA_UDMA5;
 
 	/* Register the IDE interface with Linux */
-	rc = ide_host_add(&palm_bk3710_port_info, hws, NULL);
+	rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index b68906c..65ba823 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -40,18 +40,6 @@
 #define DBG(fmt, args...)
 #endif
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static u8 max_dma_rate(struct pci_dev *pdev)
 {
 	u8 mode;
@@ -200,19 +188,6 @@
 		return ATA_CBL_PATA80;
 }
 
-static void pdcnew_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdcnew_reset(ide_drive_t *drive)
 {
 	/*
@@ -473,7 +448,6 @@
 static const struct ide_port_ops pdcnew_port_ops = {
 	.set_pio_mode		= pdcnew_set_pio_mode,
 	.set_dma_mode		= pdcnew_set_dma_mode,
-	.quirkproc		= pdcnew_quirkproc,
 	.resetproc		= pdcnew_reset,
 	.cable_detect		= pdcnew_cable_detect,
 };
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 248a54b..b6abf7e 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -1,6 +1,6 @@
 /*
  *  Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2006-2007		MontaVista Software, Inc.
+ *  Copyright (C) 2006-2007, 2009	MontaVista Software, Inc.
  *  Copyright (C) 2007			Bartlomiej Zolnierkiewicz
  *
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
@@ -23,18 +23,6 @@
 
 #define PDC202XX_DEBUG_DRIVE_INFO	0
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
@@ -151,19 +139,6 @@
 	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
 }
 
-static void pdc202xx_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdc202xx_dma_start(ide_drive_t *drive)
 {
 	if (drive->current_speed > XFER_UDMA_2)
@@ -177,7 +152,7 @@
 		u8 clock = inb(high_16 + 0x11);
 
 		outb(clock | (hwif->channel ? 0x08 : 0x02), high_16 + 0x11);
-		word_count = (rq->nr_sectors << 8);
+		word_count = (blk_rq_sectors(rq) << 8);
 		word_count = (rq_data_dir(rq) == READ) ?
 					word_count | 0x05000000 :
 					word_count | 0x06000000;
@@ -203,61 +178,6 @@
 	return ide_dma_end(drive);
 }
 
-static int pdc202xx_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	unsigned long high_16	= hwif->extra_base - 16;
-	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
-	u8 sc1d			= inb(high_16 + 0x001d);
-
-	if (hwif->channel) {
-		/* bit7: Error, bit6: Interrupting, bit5: FIFO Full, bit4: FIFO Empty */
-		if ((sc1d & 0x50) == 0x50)
-			goto somebody_else;
-		else if ((sc1d & 0x40) == 0x40)
-			return (dma_stat & 4) == 4;
-	} else {
-		/* bit3: Error, bit2: Interrupting, bit1: FIFO Full, bit0: FIFO Empty */
-		if ((sc1d & 0x05) == 0x05)
-			goto somebody_else;
-		else if ((sc1d & 0x04) == 0x04)
-			return (dma_stat & 4) == 4;
-	}
-somebody_else:
-	return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
-}
-
-static void pdc202xx_reset_host (ide_hwif_t *hwif)
-{
-	unsigned long high_16	= hwif->extra_base - 16;
-	u8 udma_speed_flag	= inb(high_16 | 0x001f);
-
-	outb(udma_speed_flag | 0x10, high_16 | 0x001f);
-	mdelay(100);
-	outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
-	mdelay(2000);	/* 2 seconds ?! */
-
-	printk(KERN_WARNING "PDC202XX: %s channel reset.\n",
-		hwif->channel ? "Secondary" : "Primary");
-}
-
-static void pdc202xx_reset (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	ide_hwif_t *mate	= hwif->mate;
-
-	pdc202xx_reset_host(hwif);
-	pdc202xx_reset_host(mate);
-
-	ide_set_max_pio(drive);
-}
-
-static void pdc202xx_dma_lost_irq(ide_drive_t *drive)
-{
-	pdc202xx_reset(drive);
-	ide_dma_lost_irq(drive);
-}
-
 static int init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
@@ -311,38 +231,22 @@
 static const struct ide_port_ops pdc20246_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
 };
 
 static const struct ide_port_ops pdc2026x_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
-	.resetproc		= pdc202xx_reset,
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
-static const struct ide_dma_ops pdc20246_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= pdc202xx_dma_test_irq,
-	.dma_lost_irq		= pdc202xx_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= pdc202xx_reset,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
 static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_host_set		= ide_dma_host_set,
 	.dma_setup		= ide_dma_setup,
 	.dma_start		= pdc202xx_dma_start,
 	.dma_end		= pdc202xx_dma_end,
-	.dma_test_irq		= pdc202xx_dma_test_irq,
-	.dma_lost_irq		= pdc202xx_dma_lost_irq,
+	.dma_test_irq		= ide_dma_test_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= pdc202xx_reset,
 	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
@@ -364,7 +268,7 @@
 		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_pdc202xx,
 		.port_ops	= &pdc20246_port_ops,
-		.dma_ops	= &pdc20246_dma_ops,
+		.dma_ops	= &sff_dma_ops,
 		.host_flags	= IDE_HFLAGS_PDC202XX,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f76e4e6..97642a7a 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1023,13 +1023,14 @@
  * Setup, register & probe an IDE channel driven by this driver, this is
  * called by one of the 2 probe functions (macio or PCI).
  */
-static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
+static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
+					   struct ide_hw *hw)
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	hw_regs_t *hws[] = { hw, NULL, NULL, NULL };
+	struct ide_hw *hws[] = { hw };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1077,7 +1078,7 @@
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 1);
 	if (host == NULL)
 		return -ENOMEM;
 	hwif = host->ports[0];
@@ -1124,7 +1125,7 @@
 	return 0;
 }
 
-static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base)
+static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
 {
 	int i;
 
@@ -1144,7 +1145,7 @@
 	unsigned long regbase;
 	pmac_ide_hwif_t *pmif;
 	int irq, rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
 	if (pmif == NULL)
@@ -1268,7 +1269,7 @@
 	void __iomem *base;
 	unsigned long rbase, rlen;
 	int rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	np = pci_device_to_OF_node(pdev);
 	if (np == NULL) {
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index c793466..ab49a97 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -51,11 +51,11 @@
 /*
  * Addresses are pretranslated for Q40 ISA access.
  */
-static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
 			ide_ack_intr_t *ack_intr,
 			int irq)
 {
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	/* BIG FAT WARNING: 
 	   assumption: only DATA port is ever used in 16 bit mode */
 	hw->io_ports.data_addr = Q40_ISA_IO_W(base);
@@ -70,8 +70,6 @@
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -119,6 +117,7 @@
 	.tp_ops			= &q40ide_tp_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 /* 
@@ -136,7 +135,7 @@
 static int __init q40ide_init(void)
 {
     int i;
-    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
@@ -163,7 +162,7 @@
 	hws[i] = &hw[i];
     }
 
-    return ide_host_add(&q40ide_port_info, hws, NULL);
+    return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
 }
 
 module_init(q40ide_init);
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index d5003ca..00f5424 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -13,9 +13,10 @@
 
 static const struct ide_port_info rapide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
-static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
@@ -35,7 +36,7 @@
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -49,10 +50,9 @@
 
 	memset(&hw, 0, sizeof(hw));
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
-	hw.chipset = ide_generic;
 	hw.dev = &ec->dev;
 
-	ret = ide_host_add(&rapide_port_info, hws, &host);
+	ret = ide_host_add(&rapide_port_info, hws, 1, &host);
 	if (ret)
 		goto release;
 
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 5be41f2..1104bb3 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
@@ -567,9 +567,8 @@
 		hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;
 
-	rc = ide_host_add(d, hws, &host);
+	rc = ide_host_add(d, hws, 1, &host);
 	if (rc)
 		return rc;
 
@@ -823,6 +822,7 @@
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.irq_flags	= IRQF_SHARED,
 	.pio_mask	= ATA_PIO4,
+	.chipset	= ide_pci,
 };
 
 /**
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 7a3a12d..ab3db61 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -1,7 +1,7 @@
 /*
  *  Copyright (C) 1998-2000  Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 1995-1998  Mark Lord
- *  Copyright (C)      2007  Bartlomiej Zolnierkiewicz
+ *  Copyright (C) 2007-2009  Bartlomiej Zolnierkiewicz
  *
  *  May be copied or modified under the terms of the GNU General Public License
  */
@@ -301,11 +301,11 @@
 }
 
 /**
- *	ide_hw_configure	-	configure a hw_regs_t instance
+ *	ide_hw_configure	-	configure a struct ide_hw instance
  *	@dev: PCI device holding interface
  *	@d: IDE port info
  *	@port: port number
- *	@hw: hw_regs_t instance corresponding to this port
+ *	@hw: struct ide_hw instance corresponding to this port
  *
  *	Perform the initial set up for the hardware interface structure. This
  *	is done per interface port rather than per PCI device. There may be
@@ -315,7 +315,7 @@
  */
 
 static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
-			    unsigned int port, hw_regs_t *hw)
+			    unsigned int port, struct ide_hw *hw)
 {
 	unsigned long ctl = 0, base = 0;
 
@@ -344,7 +344,6 @@
 
 	memset(hw, 0, sizeof(*hw));
 	hw->dev = &dev->dev;
-	hw->chipset = d->chipset ? d->chipset : ide_pci;
 	ide_std_init_ports(hw, base, ctl | 2);
 
 	return 0;
@@ -446,8 +445,8 @@
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
  *	@d: IDE port info
- *	@hw: hw_regs_t instances corresponding to this PCI IDE device
- *	@hws: hw_regs_t pointers table to update
+ *	@hw: struct ide_hw instances corresponding to this PCI IDE device
+ *	@hws: struct ide_hw pointers table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -459,7 +458,7 @@
  */
 
 void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
-			 hw_regs_t *hw, hw_regs_t **hws)
+			 struct ide_hw *hw, struct ide_hw **hws)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
 	u8 tmp;
@@ -535,61 +534,15 @@
 	return ret;
 }
 
-int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
-		     void *priv)
-{
-	struct ide_host *host;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
-	int ret;
-
-	ret = ide_setup_pci_controller(dev, d, 1);
-	if (ret < 0)
-		goto out;
-
-	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
-
-	host = ide_host_alloc(d, hws);
-	if (host == NULL) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	host->dev[0] = &dev->dev;
-
-	host->host_priv = priv;
-
-	host->irq_flags = IRQF_SHARED;
-
-	pci_set_drvdata(dev, host);
-
-	ret = do_ide_setup_pci_device(dev, d, 1);
-	if (ret < 0)
-		goto out;
-
-	/* fixup IRQ */
-	if (ide_pci_is_in_compatibility_mode(dev)) {
-		hw[0].irq = pci_get_legacy_ide_irq(dev, 0);
-		hw[1].irq = pci_get_legacy_ide_irq(dev, 1);
-	} else
-		hw[1].irq = hw[0].irq = ret;
-
-	ret = ide_host_register(host, d, hws);
-	if (ret)
-		ide_host_free(host);
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ide_pci_init_one);
-
 int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		     const struct ide_port_info *d, void *priv)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
 	struct ide_host *host;
-	int ret, i;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	int ret, i, n_ports = dev2 ? 4 : 2;
+	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < n_ports / 2; i++) {
 		ret = ide_setup_pci_controller(pdev[i], d, !i);
 		if (ret < 0)
 			goto out;
@@ -597,23 +550,24 @@
 		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
 	}
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, n_ports);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	host->dev[0] = &dev1->dev;
-	host->dev[1] = &dev2->dev;
+	if (dev2)
+		host->dev[1] = &dev2->dev;
 
 	host->host_priv = priv;
-
 	host->irq_flags = IRQF_SHARED;
 
 	pci_set_drvdata(pdev[0], host);
-	pci_set_drvdata(pdev[1], host);
+	if (dev2)
+		pci_set_drvdata(pdev[1], host);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < n_ports / 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -639,6 +593,13 @@
 }
 EXPORT_SYMBOL_GPL(ide_pci_init_two);
 
+int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
+		     void *priv)
+{
+	return ide_pci_init_two(dev, NULL, d, priv);
+}
+EXPORT_SYMBOL_GPL(ide_pci_init_one);
+
 void ide_pci_remove(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index e5d2a48..5f37f16 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -91,7 +91,7 @@
 
 
 static void
-sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
+sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port,
 			unsigned long ctrl_port, unsigned long irq_port)
 {
 	unsigned long reg = data_port;
@@ -546,7 +546,7 @@
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
@@ -575,13 +575,12 @@
 	memset(&hw, 0, sizeof(hw));
 	sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
 	hw.irq = dev->irq;
-	hw.chipset = ide_pci;
 	hw.dev = &dev->dev;
 
 	/* Initializing chipset IRQ Registers */
 	writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4));
 
-	rc = ide_host_add(&sgiioc4_port_info, hws, NULL);
+	rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL);
 	if (!rc)
 		return 0;
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index e4973cd..bd82d22 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -451,8 +451,8 @@
 static void sil_sata_pre_reset(ide_drive_t *drive)
 {
 	if (drive->media == ide_disk) {
-		drive->special.b.set_geometry = 0;
-		drive->special.b.recalibrate = 0;
+		drive->special_flags &=
+			~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
 	}
 }
 
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index b0a4606..0924abf 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -10,7 +10,7 @@
  * with the timing registers setup.
  *  -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org
  *
- * Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
+ * Copyright (C) 2006-2007,2009 MontaVista Software, Inc. <source@mvista.com>
  * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
  */
 
@@ -146,14 +146,15 @@
 	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
 	u8 dma_cmd;
 
-	printk("sl82c105: lost IRQ, resetting host\n");
+	printk(KERN_WARNING "sl82c105: lost IRQ, resetting host\n");
 
 	/*
 	 * Check the raw interrupt from the drive.
 	 */
 	pci_read_config_dword(dev, 0x40, &val);
 	if (val & mask)
-		printk("sl82c105: drive was requesting IRQ, but host lost it\n");
+		printk(KERN_INFO "sl82c105: drive was requesting IRQ, "
+		       "but host lost it\n");
 
 	/*
 	 * Was DMA enabled?  If so, disable it - we're resetting the
@@ -162,7 +163,7 @@
 	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
 	if (dma_cmd & 1) {
 		outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-		printk("sl82c105: DMA was enabled\n");
+		printk(KERN_INFO "sl82c105: DMA was enabled\n");
 	}
 
 	sl82c105_reset_host(dev);
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index b4cf42d..05a93d6b 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -112,7 +112,7 @@
 	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long sc_base	= hwif->config_data;
 	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-	unsigned long nsectors	= hwif->rq->nr_sectors;
+	unsigned long nsectors	= blk_rq_sectors(hwif->rq);
 
 	/*
 	 * We have to manually load the sector count and size into
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index e33d764..ea89fdd 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,8 +130,7 @@
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
@@ -183,7 +182,7 @@
 		tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
 	else
 		d.port_ops = NULL;
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (!ret)
 		platform_set_drvdata(pdev, host);
 	return ret;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 564422d..64b58ec 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -307,7 +307,7 @@
 	tx4939ide_writew(SECTOR_SIZE / 2, base, drive->dn ?
 			 TX4939IDE_Xfer_Cnt_2 : TX4939IDE_Xfer_Cnt_1);
 
-	tx4939ide_writew(cmd->rq->nr_sectors, base, TX4939IDE_Sec_Cnt);
+	tx4939ide_writew(blk_rq_sectors(cmd->rq), base, TX4939IDE_Sec_Cnt);
 
 	return 0;
 }
@@ -537,8 +537,7 @@
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
@@ -581,7 +580,7 @@
 	hw.dev = &pdev->dev;
 
 	pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
-	host = ide_host_alloc(&tx4939ide_port_info, hws);
+	host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
 	if (!host)
 		return -ENOMEM;
 	/* use extra_base for base address of the all registers */
diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c
index 823a629..2cd00b5 100644
--- a/drivers/ieee1394/dv1394.c
+++ b/drivers/ieee1394/dv1394.c
@@ -1789,12 +1789,13 @@
 	} else {
 		/* look up the card by ID */
 		unsigned long flags;
+		int idx = ieee1394_file_to_instance(file);
 
 		spin_lock_irqsave(&dv1394_cards_lock, flags);
 		if (!list_empty(&dv1394_cards)) {
 			struct video_card *p;
 			list_for_each_entry(p, &dv1394_cards, list) {
-				if ((p->id) == ieee1394_file_to_instance(file)) {
+				if ((p->id) == idx) {
 					video = p;
 					break;
 				}
@@ -1803,7 +1804,7 @@
 		spin_unlock_irqrestore(&dv1394_cards_lock, flags);
 
 		if (!video) {
-			debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file));
+			debug_printk("dv1394: OHCI card %d not found", idx);
 			return -ENODEV;
 		}
 
diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h
index 21d50f7..28b9f58 100644
--- a/drivers/ieee1394/ieee1394_core.h
+++ b/drivers/ieee1394/ieee1394_core.h
@@ -5,6 +5,7 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/cdev.h>
 #include <asm/atomic.h>
 
 #include "hosts.h"
@@ -155,7 +156,10 @@
  */
 static inline unsigned char ieee1394_file_to_instance(struct file *file)
 {
-	return file->f_path.dentry->d_inode->i_cindex;
+	int idx = cdev_index(file->f_path.dentry->d_inode);
+	if (idx < 0)
+		idx = 0;
+	return idx;
 }
 
 extern int hpsb_disable_irm;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 75223f5..0ba6ec8 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -257,11 +257,8 @@
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
 
-	/*
-	 * mgmt tasks do not need special cleanup and we do not
-	 * allocate anything in the init task callout
-	 */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* mgmt tasks do not need special cleanup */
+	if (!task->sc)
 		return;
 
 	if (iser_task->status == ISER_TASK_STATUS_STARTED) {
@@ -517,7 +514,8 @@
 }
 
 static struct iscsi_endpoint *
-iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking)
+iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
+		      int non_blocking)
 {
 	int err;
 	struct iser_conn *ib_conn;
diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index da3c3a5..c4b3fbd 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -192,7 +192,7 @@
 
 config SERIO_XILINX_XPS_PS2
 	tristate "Xilinx XPS PS/2 Controller Support"
-	depends on PPC
+	depends on PPC || MICROBLAZE
 	help
 	  This driver supports XPS PS/2 IP from the Xilinx EDK on
 	  PowerPC platform.
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index a3d3cba..0aaa059 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
+	depends on X86_32 && EXPERIMENTAL && EVENTFD
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4845fb3..a6974e9 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -95,7 +95,7 @@
 	 * array of struct pages.  It increments that pointer, but we don't
 	 * care. */
 	pagep = switcher_page;
-	err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
+	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
 	if (err) {
 		printk("lguest: map_vm_area failed: %i\n", err);
 		goto free_vma;
@@ -188,6 +188,9 @@
 {
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
+		unsigned int irq;
+		bool more;
+
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
 			do_hypercalls(cpu);
@@ -195,23 +198,23 @@
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
 		if (cpu->pending_notify) {
-			if (put_user(cpu->pending_notify, user))
-				return -EFAULT;
-			return sizeof(cpu->pending_notify);
+			if (!send_notify_to_eventfd(cpu)) {
+				if (put_user(cpu->pending_notify, user))
+					return -EFAULT;
+				return sizeof(cpu->pending_notify);
+			}
 		}
 
 		/* Check for signals */
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 
-		/* If Waker set break_out, return to Launcher. */
-		if (cpu->break_out)
-			return -EAGAIN;
-
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		maybe_do_interrupt(cpu);
+		irq = interrupt_pending(cpu, &more);
+		if (irq < LGUEST_IRQS)
+			try_deliver_interrupt(cpu, irq, more);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -224,10 +227,15 @@
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
-		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
+		 * clock timer will wake us. */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
+			/* Just before we sleep, make sure no interrupt snuck in
+			 * which we should be doing. */
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
+				set_current_state(TASK_RUNNING);
+			else
+				schedule();
 			continue;
 		}
 
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f0..c29ffa1 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@
 		/* This call does nothing, except by breaking out of the Guest
 		 * it makes us process all the asynchronous hypercalls. */
 		break;
+	case LHCALL_SEND_INTERRUPTS:
+		/* This call does nothing too, but by breaking out of the Guest
+		 * it makes us process any pending interrupts. */
+		break;
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
@@ -73,11 +77,21 @@
 		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
+#ifdef CONFIG_X86_PAE
+		guest_set_pte(cpu, args->arg1, args->arg2,
+				__pte(args->arg3 | (u64)args->arg4 << 32));
+#else
 		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
+#endif
 		break;
+	case LHCALL_SET_PGD:
+		guest_set_pgd(cpu->lg, args->arg1, args->arg2);
+		break;
+#ifdef CONFIG_X86_PAE
 	case LHCALL_SET_PMD:
 		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
 		break;
+#endif
 	case LHCALL_SET_CLOCKEVENT:
 		guest_set_clockevent(cpu, args->arg1);
 		break;
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 6e99adb..0e9067b 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -128,30 +128,39 @@
 /*H:205
  * Virtual Interrupts.
  *
- * maybe_do_interrupt() gets called before every entry to the Guest, to see if
- * we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lg_cpu *cpu)
+ * interrupt_pending() returns the first pending interrupt which isn't blocked
+ * by the Guest.  It is called before every entry to the Guest, and just before
+ * we go to sleep when the Guest has halted itself. */
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
-	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
 	if (!cpu->lg->lguest_data)
-		return;
+		return LGUEST_IRQS;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
 	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
-		return;
+		return LGUEST_IRQS;
 	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
-	/* None?  Nothing to do */
-	if (irq >= LGUEST_IRQS)
-		return;
+	*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
+
+	return irq;
+}
+
+/* This actually diverts the Guest to running an interrupt handler, once an
+ * interrupt has been identified by interrupt_pending(). */
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
+{
+	struct desc_struct *idt;
+
+	BUG_ON(irq >= LGUEST_IRQS);
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
@@ -170,8 +179,12 @@
 		u32 irq_enabled;
 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
-		if (!irq_enabled)
+		if (!irq_enabled) {
+			/* Make sure they know an IRQ is pending. */
+			put_user(X86_EFLAGS_IF,
+				 &cpu->lg->lguest_data->irq_pending);
 			return;
+		}
 	}
 
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
@@ -194,6 +207,25 @@
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
 	write_timestamp(cpu);
+
+	/* If there are no other interrupts we want to deliver, clear
+	 * the pending flag. */
+	if (!more)
+		put_user(0, &cpu->lg->lguest_data->irq_pending);
+}
+
+/* And this is the routine when we want to set an interrupt for the Guest. */
+void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
+{
+	/* Next time the Guest runs, the core code will see if it can deliver
+	 * this interrupt. */
+	set_bit(irq, cpu->irqs_pending);
+
+	/* Make sure it sees it; it might be asleep (eg. halted), or
+	 * running the Guest right now, in which case kick_process()
+	 * will knock it out. */
+	if (!wake_up_process(cpu->tsk))
+		kick_process(cpu->tsk);
 }
 /*:*/
 
@@ -510,10 +542,7 @@
 	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
 
 	/* Remember the first interrupt is the timer interrupt. */
-	set_bit(0, cpu->irqs_pending);
-	/* If the Guest is actually stopped, we need to wake it up. */
-	if (cpu->halted)
-		wake_up_process(cpu->tsk);
+	set_interrupt(cpu, 0);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index af92a17..d4e8979 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -49,7 +49,7 @@
 	u32 cr2;
 	int ts;
 	u32 esp1;
-	u8 ss1;
+	u16 ss1;
 
 	/* Bitmap of what has changed: see CHANGED_* above. */
 	int changed;
@@ -71,9 +71,7 @@
 	/* Virtual clock device */
 	struct hrtimer hrt;
 
-	/* Do we need to stop what we're doing and return to userspace? */
-	int break_out;
-	wait_queue_head_t break_wq;
+	/* Did the Guest tell us to halt? */
 	int halted;
 
 	/* Pending virtual interrupts */
@@ -82,6 +80,16 @@
 	struct lg_cpu_arch arch;
 };
 
+struct lg_eventfd {
+	unsigned long addr;
+	struct file *event;
+};
+
+struct lg_eventfd_map {
+	unsigned int num;
+	struct lg_eventfd map[];
+};
+
 /* The private info the thread maintains about the guest. */
 struct lguest
 {
@@ -102,6 +110,8 @@
 	unsigned int stack_pages;
 	u32 tsc_khz;
 
+	struct lg_eventfd_map *eventfds;
+
 	/* Dead? */
 	const char *dead;
 };
@@ -137,9 +147,13 @@
  * in the kernel. */
 #define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
+#define pmd_flags(x)    (pmd_val(x) & ~PAGE_MASK)
+#define pmd_pfn(x)	(pmd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-void maybe_do_interrupt(struct lg_cpu *cpu);
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
+void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
@@ -150,6 +164,7 @@
 void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def);
 void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
+bool send_notify_to_eventfd(struct lg_cpu *cpu);
 void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
 int init_interrupts(void);
@@ -168,7 +183,10 @@
 int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
+#ifdef CONFIG_X86_PAE
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
+#endif
 void guest_pagetable_clear_all(struct lg_cpu *cpu);
 void guest_pagetable_flush_user(struct lg_cpu *cpu);
 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index df44d96..e082cda 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -228,7 +228,8 @@
  * function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct lguest_device *ldev = to_lgdev(vdev);
 	struct lguest_vq_info *lvq;
@@ -263,7 +264,7 @@
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
 	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback);
+				 vdev, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
@@ -312,6 +313,38 @@
 	kfree(lvq);
 }
 
+static void lg_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		lg_del_vq(vq);
+}
+
+static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	struct lguest_device *ldev = to_lgdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > ldev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	lg_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -321,8 +354,8 @@
 	.get_status = lg_get_status,
 	.set_status = lg_set_status,
 	.reset = lg_reset,
-	.find_vq = lg_find_vq,
-	.del_vq = lg_del_vq,
+	.find_vqs = lg_find_vqs,
+	.del_vqs = lg_del_vqs,
 };
 
 /* The root device for the lguest virtio devices.  This makes them appear as
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index b8ee103..32e2971 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -7,32 +7,83 @@
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include "lg.h"
 
-/*L:055 When something happens, the Waker process needs a way to stop the
- * kernel running the Guest and return to the Launcher.  So the Waker writes
- * LHREQ_BREAK and the value "1" to /dev/lguest to do this.  Once the Launcher
- * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
- * the Waker. */
-static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
+bool send_notify_to_eventfd(struct lg_cpu *cpu)
 {
-	unsigned long on;
+	unsigned int i;
+	struct lg_eventfd_map *map;
 
-	/* Fetch whether they're turning break on or off. */
-	if (get_user(on, input) != 0)
+	/* lg->eventfds is RCU-protected */
+	rcu_read_lock();
+	map = rcu_dereference(cpu->lg->eventfds);
+	for (i = 0; i < map->num; i++) {
+		if (map->map[i].addr == cpu->pending_notify) {
+			eventfd_signal(map->map[i].event, 1);
+			cpu->pending_notify = 0;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return cpu->pending_notify == 0;
+}
+
+static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
+{
+	struct lg_eventfd_map *new, *old = lg->eventfds;
+
+	if (!addr)
+		return -EINVAL;
+
+	/* Replace the old array with the new one, carefully: others can
+	 * be accessing it at the same time */
+	new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
+		      GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* First make identical copy. */
+	memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
+	new->num = old->num;
+
+	/* Now append new entry. */
+	new->map[new->num].addr = addr;
+	new->map[new->num].event = eventfd_fget(fd);
+	if (IS_ERR(new->map[new->num].event)) {
+		kfree(new);
+		return PTR_ERR(new->map[new->num].event);
+	}
+	new->num++;
+
+	/* Now put new one in place. */
+	rcu_assign_pointer(lg->eventfds, new);
+
+	/* We're not in a big hurry.  Wait until noone's looking at old
+	 * version, then delete it. */
+	synchronize_rcu();
+	kfree(old);
+
+	return 0;
+}
+
+static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
+{
+	unsigned long addr, fd;
+	int err;
+
+	if (get_user(addr, input) != 0)
+		return -EFAULT;
+	input++;
+	if (get_user(fd, input) != 0)
 		return -EFAULT;
 
-	if (on) {
-		cpu->break_out = 1;
-		/* Pop it out of the Guest (may be running on different CPU) */
-		wake_up_process(cpu->tsk);
-		/* Wait for them to reset it */
-		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
-	} else {
-		cpu->break_out = 0;
-		wake_up(&cpu->break_wq);
-		return 0;
-	}
+	mutex_lock(&lguest_lock);
+	err = add_eventfd(lg, addr, fd);
+	mutex_unlock(&lguest_lock);
+
+	return 0;
 }
 
 /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
@@ -45,9 +96,8 @@
 		return -EFAULT;
 	if (irq >= LGUEST_IRQS)
 		return -EINVAL;
-	/* Next time the Guest runs, the core code will see if it can deliver
-	 * this interrupt. */
-	set_bit(irq, cpu->irqs_pending);
+
+	set_interrupt(cpu, irq);
 	return 0;
 }
 
@@ -126,9 +176,6 @@
 	 * address. */
 	lguest_arch_setup_regs(cpu, start_ip);
 
-	/* Initialize the queue for the Waker to wait on */
-	init_waitqueue_head(&cpu->break_wq);
-
 	/* We keep a pointer to the Launcher task (ie. current task) for when
 	 * other Guests want to wake this one (eg. console input). */
 	cpu->tsk = current;
@@ -185,6 +232,13 @@
 		goto unlock;
 	}
 
+	lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
+	if (!lg->eventfds) {
+		err = -ENOMEM;
+		goto free_lg;
+	}
+	lg->eventfds->num = 0;
+
 	/* Populate the easy fields of our "struct lguest" */
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
@@ -192,7 +246,7 @@
 	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
 	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
-		goto release_guest;
+		goto free_eventfds;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
@@ -211,7 +265,9 @@
 free_regs:
 	/* FIXME: This should be in free_vcpu */
 	free_page(lg->cpus[0].regs_page);
-release_guest:
+free_eventfds:
+	kfree(lg->eventfds);
+free_lg:
 	kfree(lg);
 unlock:
 	mutex_unlock(&lguest_lock);
@@ -252,11 +308,6 @@
 		/* Once the Guest is dead, you can only read() why it died. */
 		if (lg->dead)
 			return -ENOENT;
-
-		/* If you're not the task which owns the Guest, all you can do
-		 * is break the Launcher out of running the Guest. */
-		if (current != cpu->tsk && req != LHREQ_BREAK)
-			return -EPERM;
 	}
 
 	switch (req) {
@@ -264,8 +315,8 @@
 		return initialize(file, input);
 	case LHREQ_IRQ:
 		return user_send_irq(cpu, input);
-	case LHREQ_BREAK:
-		return break_guest_out(cpu, input);
+	case LHREQ_EVENTFD:
+		return attach_eventfd(lg, input);
 	default:
 		return -EINVAL;
 	}
@@ -303,6 +354,12 @@
 		 * the Launcher's memory management structure. */
 		mmput(lg->cpus[i].mm);
 	}
+
+	/* Release any eventfds they registered. */
+	for (i = 0; i < lg->eventfds->num; i++)
+		fput(lg->eventfds->map[i].event);
+	kfree(lg->eventfds);
+
 	/* If lg->dead doesn't contain an error code it will be NULL or a
 	 * kmalloc()ed string, either of which is ok to hand to kfree(). */
 	if (!IS_ERR(lg->dead))
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a059cf9..a6fe1ab 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -53,6 +53,17 @@
  * page.  */
 #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
 
+/* For PAE we need the PMD index as well. We use the last 2MB, so we
+ * will need the last pmd entry of the last pmd page.  */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_PMD_INDEX 	(PTRS_PER_PMD - 1)
+#define RESERVE_MEM 		2U
+#define CHECK_GPGD_MASK		_PAGE_PRESENT
+#else
+#define RESERVE_MEM 		4U
+#define CHECK_GPGD_MASK		_PAGE_TABLE
+#endif
+
 /* We actually need a separate PTE page for each CPU.  Remember that after the
  * Switcher code itself comes two pages for each CPU, and we don't want this
  * CPU's guest to see the pages of any other CPU. */
@@ -73,24 +84,59 @@
 {
 	unsigned int index = pgd_index(vaddr);
 
+#ifndef CONFIG_X86_PAE
 	/* We kill any Guest trying to touch the Switcher addresses. */
 	if (index >= SWITCHER_PGD_INDEX) {
 		kill_guest(cpu, "attempt to access switcher pages");
 		index = 0;
 	}
+#endif
 	/* Return a pointer index'th pgd entry for the i'th page table. */
 	return &cpu->lg->pgdirs[i].pgdir[index];
 }
 
+#ifdef CONFIG_X86_PAE
+/* This routine then takes the PGD entry given above, which contains the
+ * address of the PMD page.  It then returns a pointer to the PMD entry for the
+ * given address. */
+static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
+{
+	unsigned int index = pmd_index(vaddr);
+	pmd_t *page;
+
+	/* We kill any Guest trying to touch the Switcher addresses. */
+	if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
+					index >= SWITCHER_PMD_INDEX) {
+		kill_guest(cpu, "attempt to access switcher pages");
+		index = 0;
+	}
+
+	/* You should never call this if the PGD entry wasn't valid */
+	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
+	page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
+
+	return &page[index];
+}
+#endif
+
 /* This routine then takes the page directory entry returned above, which
  * contains the address of the page table entry (PTE) page.  It then returns a
  * pointer to the PTE entry for the given address. */
-static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
+static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
 {
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
+	pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
+
+	/* You should never call this if the PMD entry wasn't valid */
+	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
+#else
 	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
 	/* You should never call this if the PGD entry wasn't valid */
 	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
-	return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
+#endif
+
+	return &page[pte_index(vaddr)];
 }
 
 /* These two functions just like the above two, except they access the Guest
@@ -101,12 +147,32 @@
 	return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
 }
 
-static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
+#ifdef CONFIG_X86_PAE
+static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
 {
 	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
-	return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
+	return gpage + pmd_index(vaddr) * sizeof(pmd_t);
 }
+
+static unsigned long gpte_addr(struct lg_cpu *cpu,
+			       pmd_t gpmd, unsigned long vaddr)
+{
+	unsigned long gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
+
+	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
+	return gpage + pte_index(vaddr) * sizeof(pte_t);
+}
+#else
+static unsigned long gpte_addr(struct lg_cpu *cpu,
+				pgd_t gpgd, unsigned long vaddr)
+{
+	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
+
+	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
+	return gpage + pte_index(vaddr) * sizeof(pte_t);
+}
+#endif
 /*:*/
 
 /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as
@@ -171,7 +237,7 @@
 	/* Remember that get_user_pages_fast() took a reference to the page, in
 	 * get_pfn()?  We have to put it back now. */
 	if (pte_flags(pte) & _PAGE_PRESENT)
-		put_page(pfn_to_page(pte_pfn(pte)));
+		put_page(pte_page(pte));
 }
 /*:*/
 
@@ -184,11 +250,20 @@
 
 static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
 {
-	if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
+	if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
 	   (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
 		kill_guest(cpu, "bad page directory entry");
 }
 
+#ifdef CONFIG_X86_PAE
+static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
+{
+	if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
+	   (pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
+		kill_guest(cpu, "bad page middle directory entry");
+}
+#endif
+
 /*H:330
  * (i) Looking up a page table entry when the Guest faults.
  *
@@ -207,6 +282,11 @@
 	pte_t gpte;
 	pte_t *spte;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+	pmd_t gpmd;
+#endif
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -228,12 +308,45 @@
 		check_gpgd(cpu, gpgd);
 		/* And we copy the flags to the shadow PGD entry.  The page
 		 * number in the shadow PGD is the page we just allocated. */
-		*spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
+		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
+	}
+
+#ifdef CONFIG_X86_PAE
+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+	/* middle level not present?  We can't map it in. */
+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+		return false;
+
+	/* Now look at the matching shadow entry. */
+	spmd = spmd_addr(cpu, *spgd, vaddr);
+
+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
+		/* No shadow entry: allocate a new shadow PTE page. */
+		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
+
+		/* This is not really the Guest's fault, but killing it is
+		* simple for this corner case. */
+		if (!ptepage) {
+			kill_guest(cpu, "out of memory allocating pte page");
+			return false;
+		}
+
+		/* We check that the Guest pmd is OK. */
+		check_gpmd(cpu, gpmd);
+
+		/* And we copy the flags to the shadow PMD entry.  The page
+		 * number in the shadow PMD is the page we just allocated. */
+		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
 	}
 
 	/* OK, now we look at the lower level in the Guest page table: keep its
 	 * address, because we might update it later. */
-	gpte_ptr = gpte_addr(gpgd, vaddr);
+	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
+#else
+	/* OK, now we look at the lower level in the Guest page table: keep its
+	 * address, because we might update it later. */
+	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
+#endif
 	gpte = lgread(cpu, gpte_ptr, pte_t);
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
@@ -259,7 +372,7 @@
 		gpte = pte_mkdirty(gpte);
 
 	/* Get the pointer to the shadow PTE entry we're going to set. */
-	spte = spte_addr(*spgd, vaddr);
+	spte = spte_addr(cpu, *spgd, vaddr);
 	/* If there was a valid shadow PTE entry here before, we release it.
 	 * This can happen with a write to a previously read-only entry. */
 	release_pte(*spte);
@@ -273,7 +386,7 @@
 		 * table entry, even if the Guest says it's writable.  That way
 		 * we will come back here when a write does actually occur, so
 		 * we can update the Guest's _PAGE_DIRTY flag. */
-		*spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
+		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
 
 	/* Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
@@ -301,14 +414,23 @@
 	pgd_t *spgd;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+#endif
 	/* Look at the current top level entry: is it present? */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
 	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
 		return false;
 
+#ifdef CONFIG_X86_PAE
+	spmd = spmd_addr(cpu, *spgd, vaddr);
+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
+		return false;
+#endif
+
 	/* Check the flags on the pte entry itself: it must be present and
 	 * writable. */
-	flags = pte_flags(*(spte_addr(*spgd, vaddr)));
+	flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
 
 	return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
 }
@@ -322,8 +444,43 @@
 		kill_guest(cpu, "bad stack page %#lx", vaddr);
 }
 
+#ifdef CONFIG_X86_PAE
+static void release_pmd(pmd_t *spmd)
+{
+	/* If the entry's not present, there's nothing to release. */
+	if (pmd_flags(*spmd) & _PAGE_PRESENT) {
+		unsigned int i;
+		pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
+		/* For each entry in the page, we might need to release it. */
+		for (i = 0; i < PTRS_PER_PTE; i++)
+			release_pte(ptepage[i]);
+		/* Now we can free the page of PTEs */
+		free_page((long)ptepage);
+		/* And zero out the PMD entry so we never release it twice. */
+		native_set_pmd(spmd, __pmd(0));
+	}
+}
+
+static void release_pgd(pgd_t *spgd)
+{
+	/* If the entry's not present, there's nothing to release. */
+	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
+		unsigned int i;
+		pmd_t *pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
+
+		for (i = 0; i < PTRS_PER_PMD; i++)
+			release_pmd(&pmdpage[i]);
+
+		/* Now we can free the page of PMDs */
+		free_page((long)pmdpage);
+		/* And zero out the PGD entry so we never release it twice. */
+		set_pgd(spgd, __pgd(0));
+	}
+}
+
+#else /* !CONFIG_X86_PAE */
 /*H:450 If we chase down the release_pgd() code, it looks like this: */
-static void release_pgd(struct lguest *lg, pgd_t *spgd)
+static void release_pgd(pgd_t *spgd)
 {
 	/* If the entry's not present, there's nothing to release. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
@@ -341,7 +498,7 @@
 		*spgd = __pgd(0);
 	}
 }
-
+#endif
 /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
  * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
  * It simply releases every PTE page from 0 up to the Guest's kernel address. */
@@ -350,7 +507,7 @@
 	unsigned int i;
 	/* Release every pgd entry up to the kernel's address. */
 	for (i = 0; i < pgd_index(lg->kernel_address); i++)
-		release_pgd(lg, lg->pgdirs[idx].pgdir + i);
+		release_pgd(lg->pgdirs[idx].pgdir + i);
 }
 
 /*H:440 (v) Flushing (throwing away) page tables,
@@ -369,7 +526,9 @@
 {
 	pgd_t gpgd;
 	pte_t gpte;
-
+#ifdef CONFIG_X86_PAE
+	pmd_t gpmd;
+#endif
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -378,7 +537,14 @@
 		return -1UL;
 	}
 
-	gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
+#ifdef CONFIG_X86_PAE
+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+		kill_guest(cpu, "Bad address %#lx", vaddr);
+	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
+#else
+	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
+#endif
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
 
@@ -405,6 +571,9 @@
 			      int *blank_pgdir)
 {
 	unsigned int next;
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd_table;
+#endif
 
 	/* We pick one entry at random to throw out.  Choosing the Least
 	 * Recently Used might be better, but this is easy. */
@@ -416,10 +585,27 @@
 		/* If the allocation fails, just keep using the one we have */
 		if (!cpu->lg->pgdirs[next].pgdir)
 			next = cpu->cpu_pgd;
-		else
-			/* This is a blank page, so there are no kernel
-			 * mappings: caller must map the stack! */
+		else {
+#ifdef CONFIG_X86_PAE
+			/* In PAE mode, allocate a pmd page and populate the
+			 * last pgd entry. */
+			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+			if (!pmd_table) {
+				free_page((long)cpu->lg->pgdirs[next].pgdir);
+				set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
+				next = cpu->cpu_pgd;
+			} else {
+				set_pgd(cpu->lg->pgdirs[next].pgdir +
+					SWITCHER_PGD_INDEX,
+					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
+				/* This is a blank page, so there are no kernel
+				 * mappings: caller must map the stack! */
+				*blank_pgdir = 1;
+			}
+#else
 			*blank_pgdir = 1;
+#endif
+		}
 	}
 	/* Record which Guest toplevel this shadows. */
 	cpu->lg->pgdirs[next].gpgdir = gpgdir;
@@ -431,7 +617,7 @@
 
 /*H:430 (iv) Switching page tables
  *
- * Now we've seen all the page table setting and manipulation, let's see what
+ * Now we've seen all the page table setting and manipulation, let's see
  * what happens when the Guest changes page tables (ie. changes the top-level
  * pgdir).  This occurs on almost every context switch. */
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
@@ -460,10 +646,25 @@
 
 	/* Every shadow pagetable this Guest has */
 	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].pgdir)
+		if (lg->pgdirs[i].pgdir) {
+#ifdef CONFIG_X86_PAE
+			pgd_t *spgd;
+			pmd_t *pmdpage;
+			unsigned int k;
+
+			/* Get the last pmd page. */
+			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
+			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
+
+			/* And release the pmd entries of that pmd page,
+			 * except for the switcher pmd. */
+			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
+				release_pmd(&pmdpage[k]);
+#endif
 			/* Every PGD entry except the Switcher at the top */
 			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-				release_pgd(lg, lg->pgdirs[i].pgdir + j);
+				release_pgd(lg->pgdirs[i].pgdir + j);
+		}
 }
 
 /* We also throw away everything when a Guest tells us it's changed a kernel
@@ -504,24 +705,37 @@
 {
 	/* Look up the matching shadow page directory entry. */
 	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+#endif
 
 	/* If the top level isn't present, there's no entry to update. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
-		/* Otherwise, we start by releasing the existing entry. */
-		pte_t *spte = spte_addr(*spgd, vaddr);
-		release_pte(*spte);
+#ifdef CONFIG_X86_PAE
+		spmd = spmd_addr(cpu, *spgd, vaddr);
+		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
+#endif
+			/* Otherwise, we start by releasing
+			 * the existing entry. */
+			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
+			release_pte(*spte);
 
-		/* If they're setting this entry as dirty or accessed, we might
-		 * as well put that entry they've given us in now.  This shaves
-		 * 10% off a copy-on-write micro-benchmark. */
-		if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
-			check_gpte(cpu, gpte);
-			*spte = gpte_to_spte(cpu, gpte,
-					     pte_flags(gpte) & _PAGE_DIRTY);
-		} else
-			/* Otherwise kill it and we can demand_page() it in
-			 * later. */
-			*spte = __pte(0);
+			/* If they're setting this entry as dirty or accessed,
+			 * we might as well put that entry they've given us
+			 * in now.  This shaves 10% off a
+			 * copy-on-write micro-benchmark. */
+			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+				check_gpte(cpu, gpte);
+				native_set_pte(spte,
+						gpte_to_spte(cpu, gpte,
+						pte_flags(gpte) & _PAGE_DIRTY));
+			} else
+				/* Otherwise kill it and we can demand_page()
+				 * it in later. */
+				native_set_pte(spte, __pte(0));
+#ifdef CONFIG_X86_PAE
+		}
+#endif
 	}
 }
 
@@ -568,12 +782,10 @@
  *
  * So with that in mind here's our code to to update a (top-level) PGD entry:
  */
-void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
 	int pgdir;
 
-	/* The kernel seems to try to initialize this early on: we ignore its
-	 * attempts to map over the Switcher. */
 	if (idx >= SWITCHER_PGD_INDEX)
 		return;
 
@@ -581,8 +793,14 @@
 	pgdir = find_pgdir(lg, gpgdir);
 	if (pgdir < ARRAY_SIZE(lg->pgdirs))
 		/* ... throw it away. */
-		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
+		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
 }
+#ifdef CONFIG_X86_PAE
+void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
+{
+	guest_pagetable_clear_all(&lg->cpus[0]);
+}
+#endif
 
 /* Once we know how much memory we have we can construct simple identity
  * (which set virtual == physical) and linear mappings
@@ -596,8 +814,16 @@
 {
 	pgd_t __user *pgdir;
 	pte_t __user *linear;
-	unsigned int mapped_pages, i, linear_pages, phys_linear;
 	unsigned long mem_base = (unsigned long)lg->mem_base;
+	unsigned int mapped_pages, i, linear_pages;
+#ifdef CONFIG_X86_PAE
+	pmd_t __user *pmds;
+	unsigned int j;
+	pgd_t pgd;
+	pmd_t pmd;
+#else
+	unsigned int phys_linear;
+#endif
 
 	/* We have mapped_pages frames to map, so we need
 	 * linear_pages page tables to map them. */
@@ -610,6 +836,9 @@
 	/* Now we use the next linear_pages pages as pte pages */
 	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
 
+#ifdef CONFIG_X86_PAE
+	pmds = (void *)linear - PAGE_SIZE;
+#endif
 	/* Linear mapping is easy: put every page's address into the
 	 * mapping in order. */
 	for (i = 0; i < mapped_pages; i++) {
@@ -621,6 +850,22 @@
 
 	/* The top level points to the linear page table pages above.
 	 * We setup the identity and linear mappings here. */
+#ifdef CONFIG_X86_PAE
+	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
+	     i += PTRS_PER_PTE, j++) {
+		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
+		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+
+		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
+			return -EFAULT;
+	}
+
+	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
+	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
+		return -EFAULT;
+	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
+		return -EFAULT;
+#else
 	phys_linear = (unsigned long)linear - mem_base;
 	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
 		pgd_t pgd;
@@ -633,6 +878,7 @@
 				    &pgd, sizeof(pgd)))
 			return -EFAULT;
 	}
+#endif
 
 	/* We return the top level (guest-physical) address: remember where
 	 * this is. */
@@ -648,7 +894,10 @@
 	u64 mem;
 	u32 initrd_size;
 	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-
+#ifdef CONFIG_X86_PAE
+	pgd_t *pgd;
+	pmd_t *pmd_table;
+#endif
 	/* Get the Guest memory size and the ramdisk size from the boot header
 	 * located at lg->mem_base (Guest address 0). */
 	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
@@ -663,6 +912,15 @@
 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	if (!lg->pgdirs[0].pgdir)
 		return -ENOMEM;
+#ifdef CONFIG_X86_PAE
+	pgd = lg->pgdirs[0].pgdir;
+	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
+	if (!pmd_table)
+		return -ENOMEM;
+
+	set_pgd(pgd + SWITCHER_PGD_INDEX,
+		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
+#endif
 	lg->cpus[0].cpu_pgd = 0;
 	return 0;
 }
@@ -672,17 +930,24 @@
 {
 	/* We get the kernel address: above this is all kernel memory. */
 	if (get_user(cpu->lg->kernel_address,
-		     &cpu->lg->lguest_data->kernel_address)
-	    /* We tell the Guest that it can't use the top 4MB of virtual
-	     * addresses used by the Switcher. */
-	    || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
-	    || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
+		&cpu->lg->lguest_data->kernel_address)
+		/* We tell the Guest that it can't use the top 2 or 4 MB
+		 * of virtual addresses used by the Switcher. */
+		|| put_user(RESERVE_MEM * 1024 * 1024,
+			&cpu->lg->lguest_data->reserve_mem)
+		|| put_user(cpu->lg->pgdirs[0].gpgdir,
+			&cpu->lg->lguest_data->pgdir))
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* In flush_user_mappings() we loop from 0 to
 	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
 	 * Switcher mappings, so check that now. */
+#ifdef CONFIG_X86_PAE
+	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
+		pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
+#else
 	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
+#endif
 		kill_guest(cpu, "bad kernel address %#lx",
 				 cpu->lg->kernel_address);
 }
@@ -708,16 +973,30 @@
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
-	pgd_t switcher_pgd;
 	pte_t regs_pte;
 	unsigned long pfn;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t switcher_pmd;
+	pmd_t *pmd_table;
+
+	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
+		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
+
+	pmd_table = __va(pgd_pfn(cpu->lg->
+			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
+								<< PAGE_SHIFT);
+	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
+#else
+	pgd_t switcher_pgd;
+
 	/* Make the last PGD entry for this Guest point to the Switcher's PTE
 	 * page for this CPU (with appropriate flags). */
-	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
+	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
 
 	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
 
+#endif
 	/* We also change the Switcher PTE page.  When we're running the Guest,
 	 * we want the Guest's "regs" page to appear where the first Switcher
 	 * page for this CPU is.  This is an optimization: when the Switcher
@@ -726,8 +1005,9 @@
 	 * page is already mapped there, we don't have to copy them out
 	 * again. */
 	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
-	regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
-	switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
+	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
+	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
+			regs_pte);
 }
 /*:*/
 
@@ -752,21 +1032,21 @@
 
 	/* The first entries are easy: they map the Switcher code. */
 	for (i = 0; i < pages; i++) {
-		pte[i] = mk_pte(switcher_page[i],
-				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
+		native_set_pte(&pte[i], mk_pte(switcher_page[i],
+				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 	}
 
 	/* The only other thing we map is this CPU's pair of pages. */
 	i = pages + cpu*2;
 
 	/* First page (Guest registers) is writable from the Guest */
-	pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
-			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
+	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
+			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
 
 	/* The second page contains the "struct lguest_ro_state", and is
 	 * read-only. */
-	pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
-			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
+	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
+			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 }
 
 /* We've made it through the page table code.  Perhaps our tired brains are
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 7ede64f..482ed5a 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -150,7 +150,7 @@
 {
 	/* We assume the Guest has the same number of GDT entries as the
 	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
-	if (num > ARRAY_SIZE(cpu->arch.gdt))
+	if (num >= ARRAY_SIZE(cpu->arch.gdt))
 		kill_guest(cpu, "too many gdt entries %i", num);
 
 	/* Set it up, then fix it. */
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 56df1ce..3319c2f 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -232,7 +232,7 @@
 		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 
 		if (sync_page_io(rdev->bdev, target,
-				 roundup(size, bdev_hardsect_size(rdev->bdev)),
+				 roundup(size, bdev_logical_block_size(rdev->bdev)),
 				 page, READ)) {
 			page->index = index;
 			attach_page_buffers(page, NULL); /* so that free_buffer will
@@ -287,7 +287,7 @@
 			int size = PAGE_SIZE;
 			if (page->index == bitmap->file_pages-1)
 				size = roundup(bitmap->last_page_size,
-					       bdev_hardsect_size(rdev->bdev));
+					       bdev_logical_block_size(rdev->bdev));
 			/* Just make sure we aren't corrupting data or
 			 * metadata
 			 */
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index a2e26c2..75d8081 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -178,7 +178,7 @@
 	}
 
 	/* Validate the chunk size against the device block size */
-	if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+	if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
 		*error = "Chunk size is not a multiple of device blocksize";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index be233bc..6fa8ccf 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -413,7 +413,8 @@
 		 * Buffer holds both header and bitset.
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
-				       bitset_size, ti->limits.hardsect_size);
+				       bitset_size,
+				       ti->limits.logical_block_size);
 
 		if (buf_size > dev->bdev->bd_inode->i_size) {
 			DMWARN("log device %s too small: need %llu bytes",
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index e75c6dd..2662a41 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -282,7 +282,7 @@
 	 */
 	if (!ps->store->chunk_size) {
 		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+		    bdev_logical_block_size(ps->store->cow->bdev) >> 9);
 		ps->store->chunk_mask = ps->store->chunk_size - 1;
 		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
 		chunk_size_supplied = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 429b50b..e9a73bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -108,7 +108,8 @@
 	lhs->max_hw_segments =
 		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
 
-	lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
+	lhs->logical_block_size = max(lhs->logical_block_size,
+				      rhs->logical_block_size);
 
 	lhs->max_segment_size =
 		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
@@ -509,7 +510,7 @@
 	 *        combine_restrictions_low()
 	 */
 	rs->max_sectors =
-		min_not_zero(rs->max_sectors, q->max_sectors);
+		min_not_zero(rs->max_sectors, queue_max_sectors(q));
 
 	/*
 	 * Check if merge fn is supported.
@@ -524,24 +525,25 @@
 
 	rs->max_phys_segments =
 		min_not_zero(rs->max_phys_segments,
-			     q->max_phys_segments);
+			     queue_max_phys_segments(q));
 
 	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
+		min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
 
-	rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
+	rs->logical_block_size = max(rs->logical_block_size,
+				     queue_logical_block_size(q));
 
 	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, q->max_segment_size);
+		min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
 
 	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
+		min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
 
 	rs->seg_boundary_mask =
 		min_not_zero(rs->seg_boundary_mask,
-			     q->seg_boundary_mask);
+			     queue_segment_boundary(q));
 
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
 
 	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
@@ -683,8 +685,8 @@
 		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
 	if (!rs->max_hw_segments)
 		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->hardsect_size)
-		rs->hardsect_size = 1 << SECTOR_SHIFT;
+	if (!rs->logical_block_size)
+		rs->logical_block_size = 1 << SECTOR_SHIFT;
 	if (!rs->max_segment_size)
 		rs->max_segment_size = MAX_SEGMENT_SIZE;
 	if (!rs->seg_boundary_mask)
@@ -912,13 +914,13 @@
 	 * restrictions.
 	 */
 	blk_queue_max_sectors(q, t->limits.max_sectors);
-	q->max_phys_segments = t->limits.max_phys_segments;
-	q->max_hw_segments = t->limits.max_hw_segments;
-	q->hardsect_size = t->limits.hardsect_size;
-	q->max_segment_size = t->limits.max_segment_size;
-	q->max_hw_sectors = t->limits.max_hw_sectors;
-	q->seg_boundary_mask = t->limits.seg_boundary_mask;
-	q->bounce_pfn = t->limits.bounce_pfn;
+	blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
+	blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
+	blk_queue_logical_block_size(q, t->limits.logical_block_size);
+	blk_queue_max_segment_size(q, t->limits.max_segment_size);
+	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
+	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
+	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b0..3fd8b1e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@
 	union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -656,8 +655,7 @@
 		/* the bio has been remapped so dispatch it */
 
 		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+				    tio->io->bio->bi_bdev->bd_dev, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7a36e38..64f1f3e0 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -146,7 +146,7 @@
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->num_sectors = rdev->sectors;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 641b211..20f6ac3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1202,7 +1202,7 @@
 	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
 
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
-	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
+	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
 	if (rdev->sb_size & bmask)
 		rdev->sb_size = (rdev->sb_size | bmask) + 1;
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 41ced0c..4ee31aa 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -303,7 +303,7 @@
 		 * merge_bvec_fn will be involved in multipath.)
 		 */
 			if (q->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(q) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			conf->working_disks++;
@@ -467,7 +467,7 @@
 		 * violating it, not that we ever expect a device with
 		 * a merge_bvec_fn to be involved in multipath */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!test_bit(Faulty, &rdev->flags))
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c08d755..925507e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -144,7 +144,7 @@
 		 */
 
 		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!smallest || (rdev1->sectors < smallest->sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 36df910..e23758b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1130,7 +1130,7 @@
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
@@ -1996,7 +1996,7 @@
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 499620a..750550c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1158,8 +1158,8 @@
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-				mddev->queue->max_sectors = (PAGE_SIZE>>9);
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -2145,8 +2145,8 @@
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-			mddev->queue->max_sectors = (PAGE_SIZE>>9);
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5d400ae..bef8766 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
-		  int previous, int noblock)
+		  int previous, int noblock, int noquiesce)
 {
 	struct stripe_head *sh;
 
@@ -372,7 +372,7 @@
 
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
-				    conf->quiesce == 0,
+				    conf->quiesce == 0 || noquiesce,
 				    conf->device_lock, /* nothing */);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
@@ -2671,7 +2671,7 @@
 			sector_t bn = compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
 							  &dd_idx, NULL);
-			sh2 = get_active_stripe(conf, s, 0, 1);
+			sh2 = get_active_stripe(conf, s, 0, 1, 1);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -2944,7 +2944,7 @@
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@
 
 	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3288,7 +3288,7 @@
 	int i;
 
 	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i = 0; i < conf->raid_disks; i++) {
 		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
 			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3463,10 +3463,10 @@
 {
 	struct request_queue *q = bdev_get_queue(bi->bi_bdev);
 
-	if ((bi->bi_size>>9) > q->max_sectors)
+	if ((bi->bi_size>>9) > queue_max_sectors(q))
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > q->max_phys_segments)
+	if (bi->bi_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
 	if (q->merge_bvec_fn)
@@ -3675,7 +3675,7 @@
 			(unsigned long long)logical_sector);
 
 		sh = get_active_stripe(conf, new_sector, previous,
-				       (bi->bi_rw&RWA_MASK));
+				       (bi->bi_rw&RWA_MASK), 0);
 		if (sh) {
 			if (unlikely(previous)) {
 				/* expansion might have moved on while waiting for a
@@ -3873,7 +3873,7 @@
 	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
 		int skipped = 0;
-		sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+		sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -3916,13 +3916,13 @@
 		raid5_compute_sector(conf, stripe_addr*(new_data_disks),
 				     1, &dd_idx, NULL);
 	last_sector =
-		raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+		raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
 					    *(new_data_disks) - 1),
 				     1, &dd_idx, NULL);
 	if (last_sector >= mddev->dev_sectors)
 		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		sh = get_active_stripe(conf, first_sector, 1, 0);
+		sh = get_active_stripe(conf, first_sector, 1, 0, 1);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
@@ -4022,9 +4022,9 @@
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-	sh = get_active_stripe(conf, sector_nr, 0, 1);
+	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
 	if (sh == NULL) {
-		sh = get_active_stripe(conf, sector_nr, 0, 0);
+		sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -4034,7 +4034,7 @@
 	 * We don't need to check the 'failed' flag as when that gets set,
 	 * recovery aborts.
 	 */
-	for (i=0; i<mddev->raid_disks; i++)
+	for (i = 0; i < conf->raid_disks; i++)
 		if (conf->disks[i].rdev == NULL)
 			still_degraded = 1;
 
@@ -4086,7 +4086,7 @@
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1);
+		sh = get_active_stripe(conf, sector, 0, 1, 0);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
diff --git a/drivers/media/video/ivtv/ivtv-queue.c b/drivers/media/video/ivtv/ivtv-queue.c
index ff7b7de..7fde36e 100644
--- a/drivers/media/video/ivtv/ivtv-queue.c
+++ b/drivers/media/video/ivtv/ivtv-queue.c
@@ -230,7 +230,8 @@
 		return -ENOMEM;
 	}
 	if (ivtv_might_use_dma(s)) {
-		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, sizeof(struct ivtv_sg_element), s->dma);
+		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma,
+				sizeof(struct ivtv_sg_element), PCI_DMA_TODEVICE);
 		ivtv_stream_sync_for_cpu(s);
 	}
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index de143de..7847bbc 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -672,15 +672,14 @@
 					       msb->req_sg);
 
 		if (!msb->seg_count) {
-			chunk = __blk_end_request(msb->block_req, -ENOMEM,
-					blk_rq_cur_bytes(msb->block_req));
+			chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
 			continue;
 		}
 
-		t_sec = msb->block_req->sector << 9;
+		t_sec = blk_rq_pos(msb->block_req) << 9;
 		sector_div(t_sec, msb->page_size);
 
-		count = msb->block_req->nr_sectors << 9;
+		count = blk_rq_bytes(msb->block_req);
 		count /= msb->page_size;
 
 		param.system = msb->system;
@@ -705,8 +704,8 @@
 		return 0;
 	}
 
-	dev_dbg(&card->dev, "elv_next\n");
-	msb->block_req = elv_next_request(msb->queue);
+	dev_dbg(&card->dev, "blk_fetch\n");
+	msb->block_req = blk_fetch_request(msb->queue);
 	if (!msb->block_req) {
 		dev_dbg(&card->dev, "issue end\n");
 		return -EAGAIN;
@@ -745,7 +744,7 @@
 					t_len *= msb->page_size;
 			}
 		} else
-			t_len = msb->block_req->nr_sectors << 9;
+			t_len = blk_rq_bytes(msb->block_req);
 
 		dev_dbg(&card->dev, "transferred %x (%d)\n", t_len, error);
 
@@ -825,8 +824,8 @@
 		return;
 
 	if (msb->eject) {
-		while ((req = elv_next_request(q)) != NULL)
-			__blk_end_request(req, -ENODEV, blk_rq_bytes(req));
+		while ((req = blk_fetch_request(q)) != NULL)
+			__blk_end_request_all(req, -ENODEV);
 
 		return;
 	}
@@ -1243,7 +1242,7 @@
 
 	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
 
-	blk_queue_hardsect_size(msb->queue, msb->page_size);
+	blk_queue_logical_block_size(msb->queue, msb->page_size);
 
 	capacity = be16_to_cpu(sys_info->user_block_count);
 	capacity *= be16_to_cpu(sys_info->block_size);
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 5d496a9..44b9315 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -146,7 +146,6 @@
 static MPT_RESETHANDLER		 MptResetHandlers[MPT_MAX_PROTOCOL_DRIVERS];
 static struct mpt_pci_driver 	*MptDeviceDriverHandlers[MPT_MAX_PROTOCOL_DRIVERS];
 
-static DECLARE_WAIT_QUEUE_HEAD(mpt_waitq);
 
 /*
  *  Driver Callback Index's
@@ -159,7 +158,8 @@
  *  Forward protos...
  */
 static irqreturn_t mpt_interrupt(int irq, void *bus_id);
-static int	mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
+static int	mptbase_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+		MPT_FRAME_HDR *reply);
 static int	mpt_handshake_req_reply_wait(MPT_ADAPTER *ioc, int reqBytes,
 			u32 *req, int replyBytes, u16 *u16reply, int maxwait,
 			int sleepFlag);
@@ -190,9 +190,9 @@
 static int	mpt_readScsiDevicePageHeaders(MPT_ADAPTER *ioc, int portnum);
 static void 	mpt_read_ioc_pg_1(MPT_ADAPTER *ioc);
 static void 	mpt_read_ioc_pg_4(MPT_ADAPTER *ioc);
-static void	mpt_timer_expired(unsigned long data);
 static void	mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc);
-static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch);
+static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch,
+	int sleepFlag);
 static int	SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp);
 static int	mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag);
 static int	mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init);
@@ -207,8 +207,8 @@
 #endif
 static void	mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc);
 
-//int		mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag);
-static int	ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *evReply, int *evHandlers);
+static int	ProcessEventNotification(MPT_ADAPTER *ioc,
+		EventNotificationReply_t *evReply, int *evHandlers);
 static void	mpt_iocstatus_info(MPT_ADAPTER *ioc, u32 ioc_status, MPT_FRAME_HDR *mf);
 static void	mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info);
 static void	mpt_spi_log_info(MPT_ADAPTER *ioc, u32 log_info);
@@ -277,6 +277,56 @@
 }
 
 /**
+ * mpt_is_discovery_complete - determine if discovery has completed
+ * @ioc: per adatper instance
+ *
+ * Returns 1 when discovery completed, else zero.
+ */
+static int
+mpt_is_discovery_complete(MPT_ADAPTER *ioc)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasIOUnitPage0_t *buffer;
+	dma_addr_t dma_handle;
+	int rc = 0;
+
+	memset(&hdr, 0, sizeof(ConfigExtendedPageHeader_t));
+	memset(&cfg, 0, sizeof(CONFIGPARMS));
+	hdr.PageVersion = MPI_SASIOUNITPAGE0_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_IO_UNIT;
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if ((mpt_config(ioc, &cfg)))
+		goto out;
+	if (!hdr.ExtPageLength)
+		goto out;
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+	    &dma_handle);
+	if (!buffer)
+		goto out;
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	if ((mpt_config(ioc, &cfg)))
+		goto out_free_consistent;
+
+	if (!(buffer->PhyData[0].PortFlags &
+	    MPI_SAS_IOUNIT0_PORT_FLAGS_DISCOVERY_IN_PROGRESS))
+		rc = 1;
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+	    buffer, dma_handle);
+ out:
+	return rc;
+}
+
+/**
  *	mpt_fault_reset_work - work performed on workq after ioc fault
  *	@work: input argument, used to derive ioc
  *
@@ -290,7 +340,7 @@
 	int		 rc;
 	unsigned long	 flags;
 
-	if (ioc->diagPending || !ioc->active)
+	if (ioc->ioc_reset_in_progress || !ioc->active)
 		goto out;
 
 	ioc_raw_state = mpt_GetIocState(ioc, 0);
@@ -307,6 +357,12 @@
 			printk(MYIOC_s_WARN_FMT "IOC is in FAULT state after "
 			    "reset (%04xh)\n", ioc->name, ioc_raw_state &
 			    MPI_DOORBELL_DATA_MASK);
+	} else if (ioc->bus_type == SAS && ioc->sas_discovery_quiesce_io) {
+		if ((mpt_is_discovery_complete(ioc))) {
+			devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "clearing "
+			    "discovery_quiesce_io flag\n", ioc->name));
+			ioc->sas_discovery_quiesce_io = 0;
+		}
 	}
 
  out:
@@ -317,11 +373,11 @@
 		ioc = ioc->alt_ioc;
 
 	/* rearm the timer */
-	spin_lock_irqsave(&ioc->fault_reset_work_lock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	if (ioc->reset_work_q)
 		queue_delayed_work(ioc->reset_work_q, &ioc->fault_reset_work,
 			msecs_to_jiffies(MPT_POLLING_INTERVAL));
-	spin_unlock_irqrestore(&ioc->fault_reset_work_lock, flags);
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 }
 
 
@@ -501,9 +557,9 @@
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mpt_base_reply - MPT base driver's callback routine
+ *	mptbase_reply - MPT base driver's callback routine
  *	@ioc: Pointer to MPT_ADAPTER structure
- *	@mf: Pointer to original MPT request frame
+ *	@req: Pointer to original MPT request frame
  *	@reply: Pointer to MPT reply frame (NULL if TurboReply)
  *
  *	MPT base driver's callback routine; all base driver
@@ -514,122 +570,49 @@
  *	should be freed, or 0 if it shouldn't.
  */
 static int
-mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply)
+mptbase_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 {
+	EventNotificationReply_t *pEventReply;
+	u8 event;
+	int evHandlers;
 	int freereq = 1;
-	u8 func;
 
-	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply() called\n", ioc->name));
-#ifdef CONFIG_FUSION_LOGGING
-	if ((ioc->debug_level & MPT_DEBUG_MSG_FRAME) &&
-			!(reply->u.hdr.MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY)) {
-		dmfprintk(ioc, printk(MYIOC_s_INFO_FMT ": Original request frame (@%p) header\n",
-		    ioc->name, mf));
-		DBG_DUMP_REQUEST_FRAME_HDR(ioc, (u32 *)mf);
-	}
-#endif
-
-	func = reply->u.hdr.Function;
-	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply, Function=%02Xh\n",
-			ioc->name, func));
-
-	if (func == MPI_FUNCTION_EVENT_NOTIFICATION) {
-		EventNotificationReply_t *pEvReply = (EventNotificationReply_t *) reply;
-		int evHandlers = 0;
-		int results;
-
-		results = ProcessEventNotification(ioc, pEvReply, &evHandlers);
-		if (results != evHandlers) {
-			/* CHECKME! Any special handling needed here? */
-			devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "Called %d event handlers, sum results = %d\n",
-					ioc->name, evHandlers, results));
-		}
-
-		/*
-		 *	Hmmm...  It seems that EventNotificationReply is an exception
-		 *	to the rule of one reply per request.
-		 */
-		if (pEvReply->MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY) {
+	switch (reply->u.hdr.Function) {
+	case MPI_FUNCTION_EVENT_NOTIFICATION:
+		pEventReply = (EventNotificationReply_t *)reply;
+		evHandlers = 0;
+		ProcessEventNotification(ioc, pEventReply, &evHandlers);
+		event = le32_to_cpu(pEventReply->Event) & 0xFF;
+		if (pEventReply->MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY)
 			freereq = 0;
-		} else {
-			devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "EVENT_NOTIFICATION reply %p returns Request frame\n",
-				ioc->name, pEvReply));
+		if (event != MPI_EVENT_EVENT_CHANGE)
+			break;
+	case MPI_FUNCTION_CONFIG:
+	case MPI_FUNCTION_SAS_IO_UNIT_CONTROL:
+		ioc->mptbase_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+		if (reply) {
+			ioc->mptbase_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+			memcpy(ioc->mptbase_cmds.reply, reply,
+			    min(MPT_DEFAULT_FRAME_SIZE,
+				4 * reply->u.reply.MsgLength));
 		}
-
-#ifdef CONFIG_PROC_FS
-//		LogEvent(ioc, pEvReply);
-#endif
-
-	} else if (func == MPI_FUNCTION_EVENT_ACK) {
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply, EventAck reply received\n",
-				ioc->name));
-	} else if (func == MPI_FUNCTION_CONFIG) {
-		CONFIGPARMS *pCfg;
-		unsigned long flags;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "config_complete (mf=%p,mr=%p)\n",
-				ioc->name, mf, reply));
-
-		pCfg = * ((CONFIGPARMS **)((u8 *) mf + ioc->req_sz - sizeof(void *)));
-
-		if (pCfg) {
-			/* disable timer and remove from linked list */
-			del_timer(&pCfg->timer);
-
-			spin_lock_irqsave(&ioc->FreeQlock, flags);
-			list_del(&pCfg->linkage);
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-			/*
-			 *	If IOC Status is SUCCESS, save the header
-			 *	and set the status code to GOOD.
-			 */
-			pCfg->status = MPT_CONFIG_ERROR;
-			if (reply) {
-				ConfigReply_t	*pReply = (ConfigReply_t *)reply;
-				u16		 status;
-
-				status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-				dcprintk(ioc, printk(MYIOC_s_NOTE_FMT "  IOCStatus=%04xh, IOCLogInfo=%08xh\n",
-				     ioc->name, status, le32_to_cpu(pReply->IOCLogInfo)));
-
-				pCfg->status = status;
-				if (status == MPI_IOCSTATUS_SUCCESS) {
-					if ((pReply->Header.PageType &
-					    MPI_CONFIG_PAGETYPE_MASK) ==
-					    MPI_CONFIG_PAGETYPE_EXTENDED) {
-						pCfg->cfghdr.ehdr->ExtPageLength =
-						    le16_to_cpu(pReply->ExtPageLength);
-						pCfg->cfghdr.ehdr->ExtPageType =
-						    pReply->ExtPageType;
-					}
-					pCfg->cfghdr.hdr->PageVersion = pReply->Header.PageVersion;
-
-					/* If this is a regular header, save PageLength. */
-					/* LMP Do this better so not using a reserved field! */
-					pCfg->cfghdr.hdr->PageLength = pReply->Header.PageLength;
-					pCfg->cfghdr.hdr->PageNumber = pReply->Header.PageNumber;
-					pCfg->cfghdr.hdr->PageType = pReply->Header.PageType;
-				}
-			}
-
-			/*
-			 *	Wake up the original calling thread
-			 */
-			pCfg->wait_done = 1;
-			wake_up(&mpt_waitq);
-		}
-	} else if (func == MPI_FUNCTION_SAS_IO_UNIT_CONTROL) {
-		/* we should be always getting a reply frame */
-		memcpy(ioc->persist_reply_frame, reply,
-		    min(MPT_DEFAULT_FRAME_SIZE,
-		    4*reply->u.reply.MsgLength));
-		del_timer(&ioc->persist_timer);
-		ioc->persist_wait_done = 1;
-		wake_up(&mpt_waitq);
-	} else {
-		printk(MYIOC_s_ERR_FMT "Unexpected msg function (=%02Xh) reply received!\n",
-				ioc->name, func);
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->mptbase_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+			complete(&ioc->mptbase_cmds.done);
+		} else
+			freereq = 0;
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_FREE_MF)
+			freereq = 1;
+		break;
+	case MPI_FUNCTION_EVENT_ACK:
+		devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "EventAck reply received\n", ioc->name));
+		break;
+	default:
+		printk(MYIOC_s_ERR_FMT
+		    "Unexpected msg function (=%02Xh) reply received!\n",
+		    ioc->name, reply->u.hdr.Function);
+		break;
 	}
 
 	/*
@@ -988,17 +971,21 @@
 
 	/*  Put Request back on FreeQ!  */
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	mf->u.frame.linkage.arg1 = 0xdeadbeaf; /* signature to know if this mf is freed */
+	if (cpu_to_le32(mf->u.frame.linkage.arg1) == 0xdeadbeaf)
+		goto out;
+	/* signature to know if this mf is freed */
+	mf->u.frame.linkage.arg1 = cpu_to_le32(0xdeadbeaf);
 	list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ);
 #ifdef MFCNT
 	ioc->mfcnt--;
 #endif
+ out:
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mpt_add_sge - Place a simple SGE at address pAddr.
+ *	mpt_add_sge - Place a simple 32 bit SGE at address pAddr.
  *	@pAddr: virtual address for SGE
  *	@flagslength: SGE flags and data transfer length
  *	@dma_addr: Physical address
@@ -1006,23 +993,117 @@
  *	This routine places a MPT request frame back on the MPT adapter's
  *	FreeQ.
  */
-void
-mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr)
+static void
+mpt_add_sge(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
 {
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
+	pSge->FlagsLength = cpu_to_le32(flagslength);
+	pSge->Address = cpu_to_le32(dma_addr);
+}
+
+/**
+ *	mpt_add_sge_64bit - Place a simple 64 bit SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@flagslength: SGE flags and data transfer length
+ *	@dma_addr: Physical address
+ *
+ *	This routine places a MPT request frame back on the MPT adapter's
+ *	FreeQ.
+ **/
+static void
+mpt_add_sge_64bit(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
+{
+	SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	pSge->Address.Low = cpu_to_le32
+			(lower_32_bits((unsigned long)(dma_addr)));
+	pSge->Address.High = cpu_to_le32
+			(upper_32_bits((unsigned long)dma_addr));
+	pSge->FlagsLength = cpu_to_le32
+			((flagslength | MPT_SGE_FLAGS_64_BIT_ADDRESSING));
+}
+
+/**
+ *	mpt_add_sge_64bit_1078 - Place a simple 64 bit SGE at address pAddr
+ *	(1078 workaround).
+ *	@pAddr: virtual address for SGE
+ *	@flagslength: SGE flags and data transfer length
+ *	@dma_addr: Physical address
+ *
+ *	This routine places a MPT request frame back on the MPT adapter's
+ *	FreeQ.
+ **/
+static void
+mpt_add_sge_64bit_1078(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
+{
+	SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	u32 tmp;
+
+	pSge->Address.Low = cpu_to_le32
+			(lower_32_bits((unsigned long)(dma_addr)));
+	tmp = (u32)(upper_32_bits((unsigned long)dma_addr));
+
+	/*
+	 * 1078 errata workaround for the 36GB limitation
+	 */
+	if ((((u64)dma_addr + MPI_SGE_LENGTH(flagslength)) >> 32)  == 9) {
+		flagslength |=
+		    MPI_SGE_SET_FLAGS(MPI_SGE_FLAGS_LOCAL_ADDRESS);
+		tmp |= (1<<31);
+		if (mpt_debug_level & MPT_DEBUG_36GB_MEM)
+			printk(KERN_DEBUG "1078 P0M2 addressing for "
+			    "addr = 0x%llx len = %d\n",
+			    (unsigned long long)dma_addr,
+			    MPI_SGE_LENGTH(flagslength));
+	}
+
+	pSge->Address.High = cpu_to_le32(tmp);
+	pSge->FlagsLength = cpu_to_le32(
+		(flagslength | MPT_SGE_FLAGS_64_BIT_ADDRESSING));
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
+ *	mpt_add_chain - Place a 32 bit chain SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@next: nextChainOffset value (u32's)
+ *	@length: length of next SGL segment
+ *	@dma_addr: Physical address
+ *
+ */
+static void
+mpt_add_chain(void *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
+{
+		SGEChain32_t *pChain = (SGEChain32_t *) pAddr;
+		pChain->Length = cpu_to_le16(length);
+		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT;
+		pChain->NextChainOffset = next;
+		pChain->Address = cpu_to_le32(dma_addr);
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
+ *	mpt_add_chain_64bit - Place a 64 bit chain SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@next: nextChainOffset value (u32's)
+ *	@length: length of next SGL segment
+ *	@dma_addr: Physical address
+ *
+ */
+static void
+mpt_add_chain_64bit(void *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
+{
+		SGEChain64_t *pChain = (SGEChain64_t *) pAddr;
 		u32 tmp = dma_addr & 0xFFFFFFFF;
 
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pSge->Address.High = cpu_to_le32(tmp);
+		pChain->Length = cpu_to_le16(length);
+		pChain->Flags = (MPI_SGE_FLAGS_CHAIN_ELEMENT |
+				 MPI_SGE_FLAGS_64_BIT_ADDRESSING);
 
-	} else {
-		SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address = cpu_to_le32(dma_addr);
-	}
+		pChain->NextChainOffset = next;
+
+		pChain->Address.Low = cpu_to_le32(tmp);
+		tmp = (u32)(upper_32_bits((unsigned long)dma_addr));
+		pChain->Address.High = cpu_to_le32(tmp);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1225,7 +1306,7 @@
 	}
 	flags_length = flags_length << MPI_SGE_FLAGS_SHIFT;
 	flags_length |= ioc->HostPageBuffer_sz;
-	mpt_add_sge(psge, flags_length, ioc->HostPageBuffer_dma);
+	ioc->add_sge(psge, flags_length, ioc->HostPageBuffer_dma);
 	ioc->facts.HostPageBufferSGE = ioc_init->HostPageBufferSGE;
 
 return 0;
@@ -1534,21 +1615,42 @@
 
 	pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
 
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
-	    && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
-		    ": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
-		    ioc->name));
-	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
-	    && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
-		    ": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
-		    ioc->name));
+	if (sizeof(dma_addr_t) > 4) {
+		const uint64_t required_mask = dma_get_required_mask
+		    (&pdev->dev);
+		if (required_mask > DMA_BIT_MASK(32)
+			&& !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
+			&& !pci_set_consistent_dma_mask(pdev,
+						 DMA_BIT_MASK(64))) {
+			ioc->dma_mask = DMA_BIT_MASK(64);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
+			&& !pci_set_consistent_dma_mask(pdev,
+						DMA_BIT_MASK(32))) {
+			ioc->dma_mask = DMA_BIT_MASK(32);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else {
+			printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
+			    ioc->name, pci_name(pdev));
+			return r;
+		}
 	} else {
-		printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
-		    ioc->name, pci_name(pdev));
-		pci_release_selected_regions(pdev, ioc->bars);
-		return r;
+		if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
+			&& !pci_set_consistent_dma_mask(pdev,
+						DMA_BIT_MASK(32))) {
+			ioc->dma_mask = DMA_BIT_MASK(32);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else {
+			printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
+			    ioc->name, pci_name(pdev));
+			return r;
+		}
 	}
 
 	mem_phys = msize = 0;
@@ -1632,6 +1734,7 @@
 
 	ioc->id = mpt_ids++;
 	sprintf(ioc->name, "ioc%d", ioc->id);
+	dinitprintk(ioc, printk(KERN_WARNING MYNAM ": mpt_adapter_install\n"));
 
 	/*
 	 * set initial debug level
@@ -1650,14 +1753,36 @@
 		return r;
 	}
 
+	/*
+	 * Setting up proper handlers for scatter gather handling
+	 */
+	if (ioc->dma_mask == DMA_BIT_MASK(64)) {
+		if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1078)
+			ioc->add_sge = &mpt_add_sge_64bit_1078;
+		else
+			ioc->add_sge = &mpt_add_sge_64bit;
+		ioc->add_chain = &mpt_add_chain_64bit;
+		ioc->sg_addr_size = 8;
+	} else {
+		ioc->add_sge = &mpt_add_sge;
+		ioc->add_chain = &mpt_add_chain;
+		ioc->sg_addr_size = 4;
+	}
+	ioc->SGE_size = sizeof(u32) + ioc->sg_addr_size;
+
 	ioc->alloc_total = sizeof(MPT_ADAPTER);
 	ioc->req_sz = MPT_DEFAULT_FRAME_SIZE;		/* avoid div by zero! */
 	ioc->reply_sz = MPT_REPLY_FRAME_SIZE;
 
 	ioc->pcidev = pdev;
-	ioc->diagPending = 0;
-	spin_lock_init(&ioc->diagLock);
-	spin_lock_init(&ioc->initializing_hba_lock);
+
+	spin_lock_init(&ioc->taskmgmt_lock);
+	mutex_init(&ioc->internal_cmds.mutex);
+	init_completion(&ioc->internal_cmds.done);
+	mutex_init(&ioc->mptbase_cmds.mutex);
+	init_completion(&ioc->mptbase_cmds.done);
+	mutex_init(&ioc->taskmgmt_cmds.mutex);
+	init_completion(&ioc->taskmgmt_cmds.done);
 
 	/* Initialize the event logging.
 	 */
@@ -1670,16 +1795,13 @@
 	ioc->mfcnt = 0;
 #endif
 
+	ioc->sh = NULL;
 	ioc->cached_fw = NULL;
 
 	/* Initilize SCSI Config Data structure
 	 */
 	memset(&ioc->spi_data, 0, sizeof(SpiCfgData));
 
-	/* Initialize the running configQ head.
-	 */
-	INIT_LIST_HEAD(&ioc->configQ);
-
 	/* Initialize the fc rport list head.
 	 */
 	INIT_LIST_HEAD(&ioc->fc_rports);
@@ -1690,9 +1812,8 @@
 
 	/* Initialize workqueue */
 	INIT_DELAYED_WORK(&ioc->fault_reset_work, mpt_fault_reset_work);
-	spin_lock_init(&ioc->fault_reset_work_lock);
 
-	snprintf(ioc->reset_work_q_name, sizeof(ioc->reset_work_q_name),
+	snprintf(ioc->reset_work_q_name, MPT_KOBJ_NAME_LEN,
 		 "mpt_poll_%d", ioc->id);
 	ioc->reset_work_q =
 		create_singlethread_workqueue(ioc->reset_work_q_name);
@@ -1767,11 +1888,14 @@
 	case MPI_MANUFACTPAGE_DEVID_SAS1064:
 	case MPI_MANUFACTPAGE_DEVID_SAS1068:
 		ioc->errata_flag_1064 = 1;
+		ioc->bus_type = SAS;
+		break;
 
 	case MPI_MANUFACTPAGE_DEVID_SAS1064E:
 	case MPI_MANUFACTPAGE_DEVID_SAS1068E:
 	case MPI_MANUFACTPAGE_DEVID_SAS1078:
 		ioc->bus_type = SAS;
+		break;
 	}
 
 
@@ -1813,6 +1937,11 @@
 	 */
 	mpt_detect_bound_ports(ioc, pdev);
 
+	INIT_LIST_HEAD(&ioc->fw_event_list);
+	spin_lock_init(&ioc->fw_event_lock);
+	snprintf(ioc->fw_event_q_name, MPT_KOBJ_NAME_LEN, "mpt/%d", ioc->id);
+	ioc->fw_event_q = create_singlethread_workqueue(ioc->fw_event_q_name);
+
 	if ((r = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_BRINGUP,
 	    CAN_SLEEP)) != 0){
 		printk(MYIOC_s_ERR_FMT "didn't initialize properly! (%d)\n",
@@ -1885,13 +2014,18 @@
 	/*
 	 * Stop polling ioc for fault condition
 	 */
-	spin_lock_irqsave(&ioc->fault_reset_work_lock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	wq = ioc->reset_work_q;
 	ioc->reset_work_q = NULL;
-	spin_unlock_irqrestore(&ioc->fault_reset_work_lock, flags);
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 	cancel_delayed_work(&ioc->fault_reset_work);
 	destroy_workqueue(wq);
 
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	wq = ioc->fw_event_q;
+	ioc->fw_event_q = NULL;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+	destroy_workqueue(wq);
 
 	sprintf(pname, MPT_PROCFS_MPTBASEDIR "/%s/summary", ioc->name);
 	remove_proc_entry(pname, NULL);
@@ -1994,6 +2128,21 @@
 	if (err)
 		return err;
 
+	if (ioc->dma_mask == DMA_BIT_MASK(64)) {
+		if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1078)
+			ioc->add_sge = &mpt_add_sge_64bit_1078;
+		else
+			ioc->add_sge = &mpt_add_sge_64bit;
+		ioc->add_chain = &mpt_add_chain_64bit;
+		ioc->sg_addr_size = 8;
+	} else {
+
+		ioc->add_sge = &mpt_add_sge;
+		ioc->add_chain = &mpt_add_chain;
+		ioc->sg_addr_size = 4;
+	}
+	ioc->SGE_size = sizeof(u32) + ioc->sg_addr_size;
+
 	printk(MYIOC_s_INFO_FMT "pci-resume: ioc-state=0x%x,doorbell=0x%x\n",
 	    ioc->name, (mpt_GetIocState(ioc, 1) >> MPI_IOC_STATE_SHIFT),
 	    CHIPREG_READ32(&ioc->chip->Doorbell));
@@ -2091,12 +2240,16 @@
 	ioc->active = 0;
 
 	if (ioc->alt_ioc) {
-		if (ioc->alt_ioc->active)
+		if (ioc->alt_ioc->active ||
+		    reason == MPT_HOSTEVENT_IOC_RECOVER) {
 			reset_alt_ioc_active = 1;
-
-		/* Disable alt-IOC's reply interrupts (and FreeQ) for a bit ... */
-		CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask, 0xFFFFFFFF);
-		ioc->alt_ioc->active = 0;
+			/* Disable alt-IOC's reply interrupts
+			 *  (and FreeQ) for a bit
+			 **/
+			CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask,
+				0xFFFFFFFF);
+			ioc->alt_ioc->active = 0;
+		}
 	}
 
 	hard = 1;
@@ -2117,9 +2270,11 @@
 			}
 
 		} else {
-			printk(MYIOC_s_WARN_FMT "NOT READY!\n", ioc->name);
+			printk(MYIOC_s_WARN_FMT
+			    "NOT READY WARNING!\n", ioc->name);
 		}
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* hard_reset_done = 0 if a soft reset was performed
@@ -2129,7 +2284,9 @@
 		if ((rc = MakeIocReady(ioc->alt_ioc, 0, sleepFlag)) == 0)
 			alt_ioc_ready = 1;
 		else
-			printk(MYIOC_s_WARN_FMT "alt_ioc not ready!\n", ioc->alt_ioc->name);
+			printk(MYIOC_s_WARN_FMT
+			    ": alt-ioc Not ready WARNING!\n",
+			    ioc->alt_ioc->name);
 	}
 
 	for (ii=0; ii<5; ii++) {
@@ -2150,7 +2307,8 @@
 	if (alt_ioc_ready) {
 		if ((rc = GetIocFacts(ioc->alt_ioc, sleepFlag, reason)) != 0) {
 			dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			    "Initial Alt IocFacts failed rc=%x\n", ioc->name, rc));
+			    "Initial Alt IocFacts failed rc=%x\n",
+			    ioc->name, rc));
 			/* Retry - alt IOC was initialized once
 			 */
 			rc = GetIocFacts(ioc->alt_ioc, sleepFlag, reason);
@@ -2194,16 +2352,20 @@
 			    IRQF_SHARED, ioc->name, ioc);
 			if (rc < 0) {
 				printk(MYIOC_s_ERR_FMT "Unable to allocate "
-				    "interrupt %d!\n", ioc->name, ioc->pcidev->irq);
+				    "interrupt %d!\n",
+				    ioc->name, ioc->pcidev->irq);
 				if (ioc->msi_enable)
 					pci_disable_msi(ioc->pcidev);
-				return -EBUSY;
+				ret = -EBUSY;
+				goto out;
 			}
 			irq_allocated = 1;
 			ioc->pci_irq = ioc->pcidev->irq;
 			pci_set_master(ioc->pcidev);		/* ?? */
-			dprintk(ioc, printk(MYIOC_s_INFO_FMT "installed at interrupt "
-			    "%d\n", ioc->name, ioc->pcidev->irq));
+			pci_set_drvdata(ioc->pcidev, ioc);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+			    "installed at interrupt %d\n", ioc->name,
+			    ioc->pcidev->irq));
 		}
 	}
 
@@ -2212,17 +2374,22 @@
 	 * init as upper addresses are needed for init.
 	 * If fails, continue with alt-ioc processing
 	 */
+	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "PrimeIocFifos\n",
+	    ioc->name));
 	if ((ret == 0) && ((rc = PrimeIocFifos(ioc)) != 0))
 		ret = -3;
 
 	/* May need to check/upload firmware & data here!
 	 * If fails, continue with alt-ioc processing
 	 */
+	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "SendIocInit\n",
+	    ioc->name));
 	if ((ret == 0) && ((rc = SendIocInit(ioc, sleepFlag)) != 0))
 		ret = -4;
 // NEW!
 	if (alt_ioc_ready && ((rc = PrimeIocFifos(ioc->alt_ioc)) != 0)) {
-		printk(MYIOC_s_WARN_FMT ": alt_ioc (%d) FIFO mgmt alloc!\n",
+		printk(MYIOC_s_WARN_FMT
+		    ": alt-ioc (%d) FIFO mgmt alloc WARNING!\n",
 		    ioc->alt_ioc->name, rc);
 		alt_ioc_ready = 0;
 		reset_alt_ioc_active = 0;
@@ -2232,8 +2399,9 @@
 		if ((rc = SendIocInit(ioc->alt_ioc, sleepFlag)) != 0) {
 			alt_ioc_ready = 0;
 			reset_alt_ioc_active = 0;
-			printk(MYIOC_s_WARN_FMT "alt_ioc (%d) init failure!\n",
-			    ioc->alt_ioc->name, rc);
+			printk(MYIOC_s_WARN_FMT
+				": alt-ioc: (%d) init failure WARNING!\n",
+					ioc->alt_ioc->name, rc);
 		}
 	}
 
@@ -2269,28 +2437,36 @@
 		}
 	}
 
+	/*  Enable MPT base driver management of EventNotification
+	 *  and EventAck handling.
+	 */
+	if ((ret == 0) && (!ioc->facts.EventState)) {
+		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+			"SendEventNotification\n",
+		    ioc->name));
+		ret = SendEventNotification(ioc, 1, sleepFlag);	/* 1=Enable */
+	}
+
+	if (ioc->alt_ioc && alt_ioc_ready && !ioc->alt_ioc->facts.EventState)
+		rc = SendEventNotification(ioc->alt_ioc, 1, sleepFlag);
+
 	if (ret == 0) {
 		/* Enable! (reply interrupt) */
 		CHIPREG_WRITE32(&ioc->chip->IntMask, MPI_HIM_DIM);
 		ioc->active = 1;
 	}
-
-	if (reset_alt_ioc_active && ioc->alt_ioc) {
-		/* (re)Enable alt-IOC! (reply interrupt) */
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "alt_ioc reply irq re-enabled\n",
-		    ioc->alt_ioc->name));
-		CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask, MPI_HIM_DIM);
-		ioc->alt_ioc->active = 1;
+	if (rc == 0) {	/* alt ioc */
+		if (reset_alt_ioc_active && ioc->alt_ioc) {
+			/* (re)Enable alt-IOC! (reply interrupt) */
+			dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "alt-ioc"
+				"reply irq re-enabled\n",
+				ioc->alt_ioc->name));
+			CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask,
+				MPI_HIM_DIM);
+			ioc->alt_ioc->active = 1;
+		}
 	}
 
-	/*  Enable MPT base driver management of EventNotification
-	 *  and EventAck handling.
-	 */
-	if ((ret == 0) && (!ioc->facts.EventState))
-		(void) SendEventNotification(ioc, 1);	/* 1=Enable EventNotification */
-
-	if (ioc->alt_ioc && alt_ioc_ready && !ioc->alt_ioc->facts.EventState)
-		(void) SendEventNotification(ioc->alt_ioc, 1);	/* 1=Enable EventNotification */
 
 	/*	Add additional "reason" check before call to GetLanConfigPages
 	 *	(combined with GetIoUnitPage2 call).  This prevents a somewhat
@@ -2306,8 +2482,9 @@
 		mutex_init(&ioc->raid_data.inactive_list_mutex);
 		INIT_LIST_HEAD(&ioc->raid_data.inactive_list);
 
-		if (ioc->bus_type == SAS) {
+		switch (ioc->bus_type) {
 
+		case SAS:
 			/* clear persistency table */
 			if(ioc->facts.IOCExceptions &
 			    MPI_IOCFACTS_EXCEPT_PERSISTENT_TABLE_FULL) {
@@ -2321,8 +2498,15 @@
 			 */
 			mpt_findImVolumes(ioc);
 
-		} else if (ioc->bus_type == FC) {
-			if ((ioc->pfacts[0].ProtocolFlags & MPI_PORTFACTS_PROTOCOL_LAN) &&
+			/* Check, and possibly reset, the coalescing value
+			 */
+			mpt_read_ioc_pg_1(ioc);
+
+			break;
+
+		case FC:
+			if ((ioc->pfacts[0].ProtocolFlags &
+				MPI_PORTFACTS_PROTOCOL_LAN) &&
 			    (ioc->lan_cnfg_page0.Header.PageLength == 0)) {
 				/*
 				 *  Pre-fetch the ports LAN MAC address!
@@ -2331,11 +2515,14 @@
 				(void) GetLanConfigPages(ioc);
 				a = (u8*)&ioc->lan_cnfg_page1.HardwareAddressLow;
 				dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-				    "LanAddr = %02X:%02X:%02X:%02X:%02X:%02X\n",
-				    ioc->name, a[5], a[4], a[3], a[2], a[1], a[0]));
-
+					"LanAddr = %02X:%02X:%02X"
+					":%02X:%02X:%02X\n",
+					ioc->name, a[5], a[4],
+					a[3], a[2], a[1], a[0]));
 			}
-		} else {
+			break;
+
+		case SPI:
 			/* Get NVRAM and adapter maximums from SPP 0 and 2
 			 */
 			mpt_GetScsiPortSettings(ioc, 0);
@@ -2354,6 +2541,8 @@
 			mpt_read_ioc_pg_1(ioc);
 
 			mpt_read_ioc_pg_4(ioc);
+
+			break;
 		}
 
 		GetIoUnitPage2(ioc);
@@ -2435,16 +2624,20 @@
 		if (_pcidev == peer) {
 			/* Paranoia checks */
 			if (ioc->alt_ioc != NULL) {
-				printk(MYIOC_s_WARN_FMT "Oops, already bound to %s!\n",
-					ioc->name, ioc->alt_ioc->name);
+				printk(MYIOC_s_WARN_FMT
+				    "Oops, already bound (%s <==> %s)!\n",
+				    ioc->name, ioc->name, ioc->alt_ioc->name);
 				break;
 			} else if (ioc_srch->alt_ioc != NULL) {
-				printk(MYIOC_s_WARN_FMT "Oops, already bound to %s!\n",
-					ioc_srch->name, ioc_srch->alt_ioc->name);
+				printk(MYIOC_s_WARN_FMT
+				    "Oops, already bound (%s <==> %s)!\n",
+				    ioc_srch->name, ioc_srch->name,
+				    ioc_srch->alt_ioc->name);
 				break;
 			}
-			dprintk(ioc, printk(MYIOC_s_INFO_FMT "FOUND! binding to %s\n",
-				ioc->name, ioc_srch->name));
+			dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				"FOUND! binding %s <==> %s\n",
+				ioc->name, ioc->name, ioc_srch->name));
 			ioc_srch->alt_ioc = ioc;
 			ioc->alt_ioc = ioc_srch;
 		}
@@ -2464,8 +2657,8 @@
 	int ret;
 
 	if (ioc->cached_fw != NULL) {
-		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: Pushing FW onto "
-		    "adapter\n", __func__, ioc->name));
+		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"%s: Pushing FW onto adapter\n", __func__, ioc->name));
 		if ((ret = mpt_downloadboot(ioc, (MpiFwHeader_t *)
 		    ioc->cached_fw, CAN_SLEEP)) < 0) {
 			printk(MYIOC_s_WARN_FMT
@@ -2474,11 +2667,30 @@
 		}
 	}
 
+	/*
+	 * Put the controller into ready state (if its not already)
+	 */
+	if (mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_READY) {
+		if (!SendIocReset(ioc, MPI_FUNCTION_IOC_MESSAGE_UNIT_RESET,
+		    CAN_SLEEP)) {
+			if (mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_READY)
+				printk(MYIOC_s_ERR_FMT "%s:  IOC msg unit "
+				    "reset failed to put ioc in ready state!\n",
+				    ioc->name, __func__);
+		} else
+			printk(MYIOC_s_ERR_FMT "%s:  IOC msg unit reset "
+			    "failed!\n", ioc->name, __func__);
+	}
+
+
 	/* Disable adapter interrupts! */
+	synchronize_irq(ioc->pcidev->irq);
 	CHIPREG_WRITE32(&ioc->chip->IntMask, 0xFFFFFFFF);
 	ioc->active = 0;
+
 	/* Clear any lingering interrupt */
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
+	CHIPREG_READ32(&ioc->chip->IntStatus);
 
 	if (ioc->alloc != NULL) {
 		sz = ioc->alloc_sz;
@@ -2538,19 +2750,22 @@
 		if((ret = mpt_host_page_access_control(ioc,
 		    MPI_DB_HPBAC_FREE_BUFFER, NO_SLEEP)) != 0) {
 			printk(MYIOC_s_ERR_FMT
-			   "host page buffers free failed (%d)!\n",
-			    ioc->name, ret);
+			   ": %s: host page buffers free failed (%d)!\n",
+			    ioc->name, __func__, ret);
 		}
-		dexitprintk(ioc, printk(MYIOC_s_INFO_FMT "HostPageBuffer free  @ %p, sz=%d bytes\n",
-		 	ioc->name, ioc->HostPageBuffer, ioc->HostPageBuffer_sz));
+		dexitprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"HostPageBuffer free  @ %p, sz=%d bytes\n",
+			ioc->name, ioc->HostPageBuffer,
+			ioc->HostPageBuffer_sz));
 		pci_free_consistent(ioc->pcidev, ioc->HostPageBuffer_sz,
 		    ioc->HostPageBuffer, ioc->HostPageBuffer_dma);
 		ioc->HostPageBuffer = NULL;
 		ioc->HostPageBuffer_sz = 0;
 		ioc->alloc_total -= ioc->HostPageBuffer_sz;
 	}
-}
 
+	pci_set_drvdata(ioc->pcidev, NULL);
+}
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
  *	mpt_adapter_dispose - Free all resources associated with an MPT adapter
@@ -2690,8 +2905,12 @@
 	}
 
 	/* Is it already READY? */
-	if (!statefault && (ioc_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_READY)
+	if (!statefault &&
+	    ((ioc_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_READY)) {
+		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+		    "IOC is in READY state\n", ioc->name));
 		return 0;
+	}
 
 	/*
 	 *	Check to see if IOC is in FAULT state.
@@ -2764,8 +2983,9 @@
 
 		ii++; cntdn--;
 		if (!cntdn) {
-			printk(MYIOC_s_ERR_FMT "Wait IOC_READY state timeout(%d)!\n",
-					ioc->name, (int)((ii+5)/HZ));
+			printk(MYIOC_s_ERR_FMT
+				"Wait IOC_READY state (0x%x) timeout(%d)!\n",
+				ioc->name, ioc_state, (int)((ii+5)/HZ));
 			return -ETIME;
 		}
 
@@ -2778,9 +2998,8 @@
 	}
 
 	if (statefault < 3) {
-		printk(MYIOC_s_INFO_FMT "Recovered from %s\n",
-				ioc->name,
-				statefault==1 ? "stuck handshake" : "IOC FAULT");
+		printk(MYIOC_s_INFO_FMT "Recovered from %s\n", ioc->name,
+			statefault == 1 ? "stuck handshake" : "IOC FAULT");
 	}
 
 	return hard_reset_done;
@@ -2833,8 +3052,9 @@
 
 	/* IOC *must* NOT be in RESET state! */
 	if (ioc->last_state == MPI_IOC_STATE_RESET) {
-		printk(MYIOC_s_ERR_FMT "Can't get IOCFacts NOT READY! (%08x)\n",
-		    ioc->name, ioc->last_state );
+		printk(KERN_ERR MYNAM
+		    ": ERROR - Can't get IOCFacts, %s NOT READY! (%08x)\n",
+		    ioc->name, ioc->last_state);
 		return -44;
 	}
 
@@ -2896,7 +3116,7 @@
 		 *	Old: u16{Major(4),Minor(4),SubMinor(8)}
 		 *	New: u32{Major(8),Minor(8),Unit(8),Dev(8)}
 		 */
-		if (facts->MsgVersion < 0x0102) {
+		if (facts->MsgVersion < MPI_VERSION_01_02) {
 			/*
 			 *	Handle old FC f/w style, convert to new...
 			 */
@@ -2908,9 +3128,11 @@
 			facts->FWVersion.Word = le32_to_cpu(facts->FWVersion.Word);
 
 		facts->ProductID = le16_to_cpu(facts->ProductID);
+
 		if ((ioc->facts.ProductID & MPI_FW_HEADER_PID_PROD_MASK)
 		    > MPI_FW_HEADER_PID_PROD_TARGET_SCSI)
 			ioc->ir_firmware = 1;
+
 		facts->CurrentHostMfaHighAddr =
 				le32_to_cpu(facts->CurrentHostMfaHighAddr);
 		facts->GlobalCredits = le16_to_cpu(facts->GlobalCredits);
@@ -2926,7 +3148,7 @@
 		 * to 14 in MPI-1.01.0x.
 		 */
 		if (facts->MsgLength >= (offsetof(IOCFactsReply_t,FWImageSize) + 7)/4 &&
-		    facts->MsgVersion > 0x0100) {
+		    facts->MsgVersion > MPI_VERSION_01_00) {
 			facts->FWImageSize = le32_to_cpu(facts->FWImageSize);
 		}
 
@@ -3108,6 +3330,7 @@
 
 	ioc_init.MaxDevices = (U8)ioc->devices_per_bus;
 	ioc_init.MaxBuses = (U8)ioc->number_of_buses;
+
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "facts.MsgVersion=%x\n",
 		   ioc->name, ioc->facts.MsgVersion));
 	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05) {
@@ -3122,7 +3345,7 @@
 	}
 	ioc_init.ReplyFrameSize = cpu_to_le16(ioc->reply_sz);	/* in BYTES */
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		/* Save the upper 32-bits of the request
 		 * (reply) and sense buffers.
 		 */
@@ -3325,11 +3548,10 @@
 	FWUpload_t		*prequest;
 	FWUploadReply_t		*preply;
 	FWUploadTCSGE_t		*ptcsge;
-	int			 sgeoffset;
 	u32			 flagsLength;
 	int			 ii, sz, reply_sz;
 	int			 cmdStatus;
-
+	int			request_size;
 	/* If the image size is 0, we are done.
 	 */
 	if ((sz = ioc->facts.FWImageSize) == 0)
@@ -3364,42 +3586,41 @@
 	ptcsge->ImageSize = cpu_to_le32(sz);
 	ptcsge++;
 
-	sgeoffset = sizeof(FWUpload_t) - sizeof(SGE_MPI_UNION) + sizeof(FWUploadTCSGE_t);
-
 	flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ | sz;
-	mpt_add_sge((char *)ptcsge, flagsLength, ioc->cached_fw_dma);
-
-	sgeoffset += sizeof(u32) + sizeof(dma_addr_t);
-	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": Sending FW Upload (req @ %p) sgeoffset=%d \n",
-	    ioc->name, prequest, sgeoffset));
+	ioc->add_sge((char *)ptcsge, flagsLength, ioc->cached_fw_dma);
+	request_size = offsetof(FWUpload_t, SGL) + sizeof(FWUploadTCSGE_t) +
+	    ioc->SGE_size;
+	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending FW Upload "
+	    " (req @ %p) fw_size=%d mf_request_size=%d\n", ioc->name, prequest,
+	    ioc->facts.FWImageSize, request_size));
 	DBG_DUMP_FW_REQUEST_FRAME(ioc, (u32 *)prequest);
 
-	ii = mpt_handshake_req_reply_wait(ioc, sgeoffset, (u32*)prequest,
-				reply_sz, (u16*)preply, 65 /*seconds*/, sleepFlag);
+	ii = mpt_handshake_req_reply_wait(ioc, request_size, (u32 *)prequest,
+	    reply_sz, (u16 *)preply, 65 /*seconds*/, sleepFlag);
 
-	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": FW Upload completed rc=%x \n", ioc->name, ii));
+	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "FW Upload completed "
+	    "rc=%x \n", ioc->name, ii));
 
 	cmdStatus = -EFAULT;
 	if (ii == 0) {
 		/* Handshake transfer was complete and successful.
 		 * Check the Reply Frame.
 		 */
-		int status, transfer_sz;
-		status = le16_to_cpu(preply->IOCStatus);
-		if (status == MPI_IOCSTATUS_SUCCESS) {
-			transfer_sz = le32_to_cpu(preply->ActualImageSize);
-			if (transfer_sz == sz)
+		int status;
+		status = le16_to_cpu(preply->IOCStatus) &
+				MPI_IOCSTATUS_MASK;
+		if (status == MPI_IOCSTATUS_SUCCESS &&
+		    ioc->facts.FWImageSize ==
+		    le32_to_cpu(preply->ActualImageSize))
 				cmdStatus = 0;
-		}
 	}
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": do_upload cmdStatus=%d \n",
 			ioc->name, cmdStatus));
 
 
 	if (cmdStatus) {
-
-		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": fw upload failed, freeing image \n",
-			ioc->name));
+		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "fw upload failed, "
+		    "freeing image \n", ioc->name));
 		mpt_free_fw_memory(ioc);
 	}
 	kfree(prequest);
@@ -3723,6 +3944,10 @@
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
 
 	if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078) {
+
+		if (!ignore)
+			return 0;
+
 		drsprintk(ioc, printk(MYIOC_s_WARN_FMT "%s: Doorbell=%p; 1078 reset "
 			"address=%p\n",  ioc->name, __func__,
 			&ioc->chip->Doorbell, &ioc->chip->Reset_1078));
@@ -3740,6 +3965,7 @@
 				"looking for READY STATE: doorbell=%x"
 			        " count=%d\n",
 				ioc->name, doorbell, count));
+
 			if (doorbell == MPI_IOC_STATE_READY) {
 				return 1;
 			}
@@ -3890,6 +4116,10 @@
 				doorbell = CHIPREG_READ32(&ioc->chip->Doorbell);
 				doorbell &= MPI_IOC_STATE_MASK;
 
+				drsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				    "looking for READY STATE: doorbell=%x"
+				    " count=%d\n", ioc->name, doorbell, count));
+
 				if (doorbell == MPI_IOC_STATE_READY) {
 					break;
 				}
@@ -3901,6 +4131,11 @@
 					mdelay (1000);
 				}
 			}
+
+			if (doorbell != MPI_IOC_STATE_READY)
+				printk(MYIOC_s_ERR_FMT "Failed to come READY "
+				    "after reset! IocState=%x", ioc->name,
+				    doorbell);
 		}
 	}
 
@@ -4019,8 +4254,9 @@
 			if (sleepFlag != CAN_SLEEP)
 				count *= 10;
 
-			printk(MYIOC_s_ERR_FMT "Wait IOC_READY state timeout(%d)!\n",
-			    ioc->name, (int)((count+5)/HZ));
+			printk(MYIOC_s_ERR_FMT
+			    "Wait IOC_READY state (0x%x) timeout(%d)!\n",
+			    ioc->name, state, (int)((count+5)/HZ));
 			return -ETIME;
 		}
 
@@ -4090,24 +4326,29 @@
 	 * num_sge = num sge in request frame + last chain buffer
 	 * scale = num sge per chain buffer if no chain element
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64))
-		num_sge =  scale + (ioc->req_sz - 60) / (sizeof(dma_addr_t) + sizeof(u32));
+	scale = ioc->req_sz / ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64))
+		num_sge =  scale + (ioc->req_sz - 60) / ioc->SGE_size;
 	else
-		num_sge =  1+ scale + (ioc->req_sz - 64) / (sizeof(dma_addr_t) + sizeof(u32));
+		num_sge =  1 + scale + (ioc->req_sz - 64) / ioc->SGE_size;
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) * (ioc->facts.MaxChainDepth-1) + scale +
-			(ioc->req_sz - 60) / (sizeof(dma_addr_t) + sizeof(u32));
+			(ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
-		numSGE = 1 + (scale - 1) * (ioc->facts.MaxChainDepth-1) + scale +
-			(ioc->req_sz - 64) / (sizeof(dma_addr_t) + sizeof(u32));
+		numSGE = 1 + (scale - 1) * (ioc->facts.MaxChainDepth-1) +
+		    scale + (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "num_sge=%d numSGE=%d\n",
 		ioc->name, num_sge, numSGE));
 
-	if ( numSGE > MPT_SCSI_SG_DEPTH	)
-		numSGE = MPT_SCSI_SG_DEPTH;
+	if (ioc->bus_type == FC) {
+		if (numSGE > MPT_SCSI_FC_SG_DEPTH)
+			numSGE = MPT_SCSI_FC_SG_DEPTH;
+	} else {
+		if (numSGE > MPT_SCSI_SG_DEPTH)
+			numSGE = MPT_SCSI_SG_DEPTH;
+	}
 
 	num_chain = 1;
 	while (numSGE - num_sge > 0) {
@@ -4161,12 +4402,42 @@
 	dma_addr_t alloc_dma;
 	u8 *mem;
 	int i, reply_sz, sz, total_size, num_chain;
+	u64	dma_mask;
+
+	dma_mask = 0;
 
 	/*  Prime reply FIFO...  */
 
 	if (ioc->reply_frames == NULL) {
 		if ( (num_chain = initChainBuffers(ioc)) < 0)
 			return -1;
+		/*
+		 * 1078 errata workaround for the 36GB limitation
+		 */
+		if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078 &&
+		    ioc->dma_mask > DMA_35BIT_MASK) {
+			if (!pci_set_dma_mask(ioc->pcidev, DMA_BIT_MASK(32))
+			    && !pci_set_consistent_dma_mask(ioc->pcidev,
+			    DMA_BIT_MASK(32))) {
+				dma_mask = DMA_35BIT_MASK;
+				d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				    "setting 35 bit addressing for "
+				    "Request/Reply/Chain and Sense Buffers\n",
+				    ioc->name));
+			} else {
+				/*Reseting DMA mask to 64 bit*/
+				pci_set_dma_mask(ioc->pcidev,
+					DMA_BIT_MASK(64));
+				pci_set_consistent_dma_mask(ioc->pcidev,
+					DMA_BIT_MASK(64));
+
+				printk(MYIOC_s_ERR_FMT
+				    "failed setting 35 bit addressing for "
+				    "Request/Reply/Chain and Sense Buffers\n",
+				    ioc->name);
+				return -1;
+			}
+		}
 
 		total_size = reply_sz = (ioc->reply_sz * ioc->reply_depth);
 		dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "ReplyBuffer sz=%d bytes, ReplyDepth=%d\n",
@@ -4305,9 +4576,16 @@
 		alloc_dma += ioc->reply_sz;
 	}
 
+	if (dma_mask == DMA_35BIT_MASK && !pci_set_dma_mask(ioc->pcidev,
+	    ioc->dma_mask) && !pci_set_consistent_dma_mask(ioc->pcidev,
+	    ioc->dma_mask))
+		d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "restoring 64 bit addressing\n", ioc->name));
+
 	return 0;
 
 out_fail:
+
 	if (ioc->alloc != NULL) {
 		sz = ioc->alloc_sz;
 		pci_free_consistent(ioc->pcidev,
@@ -4324,6 +4602,13 @@
 				ioc->sense_buf_pool, ioc->sense_buf_pool_dma);
 		ioc->sense_buf_pool = NULL;
 	}
+
+	if (dma_mask == DMA_35BIT_MASK && !pci_set_dma_mask(ioc->pcidev,
+	    DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(ioc->pcidev,
+	    DMA_BIT_MASK(64)))
+		d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "restoring 64 bit addressing\n", ioc->name));
+
 	return -1;
 }
 
@@ -4759,7 +5044,14 @@
 	SasIoUnitControlReply_t		*sasIoUnitCntrReply;
 	MPT_FRAME_HDR			*mf = NULL;
 	MPIHeader_t			*mpi_hdr;
+	int				ret = 0;
+	unsigned long 	 		timeleft;
 
+	mutex_lock(&ioc->mptbase_cmds.mutex);
+
+	/* init the internal cmd struct */
+	memset(ioc->mptbase_cmds.reply, 0 , MPT_DEFAULT_FRAME_SIZE);
+	INITIALIZE_MGMT_STATUS(ioc->mptbase_cmds.status)
 
 	/* insure garbage is not sent to fw */
 	switch(persist_opcode) {
@@ -4769,17 +5061,19 @@
 		break;
 
 	default:
-		return -1;
-		break;
+		ret = -1;
+		goto out;
 	}
 
-	printk("%s: persist_opcode=%x\n",__func__, persist_opcode);
+	printk(KERN_DEBUG  "%s: persist_opcode=%x\n",
+		__func__, persist_opcode);
 
 	/* Get a MF for this command.
 	 */
 	if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
-		printk("%s: no msg frames!\n",__func__);
-		return -1;
+		printk(KERN_DEBUG "%s: no msg frames!\n", __func__);
+		ret = -1;
+		goto out;
         }
 
 	mpi_hdr = (MPIHeader_t *) mf;
@@ -4789,27 +5083,42 @@
 	sasIoUnitCntrReq->MsgContext = mpi_hdr->MsgContext;
 	sasIoUnitCntrReq->Operation = persist_opcode;
 
-	init_timer(&ioc->persist_timer);
-	ioc->persist_timer.data = (unsigned long) ioc;
-	ioc->persist_timer.function = mpt_timer_expired;
-	ioc->persist_timer.expires = jiffies + HZ*10 /* 10 sec */;
-	ioc->persist_wait_done=0;
-	add_timer(&ioc->persist_timer);
 	mpt_put_msg_frame(mpt_base_index, ioc, mf);
-	wait_event(mpt_waitq, ioc->persist_wait_done);
-
-	sasIoUnitCntrReply =
-	    (SasIoUnitControlReply_t *)ioc->persist_reply_frame;
-	if (le16_to_cpu(sasIoUnitCntrReply->IOCStatus) != MPI_IOCSTATUS_SUCCESS) {
-		printk("%s: IOCStatus=0x%X IOCLogInfo=0x%X\n",
-		    __func__,
-		    sasIoUnitCntrReply->IOCStatus,
-		    sasIoUnitCntrReply->IOCLogInfo);
-		return -1;
+	timeleft = wait_for_completion_timeout(&ioc->mptbase_cmds.done, 10*HZ);
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		printk(KERN_DEBUG "%s: failed\n", __func__);
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft) {
+			printk(KERN_DEBUG "%s: Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
 	}
 
-	printk("%s: success\n",__func__);
-	return 0;
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		ret = -1;
+		goto out;
+	}
+
+	sasIoUnitCntrReply =
+	    (SasIoUnitControlReply_t *)ioc->mptbase_cmds.reply;
+	if (le16_to_cpu(sasIoUnitCntrReply->IOCStatus) != MPI_IOCSTATUS_SUCCESS) {
+		printk(KERN_DEBUG "%s: IOCStatus=0x%X IOCLogInfo=0x%X\n",
+		    __func__, sasIoUnitCntrReply->IOCStatus,
+		    sasIoUnitCntrReply->IOCLogInfo);
+		printk(KERN_DEBUG "%s: failed\n", __func__);
+		ret = -1;
+	} else
+		printk(KERN_DEBUG "%s: success\n", __func__);
+ out:
+
+	CLEAR_MGMT_STATUS(ioc->mptbase_cmds.status)
+	mutex_unlock(&ioc->mptbase_cmds.mutex);
+	return ret;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -5394,17 +5703,20 @@
  *	-ENOMEM if pci_alloc failed
  **/
 int
-mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk)
+mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
+			RaidPhysDiskPage0_t *phys_disk)
 {
-	CONFIGPARMS		 	cfg;
-	ConfigPageHeader_t	 	hdr;
+	CONFIGPARMS			cfg;
+	ConfigPageHeader_t		hdr;
 	dma_addr_t			dma_handle;
 	pRaidPhysDiskPage0_t		buffer = NULL;
 	int				rc;
 
 	memset(&cfg, 0 , sizeof(CONFIGPARMS));
 	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	memset(phys_disk, 0, sizeof(RaidPhysDiskPage0_t));
 
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE0_PAGEVERSION;
 	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
 	cfg.cfghdr.hdr = &hdr;
 	cfg.physAddr = -1;
@@ -5451,6 +5763,161 @@
 }
 
 /**
+ *	mpt_raid_phys_disk_get_num_paths - returns number paths associated to this phys_num
+ *	@ioc: Pointer to a Adapter Structure
+ *	@phys_disk_num: io unit unique phys disk num generated by the ioc
+ *
+ *	Return:
+ *	returns number paths
+ **/
+int
+mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
+{
+	CONFIGPARMS		 	cfg;
+	ConfigPageHeader_t	 	hdr;
+	dma_addr_t			dma_handle;
+	pRaidPhysDiskPage1_t		buffer = NULL;
+	int				rc;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE1_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
+	hdr.PageNumber = 1;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = 0;
+		goto out;
+	}
+
+	if (!hdr.PageLength) {
+		rc = 0;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer) {
+		rc = 0;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+	cfg.pageAddr = phys_disk_num;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = 0;
+		goto out;
+	}
+
+	rc = buffer->NumPhysDiskPaths;
+ out:
+
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpt_raid_phys_disk_get_num_paths);
+
+/**
+ *	mpt_raid_phys_disk_pg1 - returns phys disk page 1
+ *	@ioc: Pointer to a Adapter Structure
+ *	@phys_disk_num: io unit unique phys disk num generated by the ioc
+ *	@phys_disk: requested payload data returned
+ *
+ *	Return:
+ *	0 on success
+ *	-EFAULT if read of config page header fails or data pointer not NULL
+ *	-ENOMEM if pci_alloc failed
+ **/
+int
+mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
+		RaidPhysDiskPage1_t *phys_disk)
+{
+	CONFIGPARMS		 	cfg;
+	ConfigPageHeader_t	 	hdr;
+	dma_addr_t			dma_handle;
+	pRaidPhysDiskPage1_t		buffer = NULL;
+	int				rc;
+	int				i;
+	__le64				sas_address;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	rc = 0;
+
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE1_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
+	hdr.PageNumber = 1;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	if (!hdr.PageLength) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+	cfg.pageAddr = phys_disk_num;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	phys_disk->NumPhysDiskPaths = buffer->NumPhysDiskPaths;
+	phys_disk->PhysDiskNum = phys_disk_num;
+	for (i = 0; i < phys_disk->NumPhysDiskPaths; i++) {
+		phys_disk->Path[i].PhysDiskID = buffer->Path[i].PhysDiskID;
+		phys_disk->Path[i].PhysDiskBus = buffer->Path[i].PhysDiskBus;
+		phys_disk->Path[i].OwnerIdentifier =
+				buffer->Path[i].OwnerIdentifier;
+		phys_disk->Path[i].Flags = le16_to_cpu(buffer->Path[i].Flags);
+		memcpy(&sas_address, &buffer->Path[i].WWID, sizeof(__le64));
+		sas_address = le64_to_cpu(sas_address);
+		memcpy(&phys_disk->Path[i].WWID, &sas_address, sizeof(__le64));
+		memcpy(&sas_address,
+				&buffer->Path[i].OwnerWWID, sizeof(__le64));
+		sas_address = le64_to_cpu(sas_address);
+		memcpy(&phys_disk->Path[i].OwnerWWID,
+				&sas_address, sizeof(__le64));
+	}
+
+ out:
+
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpt_raid_phys_disk_pg1);
+
+
+/**
  *	mpt_findImVolumes - Identify IDs of hidden disks and RAID Volumes
  *	@ioc: Pointer to a Adapter Strucutre
  *
@@ -5775,30 +6242,28 @@
  *	SendEventNotification - Send EventNotification (on or off) request to adapter
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@EvSwitch: Event switch flags
+ *	@sleepFlag: Specifies whether the process can sleep
  */
 static int
-SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch)
+SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch, int sleepFlag)
 {
-	EventNotification_t	*evnp;
+	EventNotification_t	evn;
+	MPIDefaultReply_t	reply_buf;
 
-	evnp = (EventNotification_t *) mpt_get_msg_frame(mpt_base_index, ioc);
-	if (evnp == NULL) {
-		devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "Unable to allocate event request frame!\n",
-				ioc->name));
-		return 0;
-	}
-	memset(evnp, 0, sizeof(*evnp));
+	memset(&evn, 0, sizeof(EventNotification_t));
+	memset(&reply_buf, 0, sizeof(MPIDefaultReply_t));
 
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending EventNotification (%d) request %p\n", ioc->name, EvSwitch, evnp));
+	evn.Function = MPI_FUNCTION_EVENT_NOTIFICATION;
+	evn.Switch = EvSwitch;
+	evn.MsgContext = cpu_to_le32(mpt_base_index << 16);
 
-	evnp->Function = MPI_FUNCTION_EVENT_NOTIFICATION;
-	evnp->ChainOffset = 0;
-	evnp->MsgFlags = 0;
-	evnp->Switch = EvSwitch;
+	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "Sending EventNotification (%d) request %p\n",
+	    ioc->name, EvSwitch, &evn));
 
-	mpt_put_msg_frame(mpt_base_index, ioc, (MPT_FRAME_HDR *)evnp);
-
-	return 0;
+	return mpt_handshake_req_reply_wait(ioc, sizeof(EventNotification_t),
+	    (u32 *)&evn, sizeof(MPIDefaultReply_t), (u16 *)&reply_buf, 30,
+	    sleepFlag);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -5814,7 +6279,7 @@
 
 	if ((pAck = (EventAck_t *) mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames!!\n",
-		    ioc->name,__func__));
+		    ioc->name, __func__));
 		return -1;
 	}
 
@@ -5851,12 +6316,19 @@
 mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 {
 	Config_t	*pReq;
+	ConfigReply_t	*pReply;
 	ConfigExtendedPageHeader_t  *pExtHdr = NULL;
 	MPT_FRAME_HDR	*mf;
-	unsigned long	 flags;
-	int		 ii, rc;
+	int		 ii;
 	int		 flagsLength;
-	int		 in_isr;
+	long		 timeout;
+	int		 ret;
+	u8		 page_type = 0, extend_page;
+	unsigned long 	 timeleft;
+	unsigned long	 flags;
+    int		 in_isr;
+	u8		 issue_hard_reset = 0;
+	u8		 retry_count = 0;
 
 	/*	Prevent calling wait_event() (below), if caller happens
 	 *	to be in ISR context, because that is fatal!
@@ -5866,15 +6338,43 @@
 		dcprintk(ioc, printk(MYIOC_s_WARN_FMT "Config request not allowed in ISR context!\n",
 				ioc->name));
 		return -EPERM;
+    }
+
+	/* don't send a config page during diag reset */
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: busy with host reset\n", ioc->name, __func__));
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		return -EBUSY;
 	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+
+	/* don't send if no chance of success */
+	if (!ioc->active ||
+	    mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_OPERATIONAL) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: ioc not operational, %d, %xh\n",
+		    ioc->name, __func__, ioc->active,
+		    mpt_GetIocState(ioc, 0)));
+		return -EFAULT;
+	}
+
+ retry_config:
+	mutex_lock(&ioc->mptbase_cmds.mutex);
+	/* init the internal cmd struct */
+	memset(ioc->mptbase_cmds.reply, 0 , MPT_DEFAULT_FRAME_SIZE);
+	INITIALIZE_MGMT_STATUS(ioc->mptbase_cmds.status)
 
 	/* Get and Populate a free Frame
 	 */
 	if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
-		dcprintk(ioc, printk(MYIOC_s_WARN_FMT "mpt_config: no msg frames!\n",
-				ioc->name));
-		return -EAGAIN;
+		dcprintk(ioc, printk(MYIOC_s_WARN_FMT
+		"mpt_config: no msg frames!\n", ioc->name));
+		ret = -EAGAIN;
+		goto out;
 	}
+
 	pReq = (Config_t *)mf;
 	pReq->Action = pCfg->action;
 	pReq->Reserved = 0;
@@ -5900,7 +6400,9 @@
 		pReq->ExtPageType = pExtHdr->ExtPageType;
 		pReq->Header.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
 
-		/* Page Length must be treated as a reserved field for the extended header. */
+		/* Page Length must be treated as a reserved field for the
+		 * extended header.
+		 */
 		pReq->Header.PageLength = 0;
 	}
 
@@ -5913,78 +6415,91 @@
 	else
 		flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ;
 
-	if ((pCfg->cfghdr.hdr->PageType & MPI_CONFIG_PAGETYPE_MASK) == MPI_CONFIG_PAGETYPE_EXTENDED) {
+	if ((pCfg->cfghdr.hdr->PageType & MPI_CONFIG_PAGETYPE_MASK) ==
+	    MPI_CONFIG_PAGETYPE_EXTENDED) {
 		flagsLength |= pExtHdr->ExtPageLength * 4;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Config request type %d, page %d and action %d\n",
-			ioc->name, pReq->ExtPageType, pReq->Header.PageNumber, pReq->Action));
-	}
-	else {
+		page_type = pReq->ExtPageType;
+		extend_page = 1;
+	} else {
 		flagsLength |= pCfg->cfghdr.hdr->PageLength * 4;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Config request type %d, page %d and action %d\n",
-			ioc->name, pReq->Header.PageType, pReq->Header.PageNumber, pReq->Action));
+		page_type = pReq->Header.PageType;
+		extend_page = 0;
 	}
 
-	mpt_add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
+	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "Sending Config request type 0x%x, page 0x%x and action %d\n",
+	    ioc->name, page_type, pReq->Header.PageNumber, pReq->Action));
 
-	/* Append pCfg pointer to end of mf
-	 */
-	*((void **) (((u8 *) mf) + (ioc->req_sz - sizeof(void *)))) =  (void *) pCfg;
-
-	/* Initalize the timer
-	 */
-	init_timer_on_stack(&pCfg->timer);
-	pCfg->timer.data = (unsigned long) ioc;
-	pCfg->timer.function = mpt_timer_expired;
-	pCfg->wait_done = 0;
-
-	/* Set the timer; ensure 10 second minimum */
-	if (pCfg->timeout < 10)
-		pCfg->timer.expires = jiffies + HZ*10;
-	else
-		pCfg->timer.expires = jiffies + HZ*pCfg->timeout;
-
-	/* Add to end of Q, set timer and then issue this command */
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	list_add_tail(&pCfg->linkage, &ioc->configQ);
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	add_timer(&pCfg->timer);
+	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
+	timeout = (pCfg->timeout < 15) ? HZ*15 : HZ*pCfg->timeout;
 	mpt_put_msg_frame(mpt_base_index, ioc, mf);
-	wait_event(mpt_waitq, pCfg->wait_done);
+	timeleft = wait_for_completion_timeout(&ioc->mptbase_cmds.done,
+		timeout);
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "Failed Sending Config request type 0x%x, page 0x%x,"
+		    " action %d, status %xh, time left %ld\n\n",
+			ioc->name, page_type, pReq->Header.PageNumber,
+			pReq->Action, ioc->mptbase_cmds.status, timeleft));
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft)
+			issue_hard_reset = 1;
+		goto out;
+	}
 
-	/* mf has been freed - do not access */
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		ret = -1;
+		goto out;
+	}
+	pReply = (ConfigReply_t	*)ioc->mptbase_cmds.reply;
+	ret = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	if (ret == MPI_IOCSTATUS_SUCCESS) {
+		if (extend_page) {
+			pCfg->cfghdr.ehdr->ExtPageLength =
+			    le16_to_cpu(pReply->ExtPageLength);
+			pCfg->cfghdr.ehdr->ExtPageType =
+			    pReply->ExtPageType;
+		}
+		pCfg->cfghdr.hdr->PageVersion = pReply->Header.PageVersion;
+		pCfg->cfghdr.hdr->PageLength = pReply->Header.PageLength;
+		pCfg->cfghdr.hdr->PageNumber = pReply->Header.PageNumber;
+		pCfg->cfghdr.hdr->PageType = pReply->Header.PageType;
 
-	rc = pCfg->status;
+	}
 
-	return rc;
-}
+	if (retry_count)
+		printk(MYIOC_s_INFO_FMT "Retry completed "
+		    "ret=0x%x timeleft=%ld\n",
+		    ioc->name, ret, timeleft);
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mpt_timer_expired - Callback for timer process.
- *	Used only internal config functionality.
- *	@data: Pointer to MPT_SCSI_HOST recast as an unsigned long
- */
-static void
-mpt_timer_expired(unsigned long data)
-{
-	MPT_ADAPTER *ioc = (MPT_ADAPTER *) data;
+	dcprintk(ioc, printk(KERN_DEBUG "IOCStatus=%04xh, IOCLogInfo=%08xh\n",
+	     ret, le32_to_cpu(pReply->IOCLogInfo)));
 
-	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_timer_expired! \n", ioc->name));
+out:
 
-	/* Perform a FW reload */
-	if (mpt_HardResetHandler(ioc, NO_SLEEP) < 0)
-		printk(MYIOC_s_WARN_FMT "Firmware Reload FAILED!\n", ioc->name);
+	CLEAR_MGMT_STATUS(ioc->mptbase_cmds.status)
+	mutex_unlock(&ioc->mptbase_cmds.mutex);
+	if (issue_hard_reset) {
+		issue_hard_reset = 0;
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+		    ioc->name, __func__);
+		mpt_HardResetHandler(ioc, CAN_SLEEP);
+		mpt_free_msg_frame(ioc, mf);
+		/* attempt one retry for a timed out command */
+		if (!retry_count) {
+			printk(MYIOC_s_INFO_FMT
+			    "Attempting Retry Config request"
+			    " type 0x%x, page 0x%x,"
+			    " action %d\n", ioc->name, page_type,
+			    pCfg->cfghdr.hdr->PageNumber, pCfg->action);
+			retry_count++;
+			goto retry_config;
+		}
+	}
+	return ret;
 
-	/* No more processing.
-	 * Hard reset clean-up will wake up
-	 * process and free all resources.
-	 */
-	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_timer_expired complete!\n", ioc->name));
-
-	return;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -5998,41 +6513,34 @@
 static int
 mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
-	CONFIGPARMS *pCfg;
-	unsigned long flags;
-
-	dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": IOC %s_reset routed to MPT base driver!\n",
-	    ioc->name, reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-	    reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
-
-	if (reset_phase == MPT_IOC_SETUP_RESET) {
-		;
-	} else if (reset_phase == MPT_IOC_PRE_RESET) {
-		/* If the internal config Q is not empty -
-		 * delete timer. MF resources will be freed when
-		 * the FIFO's are primed.
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		list_for_each_entry(pCfg, &ioc->configQ, linkage)
-			del_timer(&pCfg->timer);
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	} else {
-		CONFIGPARMS *pNext;
-
-		/* Search the configQ for internal commands.
-		 * Flush the Q, and wake up all suspended threads.
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		list_for_each_entry_safe(pCfg, pNext, &ioc->configQ, linkage) {
-			list_del(&pCfg->linkage);
-
-			pCfg->status = MPT_CONFIG_ERROR;
-			pCfg->wait_done = 1;
-			wake_up(&mpt_waitq);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		ioc->taskmgmt_quiesce_io = 1;
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n",  ioc->name, __func__));
+/* wake up mptbase_cmds */
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->mptbase_cmds.status |=
+			    MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->mptbase_cmds.done);
 		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+/* wake up taskmgmt_cmds */
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->taskmgmt_cmds.status |=
+				MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->taskmgmt_cmds.done);
+		}
+		break;
+	default:
+		break;
 	}
 
 	return 1;		/* currently means nothing really */
@@ -6344,6 +6852,59 @@
 
 	*size = y;
 }
+/**
+ *	mpt_set_taskmgmt_in_progress_flag - set flags associated with task managment
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ *	Returns 0 for SUCCESS or -1 if FAILED.
+ *
+ *	If -1 is return, then it was not possible to set the flags
+ **/
+int
+mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
+{
+	unsigned long	 flags;
+	int		 retval;
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress || ioc->taskmgmt_in_progress ||
+	    (ioc->alt_ioc && ioc->alt_ioc->taskmgmt_in_progress)) {
+		retval = -1;
+		goto out;
+	}
+	retval = 0;
+	ioc->taskmgmt_in_progress = 1;
+	ioc->taskmgmt_quiesce_io = 1;
+	if (ioc->alt_ioc) {
+		ioc->alt_ioc->taskmgmt_in_progress = 1;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 1;
+	}
+ out:
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+	return retval;
+}
+EXPORT_SYMBOL(mpt_set_taskmgmt_in_progress_flag);
+
+/**
+ *	mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task managment
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
+void
+mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
+{
+	unsigned long	 flags;
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	ioc->taskmgmt_in_progress = 0;
+	ioc->taskmgmt_quiesce_io = 0;
+	if (ioc->alt_ioc) {
+		ioc->alt_ioc->taskmgmt_in_progress = 0;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 0;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+}
+EXPORT_SYMBOL(mpt_clear_taskmgmt_in_progress_flag);
 
 
 /**
@@ -6397,7 +6958,9 @@
 mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 {
 	int		 rc;
+	u8	 cb_idx;
 	unsigned long	 flags;
+	unsigned long	 time_count;
 
 	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler Entered!\n", ioc->name));
 #ifdef MFCNT
@@ -6410,14 +6973,15 @@
 	/* Reset the adapter. Prevent more than 1 call to
 	 * mpt_do_ioc_recovery at any instant in time.
 	 */
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	if ((ioc->diagPending) || (ioc->alt_ioc && ioc->alt_ioc->diagPending)){
-		spin_unlock_irqrestore(&ioc->diagLock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 		return 0;
-	} else {
-		ioc->diagPending = 1;
 	}
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
+	ioc->ioc_reset_in_progress = 1;
+	if (ioc->alt_ioc)
+		ioc->alt_ioc->ioc_reset_in_progress = 1;
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	/* FIXME: If do_ioc_recovery fails, repeat....
 	 */
@@ -6427,47 +6991,57 @@
 	 * Prevents timeouts occurring during a diagnostic reset...very bad.
 	 * For all other protocol drivers, this is a no-op.
 	 */
-	{
-		u8	 cb_idx;
-		int	 r = 0;
-
-		for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
-			if (MptResetHandlers[cb_idx]) {
-				dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling IOC reset_setup handler #%d\n",
-						ioc->name, cb_idx));
-				r += mpt_signal_reset(cb_idx, ioc, MPT_IOC_SETUP_RESET);
-				if (ioc->alt_ioc) {
-					dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling alt-%s setup reset handler #%d\n",
-							ioc->name, ioc->alt_ioc->name, cb_idx));
-					r += mpt_signal_reset(cb_idx, ioc->alt_ioc, MPT_IOC_SETUP_RESET);
-				}
-			}
+	for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
+		if (MptResetHandlers[cb_idx]) {
+			mpt_signal_reset(cb_idx, ioc, MPT_IOC_SETUP_RESET);
+			if (ioc->alt_ioc)
+				mpt_signal_reset(cb_idx, ioc->alt_ioc,
+					MPT_IOC_SETUP_RESET);
 		}
 	}
 
-	if ((rc = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_RECOVER, sleepFlag)) != 0) {
-		printk(MYIOC_s_WARN_FMT "Cannot recover rc = %d!\n", ioc->name, rc);
+	time_count = jiffies;
+	rc = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_RECOVER, sleepFlag);
+	if (rc != 0) {
+		printk(KERN_WARNING MYNAM
+		    ": WARNING - (%d) Cannot recover %s\n", rc, ioc->name);
+	} else {
+		if (ioc->hard_resets < -1)
+			ioc->hard_resets++;
 	}
-	ioc->reload_fw = 0;
-	if (ioc->alt_ioc)
-		ioc->alt_ioc->reload_fw = 0;
 
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	ioc->diagPending = 0;
-	if (ioc->alt_ioc)
-		ioc->alt_ioc->diagPending = 0;
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	ioc->ioc_reset_in_progress = 0;
+	ioc->taskmgmt_quiesce_io = 0;
+	ioc->taskmgmt_in_progress = 0;
+	if (ioc->alt_ioc) {
+		ioc->alt_ioc->ioc_reset_in_progress = 0;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 0;
+		ioc->alt_ioc->taskmgmt_in_progress = 0;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler rc = %d!\n", ioc->name, rc));
+	dtmprintk(ioc,
+	    printk(MYIOC_s_DEBUG_FMT
+		"HardResetHandler: completed (%d seconds): %s\n", ioc->name,
+		jiffies_to_msecs(jiffies - time_count)/1000, ((rc == 0) ?
+		"SUCCESS" : "FAILED")));
 
 	return rc;
 }
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+#ifdef CONFIG_FUSION_LOGGING
 static void
-EventDescriptionStr(u8 event, u32 evData0, char *evStr)
+mpt_display_event_info(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply)
 {
 	char *ds = NULL;
+	u32 evData0;
+	int ii;
+	u8 event;
+	char *evStr = ioc->evStr;
+
+	event = le32_to_cpu(pEventReply->Event) & 0xFF;
+	evData0 = le32_to_cpu(pEventReply->Data[0]);
 
 	switch(event) {
 	case MPI_EVENT_NONE:
@@ -6501,9 +7075,9 @@
 		if (evData0 == MPI_EVENT_LOOP_STATE_CHANGE_LIP)
 			ds = "Loop State(LIP) Change";
 		else if (evData0 == MPI_EVENT_LOOP_STATE_CHANGE_LPE)
-			ds = "Loop State(LPE) Change";		/* ??? */
+			ds = "Loop State(LPE) Change";
 		else
-			ds = "Loop State(LPB) Change";		/* ??? */
+			ds = "Loop State(LPB) Change";
 		break;
 	case MPI_EVENT_LOGOUT:
 		ds = "Logout";
@@ -6703,28 +7277,65 @@
 	}
 	case MPI_EVENT_IR2:
 	{
+		u8 id = (u8)(evData0);
+		u8 channel = (u8)(evData0 >> 8);
+		u8 phys_num = (u8)(evData0 >> 24);
 		u8 ReasonCode = (u8)(evData0 >> 16);
+
 		switch (ReasonCode) {
 		case MPI_EVENT_IR2_RC_LD_STATE_CHANGED:
-			ds = "IR2: LD State Changed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: LD State Changed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_STATE_CHANGED:
-			ds = "IR2: PD State Changed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD State Changed "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_BAD_BLOCK_TABLE_FULL:
-			ds = "IR2: Bad Block Table Full";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Bad Block Table Full: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_INSERTED:
-			ds = "IR2: PD Inserted";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD Inserted: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_REMOVED:
-			ds = "IR2: PD Removed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD Removed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED:
-			ds = "IR2: Foreign CFG Detected";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Foreign CFG Detected: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_REBUILD_MEDIUM_ERROR:
-			ds = "IR2: Rebuild Medium Error";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Rebuild Medium Error: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
+			break;
+		case MPI_EVENT_IR2_RC_DUAL_PORT_ADDED:
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Dual Port Added: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
+			break;
+		case MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED:
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Dual Port Removed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		default:
 			ds = "IR2";
@@ -6760,13 +7371,18 @@
 	case MPI_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE:
 	{
 		u8 reason = (u8)(evData0);
-		u8 port_num = (u8)(evData0 >> 8);
-		u16 handle = le16_to_cpu(evData0 >> 16);
 
-		snprintf(evStr, EVENT_DESCR_STR_SZ,
-		    "SAS Initiator Device Status Change: reason=0x%02x "
-		    "port=%d handle=0x%04x",
-		    reason, port_num, handle);
+		switch (reason) {
+		case MPI_EVENT_SAS_INIT_RC_ADDED:
+			ds = "SAS Initiator Status Change: Added";
+			break;
+		case MPI_EVENT_SAS_INIT_RC_REMOVED:
+			ds = "SAS Initiator Status Change: Deleted";
+			break;
+		default:
+			ds = "SAS Initiator Status Change";
+			break;
+		}
 		break;
 	}
 
@@ -6814,6 +7430,24 @@
 		break;
 	}
 
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+	{
+		u8 reason = (u8)(evData0);
+
+		switch (reason) {
+		case MPI_EVENT_SAS_EXP_RC_ADDED:
+			ds = "Expander Status Change: Added";
+			break;
+		case MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING:
+			ds = "Expander Status Change: Deleted";
+			break;
+		default:
+			ds = "Expander Status Change";
+			break;
+		}
+		break;
+	}
+
 	/*
 	 *  MPT base "custom" events may be added here...
 	 */
@@ -6823,8 +7457,20 @@
 	}
 	if (ds)
 		strncpy(evStr, ds, EVENT_DESCR_STR_SZ);
-}
 
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "MPT event:(%02Xh) : %s\n",
+	    ioc->name, event, evStr));
+
+	devtverboseprintk(ioc, printk(KERN_DEBUG MYNAM
+	    ": Event data:\n"));
+	for (ii = 0; ii < le16_to_cpu(pEventReply->EventDataLength); ii++)
+		devtverboseprintk(ioc, printk(" %08x",
+		    le32_to_cpu(pEventReply->Data[ii])));
+	devtverboseprintk(ioc, printk(KERN_DEBUG "\n"));
+}
+#endif
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
  *	ProcessEventNotification - Route EventNotificationReply to all event handlers
@@ -6841,37 +7487,24 @@
 {
 	u16 evDataLen;
 	u32 evData0 = 0;
-//	u32 evCtx;
 	int ii;
 	u8 cb_idx;
 	int r = 0;
 	int handlers = 0;
-	char evStr[EVENT_DESCR_STR_SZ];
 	u8 event;
 
 	/*
 	 *  Do platform normalization of values
 	 */
 	event = le32_to_cpu(pEventReply->Event) & 0xFF;
-//	evCtx = le32_to_cpu(pEventReply->EventContext);
 	evDataLen = le16_to_cpu(pEventReply->EventDataLength);
 	if (evDataLen) {
 		evData0 = le32_to_cpu(pEventReply->Data[0]);
 	}
 
-	EventDescriptionStr(event, evData0, evStr);
-	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "MPT event:(%02Xh) : %s\n",
-			ioc->name,
-			event,
-			evStr));
-
 #ifdef CONFIG_FUSION_LOGGING
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": Event data:\n", ioc->name));
-	for (ii = 0; ii < evDataLen; ii++)
-		devtverboseprintk(ioc, printk(" %08x",
-		    le32_to_cpu(pEventReply->Data[ii])));
-	devtverboseprintk(ioc, printk("\n"));
+	if (evDataLen)
+		mpt_display_event_info(ioc, pEventReply);
 #endif
 
 	/*
@@ -6926,8 +7559,9 @@
 	 */
 	for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
 		if (MptEvHandlers[cb_idx]) {
-			devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Routing Event to event handler #%d\n",
-					ioc->name, cb_idx));
+			devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			    "Routing Event to event handler #%d\n",
+			    ioc->name, cb_idx));
 			r += (*(MptEvHandlers[cb_idx]))(ioc, pEventReply);
 			handlers++;
 		}
@@ -7011,8 +7645,6 @@
 	switch (info) {
 	case 0x00010000:
 		desc = "bug! MID not found";
-		if (ioc->reload_fw == 0)
-			ioc->reload_fw++;
 		break;
 
 	case 0x00020000:
@@ -7613,7 +8245,6 @@
 EXPORT_SYMBOL(mpt_put_msg_frame);
 EXPORT_SYMBOL(mpt_put_msg_frame_hi_pri);
 EXPORT_SYMBOL(mpt_free_msg_frame);
-EXPORT_SYMBOL(mpt_add_sge);
 EXPORT_SYMBOL(mpt_send_handshake_request);
 EXPORT_SYMBOL(mpt_verify_adapter);
 EXPORT_SYMBOL(mpt_GetIocState);
@@ -7650,7 +8281,7 @@
 	/*  Register ourselves (mptbase) in order to facilitate
 	 *  EventNotification handling.
 	 */
-	mpt_base_index = mpt_register(mpt_base_reply, MPTBASE_DRIVER);
+	mpt_base_index = mpt_register(mptbase_reply, MPTBASE_DRIVER);
 
 	/* Register for hard reset handling callbacks.
 	 */
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index b3e981d..1c8514d 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2008 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.04.07"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.07"
+#define MPT_LINUX_VERSION_COMMON	"3.04.10"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.09"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
@@ -104,6 +104,7 @@
 #endif
 
 #define MPT_NAME_LENGTH			32
+#define MPT_KOBJ_NAME_LEN		20
 
 #define MPT_PROCFS_MPTBASEDIR		"mpt"
 						/* chg it to "driver/fusion" ? */
@@ -134,6 +135,7 @@
 
 #define MPT_COALESCING_TIMEOUT		0x10
 
+
 /*
  * SCSI transfer rate defines.
  */
@@ -161,10 +163,10 @@
 /*
  * Set the MAX_SGE value based on user input.
  */
-#ifdef  CONFIG_FUSION_MAX_SGE
-#if     CONFIG_FUSION_MAX_SGE  < 16
+#ifdef CONFIG_FUSION_MAX_SGE
+#if CONFIG_FUSION_MAX_SGE  < 16
 #define MPT_SCSI_SG_DEPTH	16
-#elif   CONFIG_FUSION_MAX_SGE  > 128
+#elif CONFIG_FUSION_MAX_SGE  > 128
 #define MPT_SCSI_SG_DEPTH	128
 #else
 #define MPT_SCSI_SG_DEPTH	CONFIG_FUSION_MAX_SGE
@@ -173,6 +175,18 @@
 #define MPT_SCSI_SG_DEPTH	40
 #endif
 
+#ifdef CONFIG_FUSION_MAX_FC_SGE
+#if CONFIG_FUSION_MAX_FC_SGE  < 16
+#define MPT_SCSI_FC_SG_DEPTH	16
+#elif CONFIG_FUSION_MAX_FC_SGE  > 256
+#define MPT_SCSI_FC_SG_DEPTH	256
+#else
+#define MPT_SCSI_FC_SG_DEPTH	CONFIG_FUSION_MAX_FC_SGE
+#endif
+#else
+#define MPT_SCSI_FC_SG_DEPTH	40
+#endif
+
 /* debug print string length used for events and iocstatus */
 # define EVENT_DESCR_STR_SZ             100
 
@@ -431,38 +445,36 @@
  *	IOCTL structure and associated defines
  */
 
-#define MPT_IOCTL_STATUS_DID_IOCRESET	0x01	/* IOC Reset occurred on the current*/
-#define MPT_IOCTL_STATUS_RF_VALID	0x02	/* The Reply Frame is VALID */
-#define MPT_IOCTL_STATUS_TIMER_ACTIVE	0x04	/* The timer is running */
-#define MPT_IOCTL_STATUS_SENSE_VALID	0x08	/* Sense data is valid */
-#define MPT_IOCTL_STATUS_COMMAND_GOOD	0x10	/* Command Status GOOD */
-#define MPT_IOCTL_STATUS_TMTIMER_ACTIVE	0x20	/* The TM timer is running */
-#define MPT_IOCTL_STATUS_TM_FAILED	0x40	/* User TM request failed */
-
 #define MPTCTL_RESET_OK			0x01	/* Issue Bus Reset */
 
-typedef struct _MPT_IOCTL {
-	struct _MPT_ADAPTER	*ioc;
-	u8			 ReplyFrame[MPT_DEFAULT_FRAME_SIZE];	/* reply frame data */
-	u8			 sense[MPT_SENSE_BUFFER_ALLOC];
-	int			 wait_done;	/* wake-up value for this ioc */
-	u8			 rsvd;
-	u8			 status;	/* current command status */
-	u8			 reset;		/* 1 if bus reset allowed */
-	u8			 id;		/* target for reset */
-	struct mutex		 ioctl_mutex;
-} MPT_IOCTL;
+#define MPT_MGMT_STATUS_RF_VALID	0x01	/* The Reply Frame is VALID */
+#define MPT_MGMT_STATUS_COMMAND_GOOD	0x02	/* Command Status GOOD */
+#define MPT_MGMT_STATUS_PENDING		0x04	/* command is pending */
+#define MPT_MGMT_STATUS_DID_IOCRESET	0x08	/* IOC Reset occurred
+						   on the current*/
+#define MPT_MGMT_STATUS_SENSE_VALID	0x10	/* valid sense info */
+#define MPT_MGMT_STATUS_TIMER_ACTIVE	0x20	/* obsolete */
+#define MPT_MGMT_STATUS_FREE_MF		0x40	/* free the mf from
+						   complete routine */
 
-#define MPT_SAS_MGMT_STATUS_RF_VALID	0x02	/* The Reply Frame is VALID */
-#define MPT_SAS_MGMT_STATUS_COMMAND_GOOD	0x10	/* Command Status GOOD */
-#define MPT_SAS_MGMT_STATUS_TM_FAILED	0x40	/* User TM request failed */
+#define INITIALIZE_MGMT_STATUS(status) \
+	status = MPT_MGMT_STATUS_PENDING;
+#define CLEAR_MGMT_STATUS(status) \
+	status = 0;
+#define CLEAR_MGMT_PENDING_STATUS(status) \
+	status &= ~MPT_MGMT_STATUS_PENDING;
+#define SET_MGMT_MSG_CONTEXT(msg_context, value) \
+	msg_context = value;
 
-typedef struct _MPT_SAS_MGMT {
+typedef struct _MPT_MGMT {
 	struct mutex		 mutex;
 	struct completion	 done;
 	u8			 reply[MPT_DEFAULT_FRAME_SIZE]; /* reply frame data */
+	u8			 sense[MPT_SENSE_BUFFER_ALLOC];
 	u8			 status;	/* current command status */
-}MPT_SAS_MGMT;
+	int			 completion_code;
+	u32			 msg_context;
+} MPT_MGMT;
 
 /*
  *  Event Structure and define
@@ -564,6 +576,10 @@
 	u8		flags;
 };
 
+typedef void (*MPT_ADD_SGE)(void *pAddr, u32 flagslength, dma_addr_t dma_addr);
+typedef void (*MPT_ADD_CHAIN)(void *pAddr, u8 next, u16 length,
+		dma_addr_t dma_addr);
+
 /*
  *  Adapter Structure - pci_dev specific. Maximum: MPT_MAX_ADAPTERS
  */
@@ -573,6 +589,10 @@
 	int			 pci_irq;	/* This irq           */
 	char			 name[MPT_NAME_LENGTH];	/* "iocN"             */
 	char			 prod_name[MPT_NAME_LENGTH];	/* "LSIFC9x9"         */
+#ifdef CONFIG_FUSION_LOGGING
+	/* used in mpt_display_event_info */
+	char			 evStr[EVENT_DESCR_STR_SZ];
+#endif
 	char			 board_name[16];
 	char			 board_assembly[16];
 	char			 board_tracer[16];
@@ -600,6 +620,10 @@
 	int			 reply_depth;	/* Num Allocated reply frames */
 	int			 reply_sz;	/* Reply frame size */
 	int			 num_chain;	/* Number of chain buffers */
+	MPT_ADD_SGE              add_sge;       /* Pointer to add_sge
+						   function */
+	MPT_ADD_CHAIN		 add_chain;	/* Pointer to add_chain
+						   function */
 		/* Pool of buffers for chaining. ReqToChain
 		 * and ChainToChain track index of chain buffers.
 		 * ChainBuffer (DMA) virt/phys addresses.
@@ -640,11 +664,8 @@
 	RaidCfgData		raid_data;	/* Raid config. data */
 	SasCfgData		sas_data;	/* Sas config. data */
 	FcCfgData		fc_data;	/* Fc config. data */
-	MPT_IOCTL		*ioctl;		/* ioctl data pointer */
 	struct proc_dir_entry	*ioc_dentry;
 	struct _MPT_ADAPTER	*alt_ioc;	/* ptr to 929 bound adapter port */
-	spinlock_t		 diagLock;	/* diagnostic reset lock */
-	int			 diagPending;
 	u32			 biosVersion;	/* BIOS version from IO Unit Page 2 */
 	int			 eventTypes;	/* Event logging parameters */
 	int			 eventContext;	/* Next event context */
@@ -652,7 +673,6 @@
 	struct _mpt_ioctl_events *events;	/* pointer to event log */
 	u8			*cached_fw;	/* Pointer to FW */
 	dma_addr_t	 	cached_fw_dma;
-	struct list_head	 configQ;	/* linked list of config. requests */
 	int			 hs_reply_idx;
 #ifndef MFCNT
 	u32			 pad0;
@@ -665,9 +685,6 @@
 	IOCFactsReply_t		 facts;
 	PortFactsReply_t	 pfacts[2];
 	FCPortPage0_t		 fc_port_page0[2];
-	struct timer_list	 persist_timer;	/* persist table timer */
-	int			 persist_wait_done; /* persist completion flag */
-	u8			 persist_reply_frame[MPT_DEFAULT_FRAME_SIZE]; /* persist reply */
 	LANPage0_t		 lan_cnfg_page0;
 	LANPage1_t		 lan_cnfg_page1;
 
@@ -682,23 +699,44 @@
 	int			 aen_event_read_flag; /* flag to indicate event log was read*/
 	u8			 FirstWhoInit;
 	u8			 upload_fw;	/* If set, do a fw upload */
-	u8			 reload_fw;	/* Force a FW Reload on next reset */
 	u8			 NBShiftFactor;  /* NB Shift Factor based on Block Size (Facts)  */
 	u8			 pad1[4];
 	u8			 DoneCtx;
 	u8			 TaskCtx;
 	u8			 InternalCtx;
-	spinlock_t		 initializing_hba_lock;
-	int 	 		 initializing_hba_lock_flag;
 	struct list_head	 list;
 	struct net_device	*netdev;
 	struct list_head	 sas_topology;
 	struct mutex		 sas_topology_mutex;
+
+	struct workqueue_struct	*fw_event_q;
+	struct list_head	 fw_event_list;
+	spinlock_t		 fw_event_lock;
+	u8			 fw_events_off; /* if '1', then ignore events */
+	char 			 fw_event_q_name[MPT_KOBJ_NAME_LEN];
+
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
 	u8			 sas_discovery_ignore_events;
+
+	/* port_info object for the host */
+	struct mptsas_portinfo	*hba_port_info;
+	u64			 hba_port_sas_addr;
+	u16			 hba_port_num_phy;
+	struct list_head	 sas_device_info_list;
+	struct mutex		 sas_device_info_mutex;
+	u8			 old_sas_discovery_protocal;
+	u8			 sas_discovery_quiesce_io;
 	int			 sas_index; /* index refrencing */
-	MPT_SAS_MGMT		 sas_mgmt;
+	MPT_MGMT		 sas_mgmt;
+	MPT_MGMT		 mptbase_cmds; /* for sending config pages */
+	MPT_MGMT		 internal_cmds;
+	MPT_MGMT		 taskmgmt_cmds;
+	MPT_MGMT		 ioctl_cmds;
+	spinlock_t		 taskmgmt_lock; /* diagnostic reset lock */
+	int			 taskmgmt_in_progress;
+	u8			 taskmgmt_quiesce_io;
+	u8			 ioc_reset_in_progress;
 	struct work_struct	 sas_persist_task;
 
 	struct work_struct	 fc_setup_reset_work;
@@ -707,15 +745,27 @@
 	u8			 fc_link_speed[2];
 	spinlock_t		 fc_rescan_work_lock;
 	struct work_struct	 fc_rescan_work;
-	char			 fc_rescan_work_q_name[20];
+	char			 fc_rescan_work_q_name[MPT_KOBJ_NAME_LEN];
 	struct workqueue_struct *fc_rescan_work_q;
+
+	/* driver forced bus resets count */
+	unsigned long		  hard_resets;
+	/* fw/external bus resets count */
+	unsigned long		  soft_resets;
+	/* cmd timeouts */
+	unsigned long		  timeouts;
+
 	struct scsi_cmnd	**ScsiLookup;
 	spinlock_t		  scsi_lookup_lock;
-
-	char			 reset_work_q_name[20];
+	u64			dma_mask;
+	u32			  broadcast_aen_busy;
+	char			 reset_work_q_name[MPT_KOBJ_NAME_LEN];
 	struct workqueue_struct *reset_work_q;
 	struct delayed_work	 fault_reset_work;
-	spinlock_t		 fault_reset_work_lock;
+
+	u8			sg_addr_size;
+	u8			in_rescan;
+	u8			SGE_size;
 
 } MPT_ADAPTER;
 
@@ -753,13 +803,14 @@
 	dma_addr_t	Address;
 } MptSge_t;
 
-#define mpt_addr_size() \
-	((sizeof(dma_addr_t) == sizeof(u64)) ? MPI_SGE_FLAGS_64_BIT_ADDRESSING : \
-		MPI_SGE_FLAGS_32_BIT_ADDRESSING)
 
-#define mpt_msg_flags() \
-	((sizeof(dma_addr_t) == sizeof(u64)) ? MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_64 : \
-		MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_32)
+#define mpt_msg_flags(ioc) \
+	(ioc->sg_addr_size == sizeof(u64)) ?		\
+	MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_64 : 		\
+	MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_32
+
+#define MPT_SGE_FLAGS_64_BIT_ADDRESSING \
+	(MPI_SGE_FLAGS_64_BIT_ADDRESSING << MPI_SGE_FLAGS_SHIFT)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
@@ -835,22 +886,14 @@
 		/* Pool of memory for holding SCpnts before doing
 		 * OS callbacks. freeQ is the free pool.
 		 */
-	u8			  tmPending;
-	u8			  resetPending;
 	u8			  negoNvram;		/* DV disabled, nego NVRAM */
 	u8			  pad1;
-	u8                        tmState;
 	u8			  rsvd[2];
 	MPT_FRAME_HDR		 *cmdPtr;		/* Ptr to nonOS request */
 	struct scsi_cmnd	 *abortSCpnt;
 	MPT_LOCAL_REPLY		  localReply;		/* internal cmd reply struct */
-	unsigned long		  hard_resets;		/* driver forced bus resets count */
-	unsigned long		  soft_resets;		/* fw/external bus resets count */
-	unsigned long		  timeouts;		/* cmd timeouts */
 	ushort			  sel_timeout[MPT_MAX_FC_DEVICES];
 	char 			  *info_kbuf;
-	wait_queue_head_t	  scandv_waitq;
-	int			  scandv_wait_done;
 	long			  last_queue_full;
 	u16			  tm_iocstatus;
 	u16			  spi_pending;
@@ -870,21 +913,16 @@
  * Generic structure passed to the base mpt_config function.
  */
 typedef struct _x_config_parms {
-	struct list_head	 linkage;	/* linked list */
-	struct timer_list	 timer;		/* timer function for this request  */
 	union {
 		ConfigExtendedPageHeader_t	*ehdr;
 		ConfigPageHeader_t	*hdr;
 	} cfghdr;
 	dma_addr_t		 physAddr;
-	int			 wait_done;	/* wait for this request */
 	u32			 pageAddr;	/* properly formatted */
+	u16			 status;
 	u8			 action;
 	u8			 dir;
 	u8			 timeout;	/* seconds */
-	u8			 pad1;
-	u16			 status;
-	u16			 pad2;
 } CONFIGPARMS;
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -909,7 +947,6 @@
 extern void	 mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame_hi_pri(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
-extern void	 mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr);
 
 extern int	 mpt_send_handshake_request(u8 cb_idx, MPT_ADAPTER *ioc, int reqBytes, u32 *req, int sleepFlag);
 extern int	 mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp);
@@ -922,6 +959,12 @@
 extern int	 mpt_findImVolumes(MPT_ADAPTER *ioc);
 extern int	 mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 extern int	 mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk);
+extern int	mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
+		pRaidPhysDiskPage1_t phys_disk);
+extern int	mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc,
+		u8 phys_disk_num);
+extern int	 mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
+extern void	 mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
 extern void     mpt_halt_firmware(MPT_ADAPTER *ioc);
 
 
@@ -959,7 +1002,6 @@
 #define MPT_SGE_FLAGS_END_OF_BUFFER		(0x40000000)
 #define MPT_SGE_FLAGS_LOCAL_ADDRESS		(0x08000000)
 #define MPT_SGE_FLAGS_DIRECTION			(0x04000000)
-#define MPT_SGE_FLAGS_ADDRESSING		(mpt_addr_size() << MPI_SGE_FLAGS_SHIFT)
 #define MPT_SGE_FLAGS_END_OF_LIST		(0x01000000)
 
 #define MPT_SGE_FLAGS_TRANSACTION_ELEMENT	(0x00000000)
@@ -972,14 +1014,12 @@
 	 MPT_SGE_FLAGS_END_OF_BUFFER |	\
 	 MPT_SGE_FLAGS_END_OF_LIST |	\
 	 MPT_SGE_FLAGS_SIMPLE_ELEMENT |	\
-	 MPT_SGE_FLAGS_ADDRESSING | \
 	 MPT_TRANSFER_IOC_TO_HOST)
 #define MPT_SGE_FLAGS_SSIMPLE_WRITE \
 	(MPT_SGE_FLAGS_LAST_ELEMENT |	\
 	 MPT_SGE_FLAGS_END_OF_BUFFER |	\
 	 MPT_SGE_FLAGS_END_OF_LIST |	\
 	 MPT_SGE_FLAGS_SIMPLE_ELEMENT |	\
-	 MPT_SGE_FLAGS_ADDRESSING | \
 	 MPT_TRANSFER_HOST_TO_IOC)
 
 /*}-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index c638171..9b2e219 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -84,6 +84,7 @@
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 
 static u8 mptctl_id = MPT_MAX_PROTOCOL_DRIVERS;
+static u8 mptctl_taskmgmt_id = MPT_MAX_PROTOCOL_DRIVERS;
 
 static DECLARE_WAIT_QUEUE_HEAD ( mptctl_wait );
 
@@ -127,10 +128,7 @@
 		struct buflist **blp, dma_addr_t *sglbuf_dma, MPT_ADAPTER *ioc);
 static void kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma,
 		struct buflist *buflist, MPT_ADAPTER *ioc);
-static void mptctl_timeout_expired (MPT_IOCTL *ioctl);
-static int  mptctl_bus_reset(MPT_IOCTL *ioctl);
-static int mptctl_set_tm_flags(MPT_SCSI_HOST *hd);
-static void mptctl_free_tm_flags(MPT_ADAPTER *ioc);
+static int mptctl_bus_reset(MPT_ADAPTER *ioc, u8 function);
 
 /*
  * Reset Handler cleanup function
@@ -183,10 +181,10 @@
 	int rc = 0;
 
 	if (nonblock) {
-		if (!mutex_trylock(&ioc->ioctl->ioctl_mutex))
+		if (!mutex_trylock(&ioc->ioctl_cmds.mutex))
 			rc = -EAGAIN;
 	} else {
-		if (mutex_lock_interruptible(&ioc->ioctl->ioctl_mutex))
+		if (mutex_lock_interruptible(&ioc->ioctl_cmds.mutex))
 			rc = -ERESTARTSYS;
 	}
 	return rc;
@@ -202,99 +200,78 @@
 static int
 mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 {
-	char *sense_data;
-	int sz, req_index;
-	u16 iocStatus;
-	u8 cmd;
+	char	*sense_data;
+	int	req_index;
+	int	sz;
 
-	if (req)
-		 cmd = req->u.hdr.Function;
-	else
-		return 1;
-	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\tcompleting mpi function (0x%02X), req=%p, "
-	    "reply=%p\n", ioc->name,  req->u.hdr.Function, req, reply));
+	if (!req)
+		return 0;
 
-	if (ioc->ioctl) {
+	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "completing mpi function "
+	    "(0x%02X), req=%p, reply=%p\n", ioc->name,  req->u.hdr.Function,
+	    req, reply));
 
-		if (reply==NULL) {
+	/*
+	 * Handling continuation of the same reply. Processing the first
+	 * reply, and eating the other replys that come later.
+	 */
+	if (ioc->ioctl_cmds.msg_context != req->u.hdr.MsgContext)
+		goto out_continuation;
 
-			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_reply() NULL Reply "
-				"Function=%x!\n", ioc->name, cmd));
+	ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
-			ioc->ioctl->reset &= ~MPTCTL_RESET_OK;
+	if (!reply)
+		goto out;
 
-			/* We are done, issue wake up
-	 		*/
-			ioc->ioctl->wait_done = 1;
-			wake_up (&mptctl_wait);
-			return 1;
+	ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	sz = min(ioc->reply_sz, 4*reply->u.reply.MsgLength);
+	memcpy(ioc->ioctl_cmds.reply, reply, sz);
 
-		}
+	if (reply->u.reply.IOCStatus || reply->u.reply.IOCLogInfo)
+		dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "iocstatus (0x%04X), loginfo (0x%08X)\n", ioc->name,
+		    le16_to_cpu(reply->u.reply.IOCStatus),
+		    le32_to_cpu(reply->u.reply.IOCLogInfo)));
 
-		/* Copy the reply frame (which much exist
-		 * for non-SCSI I/O) to the IOC structure.
-		 */
-		memcpy(ioc->ioctl->ReplyFrame, reply,
-			min(ioc->reply_sz, 4*reply->u.reply.MsgLength));
-		ioc->ioctl->status |= MPT_IOCTL_STATUS_RF_VALID;
+	if ((req->u.hdr.Function == MPI_FUNCTION_SCSI_IO_REQUEST) ||
+		(req->u.hdr.Function ==
+		 MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
 
-		/* Set the command status to GOOD if IOC Status is GOOD
-		 * OR if SCSI I/O cmd and data underrun or recovered error.
-		 */
-		iocStatus = le16_to_cpu(reply->u.reply.IOCStatus) & MPI_IOCSTATUS_MASK;
-		if (iocStatus  == MPI_IOCSTATUS_SUCCESS)
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
+		if (reply->u.sreply.SCSIStatus || reply->u.sreply.SCSIState)
+			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"scsi_status (0x%02x), scsi_state (0x%02x), "
+			"tag = (0x%04x), transfer_count (0x%08x)\n", ioc->name,
+			reply->u.sreply.SCSIStatus,
+			reply->u.sreply.SCSIState,
+			le16_to_cpu(reply->u.sreply.TaskTag),
+			le32_to_cpu(reply->u.sreply.TransferCount)));
 
-		if (iocStatus || reply->u.reply.IOCLogInfo)
-			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\tiocstatus (0x%04X), "
-				"loginfo (0x%08X)\n", ioc->name,
-				iocStatus,
-				le32_to_cpu(reply->u.reply.IOCLogInfo)));
-
-		if ((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) ||
-			(cmd == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
-
-			if (reply->u.sreply.SCSIStatus || reply->u.sreply.SCSIState)
-				dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-					"\tscsi_status (0x%02x), scsi_state (0x%02x), "
-					"tag = (0x%04x), transfer_count (0x%08x)\n", ioc->name,
-					reply->u.sreply.SCSIStatus,
-					reply->u.sreply.SCSIState,
-					le16_to_cpu(reply->u.sreply.TaskTag),
-					le32_to_cpu(reply->u.sreply.TransferCount)));
-
-			ioc->ioctl->reset &= ~MPTCTL_RESET_OK;
-
-			if ((iocStatus == MPI_IOCSTATUS_SCSI_DATA_UNDERRUN) ||
-			(iocStatus == MPI_IOCSTATUS_SCSI_RECOVERED_ERROR)) {
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
-			}
-		}
-
-		/* Copy the sense data - if present
-		 */
-		if ((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) &&
-			(reply->u.sreply.SCSIState &
-			 MPI_SCSI_STATE_AUTOSENSE_VALID)){
+		if (reply->u.sreply.SCSIState &
+			MPI_SCSI_STATE_AUTOSENSE_VALID) {
 			sz = req->u.scsireq.SenseBufferLength;
 			req_index =
 			    le16_to_cpu(req->u.frame.hwhdr.msgctxu.fld.req_idx);
-			sense_data =
-			    ((u8 *)ioc->sense_buf_pool +
+			sense_data = ((u8 *)ioc->sense_buf_pool +
 			     (req_index * MPT_SENSE_BUFFER_ALLOC));
-			memcpy(ioc->ioctl->sense, sense_data, sz);
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_SENSE_VALID;
+			memcpy(ioc->ioctl_cmds.sense, sense_data, sz);
+			ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_SENSE_VALID;
 		}
-
-		if (cmd == MPI_FUNCTION_SCSI_TASK_MGMT)
-			mptctl_free_tm_flags(ioc);
-
-		/* We are done, issue wake up
-		 */
-		ioc->ioctl->wait_done = 1;
-		wake_up (&mptctl_wait);
 	}
+
+ out:
+	/* We are done, issue wake up
+	 */
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		if (req->u.hdr.Function == MPI_FUNCTION_SCSI_TASK_MGMT)
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->ioctl_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->ioctl_cmds.done);
+	}
+
+ out_continuation:
+	if (reply && (reply->u.reply.MsgFlags &
+	    MPI_MSGFLAGS_CONTINUATION_REPLY))
+		return 0;
 	return 1;
 }
 
@@ -304,30 +281,66 @@
  * Expecting an interrupt, however timed out.
  *
  */
-static void mptctl_timeout_expired (MPT_IOCTL *ioctl)
+static void
+mptctl_timeout_expired(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 {
-	int rc = 1;
+	unsigned long flags;
 
-	if (ioctl == NULL)
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": %s\n",
+		ioc->name, __func__));
+
+	if (mpt_fwfault_debug)
+		mpt_halt_firmware(ioc);
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		CLEAR_MGMT_PENDING_STATUS(ioc->ioctl_cmds.status)
+		mpt_free_msg_frame(ioc, mf);
 		return;
-	dctlprintk(ioctl->ioc,
-		   printk(MYIOC_s_DEBUG_FMT ": Timeout Expired! Host %d\n",
-		   ioctl->ioc->name, ioctl->ioc->id));
-
-	ioctl->wait_done = 0;
-	if (ioctl->reset & MPTCTL_RESET_OK)
-		rc = mptctl_bus_reset(ioctl);
-
-	if (rc) {
-		/* Issue a reset for this device.
-		 * The IOC is not responding.
-		 */
-		dctlprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
-			 ioctl->ioc->name));
-		mpt_HardResetHandler(ioctl->ioc, CAN_SLEEP);
 	}
-	return;
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
+
+	if (!mptctl_bus_reset(ioc, mf->u.hdr.Function))
+		return;
+
+	/* Issue a reset for this device.
+	 * The IOC is not responding.
+	 */
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
+		 ioc->name));
+	CLEAR_MGMT_PENDING_STATUS(ioc->ioctl_cmds.status)
+	mpt_HardResetHandler(ioc, CAN_SLEEP);
+	mpt_free_msg_frame(ioc, mf);
+}
+
+static int
+mptctl_taskmgmt_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+{
+	if (!mf)
+		return 0;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt completed (mf=%p, mr=%p)\n",
+		ioc->name, mf, mr));
+
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+
+	if (!mr)
+		goto out;
+
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->taskmgmt_cmds.reply, mr,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
+ out:
+	if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->taskmgmt_cmds.done);
+		return 1;
+	}
+	return 0;
 }
 
 /* mptctl_bus_reset
@@ -335,133 +348,150 @@
  * Bus reset code.
  *
  */
-static int mptctl_bus_reset(MPT_IOCTL *ioctl)
+static int mptctl_bus_reset(MPT_ADAPTER *ioc, u8 function)
 {
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
-	MPT_SCSI_HOST	*hd;
+	SCSITaskMgmtReply_t *pScsiTmReply;
 	int		 ii;
-	int		 retval=0;
+	int		 retval;
+	unsigned long	 timeout;
+	unsigned long	 time_count;
+	u16		 iocstatus;
 
-
-	ioctl->reset &= ~MPTCTL_RESET_OK;
-
-	if (ioctl->ioc->sh == NULL)
+	/* bus reset is only good for SCSI IO, RAID PASSTHRU */
+	if (!(function == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH) ||
+	    (function == MPI_FUNCTION_SCSI_IO_REQUEST)) {
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"TaskMgmt, not SCSI_IO!!\n", ioc->name));
 		return -EPERM;
+	}
 
-	hd = shost_priv(ioctl->ioc->sh);
-	if (hd == NULL)
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
 		return -EPERM;
+	}
 
-	/* Single threading ....
-	 */
-	if (mptctl_set_tm_flags(hd) != 0)
-		return -EPERM;
+	retval = 0;
 
 	/* Send request
 	 */
-	if ((mf = mpt_get_msg_frame(mptctl_id, ioctl->ioc)) == NULL) {
-		dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt, no msg frames!!\n",
-				ioctl->ioc->name));
-
-		mptctl_free_tm_flags(ioctl->ioc);
-		return -ENOMEM;
+	mf = mpt_get_msg_frame(mptctl_taskmgmt_id, ioc);
+	if (mf == NULL) {
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"TaskMgmt, no msg frames!!\n", ioc->name));
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		retval = -ENOMEM;
+		goto mptctl_bus_reset_done;
 	}
 
-	dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt request @ %p\n",
-			ioctl->ioc->name, mf));
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
+		ioc->name, mf));
 
 	pScsiTm = (SCSITaskMgmt_t *) mf;
-	pScsiTm->TargetID = ioctl->id;
-	pScsiTm->Bus = hd->port;	/* 0 */
-	pScsiTm->ChainOffset = 0;
+	memset(pScsiTm, 0, sizeof(SCSITaskMgmt_t));
 	pScsiTm->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
-	pScsiTm->Reserved = 0;
 	pScsiTm->TaskType = MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS;
-	pScsiTm->Reserved1 = 0;
 	pScsiTm->MsgFlags = MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION;
-
+	pScsiTm->TargetID = 0;
+	pScsiTm->Bus = 0;
+	pScsiTm->ChainOffset = 0;
+	pScsiTm->Reserved = 0;
+	pScsiTm->Reserved1 = 0;
+	pScsiTm->TaskMsgContext = 0;
 	for (ii= 0; ii < 8; ii++)
 		pScsiTm->LUN[ii] = 0;
-
 	for (ii=0; ii < 7; ii++)
 		pScsiTm->Reserved2[ii] = 0;
 
-	pScsiTm->TaskMsgContext = 0;
-	dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT
-		"mptctl_bus_reset: issued.\n", ioctl->ioc->name));
+	switch (ioc->bus_type) {
+	case FC:
+		timeout = 40;
+		break;
+	case SAS:
+		timeout = 30;
+		break;
+	case SPI:
+	default:
+		timeout = 2;
+		break;
+	}
 
-	DBG_DUMP_TM_REQUEST_FRAME(ioctl->ioc, (u32 *)mf);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt type=%d timeout=%ld\n",
+		ioc->name, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, timeout));
 
-	ioctl->wait_done=0;
-
-	if ((ioctl->ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
-	    (ioctl->ioc->facts.MsgVersion >= MPI_VERSION_01_05))
-		mpt_put_msg_frame_hi_pri(mptctl_id, ioctl->ioc, mf);
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	time_count = jiffies;
+	if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
+	    (ioc->facts.MsgVersion >= MPI_VERSION_01_05))
+		mpt_put_msg_frame_hi_pri(mptctl_taskmgmt_id, ioc, mf);
 	else {
-		retval = mpt_send_handshake_request(mptctl_id, ioctl->ioc,
-			sizeof(SCSITaskMgmt_t), (u32*)pScsiTm, CAN_SLEEP);
+		retval = mpt_send_handshake_request(mptctl_taskmgmt_id, ioc,
+		    sizeof(SCSITaskMgmt_t), (u32 *)pScsiTm, CAN_SLEEP);
 		if (retval != 0) {
-			dfailprintk(ioctl->ioc, printk(MYIOC_s_ERR_FMT "_send_handshake FAILED!"
-				" (hd %p, ioc %p, mf %p) \n", hd->ioc->name, hd,
-				hd->ioc, mf));
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"TaskMgmt send_handshake FAILED!"
+				" (ioc %p, mf %p, rc=%d) \n", ioc->name,
+				ioc, mf, retval));
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
 			goto mptctl_bus_reset_done;
 		}
 	}
 
 	/* Now wait for the command to complete */
-	ii = wait_event_timeout(mptctl_wait,
-	     ioctl->wait_done == 1,
-	     HZ*5 /* 5 second timeout */);
+	ii = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done, timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
+		mpt_free_msg_frame(ioc, mf);
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			retval = 0;
+		else
+			retval = -1; /* return failure */
+		goto mptctl_bus_reset_done;
+	}
 
-	if(ii <=0 && (ioctl->wait_done != 1 ))  {
-		mpt_free_msg_frame(hd->ioc, mf);
-		ioctl->wait_done = 0;
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
+		retval = -1; /* return failure */
+		goto mptctl_bus_reset_done;
+	}
+
+	pScsiTmReply = (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt fw_channel = %d, fw_id = %d, task_type=0x%02X, "
+	    "iocstatus=0x%04X\n\tloginfo=0x%08X, response_code=0x%02X, "
+	    "term_cmnds=%d\n", ioc->name, pScsiTmReply->Bus,
+	    pScsiTmReply->TargetID, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+	    le16_to_cpu(pScsiTmReply->IOCStatus),
+	    le32_to_cpu(pScsiTmReply->IOCLogInfo),
+	    pScsiTmReply->ResponseCode,
+	    le32_to_cpu(pScsiTmReply->TerminationCount)));
+
+	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+
+	if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SUCCESS)
+		retval = 0;
+	else {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
 		retval = -1; /* return failure */
 	}
 
-mptctl_bus_reset_done:
 
-	mptctl_free_tm_flags(ioctl->ioc);
+ mptctl_bus_reset_done:
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
 	return retval;
 }
 
-static int
-mptctl_set_tm_flags(MPT_SCSI_HOST *hd) {
-	unsigned long flags;
-
-	spin_lock_irqsave(&hd->ioc->FreeQlock, flags);
-
-	if (hd->tmState == TM_STATE_NONE) {
-		hd->tmState = TM_STATE_IN_PROGRESS;
-		hd->tmPending = 1;
-		spin_unlock_irqrestore(&hd->ioc->FreeQlock, flags);
-	} else {
-		spin_unlock_irqrestore(&hd->ioc->FreeQlock, flags);
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-static void
-mptctl_free_tm_flags(MPT_ADAPTER *ioc)
-{
-	MPT_SCSI_HOST * hd;
-	unsigned long flags;
-
-	hd = shost_priv(ioc->sh);
-	if (hd == NULL)
-		return;
-
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-
-	hd->tmState = TM_STATE_NONE;
-	hd->tmPending = 0;
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	return;
-}
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /* mptctl_ioc_reset
@@ -473,22 +503,23 @@
 static int
 mptctl_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
-	MPT_IOCTL *ioctl = ioc->ioctl;
-	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IOC %s_reset routed to IOCTL driver!\n", ioc->name,
-		reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-		reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
-
-	if(ioctl == NULL)
-		return 1;
-
 	switch(reset_phase) {
 	case MPT_IOC_SETUP_RESET:
-		ioctl->status |= MPT_IOCTL_STATUS_DID_IOCRESET;
-		break;
-	case MPT_IOC_POST_RESET:
-		ioctl->status &= ~MPT_IOCTL_STATUS_DID_IOCRESET;
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
 		break;
 	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->ioctl_cmds.done);
+		}
+		break;
 	default:
 		break;
 	}
@@ -642,7 +673,7 @@
 	else
 		ret = -EINVAL;
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -758,6 +789,7 @@
 	int			 sge_offset = 0;
 	u16			 iocstat;
 	pFWDownloadReply_t	 ReplyMsg = NULL;
+	unsigned long		 timeleft;
 
 	if (mpt_verify_adapter(ioc, &iocp) < 0) {
 		printk(KERN_DEBUG MYNAM "ioctl_fwdl - ioc%d not found!\n",
@@ -841,8 +873,9 @@
 	 *	96		8
 	 *	64		4
 	 */
-	maxfrags = (iocp->req_sz - sizeof(MPIHeader_t) - sizeof(FWDownloadTCSGE_t))
-			/ (sizeof(dma_addr_t) + sizeof(u32));
+	maxfrags = (iocp->req_sz - sizeof(MPIHeader_t) -
+			sizeof(FWDownloadTCSGE_t))
+			/ iocp->SGE_size;
 	if (numfrags > maxfrags) {
 		ret = -EMLINK;
 		goto fwdl_out;
@@ -870,7 +903,7 @@
 		if (nib == 0 || nib == 3) {
 			;
 		} else if (sgIn->Address) {
-			mpt_add_sge(sgOut, sgIn->FlagsLength, sgIn->Address);
+			iocp->add_sge(sgOut, sgIn->FlagsLength, sgIn->Address);
 			n++;
 			if (copy_from_user(bl->kptr, ufwbuf+fw_bytes_copied, bl->len)) {
 				printk(MYIOC_s_ERR_FMT "%s@%d::_ioctl_fwdl - "
@@ -882,7 +915,7 @@
 		}
 		sgIn++;
 		bl++;
-		sgOut += (sizeof(dma_addr_t) + sizeof(u32));
+		sgOut += iocp->SGE_size;
 	}
 
 	DBG_DUMP_FW_DOWNLOAD(iocp, (u32 *)mf, numfrags);
@@ -891,16 +924,30 @@
 	 * Finally, perform firmware download.
 	 */
 	ReplyMsg = NULL;
+	SET_MGMT_MSG_CONTEXT(iocp->ioctl_cmds.msg_context, dlmsg->MsgContext);
+	INITIALIZE_MGMT_STATUS(iocp->ioctl_cmds.status)
 	mpt_put_msg_frame(mptctl_id, iocp, mf);
 
 	/* Now wait for the command to complete */
-	ret = wait_event_timeout(mptctl_wait,
-	     iocp->ioctl->wait_done == 1,
-	     HZ*60);
+retry_wait:
+	timeleft = wait_for_completion_timeout(&iocp->ioctl_cmds.done, HZ*60);
+	if (!(iocp->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", iocp->name, __func__);
+		if (iocp->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(iocp, mf);
+			goto fwdl_out;
+		}
+		if (!timeleft)
+			mptctl_timeout_expired(iocp, mf);
+		else
+			goto retry_wait;
+		goto fwdl_out;
+	}
 
-	if(ret <=0 && (iocp->ioctl->wait_done != 1 )) {
-	/* Now we need to reset the board */
-		mptctl_timeout_expired(iocp->ioctl);
+	if (!(iocp->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", iocp->name, __func__);
+		mpt_free_msg_frame(iocp, mf);
 		ret = -ENODATA;
 		goto fwdl_out;
 	}
@@ -908,7 +955,7 @@
 	if (sgl)
 		kfree_sgl(sgl, sgl_dma, buflist, iocp);
 
-	ReplyMsg = (pFWDownloadReply_t)iocp->ioctl->ReplyFrame;
+	ReplyMsg = (pFWDownloadReply_t)iocp->ioctl_cmds.reply;
 	iocstat = le16_to_cpu(ReplyMsg->IOCStatus) & MPI_IOCSTATUS_MASK;
 	if (iocstat == MPI_IOCSTATUS_SUCCESS) {
 		printk(MYIOC_s_INFO_FMT "F/W update successfull!\n", iocp->name);
@@ -932,6 +979,9 @@
 	return 0;
 
 fwdl_out:
+
+	CLEAR_MGMT_STATUS(iocp->ioctl_cmds.status);
+	SET_MGMT_MSG_CONTEXT(iocp->ioctl_cmds.msg_context, 0);
         kfree_sgl(sgl, sgl_dma, buflist, iocp);
 	return ret;
 }
@@ -1003,7 +1053,7 @@
 	 *
 	 */
 	sgl = sglbuf;
-	sg_spill = ((ioc->req_sz - sge_offset)/(sizeof(dma_addr_t) + sizeof(u32))) - 1;
+	sg_spill = ((ioc->req_sz - sge_offset)/ioc->SGE_size) - 1;
 	while (bytes_allocd < bytes) {
 		this_alloc = min(alloc_sz, bytes-bytes_allocd);
 		buflist[buflist_ent].len = this_alloc;
@@ -1024,8 +1074,9 @@
 			dma_addr_t dma_addr;
 
 			bytes_allocd += this_alloc;
-			sgl->FlagsLength = (0x10000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|this_alloc);
-			dma_addr = pci_map_single(ioc->pcidev, buflist[buflist_ent].kptr, this_alloc, dir);
+			sgl->FlagsLength = (0x10000000|sgdir|this_alloc);
+			dma_addr = pci_map_single(ioc->pcidev,
+				buflist[buflist_ent].kptr, this_alloc, dir);
 			sgl->Address = dma_addr;
 
 			fragcnt++;
@@ -1771,7 +1822,10 @@
 	int		msgContext;
 	u16		req_idx;
 	ulong 		timeout;
+	unsigned long	timeleft;
 	struct scsi_device *sdev;
+	unsigned long	 flags;
+	u8		 function;
 
 	/* bufIn and bufOut are used for user to kernel space transfers
 	 */
@@ -1784,24 +1838,23 @@
 				__FILE__, __LINE__, iocnum);
 		return -ENODEV;
 	}
-	if (!ioc->ioctl) {
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 		printk(KERN_ERR MYNAM "%s@%d::mptctl_do_mpt_command - "
-			"No memory available during driver init.\n",
-				__FILE__, __LINE__);
-		return -ENOMEM;
-	} else if (ioc->ioctl->status & MPT_IOCTL_STATUS_DID_IOCRESET) {
-		printk(KERN_ERR MYNAM "%s@%d::mptctl_do_mpt_command - "
-			"Busy with IOC Reset \n", __FILE__, __LINE__);
+			"Busy with diagnostic reset\n", __FILE__, __LINE__);
 		return -EBUSY;
 	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	/* Verify that the final request frame will not be too large.
 	 */
 	sz = karg.dataSgeOffset * 4;
 	if (karg.dataInSize > 0)
-		sz += sizeof(dma_addr_t) + sizeof(u32);
+		sz += ioc->SGE_size;
 	if (karg.dataOutSize > 0)
-		sz += sizeof(dma_addr_t) + sizeof(u32);
+		sz += ioc->SGE_size;
 
 	if (sz > ioc->req_sz) {
 		printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
@@ -1827,10 +1880,12 @@
 		printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 			"Unable to read MF from mpt_ioctl_command struct @ %p\n",
 			ioc->name, __FILE__, __LINE__, mfPtr);
+		function = -1;
 		rc = -EFAULT;
 		goto done_free_mem;
 	}
 	hdr->MsgContext = cpu_to_le32(msgContext);
+	function = hdr->Function;
 
 
 	/* Verify that this request is allowed.
@@ -1838,7 +1893,7 @@
 	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "sending mpi function (0x%02X), req=%p\n",
 	    ioc->name, hdr->Function, mf));
 
-	switch (hdr->Function) {
+	switch (function) {
 	case MPI_FUNCTION_IOC_FACTS:
 	case MPI_FUNCTION_PORT_FACTS:
 		karg.dataOutSize  = karg.dataInSize = 0;
@@ -1893,7 +1948,7 @@
 			}
 
 			pScsiReq->MsgFlags &= ~MPI_SCSIIO_MSGFLGS_SENSE_WIDTH;
-			pScsiReq->MsgFlags |= mpt_msg_flags();
+			pScsiReq->MsgFlags |= mpt_msg_flags(ioc);
 
 
 			/* verify that app has not requested
@@ -1935,8 +1990,6 @@
 			pScsiReq->Control = cpu_to_le32(scsidir | qtag);
 			pScsiReq->DataLength = cpu_to_le32(dataSize);
 
-			ioc->ioctl->reset = MPTCTL_RESET_OK;
-			ioc->ioctl->id = pScsiReq->TargetID;
 
 		} else {
 			printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
@@ -1979,7 +2032,7 @@
 			int dataSize;
 
 			pScsiReq->MsgFlags &= ~MPI_SCSIIO_MSGFLGS_SENSE_WIDTH;
-			pScsiReq->MsgFlags |= mpt_msg_flags();
+			pScsiReq->MsgFlags |= mpt_msg_flags(ioc);
 
 
 			/* verify that app has not requested
@@ -2014,8 +2067,6 @@
 			pScsiReq->Control = cpu_to_le32(scsidir | qtag);
 			pScsiReq->DataLength = cpu_to_le32(dataSize);
 
-			ioc->ioctl->reset = MPTCTL_RESET_OK;
-			ioc->ioctl->id = pScsiReq->TargetID;
 		} else {
 			printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 				"SCSI driver is not loaded. \n",
@@ -2026,20 +2077,17 @@
 		break;
 
 	case MPI_FUNCTION_SCSI_TASK_MGMT:
-		{
-			MPT_SCSI_HOST *hd = NULL;
-			if ((ioc->sh == NULL) || ((hd = shost_priv(ioc->sh)) == NULL)) {
-				printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
-					"SCSI driver not loaded or SCSI host not found. \n",
-					ioc->name, __FILE__, __LINE__);
-				rc = -EFAULT;
-				goto done_free_mem;
-			} else if (mptctl_set_tm_flags(hd) != 0) {
-				rc = -EPERM;
-				goto done_free_mem;
-			}
-		}
+	{
+		SCSITaskMgmt_t	*pScsiTm;
+		pScsiTm = (SCSITaskMgmt_t *)mf;
+		dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"\tTaskType=0x%x MsgFlags=0x%x "
+			"TaskMsgContext=0x%x id=%d channel=%d\n",
+			ioc->name, pScsiTm->TaskType, le32_to_cpu
+			(pScsiTm->TaskMsgContext), pScsiTm->MsgFlags,
+			pScsiTm->TargetID, pScsiTm->Bus));
 		break;
+	}
 
 	case MPI_FUNCTION_IOC_INIT:
 		{
@@ -2123,8 +2171,7 @@
 			if (karg.dataInSize > 0) {
 				flagsLength = ( MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 						MPI_SGE_FLAGS_END_OF_BUFFER |
-						MPI_SGE_FLAGS_DIRECTION |
-						mpt_addr_size() )
+						MPI_SGE_FLAGS_DIRECTION)
 						<< MPI_SGE_FLAGS_SHIFT;
 			} else {
 				flagsLength = MPT_SGE_FLAGS_SSIMPLE_WRITE;
@@ -2141,8 +2188,8 @@
 				/* Set up this SGE.
 				 * Copy to MF and to sglbuf
 				 */
-				mpt_add_sge(psge, flagsLength, dma_addr_out);
-				psge += (sizeof(u32) + sizeof(dma_addr_t));
+				ioc->add_sge(psge, flagsLength, dma_addr_out);
+				psge += ioc->SGE_size;
 
 				/* Copy user data to kernel space.
 				 */
@@ -2175,18 +2222,25 @@
 				/* Set up this SGE
 				 * Copy to MF and to sglbuf
 				 */
-				mpt_add_sge(psge, flagsLength, dma_addr_in);
+				ioc->add_sge(psge, flagsLength, dma_addr_in);
 			}
 		}
 	} else  {
 		/* Add a NULL SGE
 		 */
-		mpt_add_sge(psge, flagsLength, (dma_addr_t) -1);
+		ioc->add_sge(psge, flagsLength, (dma_addr_t) -1);
 	}
 
-	ioc->ioctl->wait_done = 0;
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, hdr->MsgContext);
+	INITIALIZE_MGMT_STATUS(ioc->ioctl_cmds.status)
 	if (hdr->Function == MPI_FUNCTION_SCSI_TASK_MGMT) {
 
+		mutex_lock(&ioc->taskmgmt_cmds.mutex);
+		if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+			mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			goto done_free_mem;
+		}
+
 		DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)mf);
 
 		if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
@@ -2197,10 +2251,11 @@
 				sizeof(SCSITaskMgmt_t), (u32*)mf, CAN_SLEEP);
 			if (rc != 0) {
 				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				    "_send_handshake FAILED! (ioc %p, mf %p)\n",
+				    "send_handshake FAILED! (ioc %p, mf %p)\n",
 				    ioc->name, ioc, mf));
-				mptctl_free_tm_flags(ioc);
+				mpt_clear_taskmgmt_in_progress_flag(ioc);
 				rc = -ENODATA;
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
 				goto done_free_mem;
 			}
 		}
@@ -2210,36 +2265,47 @@
 
 	/* Now wait for the command to complete */
 	timeout = (karg.timeout > 0) ? karg.timeout : MPT_IOCTL_DEFAULT_TIMEOUT;
-	timeout = wait_event_timeout(mptctl_wait,
-	     ioc->ioctl->wait_done == 1,
-	     HZ*timeout);
-
-	if(timeout <=0 && (ioc->ioctl->wait_done != 1 )) {
-	/* Now we need to reset the board */
-
-		if (hdr->Function == MPI_FUNCTION_SCSI_TASK_MGMT)
-			mptctl_free_tm_flags(ioc);
-
-		mptctl_timeout_expired(ioc->ioctl);
-		rc = -ENODATA;
+retry_wait:
+	timeleft = wait_for_completion_timeout(&ioc->ioctl_cmds.done,
+				HZ*timeout);
+	if (!(ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		rc = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "%s: TIMED OUT!\n",
+		    ioc->name, __func__));
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			goto done_free_mem;
+		}
+		if (!timeleft) {
+			if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			mptctl_timeout_expired(ioc, mf);
+			mf = NULL;
+		} else
+			goto retry_wait;
 		goto done_free_mem;
 	}
 
+	if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+
+
 	mf = NULL;
 
 	/* If a valid reply frame, copy to the user.
 	 * Offset 2: reply length in U32's
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_RF_VALID) {
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID) {
 		if (karg.maxReplyBytes < ioc->reply_sz) {
-			 sz = min(karg.maxReplyBytes, 4*ioc->ioctl->ReplyFrame[2]);
+			sz = min(karg.maxReplyBytes,
+				4*ioc->ioctl_cmds.reply[2]);
 		} else {
-			 sz = min(ioc->reply_sz, 4*ioc->ioctl->ReplyFrame[2]);
+			 sz = min(ioc->reply_sz, 4*ioc->ioctl_cmds.reply[2]);
 		}
-
 		if (sz > 0) {
 			if (copy_to_user(karg.replyFrameBufPtr,
-				 &ioc->ioctl->ReplyFrame, sz)){
+				 ioc->ioctl_cmds.reply, sz)){
 				 printk(MYIOC_s_ERR_FMT
 				     "%s@%d::mptctl_do_mpt_command - "
 				 "Unable to write out reply frame %p\n",
@@ -2252,10 +2318,11 @@
 
 	/* If valid sense data, copy to user.
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_SENSE_VALID) {
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_SENSE_VALID) {
 		sz = min(karg.maxSenseBytes, MPT_SENSE_BUFFER_SIZE);
 		if (sz > 0) {
-			if (copy_to_user(karg.senseDataPtr, ioc->ioctl->sense, sz)) {
+			if (copy_to_user(karg.senseDataPtr,
+				ioc->ioctl_cmds.sense, sz)) {
 				printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 				"Unable to write sense data to user %p\n",
 				ioc->name, __FILE__, __LINE__,
@@ -2269,7 +2336,7 @@
 	/* If the overall status is _GOOD and data in, copy data
 	 * to user.
 	 */
-	if ((ioc->ioctl->status & MPT_IOCTL_STATUS_COMMAND_GOOD) &&
+	if ((ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD) &&
 				(karg.dataInSize > 0) && (bufIn.kptr)) {
 
 		if (copy_to_user(karg.dataInBufPtr,
@@ -2284,9 +2351,8 @@
 
 done_free_mem:
 
-	ioc->ioctl->status &= ~(MPT_IOCTL_STATUS_COMMAND_GOOD |
-		MPT_IOCTL_STATUS_SENSE_VALID |
-		MPT_IOCTL_STATUS_RF_VALID );
+	CLEAR_MGMT_STATUS(ioc->ioctl_cmds.status)
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
 
 	/* Free the allocated memory.
 	 */
@@ -2336,6 +2402,8 @@
 	ToolboxIstwiReadWriteRequest_t	*IstwiRWRequest;
 	MPT_FRAME_HDR		*mf = NULL;
 	MPIHeader_t		*mpi_hdr;
+	unsigned long		timeleft;
+	int			retval;
 
 	/* Reset long to int. Should affect IA64 and SPARC only
 	 */
@@ -2466,9 +2534,9 @@
 		MPT_SCSI_HOST *hd =  shost_priv(ioc->sh);
 
 		if (hd && (cim_rev == 1)) {
-			karg.hard_resets = hd->hard_resets;
-			karg.soft_resets = hd->soft_resets;
-			karg.timeouts = hd->timeouts;
+			karg.hard_resets = ioc->hard_resets;
+			karg.soft_resets = ioc->soft_resets;
+			karg.timeouts = ioc->timeouts;
 		}
 	}
 
@@ -2476,8 +2544,8 @@
 	 * Gather ISTWI(Industry Standard Two Wire Interface) Data
 	 */
 	if ((mf = mpt_get_msg_frame(mptctl_id, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames!!\n",
-		    ioc->name,__func__));
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, no msg frames!!\n", ioc->name, __func__));
 		goto out;
 	}
 
@@ -2498,22 +2566,29 @@
 	pbuf = pci_alloc_consistent(ioc->pcidev, 4, &buf_dma);
 	if (!pbuf)
 		goto out;
-	mpt_add_sge((char *)&IstwiRWRequest->SGL,
+	ioc->add_sge((char *)&IstwiRWRequest->SGL,
 	    (MPT_SGE_FLAGS_SSIMPLE_READ|4), buf_dma);
 
-	ioc->ioctl->wait_done = 0;
+	retval = 0;
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context,
+				IstwiRWRequest->MsgContext);
+	INITIALIZE_MGMT_STATUS(ioc->ioctl_cmds.status)
 	mpt_put_msg_frame(mptctl_id, ioc, mf);
 
-	rc = wait_event_timeout(mptctl_wait,
-	     ioc->ioctl->wait_done == 1,
-	     HZ*MPT_IOCTL_DEFAULT_TIMEOUT /* 10 sec */);
-
-	if(rc <=0 && (ioc->ioctl->wait_done != 1 )) {
-		/*
-		 * Now we need to reset the board
-		 */
-		mpt_free_msg_frame(ioc, mf);
-		mptctl_timeout_expired(ioc->ioctl);
+retry_wait:
+	timeleft = wait_for_completion_timeout(&ioc->ioctl_cmds.done,
+			HZ*MPT_IOCTL_DEFAULT_TIMEOUT);
+	if (!(ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = -ETIME;
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", ioc->name, __func__);
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(ioc, mf);
+			goto out;
+		}
+		if (!timeleft)
+			mptctl_timeout_expired(ioc, mf);
+		else
+			goto retry_wait;
 		goto out;
 	}
 
@@ -2526,10 +2601,13 @@
 	 *   bays have drives in them
 	 * pbuf[3] = Checksum (0x100 = (byte0 + byte2 + byte3)
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_RF_VALID)
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID)
 		karg.rsvd = *(u32 *)pbuf;
 
  out:
+	CLEAR_MGMT_STATUS(ioc->ioctl_cmds.status)
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
+
 	if (pbuf)
 		pci_free_consistent(ioc->pcidev, 4, pbuf, buf_dma);
 
@@ -2753,7 +2831,7 @@
 
 	ret = mptctl_do_fw_download(kfw.iocnum, kfw.bufp, kfw.fwlen);
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -2807,7 +2885,7 @@
 	 */
 	ret = mptctl_do_mpt_command (karg, &uarg->MF);
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -2859,21 +2937,10 @@
 static int
 mptctl_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	MPT_IOCTL *mem;
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 
-	/*
-	 * Allocate and inite a MPT_IOCTL structure
-	*/
-	mem = kzalloc(sizeof(MPT_IOCTL), GFP_KERNEL);
-	if (!mem) {
-		mptctl_remove(pdev);
-		return -ENOMEM;
-	}
-
-	ioc->ioctl = mem;
-	ioc->ioctl->ioc = ioc;
-	mutex_init(&ioc->ioctl->ioctl_mutex);
+	mutex_init(&ioc->ioctl_cmds.mutex);
+	init_completion(&ioc->ioctl_cmds.done);
 	return 0;
 }
 
@@ -2887,9 +2954,6 @@
 static void
 mptctl_remove(struct pci_dev *pdev)
 {
-	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
-
-	kfree ( ioc->ioctl );
 }
 
 static struct mpt_pci_driver mptctl_driver = {
@@ -2929,6 +2993,7 @@
 		goto out_fail;
 	}
 
+	mptctl_taskmgmt_id = mpt_register(mptctl_taskmgmt_reply, MPTCTL_DRIVER);
 	mpt_reset_register(mptctl_id, mptctl_ioc_reset);
 	mpt_event_register(mptctl_id, mptctl_event_process);
 
@@ -2953,6 +3018,7 @@
 
 	/* De-register callback handler from base module */
 	mpt_deregister(mptctl_id);
+	mpt_reset_deregister(mptctl_taskmgmt_id);
 
         mpt_device_driver_deregister(MPTCTL_DRIVER);
 
diff --git a/drivers/message/fusion/mptdebug.h b/drivers/message/fusion/mptdebug.h
index 510b9f4..28e4788 100644
--- a/drivers/message/fusion/mptdebug.h
+++ b/drivers/message/fusion/mptdebug.h
@@ -58,6 +58,7 @@
 #define MPT_DEBUG_FC			0x00080000
 #define MPT_DEBUG_SAS			0x00100000
 #define MPT_DEBUG_SAS_WIDE		0x00200000
+#define MPT_DEBUG_36GB_MEM              0x00400000
 
 /*
  * CONFIG_FUSION_LOGGING - enabled in Kconfig
@@ -135,6 +136,8 @@
 #define dsaswideprintk(IOC, CMD)		\
 	MPT_CHECK_LOGGING(IOC, CMD, MPT_DEBUG_SAS_WIDE)
 
+#define d36memprintk(IOC, CMD)		\
+	MPT_CHECK_LOGGING(IOC, CMD, MPT_DEBUG_36GB_MEM)
 
 
 /*
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index c3c24fdf..e61df13 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1251,17 +1251,15 @@
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
@@ -1292,9 +1290,6 @@
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
@@ -1312,8 +1307,6 @@
 	hd->timer.data = (unsigned long) hd;
 	hd->timer.function = mptscsih_timer_expired;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 
 	sh->transportt = mptfc_transport_template;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index a9019f0..20e0b44 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -93,8 +93,37 @@
 static u8	mptsasTaskCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasInternalCtx = MPT_MAX_PROTOCOL_DRIVERS; /* Used only for internal commands */
 static u8	mptsasMgmtCtx = MPT_MAX_PROTOCOL_DRIVERS;
+static u8	mptsasDeviceResetCtx = MPT_MAX_PROTOCOL_DRIVERS;
 
-static void mptsas_hotplug_work(struct work_struct *work);
+static void mptsas_firmware_event_work(struct work_struct *work);
+static void mptsas_send_sas_event(struct fw_event_work *fw_event);
+static void mptsas_send_raid_event(struct fw_event_work *fw_event);
+static void mptsas_send_ir2_event(struct fw_event_work *fw_event);
+static void mptsas_parse_device_info(struct sas_identify *identify,
+		struct mptsas_devinfo *device_info);
+static inline void mptsas_set_rphy(MPT_ADAPTER *ioc,
+		struct mptsas_phyinfo *phy_info, struct sas_rphy *rphy);
+static struct mptsas_phyinfo	*mptsas_find_phyinfo_by_sas_address
+		(MPT_ADAPTER *ioc, u64 sas_address);
+static int mptsas_sas_device_pg0(MPT_ADAPTER *ioc,
+	struct mptsas_devinfo *device_info, u32 form, u32 form_specific);
+static int mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc,
+	struct mptsas_enclosure *enclosure, u32 form, u32 form_specific);
+static int mptsas_add_end_device(MPT_ADAPTER *ioc,
+	struct mptsas_phyinfo *phy_info);
+static void mptsas_del_end_device(MPT_ADAPTER *ioc,
+	struct mptsas_phyinfo *phy_info);
+static void mptsas_send_link_status_event(struct fw_event_work *fw_event);
+static struct mptsas_portinfo	*mptsas_find_portinfo_by_sas_address
+		(MPT_ADAPTER *ioc, u64 sas_address);
+static void mptsas_expander_delete(MPT_ADAPTER *ioc,
+		struct mptsas_portinfo *port_info, u8 force);
+static void mptsas_send_expander_event(struct fw_event_work *fw_event);
+static void mptsas_not_responding_devices(MPT_ADAPTER *ioc);
+static void mptsas_scan_sas_topology(MPT_ADAPTER *ioc);
+static void mptsas_broadcast_primative_work(struct fw_event_work *fw_event);
+static void mptsas_handle_queue_full_event(struct fw_event_work *fw_event);
+static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
 					MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
@@ -218,6 +247,115 @@
 	    le16_to_cpu(pg1->AttachedDevHandle)));
 }
 
+/* inhibit sas firmware event handling */
+static void
+mptsas_fw_event_off(MPT_ADAPTER *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	ioc->fw_events_off = 1;
+	ioc->sas_discovery_quiesce_io = 0;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+}
+
+/* enable sas firmware event handling */
+static void
+mptsas_fw_event_on(MPT_ADAPTER *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	ioc->fw_events_off = 0;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* queue a sas firmware event */
+static void
+mptsas_add_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    unsigned long delay)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	list_add_tail(&fw_event->list, &ioc->fw_event_list);
+	INIT_DELAYED_WORK(&fw_event->work, mptsas_firmware_event_work);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: add (fw_event=0x%p)\n",
+	    ioc->name, __func__, fw_event));
+	queue_delayed_work(ioc->fw_event_q, &fw_event->work,
+	    delay);
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* requeue a sas firmware event */
+static void
+mptsas_requeue_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    unsigned long delay)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: reschedule task "
+	    "(fw_event=0x%p)\n", ioc->name, __func__, fw_event));
+	fw_event->retries++;
+	queue_delayed_work(ioc->fw_event_q, &fw_event->work,
+	    msecs_to_jiffies(delay));
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* free memory assoicated to a sas firmware event */
+static void
+mptsas_free_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: kfree (fw_event=0x%p)\n",
+	    ioc->name, __func__, fw_event));
+	list_del(&fw_event->list);
+	kfree(fw_event);
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* walk the firmware event queue, and either stop or wait for
+ * outstanding events to complete */
+static void
+mptsas_cleanup_fw_event_q(MPT_ADAPTER *ioc)
+{
+	struct fw_event_work *fw_event, *next;
+	struct mptsas_target_reset_event *target_reset_list, *n;
+	u8	flush_q;
+	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
+
+	/* flush the target_reset_list */
+	if (!list_empty(&hd->target_reset_list)) {
+		list_for_each_entry_safe(target_reset_list, n,
+		    &hd->target_reset_list, list) {
+			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			    "%s: removing target reset for id=%d\n",
+			    ioc->name, __func__,
+			   target_reset_list->sas_event_data.TargetID));
+			list_del(&target_reset_list->list);
+			kfree(target_reset_list);
+		}
+	}
+
+	if (list_empty(&ioc->fw_event_list) ||
+	     !ioc->fw_event_q || in_interrupt())
+		return;
+
+	flush_q = 0;
+	list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) {
+		if (cancel_delayed_work(&fw_event->work))
+			mptsas_free_fw_event(ioc, fw_event);
+		else
+			flush_q = 1;
+	}
+	if (flush_q)
+		flush_workqueue(ioc->fw_event_q);
+}
+
+
 static inline MPT_ADAPTER *phy_to_ioc(struct sas_phy *phy)
 {
 	struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);
@@ -230,20 +368,6 @@
 	return ((MPT_SCSI_HOST *)shost->hostdata)->ioc;
 }
 
-static struct mptsas_portinfo *
-mptsas_get_hba_portinfo(MPT_ADAPTER *ioc)
-{
-	struct list_head	*head = &ioc->sas_topology;
-	struct mptsas_portinfo	*pi = NULL;
-
-	/* always the first entry on sas_topology list */
-
-	if (!list_empty(head))
-		pi = list_entry(head->next, struct mptsas_portinfo, list);
-
-	return pi;
-}
-
 /*
  * mptsas_find_portinfo_by_handle
  *
@@ -265,6 +389,38 @@
 	return rc;
 }
 
+/**
+ *	mptsas_find_portinfo_by_sas_address -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@handle:
+ *
+ *	This function should be called with the sas_topology_mutex already held
+ *
+ **/
+static struct mptsas_portinfo *
+mptsas_find_portinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
+{
+	struct mptsas_portinfo *port_info, *rc = NULL;
+	int i;
+
+	if (sas_address >= ioc->hba_port_sas_addr &&
+	    sas_address < (ioc->hba_port_sas_addr +
+	    ioc->hba_port_num_phy))
+		return ioc->hba_port_info;
+
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_for_each_entry(port_info, &ioc->sas_topology, list)
+		for (i = 0; i < port_info->num_phys; i++)
+			if (port_info->phy_info[i].identify.sas_address ==
+			    sas_address) {
+				rc = port_info;
+				goto out;
+			}
+ out:
+	mutex_unlock(&ioc->sas_topology_mutex);
+	return rc;
+}
+
 /*
  * Returns true if there is a scsi end device
  */
@@ -308,6 +464,7 @@
 		if(phy_info->port_details != port_details)
 			continue;
 		memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
+		mptsas_set_rphy(ioc, phy_info, NULL);
 		phy_info->port_details = NULL;
 	}
 	kfree(port_details);
@@ -379,6 +536,285 @@
 		phy_info->port_details->starget = starget;
 }
 
+/**
+ *	mptsas_add_device_component -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: fw mapped id's
+ *	@id:
+ *	@sas_address:
+ *	@device_info:
+ *
+ **/
+static void
+mptsas_add_device_component(MPT_ADAPTER *ioc, u8 channel, u8 id,
+	u64 sas_address, u32 device_info, u16 slot, u64 enclosure_logical_id)
+{
+	struct mptsas_device_info	*sas_info, *next;
+	struct scsi_device	*sdev;
+	struct scsi_target	*starget;
+	struct sas_rphy	*rphy;
+
+	/*
+	 * Delete all matching devices out of the list
+	 */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+	    list) {
+		if (!sas_info->is_logical_volume &&
+		    (sas_info->sas_address == sas_address ||
+		    (sas_info->fw.channel == channel &&
+		     sas_info->fw.id == id))) {
+			list_del(&sas_info->list);
+			kfree(sas_info);
+		}
+	}
+
+	sas_info = kzalloc(sizeof(struct mptsas_device_info), GFP_KERNEL);
+	if (!sas_info)
+		goto out;
+
+	/*
+	 * Set Firmware mapping
+	 */
+	sas_info->fw.id = id;
+	sas_info->fw.channel = channel;
+
+	sas_info->sas_address = sas_address;
+	sas_info->device_info = device_info;
+	sas_info->slot = slot;
+	sas_info->enclosure_logical_id = enclosure_logical_id;
+	INIT_LIST_HEAD(&sas_info->list);
+	list_add_tail(&sas_info->list, &ioc->sas_device_info_list);
+
+	/*
+	 * Set OS mapping
+	 */
+	shost_for_each_device(sdev, ioc->sh) {
+		starget = scsi_target(sdev);
+		rphy = dev_to_rphy(starget->dev.parent);
+		if (rphy->identify.sas_address == sas_address) {
+			sas_info->os.id = starget->id;
+			sas_info->os.channel = starget->channel;
+		}
+	}
+
+ out:
+	mutex_unlock(&ioc->sas_device_info_mutex);
+	return;
+}
+
+/**
+ *	mptsas_add_device_component_by_fw -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel:  fw mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_add_device_component_by_fw(MPT_ADAPTER *ioc, u8 channel, u8 id)
+{
+	struct mptsas_devinfo sas_device;
+	struct mptsas_enclosure enclosure_info;
+	int rc;
+
+	rc = mptsas_sas_device_pg0(ioc, &sas_device,
+	    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+	     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+	    (channel << 8) + id);
+	if (rc)
+		return;
+
+	memset(&enclosure_info, 0, sizeof(struct mptsas_enclosure));
+	mptsas_sas_enclosure_pg0(ioc, &enclosure_info,
+	    (MPI_SAS_ENCLOS_PGAD_FORM_HANDLE <<
+	     MPI_SAS_ENCLOS_PGAD_FORM_SHIFT),
+	     sas_device.handle_enclosure);
+
+	mptsas_add_device_component(ioc, sas_device.channel,
+	    sas_device.id, sas_device.sas_address, sas_device.device_info,
+	    sas_device.slot, enclosure_info.enclosure_logical_id);
+}
+
+/**
+ *	mptsas_add_device_component_starget_ir - Handle Integrated RAID, adding each individual device to list
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: fw mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
+		struct scsi_target *starget)
+{
+	CONFIGPARMS			cfg;
+	ConfigPageHeader_t		hdr;
+	dma_addr_t			dma_handle;
+	pRaidVolumePage0_t		buffer = NULL;
+	int				i;
+	RaidPhysDiskPage0_t 		phys_disk;
+	struct mptsas_device_info	*sas_info, *next;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_VOLUME;
+	/* assumption that all volumes on channel = 0 */
+	cfg.pageAddr = starget->id;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.timeout = 10;
+
+	if (mpt_config(ioc, &cfg) != 0)
+		goto out;
+
+	if (!hdr.PageLength)
+		goto out;
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer)
+		goto out;
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	if (mpt_config(ioc, &cfg) != 0)
+		goto out;
+
+	if (!buffer->NumPhysDisks)
+		goto out;
+
+	/*
+	 * Adding entry for hidden components
+	 */
+	for (i = 0; i < buffer->NumPhysDisks; i++) {
+
+		if (mpt_raid_phys_disk_pg0(ioc,
+		    buffer->PhysDisk[i].PhysDiskNum, &phys_disk) != 0)
+			continue;
+
+		mptsas_add_device_component_by_fw(ioc, phys_disk.PhysDiskBus,
+		    phys_disk.PhysDiskID);
+
+		mutex_lock(&ioc->sas_device_info_mutex);
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (!sas_info->is_logical_volume &&
+			    (sas_info->fw.channel == phys_disk.PhysDiskBus &&
+			    sas_info->fw.id == phys_disk.PhysDiskID)) {
+				sas_info->is_hidden_raid_component = 1;
+				sas_info->volume_id = starget->id;
+			}
+		}
+		mutex_unlock(&ioc->sas_device_info_mutex);
+
+	}
+
+	/*
+	 * Delete all matching devices out of the list
+	 */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+	    list) {
+		if (sas_info->is_logical_volume && sas_info->fw.id ==
+		    starget->id) {
+			list_del(&sas_info->list);
+			kfree(sas_info);
+		}
+	}
+
+	sas_info = kzalloc(sizeof(struct mptsas_device_info), GFP_KERNEL);
+	if (sas_info) {
+		sas_info->fw.id = starget->id;
+		sas_info->os.id = starget->id;
+		sas_info->os.channel = starget->channel;
+		sas_info->is_logical_volume = 1;
+		INIT_LIST_HEAD(&sas_info->list);
+		list_add_tail(&sas_info->list, &ioc->sas_device_info_list);
+	}
+	mutex_unlock(&ioc->sas_device_info_mutex);
+
+ out:
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+}
+
+/**
+ *	mptsas_add_device_component_starget -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@starget:
+ *
+ **/
+static void
+mptsas_add_device_component_starget(MPT_ADAPTER *ioc,
+	struct scsi_target *starget)
+{
+	VirtTarget	*vtarget;
+	struct sas_rphy	*rphy;
+	struct mptsas_phyinfo	*phy_info = NULL;
+	struct mptsas_enclosure	enclosure_info;
+
+	rphy = dev_to_rphy(starget->dev.parent);
+	vtarget = starget->hostdata;
+	phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+			rphy->identify.sas_address);
+	if (!phy_info)
+		return;
+
+	memset(&enclosure_info, 0, sizeof(struct mptsas_enclosure));
+	mptsas_sas_enclosure_pg0(ioc, &enclosure_info,
+		(MPI_SAS_ENCLOS_PGAD_FORM_HANDLE <<
+		MPI_SAS_ENCLOS_PGAD_FORM_SHIFT),
+		phy_info->attached.handle_enclosure);
+
+	mptsas_add_device_component(ioc, phy_info->attached.channel,
+		phy_info->attached.id, phy_info->attached.sas_address,
+		phy_info->attached.device_info,
+		phy_info->attached.slot, enclosure_info.enclosure_logical_id);
+}
+
+/**
+ *	mptsas_del_device_component_by_os - Once a device has been removed, we mark the entry in the list as being cached
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: os mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_del_device_component_by_os(MPT_ADAPTER *ioc, u8 channel, u8 id)
+{
+	struct mptsas_device_info	*sas_info, *next;
+
+	/*
+	 * Set is_cached flag
+	 */
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+		list) {
+		if (sas_info->os.channel == channel && sas_info->os.id == id)
+			sas_info->is_cached = 1;
+	}
+}
+
+/**
+ *	mptsas_del_device_components - Cleaning the list
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
+static void
+mptsas_del_device_components(MPT_ADAPTER *ioc)
+{
+	struct mptsas_device_info	*sas_info, *next;
+
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+		list) {
+		list_del(&sas_info->list);
+		kfree(sas_info);
+	}
+	mutex_unlock(&ioc->sas_device_info_mutex);
+}
+
 
 /*
  * mptsas_setup_wide_ports
@@ -434,8 +870,8 @@
 		 * Forming a port
 		 */
 		if (!port_details) {
-			port_details = kzalloc(sizeof(*port_details),
-				GFP_KERNEL);
+			port_details = kzalloc(sizeof(struct
+				mptsas_portinfo_details), GFP_KERNEL);
 			if (!port_details)
 				goto out;
 			port_details->num_phys = 1;
@@ -523,15 +959,62 @@
 	VirtTarget 			*vtarget = NULL;
 
 	shost_for_each_device(sdev, ioc->sh) {
-		if ((vdevice = sdev->hostdata) == NULL)
+		vdevice = sdev->hostdata;
+		if ((vdevice == NULL) ||
+			(vdevice->vtarget == NULL))
+			continue;
+		if ((vdevice->vtarget->tflags &
+		    MPT_TARGET_FLAGS_RAID_COMPONENT ||
+		    vdevice->vtarget->raidVolume))
 			continue;
 		if (vdevice->vtarget->id == id &&
-		    vdevice->vtarget->channel == channel)
+			vdevice->vtarget->channel == channel)
 			vtarget = vdevice->vtarget;
 	}
 	return vtarget;
 }
 
+static void
+mptsas_queue_device_delete(MPT_ADAPTER *ioc,
+	MpiEventDataSasDeviceStatusChange_t *sas_event_data)
+{
+	struct fw_event_work *fw_event;
+	int sz;
+
+	sz = offsetof(struct fw_event_work, event_data) +
+	    sizeof(MpiEventDataSasDeviceStatusChange_t);
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n",
+		    ioc->name, __func__, __LINE__);
+		return;
+	}
+	memcpy(fw_event->event_data, sas_event_data,
+	    sizeof(MpiEventDataSasDeviceStatusChange_t));
+	fw_event->event = MPI_EVENT_SAS_DEVICE_STATUS_CHANGE;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, msecs_to_jiffies(1));
+}
+
+static void
+mptsas_queue_rescan(MPT_ADAPTER *ioc)
+{
+	struct fw_event_work *fw_event;
+	int sz;
+
+	sz = offsetof(struct fw_event_work, event_data);
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n",
+		    ioc->name, __func__, __LINE__);
+		return;
+	}
+	fw_event->event = -1;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, msecs_to_jiffies(1));
+}
+
+
 /**
  * mptsas_target_reset
  *
@@ -550,13 +1033,21 @@
 {
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
-
-	if ((mf = mpt_get_msg_frame(ioc->TaskCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames @%d!!\n",
-		    ioc->name,__func__, __LINE__));
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0)
 		return 0;
+
+
+	mf = mpt_get_msg_frame(mptsasDeviceResetCtx, ioc);
+	if (mf == NULL) {
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, no msg frames @%d!!\n", ioc->name,
+			__func__, __LINE__));
+		goto out_fail;
 	}
 
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
+		ioc->name, mf));
+
 	/* Format the Request
 	 */
 	pScsiTm = (SCSITaskMgmt_t *) mf;
@@ -569,9 +1060,18 @@
 
 	DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)mf);
 
-	mpt_put_msg_frame_hi_pri(ioc->TaskCtx, ioc, mf);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	   "TaskMgmt type=%d (sas device delete) fw_channel = %d fw_id = %d)\n",
+	   ioc->name, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, channel, id));
+
+	mpt_put_msg_frame_hi_pri(mptsasDeviceResetCtx, ioc, mf);
 
 	return 1;
+
+ out_fail:
+
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
+	return 0;
 }
 
 /**
@@ -602,11 +1102,12 @@
 
 	vtarget->deleted = 1; /* block IO */
 
-	target_reset_list = kzalloc(sizeof(*target_reset_list),
+	target_reset_list = kzalloc(sizeof(struct mptsas_target_reset_event),
 	    GFP_ATOMIC);
 	if (!target_reset_list) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
-		    ioc->name,__func__, __LINE__));
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, failed to allocate mem @%d..!!\n",
+			ioc->name, __func__, __LINE__));
 		return;
 	}
 
@@ -614,84 +1115,101 @@
 		sizeof(*sas_event_data));
 	list_add_tail(&target_reset_list->list, &hd->target_reset_list);
 
-	if (hd->resetPending)
-		return;
+	target_reset_list->time_count = jiffies;
 
 	if (mptsas_target_reset(ioc, channel, id)) {
 		target_reset_list->target_reset_issued = 1;
-		hd->resetPending = 1;
 	}
 }
 
 /**
- * mptsas_dev_reset_complete
+ *	mptsas_taskmgmt_complete - complete SAS task management function
+ *	@ioc: Pointer to MPT_ADAPTER structure
  *
- * Completion for TARGET_RESET after NOT_RESPONDING_EVENT,
- * enable work queue to finish off removing device from upper layers.
- * then send next TARGET_RESET in the queue.
- *
- * @ioc
- *
+ *	Completion for TARGET_RESET after NOT_RESPONDING_EVENT, enable work
+ *	queue to finish off removing device from upper layers. then send next
+ *	TARGET_RESET in the queue.
  **/
-static void
-mptsas_dev_reset_complete(MPT_ADAPTER *ioc)
+static int
+mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 {
 	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
         struct list_head *head = &hd->target_reset_list;
-	struct mptsas_target_reset_event *target_reset_list;
-	struct mptsas_hotplug_event *ev;
-	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
 	u8		id, channel;
-	__le64		sas_address;
+	struct mptsas_target_reset_event	*target_reset_list;
+	SCSITaskMgmtReply_t *pScsiTmReply;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt completed: "
+	    "(mf = %p, mr = %p)\n", ioc->name, mf, mr));
+
+	pScsiTmReply = (SCSITaskMgmtReply_t *)mr;
+	if (pScsiTmReply) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "\tTaskMgmt completed: fw_channel = %d, fw_id = %d,\n"
+		    "\ttask_type = 0x%02X, iocstatus = 0x%04X "
+		    "loginfo = 0x%08X,\n\tresponse_code = 0x%02X, "
+		    "term_cmnds = %d\n", ioc->name,
+		    pScsiTmReply->Bus, pScsiTmReply->TargetID,
+		    pScsiTmReply->TaskType,
+		    le16_to_cpu(pScsiTmReply->IOCStatus),
+		    le32_to_cpu(pScsiTmReply->IOCLogInfo),
+		    pScsiTmReply->ResponseCode,
+		    le32_to_cpu(pScsiTmReply->TerminationCount)));
+
+		if (pScsiTmReply->ResponseCode)
+			mptscsih_taskmgmt_response_code(ioc,
+			pScsiTmReply->ResponseCode);
+	}
+
+	if (pScsiTmReply && (pScsiTmReply->TaskType ==
+	    MPI_SCSITASKMGMT_TASKTYPE_QUERY_TASK || pScsiTmReply->TaskType ==
+	     MPI_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET)) {
+		ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+		ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+		memcpy(ioc->taskmgmt_cmds.reply, mr,
+		    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+			complete(&ioc->taskmgmt_cmds.done);
+			return 1;
+		}
+		return 0;
+	}
+
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
 
 	if (list_empty(head))
-		return;
+		return 1;
 
-	target_reset_list = list_entry(head->next, struct mptsas_target_reset_event, list);
+	target_reset_list = list_entry(head->next,
+	    struct mptsas_target_reset_event, list);
 
-	sas_event_data = &target_reset_list->sas_event_data;
-	id = sas_event_data->TargetID;
-	channel = sas_event_data->Bus;
-	hd->resetPending = 0;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt: completed (%d seconds)\n",
+	    ioc->name, jiffies_to_msecs(jiffies -
+	    target_reset_list->time_count)/1000));
+
+	id = pScsiTmReply->TargetID;
+	channel = pScsiTmReply->Bus;
+	target_reset_list->time_count = jiffies;
 
 	/*
 	 * retry target reset
 	 */
 	if (!target_reset_list->target_reset_issued) {
-		if (mptsas_target_reset(ioc, channel, id)) {
+		if (mptsas_target_reset(ioc, channel, id))
 			target_reset_list->target_reset_issued = 1;
-			hd->resetPending = 1;
-		}
-		return;
+		return 1;
 	}
 
 	/*
 	 * enable work queue to remove device from upper layers
 	 */
 	list_del(&target_reset_list->list);
+	if ((mptsas_find_vtarget(ioc, channel, id)) && !ioc->fw_events_off)
+		mptsas_queue_device_delete(ioc,
+			&target_reset_list->sas_event_data);
 
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
-		    ioc->name,__func__, __LINE__));
-		return;
-	}
-
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->handle = le16_to_cpu(sas_event_data->DevHandle);
-	ev->parent_handle =
-	    le16_to_cpu(sas_event_data->ParentDevHandle);
-	ev->channel = channel;
-	ev->id =id;
-	ev->phy_id = sas_event_data->PhyNum;
-	memcpy(&sas_address, &sas_event_data->SASAddress,
-	    sizeof(__le64));
-	ev->sas_address = le64_to_cpu(sas_address);
-	ev->device_info = le32_to_cpu(sas_event_data->DeviceInfo);
-	ev->event_type = MPTSAS_DEL_DEVICE;
-	schedule_work(&ev->work);
-	kfree(target_reset_list);
 
 	/*
 	 * issue target reset to next device in the queue
@@ -699,34 +1217,19 @@
 
 	head = &hd->target_reset_list;
 	if (list_empty(head))
-		return;
+		return 1;
 
 	target_reset_list = list_entry(head->next, struct mptsas_target_reset_event,
 	    list);
 
-	sas_event_data = &target_reset_list->sas_event_data;
-	id = sas_event_data->TargetID;
-	channel = sas_event_data->Bus;
+	id = target_reset_list->sas_event_data.TargetID;
+	channel = target_reset_list->sas_event_data.Bus;
+	target_reset_list->time_count = jiffies;
 
-	if (mptsas_target_reset(ioc, channel, id)) {
+	if (mptsas_target_reset(ioc, channel, id))
 		target_reset_list->target_reset_issued = 1;
-		hd->resetPending = 1;
-	}
-}
 
-/**
- * mptsas_taskmgmt_complete
- *
- * @ioc
- * @mf
- * @mr
- *
- **/
-static int
-mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
-{
-	mptsas_dev_reset_complete(ioc);
-	return mptscsih_taskmgmt_complete(ioc, mf, mr);
+	return 1;
 }
 
 /**
@@ -740,37 +1243,59 @@
 mptsas_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	MPT_SCSI_HOST	*hd;
-	struct mptsas_target_reset_event *target_reset_list, *n;
 	int rc;
 
 	rc = mptscsih_ioc_reset(ioc, reset_phase);
+	if ((ioc->bus_type != SAS) || (!rc))
+		return rc;
 
-	if (ioc->bus_type != SAS)
-		goto out;
-
-	if (reset_phase != MPT_IOC_POST_RESET)
-		goto out;
-
-	if (!ioc->sh || !ioc->sh->hostdata)
-		goto out;
 	hd = shost_priv(ioc->sh);
 	if (!hd->ioc)
 		goto out;
 
-	if (list_empty(&hd->target_reset_list))
-		goto out;
-
-	/* flush the target_reset_list */
-	list_for_each_entry_safe(target_reset_list, n,
-	    &hd->target_reset_list, list) {
-		list_del(&target_reset_list->list);
-		kfree(target_reset_list);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		mptsas_fw_event_off(ioc);
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->sas_mgmt.status |= MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->sas_mgmt.done);
+		}
+		mptsas_cleanup_fw_event_q(ioc);
+		mptsas_queue_rescan(ioc);
+		mptsas_fw_event_on(ioc);
+		break;
+	default:
+		break;
 	}
 
  out:
 	return rc;
 }
 
+
+/**
+ * enum device_state -
+ * @DEVICE_RETRY: need to retry the TUR
+ * @DEVICE_ERROR: TUR return error, don't add device
+ * @DEVICE_READY: device can be added
+ *
+ */
+enum device_state{
+	DEVICE_RETRY,
+	DEVICE_ERROR,
+	DEVICE_READY,
+};
+
 static int
 mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
 		u32 form, u32 form_specific)
@@ -836,15 +1361,308 @@
 	return error;
 }
 
+/**
+ *	mptsas_add_end_device - report a new end device to sas transport layer
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phy_info: decribes attached device
+ *
+ *	return (0) success (1) failure
+ *
+ **/
+static int
+mptsas_add_end_device(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info)
+{
+	struct sas_rphy *rphy;
+	struct sas_port *port;
+	struct sas_identify identify;
+	char *ds = NULL;
+	u8 fw_id;
+
+	if (!phy_info) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: exit at line=%d\n", ioc->name,
+			 __func__, __LINE__));
+		return 1;
+	}
+
+	fw_id = phy_info->attached.id;
+
+	if (mptsas_get_rphy(phy_info)) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 2;
+	}
+
+	port = mptsas_get_port(phy_info);
+	if (!port) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 3;
+	}
+
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SSP_TARGET)
+		ds = "ssp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_STP_TARGET)
+		ds = "stp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
+		ds = "sata";
+
+	printk(MYIOC_s_INFO_FMT "attaching %s device: fw_channel %d, fw_id %d,"
+	    " phy %d, sas_addr 0x%llx\n", ioc->name, ds,
+	    phy_info->attached.channel, phy_info->attached.id,
+	    phy_info->attached.phy_id, (unsigned long long)
+	    phy_info->attached.sas_address);
+
+	mptsas_parse_device_info(&identify, &phy_info->attached);
+	rphy = sas_end_device_alloc(port);
+	if (!rphy) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 5; /* non-fatal: an rphy can be added later */
+	}
+
+	rphy->identify = identify;
+	if (sas_rphy_add(rphy)) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		sas_rphy_free(rphy);
+		return 6;
+	}
+	mptsas_set_rphy(ioc, phy_info, rphy);
+	return 0;
+}
+
+/**
+ *	mptsas_del_end_device - report a deleted end device to sas transport layer
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phy_info: decribes attached device
+ *
+ **/
+static void
+mptsas_del_end_device(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info)
+{
+	struct sas_rphy *rphy;
+	struct sas_port *port;
+	struct mptsas_portinfo *port_info;
+	struct mptsas_phyinfo *phy_info_parent;
+	int i;
+	char *ds = NULL;
+	u8 fw_id;
+	u64 sas_address;
+
+	if (!phy_info)
+		return;
+
+	fw_id = phy_info->attached.id;
+	sas_address = phy_info->attached.sas_address;
+
+	if (!phy_info->port_details) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+	rphy = mptsas_get_rphy(phy_info);
+	if (!rphy) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+
+	if (phy_info->attached.device_info & MPI_SAS_DEVICE_INFO_SSP_INITIATOR
+		|| phy_info->attached.device_info
+			& MPI_SAS_DEVICE_INFO_SMP_INITIATOR
+		|| phy_info->attached.device_info
+			& MPI_SAS_DEVICE_INFO_STP_INITIATOR)
+		ds = "initiator";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SSP_TARGET)
+		ds = "ssp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_STP_TARGET)
+		ds = "stp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
+		ds = "sata";
+
+	dev_printk(KERN_DEBUG, &rphy->dev, MYIOC_s_FMT
+	    "removing %s device: fw_channel %d, fw_id %d, phy %d,"
+	    "sas_addr 0x%llx\n", ioc->name, ds, phy_info->attached.channel,
+	    phy_info->attached.id, phy_info->attached.phy_id,
+	    (unsigned long long) sas_address);
+
+	port = mptsas_get_port(phy_info);
+	if (!port) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+	port_info = phy_info->portinfo;
+	phy_info_parent = port_info->phy_info;
+	for (i = 0; i < port_info->num_phys; i++, phy_info_parent++) {
+		if (!phy_info_parent->phy)
+			continue;
+		if (phy_info_parent->attached.sas_address !=
+		    sas_address)
+			continue;
+		dev_printk(KERN_DEBUG, &phy_info_parent->phy->dev,
+		    MYIOC_s_FMT "delete phy %d, phy-obj (0x%p)\n",
+		    ioc->name, phy_info_parent->phy_id,
+		    phy_info_parent->phy);
+		sas_port_delete_phy(port, phy_info_parent->phy);
+	}
+
+	dev_printk(KERN_DEBUG, &port->dev, MYIOC_s_FMT
+	    "delete port %d, sas_addr (0x%llx)\n", ioc->name,
+	     port->port_identifier, (unsigned long long)sas_address);
+	sas_port_delete(port);
+	mptsas_set_port(ioc, phy_info, NULL);
+	mptsas_port_delete(ioc, phy_info->port_details);
+}
+
+struct mptsas_phyinfo *
+mptsas_refreshing_device_handles(MPT_ADAPTER *ioc,
+	struct mptsas_devinfo *sas_device)
+{
+	struct mptsas_phyinfo *phy_info;
+	struct mptsas_portinfo *port_info;
+	int i;
+
+	phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+	    sas_device->sas_address);
+	if (!phy_info)
+		goto out;
+	port_info = phy_info->portinfo;
+	if (!port_info)
+		goto out;
+	mutex_lock(&ioc->sas_topology_mutex);
+	for (i = 0; i < port_info->num_phys; i++) {
+		if (port_info->phy_info[i].attached.sas_address !=
+			sas_device->sas_address)
+			continue;
+		port_info->phy_info[i].attached.channel = sas_device->channel;
+		port_info->phy_info[i].attached.id = sas_device->id;
+		port_info->phy_info[i].attached.sas_address =
+		    sas_device->sas_address;
+		port_info->phy_info[i].attached.handle = sas_device->handle;
+		port_info->phy_info[i].attached.handle_parent =
+		    sas_device->handle_parent;
+		port_info->phy_info[i].attached.handle_enclosure =
+		    sas_device->handle_enclosure;
+	}
+	mutex_unlock(&ioc->sas_topology_mutex);
+ out:
+	return phy_info;
+}
+
+/**
+ * mptsas_firmware_event_work - work thread for processing fw events
+ * @work: work queue payload containing info describing the event
+ * Context: user
+ *
+ */
+static void
+mptsas_firmware_event_work(struct work_struct *work)
+{
+	struct fw_event_work *fw_event =
+		container_of(work, struct fw_event_work, work.work);
+	MPT_ADAPTER *ioc = fw_event->ioc;
+
+	/* special rescan topology handling */
+	if (fw_event->event == -1) {
+		if (ioc->in_rescan) {
+			devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				"%s: rescan ignored as it is in progress\n",
+				ioc->name, __func__));
+			return;
+		}
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: rescan after "
+		    "reset\n", ioc->name, __func__));
+		ioc->in_rescan = 1;
+		mptsas_not_responding_devices(ioc);
+		mptsas_scan_sas_topology(ioc);
+		ioc->in_rescan = 0;
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
+
+	/* events handling turned off during host reset */
+	if (ioc->fw_events_off) {
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: fw_event=(0x%p), "
+	    "event = (0x%02x)\n", ioc->name, __func__, fw_event,
+	    (fw_event->event & 0xFF)));
+
+	switch (fw_event->event) {
+	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
+		mptsas_send_sas_event(fw_event);
+		break;
+	case MPI_EVENT_INTEGRATED_RAID:
+		mptsas_send_raid_event(fw_event);
+		break;
+	case MPI_EVENT_IR2:
+		mptsas_send_ir2_event(fw_event);
+		break;
+	case MPI_EVENT_PERSISTENT_TABLE_FULL:
+		mptbase_sas_persist_operation(ioc,
+		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
+		break;
+	case MPI_EVENT_SAS_BROADCAST_PRIMITIVE:
+		mptsas_broadcast_primative_work(fw_event);
+		break;
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+		mptsas_send_expander_event(fw_event);
+		break;
+	case MPI_EVENT_SAS_PHY_LINK_STATUS:
+		mptsas_send_link_status_event(fw_event);
+		break;
+	case MPI_EVENT_QUEUE_FULL:
+		mptsas_handle_queue_full_event(fw_event);
+		break;
+	}
+}
+
+
+
 static int
 mptsas_slave_configure(struct scsi_device *sdev)
 {
+	struct Scsi_Host	*host = sdev->host;
+	MPT_SCSI_HOST	*hd = shost_priv(host);
+	MPT_ADAPTER	*ioc = hd->ioc;
+	VirtDevice	*vdevice = sdev->hostdata;
 
-	if (sdev->channel == MPTSAS_RAID_CHANNEL)
+	if (vdevice->vtarget->deleted) {
+		sdev_printk(KERN_INFO, sdev, "clearing deleted flag\n");
+		vdevice->vtarget->deleted = 0;
+	}
+
+	/*
+	 * RAID volumes placed beyond the last expected port.
+	 * Ignore sending sas mode pages in that case..
+	 */
+	if (sdev->channel == MPTSAS_RAID_CHANNEL) {
+		mptsas_add_device_component_starget_ir(ioc, scsi_target(sdev));
 		goto out;
+	}
 
 	sas_read_port_mode_page(sdev);
 
+	mptsas_add_device_component_starget(ioc, scsi_target(sdev));
+
  out:
 	return mptscsih_slave_configure(sdev);
 }
@@ -875,9 +1693,18 @@
 	 * RAID volumes placed beyond the last expected port.
 	 */
 	if (starget->channel == MPTSAS_RAID_CHANNEL) {
-		for (i=0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++)
-			if (id == ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID)
-				channel = ioc->raid_data.pIocPg2->RaidVolume[i].VolumeBus;
+		if (!ioc->raid_data.pIocPg2) {
+			kfree(vtarget);
+			return -ENXIO;
+		}
+		for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+			if (id == ioc->raid_data.pIocPg2->
+					RaidVolume[i].VolumeID) {
+				channel = ioc->raid_data.pIocPg2->
+					RaidVolume[i].VolumeBus;
+			}
+		}
+		vtarget->raidVolume = 1;
 		goto out;
 	}
 
@@ -926,11 +1753,18 @@
 	struct sas_rphy		*rphy;
 	struct mptsas_portinfo	*p;
 	int 			 i;
-	MPT_ADAPTER *ioc = hd->ioc;
+	MPT_ADAPTER	*ioc = hd->ioc;
+	VirtTarget	*vtarget;
 
 	if (!starget->hostdata)
 		return;
 
+	vtarget = starget->hostdata;
+
+	mptsas_del_device_component_by_os(ioc, starget->channel,
+	    starget->id);
+
+
 	if (starget->channel == MPTSAS_RAID_CHANNEL)
 		goto out;
 
@@ -940,12 +1774,21 @@
 			if (p->phy_info[i].attached.sas_address !=
 					rphy->identify.sas_address)
 				continue;
+
+			starget_printk(KERN_INFO, starget, MYIOC_s_FMT
+			"delete device: fw_channel %d, fw_id %d, phy %d, "
+			"sas_addr 0x%llx\n", ioc->name,
+			p->phy_info[i].attached.channel,
+			p->phy_info[i].attached.id,
+			p->phy_info[i].attached.phy_id, (unsigned long long)
+			p->phy_info[i].attached.sas_address);
+
 			mptsas_set_starget(&p->phy_info[i], NULL);
-			goto out;
 		}
 	}
 
  out:
+	vtarget->starget = NULL;
 	kfree(starget->hostdata);
 	starget->hostdata = NULL;
 }
@@ -1008,6 +1851,8 @@
 static int
 mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
+	MPT_SCSI_HOST	*hd;
+	MPT_ADAPTER	*ioc;
 	VirtDevice	*vdevice = SCpnt->device->hostdata;
 
 	if (!vdevice || !vdevice->vtarget || vdevice->vtarget->deleted) {
@@ -1016,6 +1861,12 @@
 		return 0;
 	}
 
+	hd = shost_priv(SCpnt->device->host);
+	ioc = hd->ioc;
+
+	if (ioc->sas_discovery_quiesce_io)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 //	scsi_print_command(SCpnt);
 
 	return mptscsih_qcmd(SCpnt,done);
@@ -1114,14 +1965,19 @@
 static int mptsas_mgmt_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
 		MPT_FRAME_HDR *reply)
 {
-	ioc->sas_mgmt.status |= MPT_SAS_MGMT_STATUS_COMMAND_GOOD;
+	ioc->sas_mgmt.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 	if (reply != NULL) {
-		ioc->sas_mgmt.status |= MPT_SAS_MGMT_STATUS_RF_VALID;
+		ioc->sas_mgmt.status |= MPT_MGMT_STATUS_RF_VALID;
 		memcpy(ioc->sas_mgmt.reply, reply,
 		    min(ioc->reply_sz, 4 * reply->u.reply.MsgLength));
 	}
-	complete(&ioc->sas_mgmt.done);
-	return 1;
+
+	if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_PENDING) {
+		ioc->sas_mgmt.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->sas_mgmt.done);
+		return 1;
+	}
+	return 0;
 }
 
 static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
@@ -1160,6 +2016,7 @@
 		MPI_SAS_OP_PHY_HARD_RESET : MPI_SAS_OP_PHY_LINK_RESET;
 	req->PhyNum = phy->identify.phy_identifier;
 
+	INITIALIZE_MGMT_STATUS(ioc->sas_mgmt.status)
 	mpt_put_msg_frame(mptsasMgmtCtx, ioc, mf);
 
 	timeleft = wait_for_completion_timeout(&ioc->sas_mgmt.done,
@@ -1174,7 +2031,7 @@
 
 	/* a reply frame is expected */
 	if ((ioc->sas_mgmt.status &
-	    MPT_IOCTL_STATUS_RF_VALID) == 0) {
+	    MPT_MGMT_STATUS_RF_VALID) == 0) {
 		error = -ENXIO;
 		goto out_unlock;
 	}
@@ -1191,6 +2048,7 @@
 	error = 0;
 
  out_unlock:
+	CLEAR_MGMT_STATUS(ioc->sas_mgmt.status)
 	mutex_unlock(&ioc->sas_mgmt.mutex);
  out:
 	return error;
@@ -1277,8 +2135,8 @@
 	/* do we need to support multiple segments? */
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
-		    ioc->name, __func__, req->bio->bi_vcnt, req->data_len,
-		    rsp->bio->bi_vcnt, rsp->data_len);
+		    ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
+		    rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
@@ -1295,7 +2153,7 @@
 	smpreq = (SmpPassthroughRequest_t *)mf;
 	memset(smpreq, 0, sizeof(*smpreq));
 
-	smpreq->RequestDataLength = cpu_to_le16(req->data_len - 4);
+	smpreq->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
 	smpreq->Function = MPI_FUNCTION_SMP_PASSTHROUGH;
 
 	if (rphy)
@@ -1304,7 +2162,7 @@
 		struct mptsas_portinfo *port_info;
 
 		mutex_lock(&ioc->sas_topology_mutex);
-		port_info = mptsas_get_hba_portinfo(ioc);
+		port_info = ioc->hba_port_info;
 		if (port_info && port_info->phy_info)
 			sas_address =
 				port_info->phy_info[0].phy->identify.sas_address;
@@ -1319,26 +2177,32 @@
 	/* request */
 	flagsLength = (MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 		       MPI_SGE_FLAGS_END_OF_BUFFER |
-		       MPI_SGE_FLAGS_DIRECTION |
-		       mpt_addr_size()) << MPI_SGE_FLAGS_SHIFT;
-	flagsLength |= (req->data_len - 4);
+		       MPI_SGE_FLAGS_DIRECTION)
+		       << MPI_SGE_FLAGS_SHIFT;
+	flagsLength |= (blk_rq_bytes(req) - 4);
 
 	dma_addr_out = pci_map_single(ioc->pcidev, bio_data(req->bio),
-				      req->data_len, PCI_DMA_BIDIRECTIONAL);
+				      blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_out)
 		goto put_mf;
-	mpt_add_sge(psge, flagsLength, dma_addr_out);
-	psge += (sizeof(u32) + sizeof(dma_addr_t));
+	ioc->add_sge(psge, flagsLength, dma_addr_out);
+	psge += ioc->SGE_size;
 
 	/* response */
-	flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ;
-	flagsLength |= rsp->data_len + 4;
+	flagsLength = MPI_SGE_FLAGS_SIMPLE_ELEMENT |
+		MPI_SGE_FLAGS_SYSTEM_ADDRESS |
+		MPI_SGE_FLAGS_IOC_TO_HOST |
+		MPI_SGE_FLAGS_END_OF_BUFFER;
+
+	flagsLength = flagsLength << MPI_SGE_FLAGS_SHIFT;
+	flagsLength |= blk_rq_bytes(rsp) + 4;
 	dma_addr_in =  pci_map_single(ioc->pcidev, bio_data(rsp->bio),
-				      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
+				      blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_in)
 		goto unmap;
-	mpt_add_sge(psge, flagsLength, dma_addr_in);
+	ioc->add_sge(psge, flagsLength, dma_addr_in);
 
+	INITIALIZE_MGMT_STATUS(ioc->sas_mgmt.status)
 	mpt_put_msg_frame(mptsasMgmtCtx, ioc, mf);
 
 	timeleft = wait_for_completion_timeout(&ioc->sas_mgmt.done, 10 * HZ);
@@ -1351,30 +2215,32 @@
 	}
 	mf = NULL;
 
-	if (ioc->sas_mgmt.status & MPT_IOCTL_STATUS_RF_VALID) {
+	if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_RF_VALID) {
 		SmpPassthroughReply_t *smprep;
 
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
 		memcpy(req->sense, smprep, sizeof(*smprep));
 		req->sense_len = sizeof(*smprep);
-		req->data_len = 0;
-		rsp->data_len -= smprep->ResponseDataLength;
+		req->resid_len = 0;
+		rsp->resid_len -= smprep->ResponseDataLength;
 	} else {
-		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
+		printk(MYIOC_s_ERR_FMT
+		    "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
 		ret = -ENXIO;
 	}
 unmap:
 	if (dma_addr_out)
-		pci_unmap_single(ioc->pcidev, dma_addr_out, req->data_len,
+		pci_unmap_single(ioc->pcidev, dma_addr_out, blk_rq_bytes(req),
 				 PCI_DMA_BIDIRECTIONAL);
 	if (dma_addr_in)
-		pci_unmap_single(ioc->pcidev, dma_addr_in, rsp->data_len,
+		pci_unmap_single(ioc->pcidev, dma_addr_in, blk_rq_bytes(rsp),
 				 PCI_DMA_BIDIRECTIONAL);
 put_mf:
 	if (mf)
 		mpt_free_msg_frame(ioc, mf);
 out_unlock:
+	CLEAR_MGMT_STATUS(ioc->sas_mgmt.status)
 	mutex_unlock(&ioc->sas_mgmt.mutex);
 out:
 	return ret;
@@ -1438,7 +2304,7 @@
 
 	port_info->num_phys = buffer->NumPhys;
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(*port_info->phy_info),GFP_KERNEL);
+		sizeof(struct mptsas_phyinfo), GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
@@ -1600,10 +2466,6 @@
 	__le64 sas_address;
 	int error=0;
 
-	if (ioc->sas_discovery_runtime &&
-		mptsas_is_end_device(device_info))
-			goto out;
-
 	hdr.PageVersion = MPI_SASDEVICE0_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 0;
@@ -1644,6 +2506,7 @@
 
 	mptsas_print_device_pg0(ioc, buffer);
 
+	memset(device_info, 0, sizeof(struct mptsas_devinfo));
 	device_info->handle = le16_to_cpu(buffer->DevHandle);
 	device_info->handle_parent = le16_to_cpu(buffer->ParentDevHandle);
 	device_info->handle_enclosure =
@@ -1675,7 +2538,9 @@
 	SasExpanderPage0_t *buffer;
 	dma_addr_t dma_handle;
 	int i, error;
+	__le64 sas_address;
 
+	memset(port_info, 0, sizeof(struct mptsas_portinfo));
 	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 0;
@@ -1721,18 +2586,23 @@
 	}
 
 	/* save config data */
-	port_info->num_phys = buffer->NumPhys;
+	port_info->num_phys = (buffer->NumPhys) ? buffer->NumPhys : 1;
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(*port_info->phy_info),GFP_KERNEL);
+		sizeof(struct mptsas_phyinfo), GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
 	}
 
+	memcpy(&sas_address, &buffer->SASAddress, sizeof(__le64));
 	for (i = 0; i < port_info->num_phys; i++) {
 		port_info->phy_info[i].portinfo = port_info;
 		port_info->phy_info[i].handle =
 		    le16_to_cpu(buffer->DevHandle);
+		port_info->phy_info[i].identify.sas_address =
+		    le64_to_cpu(sas_address);
+		port_info->phy_info[i].identify.handle_parent =
+		    le16_to_cpu(buffer->ParentDevHandle);
 	}
 
  out_free_consistent:
@@ -1752,11 +2622,7 @@
 	dma_addr_t dma_handle;
 	int error=0;
 
-	if (ioc->sas_discovery_runtime &&
-		mptsas_is_end_device(&phy_info->attached))
-			goto out;
-
-	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
+	hdr.PageVersion = MPI_SASEXPANDER1_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 1;
 	hdr.Reserved1 = 0;
@@ -1791,6 +2657,12 @@
 	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
 
 	error = mpt_config(ioc, &cfg);
+
+	if (error == MPI_IOCSTATUS_CONFIG_INVALID_PAGE) {
+		error = -ENODEV;
+		goto out;
+	}
+
 	if (error)
 		goto out_free_consistent;
 
@@ -2010,16 +2882,21 @@
 				goto out;
 			}
 			mptsas_set_port(ioc, phy_info, port);
-			dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			    "sas_port_alloc: port=%p dev=%p port_id=%d\n",
-			    ioc->name, port, dev, port->port_identifier));
+			devtprintk(ioc, dev_printk(KERN_DEBUG, &port->dev,
+			    MYIOC_s_FMT "add port %d, sas_addr (0x%llx)\n",
+			    ioc->name, port->port_identifier,
+			    (unsigned long long)phy_info->
+			    attached.sas_address));
 		}
-		dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT "sas_port_add_phy: phy_id=%d\n",
-		    ioc->name, phy_info->phy_id));
+		dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"sas_port_add_phy: phy_id=%d\n",
+			ioc->name, phy_info->phy_id));
 		sas_port_add_phy(port, phy_info->phy);
 		phy_info->sas_port_add_phy = 0;
+		devtprintk(ioc, dev_printk(KERN_DEBUG, &phy_info->phy->dev,
+		    MYIOC_s_FMT "add phy %d, phy-obj (0x%p)\n", ioc->name,
+		     phy_info->phy_id, phy_info->phy));
 	}
-
 	if (!mptsas_get_rphy(phy_info) && port && !port->rphy) {
 
 		struct sas_rphy *rphy;
@@ -2032,18 +2909,17 @@
 		 * the adding/removing of devices that occur
 		 * after start of day.
 		 */
-		if (ioc->sas_discovery_runtime &&
-			mptsas_is_end_device(&phy_info->attached))
-				goto out;
+		if (mptsas_is_end_device(&phy_info->attached) &&
+		    phy_info->attached.handle_parent) {
+			goto out;
+		}
 
 		mptsas_parse_device_info(&identify, &phy_info->attached);
 		if (scsi_is_host_device(parent)) {
 			struct mptsas_portinfo *port_info;
 			int i;
 
-			mutex_lock(&ioc->sas_topology_mutex);
-			port_info = mptsas_get_hba_portinfo(ioc);
-			mutex_unlock(&ioc->sas_topology_mutex);
+			port_info = ioc->hba_port_info;
 
 			for (i = 0; i < port_info->num_phys; i++)
 				if (port_info->phy_info[i].identify.sas_address ==
@@ -2102,7 +2978,7 @@
 	struct mptsas_portinfo *port_info, *hba;
 	int error = -ENOMEM, i;
 
-	hba = kzalloc(sizeof(*port_info), GFP_KERNEL);
+	hba = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
 	if (! hba)
 		goto out;
 
@@ -2112,9 +2988,10 @@
 
 	mptsas_sas_io_unit_pg1(ioc);
 	mutex_lock(&ioc->sas_topology_mutex);
-	port_info = mptsas_get_hba_portinfo(ioc);
+	port_info = ioc->hba_port_info;
 	if (!port_info) {
-		port_info = hba;
+		ioc->hba_port_info = port_info = hba;
+		ioc->hba_port_num_phy = port_info->num_phys;
 		list_add_tail(&port_info->list, &ioc->sas_topology);
 	} else {
 		for (i = 0; i < hba->num_phys; i++) {
@@ -2130,15 +3007,22 @@
 		hba = NULL;
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
+#if defined(CPQ_CIM)
+	ioc->num_ports = port_info->num_phys;
+#endif
 	for (i = 0; i < port_info->num_phys; i++) {
 		mptsas_sas_phy_pg0(ioc, &port_info->phy_info[i],
 			(MPI_SAS_PHY_PGAD_FORM_PHY_NUMBER <<
 			 MPI_SAS_PHY_PGAD_FORM_SHIFT), i);
-
+		port_info->phy_info[i].identify.handle =
+		    port_info->phy_info[i].handle;
 		mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify,
 			(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
 			 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-			 port_info->phy_info[i].handle);
+			 port_info->phy_info[i].identify.handle);
+		if (!ioc->hba_port_sas_addr)
+			ioc->hba_port_sas_addr =
+			    port_info->phy_info[i].identify.sas_address;
 		port_info->phy_info[i].identify.phy_id =
 		    port_info->phy_info[i].phy_id = i;
 		if (port_info->phy_info[i].attached.handle)
@@ -2163,247 +3047,720 @@
 	return error;
 }
 
-static int
-mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle)
+static void
+mptsas_expander_refresh(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 {
-	struct mptsas_portinfo *port_info, *p, *ex;
-	struct device *parent;
-	struct sas_rphy *rphy;
-	int error = -ENOMEM, i, j;
+	struct mptsas_portinfo *parent;
+	struct device *parent_dev;
+	struct sas_rphy	*rphy;
+	int		i;
+	u64		sas_address; /* expander sas address */
+	u32		handle;
 
-	ex = kzalloc(sizeof(*port_info), GFP_KERNEL);
-	if (!ex)
-		goto out;
-
-	error = mptsas_sas_expander_pg0(ioc, ex,
-	    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
-	     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), *handle);
-	if (error)
-		goto out_free_port_info;
-
-	*handle = ex->phy_info[0].handle;
-
-	mutex_lock(&ioc->sas_topology_mutex);
-	port_info = mptsas_find_portinfo_by_handle(ioc, *handle);
-	if (!port_info) {
-		port_info = ex;
-		list_add_tail(&port_info->list, &ioc->sas_topology);
-	} else {
-		for (i = 0; i < ex->num_phys; i++) {
-			port_info->phy_info[i].handle =
-				ex->phy_info[i].handle;
-			port_info->phy_info[i].port_id =
-				ex->phy_info[i].port_id;
-		}
-		kfree(ex->phy_info);
-		kfree(ex);
-		ex = NULL;
-	}
-	mutex_unlock(&ioc->sas_topology_mutex);
-
+	handle = port_info->phy_info[0].handle;
+	sas_address = port_info->phy_info[0].identify.sas_address;
 	for (i = 0; i < port_info->num_phys; i++) {
 		mptsas_sas_expander_pg1(ioc, &port_info->phy_info[i],
-			(MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
-			 MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + *handle);
+		    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
+		    MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + handle);
 
-		if (port_info->phy_info[i].identify.handle) {
-			mptsas_sas_device_pg0(ioc,
-				&port_info->phy_info[i].identify,
-				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
-				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				port_info->phy_info[i].identify.handle);
-			port_info->phy_info[i].identify.phy_id =
-			    port_info->phy_info[i].phy_id;
-		}
+		mptsas_sas_device_pg0(ioc,
+		    &port_info->phy_info[i].identify,
+		    (MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+		    MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+		    port_info->phy_info[i].identify.handle);
+		port_info->phy_info[i].identify.phy_id =
+		    port_info->phy_info[i].phy_id;
 
 		if (port_info->phy_info[i].attached.handle) {
 			mptsas_sas_device_pg0(ioc,
-				&port_info->phy_info[i].attached,
-				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
-				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				port_info->phy_info[i].attached.handle);
+			    &port_info->phy_info[i].attached,
+			    (MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+			     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+			    port_info->phy_info[i].attached.handle);
 			port_info->phy_info[i].attached.phy_id =
 			    port_info->phy_info[i].phy_id;
 		}
 	}
 
-	parent = &ioc->sh->shost_gendev;
-	for (i = 0; i < port_info->num_phys; i++) {
-		mutex_lock(&ioc->sas_topology_mutex);
-		list_for_each_entry(p, &ioc->sas_topology, list) {
-			for (j = 0; j < p->num_phys; j++) {
-				if (port_info->phy_info[i].identify.handle !=
-						p->phy_info[j].attached.handle)
-					continue;
-				rphy = mptsas_get_rphy(&p->phy_info[j]);
-				parent = &rphy->dev;
-			}
-		}
-		mutex_unlock(&ioc->sas_topology_mutex);
-	}
-
-	mptsas_setup_wide_ports(ioc, port_info);
-
-	for (i = 0; i < port_info->num_phys; i++, ioc->sas_index++)
-		mptsas_probe_one_phy(parent, &port_info->phy_info[i],
-		    ioc->sas_index, 0);
-
-	return 0;
-
- out_free_port_info:
-	if (ex) {
-		kfree(ex->phy_info);
-		kfree(ex);
-	}
- out:
-	return error;
-}
-
-/*
- * mptsas_delete_expander_phys
- *
- *
- * This will traverse topology, and remove expanders
- * that are no longer present
- */
-static void
-mptsas_delete_expander_phys(MPT_ADAPTER *ioc)
-{
-	struct mptsas_portinfo buffer;
-	struct mptsas_portinfo *port_info, *n, *parent;
-	struct mptsas_phyinfo *phy_info;
-	struct sas_port * port;
-	int i;
-	u64	expander_sas_address;
-
 	mutex_lock(&ioc->sas_topology_mutex);
-	list_for_each_entry_safe(port_info, n, &ioc->sas_topology, list) {
-
-		if (!(port_info->phy_info[0].identify.device_info &
-		    MPI_SAS_DEVICE_INFO_SMP_TARGET))
-			continue;
-
-		if (mptsas_sas_expander_pg0(ioc, &buffer,
-		     (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
-		     MPI_SAS_EXPAND_PGAD_FORM_SHIFT),
-		     port_info->phy_info[0].handle)) {
-
-			/*
-			 * Obtain the port_info instance to the parent port
-			 */
-			parent = mptsas_find_portinfo_by_handle(ioc,
-			    port_info->phy_info[0].identify.handle_parent);
-
-			if (!parent)
-				goto next_port;
-
-			expander_sas_address =
-				port_info->phy_info[0].identify.sas_address;
-
-			/*
-			 * Delete rphys in the parent that point
-			 * to this expander.  The transport layer will
-			 * cleanup all the children.
-			 */
-			phy_info = parent->phy_info;
-			for (i = 0; i < parent->num_phys; i++, phy_info++) {
-				port = mptsas_get_port(phy_info);
-				if (!port)
-					continue;
-				if (phy_info->attached.sas_address !=
-					expander_sas_address)
-					continue;
-				dsaswideprintk(ioc,
-				    dev_printk(KERN_DEBUG, &port->dev,
-				    MYIOC_s_FMT "delete port (%d)\n", ioc->name,
-				    port->port_identifier));
-				sas_port_delete(port);
-				mptsas_port_delete(ioc, phy_info->port_details);
-			}
- next_port:
-
-			phy_info = port_info->phy_info;
-			for (i = 0; i < port_info->num_phys; i++, phy_info++)
-				mptsas_port_delete(ioc, phy_info->port_details);
-
-			list_del(&port_info->list);
-			kfree(port_info->phy_info);
-			kfree(port_info);
+	parent = mptsas_find_portinfo_by_handle(ioc,
+	    port_info->phy_info[0].identify.handle_parent);
+	if (!parent) {
+		mutex_unlock(&ioc->sas_topology_mutex);
+		return;
+	}
+	for (i = 0, parent_dev = NULL; i < parent->num_phys && !parent_dev;
+	    i++) {
+		if (parent->phy_info[i].attached.sas_address == sas_address) {
+			rphy = mptsas_get_rphy(&parent->phy_info[i]);
+			parent_dev = &rphy->dev;
 		}
-		/*
-		* Free this memory allocated from inside
-		* mptsas_sas_expander_pg0
-		*/
-		kfree(buffer.phy_info);
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
+
+	mptsas_setup_wide_ports(ioc, port_info);
+	for (i = 0; i < port_info->num_phys; i++, ioc->sas_index++)
+		mptsas_probe_one_phy(parent_dev, &port_info->phy_info[i],
+		    ioc->sas_index, 0);
 }
 
-/*
- * Start of day discovery
+static void
+mptsas_expander_event_add(MPT_ADAPTER *ioc,
+    MpiEventDataSasExpanderStatusChange_t *expander_data)
+{
+	struct mptsas_portinfo *port_info;
+	int i;
+	__le64 sas_address;
+
+	port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
+	if (!port_info)
+		BUG();
+	port_info->num_phys = (expander_data->NumPhys) ?
+	    expander_data->NumPhys : 1;
+	port_info->phy_info = kcalloc(port_info->num_phys,
+	    sizeof(struct mptsas_phyinfo), GFP_KERNEL);
+	if (!port_info->phy_info)
+		BUG();
+	memcpy(&sas_address, &expander_data->SASAddress, sizeof(__le64));
+	for (i = 0; i < port_info->num_phys; i++) {
+		port_info->phy_info[i].portinfo = port_info;
+		port_info->phy_info[i].handle =
+		    le16_to_cpu(expander_data->DevHandle);
+		port_info->phy_info[i].identify.sas_address =
+		    le64_to_cpu(sas_address);
+		port_info->phy_info[i].identify.handle_parent =
+		    le16_to_cpu(expander_data->ParentDevHandle);
+	}
+
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+	mutex_unlock(&ioc->sas_topology_mutex);
+
+	printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)sas_address);
+
+	mptsas_expander_refresh(ioc, port_info);
+}
+
+/**
+ * mptsas_delete_expander_siblings - remove siblings attached to expander
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @parent: the parent port_info object
+ * @expander: the expander port_info object
+ **/
+static void
+mptsas_delete_expander_siblings(MPT_ADAPTER *ioc, struct mptsas_portinfo
+    *parent, struct mptsas_portinfo *expander)
+{
+	struct mptsas_phyinfo *phy_info;
+	struct mptsas_portinfo *port_info;
+	struct sas_rphy *rphy;
+	int i;
+
+	phy_info = expander->phy_info;
+	for (i = 0; i < expander->num_phys; i++, phy_info++) {
+		rphy = mptsas_get_rphy(phy_info);
+		if (!rphy)
+			continue;
+		if (rphy->identify.device_type == SAS_END_DEVICE)
+			mptsas_del_end_device(ioc, phy_info);
+	}
+
+	phy_info = expander->phy_info;
+	for (i = 0; i < expander->num_phys; i++, phy_info++) {
+		rphy = mptsas_get_rphy(phy_info);
+		if (!rphy)
+			continue;
+		if (rphy->identify.device_type ==
+		    MPI_SAS_DEVICE_INFO_EDGE_EXPANDER ||
+		    rphy->identify.device_type ==
+		    MPI_SAS_DEVICE_INFO_FANOUT_EXPANDER) {
+			port_info = mptsas_find_portinfo_by_sas_address(ioc,
+			    rphy->identify.sas_address);
+			if (!port_info)
+				continue;
+			if (port_info == parent) /* backlink rphy */
+				continue;
+			/*
+			Delete this expander even if the expdevpage is exists
+			because the parent expander is already deleted
+			*/
+			mptsas_expander_delete(ioc, port_info, 1);
+		}
+	}
+}
+
+
+/**
+ *	mptsas_expander_delete - remove this expander
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@port_info: expander port_info struct
+ *	@force: Flag to forcefully delete the expander
+ *
+ **/
+
+static void mptsas_expander_delete(MPT_ADAPTER *ioc,
+		struct mptsas_portinfo *port_info, u8 force)
+{
+
+	struct mptsas_portinfo *parent;
+	int		i;
+	u64		expander_sas_address;
+	struct mptsas_phyinfo *phy_info;
+	struct mptsas_portinfo buffer;
+	struct mptsas_portinfo_details *port_details;
+	struct sas_port *port;
+
+	if (!port_info)
+		return;
+
+	/* see if expander is still there before deleting */
+	mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
+	    MPI_SAS_EXPAND_PGAD_FORM_SHIFT),
+	    port_info->phy_info[0].identify.handle);
+
+	if (buffer.num_phys) {
+		kfree(buffer.phy_info);
+		if (!force)
+			return;
+	}
+
+
+	/*
+	 * Obtain the port_info instance to the parent port
+	 */
+	port_details = NULL;
+	expander_sas_address =
+	    port_info->phy_info[0].identify.sas_address;
+	parent = mptsas_find_portinfo_by_handle(ioc,
+	    port_info->phy_info[0].identify.handle_parent);
+	mptsas_delete_expander_siblings(ioc, parent, port_info);
+	if (!parent)
+		goto out;
+
+	/*
+	 * Delete rphys in the parent that point
+	 * to this expander.
+	 */
+	phy_info = parent->phy_info;
+	port = NULL;
+	for (i = 0; i < parent->num_phys; i++, phy_info++) {
+		if (!phy_info->phy)
+			continue;
+		if (phy_info->attached.sas_address !=
+		    expander_sas_address)
+			continue;
+		if (!port) {
+			port = mptsas_get_port(phy_info);
+			port_details = phy_info->port_details;
+		}
+		dev_printk(KERN_DEBUG, &phy_info->phy->dev,
+		    MYIOC_s_FMT "delete phy %d, phy-obj (0x%p)\n", ioc->name,
+		    phy_info->phy_id, phy_info->phy);
+		sas_port_delete_phy(port, phy_info->phy);
+	}
+	if (port) {
+		dev_printk(KERN_DEBUG, &port->dev,
+		    MYIOC_s_FMT "delete port %d, sas_addr (0x%llx)\n",
+		    ioc->name, port->port_identifier,
+		    (unsigned long long)expander_sas_address);
+		sas_port_delete(port);
+		mptsas_port_delete(ioc, port_details);
+	}
+ out:
+
+	printk(MYIOC_s_INFO_FMT "delete expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n",  ioc->name, port_info->num_phys,
+	    (unsigned long long)expander_sas_address);
+
+	/*
+	 * free link
+	 */
+	list_del(&port_info->list);
+	kfree(port_info->phy_info);
+	kfree(port_info);
+}
+
+
+/**
+ * mptsas_send_expander_event - expanders events
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @expander_data: event data
+ *
+ *
+ * This function handles adding, removing, and refreshing
+ * device handles within the expander objects.
  */
 static void
+mptsas_send_expander_event(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc;
+	MpiEventDataSasExpanderStatusChange_t *expander_data;
+	struct mptsas_portinfo *port_info;
+	__le64 sas_address;
+	int i;
+
+	ioc = fw_event->ioc;
+	expander_data = (MpiEventDataSasExpanderStatusChange_t *)
+	    fw_event->event_data;
+	memcpy(&sas_address, &expander_data->SASAddress, sizeof(__le64));
+	port_info = mptsas_find_portinfo_by_sas_address(ioc, sas_address);
+
+	if (expander_data->ReasonCode == MPI_EVENT_SAS_EXP_RC_ADDED) {
+		if (port_info) {
+			for (i = 0; i < port_info->num_phys; i++) {
+				port_info->phy_info[i].portinfo = port_info;
+				port_info->phy_info[i].handle =
+				    le16_to_cpu(expander_data->DevHandle);
+				port_info->phy_info[i].identify.sas_address =
+				    le64_to_cpu(sas_address);
+				port_info->phy_info[i].identify.handle_parent =
+				    le16_to_cpu(expander_data->ParentDevHandle);
+			}
+			mptsas_expander_refresh(ioc, port_info);
+		} else if (!port_info && expander_data->NumPhys)
+			mptsas_expander_event_add(ioc, expander_data);
+	} else if (expander_data->ReasonCode ==
+	    MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING)
+		mptsas_expander_delete(ioc, port_info, 0);
+
+	mptsas_free_fw_event(ioc, fw_event);
+}
+
+
+/**
+ * mptsas_expander_add -
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @handle:
+ *
+ */
+struct mptsas_portinfo *
+mptsas_expander_add(MPT_ADAPTER *ioc, u16 handle)
+{
+	struct mptsas_portinfo buffer, *port_info;
+	int i;
+
+	if ((mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
+	    MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle)))
+		return NULL;
+
+	port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_ATOMIC);
+	if (!port_info) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+		"%s: exit at line=%d\n", ioc->name,
+		__func__, __LINE__));
+		return NULL;
+	}
+	port_info->num_phys = buffer.num_phys;
+	port_info->phy_info = buffer.phy_info;
+	for (i = 0; i < port_info->num_phys; i++)
+		port_info->phy_info[i].portinfo = port_info;
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+	mutex_unlock(&ioc->sas_topology_mutex);
+	printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)buffer.phy_info[0].identify.sas_address);
+	mptsas_expander_refresh(ioc, port_info);
+	return port_info;
+}
+
+static void
+mptsas_send_link_status_event(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc;
+	MpiEventDataSasPhyLinkStatus_t *link_data;
+	struct mptsas_portinfo *port_info;
+	struct mptsas_phyinfo *phy_info = NULL;
+	__le64 sas_address;
+	u8 phy_num;
+	u8 link_rate;
+
+	ioc = fw_event->ioc;
+	link_data = (MpiEventDataSasPhyLinkStatus_t *)fw_event->event_data;
+
+	memcpy(&sas_address, &link_data->SASAddress, sizeof(__le64));
+	sas_address = le64_to_cpu(sas_address);
+	link_rate = link_data->LinkRates >> 4;
+	phy_num = link_data->PhyNum;
+
+	port_info = mptsas_find_portinfo_by_sas_address(ioc, sas_address);
+	if (port_info) {
+		phy_info = &port_info->phy_info[phy_num];
+		if (phy_info)
+			phy_info->negotiated_link_rate = link_rate;
+	}
+
+	if (link_rate == MPI_SAS_IOUNIT0_RATE_1_5 ||
+	    link_rate == MPI_SAS_IOUNIT0_RATE_3_0) {
+
+		if (!port_info) {
+			if (ioc->old_sas_discovery_protocal) {
+				port_info = mptsas_expander_add(ioc,
+					le16_to_cpu(link_data->DevHandle));
+				if (port_info)
+					goto out;
+			}
+			goto out;
+		}
+
+		if (port_info == ioc->hba_port_info)
+			mptsas_probe_hba_phys(ioc);
+		else
+			mptsas_expander_refresh(ioc, port_info);
+	} else if (phy_info && phy_info->phy) {
+		if (link_rate ==  MPI_SAS_IOUNIT0_RATE_PHY_DISABLED)
+			phy_info->phy->negotiated_linkrate =
+			    SAS_PHY_DISABLED;
+		else if (link_rate ==
+		    MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION)
+			phy_info->phy->negotiated_linkrate =
+			    SAS_LINK_RATE_FAILED;
+		else
+			phy_info->phy->negotiated_linkrate =
+			    SAS_LINK_RATE_UNKNOWN;
+	}
+ out:
+	mptsas_free_fw_event(ioc, fw_event);
+}
+
+static void
+mptsas_not_responding_devices(MPT_ADAPTER *ioc)
+{
+	struct mptsas_portinfo buffer, *port_info;
+	struct mptsas_device_info	*sas_info;
+	struct mptsas_devinfo sas_device;
+	u32	handle;
+	VirtTarget *vtarget = NULL;
+	struct mptsas_phyinfo *phy_info;
+	u8 found_expander;
+	int retval, retry_count;
+	unsigned long flags;
+
+	mpt_findImVolumes(ioc);
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		   "%s: exiting due to a parallel reset \n", ioc->name,
+		    __func__));
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+
+	/* devices, logical volumes */
+	mutex_lock(&ioc->sas_device_info_mutex);
+ redo_device_scan:
+	list_for_each_entry(sas_info, &ioc->sas_device_info_list, list) {
+		if (sas_info->is_cached)
+			continue;
+		if (!sas_info->is_logical_volume) {
+			sas_device.handle = 0;
+			retry_count = 0;
+retry_page:
+			retval = mptsas_sas_device_pg0(ioc, &sas_device,
+				(MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID
+				<< MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+				(sas_info->fw.channel << 8) +
+				sas_info->fw.id);
+
+			if (sas_device.handle)
+				continue;
+			if (retval == -EBUSY) {
+				spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+				if (ioc->ioc_reset_in_progress) {
+					dfailprintk(ioc,
+					printk(MYIOC_s_DEBUG_FMT
+					"%s: exiting due to reset\n",
+					ioc->name, __func__));
+					spin_unlock_irqrestore
+					(&ioc->taskmgmt_lock, flags);
+					mutex_unlock(&ioc->
+					sas_device_info_mutex);
+					return;
+				}
+				spin_unlock_irqrestore(&ioc->taskmgmt_lock,
+				flags);
+			}
+
+			if (retval && (retval != -ENODEV)) {
+				if (retry_count < 10) {
+					retry_count++;
+					goto retry_page;
+				} else {
+					devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+					"%s: Config page retry exceeded retry "
+					"count deleting device 0x%llx\n",
+					ioc->name, __func__,
+					sas_info->sas_address));
+				}
+			}
+
+			/* delete device */
+			vtarget = mptsas_find_vtarget(ioc,
+				sas_info->fw.channel, sas_info->fw.id);
+
+			if (vtarget)
+				vtarget->deleted = 1;
+
+			phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+					sas_info->sas_address);
+
+			if (phy_info) {
+				mptsas_del_end_device(ioc, phy_info);
+				goto redo_device_scan;
+			}
+		} else
+			mptsas_volume_delete(ioc, sas_info->fw.id);
+	}
+	mutex_lock(&ioc->sas_device_info_mutex);
+
+	/* expanders */
+	mutex_lock(&ioc->sas_topology_mutex);
+ redo_expander_scan:
+	list_for_each_entry(port_info, &ioc->sas_topology, list) {
+
+		if (port_info->phy_info &&
+		    (!(port_info->phy_info[0].identify.device_info &
+		    MPI_SAS_DEVICE_INFO_SMP_TARGET)))
+			continue;
+		found_expander = 0;
+		handle = 0xFFFF;
+		while (!mptsas_sas_expander_pg0(ioc, &buffer,
+		    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
+		     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle) &&
+		    !found_expander) {
+
+			handle = buffer.phy_info[0].handle;
+			if (buffer.phy_info[0].identify.sas_address ==
+			    port_info->phy_info[0].identify.sas_address) {
+				found_expander = 1;
+			}
+			kfree(buffer.phy_info);
+		}
+
+		if (!found_expander) {
+			mptsas_expander_delete(ioc, port_info, 0);
+			goto redo_expander_scan;
+		}
+	}
+	mutex_lock(&ioc->sas_topology_mutex);
+}
+
+/**
+ *	mptsas_probe_expanders - adding expanders
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
+static void
+mptsas_probe_expanders(MPT_ADAPTER *ioc)
+{
+	struct mptsas_portinfo buffer, *port_info;
+	u32 			handle;
+	int i;
+
+	handle = 0xFFFF;
+	while (!mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
+	     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle)) {
+
+		handle = buffer.phy_info[0].handle;
+		port_info = mptsas_find_portinfo_by_sas_address(ioc,
+		    buffer.phy_info[0].identify.sas_address);
+
+		if (port_info) {
+			/* refreshing handles */
+			for (i = 0; i < buffer.num_phys; i++) {
+				port_info->phy_info[i].handle = handle;
+				port_info->phy_info[i].identify.handle_parent =
+				    buffer.phy_info[0].identify.handle_parent;
+			}
+			mptsas_expander_refresh(ioc, port_info);
+			kfree(buffer.phy_info);
+			continue;
+		}
+
+		port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
+		if (!port_info) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: exit at line=%d\n", ioc->name,
+			__func__, __LINE__));
+			return;
+		}
+		port_info->num_phys = buffer.num_phys;
+		port_info->phy_info = buffer.phy_info;
+		for (i = 0; i < port_info->num_phys; i++)
+			port_info->phy_info[i].portinfo = port_info;
+		mutex_lock(&ioc->sas_topology_mutex);
+		list_add_tail(&port_info->list, &ioc->sas_topology);
+		mutex_unlock(&ioc->sas_topology_mutex);
+		printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+		    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)buffer.phy_info[0].identify.sas_address);
+		mptsas_expander_refresh(ioc, port_info);
+	}
+}
+
+static void
+mptsas_probe_devices(MPT_ADAPTER *ioc)
+{
+	u16 handle;
+	struct mptsas_devinfo sas_device;
+	struct mptsas_phyinfo *phy_info;
+
+	handle = 0xFFFF;
+	while (!(mptsas_sas_device_pg0(ioc, &sas_device,
+	    MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE, handle))) {
+
+		handle = sas_device.handle;
+
+		if ((sas_device.device_info &
+		     (MPI_SAS_DEVICE_INFO_SSP_TARGET |
+		      MPI_SAS_DEVICE_INFO_STP_TARGET |
+		      MPI_SAS_DEVICE_INFO_SATA_DEVICE)) == 0)
+			continue;
+
+		phy_info = mptsas_refreshing_device_handles(ioc, &sas_device);
+		if (!phy_info)
+			continue;
+
+		if (mptsas_get_rphy(phy_info))
+			continue;
+
+		mptsas_add_end_device(ioc, phy_info);
+	}
+}
+
+/**
+ *	mptsas_scan_sas_topology -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@sas_address:
+ *
+ **/
+static void
 mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
 {
-	u32 handle = 0xFFFF;
+	struct scsi_device *sdev;
 	int i;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
 	mptsas_probe_hba_phys(ioc);
-	while (!mptsas_probe_expander_phys(ioc, &handle))
-		;
+	mptsas_probe_expanders(ioc);
+	mptsas_probe_devices(ioc);
+
 	/*
 	  Reporting RAID volumes.
 	*/
-	if (!ioc->ir_firmware)
-		goto out;
-	if (!ioc->raid_data.pIocPg2)
-		goto out;
-	if (!ioc->raid_data.pIocPg2->NumActiveVolumes)
-		goto out;
+	if (!ioc->ir_firmware || !ioc->raid_data.pIocPg2 ||
+	    !ioc->raid_data.pIocPg2->NumActiveVolumes)
+		return;
 	for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID, 0);
+		if (sdev) {
+			scsi_device_put(sdev);
+			continue;
+		}
+		printk(MYIOC_s_INFO_FMT "attaching raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID);
 		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL,
 		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID, 0);
 	}
+}
+
+
+static void
+mptsas_handle_queue_full_event(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc;
+	EventDataQueueFull_t *qfull_data;
+	struct mptsas_device_info *sas_info;
+	struct scsi_device	*sdev;
+	int depth;
+	int id = -1;
+	int channel = -1;
+	int fw_id, fw_channel;
+	u16 current_depth;
+
+
+	ioc = fw_event->ioc;
+	qfull_data = (EventDataQueueFull_t *)fw_event->event_data;
+	fw_id = qfull_data->TargetID;
+	fw_channel = qfull_data->Bus;
+	current_depth = le16_to_cpu(qfull_data->CurrentDepth);
+
+	/* if hidden raid component, look for the volume id */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	if (mptscsih_is_phys_disk(ioc, fw_channel, fw_id)) {
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (sas_info->is_cached ||
+			    sas_info->is_logical_volume)
+				continue;
+			if (sas_info->is_hidden_raid_component &&
+			    (sas_info->fw.channel == fw_channel &&
+			    sas_info->fw.id == fw_id)) {
+				id = sas_info->volume_id;
+				channel = MPTSAS_RAID_CHANNEL;
+				goto out;
+			}
+		}
+	} else {
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (sas_info->is_cached ||
+			    sas_info->is_hidden_raid_component ||
+			    sas_info->is_logical_volume)
+				continue;
+			if (sas_info->fw.channel == fw_channel &&
+			    sas_info->fw.id == fw_id) {
+				id = sas_info->os.id;
+				channel = sas_info->os.channel;
+				goto out;
+			}
+		}
+
+	}
+
  out:
-	mutex_unlock(&ioc->sas_discovery_mutex);
+	mutex_unlock(&ioc->sas_device_info_mutex);
+
+	if (id != -1) {
+		shost_for_each_device(sdev, ioc->sh) {
+			if (sdev->id == id && sdev->channel == channel) {
+				if (current_depth > sdev->queue_depth) {
+					sdev_printk(KERN_INFO, sdev,
+					    "strange observation, the queue "
+					    "depth is (%d) meanwhile fw queue "
+					    "depth (%d)\n", sdev->queue_depth,
+					    current_depth);
+					continue;
+				}
+				depth = scsi_track_queue_full(sdev,
+				    current_depth - 1);
+				if (depth > 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Queue depth reduced to (%d)\n",
+					   depth);
+				else if (depth < 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Tagged Command Queueing is being "
+					"disabled\n");
+				else if (depth == 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Queue depth not changed yet\n");
+			}
+		}
+	}
+
+	mptsas_free_fw_event(ioc, fw_event);
 }
 
-/*
- * Work queue thread to handle Runtime discovery
- * Mere purpose is the hot add/delete of expanders
- *(Mutex UNLOCKED)
- */
-static void
-__mptsas_discovery_work(MPT_ADAPTER *ioc)
-{
-	u32 handle = 0xFFFF;
-
-	ioc->sas_discovery_runtime=1;
-	mptsas_delete_expander_phys(ioc);
-	mptsas_probe_hba_phys(ioc);
-	while (!mptsas_probe_expander_phys(ioc, &handle))
-		;
-	ioc->sas_discovery_runtime=0;
-}
-
-/*
- * Work queue thread to handle Runtime discovery
- * Mere purpose is the hot add/delete of expanders
- *(Mutex LOCKED)
- */
-static void
-mptsas_discovery_work(struct work_struct *work)
-{
-	struct mptsas_discovery_event *ev =
-		container_of(work, struct mptsas_discovery_event, work);
-	MPT_ADAPTER *ioc = ev->ioc;
-
-	mutex_lock(&ioc->sas_discovery_mutex);
-	__mptsas_discovery_work(ioc);
-	mutex_unlock(&ioc->sas_discovery_mutex);
-	kfree(ev);
-}
 
 static struct mptsas_phyinfo *
 mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
@@ -2429,69 +3786,80 @@
 	return phy_info;
 }
 
+/**
+ *	mptsas_find_phyinfo_by_phys_disk_num -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phys_disk_num:
+ *	@channel:
+ *	@id:
+ *
+ **/
 static struct mptsas_phyinfo *
-mptsas_find_phyinfo_by_target(MPT_ADAPTER *ioc, u8 channel, u8 id)
+mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 phys_disk_num,
+	u8 channel, u8 id)
 {
-	struct mptsas_portinfo *port_info;
 	struct mptsas_phyinfo *phy_info = NULL;
+	struct mptsas_portinfo *port_info;
+	RaidPhysDiskPage1_t *phys_disk = NULL;
+	int num_paths;
+	u64 sas_address = 0;
 	int i;
 
-	mutex_lock(&ioc->sas_topology_mutex);
-	list_for_each_entry(port_info, &ioc->sas_topology, list) {
-		for (i = 0; i < port_info->num_phys; i++) {
-			if (!mptsas_is_end_device(
-				&port_info->phy_info[i].attached))
-				continue;
-			if (port_info->phy_info[i].attached.id != id)
-				continue;
-			if (port_info->phy_info[i].attached.channel != channel)
-				continue;
-			phy_info = &port_info->phy_info[i];
-			break;
+	phy_info = NULL;
+	if (!ioc->raid_data.pIocPg3)
+		return NULL;
+	/* dual port support */
+	num_paths = mpt_raid_phys_disk_get_num_paths(ioc, phys_disk_num);
+	if (!num_paths)
+		goto out;
+	phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+	   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+	if (!phys_disk)
+		goto out;
+	mpt_raid_phys_disk_pg1(ioc, phys_disk_num, phys_disk);
+	for (i = 0; i < num_paths; i++) {
+		if ((phys_disk->Path[i].Flags & 1) != 0)
+			/* entry no longer valid */
+			continue;
+		if ((id == phys_disk->Path[i].PhysDiskID) &&
+		    (channel == phys_disk->Path[i].PhysDiskBus)) {
+			memcpy(&sas_address, &phys_disk->Path[i].WWID,
+				sizeof(u64));
+			phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+					sas_address);
+			goto out;
 		}
 	}
-	mutex_unlock(&ioc->sas_topology_mutex);
-	return phy_info;
-}
 
-static struct mptsas_phyinfo *
-mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
-{
-	struct mptsas_portinfo *port_info;
-	struct mptsas_phyinfo *phy_info = NULL;
-	int i;
+ out:
+	kfree(phys_disk);
+	if (phy_info)
+		return phy_info;
 
+	/*
+	 * Extra code to handle RAID0 case, where the sas_address is not updated
+	 * in phys_disk_page_1 when hotswapped
+	 */
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry(port_info, &ioc->sas_topology, list) {
-		for (i = 0; i < port_info->num_phys; i++) {
+		for (i = 0; i < port_info->num_phys && !phy_info; i++) {
 			if (!mptsas_is_end_device(
 				&port_info->phy_info[i].attached))
 				continue;
 			if (port_info->phy_info[i].attached.phys_disk_num == ~0)
 				continue;
-			if (port_info->phy_info[i].attached.phys_disk_num != id)
-				continue;
-			if (port_info->phy_info[i].attached.channel != channel)
-				continue;
-			phy_info = &port_info->phy_info[i];
-			break;
+			if ((port_info->phy_info[i].attached.phys_disk_num ==
+			    phys_disk_num) &&
+			    (port_info->phy_info[i].attached.id == id) &&
+			    (port_info->phy_info[i].attached.channel ==
+			     channel))
+				phy_info = &port_info->phy_info[i];
 		}
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
 	return phy_info;
 }
 
-/*
- * Work queue thread to clear the persitency table
- */
-static void
-mptsas_persist_clear_table(struct work_struct *work)
-{
-	MPT_ADAPTER *ioc = container_of(work, MPT_ADAPTER, sas_persist_task);
-
-	mptbase_sas_persist_operation(ioc, MPI_SAS_OP_CLEAR_NOT_PRESENT);
-}
-
 static void
 mptsas_reprobe_lun(struct scsi_device *sdev, void *data)
 {
@@ -2517,7 +3885,8 @@
 	pRaidVolumePage0_t		buffer = NULL;
 	RaidPhysDiskPage0_t 		phys_disk;
 	int				i;
-	struct mptsas_hotplug_event 	*ev;
+	struct mptsas_phyinfo	*phy_info;
+	struct mptsas_devinfo		sas_device;
 
 	memset(&cfg, 0 , sizeof(CONFIGPARMS));
 	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
@@ -2557,20 +3926,16 @@
 		    buffer->PhysDisk[i].PhysDiskNum, &phys_disk) != 0)
 			continue;
 
-		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-		if (!ev) {
-			printk(MYIOC_s_WARN_FMT "mptsas: lost hotplug event\n", ioc->name);
-			goto out;
-		}
+		if (mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+			(phys_disk.PhysDiskBus << 8) +
+			phys_disk.PhysDiskID))
+			continue;
 
-		INIT_WORK(&ev->work, mptsas_hotplug_work);
-		ev->ioc = ioc;
-		ev->id = phys_disk.PhysDiskID;
-		ev->channel = phys_disk.PhysDiskBus;
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = phys_disk.PhysDiskNum;
-		ev->event_type = MPTSAS_ADD_DEVICE;
-		schedule_work(&ev->work);
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+		    sas_device.sas_address);
+		mptsas_add_end_device(ioc, phy_info);
 	}
 
  out:
@@ -2582,417 +3947,386 @@
  * Work queue thread to handle SAS hotplug events
  */
 static void
-mptsas_hotplug_work(struct work_struct *work)
+mptsas_hotplug_work(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    struct mptsas_hotplug_event *hot_plug_info)
 {
-	struct mptsas_hotplug_event *ev =
-		container_of(work, struct mptsas_hotplug_event, work);
-
-	MPT_ADAPTER *ioc = ev->ioc;
 	struct mptsas_phyinfo *phy_info;
-	struct sas_rphy *rphy;
-	struct sas_port *port;
-	struct scsi_device *sdev;
 	struct scsi_target * starget;
-	struct sas_identify identify;
-	char *ds = NULL;
 	struct mptsas_devinfo sas_device;
 	VirtTarget *vtarget;
-	VirtDevice *vdevice;
+	int i;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
-	switch (ev->event_type) {
-	case MPTSAS_DEL_DEVICE:
+	switch (hot_plug_info->event_type) {
 
-		phy_info = NULL;
-		if (ev->phys_disk_num_valid) {
-			if (ev->hidden_raid_component){
-				if (mptsas_sas_device_pg0(ioc, &sas_device,
-				    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
-				     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				    (ev->channel << 8) + ev->id)) {
-					dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-						__func__, __LINE__));
-					break;
-				}
-				phy_info = mptsas_find_phyinfo_by_sas_address(
-				    ioc, sas_device.sas_address);
-			}else
-				phy_info = mptsas_find_phyinfo_by_phys_disk_num(
-				    ioc, ev->channel, ev->phys_disk_num);
-		}
+	case MPTSAS_ADD_PHYSDISK:
 
-		if (!phy_info)
-			phy_info = mptsas_find_phyinfo_by_target(ioc,
-			    ev->channel, ev->id);
-
-		/*
-		 * Sanity checks, for non-existing phys and remote rphys.
-		 */
-		if (!phy_info){
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-				__func__, __LINE__));
+		if (!ioc->raid_data.pIocPg2)
 			break;
-		}
-		if (!phy_info->port_details) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			break;
-		}
-		rphy = mptsas_get_rphy(phy_info);
-		if (!rphy) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			break;
-		}
 
-		port = mptsas_get_port(phy_info);
-		if (!port) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			break;
-		}
-
-		starget = mptsas_get_starget(phy_info);
-		if (starget) {
-			vtarget = starget->hostdata;
-
-			if (!vtarget) {
-				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-					__func__, __LINE__));
-				break;
-			}
-
-			/*
-			 * Handling  RAID components
-			 */
-			if (ev->phys_disk_num_valid &&
-			    ev->hidden_raid_component) {
-				printk(MYIOC_s_INFO_FMT
-				    "RAID Hidding: channel=%d, id=%d, "
-				    "physdsk %d \n", ioc->name, ev->channel,
-				    ev->id, ev->phys_disk_num);
-				vtarget->id = ev->phys_disk_num;
-				vtarget->tflags |=
-				    MPT_TARGET_FLAGS_RAID_COMPONENT;
-				mptsas_reprobe_target(starget, 1);
-				phy_info->attached.phys_disk_num =
-				    ev->phys_disk_num;
-			break;
+		for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+			if (ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID ==
+			    hot_plug_info->id) {
+				printk(MYIOC_s_WARN_FMT "firmware bug: unable "
+				    "to add hidden disk - target_id matchs "
+				    "volume_id\n", ioc->name);
+				mptsas_free_fw_event(ioc, fw_event);
+				return;
 			}
 		}
+		mpt_findImVolumes(ioc);
 
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SSP_TARGET)
-			ds = "ssp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_STP_TARGET)
-			ds = "stp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
-			ds = "sata";
-
-		printk(MYIOC_s_INFO_FMT
-		       "removing %s device, channel %d, id %d, phy %d\n",
-		       ioc->name, ds, ev->channel, ev->id, phy_info->phy_id);
-		dev_printk(KERN_DEBUG, &port->dev, MYIOC_s_FMT
-		    "delete port (%d)\n", ioc->name, port->port_identifier);
-		sas_port_delete(port);
-		mptsas_port_delete(ioc, phy_info->port_details);
-		break;
 	case MPTSAS_ADD_DEVICE:
+		memset(&sas_device, 0, sizeof(struct mptsas_devinfo));
+		mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		    MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+		    (hot_plug_info->channel << 8) +
+		    hot_plug_info->id);
 
-		if (ev->phys_disk_num_valid)
-			mpt_findImVolumes(ioc);
+		if (!sas_device.handle)
+			return;
 
-		/*
-		 * Refresh sas device pg0 data
-		 */
+		phy_info = mptsas_refreshing_device_handles(ioc, &sas_device);
+		if (!phy_info)
+			break;
+
+		if (mptsas_get_rphy(phy_info))
+			break;
+
+		mptsas_add_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_DEL_DEVICE:
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+		    hot_plug_info->sas_address);
+		mptsas_del_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_DEL_PHYSDISK:
+
+		mpt_findImVolumes(ioc);
+
+		phy_info = mptsas_find_phyinfo_by_phys_disk_num(
+				ioc, hot_plug_info->phys_disk_num,
+				hot_plug_info->channel,
+				hot_plug_info->id);
+		mptsas_del_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_ADD_PHYSDISK_REPROBE:
+
 		if (mptsas_sas_device_pg0(ioc, &sas_device,
 		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
 		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-			(ev->channel << 8) + ev->id)) {
-				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-					__func__, __LINE__));
+		    (hot_plug_info->channel << 8) + hot_plug_info->id)) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		__mptsas_discovery_work(ioc);
+		phy_info = mptsas_find_phyinfo_by_sas_address(
+		    ioc, sas_device.sas_address);
 
-		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
-				sas_device.sas_address);
-
-		if (!phy_info || !phy_info->port_details) {
+		if (!phy_info) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
 		starget = mptsas_get_starget(phy_info);
-		if (starget && (!ev->hidden_raid_component)){
+		if (!starget) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
+			break;
+		}
 
-			vtarget = starget->hostdata;
+		vtarget = starget->hostdata;
+		if (!vtarget) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
+			break;
+		}
 
-			if (!vtarget) {
+		mpt_findImVolumes(ioc);
+
+		starget_printk(KERN_INFO, starget, MYIOC_s_FMT "RAID Hidding: "
+		    "fw_channel=%d, fw_id=%d, physdsk %d, sas_addr 0x%llx\n",
+		    ioc->name, hot_plug_info->channel, hot_plug_info->id,
+		    hot_plug_info->phys_disk_num, (unsigned long long)
+		    sas_device.sas_address);
+
+		vtarget->id = hot_plug_info->phys_disk_num;
+		vtarget->tflags |= MPT_TARGET_FLAGS_RAID_COMPONENT;
+		phy_info->attached.phys_disk_num = hot_plug_info->phys_disk_num;
+		mptsas_reprobe_target(starget, 1);
+		break;
+
+	case MPTSAS_DEL_PHYSDISK_REPROBE:
+
+		if (mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+			(hot_plug_info->channel << 8) + hot_plug_info->id)) {
 				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				    "%s: exit at line=%d\n", ioc->name,
-				    __func__, __LINE__));
-				break;
-			}
-			/*
-			 * Handling  RAID components
-			 */
-			if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
-				printk(MYIOC_s_INFO_FMT
-				    "RAID Exposing: channel=%d, id=%d, "
-				    "physdsk %d \n", ioc->name, ev->channel,
-				    ev->id, ev->phys_disk_num);
-				vtarget->tflags &=
-				    ~MPT_TARGET_FLAGS_RAID_COMPONENT;
-				vtarget->id = ev->id;
-				mptsas_reprobe_target(starget, 0);
-				phy_info->attached.phys_disk_num = ~0;
-			}
+				    "%s: fw_id=%d exit at line=%d\n",
+				    ioc->name, __func__,
+				    hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		if (mptsas_get_rphy(phy_info)) {
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+				sas_device.sas_address);
+		if (!phy_info) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			if (ev->channel) printk("%d\n", __LINE__);
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		port = mptsas_get_port(phy_info);
-		if (!port) {
+		starget = mptsas_get_starget(phy_info);
+		if (!starget) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
-		memcpy(&phy_info->attached, &sas_device,
-		    sizeof(struct mptsas_devinfo));
 
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SSP_TARGET)
-			ds = "ssp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_STP_TARGET)
-			ds = "stp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
-			ds = "sata";
-
-		printk(MYIOC_s_INFO_FMT
-		       "attaching %s device, channel %d, id %d, phy %d\n",
-		       ioc->name, ds, ev->channel, ev->id, ev->phy_id);
-
-		mptsas_parse_device_info(&identify, &phy_info->attached);
-		rphy = sas_end_device_alloc(port);
-		if (!rphy) {
+		vtarget = starget->hostdata;
+		if (!vtarget) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			break; /* non-fatal: an rphy can be added later */
-		}
-
-		rphy->identify = identify;
-		if (sas_rphy_add(rphy)) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			sas_rphy_free(rphy);
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
-		mptsas_set_rphy(ioc, phy_info, rphy);
+
+		if (!(vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
+			break;
+		}
+
+		mpt_findImVolumes(ioc);
+
+		starget_printk(KERN_INFO, starget, MYIOC_s_FMT "RAID Exposing:"
+		    " fw_channel=%d, fw_id=%d, physdsk %d, sas_addr 0x%llx\n",
+		    ioc->name, hot_plug_info->channel, hot_plug_info->id,
+		    hot_plug_info->phys_disk_num, (unsigned long long)
+		    sas_device.sas_address);
+
+		vtarget->tflags &= ~MPT_TARGET_FLAGS_RAID_COMPONENT;
+		vtarget->id = hot_plug_info->id;
+		phy_info->attached.phys_disk_num = ~0;
+		mptsas_reprobe_target(starget, 0);
+		mptsas_add_device_component_by_fw(ioc,
+		    hot_plug_info->channel, hot_plug_info->id);
 		break;
+
 	case MPTSAS_ADD_RAID:
-		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
-		    ev->id, 0);
-		if (sdev) {
-			scsi_device_put(sdev);
-			break;
-		}
-		printk(MYIOC_s_INFO_FMT
-		       "attaching raid volume, channel %d, id %d\n",
-		       ioc->name, MPTSAS_RAID_CHANNEL, ev->id);
-		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL, ev->id, 0);
+
 		mpt_findImVolumes(ioc);
+		printk(MYIOC_s_INFO_FMT "attaching raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id);
+		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id, 0);
 		break;
+
 	case MPTSAS_DEL_RAID:
-		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
-		    ev->id, 0);
-		if (!sdev)
-			break;
-		printk(MYIOC_s_INFO_FMT
-		       "removing raid volume, channel %d, id %d\n",
-		       ioc->name, MPTSAS_RAID_CHANNEL, ev->id);
-		vdevice = sdev->hostdata;
-		scsi_remove_device(sdev);
-		scsi_device_put(sdev);
+
 		mpt_findImVolumes(ioc);
+		printk(MYIOC_s_INFO_FMT "removing raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id);
+		scsi_remove_device(hot_plug_info->sdev);
+		scsi_device_put(hot_plug_info->sdev);
 		break;
+
 	case MPTSAS_ADD_INACTIVE_VOLUME:
+
+		mpt_findImVolumes(ioc);
 		mptsas_adding_inactive_raid_components(ioc,
-		    ev->channel, ev->id);
+		    hot_plug_info->channel, hot_plug_info->id);
 		break;
-	case MPTSAS_IGNORE_EVENT:
+
 	default:
 		break;
 	}
 
-	mutex_unlock(&ioc->sas_discovery_mutex);
-	kfree(ev);
+	mptsas_free_fw_event(ioc, fw_event);
 }
 
 static void
-mptsas_send_sas_event(MPT_ADAPTER *ioc,
-		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data)
+mptsas_send_sas_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
-	u32 device_info = le32_to_cpu(sas_event_data->DeviceInfo);
-	__le64 sas_address;
+	MPT_ADAPTER *ioc;
+	struct mptsas_hotplug_event hot_plug_info;
+	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
+	u32 device_info;
+	u64 sas_address;
+
+	ioc = fw_event->ioc;
+	sas_event_data = (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)
+	    fw_event->event_data;
+	device_info = le32_to_cpu(sas_event_data->DeviceInfo);
 
 	if ((device_info &
-	     (MPI_SAS_DEVICE_INFO_SSP_TARGET |
-	      MPI_SAS_DEVICE_INFO_STP_TARGET |
-	      MPI_SAS_DEVICE_INFO_SATA_DEVICE )) == 0)
+		(MPI_SAS_DEVICE_INFO_SSP_TARGET |
+		MPI_SAS_DEVICE_INFO_STP_TARGET |
+		MPI_SAS_DEVICE_INFO_SATA_DEVICE)) == 0) {
+		mptsas_free_fw_event(ioc, fw_event);
 		return;
+	}
+
+	if (sas_event_data->ReasonCode ==
+		MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED) {
+		mptbase_sas_persist_operation(ioc,
+		MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
 
 	switch (sas_event_data->ReasonCode) {
 	case MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING:
-
-		mptsas_target_reset_queue(ioc, sas_event_data);
-		break;
-
 	case MPI_EVENT_SAS_DEV_STAT_RC_ADDED:
-		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-		if (!ev) {
-			printk(MYIOC_s_WARN_FMT "lost hotplug event\n", ioc->name);
-			break;
-		}
-
-		INIT_WORK(&ev->work, mptsas_hotplug_work);
-		ev->ioc = ioc;
-		ev->handle = le16_to_cpu(sas_event_data->DevHandle);
-		ev->parent_handle =
-		    le16_to_cpu(sas_event_data->ParentDevHandle);
-		ev->channel = sas_event_data->Bus;
-		ev->id = sas_event_data->TargetID;
-		ev->phy_id = sas_event_data->PhyNum;
+		memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+		hot_plug_info.handle = le16_to_cpu(sas_event_data->DevHandle);
+		hot_plug_info.channel = sas_event_data->Bus;
+		hot_plug_info.id = sas_event_data->TargetID;
+		hot_plug_info.phy_id = sas_event_data->PhyNum;
 		memcpy(&sas_address, &sas_event_data->SASAddress,
-		    sizeof(__le64));
-		ev->sas_address = le64_to_cpu(sas_address);
-		ev->device_info = device_info;
-
+		    sizeof(u64));
+		hot_plug_info.sas_address = le64_to_cpu(sas_address);
+		hot_plug_info.device_info = device_info;
 		if (sas_event_data->ReasonCode &
 		    MPI_EVENT_SAS_DEV_STAT_RC_ADDED)
-			ev->event_type = MPTSAS_ADD_DEVICE;
+			hot_plug_info.event_type = MPTSAS_ADD_DEVICE;
 		else
-			ev->event_type = MPTSAS_DEL_DEVICE;
-		schedule_work(&ev->work);
+			hot_plug_info.event_type = MPTSAS_DEL_DEVICE;
+		mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
 		break;
+
 	case MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED:
-	/*
-	 * Persistent table is full.
-	 */
-		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table);
-		schedule_work(&ioc->sas_persist_task);
+		mptbase_sas_persist_operation(ioc,
+		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
 		break;
-	/*
-	 * TODO, handle other events
-	 */
+
 	case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA:
-	case MPI_EVENT_SAS_DEV_STAT_RC_UNSUPPORTED:
+	/* TODO */
 	case MPI_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET:
-	case MPI_EVENT_SAS_DEV_STAT_RC_TASK_ABORT_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_ABORT_TASK_SET_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_CLEAR_TASK_SET_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_QUERY_TASK_INTERNAL:
+	/* TODO */
 	default:
+		mptsas_free_fw_event(ioc, fw_event);
 		break;
 	}
 }
+
 static void
-mptsas_send_raid_event(MPT_ADAPTER *ioc,
-		EVENT_DATA_RAID *raid_event_data)
+mptsas_send_raid_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
-	int status = le32_to_cpu(raid_event_data->SettingsStatus);
-	int state = (status >> 8) & 0xff;
+	MPT_ADAPTER *ioc;
+	EVENT_DATA_RAID *raid_event_data;
+	struct mptsas_hotplug_event hot_plug_info;
+	int status;
+	int state;
+	struct scsi_device *sdev = NULL;
+	VirtDevice *vdevice = NULL;
+	RaidPhysDiskPage0_t phys_disk;
 
-	if (ioc->bus_type != SAS)
-		return;
+	ioc = fw_event->ioc;
+	raid_event_data = (EVENT_DATA_RAID *)fw_event->event_data;
+	status = le32_to_cpu(raid_event_data->SettingsStatus);
+	state = (status >> 8) & 0xff;
 
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev) {
-		printk(MYIOC_s_WARN_FMT "lost hotplug event\n", ioc->name);
-		return;
+	memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+	hot_plug_info.id = raid_event_data->VolumeID;
+	hot_plug_info.channel = raid_event_data->VolumeBus;
+	hot_plug_info.phys_disk_num = raid_event_data->PhysDiskNum;
+
+	if (raid_event_data->ReasonCode == MPI_EVENT_RAID_RC_VOLUME_DELETED ||
+	    raid_event_data->ReasonCode == MPI_EVENT_RAID_RC_VOLUME_CREATED ||
+	    raid_event_data->ReasonCode ==
+	    MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED) {
+		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info.id, 0);
+		hot_plug_info.sdev = sdev;
+		if (sdev)
+			vdevice = sdev->hostdata;
 	}
 
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->id = raid_event_data->VolumeID;
-	ev->channel = raid_event_data->VolumeBus;
-	ev->event_type = MPTSAS_IGNORE_EVENT;
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Entering %s: "
+	    "ReasonCode=%02x\n", ioc->name, __func__,
+	    raid_event_data->ReasonCode));
 
 	switch (raid_event_data->ReasonCode) {
 	case MPI_EVENT_RAID_RC_PHYSDISK_DELETED:
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = raid_event_data->PhysDiskNum;
-		ev->event_type = MPTSAS_ADD_DEVICE;
+		hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK_REPROBE;
 		break;
 	case MPI_EVENT_RAID_RC_PHYSDISK_CREATED:
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = raid_event_data->PhysDiskNum;
-		ev->hidden_raid_component = 1;
-		ev->event_type = MPTSAS_DEL_DEVICE;
+		hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK_REPROBE;
 		break;
 	case MPI_EVENT_RAID_RC_PHYSDISK_STATUS_CHANGED:
 		switch (state) {
 		case MPI_PD_STATE_ONLINE:
 		case MPI_PD_STATE_NOT_COMPATIBLE:
-			ev->phys_disk_num_valid = 1;
-			ev->phys_disk_num = raid_event_data->PhysDiskNum;
-			ev->hidden_raid_component = 1;
-			ev->event_type = MPTSAS_ADD_DEVICE;
+			mpt_raid_phys_disk_pg0(ioc,
+			    raid_event_data->PhysDiskNum, &phys_disk);
+			hot_plug_info.id = phys_disk.PhysDiskID;
+			hot_plug_info.channel = phys_disk.PhysDiskBus;
+			hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK;
 			break;
+		case MPI_PD_STATE_FAILED:
 		case MPI_PD_STATE_MISSING:
 		case MPI_PD_STATE_OFFLINE_AT_HOST_REQUEST:
 		case MPI_PD_STATE_FAILED_AT_HOST_REQUEST:
 		case MPI_PD_STATE_OFFLINE_FOR_ANOTHER_REASON:
-			ev->phys_disk_num_valid = 1;
-			ev->phys_disk_num = raid_event_data->PhysDiskNum;
-			ev->event_type = MPTSAS_DEL_DEVICE;
+			hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK;
 			break;
 		default:
 			break;
 		}
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_DELETED:
-		ev->event_type = MPTSAS_DEL_RAID;
+		if (!sdev)
+			break;
+		vdevice->vtarget->deleted = 1; /* block IO */
+		hot_plug_info.event_type = MPTSAS_DEL_RAID;
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_CREATED:
-		ev->event_type = MPTSAS_ADD_RAID;
+		if (sdev) {
+			scsi_device_put(sdev);
+			break;
+		}
+		hot_plug_info.event_type = MPTSAS_ADD_RAID;
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED:
+		if (!(status & MPI_RAIDVOL0_STATUS_FLAG_ENABLED)) {
+			if (!sdev)
+				break;
+			vdevice->vtarget->deleted = 1; /* block IO */
+			hot_plug_info.event_type = MPTSAS_DEL_RAID;
+			break;
+		}
 		switch (state) {
 		case MPI_RAIDVOL0_STATUS_STATE_FAILED:
 		case MPI_RAIDVOL0_STATUS_STATE_MISSING:
-			ev->event_type = MPTSAS_DEL_RAID;
+			if (!sdev)
+				break;
+			vdevice->vtarget->deleted = 1; /* block IO */
+			hot_plug_info.event_type = MPTSAS_DEL_RAID;
 			break;
 		case MPI_RAIDVOL0_STATUS_STATE_OPTIMAL:
 		case MPI_RAIDVOL0_STATUS_STATE_DEGRADED:
-			ev->event_type = MPTSAS_ADD_RAID;
+			if (sdev) {
+				scsi_device_put(sdev);
+				break;
+			}
+			hot_plug_info.event_type = MPTSAS_ADD_RAID;
 			break;
 		default:
 			break;
@@ -3001,32 +4335,188 @@
 	default:
 		break;
 	}
-	schedule_work(&ev->work);
+
+	if (hot_plug_info.event_type != MPTSAS_IGNORE_EVENT)
+		mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
+	else
+		mptsas_free_fw_event(ioc, fw_event);
 }
 
-static void
-mptsas_send_discovery_event(MPT_ADAPTER *ioc,
-	EVENT_DATA_SAS_DISCOVERY *discovery_data)
+/**
+ *	mptsas_issue_tm - send mptsas internal tm request
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@type: Task Management type
+ *	@channel: channel number for task management
+ *	@id: Logical Target ID for reset (if appropriate)
+ *	@lun: Logical unit for reset (if appropriate)
+ *	@task_context: Context for the task to be aborted
+ *	@timeout: timeout for task management control
+ *
+ *	return 0 on success and -1 on failure:
+ *
+ */
+static int
+mptsas_issue_tm(MPT_ADAPTER *ioc, u8 type, u8 channel, u8 id, u64 lun,
+	int task_context, ulong timeout, u8 *issue_reset)
 {
-	struct mptsas_discovery_event *ev;
+	MPT_FRAME_HDR	*mf;
+	SCSITaskMgmt_t	*pScsiTm;
+	int		 retval;
+	unsigned long	 timeleft;
 
-	/*
-	 * DiscoveryStatus
-	 *
-	 * This flag will be non-zero when firmware
-	 * kicks off discovery, and return to zero
-	 * once its completed.
-	 */
-	if (discovery_data->DiscoveryStatus)
-		return;
+	*issue_reset = 0;
+	mf = mpt_get_msg_frame(mptsasDeviceResetCtx, ioc);
+	if (mf == NULL) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT "TaskMgmt request: no "
+		    "msg frames!!\n", ioc->name));
+		goto out;
+	}
 
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev)
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request: mr = %p, "
+	    "task_type = 0x%02X,\n\t timeout = %ld, fw_channel = %d, "
+	    "fw_id = %d, lun = %lld,\n\t task_context = 0x%x\n", ioc->name, mf,
+	     type, timeout, channel, id, (unsigned long long)lun,
+	     task_context));
+
+	pScsiTm = (SCSITaskMgmt_t *) mf;
+	memset(pScsiTm, 0, sizeof(SCSITaskMgmt_t));
+	pScsiTm->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
+	pScsiTm->TaskType = type;
+	pScsiTm->MsgFlags = 0;
+	pScsiTm->TargetID = id;
+	pScsiTm->Bus = channel;
+	pScsiTm->ChainOffset = 0;
+	pScsiTm->Reserved = 0;
+	pScsiTm->Reserved1 = 0;
+	pScsiTm->TaskMsgContext = task_context;
+	int_to_scsilun(lun, (struct scsi_lun *)pScsiTm->LUN);
+
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	retval = 0;
+	mpt_put_msg_frame_hi_pri(mptsasDeviceResetCtx, ioc, mf);
+
+	/* Now wait for the command to complete */
+	timeleft = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done,
+	    timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_ERR_FMT
+		    "TaskMgmt request: TIMED OUT!(mr=%p)\n", ioc->name, mf));
+		mpt_free_msg_frame(ioc, mf);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		*issue_reset = 1;
+		goto out;
+	}
+
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt request: failed with no reply\n", ioc->name));
+		goto out;
+	}
+
+ out:
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	return retval;
+}
+
+/**
+ *	mptsas_broadcast_primative_work - Handle broadcast primitives
+ *	@work: work queue payload containing info describing the event
+ *
+ *	this will be handled in workqueue context.
+ */
+static void
+mptsas_broadcast_primative_work(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc = fw_event->ioc;
+	MPT_FRAME_HDR	*mf;
+	VirtDevice	*vdevice;
+	int			ii;
+	struct scsi_cmnd	*sc;
+	SCSITaskMgmtReply_t	*pScsiTmReply;
+	u8			issue_reset;
+	int			task_context;
+	u8			channel, id;
+	int			 lun;
+	u32			 termination_count;
+	u32			 query_count;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s - enter\n", ioc->name, __func__));
+
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+		mptsas_requeue_fw_event(ioc, fw_event, 1000);
 		return;
-	INIT_WORK(&ev->work, mptsas_discovery_work);
-	ev->ioc = ioc;
-	schedule_work(&ev->work);
-};
+	}
+
+	issue_reset = 0;
+	termination_count = 0;
+	query_count = 0;
+	mpt_findImVolumes(ioc);
+	pScsiTmReply = (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply;
+
+	for (ii = 0; ii < ioc->req_depth; ii++) {
+		if (ioc->fw_events_off)
+			goto out;
+		sc = mptscsih_get_scsi_lookup(ioc, ii);
+		if (!sc)
+			continue;
+		mf = MPT_INDEX_2_MFPTR(ioc, ii);
+		if (!mf)
+			continue;
+		task_context = mf->u.frame.hwhdr.msgctxu.MsgContext;
+		vdevice = sc->device->hostdata;
+		if (!vdevice || !vdevice->vtarget)
+			continue;
+		if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
+			continue; /* skip hidden raid components */
+		if (vdevice->vtarget->raidVolume)
+			continue; /* skip hidden raid components */
+		channel = vdevice->vtarget->channel;
+		id = vdevice->vtarget->id;
+		lun = vdevice->lun;
+		if (mptsas_issue_tm(ioc, MPI_SCSITASKMGMT_TASKTYPE_QUERY_TASK,
+		    channel, id, (u64)lun, task_context, 30, &issue_reset))
+			goto out;
+		query_count++;
+		termination_count +=
+		    le32_to_cpu(pScsiTmReply->TerminationCount);
+		if ((pScsiTmReply->IOCStatus == MPI_IOCSTATUS_SUCCESS) &&
+		    (pScsiTmReply->ResponseCode ==
+		    MPI_SCSITASKMGMT_RSP_TM_SUCCEEDED ||
+		    pScsiTmReply->ResponseCode ==
+		    MPI_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC))
+			continue;
+		if (mptsas_issue_tm(ioc,
+		    MPI_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET,
+		    channel, id, (u64)lun, 0, 30, &issue_reset))
+			goto out;
+		termination_count +=
+		    le32_to_cpu(pScsiTmReply->TerminationCount);
+	}
+
+ out:
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s - exit, query_count = %d termination_count = %d\n",
+	    ioc->name, __func__, query_count, termination_count));
+
+	ioc->broadcast_aen_busy = 0;
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+
+	if (issue_reset) {
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+		    ioc->name, __func__);
+		mpt_HardResetHandler(ioc, CAN_SLEEP);
+	}
+	mptsas_free_fw_event(ioc, fw_event);
+}
 
 /*
  * mptsas_send_ir2_event - handle exposing hidden disk when
@@ -3037,76 +4527,159 @@
  *
  */
 static void
-mptsas_send_ir2_event(MPT_ADAPTER *ioc, PTR_MPI_EVENT_DATA_IR2 ir2_data)
+mptsas_send_ir2_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
+	MPT_ADAPTER	*ioc;
+	struct mptsas_hotplug_event hot_plug_info;
+	MPI_EVENT_DATA_IR2	*ir2_data;
+	u8 reasonCode;
+	RaidPhysDiskPage0_t phys_disk;
 
-	if (ir2_data->ReasonCode !=
-	    MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED)
+	ioc = fw_event->ioc;
+	ir2_data = (MPI_EVENT_DATA_IR2 *)fw_event->event_data;
+	reasonCode = ir2_data->ReasonCode;
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Entering %s: "
+	    "ReasonCode=%02x\n", ioc->name, __func__, reasonCode));
+
+	memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+	hot_plug_info.id = ir2_data->TargetID;
+	hot_plug_info.channel = ir2_data->Bus;
+	switch (reasonCode) {
+	case MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED:
+		hot_plug_info.event_type = MPTSAS_ADD_INACTIVE_VOLUME;
+		break;
+	case MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED:
+		hot_plug_info.phys_disk_num = ir2_data->PhysDiskNum;
+		hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK;
+		break;
+	case MPI_EVENT_IR2_RC_DUAL_PORT_ADDED:
+		hot_plug_info.phys_disk_num = ir2_data->PhysDiskNum;
+		mpt_raid_phys_disk_pg0(ioc,
+		    ir2_data->PhysDiskNum, &phys_disk);
+		hot_plug_info.id = phys_disk.PhysDiskID;
+		hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK;
+		break;
+	default:
+		mptsas_free_fw_event(ioc, fw_event);
 		return;
-
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev)
-		return;
-
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->id = ir2_data->TargetID;
-	ev->channel = ir2_data->Bus;
-	ev->event_type = MPTSAS_ADD_INACTIVE_VOLUME;
-
-	schedule_work(&ev->work);
-};
+	}
+	mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
+}
 
 static int
 mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 {
-	int rc=1;
-	u8 event = le32_to_cpu(reply->Event) & 0xFF;
+	u32 event = le32_to_cpu(reply->Event);
+	int sz, event_data_sz;
+	struct fw_event_work *fw_event;
+	unsigned long delay;
 
-	if (!ioc->sh)
-		goto out;
+	/* events turned off due to host reset or driver unloading */
+	if (ioc->fw_events_off)
+		return 0;
 
-	/*
-	 * sas_discovery_ignore_events
-	 *
-	 * This flag is to prevent anymore processing of
-	 * sas events once mptsas_remove function is called.
-	 */
-	if (ioc->sas_discovery_ignore_events) {
-		rc = mptscsih_event_process(ioc, reply);
-		goto out;
-	}
-
+	delay = msecs_to_jiffies(1);
 	switch (event) {
+	case MPI_EVENT_SAS_BROADCAST_PRIMITIVE:
+	{
+		EVENT_DATA_SAS_BROADCAST_PRIMITIVE *broadcast_event_data =
+		    (EVENT_DATA_SAS_BROADCAST_PRIMITIVE *)reply->Data;
+		if (broadcast_event_data->Primitive !=
+		    MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT)
+			return 0;
+		if (ioc->broadcast_aen_busy)
+			return 0;
+		ioc->broadcast_aen_busy = 1;
+		break;
+	}
 	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
-		mptsas_send_sas_event(ioc,
-			(EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data);
+	{
+		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data =
+		    (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data;
+
+		if (sas_event_data->ReasonCode ==
+		    MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING) {
+			mptsas_target_reset_queue(ioc, sas_event_data);
+			return 0;
+		}
 		break;
+	}
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+	{
+		MpiEventDataSasExpanderStatusChange_t *expander_data =
+		    (MpiEventDataSasExpanderStatusChange_t *)reply->Data;
+
+		if (ioc->old_sas_discovery_protocal)
+			return 0;
+
+		if (expander_data->ReasonCode ==
+		    MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING &&
+		    ioc->device_missing_delay)
+			delay = HZ * ioc->device_missing_delay;
+		break;
+	}
+	case MPI_EVENT_SAS_DISCOVERY:
+	{
+		u32 discovery_status;
+		EventDataSasDiscovery_t *discovery_data =
+		    (EventDataSasDiscovery_t *)reply->Data;
+
+		discovery_status = le32_to_cpu(discovery_data->DiscoveryStatus);
+		ioc->sas_discovery_quiesce_io = discovery_status ? 1 : 0;
+		if (ioc->old_sas_discovery_protocal && !discovery_status)
+			mptsas_queue_rescan(ioc);
+		return 0;
+	}
 	case MPI_EVENT_INTEGRATED_RAID:
-		mptsas_send_raid_event(ioc,
-			(EVENT_DATA_RAID *)reply->Data);
-		break;
 	case MPI_EVENT_PERSISTENT_TABLE_FULL:
-		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table);
-		schedule_work(&ioc->sas_persist_task);
-		break;
-	 case MPI_EVENT_SAS_DISCOVERY:
-		mptsas_send_discovery_event(ioc,
-			(EVENT_DATA_SAS_DISCOVERY *)reply->Data);
-		break;
 	case MPI_EVENT_IR2:
-		mptsas_send_ir2_event(ioc,
-		    (PTR_MPI_EVENT_DATA_IR2)reply->Data);
+	case MPI_EVENT_SAS_PHY_LINK_STATUS:
+	case MPI_EVENT_QUEUE_FULL:
 		break;
 	default:
-		rc = mptscsih_event_process(ioc, reply);
-		break;
+		return 0;
 	}
- out:
 
-	return rc;
+	event_data_sz = ((reply->MsgLength * 4) -
+	    offsetof(EventNotificationReply_t, Data));
+	sz = offsetof(struct fw_event_work, event_data) + event_data_sz;
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n", ioc->name,
+		 __func__, __LINE__);
+		return 0;
+	}
+	memcpy(fw_event->event_data, reply->Data, event_data_sz);
+	fw_event->event = event;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, delay);
+	return 0;
+}
+
+/* Delete a volume when no longer listed in ioc pg2
+ */
+static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id)
+{
+	struct scsi_device *sdev;
+	int i;
+
+	sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL, id, 0);
+	if (!sdev)
+		return;
+	if (!ioc->raid_data.pIocPg2)
+		goto out;
+	if (!ioc->raid_data.pIocPg2->NumActiveVolumes)
+		goto out;
+	for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++)
+		if (ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID == id)
+			goto release_sdev;
+ out:
+	printk(MYIOC_s_INFO_FMT "removing raid volume, channel %d, "
+	    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL, id);
+	scsi_remove_device(sdev);
+ release_sdev:
+	scsi_device_put(sdev);
 }
 
 static int
@@ -3128,6 +4701,7 @@
 		return r;
 
 	ioc = pci_get_drvdata(pdev);
+	mptsas_fw_event_off(ioc);
 	ioc->DoneCtx = mptsasDoneCtx;
 	ioc->TaskCtx = mptsasTaskCtx;
 	ioc->InternalCtx = mptsasInternalCtx;
@@ -3211,17 +4785,15 @@
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
@@ -3251,9 +4823,6 @@
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
@@ -3273,10 +4842,11 @@
 
 	ioc->sas_data.ptClear = mpt_pt_clear;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 	INIT_LIST_HEAD(&hd->target_reset_list);
+	INIT_LIST_HEAD(&ioc->sas_device_info_list);
+	mutex_init(&ioc->sas_device_info_mutex);
+
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 
 	if (ioc->sas_data.ptClear==1) {
@@ -3291,8 +4861,11 @@
 		goto out_mptsas_probe;
 	}
 
+	/* older firmware doesn't support expander events */
+	if ((ioc->facts.HeaderVersion >> 8) < 0xE)
+		ioc->old_sas_discovery_protocal = 1;
 	mptsas_scan_sas_topology(ioc);
-
+	mptsas_fw_event_on(ioc);
 	return 0;
 
  out_mptsas_probe:
@@ -3301,12 +4874,25 @@
 	return error;
 }
 
+void
+mptsas_shutdown(struct pci_dev *pdev)
+{
+	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
+
+	mptsas_fw_event_off(ioc);
+	mptsas_cleanup_fw_event_q(ioc);
+}
+
 static void __devexit mptsas_remove(struct pci_dev *pdev)
 {
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 	struct mptsas_portinfo *p, *n;
 	int i;
 
+	mptsas_shutdown(pdev);
+
+	mptsas_del_device_components(ioc);
+
 	ioc->sas_discovery_ignore_events = 1;
 	sas_remove_host(ioc->sh);
 
@@ -3315,11 +4901,12 @@
 		list_del(&p->list);
 		for (i = 0 ; i < p->num_phys ; i++)
 			mptsas_port_delete(ioc, p->phy_info[i].port_details);
+
 		kfree(p->phy_info);
 		kfree(p);
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
-
+	ioc->hba_port_info = NULL;
 	mptscsih_remove(pdev);
 }
 
@@ -3344,7 +4931,7 @@
 	.id_table	= mptsas_pci_table,
 	.probe		= mptsas_probe,
 	.remove		= __devexit_p(mptsas_remove),
-	.shutdown	= mptscsih_shutdown,
+	.shutdown	= mptsas_shutdown,
 #ifdef CONFIG_PM
 	.suspend	= mptscsih_suspend,
 	.resume		= mptscsih_resume,
@@ -3364,10 +4951,12 @@
 		return -ENODEV;
 
 	mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER);
-	mptsasTaskCtx = mpt_register(mptsas_taskmgmt_complete, MPTSAS_DRIVER);
+	mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER);
 	mptsasInternalCtx =
 		mpt_register(mptscsih_scandv_complete, MPTSAS_DRIVER);
 	mptsasMgmtCtx = mpt_register(mptsas_mgmt_done, MPTSAS_DRIVER);
+	mptsasDeviceResetCtx =
+		mpt_register(mptsas_taskmgmt_complete, MPTSAS_DRIVER);
 
 	mpt_event_register(mptsasDoneCtx, mptsas_event_process);
 	mpt_reset_register(mptsasDoneCtx, mptsas_ioc_reset);
@@ -3392,6 +4981,7 @@
 	mpt_deregister(mptsasInternalCtx);
 	mpt_deregister(mptsasTaskCtx);
 	mpt_deregister(mptsasDoneCtx);
+	mpt_deregister(mptsasDeviceResetCtx);
 }
 
 module_init(mptsas_init);
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index 2b544e0..953c2bf 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -53,6 +53,7 @@
 	struct list_head 	list;
 	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE sas_event_data;
 	u8	target_reset_issued;
+	unsigned long	 time_count;
 };
 
 enum mptsas_hotplug_action {
@@ -60,12 +61,37 @@
 	MPTSAS_DEL_DEVICE,
 	MPTSAS_ADD_RAID,
 	MPTSAS_DEL_RAID,
+	MPTSAS_ADD_PHYSDISK,
+	MPTSAS_ADD_PHYSDISK_REPROBE,
+	MPTSAS_DEL_PHYSDISK,
+	MPTSAS_DEL_PHYSDISK_REPROBE,
 	MPTSAS_ADD_INACTIVE_VOLUME,
 	MPTSAS_IGNORE_EVENT,
 };
 
+struct mptsas_mapping{
+	u8			id;
+	u8			channel;
+};
+
+struct mptsas_device_info {
+	struct list_head 	list;
+	struct mptsas_mapping	os;	/* operating system mapping*/
+	struct mptsas_mapping	fw;	/* firmware mapping */
+	u64			sas_address;
+	u32			device_info; /* specific bits for devices */
+	u16			slot;		/* enclosure slot id */
+	u64			enclosure_logical_id; /*enclosure address */
+	u8			is_logical_volume; /* is this logical volume */
+	/* this belongs to volume */
+	u8			is_hidden_raid_component;
+	/* this valid when is_hidden_raid_component set */
+	u8			volume_id;
+	/* cached data for a removed device */
+	u8			is_cached;
+};
+
 struct mptsas_hotplug_event {
-	struct work_struct	work;
 	MPT_ADAPTER		*ioc;
 	enum mptsas_hotplug_action event_type;
 	u64			sas_address;
@@ -73,11 +99,18 @@
 	u8			id;
 	u32			device_info;
 	u16			handle;
-	u16			parent_handle;
 	u8			phy_id;
-	u8			phys_disk_num_valid;	/* hrc (hidden raid component) */
 	u8			phys_disk_num;		/* hrc - unique index*/
-	u8			hidden_raid_component;	/* hrc - don't expose*/
+	struct scsi_device	*sdev;
+};
+
+struct fw_event_work {
+	struct list_head 	list;
+	struct delayed_work	 work;
+	MPT_ADAPTER	*ioc;
+	u32			event;
+	u8			retries;
+	u8			event_data[1];
 };
 
 struct mptsas_discovery_event {
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index e62c6bc..024e830 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -80,7 +80,7 @@
 /*
  *  Other private/forward protos...
  */
-static struct scsi_cmnd * mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
+struct scsi_cmnd	*mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static struct scsi_cmnd * mptscsih_getclear_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static void	mptscsih_set_scsi_lookup(MPT_ADAPTER *ioc, int i, struct scsi_cmnd *scmd);
 static int	SCPNT_TO_LOOKUP_IDX(MPT_ADAPTER *ioc, struct scsi_cmnd *scmd);
@@ -92,18 +92,24 @@
 				 SCSIIORequest_t *pReq, int req_idx);
 static void	mptscsih_freeChainBuffers(MPT_ADAPTER *ioc, int req_idx);
 static void	mptscsih_copy_sense_data(struct scsi_cmnd *sc, MPT_SCSI_HOST *hd, MPT_FRAME_HDR *mf, SCSIIOReply_t *pScsiReply);
-static int	mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd);
-static int	mptscsih_tm_wait_for_completion(MPT_SCSI_HOST * hd, ulong timeout );
 
-static int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout);
+int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id,
+		int lun, int ctx2abort, ulong timeout);
 
 int		mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 int		mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply);
 
+void
+mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
+static int	mptscsih_get_completion_code(MPT_ADAPTER *ioc,
+		MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
 int		mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r);
 static int	mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 static void	mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, VirtDevice *vdevice);
 
+static int
+mptscsih_taskmgmt_reply(MPT_ADAPTER *ioc, u8 type,
+				SCSITaskMgmtReply_t *pScsiTmReply);
 void 		mptscsih_remove(struct pci_dev *);
 void 		mptscsih_shutdown(struct pci_dev *);
 #ifdef CONFIG_PM
@@ -113,69 +119,6 @@
 
 #define SNS_LEN(scp)	SCSI_SENSE_BUFFERSIZE
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_add_sge - Place a simple SGE at address pAddr.
- *	@pAddr: virtual address for SGE
- *	@flagslength: SGE flags and data transfer length
- *	@dma_addr: Physical address
- *
- *	This routine places a MPT request frame back on the MPT adapter's
- *	FreeQ.
- */
-static inline void
-mptscsih_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr)
-{
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
-		u32 tmp = dma_addr & 0xFFFFFFFF;
-
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pSge->Address.High = cpu_to_le32(tmp);
-
-	} else {
-		SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address = cpu_to_le32(dma_addr);
-	}
-} /* mptscsih_add_sge() */
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_add_chain - Place a chain SGE at address pAddr.
- *	@pAddr: virtual address for SGE
- *	@next: nextChainOffset value (u32's)
- *	@length: length of next SGL segment
- *	@dma_addr: Physical address
- *
- *	This routine places a MPT request frame back on the MPT adapter's
- *	FreeQ.
- */
-static inline void
-mptscsih_add_chain(char *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
-{
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGEChain64_t *pChain = (SGEChain64_t *) pAddr;
-		u32 tmp = dma_addr & 0xFFFFFFFF;
-
-		pChain->Length = cpu_to_le16(length);
-		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT | mpt_addr_size();
-
-		pChain->NextChainOffset = next;
-
-		pChain->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pChain->Address.High = cpu_to_le32(tmp);
-	} else {
-		SGEChain32_t *pChain = (SGEChain32_t *) pAddr;
-		pChain->Length = cpu_to_le16(length);
-		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT | mpt_addr_size();
-		pChain->NextChainOffset = next;
-		pChain->Address = cpu_to_le32(dma_addr);
-	}
-} /* mptscsih_add_chain() */
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
@@ -281,10 +224,10 @@
 	 */
 
 nextSGEset:
-	numSgeSlots = ((frm_sz - sgeOffset) / (sizeof(u32) + sizeof(dma_addr_t)) );
+	numSgeSlots = ((frm_sz - sgeOffset) / ioc->SGE_size);
 	numSgeThisFrame = (sges_left < numSgeSlots) ? sges_left : numSgeSlots;
 
-	sgflags = MPT_SGE_FLAGS_SIMPLE_ELEMENT | MPT_SGE_FLAGS_ADDRESSING | sgdir;
+	sgflags = MPT_SGE_FLAGS_SIMPLE_ELEMENT | sgdir;
 
 	/* Get first (num - 1) SG elements
 	 * Skip any SG entries with a length of 0
@@ -293,17 +236,19 @@
 	for (ii=0; ii < (numSgeThisFrame-1); ii++) {
 		thisxfer = sg_dma_len(sg);
 		if (thisxfer == 0) {
-			sg = sg_next(sg); /* Get next SG element from the OS */
+			/* Get next SG element from the OS */
+			sg = sg_next(sg);
 			sg_done++;
 			continue;
 		}
 
 		v2 = sg_dma_address(sg);
-		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
+		ioc->add_sge(psge, sgflags | thisxfer, v2);
 
-		sg = sg_next(sg);	/* Get next SG element from the OS */
-		psge += (sizeof(u32) + sizeof(dma_addr_t));
-		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
+		/* Get next SG element from the OS */
+		sg = sg_next(sg);
+		psge += ioc->SGE_size;
+		sgeOffset += ioc->SGE_size;
 		sg_done++;
 	}
 
@@ -320,12 +265,8 @@
 		thisxfer = sg_dma_len(sg);
 
 		v2 = sg_dma_address(sg);
-		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
-		/*
-		sg = sg_next(sg);
-		psge += (sizeof(u32) + sizeof(dma_addr_t));
-		*/
-		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
+		ioc->add_sge(psge, sgflags | thisxfer, v2);
+		sgeOffset += ioc->SGE_size;
 		sg_done++;
 
 		if (chainSge) {
@@ -334,7 +275,8 @@
 			 * Update the chain element
 			 * Offset and Length fields.
 			 */
-			mptscsih_add_chain((char *)chainSge, 0, sgeOffset, ioc->ChainBufferDMA + chain_dma_off);
+			ioc->add_chain((char *)chainSge, 0, sgeOffset,
+				ioc->ChainBufferDMA + chain_dma_off);
 		} else {
 			/* The current buffer is the original MF
 			 * and there is no Chain buffer.
@@ -367,7 +309,7 @@
 		 * set properly).
 		 */
 		if (sg_done) {
-			u32 *ptmp = (u32 *) (psge - (sizeof(u32) + sizeof(dma_addr_t)));
+			u32 *ptmp = (u32 *) (psge - ioc->SGE_size);
 			sgflags = le32_to_cpu(*ptmp);
 			sgflags |= MPT_SGE_FLAGS_LAST_ELEMENT;
 			*ptmp = cpu_to_le32(sgflags);
@@ -381,8 +323,9 @@
 			 * Old chain element is now complete.
 			 */
 			u8 nextChain = (u8) (sgeOffset >> 2);
-			sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
-			mptscsih_add_chain((char *)chainSge, nextChain, sgeOffset, ioc->ChainBufferDMA + chain_dma_off);
+			sgeOffset += ioc->SGE_size;
+			ioc->add_chain((char *)chainSge, nextChain, sgeOffset,
+					 ioc->ChainBufferDMA + chain_dma_off);
 		} else {
 			/* The original MF buffer requires a chain buffer -
 			 * set the offset.
@@ -592,14 +535,15 @@
 	}
 
 	scsi_print_command(sc);
-	printk(MYIOC_s_DEBUG_FMT "\tfw_channel = %d, fw_id = %d\n",
-	    ioc->name, pScsiReply->Bus, pScsiReply->TargetID);
+	printk(MYIOC_s_DEBUG_FMT "\tfw_channel = %d, fw_id = %d, lun = %d\n",
+	    ioc->name, pScsiReply->Bus, pScsiReply->TargetID, sc->device->lun);
 	printk(MYIOC_s_DEBUG_FMT "\trequest_len = %d, underflow = %d, "
 	    "resid = %d\n", ioc->name, scsi_bufflen(sc), sc->underflow,
 	    scsi_get_resid(sc));
 	printk(MYIOC_s_DEBUG_FMT "\ttag = %d, transfer_count = %d, "
 	    "sc->result = %08X\n", ioc->name, le16_to_cpu(pScsiReply->TaskTag),
 	    le32_to_cpu(pScsiReply->TransferCount), sc->result);
+
 	printk(MYIOC_s_DEBUG_FMT "\tiocstatus = %s (0x%04x), "
 	    "scsi_status = %s (0x%02x), scsi_state = (0x%02x)\n",
 	    ioc->name, desc, ioc_status, desc1, pScsiReply->SCSIStatus,
@@ -654,16 +598,14 @@
 	req_idx = le16_to_cpu(mf->u.frame.hwhdr.msgctxu.fld.req_idx);
 	req_idx_MR = (mr != NULL) ?
 	    le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx) : req_idx;
+
+	/* Special case, where already freed message frame is received from
+	 * Firmware. It happens with Resetting IOC.
+	 * Return immediately. Do not care
+	 */
 	if ((req_idx != req_idx_MR) ||
-	    (mf->u.frame.linkage.arg1 == 0xdeadbeaf)) {
-		printk(MYIOC_s_ERR_FMT "Received a mf that was already freed\n",
-		    ioc->name);
-		printk (MYIOC_s_ERR_FMT
-		    "req_idx=%x req_idx_MR=%x mf=%p mr=%p sc=%p\n",
-		    ioc->name, req_idx, req_idx_MR, mf, mr,
-		    mptscsih_get_scsi_lookup(ioc, req_idx_MR));
+	    (le32_to_cpu(mf->u.frame.linkage.arg1) == 0xdeadbeaf))
 		return 0;
-	}
 
 	sc = mptscsih_getclear_scsi_lookup(ioc, req_idx);
 	if (sc == NULL) {
@@ -810,12 +752,16 @@
 			 */
 
 		case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
-		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
 			/* Linux handles an unsolicited DID_RESET better
 			 * than an unsolicited DID_ABORT.
 			 */
 			sc->result = DID_RESET << 16;
 
+		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
+			if (ioc->bus_type == FC)
+				sc->result = DID_ERROR << 16;
+			else
+				sc->result = DID_RESET << 16;
 			break;
 
 		case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:	/* 0x0049 */
@@ -992,9 +938,9 @@
 		scsi_dma_unmap(sc);
 		sc->result = DID_RESET << 16;
 		sc->host_scribble = NULL;
-		sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT
-		    "completing cmds: fw_channel %d, fw_id %d, sc=%p,"
-		    " mf = %p, idx=%x\n", ioc->name, channel, id, sc, mf, ii);
+		dtmprintk(ioc, sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT
+		    "completing cmds: fw_channel %d, fw_id %d, sc=%p, mf = %p, "
+		    "idx=%x\n", ioc->name, channel, id, sc, mf, ii));
 		sc->scsi_done(sc);
 	}
 }
@@ -1053,9 +999,11 @@
 			scsi_dma_unmap(sc);
 			sc->host_scribble = NULL;
 			sc->result = DID_NO_CONNECT << 16;
-			sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT "completing cmds: fw_channel %d,"
-			   "fw_id %d, sc=%p, mf = %p, idx=%x\n", ioc->name, vdevice->vtarget->channel,
-			   vdevice->vtarget->id, sc, mf, ii);
+			dtmprintk(ioc, sdev_printk(KERN_INFO, sc->device,
+			   MYIOC_s_FMT "completing cmds: fw_channel %d, "
+			   "fw_id %d, sc=%p, mf = %p, idx=%x\n", ioc->name,
+			   vdevice->vtarget->channel, vdevice->vtarget->id,
+			   sc, mf, ii));
 			sc->scsi_done(sc);
 			spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 		}
@@ -1346,7 +1294,6 @@
 	MPT_FRAME_HDR		*mf;
 	SCSIIORequest_t		*pScsiReq;
 	VirtDevice		*vdevice = SCpnt->device->hostdata;
-	int	 lun;
 	u32	 datalen;
 	u32	 scsictl;
 	u32	 scsidir;
@@ -1357,13 +1304,12 @@
 
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
-	lun = SCpnt->device->lun;
 	SCpnt->scsi_done = done;
 
 	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p, done()=%p\n",
 		ioc->name, SCpnt, done));
 
-	if (hd->resetPending) {
+	if (ioc->taskmgmt_quiesce_io) {
 		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT "qcmd: SCpnt=%p timeout + 60HZ\n",
 			ioc->name, SCpnt));
 		return SCSI_MLQUEUE_HOST_BUSY;
@@ -1422,7 +1368,7 @@
 	pScsiReq->CDBLength = SCpnt->cmd_len;
 	pScsiReq->SenseBufferLength = MPT_SENSE_BUFFER_SIZE;
 	pScsiReq->Reserved = 0;
-	pScsiReq->MsgFlags = mpt_msg_flags();
+	pScsiReq->MsgFlags = mpt_msg_flags(ioc);
 	int_to_scsilun(SCpnt->device->lun, (struct scsi_lun *)pScsiReq->LUN);
 	pScsiReq->Control = cpu_to_le32(scsictl);
 
@@ -1448,7 +1394,8 @@
 	 */
 	if (datalen == 0) {
 		/* Add a NULL SGE */
-		mptscsih_add_sge((char *)&pScsiReq->SGL, MPT_SGE_FLAGS_SSIMPLE_READ | 0,
+		ioc->add_sge((char *)&pScsiReq->SGL,
+			MPT_SGE_FLAGS_SSIMPLE_READ | 0,
 			(dma_addr_t) -1);
 	} else {
 		/* Add a 32 or 64 bit SGE */
@@ -1528,120 +1475,6 @@
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mptscsih_TMHandler - Generic handler for SCSI Task Management.
- *	@hd: Pointer to MPT SCSI HOST structure
- *	@type: Task Management type
- *	@channel: channel number for task management
- *	@id: Logical Target ID for reset (if appropriate)
- *	@lun: Logical Unit for reset (if appropriate)
- *	@ctx2abort: Context for the task to be aborted (if appropriate)
- *	@timeout: timeout for task management control
- *
- *	Fall through to mpt_HardResetHandler if: not operational, too many
- *	failed TM requests or handshake failure.
- *
- *	Remark: Currently invoked from a non-interrupt thread (_bh).
- *
- *	Note: With old EH code, at most 1 SCSI TaskMgmt function per IOC
- *	will be active.
- *
- *	Returns 0 for SUCCESS, or %FAILED.
- **/
-int
-mptscsih_TMHandler(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout)
-{
-	MPT_ADAPTER	*ioc;
-	int		 rc = -1;
-	u32		 ioc_raw_state;
-	unsigned long	 flags;
-
-	ioc = hd->ioc;
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler Entered!\n", ioc->name));
-
-	// SJR - CHECKME - Can we avoid this here?
-	// (mpt_HardResetHandler has this check...)
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	if ((ioc->diagPending) || (ioc->alt_ioc && ioc->alt_ioc->diagPending)) {
-		spin_unlock_irqrestore(&ioc->diagLock, flags);
-		return FAILED;
-	}
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
-
-	/*  Wait a fixed amount of time for the TM pending flag to be cleared.
-	 *  If we time out and not bus reset, then we return a FAILED status
-	 *  to the caller.
-	 *  The call to mptscsih_tm_pending_wait() will set the pending flag
-	 *  if we are
-	 *  successful. Otherwise, reload the FW.
-	 */
-	if (mptscsih_tm_pending_wait(hd) == FAILED) {
-		if (type == MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler abort: "
-			   "Timed out waiting for last TM (%d) to complete! \n",
-			   ioc->name, hd->tmPending));
-			return FAILED;
-		} else if (type == MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler target "
-				"reset: Timed out waiting for last TM (%d) "
-				"to complete! \n", ioc->name,
-				hd->tmPending));
-			return FAILED;
-		} else if (type == MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler bus reset: "
-			   "Timed out waiting for last TM (%d) to complete! \n",
-			  ioc->name, hd->tmPending));
-			return FAILED;
-		}
-	} else {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		hd->tmPending |=  (1 << type);
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-	}
-
-	ioc_raw_state = mpt_GetIocState(ioc, 0);
-
-	if ((ioc_raw_state & MPI_IOC_STATE_MASK) != MPI_IOC_STATE_OPERATIONAL) {
-		printk(MYIOC_s_WARN_FMT
-			"TM Handler for type=%x: IOC Not operational (0x%x)!\n",
-			ioc->name, type, ioc_raw_state);
-		printk(MYIOC_s_WARN_FMT " Issuing HardReset!!\n", ioc->name);
-		if (mpt_HardResetHandler(ioc, CAN_SLEEP) < 0)
-			printk(MYIOC_s_WARN_FMT "TMHandler: HardReset "
-			    "FAILED!!\n", ioc->name);
-		return FAILED;
-	}
-
-	if (ioc_raw_state & MPI_DOORBELL_ACTIVE) {
-		printk(MYIOC_s_WARN_FMT
-			"TM Handler for type=%x: ioc_state: "
-			"DOORBELL_ACTIVE (0x%x)!\n",
-			ioc->name, type, ioc_raw_state);
-		return FAILED;
-	}
-
-	/* Isse the Task Mgmt request.
-	 */
-	if (hd->hard_resets < -1)
-		hd->hard_resets++;
-
-	rc = mptscsih_IssueTaskMgmt(hd, type, channel, id, lun,
-	    ctx2abort, timeout);
-	if (rc)
-		printk(MYIOC_s_INFO_FMT "Issue of TaskMgmt failed!\n",
-		       ioc->name);
-	else
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Issue of TaskMgmt Successful!\n",
-			   ioc->name));
-
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			"TMHandler rc = %d!\n", ioc->name, rc));
-
-	return rc;
-}
-
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
  *	mptscsih_IssueTaskMgmt - Generic send Task Management function.
  *	@hd: Pointer to MPT_SCSI_HOST structure
  *	@type: Task Management type
@@ -1659,23 +1492,60 @@
  *	Returns 0 for SUCCESS, or FAILED.
  *
  **/
-static int
-mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout)
+int
+mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun,
+	int ctx2abort, ulong timeout)
 {
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
 	int		 ii;
 	int		 retval;
 	MPT_ADAPTER 	*ioc = hd->ioc;
+	unsigned long	 timeleft;
+	u8		 issue_hard_reset;
+	u32		 ioc_raw_state;
+	unsigned long	 time_count;
+
+	issue_hard_reset = 0;
+	ioc_raw_state = mpt_GetIocState(ioc, 0);
+
+	if ((ioc_raw_state & MPI_IOC_STATE_MASK) != MPI_IOC_STATE_OPERATIONAL) {
+		printk(MYIOC_s_WARN_FMT
+			"TaskMgmt type=%x: IOC Not operational (0x%x)!\n",
+			ioc->name, type, ioc_raw_state);
+		printk(MYIOC_s_WARN_FMT "Issuing HardReset from %s!!\n",
+		    ioc->name, __func__);
+		if (mpt_HardResetHandler(ioc, CAN_SLEEP) < 0)
+			printk(MYIOC_s_WARN_FMT "TaskMgmt HardReset "
+			    "FAILED!!\n", ioc->name);
+		return 0;
+	}
+
+	if (ioc_raw_state & MPI_DOORBELL_ACTIVE) {
+		printk(MYIOC_s_WARN_FMT
+			"TaskMgmt type=%x: ioc_state: "
+			"DOORBELL_ACTIVE (0x%x)!\n",
+			ioc->name, type, ioc_raw_state);
+		return FAILED;
+	}
+
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mf = NULL;
+		retval = FAILED;
+		goto out;
+	}
 
 	/* Return Fail to calling function if no message frames available.
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->TaskCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "IssueTaskMgmt, no msg frames!!\n",
-				ioc->name));
-		return FAILED;
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"TaskMgmt no msg frames!!\n", ioc->name));
+		retval = FAILED;
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		goto out;
 	}
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt request @ %p\n",
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
 			ioc->name, mf));
 
 	/* Format the Request
@@ -1699,11 +1569,14 @@
 
 	pScsiTm->TaskMsgContext = ctx2abort;
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt: ctx2abort (0x%08x) "
-		"type=%d\n", ioc->name, ctx2abort, type));
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt: ctx2abort (0x%08x) "
+		"task_type = 0x%02X, timeout = %ld\n", ioc->name, ctx2abort,
+		type, timeout));
 
 	DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)pScsiTm);
 
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	time_count = jiffies;
 	if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
 	    (ioc->facts.MsgVersion >= MPI_VERSION_01_05))
 		mpt_put_msg_frame_hi_pri(ioc->TaskCtx, ioc, mf);
@@ -1711,47 +1584,50 @@
 		retval = mpt_send_handshake_request(ioc->TaskCtx, ioc,
 			sizeof(SCSITaskMgmt_t), (u32*)pScsiTm, CAN_SLEEP);
 		if (retval) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "send_handshake FAILED!"
-			" (hd %p, ioc %p, mf %p, rc=%d) \n", ioc->name, hd,
-			ioc, mf, retval));
-			goto fail_out;
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"TaskMgmt handshake FAILED!(mf=%p, rc=%d) \n",
+				ioc->name, mf, retval));
+			mpt_free_msg_frame(ioc, mf);
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
+			goto out;
 		}
 	}
 
-	if(mptscsih_tm_wait_for_completion(hd, timeout) == FAILED) {
-		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "task management request TIMED OUT!"
-			" (hd %p, ioc %p, mf %p) \n", ioc->name, hd,
-			ioc, mf));
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
-			 ioc->name));
-		retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "rc=%d \n",
-			 ioc->name, retval));
-		goto fail_out;
+	timeleft = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done,
+		timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = FAILED;
+		dtmprintk(ioc, printk(MYIOC_s_ERR_FMT
+		    "TaskMgmt TIMED OUT!(mf=%p)\n", ioc->name, mf));
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		issue_hard_reset = 1;
+		goto out;
 	}
 
-	/*
-	 * Handle success case, see if theres a non-zero ioc_status.
-	 */
-	if (hd->tm_iocstatus == MPI_IOCSTATUS_SUCCESS ||
-	   hd->tm_iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
-	   hd->tm_iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED)
-		retval = 0;
-	else
-		retval = FAILED;
+	retval = mptscsih_taskmgmt_reply(ioc, type,
+	    (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply);
 
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt completed (%d seconds)\n",
+	    ioc->name, jiffies_to_msecs(jiffies - time_count)/1000));
+
+ out:
+
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	if (issue_hard_reset) {
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			ioc->name, __func__);
+		retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
+		mpt_free_msg_frame(ioc, mf);
+	}
+
+	retval = (retval == 0) ? 0 : FAILED;
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
 	return retval;
-
- fail_out:
-
-	/*
-	 * Free task management mf, and corresponding tm flags
-	 */
-	mpt_free_msg_frame(ioc, mf);
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	return FAILED;
 }
+EXPORT_SYMBOL(mptscsih_IssueTaskMgmt);
 
 static int
 mptscsih_get_tm_timeout(MPT_ADAPTER *ioc)
@@ -1838,13 +1714,8 @@
 		goto out;
 	}
 
-	if (hd->resetPending) {
-		retval = FAILED;
-		goto out;
-	}
-
-	if (hd->timeouts < -1)
-		hd->timeouts++;
+	if (ioc->timeouts < -1)
+		ioc->timeouts++;
 
 	if (mpt_fwfault_debug)
 		mpt_halt_firmware(ioc);
@@ -1861,22 +1732,30 @@
 
 	hd->abortSCpnt = SCpnt;
 
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
-	    vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun,
-	    ctx2abort, mptscsih_get_tm_timeout(ioc));
+	retval = mptscsih_IssueTaskMgmt(hd,
+			 MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
+			 vdevice->vtarget->channel,
+			 vdevice->vtarget->id, vdevice->lun,
+			 ctx2abort, mptscsih_get_tm_timeout(ioc));
 
 	if (SCPNT_TO_LOOKUP_IDX(ioc, SCpnt) == scpnt_idx &&
-	    SCpnt->serial_number == sn)
+	    SCpnt->serial_number == sn) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "task abort: command still in active list! (sc=%p)\n",
+		    ioc->name, SCpnt));
 		retval = FAILED;
+	} else {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "task abort: command cleared from active list! (sc=%p)\n",
+		    ioc->name, SCpnt));
+		retval = SUCCESS;
+	}
 
  out:
 	printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n",
-	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+	    ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), SCpnt);
 
-	if (retval == 0)
-		return SUCCESS;
-	else
-		return FAILED;
+	return retval;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1909,14 +1788,9 @@
 	       ioc->name, SCpnt);
 	scsi_print_command(SCpnt);
 
-	if (hd->resetPending) {
-		retval = FAILED;
-		goto out;
-	}
-
 	vdevice = SCpnt->device->hostdata;
 	if (!vdevice || !vdevice->vtarget) {
-		retval = 0;
+		retval = SUCCESS;
 		goto out;
 	}
 
@@ -1927,9 +1801,11 @@
 		goto out;
 	}
 
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
-	    vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0,
-	    mptscsih_get_tm_timeout(ioc));
+	retval = mptscsih_IssueTaskMgmt(hd,
+				MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+				vdevice->vtarget->channel,
+				vdevice->vtarget->id, 0, 0,
+				mptscsih_get_tm_timeout(ioc));
 
  out:
 	printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n",
@@ -1972,12 +1848,16 @@
 	       ioc->name, SCpnt);
 	scsi_print_command(SCpnt);
 
-	if (hd->timeouts < -1)
-		hd->timeouts++;
+	if (ioc->timeouts < -1)
+		ioc->timeouts++;
 
 	vdevice = SCpnt->device->hostdata;
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
-	    vdevice->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc));
+	if (!vdevice || !vdevice->vtarget)
+		return SUCCESS;
+	retval = mptscsih_IssueTaskMgmt(hd,
+					MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+					vdevice->vtarget->channel, 0, 0, 0,
+					mptscsih_get_tm_timeout(ioc));
 
 	printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n",
 	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
@@ -2001,8 +1881,9 @@
 mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 {
 	MPT_SCSI_HOST *  hd;
-	int              retval;
+	int              status = SUCCESS;
 	MPT_ADAPTER	*ioc;
+	int		retval;
 
 	/*  If we can't locate the host to reset, then we failed. */
 	if ((hd = shost_priv(SCpnt->device->host)) == NULL){
@@ -2021,86 +1902,71 @@
 	/*  If our attempts to reset the host failed, then return a failed
 	 *  status.  The host will be taken off line by the SCSI mid-layer.
 	 */
-	if (mpt_HardResetHandler(ioc, CAN_SLEEP) < 0) {
-		retval = FAILED;
-	} else {
-		/*  Make sure TM pending is cleared and TM state is set to
-		 *  NONE.
-		 */
-		retval = 0;
-		hd->tmPending = 0;
-		hd->tmState = TM_STATE_NONE;
-	}
+    retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
+	if (retval < 0)
+		status = FAILED;
+	else
+		status = SUCCESS;
 
 	printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n",
 	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
 
+	return status;
+}
+
+static int
+mptscsih_taskmgmt_reply(MPT_ADAPTER *ioc, u8 type,
+	SCSITaskMgmtReply_t *pScsiTmReply)
+{
+	u16			 iocstatus;
+	u32			 termination_count;
+	int			 retval;
+
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		retval = FAILED;
+		goto out;
+	}
+
+	DBG_DUMP_TM_REPLY_FRAME(ioc, (u32 *)pScsiTmReply);
+
+	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	termination_count = le32_to_cpu(pScsiTmReply->TerminationCount);
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt fw_channel = %d, fw_id = %d, task_type = 0x%02X,\n"
+	    "\tiocstatus = 0x%04X, loginfo = 0x%08X, response_code = 0x%02X,\n"
+	    "\tterm_cmnds = %d\n", ioc->name, pScsiTmReply->Bus,
+	    pScsiTmReply->TargetID, type, le16_to_cpu(pScsiTmReply->IOCStatus),
+	    le32_to_cpu(pScsiTmReply->IOCLogInfo), pScsiTmReply->ResponseCode,
+	    termination_count));
+
+	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05 &&
+	    pScsiTmReply->ResponseCode)
+		mptscsih_taskmgmt_response_code(ioc,
+		    pScsiTmReply->ResponseCode);
+
+	if (iocstatus == MPI_IOCSTATUS_SUCCESS) {
+		retval = 0;
+		goto out;
+	}
+
+	retval = FAILED;
+	if (type == MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
+		if (termination_count == 1)
+			retval = 0;
+		goto out;
+	}
+
+	if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED)
+		retval = 0;
+
+ out:
 	return retval;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_tm_pending_wait - wait for pending task management request to complete
- *	@hd: Pointer to MPT host structure.
- *
- *	Returns {SUCCESS,FAILED}.
- */
-static int
-mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd)
-{
-	unsigned long  flags;
-	int            loop_count = 4 * 10;  /* Wait 10 seconds */
-	int            status = FAILED;
-	MPT_ADAPTER	*ioc = hd->ioc;
-
-	do {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		if (hd->tmState == TM_STATE_NONE) {
-			hd->tmState = TM_STATE_IN_PROGRESS;
-			hd->tmPending = 1;
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-			status = SUCCESS;
-			break;
-		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-		msleep(250);
-	} while (--loop_count);
-
-	return status;
-}
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_tm_wait_for_completion - wait for completion of TM task
- *	@hd: Pointer to MPT host structure.
- *	@timeout: timeout value
- *
- *	Returns {SUCCESS,FAILED}.
- */
-static int
-mptscsih_tm_wait_for_completion(MPT_SCSI_HOST * hd, ulong timeout )
-{
-	unsigned long  flags;
-	int            loop_count = 4 * timeout;
-	int            status = FAILED;
-	MPT_ADAPTER	*ioc = hd->ioc;
-
-	do {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		if(hd->tmPending == 0) {
-			status = SUCCESS;
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-			break;
-		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-		msleep(250);
-	} while (--loop_count);
-
-	return status;
-}
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-static void
+void
 mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code)
 {
 	char *desc;
@@ -2134,6 +2000,7 @@
 	printk(MYIOC_s_INFO_FMT "Response Code(0x%08x): F/W: %s\n",
 		ioc->name, response_code, desc);
 }
+EXPORT_SYMBOL(mptscsih_taskmgmt_response_code);
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
@@ -2150,97 +2017,28 @@
  *	Returns 1 indicating alloc'd request frame ptr should be freed.
  **/
 int
-mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf,
+	MPT_FRAME_HDR *mr)
 {
-	SCSITaskMgmtReply_t	*pScsiTmReply;
-	SCSITaskMgmt_t		*pScsiTmReq;
-	MPT_SCSI_HOST		*hd;
-	unsigned long		 flags;
-	u16			 iocstatus;
-	u8			 tmType;
-	u32			 termination_count;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt completed (mf=%p, mr=%p)\n", ioc->name, mf, mr));
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt completed (mf=%p,mr=%p)\n",
-	    ioc->name, mf, mr));
-	if (!ioc->sh) {
-		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
-		    "TaskMgmt Complete: NULL Scsi Host Ptr\n", ioc->name));
-		return 1;
-	}
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 
-	if (mr == NULL) {
-		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
-		    "ERROR! TaskMgmt Reply: NULL Request %p\n", ioc->name, mf));
-		return 1;
-	}
-
-	hd = shost_priv(ioc->sh);
-	pScsiTmReply = (SCSITaskMgmtReply_t*)mr;
-	pScsiTmReq = (SCSITaskMgmt_t*)mf;
-	tmType = pScsiTmReq->TaskType;
-	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-	termination_count = le32_to_cpu(pScsiTmReply->TerminationCount);
-
-	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05 &&
-	    pScsiTmReply->ResponseCode)
-		mptscsih_taskmgmt_response_code(ioc,
-		    pScsiTmReply->ResponseCode);
-	DBG_DUMP_TM_REPLY_FRAME(ioc, (u32 *)pScsiTmReply);
-
-#ifdef CONFIG_FUSION_LOGGING
-	if ((ioc->debug_level & MPT_DEBUG_REPLY) ||
-				(ioc->debug_level & MPT_DEBUG_TM ))
-		printk("%s: ha=%d [%d:%d:0] task_type=0x%02X "
-			"iocstatus=0x%04X\n\tloginfo=0x%08X response_code=0x%02X "
-			"term_cmnds=%d\n", __func__, ioc->id, pScsiTmReply->Bus,
-			 pScsiTmReply->TargetID, pScsiTmReq->TaskType,
-			le16_to_cpu(pScsiTmReply->IOCStatus),
-			le32_to_cpu(pScsiTmReply->IOCLogInfo),pScsiTmReply->ResponseCode,
-			le32_to_cpu(pScsiTmReply->TerminationCount));
-#endif
-	if (!iocstatus) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT " TaskMgmt SUCCESS\n", ioc->name));
-			hd->abortSCpnt = NULL;
+	if (!mr)
 		goto out;
-	}
 
-	/* Error?  (anything non-zero?) */
-
-	/* clear flags and continue.
-	 */
-	switch (tmType) {
-
-	case MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK:
-		if (termination_count == 1)
-			iocstatus = MPI_IOCSTATUS_SCSI_TASK_TERMINATED;
-		hd->abortSCpnt = NULL;
-		break;
-
-	case MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS:
-
-		/* If an internal command is present
-		 * or the TM failed - reload the FW.
-		 * FC FW may respond FAILED to an ABORT
-		 */
-		if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_MGMT_FAILED ||
-		    hd->cmdPtr)
-			if (mpt_HardResetHandler(ioc, NO_SLEEP) < 0)
-				printk(MYIOC_s_WARN_FMT " Firmware Reload FAILED!!\n", ioc->name);
-		break;
-
-	case MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
-	default:
-		break;
-	}
-
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->taskmgmt_cmds.reply, mr,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
  out:
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	hd->tm_iocstatus = iocstatus;
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	return 1;
+	if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->taskmgmt_cmds.done);
+		return 1;
+	}
+	return 0;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -2290,8 +2088,10 @@
 mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id)
 {
 	struct inactive_raid_component_info *component_info;
-	int i;
+	int i, j;
+	RaidPhysDiskPage1_t *phys_disk;
 	int rc = 0;
+	int num_paths;
 
 	if (!ioc->raid_data.pIocPg3)
 		goto out;
@@ -2303,6 +2103,45 @@
 		}
 	}
 
+	if (ioc->bus_type != SAS)
+		goto out;
+
+	/*
+	 * Check if dual path
+	 */
+	for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) {
+		num_paths = mpt_raid_phys_disk_get_num_paths(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum);
+		if (num_paths < 2)
+			continue;
+		phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+		   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+		if (!phys_disk)
+			continue;
+		if ((mpt_raid_phys_disk_pg1(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum,
+		    phys_disk))) {
+			kfree(phys_disk);
+			continue;
+		}
+		for (j = 0; j < num_paths; j++) {
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_INVALID))
+				continue;
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_BROKEN))
+				continue;
+			if ((id == phys_disk->Path[j].PhysDiskID) &&
+			    (channel == phys_disk->Path[j].PhysDiskBus)) {
+				rc = 1;
+				kfree(phys_disk);
+				goto out;
+			}
+		}
+		kfree(phys_disk);
+	}
+
+
 	/*
 	 * Check inactive list for matching phys disks
 	 */
@@ -2327,8 +2166,10 @@
 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
 {
 	struct inactive_raid_component_info *component_info;
-	int i;
+	int i, j;
+	RaidPhysDiskPage1_t *phys_disk;
 	int rc = -ENXIO;
+	int num_paths;
 
 	if (!ioc->raid_data.pIocPg3)
 		goto out;
@@ -2340,6 +2181,44 @@
 		}
 	}
 
+	if (ioc->bus_type != SAS)
+		goto out;
+
+	/*
+	 * Check if dual path
+	 */
+	for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) {
+		num_paths = mpt_raid_phys_disk_get_num_paths(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum);
+		if (num_paths < 2)
+			continue;
+		phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+		   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+		if (!phys_disk)
+			continue;
+		if ((mpt_raid_phys_disk_pg1(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum,
+		    phys_disk))) {
+			kfree(phys_disk);
+			continue;
+		}
+		for (j = 0; j < num_paths; j++) {
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_INVALID))
+				continue;
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_BROKEN))
+				continue;
+			if ((id == phys_disk->Path[j].PhysDiskID) &&
+			    (channel == phys_disk->Path[j].PhysDiskBus)) {
+				rc = phys_disk->PhysDiskNum;
+				kfree(phys_disk);
+				goto out;
+			}
+		}
+		kfree(phys_disk);
+	}
+
 	/*
 	 * Check inactive list for matching phys disks
 	 */
@@ -2457,7 +2336,6 @@
 		    sdev->ppr, sdev->inquiry_len));
 
 	vdevice->configured_lun = 1;
-	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH);
 
 	dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"Queue depth=%d, tflags=%x\n",
@@ -2469,6 +2347,7 @@
 		    ioc->name, vtarget->negoFlags, vtarget->maxOffset,
 		    vtarget->minSyncFactor));
 
+	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH);
 	dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"tagged %d, simple %d, ordered %d\n",
 		ioc->name,sdev->tagged_supported, sdev->simple_tags,
@@ -2542,15 +2421,13 @@
 }
 
 /**
- * mptscsih_get_scsi_lookup
+ * mptscsih_get_scsi_lookup - retrieves scmd entry
  * @ioc: Pointer to MPT_ADAPTER structure
  * @i: index into the array
  *
- * retrieves scmd entry from ScsiLookup[] array list
- *
  * Returns the scsi_cmd pointer
- **/
-static struct scsi_cmnd *
+ */
+struct scsi_cmnd *
 mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i)
 {
 	unsigned long	flags;
@@ -2562,15 +2439,15 @@
 
 	return scmd;
 }
+EXPORT_SYMBOL(mptscsih_get_scsi_lookup);
 
 /**
- * mptscsih_getclear_scsi_lookup
+ * mptscsih_getclear_scsi_lookup -  retrieves and clears scmd entry from ScsiLookup[] array list
  * @ioc: Pointer to MPT_ADAPTER structure
  * @i: index into the array
  *
- * retrieves and clears scmd entry from ScsiLookup[] array list
- *
  * Returns the scsi_cmd pointer
+ *
  **/
 static struct scsi_cmnd *
 mptscsih_getclear_scsi_lookup(MPT_ADAPTER *ioc, int i)
@@ -2635,94 +2512,33 @@
 mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	MPT_SCSI_HOST	*hd;
-	unsigned long	 flags;
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": IOC %s_reset routed to SCSI host driver!\n",
-	    ioc->name, reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-	    reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
-
-	/* If a FW reload request arrives after base installed but
-	 * before all scsi hosts have been attached, then an alt_ioc
-	 * may have a NULL sh pointer.
-	 */
 	if (ioc->sh == NULL || shost_priv(ioc->sh) == NULL)
 		return 0;
-	else
-		hd = shost_priv(ioc->sh);
 
-	if (reset_phase == MPT_IOC_SETUP_RESET) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Setup-Diag Reset\n", ioc->name));
-
-		/* Clean Up:
-		 * 1. Set Hard Reset Pending Flag
-		 * All new commands go to doneQ
-		 */
-		hd->resetPending = 1;
-
-	} else if (reset_phase == MPT_IOC_PRE_RESET) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Pre-Diag Reset\n", ioc->name));
-
-		/* 2. Flush running commands
-		 *	Clean ScsiLookup (and associated memory)
-		 *	AND clean mytaskQ
-		 */
-
-		/* 2b. Reply to OS all known outstanding I/O commands.
-		 */
+	hd = shost_priv(ioc->sh);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
 		mptscsih_flush_running_cmds(hd);
-
-		/* 2c. If there was an internal command that
-		 * has not completed, configuration or io request,
-		 * free these resources.
-		 */
-		if (hd->cmdPtr) {
-			del_timer(&hd->timer);
-			mpt_free_msg_frame(ioc, hd->cmdPtr);
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->internal_cmds.status |=
+				MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->internal_cmds.done);
 		}
-
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Pre-Reset complete.\n", ioc->name));
-
-	} else {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Post-Diag Reset\n", ioc->name));
-
-		/* Once a FW reload begins, all new OS commands are
-		 * redirected to the doneQ w/ a reset status.
-		 * Init all control structures.
-		 */
-
-		/* 2. Chain Buffer initialization
-		 */
-
-		/* 4. Renegotiate to all devices, if SPI
-		 */
-
-		/* 5. Enable new commands to be posted
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		hd->tmPending = 0;
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-		hd->resetPending = 0;
-		hd->tmState = TM_STATE_NONE;
-
-		/* 6. If there was an internal command,
-		 * wake this process up.
-		 */
-		if (hd->cmdPtr) {
-			/*
-			 * Wake up the original calling thread
-			 */
-			hd->pLocal = &hd->localReply;
-			hd->pLocal->completion = MPT_SCANDV_DID_RESET;
-			hd->scandv_wait_done = 1;
-			wake_up(&hd->scandv_waitq);
-			hd->cmdPtr = NULL;
-		}
-
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Post-Reset complete.\n", ioc->name));
-
+		break;
+	default:
+		break;
 	}
-
 	return 1;		/* currently means nothing really */
 }
 
@@ -2730,55 +2546,16 @@
 int
 mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 {
-	MPT_SCSI_HOST *hd;
 	u8 event = le32_to_cpu(pEvReply->Event) & 0xFF;
 
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "MPT event (=%02Xh) routed to SCSI host driver!\n",
-			ioc->name, event));
+	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"MPT event (=%02Xh) routed to SCSI host driver!\n",
+		ioc->name, event));
 
-	if (ioc->sh == NULL ||
-		((hd = shost_priv(ioc->sh)) == NULL))
-		return 1;
-
-	switch (event) {
-	case MPI_EVENT_UNIT_ATTENTION:			/* 03 */
-		/* FIXME! */
-		break;
-	case MPI_EVENT_IOC_BUS_RESET:			/* 04 */
-	case MPI_EVENT_EXT_BUS_RESET:			/* 05 */
-		if (hd && (ioc->bus_type == SPI) && (hd->soft_resets < -1))
-			hd->soft_resets++;
-		break;
-	case MPI_EVENT_LOGOUT:				/* 09 */
-		/* FIXME! */
-		break;
-
-	case MPI_EVENT_RESCAN:				/* 06 */
-		break;
-
-		/*
-		 *  CHECKME! Don't think we need to do
-		 *  anything for these, but...
-		 */
-	case MPI_EVENT_LINK_STATUS_CHANGE:		/* 07 */
-	case MPI_EVENT_LOOP_STATE_CHANGE:		/* 08 */
-		/*
-		 *  CHECKME!  Falling thru...
-		 */
-		break;
-
-	case MPI_EVENT_INTEGRATED_RAID:			/* 0B */
-		break;
-
-	case MPI_EVENT_NONE:				/* 00 */
-	case MPI_EVENT_LOG_DATA:			/* 01 */
-	case MPI_EVENT_STATE_CHANGE:			/* 02 */
-	case MPI_EVENT_EVENT_CHANGE:			/* 0A */
-	default:
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": Ignoring event (=%02Xh)\n",
-		    ioc->name, event));
-		break;
-	}
+	if ((event == MPI_EVENT_IOC_BUS_RESET ||
+	    event == MPI_EVENT_EXT_BUS_RESET) &&
+	    (ioc->bus_type == SPI) && (ioc->soft_resets < -1))
+			ioc->soft_resets++;
 
 	return 1;		/* currently means nothing really */
 }
@@ -2809,153 +2586,44 @@
  *	Used ONLY for DV and other internal commands.
  */
 int
-mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+				MPT_FRAME_HDR *reply)
 {
-	MPT_SCSI_HOST	*hd;
 	SCSIIORequest_t *pReq;
-	int		 completionCode;
+	SCSIIOReply_t	*pReply;
+	u8		 cmd;
 	u16		 req_idx;
+	u8	*sense_data;
+	int		 sz;
 
-	hd = shost_priv(ioc->sh);
+	ioc->internal_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+	ioc->internal_cmds.completion_code = MPT_SCANDV_GOOD;
+	if (!reply)
+		goto out;
 
-	if ((mf == NULL) ||
-	    (mf >= MPT_INDEX_2_MFPTR(ioc, ioc->req_depth))) {
-		printk(MYIOC_s_ERR_FMT
-			"ScanDvComplete, %s req frame ptr! (=%p)\n",
-				ioc->name, mf?"BAD":"NULL", (void *) mf);
-		goto wakeup;
+	pReply = (SCSIIOReply_t *) reply;
+	pReq = (SCSIIORequest_t *) req;
+	ioc->internal_cmds.completion_code =
+	    mptscsih_get_completion_code(ioc, req, reply);
+	ioc->internal_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->internal_cmds.reply, reply,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * reply->u.reply.MsgLength));
+	cmd = reply->u.hdr.Function;
+	if (((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) ||
+	    (cmd == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) &&
+	    (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID)) {
+		req_idx = le16_to_cpu(req->u.frame.hwhdr.msgctxu.fld.req_idx);
+		sense_data = ((u8 *)ioc->sense_buf_pool +
+		    (req_idx * MPT_SENSE_BUFFER_ALLOC));
+		sz = min_t(int, pReq->SenseBufferLength,
+		    MPT_SENSE_BUFFER_ALLOC);
+		memcpy(ioc->internal_cmds.sense, sense_data, sz);
 	}
-
-	del_timer(&hd->timer);
-	req_idx = le16_to_cpu(mf->u.frame.hwhdr.msgctxu.fld.req_idx);
-	mptscsih_set_scsi_lookup(ioc, req_idx, NULL);
-	pReq = (SCSIIORequest_t *) mf;
-
-	if (mf != hd->cmdPtr) {
-		printk(MYIOC_s_WARN_FMT "ScanDvComplete (mf=%p, cmdPtr=%p, idx=%d)\n",
-				ioc->name, (void *)mf, (void *) hd->cmdPtr, req_idx);
-	}
-	hd->cmdPtr = NULL;
-
-	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "ScanDvComplete (mf=%p,mr=%p,idx=%d)\n",
-			ioc->name, mf, mr, req_idx));
-
-	hd->pLocal = &hd->localReply;
-	hd->pLocal->scsiStatus = 0;
-
-	/* If target struct exists, clear sense valid flag.
-	 */
-	if (mr == NULL) {
-		completionCode = MPT_SCANDV_GOOD;
-	} else {
-		SCSIIOReply_t	*pReply;
-		u16		 status;
-		u8		 scsi_status;
-
-		pReply = (SCSIIOReply_t *) mr;
-
-		status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-		scsi_status = pReply->SCSIStatus;
-
-
-		switch(status) {
-
-		case MPI_IOCSTATUS_SCSI_DEVICE_NOT_THERE:	/* 0x0043 */
-			completionCode = MPT_SCANDV_SELECTION_TIMEOUT;
-			break;
-
-		case MPI_IOCSTATUS_SCSI_IO_DATA_ERROR:		/* 0x0046 */
-		case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
-		case MPI_IOCSTATUS_SCSI_IOC_TERMINATED:		/* 0x004B */
-		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
-			completionCode = MPT_SCANDV_DID_RESET;
-			break;
-
-		case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:		/* 0x0045 */
-		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
-		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
-			if (pReply->Function == MPI_FUNCTION_CONFIG) {
-				ConfigReply_t *pr = (ConfigReply_t *)mr;
-				completionCode = MPT_SCANDV_GOOD;
-				hd->pLocal->header.PageVersion = pr->Header.PageVersion;
-				hd->pLocal->header.PageLength = pr->Header.PageLength;
-				hd->pLocal->header.PageNumber = pr->Header.PageNumber;
-				hd->pLocal->header.PageType = pr->Header.PageType;
-
-			} else if (pReply->Function == MPI_FUNCTION_RAID_ACTION) {
-				/* If the RAID Volume request is successful,
-				 * return GOOD, else indicate that
-				 * some type of error occurred.
-				 */
-				MpiRaidActionReply_t	*pr = (MpiRaidActionReply_t *)mr;
-				if (le16_to_cpu(pr->ActionStatus) == MPI_RAID_ACTION_ASTATUS_SUCCESS)
-					completionCode = MPT_SCANDV_GOOD;
-				else
-					completionCode = MPT_SCANDV_SOME_ERROR;
-				memcpy(hd->pLocal->sense, pr, sizeof(hd->pLocal->sense));
-
-			} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID) {
-				u8		*sense_data;
-				int		 sz;
-
-				/* save sense data in global structure
-				 */
-				completionCode = MPT_SCANDV_SENSE;
-				hd->pLocal->scsiStatus = scsi_status;
-				sense_data = ((u8 *)ioc->sense_buf_pool +
-					(req_idx * MPT_SENSE_BUFFER_ALLOC));
-
-				sz = min_t(int, pReq->SenseBufferLength,
-							SCSI_STD_SENSE_BYTES);
-				memcpy(hd->pLocal->sense, sense_data, sz);
-
-				ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "  Check Condition, sense ptr %p\n",
-				    ioc->name, sense_data));
-			} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_FAILED) {
-				if (pReq->CDB[0] == INQUIRY)
-					completionCode = MPT_SCANDV_ISSUE_SENSE;
-				else
-					completionCode = MPT_SCANDV_DID_RESET;
-			}
-			else if (pReply->SCSIState & MPI_SCSI_STATE_NO_SCSI_STATUS)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else {
-				completionCode = MPT_SCANDV_GOOD;
-				hd->pLocal->scsiStatus = scsi_status;
-			}
-			break;
-
-		case MPI_IOCSTATUS_SCSI_PROTOCOL_ERROR:		/* 0x0047 */
-			if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else
-				completionCode = MPT_SCANDV_SOME_ERROR;
-			break;
-
-		default:
-			completionCode = MPT_SCANDV_SOME_ERROR;
-			break;
-
-		}	/* switch(status) */
-
-	} /* end of address reply case */
-
-	hd->pLocal->completion = completionCode;
-
-	/* MF and RF are freed in mpt_interrupt
-	 */
-wakeup:
-	/* Free Chain buffers (will never chain) in scan or dv */
-	//mptscsih_freeChainBuffers(ioc, req_idx);
-
-	/*
-	 * Wake up the original calling thread
-	 */
-	hd->scandv_wait_done = 1;
-	wake_up(&hd->scandv_waitq);
-
+ out:
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_PENDING))
+		return 0;
+	ioc->internal_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+	complete(&ioc->internal_cmds.done);
 	return 1;
 }
 
@@ -3004,6 +2672,95 @@
 	return;
 }
 
+/**
+ *	mptscsih_get_completion_code -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@reply:
+ *	@cmd:
+ *
+ **/
+static int
+mptscsih_get_completion_code(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+				MPT_FRAME_HDR *reply)
+{
+	SCSIIOReply_t	*pReply;
+	MpiRaidActionReply_t *pr;
+	u8		 scsi_status;
+	u16		 status;
+	int		 completion_code;
+
+	pReply = (SCSIIOReply_t *)reply;
+	status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	scsi_status = pReply->SCSIStatus;
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "IOCStatus=%04xh, SCSIState=%02xh, SCSIStatus=%02xh,"
+	    "IOCLogInfo=%08xh\n", ioc->name, status, pReply->SCSIState,
+	    scsi_status, le32_to_cpu(pReply->IOCLogInfo)));
+
+	switch (status) {
+
+	case MPI_IOCSTATUS_SCSI_DEVICE_NOT_THERE:	/* 0x0043 */
+		completion_code = MPT_SCANDV_SELECTION_TIMEOUT;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_IO_DATA_ERROR:		/* 0x0046 */
+	case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
+	case MPI_IOCSTATUS_SCSI_IOC_TERMINATED:		/* 0x004B */
+	case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
+		completion_code = MPT_SCANDV_DID_RESET;
+		break;
+
+	case MPI_IOCSTATUS_BUSY:
+	case MPI_IOCSTATUS_INSUFFICIENT_RESOURCES:
+		completion_code = MPT_SCANDV_BUSY;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:		/* 0x0045 */
+	case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
+	case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
+		if (pReply->Function == MPI_FUNCTION_CONFIG) {
+			completion_code = MPT_SCANDV_GOOD;
+		} else if (pReply->Function == MPI_FUNCTION_RAID_ACTION) {
+			pr = (MpiRaidActionReply_t *)reply;
+			if (le16_to_cpu(pr->ActionStatus) ==
+				MPI_RAID_ACTION_ASTATUS_SUCCESS)
+				completion_code = MPT_SCANDV_GOOD;
+			else
+				completion_code = MPT_SCANDV_SOME_ERROR;
+		} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID)
+			completion_code = MPT_SCANDV_SENSE;
+		else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_FAILED) {
+			if (req->u.scsireq.CDB[0] == INQUIRY)
+				completion_code = MPT_SCANDV_ISSUE_SENSE;
+			else
+				completion_code = MPT_SCANDV_DID_RESET;
+		} else if (pReply->SCSIState & MPI_SCSI_STATE_NO_SCSI_STATUS)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else if (scsi_status == MPI_SCSI_STATUS_BUSY)
+			completion_code = MPT_SCANDV_BUSY;
+		else
+			completion_code = MPT_SCANDV_GOOD;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_PROTOCOL_ERROR:		/* 0x0047 */
+		if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else
+			completion_code = MPT_SCANDV_SOME_ERROR;
+		break;
+	default:
+		completion_code = MPT_SCANDV_SOME_ERROR;
+		break;
+
+	}	/* switch(status) */
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "  completionCode set to %08xh\n", ioc->name, completion_code));
+	return completion_code;
+}
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
@@ -3030,22 +2787,27 @@
 {
 	MPT_FRAME_HDR	*mf;
 	SCSIIORequest_t	*pScsiReq;
-	SCSIIORequest_t	 ReqCopy;
 	int		 my_idx, ii, dir;
-	int		 rc, cmdTimeout;
-	int		in_isr;
+	int		 timeout;
 	char		 cmdLen;
 	char		 CDB[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	char		 cmd = io->cmd;
-	MPT_ADAPTER 	*ioc = hd->ioc;
+	u8		 cmd = io->cmd;
+	MPT_ADAPTER *ioc = hd->ioc;
+	int		 ret = 0;
+	unsigned long	 timeleft;
+	unsigned long	 flags;
 
-	in_isr = in_interrupt();
-	if (in_isr) {
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Internal SCSI IO request not allowed in ISR context!\n",
-					ioc->name));
-		return -EPERM;
+	/* don't send internal command during diag reset */
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"%s: busy with host reset\n", ioc->name, __func__));
+		return MPT_SCANDV_BUSY;
 	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
+	mutex_lock(&ioc->internal_cmds.mutex);
 
 	/* Set command specific information
 	 */
@@ -3055,13 +2817,13 @@
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 		CDB[4] = io->size;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case TEST_UNIT_READY:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case START_STOP:
@@ -3069,7 +2831,7 @@
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 		CDB[4] = 1;	/*Spin up the disk */
-		cmdTimeout = 15;
+		timeout = 15;
 		break;
 
 	case REQUEST_SENSE:
@@ -3077,7 +2839,7 @@
 		CDB[0] = cmd;
 		CDB[4] = io->size;
 		dir = MPI_SCSIIO_CONTROL_READ;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case READ_BUFFER:
@@ -3096,7 +2858,7 @@
 		CDB[6] = (io->size >> 16) & 0xFF;
 		CDB[7] = (io->size >>  8) & 0xFF;
 		CDB[8] = io->size & 0xFF;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case WRITE_BUFFER:
@@ -3111,21 +2873,21 @@
 		CDB[6] = (io->size >> 16) & 0xFF;
 		CDB[7] = (io->size >>  8) & 0xFF;
 		CDB[8] = io->size & 0xFF;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case RESERVE:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case RELEASE:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case SYNCHRONIZE_CACHE:
@@ -3133,20 +2895,23 @@
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 //		CDB[1] = 0x02;	/* set immediate bit */
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	default:
 		/* Error Case */
-		return -EFAULT;
+		ret = -EFAULT;
+		goto out;
 	}
 
 	/* Get and Populate a free Frame
+	 * MsgContext set in mpt_get_msg_frame call
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "No msg frames!\n",
-		    ioc->name));
-		return -EBUSY;
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s: No msg frames!\n",
+		    ioc->name, __func__));
+		ret = MPT_SCANDV_BUSY;
+		goto out;
 	}
 
 	pScsiReq = (SCSIIORequest_t *) mf;
@@ -3172,7 +2937,7 @@
 
 	pScsiReq->Reserved = 0;
 
-	pScsiReq->MsgFlags = mpt_msg_flags();
+	pScsiReq->MsgFlags = mpt_msg_flags(ioc);
 	/* MsgContext set in mpt_get_msg_fram call  */
 
 	int_to_scsilun(io->lun, (struct scsi_lun *)pScsiReq->LUN);
@@ -3184,74 +2949,58 @@
 
 	if (cmd == REQUEST_SENSE) {
 		pScsiReq->Control = cpu_to_le32(dir | MPI_SCSIIO_CONTROL_UNTAGGED);
-		ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Untagged! 0x%2x\n",
-			ioc->name, cmd));
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: Untagged! 0x%02x\n", ioc->name, __func__, cmd));
 	}
 
-	for (ii=0; ii < 16; ii++)
+	for (ii = 0; ii < 16; ii++)
 		pScsiReq->CDB[ii] = CDB[ii];
 
 	pScsiReq->DataLength = cpu_to_le32(io->size);
 	pScsiReq->SenseBufferLowAddr = cpu_to_le32(ioc->sense_buf_low_dma
 					   + (my_idx * MPT_SENSE_BUFFER_ALLOC));
 
-	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Command 0x%x for (%d:%d:%d)\n",
-			ioc->name, cmd, io->channel, io->id, io->lun));
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s: Sending Command 0x%02x for fw_channel=%d fw_id=%d lun=%d\n",
+	    ioc->name, __func__, cmd, io->channel, io->id, io->lun));
 
-	if (dir == MPI_SCSIIO_CONTROL_READ) {
-		mpt_add_sge((char *) &pScsiReq->SGL,
-			MPT_SGE_FLAGS_SSIMPLE_READ | io->size,
-			io->data_dma);
-	} else {
-		mpt_add_sge((char *) &pScsiReq->SGL,
-			MPT_SGE_FLAGS_SSIMPLE_WRITE | io->size,
-			io->data_dma);
-	}
+	if (dir == MPI_SCSIIO_CONTROL_READ)
+		ioc->add_sge((char *) &pScsiReq->SGL,
+		    MPT_SGE_FLAGS_SSIMPLE_READ | io->size, io->data_dma);
+	else
+		ioc->add_sge((char *) &pScsiReq->SGL,
+		    MPT_SGE_FLAGS_SSIMPLE_WRITE | io->size, io->data_dma);
 
-	/* The ISR will free the request frame, but we need
-	 * the information to initialize the target. Duplicate.
-	 */
-	memcpy(&ReqCopy, pScsiReq, sizeof(SCSIIORequest_t));
-
-	/* Issue this command after:
-	 *	finish init
-	 *	add timer
-	 * Wait until the reply has been received
-	 *  ScsiScanDvCtx callback function will
-	 *	set hd->pLocal;
-	 *	set scandv_wait_done and call wake_up
-	 */
-	hd->pLocal = NULL;
-	hd->timer.expires = jiffies + HZ*cmdTimeout;
-	hd->scandv_wait_done = 0;
-
-	/* Save cmd pointer, for resource free if timeout or
-	 * FW reload occurs
-	 */
-	hd->cmdPtr = mf;
-
-	add_timer(&hd->timer);
+	INITIALIZE_MGMT_STATUS(ioc->internal_cmds.status)
 	mpt_put_msg_frame(ioc->InternalCtx, ioc, mf);
-	wait_event(hd->scandv_waitq, hd->scandv_wait_done);
-
-	if (hd->pLocal) {
-		rc = hd->pLocal->completion;
-		hd->pLocal->skip = 0;
-
-		/* Always set fatal error codes in some cases.
-		 */
-		if (rc == MPT_SCANDV_SELECTION_TIMEOUT)
-			rc = -ENXIO;
-		else if (rc == MPT_SCANDV_SOME_ERROR)
-			rc =  -rc;
-	} else {
-		rc = -EFAULT;
-		/* This should never happen. */
-		ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "_do_cmd: Null pLocal!!!\n",
-				ioc->name));
+	timeleft = wait_for_completion_timeout(&ioc->internal_cmds.done,
+	    timeout*HZ);
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = MPT_SCANDV_DID_RESET;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: TIMED OUT for cmd=0x%02x\n", ioc->name, __func__,
+		    cmd));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(ioc, mf);
+			goto out;
+		}
+		if (!timeleft) {
+			printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
 	}
 
-	return rc;
+	ret = ioc->internal_cmds.completion_code;
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: success, rc=0x%02x\n",
+			ioc->name, __func__, ret));
+
+ out:
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	mutex_unlock(&ioc->internal_cmds.mutex);
+	return ret;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -3491,6 +3240,7 @@
 	&dev_attr_debug_level,
 	NULL,
 };
+
 EXPORT_SYMBOL(mptscsih_host_attrs);
 
 EXPORT_SYMBOL(mptscsih_remove);
@@ -3516,6 +3266,5 @@
 EXPORT_SYMBOL(mptscsih_ioc_reset);
 EXPORT_SYMBOL(mptscsih_change_queue_depth);
 EXPORT_SYMBOL(mptscsih_timer_expired);
-EXPORT_SYMBOL(mptscsih_TMHandler);
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 319aa30..eb3f677 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -60,6 +60,7 @@
 #define MPT_SCANDV_SELECTION_TIMEOUT	(0x00000008)
 #define MPT_SCANDV_ISSUE_SENSE		(0x00000010)
 #define MPT_SCANDV_FALLBACK		(0x00000020)
+#define MPT_SCANDV_BUSY			(0x00000040)
 
 #define MPT_SCANDV_MAX_RETRIES		(10)
 
@@ -89,6 +90,7 @@
 
 #endif
 
+
 typedef struct _internal_cmd {
 	char		*data;		/* data pointer */
 	dma_addr_t	data_dma;	/* data dma address */
@@ -112,6 +114,8 @@
 extern int mptscsih_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, int length, int func);
 extern const char * mptscsih_info(struct Scsi_Host *SChost);
 extern int mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *));
+extern int mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel,
+	u8 id, int lun, int ctx2abort, ulong timeout);
 extern void mptscsih_slave_destroy(struct scsi_device *device);
 extern int mptscsih_slave_configure(struct scsi_device *device);
 extern int mptscsih_abort(struct scsi_cmnd * SCpnt);
@@ -126,7 +130,8 @@
 extern int mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth);
 extern void mptscsih_timer_expired(unsigned long data);
-extern int mptscsih_TMHandler(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern struct device_attribute *mptscsih_host_attrs[];
+extern struct scsi_cmnd	*mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
+extern void mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 6162014..c5b808f 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -300,7 +300,7 @@
 	flagsLength = MPT_SGE_FLAGS_SSIMPLE_WRITE |
 		(IOCPage4Ptr->Header.PageLength + ii) * 4;
 
-	mpt_add_sge((char *)&pReq->PageBufferSGE, flagsLength, dataDma);
+	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, dataDma);
 
 	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"writeIOCPage4: MaxSEP=%d ActiveSEP=%d id=%d bus=%d\n",
@@ -614,19 +614,24 @@
 	spi_width(starget) = (nego & MPI_SCSIDEVPAGE0_NP_WIDE) ? 1 : 0;
 }
 
-static int
+int
 mptscsih_quiesce_raid(MPT_SCSI_HOST *hd, int quiesce, u8 channel, u8 id)
 {
+	MPT_ADAPTER	*ioc = hd->ioc;
 	MpiRaidActionRequest_t	*pReq;
 	MPT_FRAME_HDR		*mf;
-	MPT_ADAPTER *ioc = hd->ioc;
+	int			ret;
+	unsigned long 	 	timeleft;
+
+	mutex_lock(&ioc->internal_cmds.mutex);
 
 	/* Get and Populate a free Frame
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
-		ddvprintk(ioc, printk(MYIOC_s_WARN_FMT "_do_raid: no msg frames!\n",
-					ioc->name));
-		return -EAGAIN;
+		dfailprintk(hd->ioc, printk(MYIOC_s_WARN_FMT
+			"%s: no msg frames!\n", ioc->name, __func__));
+		ret = -EAGAIN;
+		goto out;
 	}
 	pReq = (MpiRaidActionRequest_t *)mf;
 	if (quiesce)
@@ -643,29 +648,36 @@
 	pReq->Reserved2 = 0;
 	pReq->ActionDataWord = 0; /* Reserved for this action */
 
-	mpt_add_sge((char *)&pReq->ActionDataSGE,
+	ioc->add_sge((char *)&pReq->ActionDataSGE,
 		MPT_SGE_FLAGS_SSIMPLE_READ | 0, (dma_addr_t) -1);
 
 	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "RAID Volume action=%x channel=%d id=%d\n",
 			ioc->name, pReq->Action, channel, id));
 
-	hd->pLocal = NULL;
-	hd->timer.expires = jiffies + HZ*10; /* 10 second timeout */
-	hd->scandv_wait_done = 0;
-
-	/* Save cmd pointer, for resource free if timeout or
-	 * FW reload occurs
-	 */
-	hd->cmdPtr = mf;
-
-	add_timer(&hd->timer);
+	INITIALIZE_MGMT_STATUS(ioc->internal_cmds.status)
 	mpt_put_msg_frame(ioc->InternalCtx, ioc, mf);
-	wait_event(hd->scandv_waitq, hd->scandv_wait_done);
+	timeleft = wait_for_completion_timeout(&ioc->internal_cmds.done, 10*HZ);
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: TIMED OUT!\n",
+		    ioc->name, __func__));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft) {
+			printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
+	}
 
-	if ((hd->pLocal == NULL) || (hd->pLocal->completion != 0))
-		return -1;
+	ret = ioc->internal_cmds.completion_code;
 
-	return 0;
+ out:
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	mutex_unlock(&ioc->internal_cmds.mutex);
+	return ret;
 }
 
 static void mptspi_dv_device(struct _MPT_SCSI_HOST *hd,
@@ -1423,17 +1435,15 @@
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
@@ -1464,9 +1474,6 @@
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
@@ -1493,8 +1500,6 @@
 		mpt_saf_te));
 	ioc->spi_data.noQas = 0;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 	hd->spi_pending = 0;
 
@@ -1514,7 +1519,7 @@
 	 * issue internal bus reset
 	 */
 	if (ioc->spi_data.bus_reset)
-		mptscsih_TMHandler(hd,
+		mptscsih_IssueTaskMgmt(hd,
 		    MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
 		    0, 0, 0, 0, 5);
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index a443e13..335d4c7 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -426,15 +426,9 @@
 	struct request_queue *q = req->q;
 	unsigned long flags;
 
-	if (blk_end_request(req, error, nr_bytes)) {
-		int leftover = (req->hard_nr_sectors << KERNEL_SECTOR_SHIFT);
-
-		if (blk_pc_request(req))
-			leftover = req->data_len;
-
+	if (blk_end_request(req, error, nr_bytes))
 		if (error)
-			blk_end_request(req, -EIO, leftover);
-	}
+			blk_end_request_all(req, -EIO);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
@@ -761,7 +755,7 @@
 			break;
 
 		case CACHE_SMARTFETCH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x201F0008;
 			else
 				ctl_flags = 0x001F0000;
@@ -781,13 +775,13 @@
 			ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTBACK:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTTHROUGH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
@@ -800,8 +794,9 @@
 	if (c->adaptec) {
 		u8 cmd[10];
 		u32 scsi_flags;
-		u16 hwsec = queue_hardsect_size(req->q) >> KERNEL_SECTOR_SHIFT;
+		u16 hwsec;
 
+		hwsec = queue_logical_block_size(req->q) >> KERNEL_SECTOR_SHIFT;
 		memset(cmd, 0, 10);
 
 		sgl_offset = SGL_OFFSET_12;
@@ -827,22 +822,22 @@
 
 		*mptr++ = cpu_to_le32(scsi_flags);
 
-		*((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec);
-		*((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec);
+		*((u32 *) & cmd[2]) = cpu_to_be32(blk_rq_pos(req) * hwsec);
+		*((u16 *) & cmd[7]) = cpu_to_be16(blk_rq_sectors(req) * hwsec);
 
 		memcpy(mptr, cmd, 10);
 		mptr += 4;
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+		*mptr++ = cpu_to_le32(blk_rq_bytes(req));
 	} else
 #endif
 	{
 		msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
 		*mptr++ = cpu_to_le32(ctl_flags);
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+		*mptr++ = cpu_to_le32(blk_rq_bytes(req));
 		*mptr++ =
-		    cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32((u32) (blk_rq_pos(req) << KERNEL_SECTOR_SHIFT));
 		*mptr++ =
-		    cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32(blk_rq_pos(req) >> (32 - KERNEL_SECTOR_SHIFT));
 	}
 
 	if (!i2o_block_sglist_alloc(c, ireq, &mptr)) {
@@ -883,7 +878,7 @@
 	struct request *req;
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req)
 			break;
 
@@ -896,7 +891,7 @@
 
 			if (queue_depth < I2O_BLOCK_MAX_OPEN_REQUESTS) {
 				if (!i2o_block_transfer(req)) {
-					blkdev_dequeue_request(req);
+					blk_start_request(req);
 					continue;
 				} else
 					osm_info("transfer error\n");
@@ -922,8 +917,10 @@
 				blk_stop_queue(q);
 				break;
 			}
-		} else
-			end_request(req, 0);
+		} else {
+			blk_start_request(req);
+			__blk_end_request_all(req, -EIO);
+		}
 	}
 };
 
@@ -1082,7 +1079,7 @@
 	 */
 	if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) ||
 	    !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) {
-		blk_queue_hardsect_size(queue, le32_to_cpu(blocksize));
+		blk_queue_logical_block_size(queue, le32_to_cpu(blocksize));
 	} else
 		osm_warn("unable to get blocksize of %s\n", gd->disk_name);
 
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index b25e9b6..98ffc41e 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -243,7 +243,7 @@
 		brq.mrq.cmd = &brq.cmd;
 		brq.mrq.data = &brq.data;
 
-		brq.cmd.arg = req->sector;
+		brq.cmd.arg = blk_rq_pos(req);
 		if (!mmc_card_blockaddr(card))
 			brq.cmd.arg <<= 9;
 		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -251,7 +251,7 @@
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
 		brq.stop.arg = 0;
 		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = req->nr_sectors;
+		brq.data.blocks = blk_rq_sectors(req);
 
 		/*
 		 * The block layer doesn't support all sector count
@@ -301,7 +301,7 @@
 		 * Adjust the sg list so it is the same size as the
 		 * request.
 		 */
-		if (brq.data.blocks != req->nr_sectors) {
+		if (brq.data.blocks != blk_rq_sectors(req)) {
 			int i, data_size = brq.data.blocks << 9;
 			struct scatterlist *sg;
 
@@ -352,8 +352,8 @@
 			printk(KERN_ERR "%s: error %d transferring data,"
 			       " sector %u, nr %u, card status %#x\n",
 			       req->rq_disk->disk_name, brq.data.error,
-			       (unsigned)req->sector,
-			       (unsigned)req->nr_sectors, status);
+			       (unsigned)blk_rq_pos(req),
+			       (unsigned)blk_rq_sectors(req), status);
 		}
 
 		if (brq.stop.error) {
@@ -521,7 +521,7 @@
 
 	sprintf(md->disk->disk_name, "mmcblk%d", devidx);
 
-	blk_queue_hardsect_size(md->queue.queue, 512);
+	blk_queue_logical_block_size(md->queue.queue, 512);
 
 	if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
 		/*
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 7a72e75..49e5823 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -55,7 +55,7 @@
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!blk_queue_plugged(q))
-			req = elv_next_request(q);
+			req = blk_fetch_request(q);
 		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
@@ -88,16 +88,11 @@
 {
 	struct mmc_queue *mq = q->queuedata;
 	struct request *req;
-	int ret;
 
 	if (!mq) {
 		printk(KERN_ERR "MMC: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL) {
-			do {
-				ret = __blk_end_request(req, -EIO,
-							blk_rq_cur_bytes(req));
-			} while (ret);
-		}
+		while ((req = blk_fetch_request(q)) != NULL)
+			__blk_end_request_all(req, -EIO);
 		return;
 	}
 
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index c643d0f..b56d72f 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -64,6 +64,31 @@
 	unsigned int tmout;
 	int tmout_index;
 
+	/*
+	 * Hardware weirdness.  The FIFO_EMPTY bit of the HW_STATE
+	 * register is sometimes not set before a while when some
+	 * "unusual" data block sizes are used (such as with the SWITCH
+	 * command), even despite the fact that the XFER_DONE interrupt
+	 * was raised.  And if another data transfer starts before
+	 * this bit comes to good sense (which eventually happens by
+	 * itself) then the new transfer simply fails with a timeout.
+	 */
+	if (!(mvsd_read(MVSD_HW_STATE) & (1 << 13))) {
+		unsigned long t = jiffies + HZ;
+		unsigned int hw_state,  count = 0;
+		do {
+			if (time_after(jiffies, t)) {
+				dev_warn(host->dev, "FIFO_EMPTY bit missing\n");
+				break;
+			}
+			hw_state = mvsd_read(MVSD_HW_STATE);
+			count++;
+		} while (!(hw_state & (1 << 13)));
+		dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit "
+				   "(hw=0x%04x, count=%d, jiffies=%ld)\n",
+				   hw_state, count, jiffies - (t - HZ));
+	}
+
 	/* If timeout=0 then maximum timeout index is used. */
 	tmout = DIV_ROUND_UP(data->timeout_ns, host->ns_per_clk);
 	tmout += data->timeout_clks;
@@ -620,9 +645,18 @@
 	if (ios->bus_width == MMC_BUS_WIDTH_4)
 		ctrl_reg |= MVSD_HOST_CTRL_DATA_WIDTH_4_BITS;
 
+	/*
+	 * The HI_SPEED_EN bit is causing trouble with many (but not all)
+	 * high speed SD, SDHC and SDIO cards.  Not enabling that bit
+	 * makes all cards work.  So let's just ignore that bit for now
+	 * and revisit this issue if problems for not enabling this bit
+	 * are ever reported.
+	 */
+#if 0
 	if (ios->timing == MMC_TIMING_MMC_HS ||
 	    ios->timing == MMC_TIMING_SD_HS)
 		ctrl_reg |= MVSD_HOST_CTRL_HI_SPEED_EN;
+#endif
 
 	host->ctrl = ctrl_reg;
 	mvsd_write(MVSD_HOST_CTRL, ctrl_reg);
@@ -882,3 +916,4 @@
 MODULE_AUTHOR("Maen Suleiman, Nicolas Pitre");
 MODULE_DESCRIPTION("Marvell MMC,SD,SDIO Host Controller driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mvsdio");
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index b4a615c..f4cbe47 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -140,6 +140,8 @@
 	struct work_struct	datawork;
 };
 
+static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
+
 static inline int mxcmci_use_dma(struct mxcmci_host *host)
 {
 	return host->do_dma;
@@ -160,7 +162,7 @@
 	writew(0xff, host->base + MMC_REG_RES_TO);
 }
 
-static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
+static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 {
 	unsigned int nob = data->blocks;
 	unsigned int blksz = data->blksz;
@@ -168,6 +170,7 @@
 #ifdef HAS_DMA
 	struct scatterlist *sg;
 	int i;
+	int ret;
 #endif
 	if (data->flags & MMC_DATA_STREAM)
 		nob = 0xffff;
@@ -183,7 +186,7 @@
 	for_each_sg(data->sg, sg, data->sg_len, i) {
 		if (sg->offset & 3 || sg->length & 3) {
 			host->do_dma = 0;
-			return;
+			return 0;
 		}
 	}
 
@@ -192,23 +195,30 @@
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len,  host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_READ);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				datasize,
+				host->res->start + MMC_REG_BUFFER_ACCESS,
+				DMA_MODE_READ);
 	} else {
 		host->dma_dir = DMA_TO_DEVICE;
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len,  host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_WRITE);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				datasize,
+				host->res->start + MMC_REG_BUFFER_ACCESS,
+				DMA_MODE_WRITE);
 	}
 
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "failed to setup DMA : %d\n", ret);
+		return ret;
+	}
 	wmb();
 
 	imx_dma_enable(host->dma);
 #endif /* HAS_DMA */
+	return 0;
 }
 
 static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
@@ -345,8 +355,11 @@
 		stat = readl(host->base + MMC_REG_STATUS);
 		if (stat & STATUS_ERR_MASK)
 			return stat;
-		if (time_after(jiffies, timeout))
+		if (time_after(jiffies, timeout)) {
+			mxcmci_softreset(host);
+			mxcmci_set_clk_rate(host, host->clock);
 			return STATUS_TIME_OUT_READ;
+		}
 		if (stat & mask)
 			return 0;
 		cpu_relax();
@@ -531,6 +544,7 @@
 {
 	struct mxcmci_host *host = mmc_priv(mmc);
 	unsigned int cmdat = host->cmdat;
+	int error;
 
 	WARN_ON(host->req != NULL);
 
@@ -540,7 +554,12 @@
 	host->do_dma = 1;
 #endif
 	if (req->data) {
-		mxcmci_setup_data(host, req->data);
+		error = mxcmci_setup_data(host, req->data);
+		if (error) {
+			req->cmd->error = error;
+			goto out;
+		}
+
 
 		cmdat |= CMD_DAT_CONT_DATA_ENABLE;
 
@@ -548,7 +567,9 @@
 			cmdat |= CMD_DAT_CONT_WRITE;
 	}
 
-	if (mxcmci_start_cmd(host, req->cmd, cmdat))
+	error = mxcmci_start_cmd(host, req->cmd, cmdat);
+out:
+	if (error)
 		mxcmci_finish_request(host, req);
 }
 
@@ -724,7 +745,9 @@
 		goto out_clk_put;
 	}
 
-	mmc->f_min = clk_get_rate(host->clk) >> 7;
+	mmc->f_min = clk_get_rate(host->clk) >> 16;
+	if (mmc->f_min < 400000)
+		mmc->f_min = 400000;
 	mmc->f_max = clk_get_rate(host->clk) >> 1;
 
 	/* recommended in data sheet */
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index bfa25c0..dceb5ee 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -822,7 +822,7 @@
 		del_timer(&host->cmd_abort_timer);
 		host->abort = 1;
 		OMAP_MMC_WRITE(host, IE, 0);
-		disable_irq(host->irq);
+		disable_irq_nosync(host->irq);
 		schedule_work(&host->cmd_abort_work);
 		return IRQ_HANDLED;
 	}
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e62a22a..c40cb96 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -680,7 +680,7 @@
 	host->dma_ch = -1;
 	/*
 	 * DMA Callback: run in interrupt context.
-	 * mutex_unlock will through a kernel warning if used.
+	 * mutex_unlock will throw a kernel warning if used.
 	 */
 	up(&host->sem);
 }
diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 3ff4ac3..128c614 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -55,7 +55,13 @@
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
 {
-	return in_be16(host->ioaddr + (reg ^ 0x2));
+	u16 ret;
+
+	if (unlikely(reg == SDHCI_HOST_VERSION))
+		ret = in_be16(host->ioaddr + reg);
+	else
+		ret = in_be16(host->ioaddr + (reg ^ 0x2));
+	return ret;
 }
 
 static u8 esdhc_readb(struct sdhci_host *host, int reg)
@@ -277,6 +283,7 @@
 static const struct of_device_id sdhci_of_match[] = {
 	{ .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
+	{ .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "generic-sdhci", },
 	{},
 };
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index a49a9c8..aaac3b6 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -47,40 +47,41 @@
 	unsigned long block, nsect;
 	char *buf;
 
-	block = req->sector << 9 >> tr->blkshift;
-	nsect = req->current_nr_sectors << 9 >> tr->blkshift;
+	block = blk_rq_pos(req) << 9 >> tr->blkshift;
+	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
 
 	buf = req->buffer;
 
 	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
 	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return !tr->discard(dev, block, nsect);
+		return tr->discard(dev, block, nsect);
 
 	if (!blk_fs_request(req))
-		return 0;
+		return -EIO;
 
-	if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk))
-		return 0;
+	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
+	    get_capacity(req->rq_disk))
+		return -EIO;
 
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	case WRITE:
 		if (!tr->writesect)
-			return 0;
+			return -EIO;
 
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	default:
 		printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
-		return 0;
+		return -EIO;
 	}
 }
 
@@ -88,19 +89,18 @@
 {
 	struct mtd_blktrans_ops *tr = arg;
 	struct request_queue *rq = tr->blkcore_priv->rq;
+	struct request *req = NULL;
 
 	/* we might get involved when memory gets low, so use PF_MEMALLOC */
 	current->flags |= PF_MEMALLOC;
 
 	spin_lock_irq(rq->queue_lock);
+
 	while (!kthread_should_stop()) {
-		struct request *req;
 		struct mtd_blktrans_dev *dev;
-		int res = 0;
+		int res;
 
-		req = elv_next_request(rq);
-
-		if (!req) {
+		if (!req && !(req = blk_fetch_request(rq))) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(rq->queue_lock);
 			schedule();
@@ -119,8 +119,13 @@
 
 		spin_lock_irq(rq->queue_lock);
 
-		end_request(req, res);
+		if (!__blk_end_request_cur(req, res))
+			req = NULL;
 	}
+
+	if (req)
+		__blk_end_request_all(req, -EIO);
+
 	spin_unlock_irq(rq->queue_lock);
 
 	return 0;
@@ -373,7 +378,7 @@
 	}
 
 	tr->blkcore_priv->rq->queuedata = tr;
-	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
 		blk_queue_set_discard(tr->blkcore_priv->rq,
 				      blktrans_discard_request);
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 0119220..02700f7 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -407,16 +407,17 @@
 	}
 	info->chip.ecc.mode = ecc_mode;
 
-	info->clk = clk_get(&pdev->dev, "AEMIFCLK");
+	info->clk = clk_get(&pdev->dev, "aemif");
 	if (IS_ERR(info->clk)) {
 		ret = PTR_ERR(info->clk);
-		dev_dbg(&pdev->dev, "unable to get AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
 		goto err_clk;
 	}
 
 	ret = clk_enable(info->clk);
 	if (ret < 0) {
-		dev_dbg(&pdev->dev, "unable to enable AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
+			ret);
 		goto err_clk_enable;
 	}
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 214a92d..f3c4a3b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2264,6 +2264,17 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called bnx2.  This is recommended.
 
+config CNIC
+	tristate "Broadcom CNIC support"
+	depends on BNX2
+	depends on UIO
+	help
+	  This driver supports offload features of Broadcom NetXtremeII
+	  gigabit Ethernet cards.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called cnic.  This is recommended.
+
 config SPIDER_NET
 	tristate "Spider Gigabit Ethernet driver"
 	depends on PCI && (PPC_IBM_CELL_BLADE || PPC_CELLEB)
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index a1c25cb..db30ebd 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -73,6 +73,7 @@
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_TIGON3) += tg3.o
 obj-$(CONFIG_BNX2) += bnx2.o
+obj-$(CONFIG_CNIC) += cnic.o
 obj-$(CONFIG_BNX2X) += bnx2x.o
 bnx2x-objs := bnx2x_main.o bnx2x_link.o
 spidernet-y += spider_net.o spider_net_ethtool.o
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index b0cb29d..3f5fcb0 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -49,6 +49,10 @@
 #include <linux/firmware.h>
 #include <linux/log2.h>
 
+#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+#define BCM_CNIC 1
+#include "cnic_if.h"
+#endif
 #include "bnx2.h"
 #include "bnx2_fw.h"
 
@@ -315,6 +319,158 @@
 	spin_unlock_bh(&bp->indirect_lock);
 }
 
+#ifdef BCM_CNIC
+static int
+bnx2_drv_ctl(struct net_device *dev, struct drv_ctl_info *info)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct drv_ctl_io *io = &info->data.io;
+
+	switch (info->cmd) {
+	case DRV_CTL_IO_WR_CMD:
+		bnx2_reg_wr_ind(bp, io->offset, io->data);
+		break;
+	case DRV_CTL_IO_RD_CMD:
+		io->data = bnx2_reg_rd_ind(bp, io->offset);
+		break;
+	case DRV_CTL_CTX_WR_CMD:
+		bnx2_ctx_wr(bp, io->cid_addr, io->offset, io->data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bnx2_setup_cnic_irq_info(struct bnx2 *bp)
+{
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+	struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+	int sb_id;
+
+	if (bp->flags & BNX2_FLAG_USING_MSIX) {
+		cp->drv_state |= CNIC_DRV_STATE_USING_MSIX;
+		bnapi->cnic_present = 0;
+		sb_id = bp->irq_nvecs;
+		cp->irq_arr[0].irq_flags |= CNIC_IRQ_FL_MSIX;
+	} else {
+		cp->drv_state &= ~CNIC_DRV_STATE_USING_MSIX;
+		bnapi->cnic_tag = bnapi->last_status_idx;
+		bnapi->cnic_present = 1;
+		sb_id = 0;
+		cp->irq_arr[0].irq_flags &= ~CNIC_IRQ_FL_MSIX;
+	}
+
+	cp->irq_arr[0].vector = bp->irq_tbl[sb_id].vector;
+	cp->irq_arr[0].status_blk = (void *)
+		((unsigned long) bnapi->status_blk.msi +
+		(BNX2_SBLK_MSIX_ALIGN_SIZE * sb_id));
+	cp->irq_arr[0].status_blk_num = sb_id;
+	cp->num_irq = 1;
+}
+
+static int bnx2_register_cnic(struct net_device *dev, struct cnic_ops *ops,
+			      void *data)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	if (ops == NULL)
+		return -EINVAL;
+
+	if (cp->drv_state & CNIC_DRV_STATE_REGD)
+		return -EBUSY;
+
+	bp->cnic_data = data;
+	rcu_assign_pointer(bp->cnic_ops, ops);
+
+	cp->num_irq = 0;
+	cp->drv_state = CNIC_DRV_STATE_REGD;
+
+	bnx2_setup_cnic_irq_info(bp);
+
+	return 0;
+}
+
+static int bnx2_unregister_cnic(struct net_device *dev)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	cp->drv_state = 0;
+	bnapi->cnic_present = 0;
+	rcu_assign_pointer(bp->cnic_ops, NULL);
+	synchronize_rcu();
+	return 0;
+}
+
+struct cnic_eth_dev *bnx2_cnic_probe(struct net_device *dev)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	cp->drv_owner = THIS_MODULE;
+	cp->chip_id = bp->chip_id;
+	cp->pdev = bp->pdev;
+	cp->io_base = bp->regview;
+	cp->drv_ctl = bnx2_drv_ctl;
+	cp->drv_register_cnic = bnx2_register_cnic;
+	cp->drv_unregister_cnic = bnx2_unregister_cnic;
+
+	return cp;
+}
+EXPORT_SYMBOL(bnx2_cnic_probe);
+
+static void
+bnx2_cnic_stop(struct bnx2 *bp)
+{
+	struct cnic_ops *c_ops;
+	struct cnic_ctl_info info;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops) {
+		info.cmd = CNIC_CTL_STOP_CMD;
+		c_ops->cnic_ctl(bp->cnic_data, &info);
+	}
+	rcu_read_unlock();
+}
+
+static void
+bnx2_cnic_start(struct bnx2 *bp)
+{
+	struct cnic_ops *c_ops;
+	struct cnic_ctl_info info;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops) {
+		if (!(bp->flags & BNX2_FLAG_USING_MSIX)) {
+			struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+
+			bnapi->cnic_tag = bnapi->last_status_idx;
+		}
+		info.cmd = CNIC_CTL_START_CMD;
+		c_ops->cnic_ctl(bp->cnic_data, &info);
+	}
+	rcu_read_unlock();
+}
+
+#else
+
+static void
+bnx2_cnic_stop(struct bnx2 *bp)
+{
+}
+
+static void
+bnx2_cnic_start(struct bnx2 *bp)
+{
+}
+
+#endif
+
 static int
 bnx2_read_phy(struct bnx2 *bp, u32 reg, u32 *val)
 {
@@ -488,6 +644,7 @@
 static void
 bnx2_netif_stop(struct bnx2 *bp)
 {
+	bnx2_cnic_stop(bp);
 	bnx2_disable_int_sync(bp);
 	if (netif_running(bp->dev)) {
 		bnx2_napi_disable(bp);
@@ -504,6 +661,7 @@
 			netif_tx_wake_all_queues(bp->dev);
 			bnx2_napi_enable(bp);
 			bnx2_enable_int(bp);
+			bnx2_cnic_start(bp);
 		}
 	}
 }
@@ -3164,6 +3322,11 @@
 	if (bnx2_has_fast_work(bnapi))
 		return 1;
 
+#ifdef BCM_CNIC
+	if (bnapi->cnic_present && (bnapi->cnic_tag != sblk->status_idx))
+		return 1;
+#endif
+
 	if ((sblk->status_attn_bits & STATUS_ATTN_EVENTS) !=
 	    (sblk->status_attn_bits_ack & STATUS_ATTN_EVENTS))
 		return 1;
@@ -3193,6 +3356,23 @@
 	bp->idle_chk_status_idx = bnapi->last_status_idx;
 }
 
+#ifdef BCM_CNIC
+static void bnx2_poll_cnic(struct bnx2 *bp, struct bnx2_napi *bnapi)
+{
+	struct cnic_ops *c_ops;
+
+	if (!bnapi->cnic_present)
+		return;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops)
+		bnapi->cnic_tag = c_ops->cnic_handler(bp->cnic_data,
+						      bnapi->status_blk.msi);
+	rcu_read_unlock();
+}
+#endif
+
 static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi)
 {
 	struct status_block *sblk = bnapi->status_blk.msi;
@@ -3267,6 +3447,10 @@
 
 		work_done = bnx2_poll_work(bp, bnapi, work_done, budget);
 
+#ifdef BCM_CNIC
+		bnx2_poll_cnic(bp, bnapi);
+#endif
+
 		/* bnapi->last_status_idx is used below to tell the hw how
 		 * much work has been processed, so we must read it before
 		 * checking for more work.
@@ -4632,8 +4816,11 @@
 	val = REG_RD(bp, BNX2_MQ_CONFIG);
 	val &= ~BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE;
 	val |= BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE_256;
-	if (CHIP_ID(bp) == CHIP_ID_5709_A0 || CHIP_ID(bp) == CHIP_ID_5709_A1)
-		val |= BNX2_MQ_CONFIG_HALT_DIS;
+	if (CHIP_NUM(bp) == CHIP_NUM_5709) {
+		val |= BNX2_MQ_CONFIG_BIN_MQ_MODE;
+		if (CHIP_REV(bp) == CHIP_REV_Ax)
+			val |= BNX2_MQ_CONFIG_HALT_DIS;
+	}
 
 	REG_WR(bp, BNX2_MQ_CONFIG, val);
 
@@ -7471,7 +7658,7 @@
 	INIT_WORK(&bp->reset_task, bnx2_reset_task);
 
 	dev->base_addr = dev->mem_start = pci_resource_start(pdev, 0);
-	mem_len = MB_GET_CID_ADDR(TX_TSS_CID + TX_MAX_TSS_RINGS);
+	mem_len = MB_GET_CID_ADDR(TX_TSS_CID + TX_MAX_TSS_RINGS + 1);
 	dev->mem_end = dev->mem_start + mem_len;
 	dev->irq = pdev->irq;
 
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 5b570e1..a1ff739 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -361,6 +361,9 @@
 #define BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE	 (1<<28)
 
 #define BNX2_L2CTX_HOST_BDIDX				0x00000004
+#define BNX2_L2CTX_STATUSB_NUM_SHIFT			 16
+#define BNX2_L2CTX_STATUSB_NUM(sb_id)			 \
+	(((sb_id) > 0) ? (((sb_id) + 7) << BNX2_L2CTX_STATUSB_NUM_SHIFT) : 0)
 #define BNX2_L2CTX_HOST_BSEQ				0x00000008
 #define BNX2_L2CTX_NX_BSEQ				0x0000000c
 #define BNX2_L2CTX_NX_BDHADDR_HI			0x00000010
@@ -5900,6 +5903,7 @@
 #define BNX2_RXP_FTQ_CTL_CUR_DEPTH			 (0x3ffL<<22)
 
 #define BNX2_RXP_SCRATCH				0x000e0000
+#define BNX2_RXP_SCRATCH_RXP_FLOOD			 0x000e0024
 #define BNX2_RXP_SCRATCH_RSS_TBL_SZ			 0x000e0038
 #define BNX2_RXP_SCRATCH_RSS_TBL			 0x000e003c
 #define BNX2_RXP_SCRATCH_RSS_TBL_MAX_ENTRIES		 128
@@ -6678,6 +6682,11 @@
 	u32 			last_status_idx;
 	u32			int_num;
 
+#ifdef BCM_CNIC
+	u32			cnic_tag;
+	int			cnic_present;
+#endif
+
 	struct bnx2_rx_ring_info	rx_ring;
 	struct bnx2_tx_ring_info	tx_ring;
 };
@@ -6727,6 +6736,11 @@
 	int		tx_ring_size;
 	u32		tx_wake_thresh;
 
+#ifdef BCM_CNIC
+	struct cnic_ops		*cnic_ops;
+	void			*cnic_data;
+#endif
+
 	/* End of fields used in the performance code paths. */
 
 	unsigned int		current_interval;
@@ -6885,6 +6899,10 @@
 
 	u32			idle_chk_status_idx;
 
+#ifdef BCM_CNIC
+	struct cnic_eth_dev	cnic_eth_dev;
+#endif
+
 	const struct firmware	*mips_firmware;
 	const struct firmware	*rv2p_firmware;
 };
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
new file mode 100644
index 0000000..8d74037
--- /dev/null
+++ b/drivers/net/cnic.c
@@ -0,0 +1,2711 @@
+/* cnic.c: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Original skeleton written by: John(Zongxi) Chen (zongxi@broadcom.com)
+ * Modified and maintained by: Michael Chan <mchan@broadcom.com>
+ */
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/uio_driver.h>
+#include <linux/in.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#define BCM_VLAN 1
+#endif
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <scsi/iscsi_if.h>
+
+#include "cnic_if.h"
+#include "bnx2.h"
+#include "cnic.h"
+#include "cnic_defs.h"
+
+#define DRV_MODULE_NAME		"cnic"
+#define PFX DRV_MODULE_NAME	": "
+
+static char version[] __devinitdata =
+	"Broadcom NetXtreme II CNIC Driver " DRV_MODULE_NAME " v" CNIC_MODULE_VERSION " (" CNIC_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("Michael Chan <mchan@broadcom.com> and John(Zongxi) "
+	      "Chen (zongxi@broadcom.com");
+MODULE_DESCRIPTION("Broadcom NetXtreme II CNIC Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CNIC_MODULE_VERSION);
+
+static LIST_HEAD(cnic_dev_list);
+static DEFINE_RWLOCK(cnic_dev_lock);
+static DEFINE_MUTEX(cnic_lock);
+
+static struct cnic_ulp_ops *cnic_ulp_tbl[MAX_CNIC_ULP_TYPE];
+
+static int cnic_service_bnx2(void *, void *);
+static int cnic_ctl(void *, struct cnic_ctl_info *);
+
+static struct cnic_ops cnic_bnx2_ops = {
+	.cnic_owner	= THIS_MODULE,
+	.cnic_handler	= cnic_service_bnx2,
+	.cnic_ctl	= cnic_ctl,
+};
+
+static void cnic_shutdown_bnx2_rx_ring(struct cnic_dev *);
+static void cnic_init_bnx2_tx_ring(struct cnic_dev *);
+static void cnic_init_bnx2_rx_ring(struct cnic_dev *);
+static int cnic_cm_set_pg(struct cnic_sock *);
+
+static int cnic_uio_open(struct uio_info *uinfo, struct inode *inode)
+{
+	struct cnic_dev *dev = uinfo->priv;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cp->uio_dev != -1)
+		return -EBUSY;
+
+	cp->uio_dev = iminor(inode);
+
+	cnic_shutdown_bnx2_rx_ring(dev);
+
+	cnic_init_bnx2_tx_ring(dev);
+	cnic_init_bnx2_rx_ring(dev);
+
+	return 0;
+}
+
+static int cnic_uio_close(struct uio_info *uinfo, struct inode *inode)
+{
+	struct cnic_dev *dev = uinfo->priv;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cp->uio_dev = -1;
+	return 0;
+}
+
+static inline void cnic_hold(struct cnic_dev *dev)
+{
+	atomic_inc(&dev->ref_count);
+}
+
+static inline void cnic_put(struct cnic_dev *dev)
+{
+	atomic_dec(&dev->ref_count);
+}
+
+static inline void csk_hold(struct cnic_sock *csk)
+{
+	atomic_inc(&csk->ref_count);
+}
+
+static inline void csk_put(struct cnic_sock *csk)
+{
+	atomic_dec(&csk->ref_count);
+}
+
+static struct cnic_dev *cnic_from_netdev(struct net_device *netdev)
+{
+	struct cnic_dev *cdev;
+
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(cdev, &cnic_dev_list, list) {
+		if (netdev == cdev->netdev) {
+			cnic_hold(cdev);
+			read_unlock(&cnic_dev_lock);
+			return cdev;
+		}
+	}
+	read_unlock(&cnic_dev_lock);
+	return NULL;
+}
+
+static void cnic_ctx_wr(struct cnic_dev *dev, u32 cid_addr, u32 off, u32 val)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_CTX_WR_CMD;
+	io->cid_addr = cid_addr;
+	io->offset = off;
+	io->data = val;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static void cnic_reg_wr_ind(struct cnic_dev *dev, u32 off, u32 val)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_IO_WR_CMD;
+	io->offset = off;
+	io->data = val;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static u32 cnic_reg_rd_ind(struct cnic_dev *dev, u32 off)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_IO_RD_CMD;
+	io->offset = off;
+	ethdev->drv_ctl(dev->netdev, &info);
+	return io->data;
+}
+
+static int cnic_in_use(struct cnic_sock *csk)
+{
+	return test_bit(SK_F_INUSE, &csk->flags);
+}
+
+static void cnic_kwq_completion(struct cnic_dev *dev, u32 count)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+
+	info.cmd = DRV_CTL_COMPLETION_CMD;
+	info.data.comp.comp_count = count;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static int cnic_send_nlmsg(struct cnic_local *cp, u32 type,
+			   struct cnic_sock *csk)
+{
+	struct iscsi_path path_req;
+	char *buf = NULL;
+	u16 len = 0;
+	u32 msg_type = ISCSI_KEVENT_IF_DOWN;
+	struct cnic_ulp_ops *ulp_ops;
+
+	if (cp->uio_dev == -1)
+		return -ENODEV;
+
+	if (csk) {
+		len = sizeof(path_req);
+		buf = (char *) &path_req;
+		memset(&path_req, 0, len);
+
+		msg_type = ISCSI_KEVENT_PATH_REQ;
+		path_req.handle = (u64) csk->l5_cid;
+		if (test_bit(SK_F_IPV6, &csk->flags)) {
+			memcpy(&path_req.dst.v6_addr, &csk->dst_ip[0],
+			       sizeof(struct in6_addr));
+			path_req.ip_addr_len = 16;
+		} else {
+			memcpy(&path_req.dst.v4_addr, &csk->dst_ip[0],
+			       sizeof(struct in_addr));
+			path_req.ip_addr_len = 4;
+		}
+		path_req.vlan_id = csk->vlan_id;
+		path_req.pmtu = csk->mtu;
+	}
+
+	rcu_read_lock();
+	ulp_ops = rcu_dereference(cp->ulp_ops[CNIC_ULP_ISCSI]);
+	if (ulp_ops)
+		ulp_ops->iscsi_nl_send_msg(cp->dev, msg_type, buf, len);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int cnic_iscsi_nl_msg_recv(struct cnic_dev *dev, u32 msg_type,
+				  char *buf, u16 len)
+{
+	int rc = -EINVAL;
+
+	switch (msg_type) {
+	case ISCSI_UEVENT_PATH_UPDATE: {
+		struct cnic_local *cp;
+		u32 l5_cid;
+		struct cnic_sock *csk;
+		struct iscsi_path *path_resp;
+
+		if (len < sizeof(*path_resp))
+			break;
+
+		path_resp = (struct iscsi_path *) buf;
+		cp = dev->cnic_priv;
+		l5_cid = (u32) path_resp->handle;
+		if (l5_cid >= MAX_CM_SK_TBL_SZ)
+			break;
+
+		csk = &cp->csk_tbl[l5_cid];
+		csk_hold(csk);
+		if (cnic_in_use(csk)) {
+			memcpy(csk->ha, path_resp->mac_addr, 6);
+			if (test_bit(SK_F_IPV6, &csk->flags))
+				memcpy(&csk->src_ip[0], &path_resp->src.v6_addr,
+				       sizeof(struct in6_addr));
+			else
+				memcpy(&csk->src_ip[0], &path_resp->src.v4_addr,
+				       sizeof(struct in_addr));
+			if (is_valid_ether_addr(csk->ha))
+				cnic_cm_set_pg(csk);
+		}
+		csk_put(csk);
+		rc = 0;
+	}
+	}
+
+	return rc;
+}
+
+static int cnic_offld_prep(struct cnic_sock *csk)
+{
+	if (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+		return 0;
+
+	if (!test_bit(SK_F_CONNECT_START, &csk->flags)) {
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int cnic_close_prep(struct cnic_sock *csk)
+{
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	smp_mb__after_clear_bit();
+
+	if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags)) {
+		while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+			msleep(1);
+
+		return 1;
+	}
+	return 0;
+}
+
+static int cnic_abort_prep(struct cnic_sock *csk)
+{
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	smp_mb__after_clear_bit();
+
+	while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+		msleep(1);
+
+	if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags)) {
+		csk->state = L4_KCQE_OPCODE_VALUE_RESET_COMP;
+		return 1;
+	}
+
+	return 0;
+}
+
+int cnic_register_driver(int ulp_type, struct cnic_ulp_ops *ulp_ops)
+{
+	struct cnic_dev *dev;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_register_driver: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (cnic_ulp_tbl[ulp_type]) {
+		printk(KERN_ERR PFX "cnic_register_driver: Type %d has already "
+				    "been registered\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EBUSY;
+	}
+
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		clear_bit(ULP_F_INIT, &cp->ulp_flags[ulp_type]);
+	}
+	read_unlock(&cnic_dev_lock);
+
+	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], ulp_ops);
+	mutex_unlock(&cnic_lock);
+
+	/* Prevent race conditions with netdev_event */
+	rtnl_lock();
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (!test_and_set_bit(ULP_F_INIT, &cp->ulp_flags[ulp_type]))
+			ulp_ops->cnic_init(dev);
+	}
+	read_unlock(&cnic_dev_lock);
+	rtnl_unlock();
+
+	return 0;
+}
+
+int cnic_unregister_driver(int ulp_type)
+{
+	struct cnic_dev *dev;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_unregister_driver: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (!cnic_ulp_tbl[ulp_type]) {
+		printk(KERN_ERR PFX "cnic_unregister_driver: Type %d has not "
+				    "been registered\n", ulp_type);
+		goto out_unlock;
+	}
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+			printk(KERN_ERR PFX "cnic_unregister_driver: Type %d "
+			       "still has devices registered\n", ulp_type);
+			read_unlock(&cnic_dev_lock);
+			goto out_unlock;
+		}
+	}
+	read_unlock(&cnic_dev_lock);
+
+	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], NULL);
+
+	mutex_unlock(&cnic_lock);
+	synchronize_rcu();
+	return 0;
+
+out_unlock:
+	mutex_unlock(&cnic_lock);
+	return -EINVAL;
+}
+
+static int cnic_start_hw(struct cnic_dev *);
+static void cnic_stop_hw(struct cnic_dev *);
+
+static int cnic_register_device(struct cnic_dev *dev, int ulp_type,
+				void *ulp_ctx)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_ulp_ops *ulp_ops;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_register_device: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (cnic_ulp_tbl[ulp_type] == NULL) {
+		printk(KERN_ERR PFX "cnic_register_device: Driver with type %d "
+				    "has not been registered\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EAGAIN;
+	}
+	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+		printk(KERN_ERR PFX "cnic_register_device: Type %d has already "
+		       "been registered to this device\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EBUSY;
+	}
+
+	clear_bit(ULP_F_START, &cp->ulp_flags[ulp_type]);
+	cp->ulp_handle[ulp_type] = ulp_ctx;
+	ulp_ops = cnic_ulp_tbl[ulp_type];
+	rcu_assign_pointer(cp->ulp_ops[ulp_type], ulp_ops);
+	cnic_hold(dev);
+
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		if (!test_and_set_bit(ULP_F_START, &cp->ulp_flags[ulp_type]))
+			ulp_ops->cnic_start(cp->ulp_handle[ulp_type]);
+
+	mutex_unlock(&cnic_lock);
+
+	return 0;
+
+}
+EXPORT_SYMBOL(cnic_register_driver);
+
+static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_unregister_device: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+		rcu_assign_pointer(cp->ulp_ops[ulp_type], NULL);
+		cnic_put(dev);
+	} else {
+		printk(KERN_ERR PFX "cnic_unregister_device: device not "
+		       "registered to this ulp type %d\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EINVAL;
+	}
+	mutex_unlock(&cnic_lock);
+
+	synchronize_rcu();
+
+	return 0;
+}
+EXPORT_SYMBOL(cnic_unregister_driver);
+
+static int cnic_init_id_tbl(struct cnic_id_tbl *id_tbl, u32 size, u32 start_id)
+{
+	id_tbl->start = start_id;
+	id_tbl->max = size;
+	id_tbl->next = 0;
+	spin_lock_init(&id_tbl->lock);
+	id_tbl->table = kzalloc(DIV_ROUND_UP(size, 32) * 4, GFP_KERNEL);
+	if (!id_tbl->table)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void cnic_free_id_tbl(struct cnic_id_tbl *id_tbl)
+{
+	kfree(id_tbl->table);
+	id_tbl->table = NULL;
+}
+
+static int cnic_alloc_id(struct cnic_id_tbl *id_tbl, u32 id)
+{
+	int ret = -1;
+
+	id -= id_tbl->start;
+	if (id >= id_tbl->max)
+		return ret;
+
+	spin_lock(&id_tbl->lock);
+	if (!test_bit(id, id_tbl->table)) {
+		set_bit(id, id_tbl->table);
+		ret = 0;
+	}
+	spin_unlock(&id_tbl->lock);
+	return ret;
+}
+
+/* Returns -1 if not successful */
+static u32 cnic_alloc_new_id(struct cnic_id_tbl *id_tbl)
+{
+	u32 id;
+
+	spin_lock(&id_tbl->lock);
+	id = find_next_zero_bit(id_tbl->table, id_tbl->max, id_tbl->next);
+	if (id >= id_tbl->max) {
+		id = -1;
+		if (id_tbl->next != 0) {
+			id = find_first_zero_bit(id_tbl->table, id_tbl->next);
+			if (id >= id_tbl->next)
+				id = -1;
+		}
+	}
+
+	if (id < id_tbl->max) {
+		set_bit(id, id_tbl->table);
+		id_tbl->next = (id + 1) & (id_tbl->max - 1);
+		id += id_tbl->start;
+	}
+
+	spin_unlock(&id_tbl->lock);
+
+	return id;
+}
+
+static void cnic_free_id(struct cnic_id_tbl *id_tbl, u32 id)
+{
+	if (id == -1)
+		return;
+
+	id -= id_tbl->start;
+	if (id >= id_tbl->max)
+		return;
+
+	clear_bit(id, id_tbl->table);
+}
+
+static void cnic_free_dma(struct cnic_dev *dev, struct cnic_dma *dma)
+{
+	int i;
+
+	if (!dma->pg_arr)
+		return;
+
+	for (i = 0; i < dma->num_pages; i++) {
+		if (dma->pg_arr[i]) {
+			pci_free_consistent(dev->pcidev, BCM_PAGE_SIZE,
+					    dma->pg_arr[i], dma->pg_map_arr[i]);
+			dma->pg_arr[i] = NULL;
+		}
+	}
+	if (dma->pgtbl) {
+		pci_free_consistent(dev->pcidev, dma->pgtbl_size,
+				    dma->pgtbl, dma->pgtbl_map);
+		dma->pgtbl = NULL;
+	}
+	kfree(dma->pg_arr);
+	dma->pg_arr = NULL;
+	dma->num_pages = 0;
+}
+
+static void cnic_setup_page_tbl(struct cnic_dev *dev, struct cnic_dma *dma)
+{
+	int i;
+	u32 *page_table = dma->pgtbl;
+
+	for (i = 0; i < dma->num_pages; i++) {
+		/* Each entry needs to be in big endian format. */
+		*page_table = (u32) ((u64) dma->pg_map_arr[i] >> 32);
+		page_table++;
+		*page_table = (u32) dma->pg_map_arr[i];
+		page_table++;
+	}
+}
+
+static int cnic_alloc_dma(struct cnic_dev *dev, struct cnic_dma *dma,
+			  int pages, int use_pg_tbl)
+{
+	int i, size;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	size = pages * (sizeof(void *) + sizeof(dma_addr_t));
+	dma->pg_arr = kzalloc(size, GFP_ATOMIC);
+	if (dma->pg_arr == NULL)
+		return -ENOMEM;
+
+	dma->pg_map_arr = (dma_addr_t *) (dma->pg_arr + pages);
+	dma->num_pages = pages;
+
+	for (i = 0; i < pages; i++) {
+		dma->pg_arr[i] = pci_alloc_consistent(dev->pcidev,
+						      BCM_PAGE_SIZE,
+						      &dma->pg_map_arr[i]);
+		if (dma->pg_arr[i] == NULL)
+			goto error;
+	}
+	if (!use_pg_tbl)
+		return 0;
+
+	dma->pgtbl_size = ((pages * 8) + BCM_PAGE_SIZE - 1) &
+			  ~(BCM_PAGE_SIZE - 1);
+	dma->pgtbl = pci_alloc_consistent(dev->pcidev, dma->pgtbl_size,
+					  &dma->pgtbl_map);
+	if (dma->pgtbl == NULL)
+		goto error;
+
+	cp->setup_pgtbl(dev, dma);
+
+	return 0;
+
+error:
+	cnic_free_dma(dev, dma);
+	return -ENOMEM;
+}
+
+static void cnic_free_resc(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i = 0;
+
+	if (cp->cnic_uinfo) {
+		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
+		while (cp->uio_dev != -1 && i < 15) {
+			msleep(100);
+			i++;
+		}
+		uio_unregister_device(cp->cnic_uinfo);
+		kfree(cp->cnic_uinfo);
+		cp->cnic_uinfo = NULL;
+	}
+
+	if (cp->l2_buf) {
+		pci_free_consistent(dev->pcidev, cp->l2_buf_size,
+				    cp->l2_buf, cp->l2_buf_map);
+		cp->l2_buf = NULL;
+	}
+
+	if (cp->l2_ring) {
+		pci_free_consistent(dev->pcidev, cp->l2_ring_size,
+				    cp->l2_ring, cp->l2_ring_map);
+		cp->l2_ring = NULL;
+	}
+
+	for (i = 0; i < cp->ctx_blks; i++) {
+		if (cp->ctx_arr[i].ctx) {
+			pci_free_consistent(dev->pcidev, cp->ctx_blk_size,
+					    cp->ctx_arr[i].ctx,
+					    cp->ctx_arr[i].mapping);
+			cp->ctx_arr[i].ctx = NULL;
+		}
+	}
+	kfree(cp->ctx_arr);
+	cp->ctx_arr = NULL;
+	cp->ctx_blks = 0;
+
+	cnic_free_dma(dev, &cp->gbl_buf_info);
+	cnic_free_dma(dev, &cp->conn_buf_info);
+	cnic_free_dma(dev, &cp->kwq_info);
+	cnic_free_dma(dev, &cp->kcq_info);
+	kfree(cp->iscsi_tbl);
+	cp->iscsi_tbl = NULL;
+	kfree(cp->ctx_tbl);
+	cp->ctx_tbl = NULL;
+
+	cnic_free_id_tbl(&cp->cid_tbl);
+}
+
+static int cnic_alloc_context(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5709) {
+		int i, k, arr_size;
+
+		cp->ctx_blk_size = BCM_PAGE_SIZE;
+		cp->cids_per_blk = BCM_PAGE_SIZE / 128;
+		arr_size = BNX2_MAX_CID / cp->cids_per_blk *
+			   sizeof(struct cnic_ctx);
+		cp->ctx_arr = kzalloc(arr_size, GFP_KERNEL);
+		if (cp->ctx_arr == NULL)
+			return -ENOMEM;
+
+		k = 0;
+		for (i = 0; i < 2; i++) {
+			u32 j, reg, off, lo, hi;
+
+			if (i == 0)
+				off = BNX2_PG_CTX_MAP;
+			else
+				off = BNX2_ISCSI_CTX_MAP;
+
+			reg = cnic_reg_rd_ind(dev, off);
+			lo = reg >> 16;
+			hi = reg & 0xffff;
+			for (j = lo; j < hi; j += cp->cids_per_blk, k++)
+				cp->ctx_arr[k].cid = j;
+		}
+
+		cp->ctx_blks = k;
+		if (cp->ctx_blks >= (BNX2_MAX_CID / cp->cids_per_blk)) {
+			cp->ctx_blks = 0;
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < cp->ctx_blks; i++) {
+			cp->ctx_arr[i].ctx =
+				pci_alloc_consistent(dev->pcidev, BCM_PAGE_SIZE,
+						     &cp->ctx_arr[i].mapping);
+			if (cp->ctx_arr[i].ctx == NULL)
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static int cnic_alloc_bnx2_resc(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct uio_info *uinfo;
+	int ret;
+
+	ret = cnic_alloc_dma(dev, &cp->kwq_info, KWQ_PAGE_CNT, 1);
+	if (ret)
+		goto error;
+	cp->kwq = (struct kwqe **) cp->kwq_info.pg_arr;
+
+	ret = cnic_alloc_dma(dev, &cp->kcq_info, KCQ_PAGE_CNT, 1);
+	if (ret)
+		goto error;
+	cp->kcq = (struct kcqe **) cp->kcq_info.pg_arr;
+
+	ret = cnic_alloc_context(dev);
+	if (ret)
+		goto error;
+
+	cp->l2_ring_size = 2 * BCM_PAGE_SIZE;
+	cp->l2_ring = pci_alloc_consistent(dev->pcidev, cp->l2_ring_size,
+					   &cp->l2_ring_map);
+	if (!cp->l2_ring)
+		goto error;
+
+	cp->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
+	cp->l2_buf_size = PAGE_ALIGN(cp->l2_buf_size);
+	cp->l2_buf = pci_alloc_consistent(dev->pcidev, cp->l2_buf_size,
+					   &cp->l2_buf_map);
+	if (!cp->l2_buf)
+		goto error;
+
+	uinfo = kzalloc(sizeof(*uinfo), GFP_ATOMIC);
+	if (!uinfo)
+		goto error;
+
+	uinfo->mem[0].addr = dev->netdev->base_addr;
+	uinfo->mem[0].internal_addr = dev->regview;
+	uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start;
+	uinfo->mem[0].memtype = UIO_MEM_PHYS;
+
+	uinfo->mem[1].addr = (unsigned long) cp->status_blk & PAGE_MASK;
+	if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX)
+		uinfo->mem[1].size = BNX2_SBLK_MSIX_ALIGN_SIZE * 9;
+	else
+		uinfo->mem[1].size = BNX2_SBLK_MSIX_ALIGN_SIZE;
+	uinfo->mem[1].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->mem[2].addr = (unsigned long) cp->l2_ring;
+	uinfo->mem[2].size = cp->l2_ring_size;
+	uinfo->mem[2].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->mem[3].addr = (unsigned long) cp->l2_buf;
+	uinfo->mem[3].size = cp->l2_buf_size;
+	uinfo->mem[3].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->name = "bnx2_cnic";
+	uinfo->version = CNIC_MODULE_VERSION;
+	uinfo->irq = UIO_IRQ_CUSTOM;
+
+	uinfo->open = cnic_uio_open;
+	uinfo->release = cnic_uio_close;
+
+	uinfo->priv = dev;
+
+	ret = uio_register_device(&dev->pcidev->dev, uinfo);
+	if (ret) {
+		kfree(uinfo);
+		goto error;
+	}
+
+	cp->cnic_uinfo = uinfo;
+
+	return 0;
+
+error:
+	cnic_free_resc(dev);
+	return ret;
+}
+
+static inline u32 cnic_kwq_avail(struct cnic_local *cp)
+{
+	return cp->max_kwq_idx -
+		((cp->kwq_prod_idx - cp->kwq_con_idx) & cp->max_kwq_idx);
+}
+
+static int cnic_submit_bnx2_kwqes(struct cnic_dev *dev, struct kwqe *wqes[],
+				  u32 num_wqes)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct kwqe *prod_qe;
+	u16 prod, sw_prod, i;
+
+	if (!test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		return -EAGAIN;		/* bnx2 is down */
+
+	spin_lock_bh(&cp->cnic_ulp_lock);
+	if (num_wqes > cnic_kwq_avail(cp) &&
+	    !(cp->cnic_local_flags & CNIC_LCL_FL_KWQ_INIT)) {
+		spin_unlock_bh(&cp->cnic_ulp_lock);
+		return -EAGAIN;
+	}
+
+	cp->cnic_local_flags &= ~CNIC_LCL_FL_KWQ_INIT;
+
+	prod = cp->kwq_prod_idx;
+	sw_prod = prod & MAX_KWQ_IDX;
+	for (i = 0; i < num_wqes; i++) {
+		prod_qe = &cp->kwq[KWQ_PG(sw_prod)][KWQ_IDX(sw_prod)];
+		memcpy(prod_qe, wqes[i], sizeof(struct kwqe));
+		prod++;
+		sw_prod = prod & MAX_KWQ_IDX;
+	}
+	cp->kwq_prod_idx = prod;
+
+	CNIC_WR16(dev, cp->kwq_io_addr, cp->kwq_prod_idx);
+
+	spin_unlock_bh(&cp->cnic_ulp_lock);
+	return 0;
+}
+
+static void service_kcqes(struct cnic_dev *dev, int num_cqes)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i, j;
+
+	i = 0;
+	j = 1;
+	while (num_cqes) {
+		struct cnic_ulp_ops *ulp_ops;
+		int ulp_type;
+		u32 kcqe_op_flag = cp->completed_kcq[i]->kcqe_op_flag;
+		u32 kcqe_layer = kcqe_op_flag & KCQE_FLAGS_LAYER_MASK;
+
+		if (unlikely(kcqe_op_flag & KCQE_RAMROD_COMPLETION))
+			cnic_kwq_completion(dev, 1);
+
+		while (j < num_cqes) {
+			u32 next_op = cp->completed_kcq[i + j]->kcqe_op_flag;
+
+			if ((next_op & KCQE_FLAGS_LAYER_MASK) != kcqe_layer)
+				break;
+
+			if (unlikely(next_op & KCQE_RAMROD_COMPLETION))
+				cnic_kwq_completion(dev, 1);
+			j++;
+		}
+
+		if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L5_RDMA)
+			ulp_type = CNIC_ULP_RDMA;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L5_ISCSI)
+			ulp_type = CNIC_ULP_ISCSI;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L4)
+			ulp_type = CNIC_ULP_L4;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L2)
+			goto end;
+		else {
+			printk(KERN_ERR PFX "%s: Unknown type of KCQE(0x%x)\n",
+			       dev->netdev->name, kcqe_op_flag);
+			goto end;
+		}
+
+		rcu_read_lock();
+		ulp_ops = rcu_dereference(cp->ulp_ops[ulp_type]);
+		if (likely(ulp_ops)) {
+			ulp_ops->indicate_kcqes(cp->ulp_handle[ulp_type],
+						  cp->completed_kcq + i, j);
+		}
+		rcu_read_unlock();
+end:
+		num_cqes -= j;
+		i += j;
+		j = 1;
+	}
+	return;
+}
+
+static u16 cnic_bnx2_next_idx(u16 idx)
+{
+	return idx + 1;
+}
+
+static u16 cnic_bnx2_hw_idx(u16 idx)
+{
+	return idx;
+}
+
+static int cnic_get_kcqes(struct cnic_dev *dev, u16 hw_prod, u16 *sw_prod)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u16 i, ri, last;
+	struct kcqe *kcqe;
+	int kcqe_cnt = 0, last_cnt = 0;
+
+	i = ri = last = *sw_prod;
+	ri &= MAX_KCQ_IDX;
+
+	while ((i != hw_prod) && (kcqe_cnt < MAX_COMPLETED_KCQE)) {
+		kcqe = &cp->kcq[KCQ_PG(ri)][KCQ_IDX(ri)];
+		cp->completed_kcq[kcqe_cnt++] = kcqe;
+		i = cp->next_idx(i);
+		ri = i & MAX_KCQ_IDX;
+		if (likely(!(kcqe->kcqe_op_flag & KCQE_FLAGS_NEXT))) {
+			last_cnt = kcqe_cnt;
+			last = i;
+		}
+	}
+
+	*sw_prod = last;
+	return last_cnt;
+}
+
+static void cnic_chk_bnx2_pkt_rings(struct cnic_local *cp)
+{
+	u16 rx_cons = *cp->rx_cons_ptr;
+	u16 tx_cons = *cp->tx_cons_ptr;
+
+	if (cp->tx_cons != tx_cons || cp->rx_cons != rx_cons) {
+		cp->tx_cons = tx_cons;
+		cp->rx_cons = rx_cons;
+		uio_event_notify(cp->cnic_uinfo);
+	}
+}
+
+static int cnic_service_bnx2(void *data, void *status_blk)
+{
+	struct cnic_dev *dev = data;
+	struct status_block *sblk = status_blk;
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 status_idx = sblk->status_idx;
+	u16 hw_prod, sw_prod;
+	int kcqe_cnt;
+
+	if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags)))
+		return status_idx;
+
+	cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
+
+	hw_prod = sblk->status_completion_producer_index;
+	sw_prod = cp->kcq_prod_idx;
+	while (sw_prod != hw_prod) {
+		kcqe_cnt = cnic_get_kcqes(dev, hw_prod, &sw_prod);
+		if (kcqe_cnt == 0)
+			goto done;
+
+		service_kcqes(dev, kcqe_cnt);
+
+		/* Tell compiler that status_blk fields can change. */
+		barrier();
+		if (status_idx != sblk->status_idx) {
+			status_idx = sblk->status_idx;
+			cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
+			hw_prod = sblk->status_completion_producer_index;
+		} else
+			break;
+	}
+
+done:
+	CNIC_WR16(dev, cp->kcq_io_addr, sw_prod);
+
+	cp->kcq_prod_idx = sw_prod;
+
+	cnic_chk_bnx2_pkt_rings(cp);
+	return status_idx;
+}
+
+static void cnic_service_bnx2_msix(unsigned long data)
+{
+	struct cnic_dev *dev = (struct cnic_dev *) data;
+	struct cnic_local *cp = dev->cnic_priv;
+	struct status_block_msix *status_blk = cp->bnx2_status_blk;
+	u32 status_idx = status_blk->status_idx;
+	u16 hw_prod, sw_prod;
+	int kcqe_cnt;
+
+	cp->kwq_con_idx = status_blk->status_cmd_consumer_index;
+
+	hw_prod = status_blk->status_completion_producer_index;
+	sw_prod = cp->kcq_prod_idx;
+	while (sw_prod != hw_prod) {
+		kcqe_cnt = cnic_get_kcqes(dev, hw_prod, &sw_prod);
+		if (kcqe_cnt == 0)
+			goto done;
+
+		service_kcqes(dev, kcqe_cnt);
+
+		/* Tell compiler that status_blk fields can change. */
+		barrier();
+		if (status_idx != status_blk->status_idx) {
+			status_idx = status_blk->status_idx;
+			cp->kwq_con_idx = status_blk->status_cmd_consumer_index;
+			hw_prod = status_blk->status_completion_producer_index;
+		} else
+			break;
+	}
+
+done:
+	CNIC_WR16(dev, cp->kcq_io_addr, sw_prod);
+	cp->kcq_prod_idx = sw_prod;
+
+	cnic_chk_bnx2_pkt_rings(cp);
+
+	cp->last_status_idx = status_idx;
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | cp->last_status_idx);
+}
+
+static irqreturn_t cnic_irq(int irq, void *dev_instance)
+{
+	struct cnic_dev *dev = dev_instance;
+	struct cnic_local *cp = dev->cnic_priv;
+	u16 prod = cp->kcq_prod_idx & MAX_KCQ_IDX;
+
+	if (cp->ack_int)
+		cp->ack_int(dev);
+
+	prefetch(cp->status_blk);
+	prefetch(&cp->kcq[KCQ_PG(prod)][KCQ_IDX(prod)]);
+
+	if (likely(test_bit(CNIC_F_CNIC_UP, &dev->flags)))
+		tasklet_schedule(&cp->cnic_irq_task);
+
+	return IRQ_HANDLED;
+}
+
+static void cnic_ulp_stop(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int if_type;
+
+	rcu_read_lock();
+	for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+		if (!ulp_ops)
+			continue;
+
+		if (test_and_clear_bit(ULP_F_START, &cp->ulp_flags[if_type]))
+			ulp_ops->cnic_stop(cp->ulp_handle[if_type]);
+	}
+	rcu_read_unlock();
+}
+
+static void cnic_ulp_start(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int if_type;
+
+	rcu_read_lock();
+	for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+		if (!ulp_ops || !ulp_ops->cnic_start)
+			continue;
+
+		if (!test_and_set_bit(ULP_F_START, &cp->ulp_flags[if_type]))
+			ulp_ops->cnic_start(cp->ulp_handle[if_type]);
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_ctl(void *data, struct cnic_ctl_info *info)
+{
+	struct cnic_dev *dev = data;
+
+	switch (info->cmd) {
+	case CNIC_CTL_STOP_CMD:
+		cnic_hold(dev);
+		mutex_lock(&cnic_lock);
+
+		cnic_ulp_stop(dev);
+		cnic_stop_hw(dev);
+
+		mutex_unlock(&cnic_lock);
+		cnic_put(dev);
+		break;
+	case CNIC_CTL_START_CMD:
+		cnic_hold(dev);
+		mutex_lock(&cnic_lock);
+
+		if (!cnic_start_hw(dev))
+			cnic_ulp_start(dev);
+
+		mutex_unlock(&cnic_lock);
+		cnic_put(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void cnic_ulp_init(struct cnic_dev *dev)
+{
+	int i;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	rcu_read_lock();
+	for (i = 0; i < MAX_CNIC_ULP_TYPE_EXT; i++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cnic_ulp_tbl[i]);
+		if (!ulp_ops || !ulp_ops->cnic_init)
+			continue;
+
+		if (!test_and_set_bit(ULP_F_INIT, &cp->ulp_flags[i]))
+			ulp_ops->cnic_init(dev);
+
+	}
+	rcu_read_unlock();
+}
+
+static void cnic_ulp_exit(struct cnic_dev *dev)
+{
+	int i;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	rcu_read_lock();
+	for (i = 0; i < MAX_CNIC_ULP_TYPE_EXT; i++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cnic_ulp_tbl[i]);
+		if (!ulp_ops || !ulp_ops->cnic_exit)
+			continue;
+
+		if (test_and_clear_bit(ULP_F_INIT, &cp->ulp_flags[i]))
+			ulp_ops->cnic_exit(dev);
+
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_cm_offload_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_offload_pg *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_offload_pg *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_OFFLOAD_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT;
+	l4kwqe->l2hdr_nbytes = ETH_HLEN;
+
+	l4kwqe->da0 = csk->ha[0];
+	l4kwqe->da1 = csk->ha[1];
+	l4kwqe->da2 = csk->ha[2];
+	l4kwqe->da3 = csk->ha[3];
+	l4kwqe->da4 = csk->ha[4];
+	l4kwqe->da5 = csk->ha[5];
+
+	l4kwqe->sa0 = dev->mac_addr[0];
+	l4kwqe->sa1 = dev->mac_addr[1];
+	l4kwqe->sa2 = dev->mac_addr[2];
+	l4kwqe->sa3 = dev->mac_addr[3];
+	l4kwqe->sa4 = dev->mac_addr[4];
+	l4kwqe->sa5 = dev->mac_addr[5];
+
+	l4kwqe->etype = ETH_P_IP;
+	l4kwqe->ipid_count = DEF_IPID_COUNT;
+	l4kwqe->host_opaque = csk->l5_cid;
+
+	if (csk->vlan_id) {
+		l4kwqe->pg_flags |= L4_KWQ_OFFLOAD_PG_VLAN_TAGGING;
+		l4kwqe->vlan_tag = csk->vlan_id;
+		l4kwqe->l2hdr_nbytes += 4;
+	}
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_update_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_update_pg *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_update_pg *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->opcode = L4_KWQE_OPCODE_VALUE_UPDATE_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT;
+	l4kwqe->pg_cid = csk->pg_cid;
+
+	l4kwqe->da0 = csk->ha[0];
+	l4kwqe->da1 = csk->ha[1];
+	l4kwqe->da2 = csk->ha[2];
+	l4kwqe->da3 = csk->ha[3];
+	l4kwqe->da4 = csk->ha[4];
+	l4kwqe->da5 = csk->ha[5];
+
+	l4kwqe->pg_host_opaque = csk->l5_cid;
+	l4kwqe->pg_valids = L4_KWQ_UPDATE_PG_VALIDS_DA;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_upload_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_upload *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_upload *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->opcode = L4_KWQE_OPCODE_VALUE_UPLOAD_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_UPLOAD_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->pg_cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_conn_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_connect_req1 *l4kwqe1;
+	struct l4_kwq_connect_req2 *l4kwqe2;
+	struct l4_kwq_connect_req3 *l4kwqe3;
+	struct kwqe *wqes[3];
+	u8 tcp_flags = 0;
+	int num_wqes = 2;
+
+	l4kwqe1 = (struct l4_kwq_connect_req1 *) &csk->kwqe1;
+	l4kwqe2 = (struct l4_kwq_connect_req2 *) &csk->kwqe2;
+	l4kwqe3 = (struct l4_kwq_connect_req3 *) &csk->kwqe3;
+	memset(l4kwqe1, 0, sizeof(*l4kwqe1));
+	memset(l4kwqe2, 0, sizeof(*l4kwqe2));
+	memset(l4kwqe3, 0, sizeof(*l4kwqe3));
+
+	l4kwqe3->op_code = L4_KWQE_OPCODE_VALUE_CONNECT3;
+	l4kwqe3->flags =
+		L4_LAYER_CODE << L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT;
+	l4kwqe3->ka_timeout = csk->ka_timeout;
+	l4kwqe3->ka_interval = csk->ka_interval;
+	l4kwqe3->ka_max_probe_count = csk->ka_max_probe_count;
+	l4kwqe3->tos = csk->tos;
+	l4kwqe3->ttl = csk->ttl;
+	l4kwqe3->snd_seq_scale = csk->snd_seq_scale;
+	l4kwqe3->pmtu = csk->mtu;
+	l4kwqe3->rcv_buf = csk->rcv_buf;
+	l4kwqe3->snd_buf = csk->snd_buf;
+	l4kwqe3->seed = csk->seed;
+
+	wqes[0] = (struct kwqe *) l4kwqe1;
+	if (test_bit(SK_F_IPV6, &csk->flags)) {
+		wqes[1] = (struct kwqe *) l4kwqe2;
+		wqes[2] = (struct kwqe *) l4kwqe3;
+		num_wqes = 3;
+
+		l4kwqe1->conn_flags = L4_KWQ_CONNECT_REQ1_IP_V6;
+		l4kwqe2->op_code = L4_KWQE_OPCODE_VALUE_CONNECT2;
+		l4kwqe2->flags =
+			L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT |
+			L4_LAYER_CODE << L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT;
+		l4kwqe2->src_ip_v6_2 = be32_to_cpu(csk->src_ip[1]);
+		l4kwqe2->src_ip_v6_3 = be32_to_cpu(csk->src_ip[2]);
+		l4kwqe2->src_ip_v6_4 = be32_to_cpu(csk->src_ip[3]);
+		l4kwqe2->dst_ip_v6_2 = be32_to_cpu(csk->dst_ip[1]);
+		l4kwqe2->dst_ip_v6_3 = be32_to_cpu(csk->dst_ip[2]);
+		l4kwqe2->dst_ip_v6_4 = be32_to_cpu(csk->dst_ip[3]);
+		l4kwqe3->mss = l4kwqe3->pmtu - sizeof(struct ipv6hdr) -
+			       sizeof(struct tcphdr);
+	} else {
+		wqes[1] = (struct kwqe *) l4kwqe3;
+		l4kwqe3->mss = l4kwqe3->pmtu - sizeof(struct iphdr) -
+			       sizeof(struct tcphdr);
+	}
+
+	l4kwqe1->op_code = L4_KWQE_OPCODE_VALUE_CONNECT1;
+	l4kwqe1->flags =
+		(L4_LAYER_CODE << L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT) |
+		 L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT;
+	l4kwqe1->cid = csk->cid;
+	l4kwqe1->pg_cid = csk->pg_cid;
+	l4kwqe1->src_ip = be32_to_cpu(csk->src_ip[0]);
+	l4kwqe1->dst_ip = be32_to_cpu(csk->dst_ip[0]);
+	l4kwqe1->src_port = be16_to_cpu(csk->src_port);
+	l4kwqe1->dst_port = be16_to_cpu(csk->dst_port);
+	if (csk->tcp_flags & SK_TCP_NO_DELAY_ACK)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK;
+	if (csk->tcp_flags & SK_TCP_KEEP_ALIVE)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_KEEP_ALIVE;
+	if (csk->tcp_flags & SK_TCP_NAGLE)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE;
+	if (csk->tcp_flags & SK_TCP_TIMESTAMP)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_TIME_STAMP;
+	if (csk->tcp_flags & SK_TCP_SACK)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_SACK;
+	if (csk->tcp_flags & SK_TCP_SEG_SCALING)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_SEG_SCALING;
+
+	l4kwqe1->tcp_flags = tcp_flags;
+
+	return dev->submit_kwqes(dev, wqes, num_wqes);
+}
+
+static int cnic_cm_close_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_close_req *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_close_req *) &csk->kwqe2;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_CLOSE;
+	l4kwqe->flags = L4_LAYER_CODE << L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_abort_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_reset_req *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_reset_req *) &csk->kwqe2;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_RESET;
+	l4kwqe->flags = L4_LAYER_CODE << L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_create(struct cnic_dev *dev, int ulp_type, u32 cid,
+			  u32 l5_cid, struct cnic_sock **csk, void *context)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_sock *csk1;
+
+	if (l5_cid >= MAX_CM_SK_TBL_SZ)
+		return -EINVAL;
+
+	csk1 = &cp->csk_tbl[l5_cid];
+	if (atomic_read(&csk1->ref_count))
+		return -EAGAIN;
+
+	if (test_and_set_bit(SK_F_INUSE, &csk1->flags))
+		return -EBUSY;
+
+	csk1->dev = dev;
+	csk1->cid = cid;
+	csk1->l5_cid = l5_cid;
+	csk1->ulp_type = ulp_type;
+	csk1->context = context;
+
+	csk1->ka_timeout = DEF_KA_TIMEOUT;
+	csk1->ka_interval = DEF_KA_INTERVAL;
+	csk1->ka_max_probe_count = DEF_KA_MAX_PROBE_COUNT;
+	csk1->tos = DEF_TOS;
+	csk1->ttl = DEF_TTL;
+	csk1->snd_seq_scale = DEF_SND_SEQ_SCALE;
+	csk1->rcv_buf = DEF_RCV_BUF;
+	csk1->snd_buf = DEF_SND_BUF;
+	csk1->seed = DEF_SEED;
+
+	*csk = csk1;
+	return 0;
+}
+
+static void cnic_cm_cleanup(struct cnic_sock *csk)
+{
+	if (csk->src_port) {
+		struct cnic_dev *dev = csk->dev;
+		struct cnic_local *cp = dev->cnic_priv;
+
+		cnic_free_id(&cp->csk_port_tbl, csk->src_port);
+		csk->src_port = 0;
+	}
+}
+
+static void cnic_close_conn(struct cnic_sock *csk)
+{
+	if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags)) {
+		cnic_cm_upload_pg(csk);
+		clear_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags);
+	}
+	cnic_cm_cleanup(csk);
+}
+
+static int cnic_cm_destroy(struct cnic_sock *csk)
+{
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	csk_hold(csk);
+	clear_bit(SK_F_INUSE, &csk->flags);
+	smp_mb__after_clear_bit();
+	while (atomic_read(&csk->ref_count) != 1)
+		msleep(1);
+	cnic_cm_cleanup(csk);
+
+	csk->flags = 0;
+	csk_put(csk);
+	return 0;
+}
+
+static inline u16 cnic_get_vlan(struct net_device *dev,
+				struct net_device **vlan_dev)
+{
+	if (dev->priv_flags & IFF_802_1Q_VLAN) {
+		*vlan_dev = vlan_dev_real_dev(dev);
+		return vlan_dev_vlan_id(dev);
+	}
+	*vlan_dev = dev;
+	return 0;
+}
+
+static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
+			     struct dst_entry **dst)
+{
+	struct flowi fl;
+	int err;
+	struct rtable *rt;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.nl_u.ip4_u.daddr = dst_addr->sin_addr.s_addr;
+
+	err = ip_route_output_key(&init_net, &rt, &fl);
+	if (!err)
+		*dst = &rt->u.dst;
+	return err;
+}
+
+static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr,
+			     struct dst_entry **dst)
+{
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &dst_addr->sin6_addr);
+	if (ipv6_addr_type(&fl.fl6_dst) & IPV6_ADDR_LINKLOCAL)
+		fl.oif = dst_addr->sin6_scope_id;
+
+	*dst = ip6_route_output(&init_net, NULL, &fl);
+	if (*dst)
+		return 0;
+#endif
+
+	return -ENETUNREACH;
+}
+
+static struct cnic_dev *cnic_cm_select_dev(struct sockaddr_in *dst_addr,
+					   int ulp_type)
+{
+	struct cnic_dev *dev = NULL;
+	struct dst_entry *dst;
+	struct net_device *netdev = NULL;
+	int err = -ENETUNREACH;
+
+	if (dst_addr->sin_family == AF_INET)
+		err = cnic_get_v4_route(dst_addr, &dst);
+	else if (dst_addr->sin_family == AF_INET6) {
+		struct sockaddr_in6 *dst_addr6 =
+			(struct sockaddr_in6 *) dst_addr;
+
+		err = cnic_get_v6_route(dst_addr6, &dst);
+	} else
+		return NULL;
+
+	if (err)
+		return NULL;
+
+	if (!dst->dev)
+		goto done;
+
+	cnic_get_vlan(dst->dev, &netdev);
+
+	dev = cnic_from_netdev(netdev);
+
+done:
+	dst_release(dst);
+	if (dev)
+		cnic_put(dev);
+	return dev;
+}
+
+static int cnic_resolve_addr(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	return cnic_send_nlmsg(cp, ISCSI_KEVENT_PATH_REQ, csk);
+}
+
+static int cnic_get_route(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+	int is_v6, err, rc = -ENETUNREACH;
+	struct dst_entry *dst;
+	struct net_device *realdev;
+	u32 local_port;
+
+	if (saddr->local.v6.sin6_family == AF_INET6 &&
+	    saddr->remote.v6.sin6_family == AF_INET6)
+		is_v6 = 1;
+	else if (saddr->local.v4.sin_family == AF_INET &&
+		 saddr->remote.v4.sin_family == AF_INET)
+		is_v6 = 0;
+	else
+		return -EINVAL;
+
+	clear_bit(SK_F_IPV6, &csk->flags);
+
+	if (is_v6) {
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		set_bit(SK_F_IPV6, &csk->flags);
+		err = cnic_get_v6_route(&saddr->remote.v6, &dst);
+		if (err)
+			return err;
+
+		if (!dst || dst->error || !dst->dev)
+			goto err_out;
+
+		memcpy(&csk->dst_ip[0], &saddr->remote.v6.sin6_addr,
+		       sizeof(struct in6_addr));
+		csk->dst_port = saddr->remote.v6.sin6_port;
+		local_port = saddr->local.v6.sin6_port;
+#else
+		return rc;
+#endif
+
+	} else {
+		err = cnic_get_v4_route(&saddr->remote.v4, &dst);
+		if (err)
+			return err;
+
+		if (!dst || dst->error || !dst->dev)
+			goto err_out;
+
+		csk->dst_ip[0] = saddr->remote.v4.sin_addr.s_addr;
+		csk->dst_port = saddr->remote.v4.sin_port;
+		local_port = saddr->local.v4.sin_port;
+	}
+
+	csk->vlan_id = cnic_get_vlan(dst->dev, &realdev);
+	if (realdev != dev->netdev)
+		goto err_out;
+
+	if (local_port >= CNIC_LOCAL_PORT_MIN &&
+	    local_port < CNIC_LOCAL_PORT_MAX) {
+		if (cnic_alloc_id(&cp->csk_port_tbl, local_port))
+			local_port = 0;
+	} else
+		local_port = 0;
+
+	if (!local_port) {
+		local_port = cnic_alloc_new_id(&cp->csk_port_tbl);
+		if (local_port == -1) {
+			rc = -ENOMEM;
+			goto err_out;
+		}
+	}
+	csk->src_port = local_port;
+
+	csk->mtu = dst_mtu(dst);
+	rc = 0;
+
+err_out:
+	dst_release(dst);
+	return rc;
+}
+
+static void cnic_init_csk_state(struct cnic_sock *csk)
+{
+	csk->state = 0;
+	clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+	clear_bit(SK_F_CLOSING, &csk->flags);
+}
+
+static int cnic_cm_connect(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	int err = 0;
+
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (test_and_set_bit(SK_F_CONNECT_START, &csk->flags))
+		return -EINVAL;
+
+	cnic_init_csk_state(csk);
+
+	err = cnic_get_route(csk, saddr);
+	if (err)
+		goto err_out;
+
+	err = cnic_resolve_addr(csk, saddr);
+	if (!err)
+		return 0;
+
+err_out:
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	return err;
+}
+
+static int cnic_cm_abort(struct cnic_sock *csk)
+{
+	struct cnic_local *cp = csk->dev->cnic_priv;
+	u32 opcode;
+
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (cnic_abort_prep(csk))
+		return cnic_cm_abort_req(csk);
+
+	/* Getting here means that we haven't started connect, or
+	 * connect was not successful.
+	 */
+
+	csk->state = L4_KCQE_OPCODE_VALUE_RESET_COMP;
+	if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
+		opcode = csk->state;
+	else
+		opcode = L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD;
+	cp->close_conn(csk, opcode);
+
+	return 0;
+}
+
+static int cnic_cm_close(struct cnic_sock *csk)
+{
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (cnic_close_prep(csk)) {
+		csk->state = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
+		return cnic_cm_close_req(csk);
+	}
+	return 0;
+}
+
+static void cnic_cm_upcall(struct cnic_local *cp, struct cnic_sock *csk,
+			   u8 opcode)
+{
+	struct cnic_ulp_ops *ulp_ops;
+	int ulp_type = csk->ulp_type;
+
+	rcu_read_lock();
+	ulp_ops = rcu_dereference(cp->ulp_ops[ulp_type]);
+	if (ulp_ops) {
+		if (opcode == L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE)
+			ulp_ops->cm_connect_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_CLOSE_COMP)
+			ulp_ops->cm_close_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_RESET_RECEIVED)
+			ulp_ops->cm_remote_abort(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_RESET_COMP)
+			ulp_ops->cm_abort_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED)
+			ulp_ops->cm_remote_close(csk);
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_cm_set_pg(struct cnic_sock *csk)
+{
+	if (cnic_offld_prep(csk)) {
+		if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
+			cnic_cm_update_pg(csk);
+		else
+			cnic_cm_offload_pg(csk);
+	}
+	return 0;
+}
+
+static void cnic_cm_process_offld_pg(struct cnic_dev *dev, struct l4_kcq *kcqe)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 l5_cid = kcqe->pg_host_opaque;
+	u8 opcode = kcqe->op_code;
+	struct cnic_sock *csk = &cp->csk_tbl[l5_cid];
+
+	csk_hold(csk);
+	if (!cnic_in_use(csk))
+		goto done;
+
+	if (opcode == L4_KCQE_OPCODE_VALUE_UPDATE_PG) {
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		goto done;
+	}
+	csk->pg_cid = kcqe->pg_cid;
+	set_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags);
+	cnic_cm_conn_req(csk);
+
+done:
+	csk_put(csk);
+}
+
+static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct l4_kcq *l4kcqe = (struct l4_kcq *) kcqe;
+	u8 opcode = l4kcqe->op_code;
+	u32 l5_cid;
+	struct cnic_sock *csk;
+
+	if (opcode == L4_KCQE_OPCODE_VALUE_OFFLOAD_PG ||
+	    opcode == L4_KCQE_OPCODE_VALUE_UPDATE_PG) {
+		cnic_cm_process_offld_pg(dev, l4kcqe);
+		return;
+	}
+
+	l5_cid = l4kcqe->conn_id;
+	if (opcode & 0x80)
+		l5_cid = l4kcqe->cid;
+	if (l5_cid >= MAX_CM_SK_TBL_SZ)
+		return;
+
+	csk = &cp->csk_tbl[l5_cid];
+	csk_hold(csk);
+
+	if (!cnic_in_use(csk)) {
+		csk_put(csk);
+		return;
+	}
+
+	switch (opcode) {
+	case L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE:
+		if (l4kcqe->status == 0)
+			set_bit(SK_F_OFFLD_COMPLETE, &csk->flags);
+
+		smp_mb__before_clear_bit();
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		cnic_cm_upcall(cp, csk, opcode);
+		break;
+
+	case L4_KCQE_OPCODE_VALUE_RESET_RECEIVED:
+		if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags))
+			csk->state = opcode;
+		/* fall through */
+	case L4_KCQE_OPCODE_VALUE_CLOSE_COMP:
+	case L4_KCQE_OPCODE_VALUE_RESET_COMP:
+		cp->close_conn(csk, opcode);
+		break;
+
+	case L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED:
+		cnic_cm_upcall(cp, csk, opcode);
+		break;
+	}
+	csk_put(csk);
+}
+
+static void cnic_cm_indicate_kcqe(void *data, struct kcqe *kcqe[], u32 num)
+{
+	struct cnic_dev *dev = data;
+	int i;
+
+	for (i = 0; i < num; i++)
+		cnic_cm_process_kcqe(dev, kcqe[i]);
+}
+
+static struct cnic_ulp_ops cm_ulp_ops = {
+	.indicate_kcqes		= cnic_cm_indicate_kcqe,
+};
+
+static void cnic_cm_free_mem(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	kfree(cp->csk_tbl);
+	cp->csk_tbl = NULL;
+	cnic_free_id_tbl(&cp->csk_port_tbl);
+}
+
+static int cnic_cm_alloc_mem(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cp->csk_tbl = kzalloc(sizeof(struct cnic_sock) * MAX_CM_SK_TBL_SZ,
+			      GFP_KERNEL);
+	if (!cp->csk_tbl)
+		return -ENOMEM;
+
+	if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE,
+			     CNIC_LOCAL_PORT_MIN)) {
+		cnic_cm_free_mem(dev);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int cnic_ready_to_close(struct cnic_sock *csk, u32 opcode)
+{
+	if ((opcode == csk->state) ||
+	    (opcode == L4_KCQE_OPCODE_VALUE_RESET_RECEIVED &&
+	     csk->state == L4_KCQE_OPCODE_VALUE_CLOSE_COMP)) {
+		if (!test_and_set_bit(SK_F_CLOSING, &csk->flags))
+			return 1;
+	}
+	return 0;
+}
+
+static void cnic_close_bnx2_conn(struct cnic_sock *csk, u32 opcode)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	if (cnic_ready_to_close(csk, opcode)) {
+		cnic_close_conn(csk);
+		cnic_cm_upcall(cp, csk, opcode);
+	}
+}
+
+static void cnic_cm_stop_bnx2_hw(struct cnic_dev *dev)
+{
+}
+
+static int cnic_cm_init_bnx2_hw(struct cnic_dev *dev)
+{
+	u32 seed;
+
+	get_random_bytes(&seed, 4);
+	cnic_ctx_wr(dev, 45, 0, seed);
+	return 0;
+}
+
+static int cnic_cm_open(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int err;
+
+	err = cnic_cm_alloc_mem(dev);
+	if (err)
+		return err;
+
+	err = cp->start_cm(dev);
+
+	if (err)
+		goto err_out;
+
+	dev->cm_create = cnic_cm_create;
+	dev->cm_destroy = cnic_cm_destroy;
+	dev->cm_connect = cnic_cm_connect;
+	dev->cm_abort = cnic_cm_abort;
+	dev->cm_close = cnic_cm_close;
+	dev->cm_select_dev = cnic_cm_select_dev;
+
+	cp->ulp_handle[CNIC_ULP_L4] = dev;
+	rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], &cm_ulp_ops);
+	return 0;
+
+err_out:
+	cnic_cm_free_mem(dev);
+	return err;
+}
+
+static int cnic_cm_shutdown(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i;
+
+	cp->stop_cm(dev);
+
+	if (!cp->csk_tbl)
+		return 0;
+
+	for (i = 0; i < MAX_CM_SK_TBL_SZ; i++) {
+		struct cnic_sock *csk = &cp->csk_tbl[i];
+
+		clear_bit(SK_F_INUSE, &csk->flags);
+		cnic_cm_cleanup(csk);
+	}
+	cnic_cm_free_mem(dev);
+
+	return 0;
+}
+
+static void cnic_init_context(struct cnic_dev *dev, u32 cid)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 cid_addr;
+	int i;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5709)
+		return;
+
+	cid_addr = GET_CID_ADDR(cid);
+
+	for (i = 0; i < CTX_SIZE; i += 4)
+		cnic_ctx_wr(dev, cid_addr, i, 0);
+}
+
+static int cnic_setup_5709_context(struct cnic_dev *dev, int valid)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int ret = 0, i;
+	u32 valid_bit = valid ? BNX2_CTX_HOST_PAGE_TBL_DATA0_VALID : 0;
+
+	if (CHIP_NUM(cp) != CHIP_NUM_5709)
+		return 0;
+
+	for (i = 0; i < cp->ctx_blks; i++) {
+		int j;
+		u32 idx = cp->ctx_arr[i].cid / cp->cids_per_blk;
+		u32 val;
+
+		memset(cp->ctx_arr[i].ctx, 0, BCM_PAGE_SIZE);
+
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_DATA0,
+			(cp->ctx_arr[i].mapping & 0xffffffff) | valid_bit);
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_DATA1,
+			(u64) cp->ctx_arr[i].mapping >> 32);
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_CTRL, idx |
+			BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ);
+		for (j = 0; j < 10; j++) {
+
+			val = CNIC_RD(dev, BNX2_CTX_HOST_PAGE_TBL_CTRL);
+			if (!(val & BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ))
+				break;
+			udelay(5);
+		}
+		if (val & BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	return ret;
+}
+
+static void cnic_free_irq(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		cp->disable_int_sync(dev);
+		tasklet_disable(&cp->cnic_irq_task);
+		free_irq(ethdev->irq_arr[0].vector, dev);
+	}
+}
+
+static int cnic_init_bnx2_irq(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		int err, i = 0;
+		int sblk_num = cp->status_blk_num;
+		u32 base = ((sblk_num - 1) * BNX2_HC_SB_CONFIG_SIZE) +
+			   BNX2_HC_SB_CONFIG_1;
+
+		CNIC_WR(dev, base, BNX2_HC_SB_CONFIG_1_ONE_SHOT);
+
+		CNIC_WR(dev, base + BNX2_HC_COMP_PROD_TRIP_OFF, (2 << 16) | 8);
+		CNIC_WR(dev, base + BNX2_HC_COM_TICKS_OFF, (64 << 16) | 220);
+		CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220);
+
+		cp->bnx2_status_blk = cp->status_blk;
+		cp->last_status_idx = cp->bnx2_status_blk->status_idx;
+		tasklet_init(&cp->cnic_irq_task, &cnic_service_bnx2_msix,
+			     (unsigned long) dev);
+		err = request_irq(ethdev->irq_arr[0].vector, cnic_irq, 0,
+				  "cnic", dev);
+		if (err) {
+			tasklet_disable(&cp->cnic_irq_task);
+			return err;
+		}
+		while (cp->bnx2_status_blk->status_completion_producer_index &&
+		       i < 10) {
+			CNIC_WR(dev, BNX2_HC_COALESCE_NOW,
+				1 << (11 + sblk_num));
+			udelay(10);
+			i++;
+			barrier();
+		}
+		if (cp->bnx2_status_blk->status_completion_producer_index) {
+			cnic_free_irq(dev);
+			goto failed;
+		}
+
+	} else {
+		struct status_block *sblk = cp->status_blk;
+		u32 hc_cmd = CNIC_RD(dev, BNX2_HC_COMMAND);
+		int i = 0;
+
+		while (sblk->status_completion_producer_index && i < 10) {
+			CNIC_WR(dev, BNX2_HC_COMMAND,
+				hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+			udelay(10);
+			i++;
+			barrier();
+		}
+		if (sblk->status_completion_producer_index)
+			goto failed;
+
+	}
+	return 0;
+
+failed:
+	printk(KERN_ERR PFX "%s: " "KCQ index not resetting to 0.\n",
+	       dev->netdev->name);
+	return -EBUSY;
+}
+
+static void cnic_enable_bnx2_int(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (!(ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX))
+		return;
+
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | cp->last_status_idx);
+}
+
+static void cnic_disable_bnx2_int_sync(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (!(ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX))
+		return;
+
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
+	CNIC_RD(dev, BNX2_PCICFG_INT_ACK_CMD);
+	synchronize_irq(ethdev->irq_arr[0].vector);
+}
+
+static void cnic_init_bnx2_tx_ring(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	u32 cid_addr, tx_cid, sb_id;
+	u32 val, offset0, offset1, offset2, offset3;
+	int i;
+	struct tx_bd *txbd;
+	dma_addr_t buf_map;
+	struct status_block *s_blk = cp->status_blk;
+
+	sb_id = cp->status_blk_num;
+	tx_cid = 20;
+	cnic_init_context(dev, tx_cid);
+	cnic_init_context(dev, tx_cid + 1);
+	cp->tx_cons_ptr = &s_blk->status_tx_quick_consumer_index2;
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		struct status_block_msix *sblk = cp->status_blk;
+
+		tx_cid = TX_TSS_CID + sb_id - 1;
+		cnic_init_context(dev, tx_cid);
+		CNIC_WR(dev, BNX2_TSCH_TSS_CFG, (sb_id << 24) |
+			(TX_TSS_CID << 7));
+		cp->tx_cons_ptr = &sblk->status_tx_quick_consumer_index;
+	}
+	cp->tx_cons = *cp->tx_cons_ptr;
+
+	cid_addr = GET_CID_ADDR(tx_cid);
+	if (CHIP_NUM(cp) == CHIP_NUM_5709) {
+		u32 cid_addr2 = GET_CID_ADDR(tx_cid + 4) + 0x40;
+
+		for (i = 0; i < PHY_CTX_SIZE; i += 4)
+			cnic_ctx_wr(dev, cid_addr2, i, 0);
+
+		offset0 = BNX2_L2CTX_TYPE_XI;
+		offset1 = BNX2_L2CTX_CMD_TYPE_XI;
+		offset2 = BNX2_L2CTX_TBDR_BHADDR_HI_XI;
+		offset3 = BNX2_L2CTX_TBDR_BHADDR_LO_XI;
+	} else {
+		offset0 = BNX2_L2CTX_TYPE;
+		offset1 = BNX2_L2CTX_CMD_TYPE;
+		offset2 = BNX2_L2CTX_TBDR_BHADDR_HI;
+		offset3 = BNX2_L2CTX_TBDR_BHADDR_LO;
+	}
+	val = BNX2_L2CTX_TYPE_TYPE_L2 | BNX2_L2CTX_TYPE_SIZE_L2;
+	cnic_ctx_wr(dev, cid_addr, offset0, val);
+
+	val = BNX2_L2CTX_CMD_TYPE_TYPE_L2 | (8 << 16);
+	cnic_ctx_wr(dev, cid_addr, offset1, val);
+
+	txbd = (struct tx_bd *) cp->l2_ring;
+
+	buf_map = cp->l2_buf_map;
+	for (i = 0; i < MAX_TX_DESC_CNT; i++, txbd++) {
+		txbd->tx_bd_haddr_hi = (u64) buf_map >> 32;
+		txbd->tx_bd_haddr_lo = (u64) buf_map & 0xffffffff;
+	}
+	val = (u64) cp->l2_ring_map >> 32;
+	cnic_ctx_wr(dev, cid_addr, offset2, val);
+	txbd->tx_bd_haddr_hi = val;
+
+	val = (u64) cp->l2_ring_map & 0xffffffff;
+	cnic_ctx_wr(dev, cid_addr, offset3, val);
+	txbd->tx_bd_haddr_lo = val;
+}
+
+static void cnic_init_bnx2_rx_ring(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	u32 cid_addr, sb_id, val, coal_reg, coal_val;
+	int i;
+	struct rx_bd *rxbd;
+	struct status_block *s_blk = cp->status_blk;
+
+	sb_id = cp->status_blk_num;
+	cnic_init_context(dev, 2);
+	cp->rx_cons_ptr = &s_blk->status_rx_quick_consumer_index2;
+	coal_reg = BNX2_HC_COMMAND;
+	coal_val = CNIC_RD(dev, coal_reg);
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		struct status_block_msix *sblk = cp->status_blk;
+
+		cp->rx_cons_ptr = &sblk->status_rx_quick_consumer_index;
+		coal_reg = BNX2_HC_COALESCE_NOW;
+		coal_val = 1 << (11 + sb_id);
+	}
+	i = 0;
+	while (!(*cp->rx_cons_ptr != 0) && i < 10) {
+		CNIC_WR(dev, coal_reg, coal_val);
+		udelay(10);
+		i++;
+		barrier();
+	}
+	cp->rx_cons = *cp->rx_cons_ptr;
+
+	cid_addr = GET_CID_ADDR(2);
+	val = BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE |
+	      BNX2_L2CTX_CTX_TYPE_SIZE_L2 | (0x02 << 8);
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_CTX_TYPE, val);
+
+	if (sb_id == 0)
+		val = 2 << BNX2_L2CTX_STATUSB_NUM_SHIFT;
+	else
+		val = BNX2_L2CTX_STATUSB_NUM(sb_id);
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_HOST_BDIDX, val);
+
+	rxbd = (struct rx_bd *) (cp->l2_ring + BCM_PAGE_SIZE);
+	for (i = 0; i < MAX_RX_DESC_CNT; i++, rxbd++) {
+		dma_addr_t buf_map;
+		int n = (i % cp->l2_rx_ring_size) + 1;
+
+		buf_map = cp->l2_buf_map + (n * cp->l2_single_buf_size);
+		rxbd->rx_bd_len = cp->l2_single_buf_size;
+		rxbd->rx_bd_flags = RX_BD_FLAGS_START | RX_BD_FLAGS_END;
+		rxbd->rx_bd_haddr_hi = (u64) buf_map >> 32;
+		rxbd->rx_bd_haddr_lo = (u64) buf_map & 0xffffffff;
+	}
+	val = (u64) (cp->l2_ring_map + BCM_PAGE_SIZE) >> 32;
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_HI, val);
+	rxbd->rx_bd_haddr_hi = val;
+
+	val = (u64) (cp->l2_ring_map + BCM_PAGE_SIZE) & 0xffffffff;
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_LO, val);
+	rxbd->rx_bd_haddr_lo = val;
+
+	val = cnic_reg_rd_ind(dev, BNX2_RXP_SCRATCH_RXP_FLOOD);
+	cnic_reg_wr_ind(dev, BNX2_RXP_SCRATCH_RXP_FLOOD, val | (1 << 2));
+}
+
+static void cnic_shutdown_bnx2_rx_ring(struct cnic_dev *dev)
+{
+	struct kwqe *wqes[1], l2kwqe;
+
+	memset(&l2kwqe, 0, sizeof(l2kwqe));
+	wqes[0] = &l2kwqe;
+	l2kwqe.kwqe_op_flag = (L2_LAYER_CODE << KWQE_FLAGS_LAYER_SHIFT) |
+			      (L2_KWQE_OPCODE_VALUE_FLUSH <<
+			       KWQE_OPCODE_SHIFT) | 2;
+	dev->submit_kwqes(dev, wqes, 1);
+}
+
+static void cnic_set_bnx2_mac(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 val;
+
+	val = cp->func << 2;
+
+	cp->shmem_base = cnic_reg_rd_ind(dev, BNX2_SHM_HDR_ADDR_0 + val);
+
+	val = cnic_reg_rd_ind(dev, cp->shmem_base +
+			      BNX2_PORT_HW_CFG_ISCSI_MAC_UPPER);
+	dev->mac_addr[0] = (u8) (val >> 8);
+	dev->mac_addr[1] = (u8) val;
+
+	CNIC_WR(dev, BNX2_EMAC_MAC_MATCH4, val);
+
+	val = cnic_reg_rd_ind(dev, cp->shmem_base +
+			      BNX2_PORT_HW_CFG_ISCSI_MAC_LOWER);
+	dev->mac_addr[2] = (u8) (val >> 24);
+	dev->mac_addr[3] = (u8) (val >> 16);
+	dev->mac_addr[4] = (u8) (val >> 8);
+	dev->mac_addr[5] = (u8) val;
+
+	CNIC_WR(dev, BNX2_EMAC_MAC_MATCH5, val);
+
+	val = 4 | BNX2_RPM_SORT_USER2_BC_EN;
+	if (CHIP_NUM(cp) != CHIP_NUM_5709)
+		val |= BNX2_RPM_SORT_USER2_PROM_VLAN;
+
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, 0x0);
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, val);
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, val | BNX2_RPM_SORT_USER2_ENA);
+}
+
+static int cnic_start_bnx2_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct status_block *sblk = cp->status_blk;
+	u32 val;
+	int err;
+
+	cnic_set_bnx2_mac(dev);
+
+	val = CNIC_RD(dev, BNX2_MQ_CONFIG);
+	val &= ~BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE;
+	if (BCM_PAGE_BITS > 12)
+		val |= (12 - 8)  << 4;
+	else
+		val |= (BCM_PAGE_BITS - 8)  << 4;
+
+	CNIC_WR(dev, BNX2_MQ_CONFIG, val);
+
+	CNIC_WR(dev, BNX2_HC_COMP_PROD_TRIP, (2 << 16) | 8);
+	CNIC_WR(dev, BNX2_HC_COM_TICKS, (64 << 16) | 220);
+	CNIC_WR(dev, BNX2_HC_CMD_TICKS, (64 << 16) | 220);
+
+	err = cnic_setup_5709_context(dev, 1);
+	if (err)
+		return err;
+
+	cnic_init_context(dev, KWQ_CID);
+	cnic_init_context(dev, KCQ_CID);
+
+	cp->kwq_cid_addr = GET_CID_ADDR(KWQ_CID);
+	cp->kwq_io_addr = MB_GET_CID_ADDR(KWQ_CID) + L5_KRNLQ_HOST_QIDX;
+
+	cp->max_kwq_idx = MAX_KWQ_IDX;
+	cp->kwq_prod_idx = 0;
+	cp->kwq_con_idx = 0;
+	cp->cnic_local_flags |= CNIC_LCL_FL_KWQ_INIT;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5706 || CHIP_NUM(cp) == CHIP_NUM_5708)
+		cp->kwq_con_idx_ptr = &sblk->status_rx_quick_consumer_index15;
+	else
+		cp->kwq_con_idx_ptr = &sblk->status_cmd_consumer_index;
+
+	/* Initialize the kernel work queue context. */
+	val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE |
+	      (BCM_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_TYPE, val);
+
+	val = (BCM_PAGE_SIZE / sizeof(struct kwqe) - 1) << 16;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val);
+
+	val = ((BCM_PAGE_SIZE / sizeof(struct kwqe)) << 16) | KWQ_PAGE_CNT;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val);
+
+	val = (u32) ((u64) cp->kwq_info.pgtbl_map >> 32);
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_HADDR_HI, val);
+
+	val = (u32) cp->kwq_info.pgtbl_map;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_HADDR_LO, val);
+
+	cp->kcq_cid_addr = GET_CID_ADDR(KCQ_CID);
+	cp->kcq_io_addr = MB_GET_CID_ADDR(KCQ_CID) + L5_KRNLQ_HOST_QIDX;
+
+	cp->kcq_prod_idx = 0;
+
+	/* Initialize the kernel complete queue context. */
+	val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE |
+	      (BCM_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_TYPE, val);
+
+	val = (BCM_PAGE_SIZE / sizeof(struct kcqe) - 1) << 16;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val);
+
+	val = ((BCM_PAGE_SIZE / sizeof(struct kcqe)) << 16) | KCQ_PAGE_CNT;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val);
+
+	val = (u32) ((u64) cp->kcq_info.pgtbl_map >> 32);
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_HADDR_HI, val);
+
+	val = (u32) cp->kcq_info.pgtbl_map;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_HADDR_LO, val);
+
+	cp->int_num = 0;
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		u32 sb_id = cp->status_blk_num;
+		u32 sb = BNX2_L2CTX_STATUSB_NUM(sb_id);
+
+		cp->int_num = sb_id << BNX2_PCICFG_INT_ACK_CMD_INT_NUM_SHIFT;
+		cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_HOST_QIDX, sb);
+		cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_HOST_QIDX, sb);
+	}
+
+	/* Enable Commnad Scheduler notification when we write to the
+	 * host producer index of the kernel contexts. */
+	CNIC_WR(dev, BNX2_MQ_KNL_CMD_MASK1, 2);
+
+	/* Enable Command Scheduler notification when we write to either
+	 * the Send Queue or Receive Queue producer indexes of the kernel
+	 * bypass contexts. */
+	CNIC_WR(dev, BNX2_MQ_KNL_BYP_CMD_MASK1, 7);
+	CNIC_WR(dev, BNX2_MQ_KNL_BYP_WRITE_MASK1, 7);
+
+	/* Notify COM when the driver post an application buffer. */
+	CNIC_WR(dev, BNX2_MQ_KNL_RX_V2P_MASK2, 0x2000);
+
+	/* Set the CP and COM doorbells.  These two processors polls the
+	 * doorbell for a non zero value before running.  This must be done
+	 * after setting up the kernel queue contexts. */
+	cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 1);
+	cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 1);
+
+	cnic_init_bnx2_tx_ring(dev);
+	cnic_init_bnx2_rx_ring(dev);
+
+	err = cnic_init_bnx2_irq(dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: cnic_init_irq failed\n",
+		       dev->netdev->name);
+		cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 0);
+		cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 0);
+		return err;
+	}
+
+	return 0;
+}
+
+static int cnic_start_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	int err;
+
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		return -EALREADY;
+
+	err = ethdev->drv_register_cnic(dev->netdev, cp->cnic_ops, dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: register_cnic failed\n",
+		       dev->netdev->name);
+		goto err2;
+	}
+
+	dev->regview = ethdev->io_base;
+	cp->chip_id = ethdev->chip_id;
+	pci_dev_get(dev->pcidev);
+	cp->func = PCI_FUNC(dev->pcidev->devfn);
+	cp->status_blk = ethdev->irq_arr[0].status_blk;
+	cp->status_blk_num = ethdev->irq_arr[0].status_blk_num;
+
+	err = cp->alloc_resc(dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: allocate resource failure\n",
+		       dev->netdev->name);
+		goto err1;
+	}
+
+	err = cp->start_hw(dev);
+	if (err)
+		goto err1;
+
+	err = cnic_cm_open(dev);
+	if (err)
+		goto err1;
+
+	set_bit(CNIC_F_CNIC_UP, &dev->flags);
+
+	cp->enable_int(dev);
+
+	return 0;
+
+err1:
+	ethdev->drv_unregister_cnic(dev->netdev);
+	cp->free_resc(dev);
+	pci_dev_put(dev->pcidev);
+err2:
+	return err;
+}
+
+static void cnic_stop_bnx2_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	cnic_disable_bnx2_int_sync(dev);
+
+	cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 0);
+	cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 0);
+
+	cnic_init_context(dev, KWQ_CID);
+	cnic_init_context(dev, KCQ_CID);
+
+	cnic_setup_5709_context(dev, 0);
+	cnic_free_irq(dev);
+
+	ethdev->drv_unregister_cnic(dev->netdev);
+
+	cnic_free_resc(dev);
+}
+
+static void cnic_stop_hw(struct cnic_dev *dev)
+{
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags)) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		clear_bit(CNIC_F_CNIC_UP, &dev->flags);
+		rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], NULL);
+		synchronize_rcu();
+		cnic_cm_shutdown(dev);
+		cp->stop_hw(dev);
+		pci_dev_put(dev->pcidev);
+	}
+}
+
+static void cnic_free_dev(struct cnic_dev *dev)
+{
+	int i = 0;
+
+	while ((atomic_read(&dev->ref_count) != 0) && i < 10) {
+		msleep(100);
+		i++;
+	}
+	if (atomic_read(&dev->ref_count) != 0)
+		printk(KERN_ERR PFX "%s: Failed waiting for ref count to go"
+				    " to zero.\n", dev->netdev->name);
+
+	printk(KERN_INFO PFX "Removed CNIC device: %s\n", dev->netdev->name);
+	dev_put(dev->netdev);
+	kfree(dev);
+}
+
+static struct cnic_dev *cnic_alloc_dev(struct net_device *dev,
+				       struct pci_dev *pdev)
+{
+	struct cnic_dev *cdev;
+	struct cnic_local *cp;
+	int alloc_size;
+
+	alloc_size = sizeof(struct cnic_dev) + sizeof(struct cnic_local);
+
+	cdev = kzalloc(alloc_size , GFP_KERNEL);
+	if (cdev == NULL) {
+		printk(KERN_ERR PFX "%s: allocate dev struct failure\n",
+		       dev->name);
+		return NULL;
+	}
+
+	cdev->netdev = dev;
+	cdev->cnic_priv = (char *)cdev + sizeof(struct cnic_dev);
+	cdev->register_device = cnic_register_device;
+	cdev->unregister_device = cnic_unregister_device;
+	cdev->iscsi_nl_msg_recv = cnic_iscsi_nl_msg_recv;
+
+	cp = cdev->cnic_priv;
+	cp->dev = cdev;
+	cp->uio_dev = -1;
+	cp->l2_single_buf_size = 0x400;
+	cp->l2_rx_ring_size = 3;
+
+	spin_lock_init(&cp->cnic_ulp_lock);
+
+	printk(KERN_INFO PFX "Added CNIC device: %s\n", dev->name);
+
+	return cdev;
+}
+
+static struct cnic_dev *init_bnx2_cnic(struct net_device *dev)
+{
+	struct pci_dev *pdev;
+	struct cnic_dev *cdev;
+	struct cnic_local *cp;
+	struct cnic_eth_dev *ethdev = NULL;
+	struct cnic_eth_dev *(*probe)(void *) = NULL;
+
+	probe = __symbol_get("bnx2_cnic_probe");
+	if (probe) {
+		ethdev = (*probe)(dev);
+		symbol_put_addr(probe);
+	}
+	if (!ethdev)
+		return NULL;
+
+	pdev = ethdev->pdev;
+	if (!pdev)
+		return NULL;
+
+	dev_hold(dev);
+	pci_dev_get(pdev);
+	if (pdev->device == PCI_DEVICE_ID_NX2_5709 ||
+	    pdev->device == PCI_DEVICE_ID_NX2_5709S) {
+		u8 rev;
+
+		pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
+		if (rev < 0x10) {
+			pci_dev_put(pdev);
+			goto cnic_err;
+		}
+	}
+	pci_dev_put(pdev);
+
+	cdev = cnic_alloc_dev(dev, pdev);
+	if (cdev == NULL)
+		goto cnic_err;
+
+	set_bit(CNIC_F_BNX2_CLASS, &cdev->flags);
+	cdev->submit_kwqes = cnic_submit_bnx2_kwqes;
+
+	cp = cdev->cnic_priv;
+	cp->ethdev = ethdev;
+	cdev->pcidev = pdev;
+
+	cp->cnic_ops = &cnic_bnx2_ops;
+	cp->start_hw = cnic_start_bnx2_hw;
+	cp->stop_hw = cnic_stop_bnx2_hw;
+	cp->setup_pgtbl = cnic_setup_page_tbl;
+	cp->alloc_resc = cnic_alloc_bnx2_resc;
+	cp->free_resc = cnic_free_resc;
+	cp->start_cm = cnic_cm_init_bnx2_hw;
+	cp->stop_cm = cnic_cm_stop_bnx2_hw;
+	cp->enable_int = cnic_enable_bnx2_int;
+	cp->disable_int_sync = cnic_disable_bnx2_int_sync;
+	cp->close_conn = cnic_close_bnx2_conn;
+	cp->next_idx = cnic_bnx2_next_idx;
+	cp->hw_idx = cnic_bnx2_hw_idx;
+	return cdev;
+
+cnic_err:
+	dev_put(dev);
+	return NULL;
+}
+
+static struct cnic_dev *is_cnic_dev(struct net_device *dev)
+{
+	struct ethtool_drvinfo drvinfo;
+	struct cnic_dev *cdev = NULL;
+
+	if (dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) {
+		memset(&drvinfo, 0, sizeof(drvinfo));
+		dev->ethtool_ops->get_drvinfo(dev, &drvinfo);
+
+		if (!strcmp(drvinfo.driver, "bnx2"))
+			cdev = init_bnx2_cnic(dev);
+		if (cdev) {
+			write_lock(&cnic_dev_lock);
+			list_add(&cdev->list, &cnic_dev_list);
+			write_unlock(&cnic_dev_lock);
+		}
+	}
+	return cdev;
+}
+
+/**
+ * netdev event handler
+ */
+static int cnic_netdev_event(struct notifier_block *this, unsigned long event,
+							 void *ptr)
+{
+	struct net_device *netdev = ptr;
+	struct cnic_dev *dev;
+	int if_type;
+	int new_dev = 0;
+
+	dev = cnic_from_netdev(netdev);
+
+	if (!dev && (event == NETDEV_REGISTER || event == NETDEV_UP)) {
+		/* Check for the hot-plug device */
+		dev = is_cnic_dev(netdev);
+		if (dev) {
+			new_dev = 1;
+			cnic_hold(dev);
+		}
+	}
+	if (dev) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (new_dev)
+			cnic_ulp_init(dev);
+		else if (event == NETDEV_UNREGISTER)
+			cnic_ulp_exit(dev);
+		else if (event == NETDEV_UP) {
+			mutex_lock(&cnic_lock);
+			if (!cnic_start_hw(dev))
+				cnic_ulp_start(dev);
+			mutex_unlock(&cnic_lock);
+		}
+
+		rcu_read_lock();
+		for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+			struct cnic_ulp_ops *ulp_ops;
+			void *ctx;
+
+			ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+			if (!ulp_ops || !ulp_ops->indicate_netevent)
+				continue;
+
+			ctx = cp->ulp_handle[if_type];
+
+			ulp_ops->indicate_netevent(ctx, event);
+		}
+		rcu_read_unlock();
+
+		if (event == NETDEV_GOING_DOWN) {
+			mutex_lock(&cnic_lock);
+			cnic_ulp_stop(dev);
+			cnic_stop_hw(dev);
+			mutex_unlock(&cnic_lock);
+		} else if (event == NETDEV_UNREGISTER) {
+			write_lock(&cnic_dev_lock);
+			list_del_init(&dev->list);
+			write_unlock(&cnic_dev_lock);
+
+			cnic_put(dev);
+			cnic_free_dev(dev);
+			goto done;
+		}
+		cnic_put(dev);
+	}
+done:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cnic_netdev_notifier = {
+	.notifier_call = cnic_netdev_event
+};
+
+static void cnic_release(void)
+{
+	struct cnic_dev *dev;
+
+	while (!list_empty(&cnic_dev_list)) {
+		dev = list_entry(cnic_dev_list.next, struct cnic_dev, list);
+		if (test_bit(CNIC_F_CNIC_UP, &dev->flags)) {
+			cnic_ulp_stop(dev);
+			cnic_stop_hw(dev);
+		}
+
+		cnic_ulp_exit(dev);
+		list_del_init(&dev->list);
+		cnic_free_dev(dev);
+	}
+}
+
+static int __init cnic_init(void)
+{
+	int rc = 0;
+
+	printk(KERN_INFO "%s", version);
+
+	rc = register_netdevice_notifier(&cnic_netdev_notifier);
+	if (rc) {
+		cnic_release();
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cnic_exit(void)
+{
+	unregister_netdevice_notifier(&cnic_netdev_notifier);
+	cnic_release();
+	return;
+}
+
+module_init(cnic_init);
+module_exit(cnic_exit);
diff --git a/drivers/net/cnic.h b/drivers/net/cnic.h
new file mode 100644
index 0000000..5192d4a
--- /dev/null
+++ b/drivers/net/cnic.h
@@ -0,0 +1,299 @@
+/* cnic.h: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+
+#ifndef CNIC_H
+#define CNIC_H
+
+#define KWQ_PAGE_CNT	4
+#define KCQ_PAGE_CNT	16
+
+#define KWQ_CID 		24
+#define KCQ_CID 		25
+
+/*
+ *	krnlq_context definition
+ */
+#define L5_KRNLQ_FLAGS	0x00000000
+#define L5_KRNLQ_SIZE	0x00000000
+#define L5_KRNLQ_TYPE	0x00000000
+#define KRNLQ_FLAGS_PG_SZ					(0xf<<0)
+#define KRNLQ_FLAGS_PG_SZ_256					(0<<0)
+#define KRNLQ_FLAGS_PG_SZ_512					(1<<0)
+#define KRNLQ_FLAGS_PG_SZ_1K					(2<<0)
+#define KRNLQ_FLAGS_PG_SZ_2K					(3<<0)
+#define KRNLQ_FLAGS_PG_SZ_4K					(4<<0)
+#define KRNLQ_FLAGS_PG_SZ_8K					(5<<0)
+#define KRNLQ_FLAGS_PG_SZ_16K					(6<<0)
+#define KRNLQ_FLAGS_PG_SZ_32K					(7<<0)
+#define KRNLQ_FLAGS_PG_SZ_64K					(8<<0)
+#define KRNLQ_FLAGS_PG_SZ_128K					(9<<0)
+#define KRNLQ_FLAGS_PG_SZ_256K					(10<<0)
+#define KRNLQ_FLAGS_PG_SZ_512K					(11<<0)
+#define KRNLQ_FLAGS_PG_SZ_1M					(12<<0)
+#define KRNLQ_FLAGS_PG_SZ_2M					(13<<0)
+#define KRNLQ_FLAGS_QE_SELF_SEQ					(1<<15)
+#define KRNLQ_SIZE_TYPE_SIZE	((((0x28 + 0x1f) & ~0x1f) / 0x20) << 16)
+#define KRNLQ_TYPE_TYPE						(0xf<<28)
+#define KRNLQ_TYPE_TYPE_EMPTY					(0<<28)
+#define KRNLQ_TYPE_TYPE_KRNLQ					(6<<28)
+
+#define L5_KRNLQ_HOST_QIDX		0x00000004
+#define L5_KRNLQ_HOST_FW_QIDX		0x00000008
+#define L5_KRNLQ_NX_QE_SELF_SEQ 	0x0000000c
+#define L5_KRNLQ_QE_SELF_SEQ_MAX	0x0000000c
+#define L5_KRNLQ_NX_QE_HADDR_HI 	0x00000010
+#define L5_KRNLQ_NX_QE_HADDR_LO 	0x00000014
+#define L5_KRNLQ_PGTBL_PGIDX		0x00000018
+#define L5_KRNLQ_NX_PG_QIDX 		0x00000018
+#define L5_KRNLQ_PGTBL_NPAGES		0x0000001c
+#define L5_KRNLQ_QIDX_INCR		0x0000001c
+#define L5_KRNLQ_PGTBL_HADDR_HI 	0x00000020
+#define L5_KRNLQ_PGTBL_HADDR_LO 	0x00000024
+
+#define BNX2_PG_CTX_MAP			0x1a0034
+#define BNX2_ISCSI_CTX_MAP		0x1a0074
+
+struct cnic_redirect_entry {
+	struct dst_entry *old_dst;
+	struct dst_entry *new_dst;
+};
+
+#define MAX_COMPLETED_KCQE	64
+
+#define MAX_CNIC_L5_CONTEXT	256
+
+#define MAX_CM_SK_TBL_SZ	MAX_CNIC_L5_CONTEXT
+
+#define MAX_ISCSI_TBL_SZ	256
+
+#define CNIC_LOCAL_PORT_MIN	60000
+#define CNIC_LOCAL_PORT_MAX	61000
+#define CNIC_LOCAL_PORT_RANGE	(CNIC_LOCAL_PORT_MAX - CNIC_LOCAL_PORT_MIN)
+
+#define KWQE_CNT (BCM_PAGE_SIZE / sizeof(struct kwqe))
+#define KCQE_CNT (BCM_PAGE_SIZE / sizeof(struct kcqe))
+#define MAX_KWQE_CNT (KWQE_CNT - 1)
+#define MAX_KCQE_CNT (KCQE_CNT - 1)
+
+#define MAX_KWQ_IDX	((KWQ_PAGE_CNT * KWQE_CNT) - 1)
+#define MAX_KCQ_IDX	((KCQ_PAGE_CNT * KCQE_CNT) - 1)
+
+#define KWQ_PG(x) (((x) & ~MAX_KWQE_CNT) >> (BCM_PAGE_BITS - 5))
+#define KWQ_IDX(x) ((x) & MAX_KWQE_CNT)
+
+#define KCQ_PG(x) (((x) & ~MAX_KCQE_CNT) >> (BCM_PAGE_BITS - 5))
+#define KCQ_IDX(x) ((x) & MAX_KCQE_CNT)
+
+#define BNX2X_NEXT_KCQE(x) (((x) & (MAX_KCQE_CNT - 1)) ==		\
+		(MAX_KCQE_CNT - 1)) ?					\
+		(x) + 2 : (x) + 1
+
+#define BNX2X_KWQ_DATA_PG(cp, x) ((x) / (cp)->kwq_16_data_pp)
+#define BNX2X_KWQ_DATA_IDX(cp, x) ((x) % (cp)->kwq_16_data_pp)
+#define BNX2X_KWQ_DATA(cp, x)						\
+	&(cp)->kwq_16_data[BNX2X_KWQ_DATA_PG(cp, x)][BNX2X_KWQ_DATA_IDX(cp, x)]
+
+#define DEF_IPID_COUNT		0xc001
+
+#define DEF_KA_TIMEOUT		10000
+#define DEF_KA_INTERVAL		300000
+#define DEF_KA_MAX_PROBE_COUNT	3
+#define DEF_TOS			0
+#define DEF_TTL			0xfe
+#define DEF_SND_SEQ_SCALE	0
+#define DEF_RCV_BUF		0xffff
+#define DEF_SND_BUF		0xffff
+#define DEF_SEED		0
+#define DEF_MAX_RT_TIME		500
+#define DEF_MAX_DA_COUNT	2
+#define DEF_SWS_TIMER		1000
+#define DEF_MAX_CWND		0xffff
+
+struct cnic_ctx {
+	u32		cid;
+	void		*ctx;
+	dma_addr_t	mapping;
+};
+
+#define BNX2_MAX_CID		0x2000
+
+struct cnic_dma {
+	int		num_pages;
+	void		**pg_arr;
+	dma_addr_t	*pg_map_arr;
+	int		pgtbl_size;
+	u32		*pgtbl;
+	dma_addr_t	pgtbl_map;
+};
+
+struct cnic_id_tbl {
+	spinlock_t	lock;
+	u32		start;
+	u32		max;
+	u32		next;
+	unsigned long	*table;
+};
+
+#define CNIC_KWQ16_DATA_SIZE	128
+
+struct kwqe_16_data {
+	u8	data[CNIC_KWQ16_DATA_SIZE];
+};
+
+struct cnic_iscsi {
+	struct cnic_dma		task_array_info;
+	struct cnic_dma		r2tq_info;
+	struct cnic_dma		hq_info;
+};
+
+struct cnic_context {
+	u32			cid;
+	struct kwqe_16_data	*kwqe_data;
+	dma_addr_t		kwqe_data_mapping;
+	wait_queue_head_t	waitq;
+	int			wait_cond;
+	unsigned long		timestamp;
+	u32			ctx_flags;
+#define	CTX_FL_OFFLD_START	0x00000001
+	u8			ulp_proto_id;
+	union {
+		struct cnic_iscsi	*iscsi;
+	} proto;
+};
+
+struct cnic_local {
+
+	spinlock_t cnic_ulp_lock;
+	void *ulp_handle[MAX_CNIC_ULP_TYPE];
+	unsigned long ulp_flags[MAX_CNIC_ULP_TYPE];
+#define ULP_F_INIT	0
+#define ULP_F_START	1
+	struct cnic_ulp_ops *ulp_ops[MAX_CNIC_ULP_TYPE];
+
+	/* protected by ulp_lock */
+	u32 cnic_local_flags;
+#define	CNIC_LCL_FL_KWQ_INIT	0x00000001
+
+	struct cnic_dev *dev;
+
+	struct cnic_eth_dev *ethdev;
+
+	void		*l2_ring;
+	dma_addr_t	l2_ring_map;
+	int		l2_ring_size;
+	int		l2_rx_ring_size;
+
+	void		*l2_buf;
+	dma_addr_t	l2_buf_map;
+	int		l2_buf_size;
+	int		l2_single_buf_size;
+
+	u16		*rx_cons_ptr;
+	u16		*tx_cons_ptr;
+	u16		rx_cons;
+	u16		tx_cons;
+
+	u32 kwq_cid_addr;
+	u32 kcq_cid_addr;
+
+	struct cnic_dma		kwq_info;
+	struct kwqe		**kwq;
+
+	struct cnic_dma		kwq_16_data_info;
+
+	u16		max_kwq_idx;
+
+	u16		kwq_prod_idx;
+	u32		kwq_io_addr;
+
+	u16		*kwq_con_idx_ptr;
+	u16		kwq_con_idx;
+
+	struct cnic_dma	kcq_info;
+	struct kcqe	**kcq;
+
+	u16		kcq_prod_idx;
+	u32		kcq_io_addr;
+
+	void				*status_blk;
+	struct status_block_msix	*bnx2_status_blk;
+	struct host_status_block	*bnx2x_status_blk;
+
+	u32				status_blk_num;
+	u32				int_num;
+	u32				last_status_idx;
+	struct tasklet_struct		cnic_irq_task;
+
+	struct kcqe		*completed_kcq[MAX_COMPLETED_KCQE];
+
+	struct cnic_sock	*csk_tbl;
+	struct cnic_id_tbl	csk_port_tbl;
+
+	struct cnic_dma		conn_buf_info;
+	struct cnic_dma		gbl_buf_info;
+
+	struct cnic_iscsi	*iscsi_tbl;
+	struct cnic_context	*ctx_tbl;
+	struct cnic_id_tbl	cid_tbl;
+	int			max_iscsi_conn;
+	atomic_t		iscsi_conn;
+
+	/* per connection parameters */
+	int			num_iscsi_tasks;
+	int			num_ccells;
+	int			task_array_size;
+	int			r2tq_size;
+	int			hq_size;
+	int			num_cqs;
+
+	struct cnic_ctx		*ctx_arr;
+	int			ctx_blks;
+	int			ctx_blk_size;
+	int			cids_per_blk;
+
+	u32			chip_id;
+	int			func;
+	u32			shmem_base;
+
+	u32			uio_dev;
+	struct uio_info		*cnic_uinfo;
+
+	struct cnic_ops		*cnic_ops;
+	int			(*start_hw)(struct cnic_dev *);
+	void			(*stop_hw)(struct cnic_dev *);
+	void			(*setup_pgtbl)(struct cnic_dev *,
+					       struct cnic_dma *);
+	int			(*alloc_resc)(struct cnic_dev *);
+	void			(*free_resc)(struct cnic_dev *);
+	int			(*start_cm)(struct cnic_dev *);
+	void			(*stop_cm)(struct cnic_dev *);
+	void			(*enable_int)(struct cnic_dev *);
+	void			(*disable_int_sync)(struct cnic_dev *);
+	void			(*ack_int)(struct cnic_dev *);
+	void			(*close_conn)(struct cnic_sock *, u32 opcode);
+	u16			(*next_idx)(u16);
+	u16			(*hw_idx)(u16);
+};
+
+struct bnx2x_bd_chain_next {
+	u32	addr_lo;
+	u32	addr_hi;
+	u8	reserved[8];
+};
+
+#define ISCSI_RAMROD_CMD_ID_UPDATE_CONN		(ISCSI_KCQE_OPCODE_UPDATE_CONN)
+#define ISCSI_RAMROD_CMD_ID_INIT		(ISCSI_KCQE_OPCODE_INIT)
+
+#define CDU_REGION_NUMBER_XCM_AG 2
+#define CDU_REGION_NUMBER_UCM_AG 4
+
+#endif
+
diff --git a/drivers/net/cnic_defs.h b/drivers/net/cnic_defs.h
new file mode 100644
index 0000000..cee80f6
--- /dev/null
+++ b/drivers/net/cnic_defs.h
@@ -0,0 +1,580 @@
+
+/* cnic.c: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+#ifndef CNIC_DEFS_H
+#define CNIC_DEFS_H
+
+/* KWQ (kernel work queue) request op codes */
+#define L2_KWQE_OPCODE_VALUE_FLUSH                  (4)
+
+#define L4_KWQE_OPCODE_VALUE_CONNECT1               (50)
+#define L4_KWQE_OPCODE_VALUE_CONNECT2               (51)
+#define L4_KWQE_OPCODE_VALUE_CONNECT3               (52)
+#define L4_KWQE_OPCODE_VALUE_RESET                  (53)
+#define L4_KWQE_OPCODE_VALUE_CLOSE                  (54)
+#define L4_KWQE_OPCODE_VALUE_UPDATE_SECRET          (60)
+#define L4_KWQE_OPCODE_VALUE_INIT_ULP               (61)
+
+#define L4_KWQE_OPCODE_VALUE_OFFLOAD_PG             (1)
+#define L4_KWQE_OPCODE_VALUE_UPDATE_PG              (9)
+#define L4_KWQE_OPCODE_VALUE_UPLOAD_PG              (14)
+
+#define L5CM_RAMROD_CMD_ID_BASE			(0x80)
+#define L5CM_RAMROD_CMD_ID_TCP_CONNECT		(L5CM_RAMROD_CMD_ID_BASE + 3)
+#define L5CM_RAMROD_CMD_ID_CLOSE		(L5CM_RAMROD_CMD_ID_BASE + 12)
+#define L5CM_RAMROD_CMD_ID_ABORT		(L5CM_RAMROD_CMD_ID_BASE + 13)
+#define L5CM_RAMROD_CMD_ID_SEARCHER_DELETE	(L5CM_RAMROD_CMD_ID_BASE + 14)
+#define L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD	(L5CM_RAMROD_CMD_ID_BASE + 15)
+
+/* KCQ (kernel completion queue) response op codes */
+#define L4_KCQE_OPCODE_VALUE_CLOSE_COMP             (53)
+#define L4_KCQE_OPCODE_VALUE_RESET_COMP             (54)
+#define L4_KCQE_OPCODE_VALUE_FW_TCP_UPDATE          (55)
+#define L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE       (56)
+#define L4_KCQE_OPCODE_VALUE_RESET_RECEIVED         (57)
+#define L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED         (58)
+#define L4_KCQE_OPCODE_VALUE_INIT_ULP               (61)
+
+#define L4_KCQE_OPCODE_VALUE_OFFLOAD_PG             (1)
+#define L4_KCQE_OPCODE_VALUE_UPDATE_PG              (9)
+#define L4_KCQE_OPCODE_VALUE_UPLOAD_PG              (14)
+
+/* KCQ (kernel completion queue) completion status */
+#define L4_KCQE_COMPLETION_STATUS_SUCCESS		    (0)
+#define L4_KCQE_COMPLETION_STATUS_TIMEOUT        (0x93)
+
+#define L4_LAYER_CODE (4)
+#define L2_LAYER_CODE (2)
+
+/*
+ * L4 KCQ CQE
+ */
+struct l4_kcq {
+	u32 cid;
+	u32 pg_cid;
+	u32 conn_id;
+	u32 pg_host_opaque;
+#if defined(__BIG_ENDIAN)
+	u16 status;
+	u16 reserved1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved1;
+	u16 status;
+#endif
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KCQ_RESERVED3 (0x7<<0)
+#define L4_KCQ_RESERVED3_SHIFT 0
+#define L4_KCQ_RAMROD_COMPLETION (0x1<<3) /* Everest only */
+#define L4_KCQ_RAMROD_COMPLETION_SHIFT 3
+#define L4_KCQ_LAYER_CODE (0x7<<4)
+#define L4_KCQ_LAYER_CODE_SHIFT 4
+#define L4_KCQ_RESERVED4 (0x1<<7)
+#define L4_KCQ_RESERVED4_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define L4_KCQ_RESERVED3 (0xF<<0)
+#define L4_KCQ_RESERVED3_SHIFT 0
+#define L4_KCQ_RAMROD_COMPLETION (0x1<<3) /* Everest only */
+#define L4_KCQ_RAMROD_COMPLETION_SHIFT 3
+#define L4_KCQ_LAYER_CODE (0x7<<4)
+#define L4_KCQ_LAYER_CODE_SHIFT 4
+#define L4_KCQ_RESERVED4 (0x1<<7)
+#define L4_KCQ_RESERVED4_SHIFT 7
+#endif
+};
+
+
+/*
+ * L4 KCQ CQE PG upload
+ */
+struct l4_kcq_upload_pg {
+	u32 pg_cid;
+#if defined(__BIG_ENDIAN)
+	u16 pg_status;
+	u16 pg_ipid_count;
+#elif defined(__LITTLE_ENDIAN)
+	u16 pg_ipid_count;
+	u16 pg_status;
+#endif
+	u32 reserved1[5];
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KCQ_UPLOAD_PG_RESERVED3 (0xF<<0)
+#define L4_KCQ_UPLOAD_PG_RESERVED3_SHIFT 0
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KCQ_UPLOAD_PG_RESERVED4 (0x1<<7)
+#define L4_KCQ_UPLOAD_PG_RESERVED4_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define L4_KCQ_UPLOAD_PG_RESERVED3 (0xF<<0)
+#define L4_KCQ_UPLOAD_PG_RESERVED3_SHIFT 0
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KCQ_UPLOAD_PG_RESERVED4 (0x1<<7)
+#define L4_KCQ_UPLOAD_PG_RESERVED4_SHIFT 7
+#endif
+};
+
+
+/*
+ * Gracefully close the connection request
+ */
+struct l4_kwq_close_req {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CLOSE_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_CLOSE_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CLOSE_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_CLOSE_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+
+/*
+ * The first request to be passed in order to establish connection in option2
+ */
+struct l4_kwq_connect_req1 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ1_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ1_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u8 reserved0;
+	u8 conn_flags;
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_IP_V6 (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_IP_V6_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_RSRV (0x1F<<3)
+#define L4_KWQ_CONNECT_REQ1_RSRV_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 conn_flags;
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_IP_V6 (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_IP_V6_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_RSRV (0x1F<<3)
+#define L4_KWQ_CONNECT_REQ1_RSRV_SHIFT 3
+	u8 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ1_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ1_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 pg_cid;
+	u32 src_ip;
+	u32 dst_ip;
+#if defined(__BIG_ENDIAN)
+	u16 dst_port;
+	u16 src_port;
+#elif defined(__LITTLE_ENDIAN)
+	u16 src_port;
+	u16 dst_port;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 rsrv1[3];
+	u8 tcp_flags;
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP (0x1<<3)
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP_SHIFT 3
+#define L4_KWQ_CONNECT_REQ1_SACK (0x1<<4)
+#define L4_KWQ_CONNECT_REQ1_SACK_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING (0x1<<5)
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING_SHIFT 5
+#define L4_KWQ_CONNECT_REQ1_RESERVED2 (0x3<<6)
+#define L4_KWQ_CONNECT_REQ1_RESERVED2_SHIFT 6
+#elif defined(__LITTLE_ENDIAN)
+	u8 tcp_flags;
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP (0x1<<3)
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP_SHIFT 3
+#define L4_KWQ_CONNECT_REQ1_SACK (0x1<<4)
+#define L4_KWQ_CONNECT_REQ1_SACK_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING (0x1<<5)
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING_SHIFT 5
+#define L4_KWQ_CONNECT_REQ1_RESERVED2 (0x3<<6)
+#define L4_KWQ_CONNECT_REQ1_RESERVED2_SHIFT 6
+	u8 rsrv1[3];
+#endif
+	u32 rsrv2;
+};
+
+
+/*
+ * The second ( optional )request to be passed in order to establish
+ * connection in option2 - for IPv6 only
+ */
+struct l4_kwq_connect_req2 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ2_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ2_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u8 reserved0;
+	u8 rsrv;
+#elif defined(__LITTLE_ENDIAN)
+	u8 rsrv;
+	u8 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ2_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ2_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 reserved2;
+	u32 src_ip_v6_2;
+	u32 src_ip_v6_3;
+	u32 src_ip_v6_4;
+	u32 dst_ip_v6_2;
+	u32 dst_ip_v6_3;
+	u32 dst_ip_v6_4;
+};
+
+
+/*
+ * The third ( and last )request to be passed in order to establish
+ * connection in option2
+ */
+struct l4_kwq_connect_req3 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ3_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ3_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ3_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ3_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 ka_timeout;
+	u32 ka_interval ;
+#if defined(__BIG_ENDIAN)
+	u8 snd_seq_scale;
+	u8 ttl;
+	u8 tos;
+	u8 ka_max_probe_count;
+#elif defined(__LITTLE_ENDIAN)
+	u8 ka_max_probe_count;
+	u8 tos;
+	u8 ttl;
+	u8 snd_seq_scale;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 pmtu;
+	u16 mss;
+#elif defined(__LITTLE_ENDIAN)
+	u16 mss;
+	u16 pmtu;
+#endif
+	u32 rcv_buf;
+	u32 snd_buf;
+	u32 seed;
+};
+
+
+/*
+ * a KWQE request to offload a PG connection
+ */
+struct l4_kwq_offload_pg {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_OFFLOAD_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_OFFLOAD_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_OFFLOAD_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_OFFLOAD_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT_SHIFT 7
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 l2hdr_nbytes;
+	u8 pg_flags;
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP (0x1<<0)
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING (0x1<<1)
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING_SHIFT 1
+#define L4_KWQ_OFFLOAD_PG_RESERVED2 (0x3F<<2)
+#define L4_KWQ_OFFLOAD_PG_RESERVED2_SHIFT 2
+	u8 da0;
+	u8 da1;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da1;
+	u8 da0;
+	u8 pg_flags;
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP (0x1<<0)
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING (0x1<<1)
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING_SHIFT 1
+#define L4_KWQ_OFFLOAD_PG_RESERVED2 (0x3F<<2)
+#define L4_KWQ_OFFLOAD_PG_RESERVED2_SHIFT 2
+	u8 l2hdr_nbytes;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 da2;
+	u8 da3;
+	u8 da4;
+	u8 da5;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da5;
+	u8 da4;
+	u8 da3;
+	u8 da2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 sa0;
+	u8 sa1;
+	u8 sa2;
+	u8 sa3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 sa3;
+	u8 sa2;
+	u8 sa1;
+	u8 sa0;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 sa4;
+	u8 sa5;
+	u16 etype;
+#elif defined(__LITTLE_ENDIAN)
+	u16 etype;
+	u8 sa5;
+	u8 sa4;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 vlan_tag;
+	u16 ipid_start;
+#elif defined(__LITTLE_ENDIAN)
+	u16 ipid_start;
+	u16 vlan_tag;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 ipid_count;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 ipid_count;
+#endif
+	u32 host_opaque;
+};
+
+
+/*
+ * Abortively close the connection request
+ */
+struct l4_kwq_reset_req {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_RESET_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_RESET_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_RESET_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_RESET_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_RESET_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_RESET_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+
+/*
+ * a KWQE request to update a PG connection
+ */
+struct l4_kwq_update_pg {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_UPDATE_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPDATE_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_UPDATE_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT_SHIFT 7
+	u8 opcode;
+	u16 oper16;
+#elif defined(__LITTLE_ENDIAN)
+	u16 oper16;
+	u8 opcode;
+	u8 flags;
+#define L4_KWQ_UPDATE_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPDATE_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_UPDATE_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 pg_cid;
+	u32 pg_host_opaque;
+#if defined(__BIG_ENDIAN)
+	u8 pg_valids;
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT (0x1<<0)
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT_SHIFT 0
+#define L4_KWQ_UPDATE_PG_VALIDS_DA (0x1<<1)
+#define L4_KWQ_UPDATE_PG_VALIDS_DA_SHIFT 1
+#define L4_KWQ_UPDATE_PG_RESERVERD2 (0x3F<<2)
+#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
+	u8 pg_unused_a;
+	u16 pg_ipid_count;
+#elif defined(__LITTLE_ENDIAN)
+	u16 pg_ipid_count;
+	u8 pg_unused_a;
+	u8 pg_valids;
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT (0x1<<0)
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT_SHIFT 0
+#define L4_KWQ_UPDATE_PG_VALIDS_DA (0x1<<1)
+#define L4_KWQ_UPDATE_PG_VALIDS_DA_SHIFT 1
+#define L4_KWQ_UPDATE_PG_RESERVERD2 (0x3F<<2)
+#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserverd3;
+	u8 da0;
+	u8 da1;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da1;
+	u8 da0;
+	u16 reserverd3;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 da2;
+	u8 da3;
+	u8 da4;
+	u8 da5;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da5;
+	u8 da4;
+	u8 da3;
+	u8 da2;
+#endif
+	u32 reserved4;
+	u32 reserved5;
+};
+
+
+/*
+ * a KWQE request to upload a PG or L4 context
+ */
+struct l4_kwq_upload {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_UPLOAD_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPLOAD_RESERVED1_SHIFT 0
+#define L4_KWQ_UPLOAD_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPLOAD_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT_SHIFT 7
+	u8 opcode;
+	u16 oper16;
+#elif defined(__LITTLE_ENDIAN)
+	u16 oper16;
+	u8 opcode;
+	u8 flags;
+#define L4_KWQ_UPLOAD_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPLOAD_RESERVED1_SHIFT 0
+#define L4_KWQ_UPLOAD_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPLOAD_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+#endif /* CNIC_DEFS_H */
diff --git a/drivers/net/cnic_if.h b/drivers/net/cnic_if.h
new file mode 100644
index 0000000..0638096
--- /dev/null
+++ b/drivers/net/cnic_if.h
@@ -0,0 +1,299 @@
+/* cnic_if.h: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+
+#ifndef CNIC_IF_H
+#define CNIC_IF_H
+
+#define CNIC_MODULE_VERSION	"2.0.0"
+#define CNIC_MODULE_RELDATE	"May 21, 2009"
+
+#define CNIC_ULP_RDMA		0
+#define CNIC_ULP_ISCSI		1
+#define CNIC_ULP_L4		2
+#define MAX_CNIC_ULP_TYPE_EXT	2
+#define MAX_CNIC_ULP_TYPE	3
+
+struct kwqe {
+	u32 kwqe_op_flag;
+
+#define KWQE_OPCODE_MASK	0x00ff0000
+#define KWQE_OPCODE_SHIFT	16
+#define KWQE_FLAGS_LAYER_SHIFT	28
+#define KWQE_OPCODE(x)		((x & KWQE_OPCODE_MASK) >> KWQE_OPCODE_SHIFT)
+
+	u32 kwqe_info0;
+	u32 kwqe_info1;
+	u32 kwqe_info2;
+	u32 kwqe_info3;
+	u32 kwqe_info4;
+	u32 kwqe_info5;
+	u32 kwqe_info6;
+};
+
+struct kwqe_16 {
+	u32 kwqe_info0;
+	u32 kwqe_info1;
+	u32 kwqe_info2;
+	u32 kwqe_info3;
+};
+
+struct kcqe {
+	u32 kcqe_info0;
+	u32 kcqe_info1;
+	u32 kcqe_info2;
+	u32 kcqe_info3;
+	u32 kcqe_info4;
+	u32 kcqe_info5;
+	u32 kcqe_info6;
+	u32 kcqe_op_flag;
+		#define KCQE_RAMROD_COMPLETION		(0x1<<27) /* Everest */
+		#define KCQE_FLAGS_LAYER_MASK		(0x7<<28)
+		#define KCQE_FLAGS_LAYER_MASK_MISC	(0<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L2	(2<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L3	(3<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L4	(4<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L5_RDMA	(5<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L5_ISCSI	(6<<28)
+		#define KCQE_FLAGS_NEXT 		(1<<31)
+		#define KCQE_FLAGS_OPCODE_MASK		(0xff<<16)
+		#define KCQE_FLAGS_OPCODE_SHIFT		(16)
+		#define KCQE_OPCODE(op)			\
+		(((op) & KCQE_FLAGS_OPCODE_MASK) >> KCQE_FLAGS_OPCODE_SHIFT)
+};
+
+#define MAX_CNIC_CTL_DATA	64
+#define MAX_DRV_CTL_DATA	64
+
+#define CNIC_CTL_STOP_CMD		1
+#define CNIC_CTL_START_CMD		2
+#define CNIC_CTL_COMPLETION_CMD		3
+
+#define DRV_CTL_IO_WR_CMD		0x101
+#define DRV_CTL_IO_RD_CMD		0x102
+#define DRV_CTL_CTX_WR_CMD		0x103
+#define DRV_CTL_CTXTBL_WR_CMD		0x104
+#define DRV_CTL_COMPLETION_CMD		0x105
+
+struct cnic_ctl_completion {
+	u32	cid;
+};
+
+struct drv_ctl_completion {
+	u32	comp_count;
+};
+
+struct cnic_ctl_info {
+	int	cmd;
+	union {
+		struct cnic_ctl_completion comp;
+		char bytes[MAX_CNIC_CTL_DATA];
+	} data;
+};
+
+struct drv_ctl_io {
+	u32		cid_addr;
+	u32		offset;
+	u32		data;
+	dma_addr_t	dma_addr;
+};
+
+struct drv_ctl_info {
+	int	cmd;
+	union {
+		struct drv_ctl_completion comp;
+		struct drv_ctl_io io;
+		char bytes[MAX_DRV_CTL_DATA];
+	} data;
+};
+
+struct cnic_ops {
+	struct module	*cnic_owner;
+	/* Calls to these functions are protected by RCU.  When
+	 * unregistering, we wait for any calls to complete before
+	 * continuing.
+	 */
+	int		(*cnic_handler)(void *, void *);
+	int		(*cnic_ctl)(void *, struct cnic_ctl_info *);
+};
+
+#define MAX_CNIC_VEC	8
+
+struct cnic_irq {
+	unsigned int	vector;
+	void		*status_blk;
+	u32		status_blk_num;
+	u32		irq_flags;
+#define CNIC_IRQ_FL_MSIX		0x00000001
+};
+
+struct cnic_eth_dev {
+	struct module	*drv_owner;
+	u32		drv_state;
+#define CNIC_DRV_STATE_REGD		0x00000001
+#define CNIC_DRV_STATE_USING_MSIX	0x00000002
+	u32		chip_id;
+	u32		max_kwqe_pending;
+	struct pci_dev	*pdev;
+	void __iomem	*io_base;
+
+	u32		ctx_tbl_offset;
+	u32		ctx_tbl_len;
+	int		ctx_blk_size;
+	u32		starting_cid;
+	u32		max_iscsi_conn;
+	u32		max_fcoe_conn;
+	u32		max_rdma_conn;
+	u32		reserved0[2];
+
+	int		num_irq;
+	struct cnic_irq	irq_arr[MAX_CNIC_VEC];
+	int		(*drv_register_cnic)(struct net_device *,
+					     struct cnic_ops *, void *);
+	int		(*drv_unregister_cnic)(struct net_device *);
+	int		(*drv_submit_kwqes_32)(struct net_device *,
+					       struct kwqe *[], u32);
+	int		(*drv_submit_kwqes_16)(struct net_device *,
+					       struct kwqe_16 *[], u32);
+	int		(*drv_ctl)(struct net_device *, struct drv_ctl_info *);
+	unsigned long	reserved1[2];
+};
+
+struct cnic_sockaddr {
+	union {
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} local;
+	union {
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} remote;
+};
+
+struct cnic_sock {
+	struct cnic_dev *dev;
+	void	*context;
+	u32	src_ip[4];
+	u32	dst_ip[4];
+	u16	src_port;
+	u16	dst_port;
+	u16	vlan_id;
+	unsigned char old_ha[6];
+	unsigned char ha[6];
+	u32	mtu;
+	u32	cid;
+	u32	l5_cid;
+	u32	pg_cid;
+	int	ulp_type;
+
+	u32	ka_timeout;
+	u32	ka_interval;
+	u8	ka_max_probe_count;
+	u8	tos;
+	u8	ttl;
+	u8	snd_seq_scale;
+	u32	rcv_buf;
+	u32	snd_buf;
+	u32	seed;
+
+	unsigned long	tcp_flags;
+#define SK_TCP_NO_DELAY_ACK	0x1
+#define SK_TCP_KEEP_ALIVE	0x2
+#define SK_TCP_NAGLE		0x4
+#define SK_TCP_TIMESTAMP	0x8
+#define SK_TCP_SACK		0x10
+#define SK_TCP_SEG_SCALING	0x20
+	unsigned long	flags;
+#define SK_F_INUSE		0
+#define SK_F_OFFLD_COMPLETE	1
+#define SK_F_OFFLD_SCHED	2
+#define SK_F_PG_OFFLD_COMPLETE	3
+#define SK_F_CONNECT_START	4
+#define SK_F_IPV6		5
+#define SK_F_CLOSING		7
+
+	atomic_t ref_count;
+	u32 state;
+	struct kwqe kwqe1;
+	struct kwqe kwqe2;
+	struct kwqe kwqe3;
+};
+
+struct cnic_dev {
+	struct net_device	*netdev;
+	struct pci_dev		*pcidev;
+	void __iomem		*regview;
+	struct list_head	list;
+
+	int (*register_device)(struct cnic_dev *dev, int ulp_type,
+			       void *ulp_ctx);
+	int (*unregister_device)(struct cnic_dev *dev, int ulp_type);
+	int (*submit_kwqes)(struct cnic_dev *dev, struct kwqe *wqes[],
+				u32 num_wqes);
+	int (*submit_kwqes_16)(struct cnic_dev *dev, struct kwqe_16 *wqes[],
+				u32 num_wqes);
+
+	int (*cm_create)(struct cnic_dev *, int, u32, u32, struct cnic_sock **,
+			 void *);
+	int (*cm_destroy)(struct cnic_sock *);
+	int (*cm_connect)(struct cnic_sock *, struct cnic_sockaddr *);
+	int (*cm_abort)(struct cnic_sock *);
+	int (*cm_close)(struct cnic_sock *);
+	struct cnic_dev *(*cm_select_dev)(struct sockaddr_in *, int ulp_type);
+	int (*iscsi_nl_msg_recv)(struct cnic_dev *dev, u32 msg_type,
+				 char *data, u16 data_size);
+	unsigned long	flags;
+#define CNIC_F_CNIC_UP		1
+#define CNIC_F_BNX2_CLASS	3
+#define CNIC_F_BNX2X_CLASS	4
+	atomic_t	ref_count;
+	u8		mac_addr[6];
+
+	int		max_iscsi_conn;
+	int		max_fcoe_conn;
+	int		max_rdma_conn;
+
+	void		*cnic_priv;
+};
+
+#define CNIC_WR(dev, off, val)		writel(val, dev->regview + off)
+#define CNIC_WR16(dev, off, val)	writew(val, dev->regview + off)
+#define CNIC_WR8(dev, off, val)		writeb(val, dev->regview + off)
+#define CNIC_RD(dev, off)		readl(dev->regview + off)
+#define CNIC_RD16(dev, off)		readw(dev->regview + off)
+
+struct cnic_ulp_ops {
+	/* Calls to these functions are protected by RCU.  When
+	 * unregistering, we wait for any calls to complete before
+	 * continuing.
+	 */
+
+	void (*cnic_init)(struct cnic_dev *dev);
+	void (*cnic_exit)(struct cnic_dev *dev);
+	void (*cnic_start)(void *ulp_ctx);
+	void (*cnic_stop)(void *ulp_ctx);
+	void (*indicate_kcqes)(void *ulp_ctx, struct kcqe *cqes[],
+				u32 num_cqes);
+	void (*indicate_netevent)(void *ulp_ctx, unsigned long event);
+	void (*cm_connect_complete)(struct cnic_sock *);
+	void (*cm_close_complete)(struct cnic_sock *);
+	void (*cm_abort_complete)(struct cnic_sock *);
+	void (*cm_remote_close)(struct cnic_sock *);
+	void (*cm_remote_abort)(struct cnic_sock *);
+	void (*iscsi_nl_send_msg)(struct cnic_dev *dev, u32 msg_type,
+				  char *data, u16 data_size);
+	struct module *owner;
+};
+
+extern int cnic_register_driver(int ulp_type, struct cnic_ulp_ops *ulp_ops);
+
+extern int cnic_unregister_driver(int ulp_type);
+
+#endif
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 8247a94..3b19e0c 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -66,7 +66,6 @@
 #define RX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define TX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define EarlyTxThld	0x3F	/* 0x3F means NO early transmit */
-#define RxPacketMaxSize	0x3FE8	/* 16K - 1 - ETH_HLEN - VLAN - CRC... */
 #define SafeMtu		0x1c20	/* ... actually life sucks beyond ~7k */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
@@ -2357,10 +2356,10 @@
 	return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr)
+static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
 {
 	/* Low hurts. Let's disable the filtering. */
-	RTL_W16(RxMaxSize, 16383);
+	RTL_W16(RxMaxSize, rx_buf_sz);
 }
 
 static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
@@ -2407,7 +2406,7 @@
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_01) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_02) ||
@@ -2668,7 +2667,7 @@
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -2846,7 +2845,7 @@
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d1d479..7fa620dd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -845,6 +845,10 @@
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -905,25 +909,19 @@
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
-	if (IS_ERR(vi->rvq)) {
-		err = PTR_ERR(vi->rvq);
-		goto free;
-	}
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
 
-	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
-	if (IS_ERR(vi->svq)) {
-		err = PTR_ERR(vi->svq);
-		goto free_recv;
-	}
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		goto free;
+
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vdev->config->find_vq(vdev, 2, NULL);
-		if (IS_ERR(vi->cvq)) {
-			err = PTR_ERR(vi->svq);
-			goto free_send;
-		}
+		vi->cvq = vqs[2];
 
 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 			dev->features |= NETIF_F_HW_VLAN_FILTER;
@@ -941,7 +939,7 @@
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_ctrl;
+		goto free_vqs;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -962,13 +960,8 @@
 
 unregister:
 	unregister_netdev(dev);
-free_ctrl:
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
-free_send:
-	vdev->config->del_vq(vi->svq);
-free_recv:
-	vdev->config->del_vq(vi->rvq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	free_netdev(dev);
 	return err;
@@ -994,12 +987,10 @@
 
 	BUG_ON(vi->num != 0);
 
-	vdev->config->del_vq(vi->svq);
-	vdev->config->del_vq(vi->rvq);
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
 	unregister_netdev(vi->dev);
 
+	vdev->config->del_vqs(vi->vdev);
+
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
 
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index f821dbc..27f3b81 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -1,21 +1,21 @@
 config OF_DEVICE
 	def_bool y
-	depends on OF && (SPARC || PPC_OF)
+	depends on OF && (SPARC || PPC_OF || MICROBLAZE)
 
 config OF_GPIO
 	def_bool y
-	depends on OF && PPC_OF && GPIOLIB
+	depends on OF && (PPC_OF || MICROBLAZE) && GPIOLIB
 	help
 	  OpenFirmware GPIO accessors
 
 config OF_I2C
 	def_tristate I2C
-	depends on PPC_OF && I2C
+	depends on (PPC_OF || MICROBLAZE) && I2C
 	help
 	  OpenFirmware I2C accessors
 
 config OF_SPI
 	def_tristate SPI
-	depends on OF && PPC_OF && SPI
+	depends on OF && (PPC_OF || MICROBLAZE) && SPI
 	help
 	  OpenFirmware SPI accessors
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 73348c4..4a9cc92 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -702,7 +702,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq,
+static int iosapic_set_affinity_irq(unsigned int irq,
 				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
@@ -712,7 +712,7 @@
 
 	dest_cpu = cpu_check_affinity(irq, dest);
 	if (dest_cpu < 0)
-		return;
+		return -1;
 
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
 	vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
@@ -724,6 +724,8 @@
 	iosapic_set_irt_data(vi, &dummy_d0, &d1);
 	iosapic_wr_irt_entry(vi, d0, d1);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 4e63cc9..151bf5b 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1,5 +1,5 @@
 /* Low-level parallel-port routines for 8255-based PC-style hardware.
- * 
+ *
  * Authors: Phil Blundell <philb@gnu.org>
  *          Tim Waugh <tim@cyberelk.demon.co.uk>
  *	    Jose Renau <renau@acm.org>
@@ -11,7 +11,7 @@
  * Cleaned up include files - Russell King <linux@arm.uk.linux.org>
  * DMA support - Bert De Jonghe <bert@sophis.be>
  * Many ECP bugs fixed.  Fred Barnes & Jamie Lokier, 1999
- * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. 
+ * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G.
  * Various hacks, Fred Barnes, 04/2001
  * Updated probing logic - Adam Belay <ambx1@neo.rr.com>
  */
@@ -56,10 +56,10 @@
 #include <linux/pnp.h>
 #include <linux/platform_device.h>
 #include <linux/sysctl.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
 
-#include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
 
 #include <linux/parport.h>
 #include <linux/parport_pc.h>
@@ -82,7 +82,7 @@
 #define ECR_TST 06
 #define ECR_CNF 07
 #define ECR_MODE_MASK 0xe0
-#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v))
+#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v))
 
 #undef DEBUG
 
@@ -109,27 +109,27 @@
 static int pnp_registered_parport;
 
 /* frob_control, but for ECR */
-static void frob_econtrol (struct parport *pb, unsigned char m,
+static void frob_econtrol(struct parport *pb, unsigned char m,
 			   unsigned char v)
 {
 	unsigned char ectr = 0;
 
 	if (m != 0xff)
-		ectr = inb (ECONTROL (pb));
+		ectr = inb(ECONTROL(pb));
 
-	DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
+	DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
 		m, v, ectr, (ectr & ~m) ^ v);
 
-	outb ((ectr & ~m) ^ v, ECONTROL (pb));
+	outb((ectr & ~m) ^ v, ECONTROL(pb));
 }
 
-static __inline__ void frob_set_mode (struct parport *p, int mode)
+static inline void frob_set_mode(struct parport *p, int mode)
 {
-	frob_econtrol (p, ECR_MODE_MASK, mode << 5);
+	frob_econtrol(p, ECR_MODE_MASK, mode << 5);
 }
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-/* Safely change the mode bits in the ECR 
+/* Safely change the mode bits in the ECR
    Returns:
 	    0    : Success
 	   -EBUSY: Could not drain FIFO in some finite amount of time,
@@ -141,17 +141,18 @@
 	unsigned char oecr;
 	int mode;
 
-	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m);
+	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m);
 
 	if (!priv->ecr) {
-		printk (KERN_DEBUG "change_mode: but there's no ECR!\n");
+		printk(KERN_DEBUG "change_mode: but there's no ECR!\n");
 		return 0;
 	}
 
 	/* Bits <7:5> contain the mode. */
-	oecr = inb (ECONTROL (p));
+	oecr = inb(ECONTROL(p));
 	mode = (oecr >> 5) & 0x7;
-	if (mode == m) return 0;
+	if (mode == m)
+		return 0;
 
 	if (mode >= 2 && !(priv->ctr & 0x20)) {
 		/* This mode resets the FIFO, so we may
@@ -163,19 +164,21 @@
 		case ECR_ECP: /* ECP Parallel Port mode */
 			/* Busy wait for 200us */
 			for (counter = 0; counter < 40; counter++) {
-				if (inb (ECONTROL (p)) & 0x01)
+				if (inb(ECONTROL(p)) & 0x01)
 					break;
-				if (signal_pending (current)) break;
-				udelay (5);
+				if (signal_pending(current))
+					break;
+				udelay(5);
 			}
 
 			/* Poll slowly. */
-			while (!(inb (ECONTROL (p)) & 0x01)) {
-				if (time_after_eq (jiffies, expire))
+			while (!(inb(ECONTROL(p)) & 0x01)) {
+				if (time_after_eq(jiffies, expire))
 					/* The FIFO is stuck. */
 					return -EBUSY;
-				schedule_timeout_interruptible(msecs_to_jiffies(10));
-				if (signal_pending (current))
+				schedule_timeout_interruptible(
+							msecs_to_jiffies(10));
+				if (signal_pending(current))
 					break;
 			}
 		}
@@ -185,20 +188,20 @@
 		/* We have to go through mode 001 */
 		oecr &= ~(7 << 5);
 		oecr |= ECR_PS2 << 5;
-		ECR_WRITE (p, oecr);
+		ECR_WRITE(p, oecr);
 	}
 
 	/* Set the mode. */
 	oecr &= ~(7 << 5);
 	oecr |= m << 5;
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
 /* Find FIFO lossage; FIFO is reset */
 #if 0
-static int get_fifo_residue (struct parport *p)
+static int get_fifo_residue(struct parport *p)
 {
 	int residue;
 	int cnfga;
@@ -206,26 +209,26 @@
 
 	/* Adjust for the contents of the FIFO. */
 	for (residue = priv->fifo_depth; ; residue--) {
-		if (inb (ECONTROL (p)) & 0x2)
+		if (inb(ECONTROL(p)) & 0x2)
 				/* Full up. */
 			break;
 
-		outb (0, FIFO (p));
+		outb(0, FIFO(p));
 	}
 
-	printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
+	printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
 		residue);
 
 	/* Reset the FIFO. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
 	/* Now change to config mode and clean up. FIXME */
-	frob_set_mode (p, ECR_CNF);
-	cnfga = inb (CONFIGA (p));
-	printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
+	frob_set_mode(p, ECR_CNF);
+	cnfga = inb(CONFIGA(p));
+	printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
 
 	if (!(cnfga & (1<<2))) {
-		printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
+		printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
 		residue++;
 	}
 
@@ -233,9 +236,11 @@
 	 * PWord != 1 byte. */
 
 	/* Back to PS2 mode. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
-	DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p)));
+	DPRINTK(KERN_DEBUG
+	     "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n",
+							inb(ECONTROL(p)));
 	return residue;
 }
 #endif  /*  0 */
@@ -257,8 +262,8 @@
 	/* To clear timeout some chips require double read */
 	parport_pc_read_status(pb);
 	r = parport_pc_read_status(pb);
-	outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */
-	outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */
+	outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */
+	outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */
 	r = parport_pc_read_status(pb);
 
 	return !(r & 0x01);
@@ -272,7 +277,8 @@
  * of these are in parport_pc.h.
  */
 
-static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s)
+static void parport_pc_init_state(struct pardevice *dev,
+						struct parport_state *s)
 {
 	s->u.pc.ctr = 0xc;
 	if (dev->irq_func &&
@@ -289,22 +295,23 @@
 	const struct parport_pc_private *priv = p->physport->private_data;
 	s->u.pc.ctr = priv->ctr;
 	if (priv->ecr)
-		s->u.pc.ecr = inb (ECONTROL (p));
+		s->u.pc.ecr = inb(ECONTROL(p));
 }
 
-static void parport_pc_restore_state(struct parport *p, struct parport_state *s)
+static void parport_pc_restore_state(struct parport *p,
+						struct parport_state *s)
 {
 	struct parport_pc_private *priv = p->physport->private_data;
 	register unsigned char c = s->u.pc.ctr & priv->ctr_writable;
-	outb (c, CONTROL (p));
+	outb(c, CONTROL(p));
 	priv->ctr = c;
 	if (priv->ecr)
-		ECR_WRITE (p, s->u.pc.ecr);
+		ECR_WRITE(p, s->u.pc.ecr);
 }
 
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
-					size_t length, int flags)
+static size_t parport_pc_epp_read_data(struct parport *port, void *buf,
+				       size_t length, int flags)
 {
 	size_t got = 0;
 
@@ -316,54 +323,52 @@
 		 *  nFault is 0 if there is at least 1 byte in the Warp's FIFO
 		 *  pError is 1 if there are 16 bytes in the Warp's FIFO
 		 */
-		status = inb (STATUS (port));
+		status = inb(STATUS(port));
 
-		while (!(status & 0x08) && (got < length)) {
-			if ((left >= 16) && (status & 0x20) && !(status & 0x08)) {
+		while (!(status & 0x08) && got < length) {
+			if (left >= 16 && (status & 0x20) && !(status & 0x08)) {
 				/* can grab 16 bytes from warp fifo */
-				if (!((long)buf & 0x03)) {
-					insl (EPPDATA (port), buf, 4);
-				} else {
-					insb (EPPDATA (port), buf, 16);
-				}
+				if (!((long)buf & 0x03))
+					insl(EPPDATA(port), buf, 4);
+				else
+					insb(EPPDATA(port), buf, 16);
 				buf += 16;
 				got += 16;
 				left -= 16;
 			} else {
 				/* grab single byte from the warp fifo */
-				*((char *)buf) = inb (EPPDATA (port));
+				*((char *)buf) = inb(EPPDATA(port));
 				buf++;
 				got++;
 				left--;
 			}
-			status = inb (STATUS (port));
+			status = inb(STATUS(port));
 			if (status & 0x01) {
 				/* EPP timeout should never occur... */
-				printk (KERN_DEBUG "%s: EPP timeout occurred while talking to "
-					"w91284pic (should not have done)\n", port->name);
-				clear_epp_timeout (port);
+				printk(KERN_DEBUG
+"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name);
+				clear_epp_timeout(port);
 			}
 		}
 		return got;
 	}
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			insl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			insb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			insl(EPPDATA(port), buf, (length >> 2));
+		else
+			insb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPDATA(port));
+		*((char *)buf) = inb(EPPDATA(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
+		if (inb(STATUS(port)) & 0x01) {
 			/* EPP timeout */
-			clear_epp_timeout (port);
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -371,28 +376,27 @@
 	return got;
 }
 
-static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
-					 size_t length, int flags)
+static size_t parport_pc_epp_write_data(struct parport *port, const void *buf,
+					size_t length, int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			outsl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			outsb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			outsl(EPPDATA(port), buf, (length >> 2));
+		else
+			outsb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPDATA(port));
+		outb(*((char *)buf), EPPDATA(port));
 		buf++;
-		if (inb (STATUS(port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -400,24 +404,24 @@
 	return written;
 }
 
-static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
+static size_t parport_pc_epp_read_addr(struct parport *port, void *buf,
 					size_t length, int flags)
 {
 	size_t got = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		insb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		insb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPADDR (port));
+		*((char *)buf) = inb(EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -425,25 +429,25 @@
 	return got;
 }
 
-static size_t parport_pc_epp_write_addr (struct parport *port,
+static size_t parport_pc_epp_write_addr(struct parport *port,
 					 const void *buf, size_t length,
 					 int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		outsb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		outsb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPADDR (port));
+		outb(*((char *)buf), EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -451,74 +455,74 @@
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_data (struct parport *port,
-					    const void *buf, size_t length,
-					    int flags)
+static size_t parport_pc_ecpepp_write_data(struct parport *port,
+					   const void *buf, size_t length,
+					   int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_addr (struct parport *port,
+static size_t parport_pc_ecpepp_write_addr(struct parport *port,
 					    const void *buf, size_t length,
 					    int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 #endif /* IEEE 1284 support */
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-static size_t parport_pc_fifo_write_block_pio (struct parport *port,
+static size_t parport_pc_fifo_write_block_pio(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
 	const unsigned char *bufp = buf;
 	size_t left = length;
 	unsigned long expire = jiffies + port->physport->cad->timeout;
-	const int fifo = FIFO (port);
+	const int fifo = FIFO(port);
 	int poll_for = 8; /* 80 usecs */
 	const struct parport_pc_private *priv = port->physport->private_data;
 	const int fifo_depth = priv->fifo_depth;
@@ -526,25 +530,25 @@
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned char byte;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 		int i = 0;
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
@@ -552,21 +556,22 @@
 			/* FIFO is full. Wait for interrupt. */
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-			ret = parport_wait_event (port, HZ);
-			if (ret < 0) break;
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			ret = parport_wait_event(port, HZ);
+			if (ret < 0)
+				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-				printk (KERN_DEBUG "FIFO write timed out\n");
+				printk(KERN_DEBUG "FIFO write timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire))
-					schedule ();
+				    time_before(jiffies, expire))
+					schedule();
 
 				goto false_alarm;
 			}
@@ -577,38 +582,38 @@
 		/* Can't fail now. */
 		expire = jiffies + port->cad->timeout;
 
-	poll:
-		if (signal_pending (current))
+poll:
+		if (signal_pending(current))
 			break;
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Blast it full. */
 			const int n = left < fifo_depth ? left : fifo_depth;
-			outsb (fifo, bufp, n);
+			outsb(fifo, bufp, n);
 			bufp += n;
 			left -= n;
 
 			/* Adjust the poll time. */
-			if (i < (poll_for - 2)) poll_for--;
+			if (i < (poll_for - 2))
+				poll_for--;
 			continue;
 		} else if (i++ < poll_for) {
-			udelay (10);
-			ecrval = inb (ECONTROL (port));
+			udelay(10);
+			ecrval = inb(ECONTROL(port));
 			goto poll;
 		}
 
-		/* Half-full (call me an optimist) */
+		/* Half-full(call me an optimist) */
 		byte = *bufp++;
-		outb (byte, fifo);
+		outb(byte, fifo);
 		left--;
-        }
-
-dump_parport_state ("leave fifo_write_block_pio", port);
+	}
+	dump_parport_state("leave fifo_write_block_pio", port);
 	return length - left;
 }
 
 #ifdef HAS_DMA
-static size_t parport_pc_fifo_write_block_dma (struct parport *port,
+static size_t parport_pc_fifo_write_block_dma(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
@@ -621,7 +626,7 @@
 	unsigned long start = (unsigned long) buf;
 	unsigned long end = (unsigned long) buf + length - 1;
 
-dump_parport_state ("enter fifo_write_block_dma", port);
+	dump_parport_state("enter fifo_write_block_dma", port);
 	if (end < MAX_DMA_ADDRESS) {
 		/* If it would cross a 64k boundary, cap it at the end. */
 		if ((start ^ end) & ~0xffffUL)
@@ -629,8 +634,9 @@
 
 		dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length,
 						       DMA_TO_DEVICE);
-        } else {
-		/* above 16 MB we use a bounce buffer as ISA-DMA is not possible */
+	} else {
+		/* above 16 MB we use a bounce buffer as ISA-DMA
+		   is not possible */
 		maxlen   = PAGE_SIZE;          /* sizeof(priv->dma_buf) */
 		dma_addr = priv->dma_handle;
 		dma_handle = 0;
@@ -639,12 +645,12 @@
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned long expire = jiffies + port->physport->cad->timeout;
@@ -665,10 +671,10 @@
 		set_dma_count(port->dma, count);
 
 		/* Set DMA mode */
-		frob_econtrol (port, 1<<3, 1<<3);
+		frob_econtrol(port, 1<<3, 1<<3);
 
 		/* Clear serviceIntr */
-		frob_econtrol (port, 1<<2, 0);
+		frob_econtrol(port, 1<<2, 0);
 
 		enable_dma(port->dma);
 		release_dma_lock(dmaflag);
@@ -676,20 +682,22 @@
 		/* assume DMA will be successful */
 		left -= count;
 		buf  += count;
-		if (dma_handle) dma_addr += count;
+		if (dma_handle)
+			dma_addr += count;
 
 		/* Wait for interrupt. */
-	false_alarm:
-		ret = parport_wait_event (port, HZ);
-		if (ret < 0) break;
+false_alarm:
+		ret = parport_wait_event(port, HZ);
+		if (ret < 0)
+			break;
 		ret = 0;
-		if (!time_before (jiffies, expire)) {
+		if (!time_before(jiffies, expire)) {
 			/* Timed out. */
-			printk (KERN_DEBUG "DMA write timed out\n");
+			printk(KERN_DEBUG "DMA write timed out\n");
 			break;
 		}
 		/* Is serviceIntr set? */
-		if (!(inb (ECONTROL (port)) & (1<<2))) {
+		if (!(inb(ECONTROL(port)) & (1<<2))) {
 			cond_resched();
 
 			goto false_alarm;
@@ -705,14 +713,15 @@
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
 		/* update for possible DMA residue ! */
 		buf  -= count;
 		left += count;
-		if (dma_handle) dma_addr -= count;
+		if (dma_handle)
+			dma_addr -= count;
 	}
 
 	/* Maybe got here through break, so adjust for DMA residue! */
@@ -723,12 +732,12 @@
 	release_dma_lock(dmaflag);
 
 	/* Turn off DMA mode */
-	frob_econtrol (port, 1<<3, 0);
+	frob_econtrol(port, 1<<3, 0);
 
 	if (dma_handle)
 		dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
 
-dump_parport_state ("leave fifo_write_block_dma", port);
+	dump_parport_state("leave fifo_write_block_dma", port);
 	return length - left;
 }
 #endif
@@ -738,13 +747,13 @@
 {
 #ifdef HAS_DMA
 	if (port->dma != PARPORT_DMA_NONE)
-		return parport_pc_fifo_write_block_dma (port, buf, length);
+		return parport_pc_fifo_write_block_dma(port, buf, length);
 #endif
-	return parport_pc_fifo_write_block_pio (port, buf, length);
+	return parport_pc_fifo_write_block_pio(port, buf, length);
 }
 
 /* Parallel Port FIFO mode (ECP chipsets) */
-static size_t parport_pc_compat_write_block_pio (struct parport *port,
+static size_t parport_pc_compat_write_block_pio(struct parport *port,
 						 const void *buf, size_t length,
 						 int flags)
 {
@@ -756,14 +765,16 @@
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_write_compat (port, buf,
+		return parport_ieee1284_write_compat(port, buf,
 						      length, flags);
 
 	/* Set up parallel port FIFO mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0);
-	r = change_mode (port, ECR_PPF); /* Parallel port FIFO */
-	if (r)  printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name);
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0);
+	r = change_mode(port, ECR_PPF); /* Parallel port FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n",
+								port->name);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
@@ -775,40 +786,39 @@
 	 * the FIFO is empty, so allow 4 seconds for each position
 	 * in the fifo.
 	 */
-        expire = jiffies + (priv->fifo_depth * HZ * 4);
+	expire = jiffies + (priv->fifo_depth * HZ * 4);
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 	}
 
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
 	if (r)
-		printk (KERN_DEBUG
-			"%s: BUSY timeout (%d) in compat_write_block_pio\n", 
+		printk(KERN_DEBUG
+			"%s: BUSY timeout (%d) in compat_write_block_pio\n",
 			port->name, r);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
@@ -818,7 +828,7 @@
 
 /* ECP */
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_ecp_write_block_pio (struct parport *port,
+static size_t parport_pc_ecp_write_block_pio(struct parport *port,
 					      const void *buf, size_t length,
 					      int flags)
 {
@@ -830,36 +840,38 @@
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_write_data (port, buf,
+		return parport_ieee1284_ecp_write_data(port, buf,
 							length, flags);
 
 	/* Switch to forward mode if necessary. */
 	if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
 		/* Event 47: Set nInit high. */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD);
 
 		/* Event 49: PError goes high. */
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
 		if (r) {
-			printk (KERN_DEBUG "%s: PError timeout (%d) "
+			printk(KERN_DEBUG "%s: PError timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 		}
 	}
 
 	/* Set up ECP parallel port mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port,
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
 	/* Write the data to the FIFO. */
@@ -873,55 +885,54 @@
 	expire = jiffies + (priv->fifo_depth * (HZ * 4));
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 
 		/* Host transfer recovery. */
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
-		parport_frob_control (port, PARPORT_CONTROL_INIT, 0);
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
+		parport_frob_control(port, PARPORT_CONTROL_INIT, 0);
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
 		if (r)
-			printk (KERN_DEBUG "%s: PE,1 timeout (%d) "
+			printk(KERN_DEBUG "%s: PE,1 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      PARPORT_CONTROL_INIT);
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
-                if (r)
-                        printk (KERN_DEBUG "%s: PE,2 timeout (%d) "
+		if (r)
+			printk(KERN_DEBUG "%s: PE,2 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 	}
 
-	r = parport_wait_peripheral (port,
-				     PARPORT_STATUS_BUSY, 
+	r = parport_wait_peripheral(port,
+				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
-	if(r)
-		printk (KERN_DEBUG
+	if (r)
+		printk(KERN_DEBUG
 			"%s: BUSY timeout (%d) in ecp_write_block_pio\n",
 			port->name, r);
 
@@ -931,7 +942,7 @@
 }
 
 #if 0
-static size_t parport_pc_ecp_read_block_pio (struct parport *port,
+static size_t parport_pc_ecp_read_block_pio(struct parport *port,
 					     void *buf, size_t length,
 					     int flags)
 {
@@ -944,13 +955,13 @@
 	char *bufp = buf;
 
 	port = port->physport;
-DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
-dump_parport_state ("enter fcn", port);
+	DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
+	dump_parport_state("enter fcn", port);
 
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) {
@@ -966,173 +977,178 @@
 	 * go through software emulation.  Otherwise we may have to throw
 	 * away data. */
 	if (length < fifofull)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) {
 		/* change to reverse-idle phase (must be in forward-idle) */
 
 		/* Event 38: Set nAutoFd low (also make sure nStrobe is high) */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_AUTOFD
 				      | PARPORT_CONTROL_STROBE,
 				      PARPORT_CONTROL_AUTOFD);
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
 		/* Event 39: Set nInit low to initiate bus reversal */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      0);
 		/* Event 40: Wait for  nAckReverse (PError) to go low */
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
-                if (r) {
-                        printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) "
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
+		if (r) {
+			printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) "
 				"in ecp_read_block_pio\n", port->name, r);
 			return 0;
 		}
 	}
 
 	/* Set up ECP FIFO mode.*/
-/*	parport_pc_frob_control (port,
+/*	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD); */
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 
 	port->ieee1284.phase = IEEE1284_PH_REV_DATA;
 
 	/* the first byte must be collected manually */
-dump_parport_state ("pre 43", port);
+	dump_parport_state("pre 43", port);
 	/* Event 43: Wait for nAck to go low */
-	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0);
+	r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0);
 	if (r) {
 		/* timed out while reading -- no data */
-		printk (KERN_DEBUG "PIO read timed out (initial byte)\n");
+		printk(KERN_DEBUG "PIO read timed out (initial byte)\n");
 		goto out_no_data;
 	}
 	/* read byte */
-	*bufp++ = inb (DATA (port));
+	*bufp++ = inb(DATA(port));
 	left--;
-dump_parport_state ("43-44", port);
+	dump_parport_state("43-44", port);
 	/* Event 44: nAutoFd (HostAck) goes high to acknowledge */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-dump_parport_state ("pre 45", port);
+	dump_parport_state("pre 45", port);
 	/* Event 45: Wait for nAck to go high */
-/*	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */
-dump_parport_state ("post 45", port);
-r = 0;
+	/* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK,
+						PARPORT_STATUS_ACK); */
+	dump_parport_state("post 45", port);
+	r = 0;
 	if (r) {
 		/* timed out while waiting for peripheral to respond to ack */
-		printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
+		printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
 
 		/* keep hold of the byte we've got already */
 		goto out_no_data;
 	}
 	/* Event 46: nAutoFd (HostAck) goes low to accept more data */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD);
 
 
-dump_parport_state ("rev idle", port);
+	dump_parport_state("rev idle", port);
 	/* Do the transfer. */
 	while (left > fifofull) {
 		int ret;
 		unsigned long expire = jiffies + port->cad->timeout;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* At this point, the FIFO may already be full. In
-                 * that case ECP is already holding back the
-                 * peripheral (assuming proper design) with a delayed
-                 * handshake.  Work fast to avoid a peripheral
-                 * timeout.  */
+		 * that case ECP is already holding back the
+		 * peripheral (assuming proper design) with a delayed
+		 * handshake.  Work fast to avoid a peripheral
+		 * timeout.  */
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Wait for interrupt. */
-dump_parport_state ("FIFO empty", port);
+			dump_parport_state("FIFO empty", port);
 
 			/* Anyone else waiting for the port? */
 			if (port->waithead) {
-				printk (KERN_DEBUG "Somebody wants the port\n");
+				printk(KERN_DEBUG "Somebody wants the port\n");
 				break;
 			}
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-dump_parport_state ("waiting", port);
-			ret = parport_wait_event (port, HZ);
-DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret);
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			dump_parport_state("waiting", port);
+			ret = parport_wait_event(port, HZ);
+			DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n",
+									ret);
 			if (ret < 0)
 				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-dump_parport_state ("timeout", port);
-				printk (KERN_DEBUG "PIO read timed out\n");
+				dump_parport_state("timeout", port);
+				printk(KERN_DEBUG "PIO read timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire)) {
-					schedule ();
+				    time_before(jiffies, expire)) {
+					schedule();
 				}
 				goto false_alarm;
 			}
 
 			/* Depending on how the FIFO threshold was
-                         * set, how long interrupt service took, and
-                         * how fast the peripheral is, we might be
-                         * lucky and have a just filled FIFO. */
+			 * set, how long interrupt service took, and
+			 * how fast the peripheral is, we might be
+			 * lucky and have a just filled FIFO. */
 			continue;
 		}
 
 		if (ecrval & 0x02) {
 			/* FIFO is full. */
-dump_parport_state ("FIFO full", port);
-			insb (fifo, bufp, fifo_depth);
+			dump_parport_state("FIFO full", port);
+			insb(fifo, bufp, fifo_depth);
 			bufp += fifo_depth;
 			left -= fifo_depth;
 			continue;
 		}
 
-DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
+		DPRINTK(KERN_DEBUG
+		  "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 
 		/* FIFO not filled.  We will cycle this loop for a while
-                 * and either the peripheral will fill it faster,
-                 * tripping a fast empty with insb, or we empty it. */
-		*bufp++ = inb (fifo);
+		 * and either the peripheral will fill it faster,
+		 * tripping a fast empty with insb, or we empty it. */
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	/* scoop up anything left in the FIFO */
-	while (left && !(inb (ECONTROL (port) & 0x01))) {
-		*bufp++ = inb (fifo);
+	while (left && !(inb(ECONTROL(port) & 0x01))) {
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-dump_parport_state ("rev idle2", port);
+	dump_parport_state("rev idle2", port);
 
 out_no_data:
 
 	/* Go to forward idle mode to shut the peripheral up (event 47). */
-	parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
+	parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
 
 	/* event 49: PError goes high */
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_PAPEROUT,
 				     PARPORT_STATUS_PAPEROUT);
 	if (r) {
-		printk (KERN_DEBUG
+		printk(KERN_DEBUG
 			"%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n",
 			port->name, r);
 	}
@@ -1141,14 +1157,14 @@
 
 	/* Finish up. */
 	{
-		int lost = get_fifo_residue (port);
+		int lost = get_fifo_residue(port);
 		if (lost)
 			/* Shouldn't happen with compliant peripherals. */
-			printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
+			printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
 				port->name, lost);
 	}
 
-dump_parport_state ("fwd idle", port);
+	dump_parport_state("fwd idle", port);
 	return length - left;
 }
 #endif  /*  0  */
@@ -1164,8 +1180,7 @@
 
 /* GCC is not inlining extern inline function later overwriten to non-inline,
    so we use outlined_ variants here.  */
-static const struct parport_operations parport_pc_ops =
-{
+static const struct parport_operations parport_pc_ops = {
 	.write_data	= parport_pc_write_data,
 	.read_data	= parport_pc_read_data,
 
@@ -1202,88 +1217,107 @@
 };
 
 #ifdef CONFIG_PARPORT_PC_SUPERIO
+
+static struct superio_struct *find_free_superio(void)
+{
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io == 0)
+			return &superios[i];
+	return NULL;
+}
+
+
 /* Super-IO chipset detection, Winbond, SMSC */
 static void __devinit show_parconfig_smsc37c669(int io, int key)
 {
-	int cr1,cr4,cra,cr23,cr26,cr27,i=0;
-	static const char *const modes[]={
+	int cr1, cr4, cra, cr23, cr26, cr27;
+	struct superio_struct *s;
+
+	static const char *const modes[] = {
 		"SPP and Bidirectional (PS/2)",
 		"EPP and SPP",
 		"ECP",
 		"ECP and EPP" };
 
-	outb(key,io);
-	outb(key,io);
-	outb(1,io);
-	cr1=inb(io+1);
-	outb(4,io);
-	cr4=inb(io+1);
-	outb(0x0a,io);
-	cra=inb(io+1);
-	outb(0x23,io);
-	cr23=inb(io+1);
-	outb(0x26,io);
-	cr26=inb(io+1);
-	outb(0x27,io);
-	cr27=inb(io+1);
-	outb(0xaa,io);
+	outb(key, io);
+	outb(key, io);
+	outb(1, io);
+	cr1 = inb(io + 1);
+	outb(4, io);
+	cr4 = inb(io + 1);
+	outb(0x0a, io);
+	cra = inb(io + 1);
+	outb(0x23, io);
+	cr23 = inb(io + 1);
+	outb(0x26, io);
+	cr26 = inb(io + 1);
+	outb(0x27, io);
+	cr27 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
+		printk(KERN_INFO
+			"SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
 			"A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n",
-			cr1,cr4,cra,cr23,cr26,cr27);
-		
+			cr1, cr4, cra, cr23, cr26, cr27);
+
 		/* The documentation calls DMA and IRQ-Lines by letters, so
 		   the board maker can/will wire them
 		   appropriately/randomly...  G=reserved H=IDE-irq, */
-		printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, "
-			"fifo threshold=%d\n", cr23*4,
-			(cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-',
-			(cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f);
+		printk(KERN_INFO
+	"SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n",
+				cr23 * 4,
+				(cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-',
+				(cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-',
+				cra & 0x0f);
 		printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n",
-		       (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no");
-		printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
-		       (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 
-		       (cr4 & 0x40) ? "1.7" : "1.9");
+		       (cr23 * 4 >= 0x100) ? "yes" : "no",
+		       (cr1 & 4) ? "yes" : "no");
+		printk(KERN_INFO
+			"SMSC LPT Config: Port mode=%s, EPP version =%s\n",
+				(cr1 & 0x08) ? "Standard mode only (SPP)"
+					      : modes[cr4 & 0x03],
+				(cr4 & 0x40) ? "1.7" : "1.9");
 	}
-		
+
 	/* Heuristics !  BIOS setup for this mainboard device limits
 	   the choices to standard settings, i.e. io-address and IRQ
 	   are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
 	   DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
-	if(cr23*4 >=0x100) { /* if active */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-			i++;
-		if(i==NR_SUPERIOS)
+	if (cr23 * 4 >= 0x100) { /* if active */
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
 		else {
 			int d;
-			switch (cr23*4) {
-				case 0x3bc:
-					superios[i].io = 0x3bc;
-					superios[i].irq = 7;
-					break;
-				case 0x378:
-					superios[i].io = 0x378;
-					superios[i].irq = 7;
-					break;
-				case 0x278:
-					superios[i].io = 0x278;
-					superios[i].irq = 5;
+			switch (cr23 * 4) {
+			case 0x3bc:
+				s->io = 0x3bc;
+				s->irq = 7;
+				break;
+			case 0x378:
+				s->io = 0x378;
+				s->irq = 7;
+				break;
+			case 0x278:
+				s->io = 0x278;
+				s->irq = 5;
 			}
-			d=(cr26 &0x0f);
-			if((d==1) || (d==3)) 
-				superios[i].dma= d;
+			d = (cr26 & 0x0f);
+			if (d == 1 || d == 3)
+				s->dma = d;
 			else
-				superios[i].dma= PARPORT_DMA_NONE;
+				s->dma = PARPORT_DMA_NONE;
 		}
- 	}
+	}
 }
 
 
 static void __devinit show_parconfig_winbond(int io, int key)
 {
-	int cr30,cr60,cr61,cr70,cr74,crf0,i=0;
+	int cr30, cr60, cr61, cr70, cr74, crf0;
+	struct superio_struct *s;
 	static const char *const modes[] = {
 		"Standard (SPP) and Bidirectional(PS/2)", /* 0 */
 		"EPP-1.9 and SPP",
@@ -1296,110 +1330,134 @@
 	static char *const irqtypes[] = {
 		"pulsed low, high-Z",
 		"follows nACK" };
-		
+
 	/* The registers are called compatible-PnP because the
-           register layout is modelled after ISA-PnP, the access
-           method is just another ... */
-	outb(key,io);
-	outb(key,io);
-	outb(0x07,io);   /* Register 7: Select Logical Device */
-	outb(0x01,io+1); /* LD1 is Parallel Port */
-	outb(0x30,io);
-	cr30=inb(io+1);
-	outb(0x60,io);
-	cr60=inb(io+1);
-	outb(0x61,io);
-	cr61=inb(io+1);
-	outb(0x70,io);
-	cr70=inb(io+1);
-	outb(0x74,io);
-	cr74=inb(io+1);
-	outb(0xf0,io);
-	crf0=inb(io+1);
-	outb(0xaa,io);
+	   register layout is modelled after ISA-PnP, the access
+	   method is just another ... */
+	outb(key, io);
+	outb(key, io);
+	outb(0x07, io);   /* Register 7: Select Logical Device */
+	outb(0x01, io + 1); /* LD1 is Parallel Port */
+	outb(0x30, io);
+	cr30 = inb(io + 1);
+	outb(0x60, io);
+	cr60 = inb(io + 1);
+	outb(0x61, io);
+	cr61 = inb(io + 1);
+	outb(0x70, io);
+	cr70 = inb(io + 1);
+	outb(0x74, io);
+	cr74 = inb(io + 1);
+	outb(0xf0, io);
+	crf0 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x "
-		       "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0);
-		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", 
-		       (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f );
+		printk(KERN_INFO
+    "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n",
+					cr30, cr60, cr61, cr70, cr74, crf0);
+		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ",
+		       (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f);
 		if ((cr74 & 0x07) > 3)
 			printk("dma=none\n");
 		else
-			printk("dma=%d\n",cr74 & 0x07);
-		printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
-		       irqtypes[crf0>>7], (crf0>>3)&0x0f);
-		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]);
+			printk("dma=%d\n", cr74 & 0x07);
+		printk(KERN_INFO
+		    "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
+					irqtypes[crf0>>7], (crf0>>3)&0x0f);
+		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n",
+					modes[crf0 & 0x07]);
 	}
 
-	if(cr30 & 0x01) { /* the settings can be interrogated later ... */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-			i++;
-		if(i==NR_SUPERIOS) 
+	if (cr30 & 0x01) { /* the settings can be interrogated later ... */
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
 		else {
-			superios[i].io = (cr60<<8)|cr61;
-			superios[i].irq = cr70&0x0f;
-			superios[i].dma = (((cr74 & 0x07) > 3) ?
+			s->io = (cr60 << 8) | cr61;
+			s->irq = cr70 & 0x0f;
+			s->dma = (((cr74 & 0x07) > 3) ?
 					   PARPORT_DMA_NONE : (cr74 & 0x07));
 		}
 	}
 }
 
-static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid)
+static void __devinit decode_winbond(int efer, int key, int devid,
+							int devrev, int oldid)
 {
 	const char *type = "unknown";
-	int id,progif=2;
+	int id, progif = 2;
 
 	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-winbond register */
+		   non-winbond register */
 		return;
 
-	id=(devid<<8) | devrev;
+	id = (devid << 8) | devrev;
 
 	/* Values are from public data sheets pdf files, I can just
-           confirm 83977TF is correct :-) */
-	if      (id == 0x9771) type="83977F/AF";
-	else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x";
-	else if (id == 0x9774) type="83977ATF";
-	else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x";
-	else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x";
-	else if ((id & ~0x0f) == 0x5210) type="83627";
-	else if ((id & ~0x0f) == 0x6010) type="83697HF";
-	else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;}
-	else progif=0;
+	   confirm 83977TF is correct :-) */
+	if (id == 0x9771)
+		type = "83977F/AF";
+	else if (id == 0x9773)
+		type = "83977TF / SMSC 97w33x/97w34x";
+	else if (id == 0x9774)
+		type = "83977ATF";
+	else if ((id & ~0x0f) == 0x5270)
+		type = "83977CTF / SMSC 97w36x";
+	else if ((id & ~0x0f) == 0x52f0)
+		type = "83977EF / SMSC 97w35x";
+	else if ((id & ~0x0f) == 0x5210)
+		type = "83627";
+	else if ((id & ~0x0f) == 0x6010)
+		type = "83697HF";
+	else if ((oldid & 0x0f) == 0x0a) {
+		type = "83877F";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0b) {
+		type = "83877AF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0c) {
+		type = "83877TF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0d) {
+		type = "83877ATF";
+		progif = 1;
+	} else
+		progif = 0;
 
 	if (verbose_probing)
 		printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x "
-		       "devid=%02x devrev=%02x oldid=%02x type=%s\n", 
+		       "devid=%02x devrev=%02x oldid=%02x type=%s\n",
 		       efer, key, devid, devrev, oldid, type);
 
 	if (progif == 2)
-		show_parconfig_winbond(efer,key);
+		show_parconfig_winbond(efer, key);
 }
 
 static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
 {
-        const char *type = "unknown";
+	const char *type = "unknown";
 	void (*func)(int io, int key);
-        int id;
+	int id;
 
-        if (devid == devrev)
+	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-smsc register */
+		   non-smsc register */
 		return;
 
-	func=NULL;
-        id=(devid<<8) | devrev;
+	func = NULL;
+	id = (devid << 8) | devrev;
 
-	if	(id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;}
-	else if	(id==0x6582) type="37c665IR";
-	else if	(devid==0x65) type="37c665GT";
-	else if	(devid==0x66) type="37c666GT";
+	if (id == 0x0302) {
+		type = "37c669";
+		func = show_parconfig_smsc37c669;
+	} else if (id == 0x6582)
+		type = "37c665IR";
+	else if	(devid == 0x65)
+		type = "37c665GT";
+	else if	(devid == 0x66)
+		type = "37c666GT";
 
 	if (verbose_probing)
 		printk(KERN_INFO "SMSC chip at EFER=0x%x "
@@ -1407,138 +1465,138 @@
 		       efer, key, devid, devrev, type);
 
 	if (func)
-		func(efer,key);
+		func(efer, key);
 }
 
 
 static void __devinit winbond_check(int io, int key)
 {
-	int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without key */
-	outb(0x20,io);
-	x_devid=inb(io+1);
-	outb(0x21,io);
-	x_devrev=inb(io+1);
-	outb(0x09,io);
-	x_oldid=inb(io+1);
+	outb(0x20, io);
+	x_devid = inb(io + 1);
+	outb(0x21, io);
+	x_devrev = inb(io + 1);
+	outb(0x09, io);
+	x_oldid = inb(io + 1);
 
-	outb(key,io);
-	outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-	outb(0x20,io);    /* Write EFIR, extended function index register */
-	devid=inb(io+1);  /* Read EFDR, extended function data register */
-	outb(0x21,io);
-	devrev=inb(io+1);
-	outb(0x09,io);
-	oldid=inb(io+1);
-	outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x20, io);    /* Write EFIR, extended function index register */
+	devid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x21, io);
+	devrev = inb(io + 1);
+	outb(0x09, io);
+	oldid = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
 
 	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
-static void __devinit winbond_check2(int io,int key)
+static void __devinit winbond_check2(int io, int key)
 {
-        int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x20,io+2);
-	x_devid=inb(io+2);
-	outb(0x21,io+1);
-	x_devrev=inb(io+2);
-	outb(0x09,io+1);
-	x_oldid=inb(io+2);
+	outb(0x20, io + 2);
+	x_devid = inb(io + 2);
+	outb(0x21, io + 1);
+	x_devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	x_oldid = inb(io + 2);
 
-        outb(key,io);     /* Write Magic Byte to EFER, extended
-                             funtion enable register */
-        outb(0x20,io+2);  /* Write EFIR, extended function index register */
-        devid=inb(io+2);  /* Read EFDR, extended function data register */
-        outb(0x21,io+1);
-        devrev=inb(io+2);
-        outb(0x09,io+1);
-        oldid=inb(io+2);
-        outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);     /* Write Magic Byte to EFER, extended
+			      funtion enable register */
+	outb(0x20, io + 2);  /* Write EFIR, extended function index register */
+	devid = inb(io + 2);  /* Read EFDR, extended function data register */
+	outb(0x21, io + 1);
+	devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	oldid = inb(io + 2);
+	outb(0xaa, io);    /* Magic Seal */
 
-	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
+	if (x_devid == devid && x_devrev == devrev && x_oldid == oldid)
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
 static void __devinit smsc_check(int io, int key)
 {
-        int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev;
+	int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x0d,io);
-	x_oldid=inb(io+1);
-	outb(0x0e,io);
-	x_oldrev=inb(io+1);
-	outb(0x20,io);
-	x_id=inb(io+1);
-	outb(0x21,io);
-	x_rev=inb(io+1);
+	outb(0x0d, io);
+	x_oldid = inb(io + 1);
+	outb(0x0e, io);
+	x_oldrev = inb(io + 1);
+	outb(0x20, io);
+	x_id = inb(io + 1);
+	outb(0x21, io);
+	x_rev = inb(io + 1);
 
-        outb(key,io);
-        outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-        outb(0x0d,io);    /* Write EFIR, extended function index register */
-        oldid=inb(io+1);  /* Read EFDR, extended function data register */
-        outb(0x0e,io);
-        oldrev=inb(io+1);
-	outb(0x20,io);
-	id=inb(io+1);
-	outb(0x21,io);
-	rev=inb(io+1);
-        outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x0d, io);    /* Write EFIR, extended function index register */
+	oldid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x0e, io);
+	oldrev = inb(io + 1);
+	outb(0x20, io);
+	id = inb(io + 1);
+	outb(0x21, io);
+	rev = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
 
-	if ((x_id == id) && (x_oldrev == oldrev) &&
-	    (x_oldid == oldid) && (x_rev == rev))
+	if (x_id == id && x_oldrev == oldrev &&
+	    x_oldid == oldid && x_rev == rev)
 		goto out; /* protection against false positives */
 
-        decode_smsc(io,key,oldid,oldrev);
+	decode_smsc(io, key, oldid, oldrev);
 out:
 	release_region(io, 3);
 }
 
 
-static void __devinit detect_and_report_winbond (void)
-{ 
+static void __devinit detect_and_report_winbond(void)
+{
 	if (verbose_probing)
 		printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n");
-	winbond_check(0x3f0,0x87);
-	winbond_check(0x370,0x87);
-	winbond_check(0x2e ,0x87);
-	winbond_check(0x4e ,0x87);
-	winbond_check(0x3f0,0x86);
-	winbond_check2(0x250,0x88); 
-	winbond_check2(0x250,0x89);
+	winbond_check(0x3f0, 0x87);
+	winbond_check(0x370, 0x87);
+	winbond_check(0x2e , 0x87);
+	winbond_check(0x4e , 0x87);
+	winbond_check(0x3f0, 0x86);
+	winbond_check2(0x250, 0x88);
+	winbond_check2(0x250, 0x89);
 }
 
-static void __devinit detect_and_report_smsc (void)
+static void __devinit detect_and_report_smsc(void)
 {
 	if (verbose_probing)
 		printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n");
-	smsc_check(0x3f0,0x55);
-	smsc_check(0x370,0x55);
-	smsc_check(0x3f0,0x44);
-	smsc_check(0x370,0x44);
+	smsc_check(0x3f0, 0x55);
+	smsc_check(0x370, 0x55);
+	smsc_check(0x3f0, 0x44);
+	smsc_check(0x370, 0x44);
 }
 
 static void __devinit detect_and_report_it87(void)
@@ -1573,34 +1631,39 @@
 }
 #endif /* CONFIG_PARPORT_PC_SUPERIO */
 
-static int get_superio_dma (struct parport *p)
+static struct superio_struct *find_superio(struct parport *p)
 {
-	int i=0;
-	while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-		i++;
-	if (i!=NR_SUPERIOS)
-		return superios[i].dma;
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io != p->base)
+			return &superios[i];
+	return NULL;
+}
+
+static int get_superio_dma(struct parport *p)
+{
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->dma;
 	return PARPORT_DMA_NONE;
 }
 
-static int get_superio_irq (struct parport *p)
+static int get_superio_irq(struct parport *p)
 {
-	int i=0;
-        while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-                i++;
-        if (i!=NR_SUPERIOS)
-                return superios[i].irq;
-        return PARPORT_IRQ_NONE;
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->irq;
+	return PARPORT_IRQ_NONE;
 }
-	
+
 
 /* --- Mode detection ------------------------------------- */
 
 /*
  * Checks for port existence, all ports support SPP MODE
- * Returns: 
+ * Returns:
  *         0           :  No parallel port at this address
- *  PARPORT_MODE_PCSPP :  SPP port detected 
+ *  PARPORT_MODE_PCSPP :  SPP port detected
  *                        (if the user specified an ioport himself,
  *                         this shall always be the case!)
  *
@@ -1610,7 +1673,7 @@
 	unsigned char r, w;
 
 	/*
-	 * first clear an eventually pending EPP timeout 
+	 * first clear an eventually pending EPP timeout
 	 * I (sailer@ife.ee.ethz.ch) have an SMSC chipset
 	 * that does not even respond to SPP cycles if an EPP
 	 * timeout is pending
@@ -1619,19 +1682,19 @@
 
 	/* Do a simple read-write test to make sure the port exists. */
 	w = 0xc;
-	outb (w, CONTROL (pb));
+	outb(w, CONTROL(pb));
 
 	/* Is there a control register that we can read from?  Some
 	 * ports don't allow reads, so read_control just returns a
 	 * software copy. Some ports _do_ allow reads, so bypass the
 	 * software copy here.  In addition, some bits aren't
 	 * writable. */
-	r = inb (CONTROL (pb));
+	r = inb(CONTROL(pb));
 	if ((r & 0xf) == w) {
 		w = 0xe;
-		outb (w, CONTROL (pb));
-		r = inb (CONTROL (pb));
-		outb (0xc, CONTROL (pb));
+		outb(w, CONTROL(pb));
+		r = inb(CONTROL(pb));
+		outb(0xc, CONTROL(pb));
 		if ((r & 0xf) == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1639,18 +1702,18 @@
 	if (user_specified)
 		/* That didn't work, but the user thinks there's a
 		 * port here. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): CTR: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): CTR: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
 
 	/* Try the data register.  The data lines aren't tri-stated at
 	 * this stage, so we expect back what we wrote. */
 	w = 0xaa;
-	parport_pc_write_data (pb, w);
-	r = parport_pc_read_data (pb);
+	parport_pc_write_data(pb, w);
+	r = parport_pc_read_data(pb);
 	if (r == w) {
 		w = 0x55;
-		parport_pc_write_data (pb, w);
-		r = parport_pc_read_data (pb);
+		parport_pc_write_data(pb, w);
+		r = parport_pc_read_data(pb);
 		if (r == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1658,9 +1721,9 @@
 	if (user_specified) {
 		/* Didn't work, but the user is convinced this is the
 		 * place. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): DATA: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): DATA: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
-		printk (KERN_INFO "parport 0x%lx: You gave this address, "
+		printk(KERN_INFO "parport 0x%lx: You gave this address, "
 			"but there is probably no parallel port there!\n",
 			pb->base);
 	}
@@ -1691,33 +1754,33 @@
 	struct parport_pc_private *priv = pb->private_data;
 	unsigned char r = 0xc;
 
-	outb (r, CONTROL (pb));
-	if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) {
-		outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */
+	outb(r, CONTROL(pb));
+	if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) {
+		outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */
 
-		r = inb (CONTROL (pb));
-		if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2))
+		r = inb(CONTROL(pb));
+		if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2))
 			goto no_reg; /* Sure that no ECR register exists */
 	}
-	
-	if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1)
+
+	if ((inb(ECONTROL(pb)) & 0x3) != 0x1)
 		goto no_reg;
 
-	ECR_WRITE (pb, 0x34);
-	if (inb (ECONTROL (pb)) != 0x35)
+	ECR_WRITE(pb, 0x34);
+	if (inb(ECONTROL(pb)) != 0x35)
 		goto no_reg;
 
 	priv->ecr = 1;
-	outb (0xc, CONTROL (pb));
-	
+	outb(0xc, CONTROL(pb));
+
 	/* Go to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 
  no_reg:
-	outb (0xc, CONTROL (pb));
-	return 0; 
+	outb(0xc, CONTROL(pb));
+	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
@@ -1727,7 +1790,7 @@
  * allows us to read data from the data lines.  In theory we would get back
  * 0xff but any peripheral attached to the port may drag some or all of the
  * lines down to zero.  So if we get back anything that isn't the contents
- * of the data register we deem PS/2 support to be present. 
+ * of the data register we deem PS/2 support to be present.
  *
  * Some SPP ports have "half PS/2" ability - you can't turn off the line
  * drivers, but an external peripheral with sufficiently beefy drivers of
@@ -1735,26 +1798,28 @@
  * where they can then be read back as normal.  Ports with this property
  * and the right type of device attached are likely to fail the SPP test,
  * (as they will appear to have stuck bits) and so the fact that they might
- * be misdetected here is rather academic. 
+ * be misdetected here is rather academic.
  */
 
 static int parport_PS2_supported(struct parport *pb)
 {
 	int ok = 0;
-  
+
 	clear_epp_timeout(pb);
 
 	/* try to tri-state the buffer */
-	parport_pc_data_reverse (pb);
-	
+	parport_pc_data_reverse(pb);
+
 	parport_pc_write_data(pb, 0x55);
-	if (parport_pc_read_data(pb) != 0x55) ok++;
+	if (parport_pc_read_data(pb) != 0x55)
+		ok++;
 
 	parport_pc_write_data(pb, 0xaa);
-	if (parport_pc_read_data(pb) != 0xaa) ok++;
+	if (parport_pc_read_data(pb) != 0xaa)
+		ok++;
 
 	/* cancel input mode */
-	parport_pc_data_forward (pb);
+	parport_pc_data_forward(pb);
 
 	if (ok) {
 		pb->modes |= PARPORT_MODE_TRISTATE;
@@ -1773,68 +1838,68 @@
 	int config, configb;
 	int pword;
 	struct parport_pc_private *priv = pb->private_data;
-	/* Translate ECP intrLine to ISA irq value */	
-	static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; 
+	/* Translate ECP intrLine to ISA irq value */
+	static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 };
 
 	/* If there is no ECR, we have no hope of supporting ECP. */
 	if (!priv->ecr)
 		return 0;
 
 	/* Find out FIFO depth */
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++)
-		outb (0xaa, FIFO (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++)
+		outb(0xaa, FIFO(pb));
 
 	/*
 	 * Using LGS chipset it uses ECR register, but
 	 * it doesn't support ECP or FIFO MODE
 	 */
 	if (i == 1024) {
-		ECR_WRITE (pb, ECR_SPP << 5);
+		ECR_WRITE(pb, ECR_SPP << 5);
 		return 0;
 	}
 
 	priv->fifo_depth = i;
 	if (verbose_probing)
-		printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
+		printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
 
 	/* Find out writeIntrThreshold */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		inb (FIFO (pb));
-		udelay (50);
-		if (inb (ECONTROL (pb)) & (1<<2))
+		inb(FIFO(pb));
+		udelay(50);
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
+			printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we know we can write if we get an
-                   interrupt. */
+		   interrupt. */
 		i = 0;
 
 	priv->writeIntrThreshold = i;
 
 	/* Find out readIntrThreshold */
-	frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */
-	parport_pc_data_reverse (pb); /* Must be in PS2 mode */
-	frob_set_mode (pb, ECR_TST); /* Test FIFO */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */
+	parport_pc_data_reverse(pb); /* Must be in PS2 mode */
+	frob_set_mode(pb, ECR_TST); /* Test FIFO */
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		outb (0xaa, FIFO (pb));
-		if (inb (ECONTROL (pb)) & (1<<2))
+		outb(0xaa, FIFO(pb));
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n",
+			printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we can read if we get an interrupt. */
@@ -1842,23 +1907,23 @@
 
 	priv->readIntrThreshold = i;
 
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, 0xf4); /* Configuration mode */
-	config = inb (CONFIGA (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, 0xf4); /* Configuration mode */
+	config = inb(CONFIGA(pb));
 	pword = (config >> 4) & 0x7;
 	switch (pword) {
 	case 0:
 		pword = 2;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	case 2:
 		pword = 4;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	default:
-		printk (KERN_WARNING "0x%lx: Unknown implementation ID\n",
+		printk(KERN_WARNING "0x%lx: Unknown implementation ID\n",
 			pb->base);
 		/* Assume 1 */
 	case 1:
@@ -1867,28 +1932,29 @@
 	priv->pword = pword;
 
 	if (verbose_probing) {
-		printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword);
-		
-		printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
+		printk(KERN_DEBUG "0x%lx: PWord is %d bits\n",
+			pb->base, 8 * pword);
+
+		printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
 			config & 0x80 ? "Level" : "Pulses");
 
-		configb = inb (CONFIGB (pb));
-		printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
+		configb = inb(CONFIGB(pb));
+		printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
 			pb->base, config, configb);
-		printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
-		if ((configb >>3) & 0x07)
-			printk("%d",intrline[(configb >>3) & 0x07]);
+		printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
+		if ((configb >> 3) & 0x07)
+			printk("%d", intrline[(configb >> 3) & 0x07]);
 		else
 			printk("<none or set by other means>");
-		printk (" dma=");
-		if( (configb & 0x03 ) == 0x00)
+		printk(" dma=");
+		if ((configb & 0x03) == 0x00)
 			printk("<none or set by other means>\n");
 		else
-			printk("%d\n",configb & 0x07);
+			printk("%d\n", configb & 0x07);
 	}
 
 	/* Go back to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 }
@@ -1903,10 +1969,10 @@
 	if (!priv->ecr)
 		return 0;
 
-	oecr = inb (ECONTROL (pb));
-	ECR_WRITE (pb, ECR_PS2 << 5);
+	oecr = inb(ECONTROL(pb));
+	ECR_WRITE(pb, ECR_PS2 << 5);
 	result = parport_PS2_supported(pb);
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return result;
 }
 
@@ -1930,16 +1996,15 @@
 	 */
 
 	/* If EPP timeout bit clear then EPP available */
-	if (!clear_epp_timeout(pb)) {
+	if (!clear_epp_timeout(pb))
 		return 0;  /* No way to clear timeout */
-	}
 
 	/* Check for Intel bug. */
 	if (priv->ecr) {
 		unsigned char i;
 		for (i = 0x00; i < 0x80; i += 0x20) {
-			ECR_WRITE (pb, i);
-			if (clear_epp_timeout (pb)) {
+			ECR_WRITE(pb, i);
+			if (clear_epp_timeout(pb)) {
 				/* Phony EPP in ECP. */
 				return 0;
 			}
@@ -1963,17 +2028,16 @@
 	int result;
 	unsigned char oecr;
 
-	if (!priv->ecr) {
+	if (!priv->ecr)
 		return 0;
-	}
 
-	oecr = inb (ECONTROL (pb));
+	oecr = inb(ECONTROL(pb));
 	/* Search for SMC style EPP+ECP mode */
-	ECR_WRITE (pb, 0x80);
-	outb (0x04, CONTROL (pb));
+	ECR_WRITE(pb, 0x80);
+	outb(0x04, CONTROL(pb));
 	result = parport_EPP_supported(pb);
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 
 	if (result) {
 		/* Set up access functions to use ECP+EPP hardware. */
@@ -1991,11 +2055,25 @@
 /* Don't bother probing for modes we know we won't use. */
 static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int parport_ECP_supported(struct parport *pb) { return 0; }
+static int parport_ECP_supported(struct parport *pb)
+{
+	return 0;
+}
 #endif
-static int __devinit parport_EPP_supported(struct parport *pb) { return 0; }
-static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;}
-static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
+static int __devinit parport_EPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPEPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPPS2_supported(struct parport *pb)
+{
+	return 0;
+}
 
 #endif /* No IEEE 1284 support */
 
@@ -2005,17 +2083,17 @@
 static int programmable_irq_support(struct parport *pb)
 {
 	int irq, intrLine;
-	unsigned char oecr = inb (ECONTROL (pb));
+	unsigned char oecr = inb(ECONTROL(pb));
 	static const int lookup[8] = {
 		PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5
 	};
 
-	ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */
+	ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */
 
-	intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07;
+	intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07;
 	irq = lookup[intrLine];
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return irq;
 }
 
@@ -2025,17 +2103,17 @@
 	unsigned long irqs;
 
 	irqs = probe_irq_on();
-		
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, (ECR_TST << 5) | 0x04);
-	ECR_WRITE (pb, ECR_TST << 5);
+
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, (ECR_TST << 5) | 0x04);
+	ECR_WRITE(pb, ECR_TST << 5);
 
 	/* If Full FIFO sure that writeIntrThreshold is generated */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) 
-		outb (0xaa, FIFO (pb));
-		
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++)
+		outb(0xaa, FIFO(pb));
+
 	pb->irq = probe_irq_off(irqs);
-	ECR_WRITE (pb, ECR_SPP << 5);
+	ECR_WRITE(pb, ECR_SPP << 5);
 
 	if (pb->irq <= 0)
 		pb->irq = PARPORT_IRQ_NONE;
@@ -2045,7 +2123,7 @@
 
 /*
  * This detection seems that only works in National Semiconductors
- * This doesn't work in SMC, LGS, and Winbond 
+ * This doesn't work in SMC, LGS, and Winbond
  */
 static int irq_probe_EPP(struct parport *pb)
 {
@@ -2056,16 +2134,16 @@
 	unsigned char oecr;
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		oecr = inb (ECONTROL (pb));
+		oecr = inb(ECONTROL(pb));
 
 	irqs = probe_irq_on();
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		frob_econtrol (pb, 0x10, 0x10);
-	
+		frob_econtrol(pb, 0x10, 0x10);
+
 	clear_epp_timeout(pb);
-	parport_pc_frob_control (pb, 0x20, 0x20);
-	parport_pc_frob_control (pb, 0x10, 0x10);
+	parport_pc_frob_control(pb, 0x20, 0x20);
+	parport_pc_frob_control(pb, 0x10, 0x10);
 	clear_epp_timeout(pb);
 
 	/* Device isn't expecting an EPP read
@@ -2074,9 +2152,9 @@
 	parport_pc_read_epp(pb);
 	udelay(20);
 
-	pb->irq = probe_irq_off (irqs);
+	pb->irq = probe_irq_off(irqs);
 	if (pb->modes & PARPORT_MODE_PCECR)
-		ECR_WRITE (pb, oecr);
+		ECR_WRITE(pb, oecr);
 	parport_pc_write_control(pb, 0xc);
 
 	if (pb->irq <= 0)
@@ -2133,28 +2211,28 @@
 /* --- DMA detection -------------------------------------- */
 
 /* Only if chipset conforms to ECP ISA Interface Standard */
-static int programmable_dma_support (struct parport *p)
+static int programmable_dma_support(struct parport *p)
 {
-	unsigned char oecr = inb (ECONTROL (p));
+	unsigned char oecr = inb(ECONTROL(p));
 	int dma;
 
-	frob_set_mode (p, ECR_CNF);
-	
-	dma = inb (CONFIGB(p)) & 0x07;
+	frob_set_mode(p, ECR_CNF);
+
+	dma = inb(CONFIGB(p)) & 0x07;
 	/* 000: Indicates jumpered 8-bit DMA if read-only.
 	   100: Indicates jumpered 16-bit DMA if read-only. */
 	if ((dma & 0x03) == 0)
 		dma = PARPORT_DMA_NONE;
 
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return dma;
 }
 
-static int parport_dma_probe (struct parport *p)
+static int parport_dma_probe(struct parport *p)
 {
 	const struct parport_pc_private *priv = p->private_data;
-	if (priv->ecr)
-		p->dma = programmable_dma_support(p); /* ask ECP chipset first */
+	if (priv->ecr)		/* ask ECP chipset first */
+		p->dma = programmable_dma_support(p);
 	if (p->dma == PARPORT_DMA_NONE) {
 		/* ask known Super-IO chips proper, although these
 		   claim ECP compatible, some don't report their DMA
@@ -2212,7 +2290,7 @@
 	if (!base_res)
 		goto out4;
 
-	memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations));
+	memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations));
 	priv->ctr = 0xc;
 	priv->ctr_writable = ~0x10;
 	priv->ecr = 0;
@@ -2239,7 +2317,7 @@
 			if (!parport_EPP_supported(p))
 				parport_ECPEPP_supported(p);
 	}
-	if (!parport_SPP_supported (p))
+	if (!parport_SPP_supported(p))
 		/* No port. */
 		goto out5;
 	if (priv->ecr)
@@ -2247,7 +2325,7 @@
 	else
 		parport_PS2_supported(p);
 
-	p->size = (p->modes & PARPORT_MODE_EPP)?8:3;
+	p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3;
 
 	printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base);
 	if (p->base_hi && priv->ecr)
@@ -2271,7 +2349,7 @@
 		}
 	}
 	if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
-                                           is mandatory (see above) */
+					   is mandatory (see above) */
 		p->dma = PARPORT_DMA_NONE;
 
 #ifdef CONFIG_PARPORT_PC_FIFO
@@ -2288,16 +2366,23 @@
 		if (p->dma != PARPORT_DMA_NONE) {
 			printk(", dma %d", p->dma);
 			p->modes |= PARPORT_MODE_DMA;
-		}
-		else printk(", using FIFO");
-	}
-	else
+		} else
+			printk(", using FIFO");
+	} else
 		/* We can't use the DMA channel after all. */
 		p->dma = PARPORT_DMA_NONE;
 #endif /* Allowed to use FIFO/DMA */
 
 	printk(" [");
-#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}}
+
+#define printmode(x) \
+	{\
+		if (p->modes & PARPORT_MODE_##x) {\
+			printk("%s%s", f ? "," : "", #x);\
+			f++;\
+		} \
+	}
+
 	{
 		int f = 0;
 		printmode(PCSPP);
@@ -2309,10 +2394,10 @@
 	}
 #undef printmode
 #ifndef CONFIG_PARPORT_1284
-	printk ("(,...)");
+	printk("(,...)");
 #endif /* CONFIG_PARPORT_1284 */
 	printk("]\n");
-	if (probedirq != PARPORT_IRQ_NONE) 
+	if (probedirq != PARPORT_IRQ_NONE)
 		printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq);
 
 	/* If No ECP release the ports grabbed above. */
@@ -2328,7 +2413,7 @@
 	if (p->irq != PARPORT_IRQ_NONE) {
 		if (request_irq(p->irq, parport_irq_handler,
 				 irqflags, p->name, p)) {
-			printk (KERN_WARNING "%s: irq %d in use, "
+			printk(KERN_WARNING "%s: irq %d in use, "
 				"resorting to polled operation\n",
 				p->name, p->irq);
 			p->irq = PARPORT_IRQ_NONE;
@@ -2338,8 +2423,8 @@
 #ifdef CONFIG_PARPORT_PC_FIFO
 #ifdef HAS_DMA
 		if (p->dma != PARPORT_DMA_NONE) {
-			if (request_dma (p->dma, p->name)) {
-				printk (KERN_WARNING "%s: dma %d in use, "
+			if (request_dma(p->dma, p->name)) {
+				printk(KERN_WARNING "%s: dma %d in use, "
 					"resorting to PIO operation\n",
 					p->name, p->dma);
 				p->dma = PARPORT_DMA_NONE;
@@ -2349,8 +2434,8 @@
 						       PAGE_SIZE,
 						       &priv->dma_handle,
 						       GFP_KERNEL);
-				if (! priv->dma_buf) {
-					printk (KERN_WARNING "%s: "
+				if (!priv->dma_buf) {
+					printk(KERN_WARNING "%s: "
 						"cannot get buffer for DMA, "
 						"resorting to PIO operation\n",
 						p->name);
@@ -2369,10 +2454,10 @@
 		 * Put the ECP detected port in PS2 mode.
 		 * Do this also for ports that have ECR but don't do ECP.
 		 */
-		ECR_WRITE (p, 0x34);
+		ECR_WRITE(p, 0x34);
 
 	parport_pc_write_data(p, 0);
-	parport_pc_data_forward (p);
+	parport_pc_data_forward(p);
 
 	/* Now that we've told the sharing engine about the port, and
 	   found out its characteristics, let the high-level drivers
@@ -2380,7 +2465,7 @@
 	spin_lock(&ports_lock);
 	list_add(&priv->list, &ports_list);
 	spin_unlock(&ports_lock);
-	parport_announce_port (p);
+	parport_announce_port(p);
 
 	return p;
 
@@ -2393,18 +2478,17 @@
 out4:
 	parport_put_port(p);
 out3:
-	kfree (priv);
+	kfree(priv);
 out2:
-	kfree (ops);
+	kfree(ops);
 out1:
 	if (pdev)
 		platform_device_unregister(pdev);
 	return NULL;
 }
+EXPORT_SYMBOL(parport_pc_probe_port);
 
-EXPORT_SYMBOL (parport_pc_probe_port);
-
-void parport_pc_unregister_port (struct parport *p)
+void parport_pc_unregister_port(struct parport *p)
 {
 	struct parport_pc_private *priv = p->private_data;
 	struct parport_operations *ops = p->ops;
@@ -2430,17 +2514,16 @@
 				    priv->dma_buf,
 				    priv->dma_handle);
 #endif
-	kfree (p->private_data);
+	kfree(p->private_data);
 	parport_put_port(p);
-	kfree (ops); /* hope no-one cached it */
+	kfree(ops); /* hope no-one cached it */
 }
-
-EXPORT_SYMBOL (parport_pc_unregister_port);
+EXPORT_SYMBOL(parport_pc_unregister_port);
 
 #ifdef CONFIG_PCI
 
 /* ITE support maintained by Rich Liu <richliu@poorman.org> */
-static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 					 int autodma,
 					 const struct parport_pc_via_data *via)
 {
@@ -2452,73 +2535,74 @@
 	int irq;
 	int i;
 
-	DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n");
-	
-	// make sure which one chip
-	for(i = 0; i < 5; i++) {
+	DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n");
+
+	/* make sure which one chip */
+	for (i = 0; i < 5; i++) {
 		base_res = request_region(inta_addr[i], 32, "it887x");
 		if (base_res) {
 			int test;
-			pci_write_config_dword (pdev, 0x60,
+			pci_write_config_dword(pdev, 0x60,
 						0xe5000000 | inta_addr[i]);
-			pci_write_config_dword (pdev, 0x78,
+			pci_write_config_dword(pdev, 0x78,
 						0x00000000 | inta_addr[i]);
-			test = inb (inta_addr[i]);
-			if (test != 0xff) break;
+			test = inb(inta_addr[i]);
+			if (test != 0xff)
+				break;
 			release_region(inta_addr[i], 0x8);
 		}
 	}
-	if(i >= 5) {
-		printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
+	if (i >= 5) {
+		printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
 		return 0;
 	}
 
-	type = inb (inta_addr[i] + 0x18);
+	type = inb(inta_addr[i] + 0x18);
 	type &= 0x0f;
 
 	switch (type) {
 	case 0x2:
-		printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xa:
-		printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xe:
-		printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
 		ite8872set = 0x64e00000;
 		break;
 	case 0x6:
-		printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n");
+		printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n");
 		return 0;
 	case 0x8:
-		DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
+		DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
 		return 0;
 	default:
-		printk (KERN_INFO "parport_pc: unknown ITE887x\n");
-		printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' "
+		printk(KERN_INFO "parport_pc: unknown ITE887x\n");
+		printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' "
 			"output to Rich.Liu@ite.com.tw\n");
 		return 0;
 	}
 
-	pci_read_config_byte (pdev, 0x3c, &ite8872_irq);
-	pci_read_config_dword (pdev, 0x1c, &ite8872_lpt);
+	pci_read_config_byte(pdev, 0x3c, &ite8872_irq);
+	pci_read_config_dword(pdev, 0x1c, &ite8872_lpt);
 	ite8872_lpt &= 0x0000ff00;
-	pci_read_config_dword (pdev, 0x20, &ite8872_lpthi);
+	pci_read_config_dword(pdev, 0x20, &ite8872_lpthi);
 	ite8872_lpthi &= 0x0000ff00;
-	pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt);
-	pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi);
-	pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
-	// SET SPP&EPP , Parallel Port NO DMA , Enable All Function
-	// SET Parallel IRQ
-	pci_write_config_dword (pdev, 0x9c,
+	pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt);
+	pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi);
+	pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
+	/* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */
+	/* SET Parallel IRQ */
+	pci_write_config_dword(pdev, 0x9c,
 				ite8872set | (ite8872_irq * 0x11111));
 
-	DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
 		 ite8872_lpt);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
 		 ite8872_lpthi);
 
 	/* Let the user (or defaults) steer us away from interrupts */
@@ -2530,14 +2614,14 @@
 	 * Release the resource so that parport_pc_probe_port can get it.
 	 */
 	release_resource(base_res);
-	if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi,
+	if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
 				   irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
-		printk (KERN_INFO
+		printk(KERN_INFO
 			"parport_pc: ITE 8872 parallel port: io=0x%X",
-			ite8872_lpt);
+								ite8872_lpt);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
-		printk ("\n");
+			printk(", irq=%d", irq);
+		printk("\n");
 		return 1;
 	}
 
@@ -2546,7 +2630,7 @@
 
 /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru>
    based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */
-static int __devinitdata parport_init_mode = 0;
+static int __devinitdata parport_init_mode;
 
 /* Data for two known VIA chips */
 static struct parport_pc_via_data via_686a_data __devinitdata = {
@@ -2568,7 +2652,7 @@
 	0xF6
 };
 
-static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq,
 				    int autodma,
 				    const struct parport_pc_via_data *via)
 {
@@ -2580,38 +2664,38 @@
 
 	printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n");
 
-	switch(parport_init_mode)
-	{
+	switch (parport_init_mode) {
 	case 1:
-	    printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		break;
 	case 2:
-	    printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 3:
-	    printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_EPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    have_epp = 1;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_EPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		have_epp = 1;
+		break;
 	case 4:
-	    printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 5:
-	    printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
-	    have_epp = 1;
-	    break;
-	 default:
-	    printk(KERN_DEBUG "parport_pc: probing current configuration\n");
-	    siofunc = VIA_FUNCTION_PROBE;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
+		have_epp = 1;
+		break;
+	default:
+		printk(KERN_DEBUG
+			"parport_pc: probing current configuration\n");
+		siofunc = VIA_FUNCTION_PROBE;
+		break;
 	}
 	/*
 	 * unlock super i/o configuration
@@ -2622,38 +2706,36 @@
 
 	/* Bits 1-0: Parallel Port Mode / Enable */
 	outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	tmp = inb (VIA_CONFIG_DATA);
+	tmp = inb(VIA_CONFIG_DATA);
 	/* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */
 	outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	tmp2 = inb (VIA_CONFIG_DATA);
-	if (siofunc == VIA_FUNCTION_PROBE)
-	{
-	    siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
-	    ppcontrol = tmp2;
+	tmp2 = inb(VIA_CONFIG_DATA);
+	if (siofunc == VIA_FUNCTION_PROBE) {
+		siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
+		ppcontrol = tmp2;
+	} else {
+		tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
+		tmp |= siofunc;
+		outb(via->viacfg_function, VIA_CONFIG_INDEX);
+		outb(tmp, VIA_CONFIG_DATA);
+		tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
+		tmp2 |= ppcontrol;
+		outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
+		outb(tmp2, VIA_CONFIG_DATA);
 	}
-	else
-	{
-	    tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
-	    tmp |= siofunc;
-	    outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	    outb(tmp, VIA_CONFIG_DATA);
-	    tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
-	    tmp2 |= ppcontrol;
-	    outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	    outb(tmp2, VIA_CONFIG_DATA);
-	}
-	
+
 	/* Parallel Port I/O Base Address, bits 9-2 */
 	outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
 	port1 = inb(VIA_CONFIG_DATA) << 2;
-	
-	printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1);
-	if ((port1 == 0x3BC) && have_epp)
-	{
-	    outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
-	    outb((0x378 >> 2), VIA_CONFIG_DATA);
-	    printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n");
-	    port1 = 0x378;
+
+	printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",
+									port1);
+	if (port1 == 0x3BC && have_epp) {
+		outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
+		outb((0x378 >> 2), VIA_CONFIG_DATA);
+		printk(KERN_DEBUG
+			"parport_pc: Parallel port base changed to 0x378\n");
+		port1 = 0x378;
 	}
 
 	/*
@@ -2667,36 +2749,39 @@
 		printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n");
 		return 0;
 	}
-	
+
 	/* Bits 7-4: PnP Routing for Parallel Port IRQ */
 	pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp);
 	irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4);
 
-	if (siofunc == VIA_FUNCTION_PARPORT_ECP)
-	{
-	    /* Bits 3-2: PnP Routing for Parallel Port DMA */
-	    pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
-	    dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
-	}
-	else
-	    /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */
-	    dma = PARPORT_DMA_NONE;
+	if (siofunc == VIA_FUNCTION_PARPORT_ECP) {
+		/* Bits 3-2: PnP Routing for Parallel Port DMA */
+		pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
+		dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
+	} else
+		/* if ECP not enabled, DMA is not enabled, assumed
+		   bogus 'dma' value */
+		dma = PARPORT_DMA_NONE;
 
 	/* Let the user (or defaults) steer us away from interrupts and DMA */
 	if (autoirq == PARPORT_IRQ_NONE) {
-	    irq = PARPORT_IRQ_NONE;
-	    dma = PARPORT_DMA_NONE;
+		irq = PARPORT_IRQ_NONE;
+		dma = PARPORT_DMA_NONE;
 	}
 	if (autodma == PARPORT_DMA_NONE)
-	    dma = PARPORT_DMA_NONE;
+		dma = PARPORT_DMA_NONE;
 
 	switch (port1) {
-	case 0x3bc: port2 = 0x7bc; break;
-	case 0x378: port2 = 0x778; break;
-	case 0x278: port2 = 0x678; break;
+	case 0x3bc:
+		port2 = 0x7bc; break;
+	case 0x378:
+		port2 = 0x778; break;
+	case 0x278:
+		port2 = 0x678; break;
 	default:
-		printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
-			port1);
+		printk(KERN_INFO
+			"parport_pc: Weird VIA parport base 0x%X, ignoring\n",
+									port1);
 		return 0;
 	}
 
@@ -2714,17 +2799,17 @@
 	}
 
 	/* finally, do the probe with values obtained */
-	if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) {
-		printk (KERN_INFO
+	if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) {
+		printk(KERN_INFO
 			"parport_pc: VIA parallel port: io=0x%X", port1);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
+			printk(", irq=%d", irq);
 		if (dma != PARPORT_DMA_NONE)
-			printk (", dma=%d", dma);
-		printk ("\n");
+			printk(", dma=%d", dma);
+		printk("\n");
 		return 1;
 	}
-	
+
 	printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n",
 		port1, irq, dma);
 	return 0;
@@ -2732,8 +2817,8 @@
 
 
 enum parport_pc_sio_types {
-	sio_via_686a = 0,	/* Via VT82C686A motherboard Super I/O */
-	sio_via_8231,		/* Via VT8231 south bridge integrated Super IO */
+	sio_via_686a = 0,   /* Via VT82C686A motherboard Super I/O */
+	sio_via_8231,	    /* Via VT8231 south bridge integrated Super IO */
 	sio_ite_8872,
 	last_sio
 };
@@ -2804,15 +2889,15 @@
 };
 
 
-/* each element directly indexed from enum list, above 
+/* each element directly indexed from enum list, above
  * (but offset by last_sio) */
 static struct parport_pc_pci {
 	int numports;
 	struct { /* BAR (base address registers) numbers in the config
-                    space header */
+		    space header */
 		int lo;
-		int hi; /* -1 if not there, >6 for offset-method (max
-                           BAR is 6) */
+		int hi;
+		/* -1 if not there, >6 for offset-method (max BAR is 6) */
 	} addr[4];
 
 	/* If set, this is called immediately after pci_enable_device.
@@ -2857,7 +2942,7 @@
 	/* timedia_4018  */             { 2, { { 0, 1 }, { 2, 3 }, } },
 	/* timedia_9018a */             { 2, { { 0, 1 }, { 2, 3 }, } },
 					/* SYBA uses fixed offsets in
-                                           a 1K io window */
+					   a 1K io window */
 	/* syba_2p_epp AP138B */	{ 2, { { 0, 0x078 }, { 0, 0x178 }, } },
 	/* syba_1p_ecp W83787 */	{ 1, { { 0, 0x078 }, } },
 	/* titan_010l */		{ 1, { { 3, -1 }, } },
@@ -2873,11 +2958,14 @@
 	/* oxsemi_pcie_pport */		{ 1, { { 0, 1 }, } },
 	/* aks_0100 */                  { 1, { { 0, -1 }, } },
 	/* mobility_pp */		{ 1, { { 0, 1 }, } },
-	/* netmos_9705 */               { 1, { { 0, -1 }, } }, /* untested */
-        /* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-        /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-	/* netmos_9805 */               { 1, { { 0, -1 }, } }, /* untested */
-	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
+
+	/* The netmos entries below are untested */
+	/* netmos_9705 */               { 1, { { 0, -1 }, } },
+	/* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9805 */               { 1, { { 0, -1 }, } },
+	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
+
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 };
 
@@ -2906,7 +2994,7 @@
 	{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar },
 	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
-	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 },
+	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 },
 	/* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/
 	{ 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a },
 	{ 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h },
@@ -2940,7 +3028,8 @@
 	{ 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
 	{ 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
 	/* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
-	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */
+	/* AFAVLAB_TK9902 */
+	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
 	{ 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p},
 	{ PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 },
@@ -2983,14 +3072,14 @@
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
 	{ 0, } /* terminate list */
 };
-MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl);
+MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
 
 struct pci_parport_data {
 	int num;
 	struct parport *ports[2];
 };
 
-static int parport_pc_pci_probe (struct pci_dev *dev,
+static int parport_pc_pci_probe(struct pci_dev *dev,
 					   const struct pci_device_id *id)
 {
 	int err, count, n, i = id->driver_data;
@@ -3003,7 +3092,8 @@
 	/* This is a PCI card */
 	i -= last_sio;
 	count = 0;
-	if ((err = pci_enable_device (dev)) != 0)
+	err = pci_enable_device(dev);
+	if (err)
 		return err;
 
 	data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL);
@@ -3011,7 +3101,7 @@
 		return -ENOMEM;
 
 	if (cards[i].preinit_hook &&
-	    cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
+	    cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
 		kfree(data);
 		return -ENODEV;
 	}
@@ -3021,25 +3111,25 @@
 		int hi = cards[i].addr[n].hi;
 		int irq;
 		unsigned long io_lo, io_hi;
-		io_lo = pci_resource_start (dev, lo);
+		io_lo = pci_resource_start(dev, lo);
 		io_hi = 0;
 		if ((hi >= 0) && (hi <= 6))
-			io_hi = pci_resource_start (dev, hi);
+			io_hi = pci_resource_start(dev, hi);
 		else if (hi > 6)
 			io_lo += hi; /* Reinterpret the meaning of
-                                        "hi" as an offset (see SYBA
-                                        def.) */
+					"hi" as an offset (see SYBA
+					def.) */
 		/* TODO: test if sharing interrupts works */
 		irq = dev->irq;
 		if (irq == IRQ_NONE) {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
 				io_lo, io_hi);
 			irq = PARPORT_IRQ_NONE;
 		} else {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
@@ -3056,7 +3146,7 @@
 	data->num = count;
 
 	if (cards[i].postinit_hook)
-		cards[i].postinit_hook (dev, count == 0);
+		cards[i].postinit_hook(dev, count == 0);
 
 	if (count) {
 		pci_set_drvdata(dev, data);
@@ -3090,7 +3180,7 @@
 	.remove		= __devexit_p(parport_pc_pci_remove),
 };
 
-static int __init parport_pc_init_superio (int autoirq, int autodma)
+static int __init parport_pc_init_superio(int autoirq, int autodma)
 {
 	const struct pci_device_id *id;
 	struct pci_dev *pdev = NULL;
@@ -3101,8 +3191,9 @@
 		if (id == NULL || id->driver_data >= last_sio)
 			continue;
 
-		if (parport_pc_superio_info[id->driver_data].probe
-			(pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) {
+		if (parport_pc_superio_info[id->driver_data].probe(
+			pdev, autoirq, autodma,
+			parport_pc_superio_info[id->driver_data].via)) {
 			ret++;
 		}
 	}
@@ -3111,7 +3202,10 @@
 }
 #else
 static struct pci_driver parport_pc_pci_driver;
-static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;}
+static int __init parport_pc_init_superio(int autoirq, int autodma)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI */
 
 #ifdef CONFIG_PNP
@@ -3124,44 +3218,45 @@
 	{ }
 };
 
-MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl);
+MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl);
 
-static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
+static int parport_pc_pnp_probe(struct pnp_dev *dev,
+						const struct pnp_device_id *id)
 {
 	struct parport *pdata;
 	unsigned long io_lo, io_hi;
 	int dma, irq;
 
-	if (pnp_port_valid(dev,0) &&
-		!(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) {
-		io_lo = pnp_port_start(dev,0);
+	if (pnp_port_valid(dev, 0) &&
+		!(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		io_lo = pnp_port_start(dev, 0);
 	} else
 		return -EINVAL;
 
-	if (pnp_port_valid(dev,1) &&
-		!(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) {
-		io_hi = pnp_port_start(dev,1);
+	if (pnp_port_valid(dev, 1) &&
+		!(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) {
+		io_hi = pnp_port_start(dev, 1);
 	} else
 		io_hi = 0;
 
-	if (pnp_irq_valid(dev,0) &&
-		!(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) {
-		irq = pnp_irq(dev,0);
+	if (pnp_irq_valid(dev, 0) &&
+		!(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		irq = pnp_irq(dev, 0);
 	} else
 		irq = PARPORT_IRQ_NONE;
 
-	if (pnp_dma_valid(dev,0) &&
-		!(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) {
-		dma = pnp_dma(dev,0);
+	if (pnp_dma_valid(dev, 0) &&
+		!(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		dma = pnp_dma(dev, 0);
 	} else
 		dma = PARPORT_DMA_NONE;
 
 	dev_info(&dev->dev, "reported by %s\n", dev->protocol->name);
-	if (!(pdata = parport_pc_probe_port(io_lo, io_hi,
-					irq, dma, &dev->dev, 0)))
+	pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0);
+	if (pdata == NULL)
 		return -ENODEV;
 
-	pnp_set_drvdata(dev,pdata);
+	pnp_set_drvdata(dev, pdata);
 	return 0;
 }
 
@@ -3203,7 +3298,7 @@
 
 /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */
 static int __devinit __attribute__((unused))
-parport_pc_find_isa_ports (int autoirq, int autodma)
+parport_pc_find_isa_ports(int autoirq, int autodma)
 {
 	int count = 0;
 
@@ -3227,7 +3322,7 @@
  * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
  * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
  */
-static void __init parport_pc_find_ports (int autoirq, int autodma)
+static void __init parport_pc_find_ports(int autoirq, int autodma)
 {
 	int count = 0, err;
 
@@ -3261,11 +3356,18 @@
  *	syntax and keep in mind that code below is a cleaned up version.
  */
 
-static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 };
-static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] =
-	{ [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO };
-static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE };
-static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY };
+static int __initdata io[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = 0
+};
+static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO
+};
+static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE
+};
+static int __initdata irqval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY
+};
 
 static int __init parport_parse_param(const char *s, int *val,
 				int automatic, int none, int nofifo)
@@ -3306,18 +3408,19 @@
 #ifdef CONFIG_PCI
 static int __init parport_init_mode_setup(char *str)
 {
-	printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
+	printk(KERN_DEBUG
+	     "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
 
-	if (!strcmp (str, "spp"))
-		parport_init_mode=1;
-	if (!strcmp (str, "ps2"))
-		parport_init_mode=2;
-	if (!strcmp (str, "epp"))
-		parport_init_mode=3;
-	if (!strcmp (str, "ecp"))
-		parport_init_mode=4;
-	if (!strcmp (str, "ecpepp"))
-		parport_init_mode=5;
+	if (!strcmp(str, "spp"))
+		parport_init_mode = 1;
+	if (!strcmp(str, "ps2"))
+		parport_init_mode = 2;
+	if (!strcmp(str, "epp"))
+		parport_init_mode = 3;
+	if (!strcmp(str, "ecp"))
+		parport_init_mode = 4;
+	if (!strcmp(str, "ecpepp"))
+		parport_init_mode = 5;
 	return 1;
 }
 #endif
@@ -3341,7 +3444,8 @@
 #endif
 #ifdef CONFIG_PCI
 static char *init_mode;
-MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
+MODULE_PARM_DESC(init_mode,
+	"Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
 module_param(init_mode, charp, 0);
 #endif
 
@@ -3372,7 +3476,7 @@
 				irqval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: irq specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3385,7 +3489,7 @@
 				dmaval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: dma specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3396,7 +3500,7 @@
 
 #else
 
-static int parport_setup_ptr __initdata = 0;
+static int parport_setup_ptr __initdata;
 
 /*
  * Acceptable parameters:
@@ -3407,7 +3511,7 @@
  *
  * IRQ/DMA may be numeric or 'auto' or 'none'
  */
-static int __init parport_setup (char *str)
+static int __init parport_setup(char *str)
 {
 	char *endptr;
 	char *sep;
@@ -3419,15 +3523,15 @@
 		return 1;
 	}
 
-	if (!strncmp (str, "auto", 4)) {
+	if (!strncmp(str, "auto", 4)) {
 		irqval[0] = PARPORT_IRQ_AUTO;
 		dmaval[0] = PARPORT_DMA_AUTO;
 		return 1;
 	}
 
-	val = simple_strtoul (str, &endptr, 0);
+	val = simple_strtoul(str, &endptr, 0);
 	if (endptr == str) {
-		printk (KERN_WARNING "parport=%s not understood\n", str);
+		printk(KERN_WARNING "parport=%s not understood\n", str);
 		return 1;
 	}
 
@@ -3461,7 +3565,7 @@
 	return io[0] == PARPORT_DISABLE;
 }
 
-__setup ("parport=", parport_setup);
+__setup("parport=", parport_setup);
 
 /*
  * Acceptable parameters:
@@ -3469,7 +3573,7 @@
  * parport_init_mode=[spp|ps2|epp|ecp|ecpepp]
  */
 #ifdef CONFIG_PCI
-__setup("parport_init_mode=",parport_init_mode_setup);
+__setup("parport_init_mode=", parport_init_mode_setup);
 #endif
 #endif
 
@@ -3493,13 +3597,13 @@
 		for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) {
 			if (!io[i])
 				break;
-			if ((io_hi[i]) == PARPORT_IOHI_AUTO)
-			       io_hi[i] = 0x400 + io[i];
+			if (io_hi[i] == PARPORT_IOHI_AUTO)
+				io_hi[i] = 0x400 + io[i];
 			parport_pc_probe_port(io[i], io_hi[i],
-					  irqval[i], dmaval[i], NULL, 0);
+					irqval[i], dmaval[i], NULL, 0);
 		}
 	} else
-		parport_pc_find_ports (irqval[0], dmaval[0]);
+		parport_pc_find_ports(irqval[0], dmaval[0]);
 
 	return 0;
 }
@@ -3507,9 +3611,9 @@
 static void __exit parport_pc_exit(void)
 {
 	if (pci_registered_parport)
-		pci_unregister_driver (&parport_pc_pci_driver);
+		pci_unregister_driver(&parport_pc_pci_driver);
 	if (pnp_registered_parport)
-		pnp_unregister_driver (&parport_pc_pnp_driver);
+		pnp_unregister_driver(&parport_pc_pnp_driver);
 	platform_driver_unregister(&parport_pc_platform_driver);
 
 	while (!list_empty(&ports_list)) {
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index dd18f85..42e4260 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -153,45 +153,47 @@
 		return -1;
 	}
 	for (loop = 0; loop < len; loop++) {
-		if ((*cur_slot)->number == rtable->slots[loop].slot) {
-		if ((*cur_slot)->bus == rtable->slots[loop].bus) {
+		if ((*cur_slot)->number == rtable->slots[loop].slot &&
+		    (*cur_slot)->bus == rtable->slots[loop].bus) {
+			struct io_apic_irq_attr irq_attr;
+
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i);
+						(int) (*cur_slot)->device, i,
+						&irq_attr);
 
-				debug("(*cur_slot)->irq[0] = %x\n",
-						(*cur_slot)->irq[0]);
-				debug("(*cur_slot)->irq[1] = %x\n",
-						(*cur_slot)->irq[1]);
-				debug("(*cur_slot)->irq[2] = %x\n",
-						(*cur_slot)->irq[2]);
-				debug("(*cur_slot)->irq[3] = %x\n",
-						(*cur_slot)->irq[3]);
+			debug("(*cur_slot)->irq[0] = %x\n",
+					(*cur_slot)->irq[0]);
+			debug("(*cur_slot)->irq[1] = %x\n",
+					(*cur_slot)->irq[1]);
+			debug("(*cur_slot)->irq[2] = %x\n",
+					(*cur_slot)->irq[2]);
+			debug("(*cur_slot)->irq[3] = %x\n",
+					(*cur_slot)->irq[3]);
 
-				debug("rtable->exlusive_irqs = %x\n",
+			debug("rtable->exlusive_irqs = %x\n",
 					rtable->exclusive_irqs);
-				debug("rtable->slots[loop].irq[0].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[0].bitmap = %x\n",
 					rtable->slots[loop].irq[0].bitmap);
-				debug("rtable->slots[loop].irq[1].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[1].bitmap = %x\n",
 					rtable->slots[loop].irq[1].bitmap);
-				debug("rtable->slots[loop].irq[2].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[2].bitmap = %x\n",
 					rtable->slots[loop].irq[2].bitmap);
-				debug("rtable->slots[loop].irq[3].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[3].bitmap = %x\n",
 					rtable->slots[loop].irq[3].bitmap);
 
-				debug("rtable->slots[loop].irq[0].link = %x\n",
+			debug("rtable->slots[loop].irq[0].link = %x\n",
 					rtable->slots[loop].irq[0].link);
-				debug("rtable->slots[loop].irq[1].link = %x\n",
+			debug("rtable->slots[loop].irq[1].link = %x\n",
 					rtable->slots[loop].irq[1].link);
-				debug("rtable->slots[loop].irq[2].link = %x\n",
+			debug("rtable->slots[loop].irq[2].link = %x\n",
 					rtable->slots[loop].irq[2].link);
-				debug("rtable->slots[loop].irq[3].link = %x\n",
+			debug("rtable->slots[loop].irq[3].link = %x\n",
 					rtable->slots[loop].irq[3].link);
-				debug("end of init_devno\n");
-				kfree(rtable);
-				return 0;
-			}
+			debug("end of init_devno\n");
+			kfree(rtable);
+			return 0;
 		}
 	}
 
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 6808d83..737a1c4 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -98,6 +98,7 @@
 	int max_irq;
 	int pos;
 	int irq;
+	int node;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -125,7 +126,8 @@
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
-	irq = create_irq();
+	node = dev_to_node(&dev->dev);
+	irq = create_irq_nr(0, node);
 
 	if (irq <= 0) {
 		kfree(cfg);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a563fbe..cd38916 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1972,15 +1972,6 @@
 		}
 	}
 
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled) {
-		ret = enable_intr_remapping(0);
-		if (ret)
-			printk(KERN_ERR
-			       "IOMMU: enable interrupt remapping failed\n");
-	}
-#endif
-
 	/*
 	 * For each rmrr
 	 *   for each dev attached to rmrr
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea7..3a0cb0b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -15,6 +15,14 @@
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
+static int disable_intremap;
+static __init int setup_nointremap(char *str)
+{
+	disable_intremap = 1;
+	return 0;
+}
+early_param("nointremap", setup_nointremap);
+
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
@@ -23,15 +31,12 @@
 };
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
 {
 	struct irq_2_iommu *iommu;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
 
 	return iommu;
 }
@@ -48,7 +53,7 @@
 	return desc->irq_2_iommu;
 }
 
-static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
@@ -56,7 +61,7 @@
 	/*
 	 * alloc irq desc if not allocated already.
 	 */
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 		return NULL;
@@ -65,14 +70,14 @@
 	irq_iommu = desc->irq_2_iommu;
 
 	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(node);
 
 	return desc->irq_2_iommu;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
-	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+	return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id));
 }
 
 #else /* !CONFIG_SPARSE_IRQ */
@@ -423,20 +428,6 @@
 		      readl, (sts & DMA_GSTS_IRTPS), sts);
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-	if (mode == 0) {
-		spin_lock_irqsave(&iommu->register_lock, flags);
-
-		/* enable comaptiblity format interrupt pass through */
-		cmd = iommu->gcmd | DMA_GCMD_CFI;
-		iommu->gcmd |= DMA_GCMD_CFI;
-		writel(cmd, iommu->reg + DMAR_GCMD_REG);
-
-		IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-			      readl, (sts & DMA_GSTS_CFIS), sts);
-
-		spin_unlock_irqrestore(&iommu->register_lock, flags);
-	}
-
 	/*
 	 * global invalidation of interrupt entry cache before enabling
 	 * interrupt-remapping.
@@ -516,6 +507,23 @@
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+int __init intr_remapping_supported(void)
+{
+	struct dmar_drhd_unit *drhd;
+
+	if (disable_intremap)
+		return 0;
+
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (!ecap_ir_support(iommu->ecap))
+			return 0;
+	}
+
+	return 1;
+}
+
 int __init enable_intr_remapping(int eim)
 {
 	struct dmar_drhd_unit *drhd;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3c3e08..f1ae247 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -745,6 +745,8 @@
 
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
+	/* device class may be changed after fixup */
+	class = dev->class >> 8;
 
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index adf1785..7f207f3 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -123,7 +123,7 @@
 	}
 
 	flags = irq_flags(triggering, polarity, shareable);
-	irq = acpi_register_gsi(gsi, triggering, polarity);
+	irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (irq >= 0)
 		pcibios_penalize_isa_irq(irq, 1);
 	else
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 4e9851f..277d35d 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -692,7 +692,7 @@
 	tristate "Generic RTC support"
 	# Please consider writing a new RTC driver instead of using the generic
 	# RTC abstraction
-	depends on PARISC || M68K || PPC
+	depends on PARISC || M68K || PPC || SUPERH32
 	help
 	  Say Y or M here to enable RTC support on systems using the generic
 	  RTC abstraction. If you do not know what you are doing, you should
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index d181527..27a1be0 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -603,7 +603,7 @@
 	if (dasd_profile_level != DASD_PROFILE_ON)
 		return;
 
-	sectors = req->nr_sectors;
+	sectors = blk_rq_sectors(req);
 	if (!cqr->buildclk || !cqr->startclk ||
 	    !cqr->stopclk || !cqr->endclk ||
 	    !sectors)
@@ -1614,15 +1614,6 @@
 }
 
 /*
- * posts the buffer_cache about a finalized request
- */
-static inline void dasd_end_request(struct request *req, int error)
-{
-	if (__blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
-/*
  * Process finished error recovery ccw.
  */
 static inline void __dasd_block_process_erp(struct dasd_block *block,
@@ -1665,18 +1656,14 @@
 	if (basedev->state < DASD_STATE_READY)
 		return;
 	/* Now we try to fetch requests from the request queue */
-	while (!blk_queue_plugged(queue) &&
-	       elv_next_request(queue)) {
-
-		req = elv_next_request(queue);
-
+	while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
 		if (basedev->features & DASD_FEATURE_READONLY &&
 		    rq_data_dir(req) == WRITE) {
 			DBF_DEV_EVENT(DBF_ERR, basedev,
 				      "Rejecting write request %p",
 				      req);
-			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			blk_start_request(req);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -1704,8 +1691,8 @@
 				      "CCW creation failed (rc=%ld) "
 				      "on request %p",
 				      PTR_ERR(cqr), req);
-			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			blk_start_request(req);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		/*
@@ -1714,7 +1701,7 @@
 		 */
 		cqr->callback_data = (void *) req;
 		cqr->status = DASD_CQR_FILLED;
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		list_add_tail(&cqr->blocklist, &block->ccw_queue);
 		dasd_profile_start(block, cqr, req);
 	}
@@ -1731,7 +1718,7 @@
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status <= 0)
 		error = status ? status : -EIO;
-	dasd_end_request(req, error);
+	__blk_end_request_all(req, error);
 }
 
 /*
@@ -2003,7 +1990,7 @@
 {
 	int max;
 
-	blk_queue_hardsect_size(block->request_queue, block->bp_block);
+	blk_queue_logical_block_size(block->request_queue, block->bp_block);
 	max = block->base->discipline->max_blocks << block->s2b_shift;
 	blk_queue_max_sectors(block->request_queue, max);
 	blk_queue_max_phys_segments(block->request_queue, -1L);
@@ -2038,10 +2025,8 @@
 		return;
 
 	spin_lock_irq(&block->request_queue_lock);
-	while ((req = elv_next_request(block->request_queue))) {
-		blkdev_dequeue_request(req);
-		dasd_end_request(req, -EIO);
-	}
+	while ((req = blk_fetch_request(block->request_queue)))
+		__blk_end_request_all(req, -EIO);
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index b9a7f77..2efaddf 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -505,8 +505,9 @@
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	rq_for_each_segment(bv, req, iter) {
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index cb52da0..a41c940 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2354,10 +2354,10 @@
 	blksize = block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
 	/* Calculate record id of first and last block. */
-	first_rec = first_trk = req->sector >> block->s2b_shift;
+	first_rec = first_trk = blk_rq_pos(req) >> block->s2b_shift;
 	first_offs = sector_div(first_trk, blk_per_trk);
 	last_rec = last_trk =
-		(req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	last_offs = sector_div(last_trk, blk_per_trk);
 	cdlspecial = (private->uses_cdl && first_rec < 2*blk_per_trk);
 
@@ -2420,7 +2420,7 @@
 	private = (struct dasd_eckd_private *) cqr->block->base->private;
 	blksize = cqr->block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
-	recid = req->sector >> cqr->block->s2b_shift;
+	recid = blk_rq_pos(req) >> cqr->block->s2b_shift;
 	ccw = cqr->cpaddr;
 	/* Skip over define extent & locate record. */
 	ccw++;
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index a3eb6fd..8912358 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -270,8 +270,9 @@
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	cidaw = 0;
@@ -309,7 +310,7 @@
 	ccw = cqr->cpaddr;
 	/* First ccw is define extent. */
 	define_extent(ccw++, cqr->data, rq_data_dir(req),
-		      block->bp_block, req->sector, req->nr_sectors);
+		      block->bp_block, blk_rq_pos(req), blk_rq_sectors(req));
 	/* Build locate_record + read/write ccws. */
 	idaws = (unsigned long *) (cqr->data + sizeof(struct DE_fba_data));
 	LO_data = (struct LO_fba_data *) (idaws + cidaw);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index cfdcf1a..a4c7ffc 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -602,7 +602,7 @@
 	dev_info->gd->private_data = dev_info;
 	dev_info->gd->driverfs_dev = &dev_info->dev;
 	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
-	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
+	blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
 
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
 	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 76814f3..0ae0c83 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -343,7 +343,7 @@
 			goto out;
 		}
 		blk_queue_make_request(xpram_queues[i], xpram_make_request);
-		blk_queue_hardsect_size(xpram_queues[i], 4096);
+		blk_queue_logical_block_size(xpram_queues[i], 4096);
 	}
 
 	/*
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5f8e8ef..2d00a38 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -1134,7 +1134,7 @@
 	/* Setup ccws. */
 	request->op = TO_BLOCK;
 	start_block = (struct tape_34xx_block_id *) request->cpdata;
-	start_block->block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block->block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block->block);
 
 	ccw = request->cpaddr;
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 823b05b..c453b2f 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -633,7 +633,7 @@
 	struct req_iterator iter;
 
 	DBF_EVENT(6, "xBREDid:");
-	start_block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block);
 
 	rq_for_each_segment(bv, req, iter)
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index f32e89e..47ff695 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -74,13 +74,6 @@
  * Post finished request.
  */
 static void
-tapeblock_end_request(struct request *req, int error)
-{
-	if (blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
-static void
 __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 {
 	struct tape_device *device;
@@ -90,17 +83,17 @@
 
 	device = ccw_req->device;
 	req = (struct request *) data;
-	tapeblock_end_request(req, (ccw_req->rc == 0) ? 0 : -EIO);
+	blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO);
 	if (ccw_req->rc == 0)
 		/* Update position. */
 		device->blk_data.block_position =
-			(req->sector + req->nr_sectors) >> TAPEBLOCK_HSEC_S2B;
+		  (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B;
 	else
 		/* We lost the position information due to an error. */
 		device->blk_data.block_position = -1;
 	device->discipline->free_bread(ccw_req);
 	if (!list_empty(&device->req_queue) ||
-	    elv_next_request(device->blk_data.request_queue))
+	    blk_peek_request(device->blk_data.request_queue))
 		tapeblock_trigger_requeue(device);
 }
 
@@ -118,7 +111,7 @@
 	ccw_req = device->discipline->bread(device, req);
 	if (IS_ERR(ccw_req)) {
 		DBF_EVENT(1, "TBLOCK: bread failed\n");
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		return PTR_ERR(ccw_req);
 	}
 	ccw_req->callback = __tapeblock_end_request;
@@ -131,7 +124,7 @@
 		 * Start/enqueueing failed. No retries in
 		 * this case.
 		 */
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		device->discipline->free_bread(ccw_req);
 	}
 
@@ -169,19 +162,16 @@
 	spin_lock_irq(&device->blk_data.request_queue_lock);
 	while (
 		!blk_queue_plugged(queue) &&
-		elv_next_request(queue)   &&
+		(req = blk_fetch_request(queue)) &&
 		nr_queued < TAPEBLOCK_MIN_REQUEUE
 	) {
-		req = elv_next_request(queue);
 		if (rq_data_dir(req) == WRITE) {
 			DBF_EVENT(1, "TBLOCK: Rejecting write request\n");
-			blkdev_dequeue_request(req);
 			spin_unlock_irq(&device->blk_data.request_queue_lock);
-			tapeblock_end_request(req, -EIO);
+			blk_end_request_all(req, -EIO);
 			spin_lock_irq(&device->blk_data.request_queue_lock);
 			continue;
 		}
-		blkdev_dequeue_request(req);
 		nr_queued++;
 		spin_unlock_irq(&device->blk_data.request_queue_lock);
 		rc = tapeblock_start_request(device, req);
@@ -232,7 +222,7 @@
 	if (rc)
 		goto cleanup_queue;
 
-	blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
+	blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
 	blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
 	blk_queue_max_phys_segments(blkdat->request_queue, -1L);
 	blk_queue_max_hw_segments(blkdat->request_queue, -1L);
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index cbc8566..e38e5d3 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -173,8 +173,9 @@
  * this device and sets it up.
  */
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
-				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				     unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -194,7 +195,7 @@
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, (void *) config->address,
-				 kvm_notify, callback);
+				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
@@ -226,6 +227,38 @@
 				       KVM_S390_VIRTIO_RING_ALIGN));
 }
 
+static void kvm_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		kvm_del_vq(vq);
+}
+
+static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[])
+{
+	struct kvm_device *kdev = to_kvmdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > kdev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	kvm_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -237,8 +270,8 @@
 	.get_status = kvm_get_status,
 	.set_status = kvm_set_status,
 	.reset = kvm_reset,
-	.find_vq = kvm_find_vq,
-	.del_vq = kvm_del_vq,
+	.find_vqs = kvm_find_vqs,
+	.del_vqs = kvm_del_vqs,
 };
 
 /*
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 733fe3b..b2fe5cd 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -11,6 +11,24 @@
 
 #include "zfcp_ext.h"
 
+#define ZFCP_MODEL_PRIV 0x4
+
+static struct ccw_device_id zfcp_ccw_device_id[] = {
+	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x3) },
+	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, ZFCP_MODEL_PRIV) },
+	{},
+};
+MODULE_DEVICE_TABLE(ccw, zfcp_ccw_device_id);
+
+/**
+ * zfcp_ccw_priv_sch - check if subchannel is privileged
+ * @adapter: Adapter/Subchannel to check
+ */
+int zfcp_ccw_priv_sch(struct zfcp_adapter *adapter)
+{
+	return adapter->ccw_device->id.dev_model == ZFCP_MODEL_PRIV;
+}
+
 /**
  * zfcp_ccw_probe - probe function of zfcp driver
  * @ccw_device: pointer to belonging ccw device
@@ -176,8 +194,8 @@
 					"ccnoti4", NULL);
 		break;
 	case CIO_BOXED:
-		dev_warn(&adapter->ccw_device->dev,
-			 "The ccw device did not respond in time.\n");
+		dev_warn(&adapter->ccw_device->dev, "The FCP device "
+			 "did not respond within the specified time\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti5", NULL);
 		break;
 	}
@@ -199,14 +217,6 @@
 	up(&zfcp_data.config_sema);
 }
 
-static struct ccw_device_id zfcp_ccw_device_id[] = {
-	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x3) },
-	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x4) }, /* priv. */
-	{},
-};
-
-MODULE_DEVICE_TABLE(ccw, zfcp_ccw_device_id);
-
 static struct ccw_driver zfcp_ccw_driver = {
 	.owner       = THIS_MODULE,
 	.name        = "zfcp",
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 0a1a5dd..b99b87c 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -163,7 +163,7 @@
 	}
 
 	response->fsf_command = fsf_req->fsf_command;
-	response->fsf_reqid = (unsigned long)fsf_req;
+	response->fsf_reqid = fsf_req->req_id;
 	response->fsf_seqno = fsf_req->seq_no;
 	response->fsf_issued = fsf_req->issued;
 	response->fsf_prot_status = qtcb->prefix.prot_status;
@@ -737,7 +737,7 @@
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(r, 0, sizeof(*r));
 	strncpy(r->tag, "octc", ZFCP_DBF_TAG_SIZE);
-	r->fsf_reqid = (unsigned long)fsf_req;
+	r->fsf_reqid = fsf_req->req_id;
 	r->fsf_seqno = fsf_req->seq_no;
 	r->s_id = fc_host_port_id(adapter->scsi_host);
 	r->d_id = wka_port->d_id;
@@ -773,7 +773,7 @@
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(r, 0, sizeof(*r));
 	strncpy(r->tag, "rctc", ZFCP_DBF_TAG_SIZE);
-	r->fsf_reqid = (unsigned long)fsf_req;
+	r->fsf_reqid = fsf_req->req_id;
 	r->fsf_seqno = fsf_req->seq_no;
 	r->s_id = wka_port->d_id;
 	r->d_id = fc_host_port_id(adapter->scsi_host);
@@ -803,7 +803,7 @@
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(rec, 0, sizeof(*rec));
 	strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE);
-	rec->fsf_reqid = (unsigned long)fsf_req;
+	rec->fsf_reqid = fsf_req->req_id;
 	rec->fsf_seqno = fsf_req->seq_no;
 	rec->s_id = s_id;
 	rec->d_id = d_id;
@@ -965,7 +965,7 @@
 						      ZFCP_DBF_SCSI_FCP_SNS_INFO);
 				}
 
-				rec->fsf_reqid = (unsigned long)fsf_req;
+				rec->fsf_reqid = fsf_req->req_id;
 				rec->fsf_seqno = fsf_req->seq_no;
 				rec->fsf_issued = fsf_req->issued;
 			}
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 4c362a9..2074d45 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -47,13 +47,6 @@
 
 /********************* CIO/QDIO SPECIFIC DEFINES *****************************/
 
-/* Adapter Identification Parameters */
-#define ZFCP_CONTROL_UNIT_TYPE  0x1731
-#define ZFCP_CONTROL_UNIT_MODEL 0x03
-#define ZFCP_DEVICE_TYPE        0x1732
-#define ZFCP_DEVICE_MODEL       0x03
-#define ZFCP_DEVICE_MODEL_PRIV	0x04
-
 /* DMQ bug workaround: don't use last SBALE */
 #define ZFCP_MAX_SBALES_PER_SBAL	(QDIO_MAX_ELEMENTS_PER_BUFFER - 1)
 
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index fdc9b43..e50ea46 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -880,6 +880,7 @@
 				zfcp_port_put(port);
 			return ZFCP_ERP_CONTINUES;
 		}
+		/* fall through */
 	case ZFCP_ERP_STEP_NAMESERVER_LOOKUP:
 		if (!port->d_id)
 			return ZFCP_ERP_FAILED;
@@ -894,8 +895,13 @@
 				act->step = ZFCP_ERP_STEP_PORT_CLOSING;
 				return ZFCP_ERP_CONTINUES;
 			}
-		/* fall through otherwise */
 		}
+		if (port->d_id && !(p_status & ZFCP_STATUS_COMMON_NOESC)) {
+			port->d_id = 0;
+			_zfcp_erp_port_reopen(port, 0, "erpsoc1", NULL);
+			return ZFCP_ERP_EXIT;
+		}
+		/* fall through otherwise */
 	}
 	return ZFCP_ERP_FAILED;
 }
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 2e31b53..120a9a1 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -27,6 +27,7 @@
 
 /* zfcp_ccw.c */
 extern int zfcp_ccw_register(void);
+extern int zfcp_ccw_priv_sch(struct zfcp_adapter *);
 extern struct zfcp_adapter *zfcp_get_adapter_by_busid(char *);
 
 /* zfcp_cfdc.c */
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 19ae084..bb2752b 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -150,9 +150,14 @@
 	struct zfcp_port *port;
 
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
-	list_for_each_entry(port, &fsf_req->adapter->port_list_head, list)
+	list_for_each_entry(port, &fsf_req->adapter->port_list_head, list) {
 		if ((port->d_id & range) == (elem->nport_did & range))
 			zfcp_test_link(port);
+		if (!port->d_id)
+			zfcp_erp_port_reopen(port,
+					     ZFCP_STATUS_COMMON_ERP_FAILED,
+					     "fcrscn1", NULL);
+	}
 
 	read_unlock_irqrestore(&zfcp_data.config_lock, flags);
 }
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 74dee32..e6dae37 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -526,6 +526,7 @@
 		break;
 	case FSF_TOPO_AL:
 		fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
+		/* fall through */
 	default:
 		dev_err(&adapter->ccw_device->dev,
 			"Unknown or unsupported arbitrated loop "
@@ -897,6 +898,7 @@
 		switch (fsq->word[0]) {
 		case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE:
 			zfcp_test_link(unit->port);
+			/* fall through */
 		case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED:
 			req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 			break;
@@ -993,6 +995,7 @@
 		break;
 	case FSF_PORT_HANDLE_NOT_VALID:
 		zfcp_erp_adapter_reopen(adapter, 0, "fsscth1", req);
+		/* fall through */
 	case FSF_GENERIC_COMMAND_REJECTED:
 	case FSF_PAYLOAD_SIZE_MISMATCH:
 	case FSF_REQUEST_SIZE_TOO_LARGE:
@@ -1399,7 +1402,7 @@
 	struct fsf_plogi *plogi;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		return;
+		goto out;
 
 	switch (header->fsf_status) {
 	case FSF_PORT_ALREADY_OPEN:
@@ -1461,6 +1464,9 @@
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	}
+
+out:
+	zfcp_port_put(port);
 }
 
 /**
@@ -1473,6 +1479,7 @@
 	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
+	struct zfcp_port *port = erp_action->port;
 	int retval = -EIO;
 
 	spin_lock_bh(&adapter->req_q_lock);
@@ -1493,16 +1500,18 @@
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
 	req->handler = zfcp_fsf_open_port_handler;
-	req->qtcb->bottom.support.d_id = erp_action->port->d_id;
-	req->data = erp_action->port;
+	req->qtcb->bottom.support.d_id = port->d_id;
+	req->data = port;
 	req->erp_action = erp_action;
 	erp_action->fsf_req = req;
+	zfcp_port_get(port);
 
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
 	if (retval) {
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req = NULL;
+		zfcp_port_put(port);
 	}
 out:
 	spin_unlock_bh(&adapter->req_q_lock);
@@ -1590,8 +1599,10 @@
 	case FSF_MAXIMUM_NUMBER_OF_PORTS_EXCEEDED:
 		dev_warn(&req->adapter->ccw_device->dev,
 			 "Opening WKA port 0x%x failed\n", wka_port->d_id);
+		/* fall through */
 	case FSF_ADAPTER_STATUS_AVAILABLE:
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
+		/* fall through */
 	case FSF_ACCESS_DENIED:
 		wka_port->status = ZFCP_WKA_PORT_OFFLINE;
 		break;
@@ -1876,7 +1887,7 @@
 
 		if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE) &&
 		    (adapter->adapter_features & FSF_FEATURE_LUN_SHARING) &&
-		    (adapter->ccw_device->id.dev_model != ZFCP_DEVICE_MODEL_PRIV)) {
+		    !zfcp_ccw_priv_sch(adapter)) {
 			exclusive = (bottom->lun_access_info &
 					FSF_UNIT_ACCESS_EXCLUSIVE);
 			readwrite = (bottom->lun_access_info &
@@ -2314,7 +2325,7 @@
 {
 	struct zfcp_fsf_req *req;
 	struct fcp_cmnd_iu *fcp_cmnd_iu;
-	unsigned int sbtype;
+	unsigned int sbtype = SBAL_FLAGS0_TYPE_READ;
 	int real_bytes, retval = -EIO;
 	struct zfcp_adapter *adapter = unit->port->adapter;
 
@@ -2356,11 +2367,9 @@
 	switch (scsi_cmnd->sc_data_direction) {
 	case DMA_NONE:
 		req->qtcb->bottom.io.data_direction = FSF_DATADIR_CMND;
-		sbtype = SBAL_FLAGS0_TYPE_READ;
 		break;
 	case DMA_FROM_DEVICE:
 		req->qtcb->bottom.io.data_direction = FSF_DATADIR_READ;
-		sbtype = SBAL_FLAGS0_TYPE_READ;
 		fcp_cmnd_iu->rddata = 1;
 		break;
 	case DMA_TO_DEVICE:
@@ -2369,8 +2378,6 @@
 		fcp_cmnd_iu->wddata = 1;
 		break;
 	case DMA_BIDIRECTIONAL:
-	default:
-		retval = -EIO;
 		goto failed_scsi_cmnd;
 	}
 
@@ -2394,9 +2401,7 @@
 					     scsi_sglist(scsi_cmnd),
 					     FSF_MAX_SBALS_PER_REQ);
 	if (unlikely(real_bytes < 0)) {
-		if (req->sbal_number < FSF_MAX_SBALS_PER_REQ)
-			retval = -EIO;
-		else {
+		if (req->sbal_number >= FSF_MAX_SBALS_PER_REQ) {
 			dev_err(&adapter->ccw_device->dev,
 				"Oversize data package, unit 0x%016Lx "
 				"on port 0x%016Lx closed\n",
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index e8fbeae..7d0da23 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -12,6 +12,10 @@
 #include "zfcp_ext.h"
 #include <asm/atomic.h>
 
+static unsigned int default_depth = 32;
+module_param_named(queue_depth, default_depth, uint, 0600);
+MODULE_PARM_DESC(queue_depth, "Default queue depth for new SCSI devices");
+
 /* Find start of Sense Information in FCP response unit*/
 char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *fcp_rsp_iu)
 {
@@ -24,6 +28,12 @@
 	return fcp_sns_info_ptr;
 }
 
+static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth)
+{
+	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+	return sdev->queue_depth;
+}
+
 static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 {
 	struct zfcp_unit *unit = (struct zfcp_unit *) sdpnt->hostdata;
@@ -34,7 +44,7 @@
 static int zfcp_scsi_slave_configure(struct scsi_device *sdp)
 {
 	if (sdp->tagged_supported)
-		scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, 32);
+		scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, default_depth);
 	else
 		scsi_adjust_queue_depth(sdp, 0, 1);
 	return 0;
@@ -647,6 +657,7 @@
 		.name			 = "zfcp",
 		.module			 = THIS_MODULE,
 		.proc_name		 = "zfcp",
+		.change_queue_depth	 = zfcp_scsi_change_queue_depth,
 		.slave_alloc		 = zfcp_scsi_slave_alloc,
 		.slave_configure	 = zfcp_scsi_slave_configure,
 		.slave_destroy		 = zfcp_scsi_slave_destroy,
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index a85ad05..6d46516 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -186,31 +186,31 @@
 {
 	struct request *req;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	req = blk_fetch_request(q);
+	while (req) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
-		unsigned long offset = req->sector << 9;
-		size_t len = req->current_nr_sectors << 9;
+		unsigned long offset = blk_rq_pos(req) << 9;
+		size_t len = blk_rq_cur_bytes(req);
+		int err = -EIO;
 
-		if ((offset + len) > jdp->dsize) {
-               		end_request(req, 0);
-			continue;
-		}
+		if ((offset + len) > jdp->dsize)
+			goto end;
 
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_ERR "jsfd: write\n");
-			end_request(req, 0);
-			continue;
+			goto end;
 		}
 
 		if ((jdp->dbase & 0xff000000) != 0x20000000) {
 			printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
-			end_request(req, 0);
-			continue;
+			goto end;
 		}
 
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
-
-		end_request(req, 1);
+		err = 0;
+	end:
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index fb27407..6a19ed9 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -191,20 +191,19 @@
 	  it has an enclosure device.  Selecting this option will just allow
 	  certain enclosure conditions to be reported and is not required.
 
-comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs"
-	depends on SCSI
-
 config SCSI_MULTI_LUN
 	bool "Probe all LUNs on each SCSI device"
 	depends on SCSI
 	help
-	  If you have a SCSI device that supports more than one LUN (Logical
-	  Unit Number), e.g. a CD jukebox, and only one LUN is detected, you
-	  can say Y here to force the SCSI driver to probe for multiple LUNs.
-	  A SCSI device with multiple LUNs acts logically like multiple SCSI
-	  devices. The vast majority of SCSI devices have only one LUN, and
-	  so most people can say N here. The max_luns boot/module parameter 
-	  allows to override this setting.
+	  Some devices support more than one LUN (Logical Unit Number) in order
+	  to allow access to several media, e.g. CD jukebox, USB card reader,
+	  mobile phone in mass storage mode. This option forces the kernel to
+	  probe for all LUNs by default. This setting can be overriden by
+	  max_luns boot/module parameter. Note that this option does not affect
+	  devices conforming to SCSI-3 or higher as they can explicitely report
+	  their number of LUNs. It is safe to say Y here unless you have one of
+	  those rare devices which reacts in an unexpected way when probed for
+	  multiple LUNs.
 
 config SCSI_CONSTANTS
 	bool "Verbose SCSI error reporting (kernel size +=12K)"
@@ -355,6 +354,7 @@
 	 http://open-iscsi.org
 
 source "drivers/scsi/cxgb3i/Kconfig"
+source "drivers/scsi/bnx2i/Kconfig"
 
 config SGIWD93_SCSI
 	tristate "SGI WD93C93 SCSI Driver"
@@ -508,6 +508,7 @@
 
 source "drivers/scsi/aic7xxx/Kconfig.aic79xx"
 source "drivers/scsi/aic94xx/Kconfig"
+source "drivers/scsi/mvsas/Kconfig"
 
 config SCSI_DPT_I2O
 	tristate "Adaptec I2O RAID support "
@@ -1050,16 +1051,6 @@
 
 	  Generally, saying N is fine.
 
-config SCSI_MVSAS
-	tristate "Marvell 88SE6440 SAS/SATA support"
-	depends on PCI && SCSI
-	select SCSI_SAS_LIBSAS
-	help
-	  This driver supports Marvell SAS/SATA PCI devices.
-
-	  To compiler this driver as a module, choose M here: the module
-	  will be called mvsas.
-
 config SCSI_NCR53C406A
 	tristate "NCR53c406a SCSI support"
 	depends on ISA && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index a5049cf..25429ea 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -126,9 +126,10 @@
 obj-$(CONFIG_SCSI_IBMVFC)	+= ibmvscsi/
 obj-$(CONFIG_SCSI_HPTIOP)	+= hptiop.o
 obj-$(CONFIG_SCSI_STEX)		+= stex.o
-obj-$(CONFIG_SCSI_MVSAS)	+= mvsas.o
+obj-$(CONFIG_SCSI_MVSAS)	+= mvsas/
 obj-$(CONFIG_PS3_ROM)		+= ps3rom.o
 obj-$(CONFIG_SCSI_CXGB3_ISCSI)	+= libiscsi.o libiscsi_tcp.o cxgb3i/
+obj-$(CONFIG_SCSI_BNX2_ISCSI)	+= libiscsi.o bnx2i/
 
 obj-$(CONFIG_ARM)		+= arm/
 
diff --git a/drivers/scsi/NCR_D700.c b/drivers/scsi/NCR_D700.c
index c889d84..1cdf09a 100644
--- a/drivers/scsi/NCR_D700.c
+++ b/drivers/scsi/NCR_D700.c
@@ -224,7 +224,7 @@
 	return ret;
 }
 
-static int
+static irqreturn_t
 NCR_D700_intr(int irq, void *data)
 {
 	struct NCR_D700_private *p = (struct NCR_D700_private *)data;
diff --git a/drivers/scsi/bnx2i/57xx_iscsi_constants.h b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
new file mode 100644
index 0000000..2fceb19
--- /dev/null
+++ b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
@@ -0,0 +1,155 @@
+/* 57xx_iscsi_constants.h: Broadcom NetXtreme II iSCSI HSI
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+#ifndef __57XX_ISCSI_CONSTANTS_H_
+#define __57XX_ISCSI_CONSTANTS_H_
+
+/**
+* This file defines HSI constants for the iSCSI flows
+*/
+
+/* iSCSI request op codes */
+#define ISCSI_OPCODE_CLEANUP_REQUEST    (7)
+
+/* iSCSI response/messages op codes */
+#define ISCSI_OPCODE_CLEANUP_RESPONSE 		(0x27)
+#define ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION    (0)
+
+/* iSCSI task types */
+#define ISCSI_TASK_TYPE_READ    (0)
+#define ISCSI_TASK_TYPE_WRITE   (1)
+#define ISCSI_TASK_TYPE_MPATH   (2)
+
+/* initial CQ sequence numbers */
+#define ISCSI_INITIAL_SN    (1)
+
+/* KWQ (kernel work queue) layer codes */
+#define ISCSI_KWQE_LAYER_CODE   (6)
+
+/* KWQ (kernel work queue) request op codes */
+#define ISCSI_KWQE_OPCODE_OFFLOAD_CONN1 (0)
+#define ISCSI_KWQE_OPCODE_OFFLOAD_CONN2 (1)
+#define ISCSI_KWQE_OPCODE_UPDATE_CONN   (2)
+#define ISCSI_KWQE_OPCODE_DESTROY_CONN  (3)
+#define ISCSI_KWQE_OPCODE_INIT1         (4)
+#define ISCSI_KWQE_OPCODE_INIT2         (5)
+
+/* KCQ (kernel completion queue) response op codes */
+#define ISCSI_KCQE_OPCODE_OFFLOAD_CONN  (0x10)
+#define ISCSI_KCQE_OPCODE_UPDATE_CONN   (0x12)
+#define ISCSI_KCQE_OPCODE_DESTROY_CONN  (0x13)
+#define ISCSI_KCQE_OPCODE_INIT          (0x14)
+#define ISCSI_KCQE_OPCODE_FW_CLEAN_TASK	(0x15)
+#define ISCSI_KCQE_OPCODE_TCP_RESET     (0x16)
+#define ISCSI_KCQE_OPCODE_TCP_SYN       (0x17)
+#define ISCSI_KCQE_OPCODE_TCP_FIN       (0X18)
+#define ISCSI_KCQE_OPCODE_TCP_ERROR     (0x19)
+#define ISCSI_KCQE_OPCODE_CQ_EVENT_NOTIFICATION (0x20)
+#define ISCSI_KCQE_OPCODE_ISCSI_ERROR   (0x21)
+
+/* KCQ (kernel completion queue) completion status */
+#define ISCSI_KCQE_COMPLETION_STATUS_SUCCESS                            (0x0)
+#define ISCSI_KCQE_COMPLETION_STATUS_INVALID_OPCODE                     (0x1)
+#define ISCSI_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE                  (0x2)
+#define ISCSI_KCQE_COMPLETION_STATUS_CTX_FREE_FAILURE                   (0x3)
+#define ISCSI_KCQE_COMPLETION_STATUS_NIC_ERROR                          (0x4)
+
+#define ISCSI_KCQE_COMPLETION_STATUS_HDR_DIG_ERR                        (0x5)
+#define ISCSI_KCQE_COMPLETION_STATUS_DATA_DIG_ERR                       (0x6)
+
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_UNEXPECTED_OPCODE     (0xa)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_OPCODE                (0xb)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_AHS_LEN               (0xc)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ITT                   (0xd)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_STATSN                (0xe)
+
+/* Response */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN            (0xf)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T              (0x10)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_IS_ZERO  (0x2c)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_TOO_BIG  (0x2d)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_0                 (0x11)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_1                 (0x12)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_2                 (0x13)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_3                 (0x14)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_4                 (0x15)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_5                 (0x16)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_6                 (0x17)
+
+/* Data-In */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_RCV_LEN        (0x18)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_RCV_PDU_LEN       (0x19)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_F_BIT_ZERO            (0x1a)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV          (0x1b)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATASN                (0x1c)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_BURST_LEN      (0x1d)
+
+/* R2T */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_BUFFER_OFF            (0x1f)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN                   (0x20)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_R2TSN                 (0x21)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0 (0x22)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1 (0x23)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_EXCEED       (0x24)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_IS_RSRV           (0x25)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_BURST_LEN         (0x26)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_NOT_ZERO (0x27)
+
+/* TMF */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REJECT_PDU_LEN        (0x28)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ASYNC_PDU_LEN         (0x29)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_NOPIN_PDU_LEN         (0x2a)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_IN_CLEANUP   (0x2b)
+
+/* IP/TCP processing errors: */
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_FRAGMENT               (0x40)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_OPTIONS                (0x41)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_URGENT_FLAG               (0x42)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_MAX_RTRANS                (0x43)
+
+/* iSCSI licensing errors */
+/* general iSCSI license not installed */
+#define ISCSI_KCQE_COMPLETION_STATUS_ISCSI_NOT_SUPPORTED                (0x50)
+/* additional LOM specific iSCSI license not installed */
+#define ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED              (0x51)
+
+/* SQ/RQ/CQ DB structure sizes */
+#define ISCSI_SQ_DB_SIZE    (16)
+#define ISCSI_RQ_DB_SIZE    (16)
+#define ISCSI_CQ_DB_SIZE    (80)
+
+#define ISCSI_SQN_TO_NOTIFY_NOT_VALID                                   0xFFFF
+
+/* Page size codes (for flags field in connection offload request) */
+#define ISCSI_PAGE_SIZE_256     (0)
+#define ISCSI_PAGE_SIZE_512     (1)
+#define ISCSI_PAGE_SIZE_1K      (2)
+#define ISCSI_PAGE_SIZE_2K      (3)
+#define ISCSI_PAGE_SIZE_4K      (4)
+#define ISCSI_PAGE_SIZE_8K      (5)
+#define ISCSI_PAGE_SIZE_16K     (6)
+#define ISCSI_PAGE_SIZE_32K     (7)
+#define ISCSI_PAGE_SIZE_64K     (8)
+#define ISCSI_PAGE_SIZE_128K    (9)
+#define ISCSI_PAGE_SIZE_256K    (10)
+#define ISCSI_PAGE_SIZE_512K    (11)
+#define ISCSI_PAGE_SIZE_1M      (12)
+#define ISCSI_PAGE_SIZE_2M      (13)
+#define ISCSI_PAGE_SIZE_4M      (14)
+#define ISCSI_PAGE_SIZE_8M      (15)
+
+/* Iscsi PDU related defines */
+#define ISCSI_HEADER_SIZE   (48)
+#define ISCSI_DIGEST_SHIFT  (2)
+#define ISCSI_DIGEST_SIZE   (4)
+
+#define B577XX_ISCSI_CONNECTION_TYPE    3
+
+#endif /*__57XX_ISCSI_CONSTANTS_H_ */
diff --git a/drivers/scsi/bnx2i/57xx_iscsi_hsi.h b/drivers/scsi/bnx2i/57xx_iscsi_hsi.h
new file mode 100644
index 0000000..36af1af
--- /dev/null
+++ b/drivers/scsi/bnx2i/57xx_iscsi_hsi.h
@@ -0,0 +1,1509 @@
+/* 57xx_iscsi_hsi.h: Broadcom NetXtreme II iSCSI HSI.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+#ifndef __57XX_ISCSI_HSI_LINUX_LE__
+#define __57XX_ISCSI_HSI_LINUX_LE__
+
+/*
+ * iSCSI Async CQE
+ */
+struct bnx2i_async_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u8 async_event;
+	u8 async_vcode;
+	u16 param1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 param1;
+	u8 async_vcode;
+	u8 async_event;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 param2;
+	u16 param3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 param3;
+	u16 param2;
+#endif
+	u32 reserved7[3];
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Buffer Descriptor (BD)
+ */
+struct iscsi_bd {
+	u32 buffer_addr_hi;
+	u32 buffer_addr_lo;
+#if defined(__BIG_ENDIAN)
+	u16 reserved0;
+	u16 buffer_length;
+#elif defined(__LITTLE_ENDIAN)
+	u16 buffer_length;
+	u16 reserved0;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 flags;
+#define ISCSI_BD_RESERVED1 (0x3F<<0)
+#define ISCSI_BD_RESERVED1_SHIFT 0
+#define ISCSI_BD_LAST_IN_BD_CHAIN (0x1<<6)
+#define ISCSI_BD_LAST_IN_BD_CHAIN_SHIFT 6
+#define ISCSI_BD_FIRST_IN_BD_CHAIN (0x1<<7)
+#define ISCSI_BD_FIRST_IN_BD_CHAIN_SHIFT 7
+#define ISCSI_BD_RESERVED2 (0xFF<<8)
+#define ISCSI_BD_RESERVED2_SHIFT 8
+#elif defined(__LITTLE_ENDIAN)
+	u16 flags;
+#define ISCSI_BD_RESERVED1 (0x3F<<0)
+#define ISCSI_BD_RESERVED1_SHIFT 0
+#define ISCSI_BD_LAST_IN_BD_CHAIN (0x1<<6)
+#define ISCSI_BD_LAST_IN_BD_CHAIN_SHIFT 6
+#define ISCSI_BD_FIRST_IN_BD_CHAIN (0x1<<7)
+#define ISCSI_BD_FIRST_IN_BD_CHAIN_SHIFT 7
+#define ISCSI_BD_RESERVED2 (0xFF<<8)
+#define ISCSI_BD_RESERVED2_SHIFT 8
+	u16 reserved3;
+#endif
+};
+
+
+/*
+ * iSCSI Cleanup SQ WQE
+ */
+struct bnx2i_cleanup_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 itt;
+#define ISCSI_CLEANUP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CLEANUP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_REQUEST_TYPE_SHIFT 14
+	u16 reserved3;
+#endif
+	u32 reserved4[10];
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u16 reserved5;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Cleanup CQE
+ */
+struct bnx2i_cleanup_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 status;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 status;
+	u8 op_code;
+#endif
+	u32 reserved1[3];
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[7];
+#if defined(__BIG_ENDIAN)
+	u16 reserved6;
+	u16 itt;
+#define ISCSI_CLEANUP_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CLEANUP_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_RESPONSE_TYPE_SHIFT 14
+	u16 reserved6;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * SCSI read/write SQ WQE
+ */
+struct bnx2i_cmd_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_CMD_REQUEST_TASK_ATTR (0x7<<0)
+#define ISCSI_CMD_REQUEST_TASK_ATTR_SHIFT 0
+#define ISCSI_CMD_REQUEST_RESERVED1 (0x3<<3)
+#define ISCSI_CMD_REQUEST_RESERVED1_SHIFT 3
+#define ISCSI_CMD_REQUEST_WRITE (0x1<<5)
+#define ISCSI_CMD_REQUEST_WRITE_SHIFT 5
+#define ISCSI_CMD_REQUEST_READ (0x1<<6)
+#define ISCSI_CMD_REQUEST_READ_SHIFT 6
+#define ISCSI_CMD_REQUEST_FINAL (0x1<<7)
+#define ISCSI_CMD_REQUEST_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_CMD_REQUEST_TASK_ATTR (0x7<<0)
+#define ISCSI_CMD_REQUEST_TASK_ATTR_SHIFT 0
+#define ISCSI_CMD_REQUEST_RESERVED1 (0x3<<3)
+#define ISCSI_CMD_REQUEST_RESERVED1_SHIFT 3
+#define ISCSI_CMD_REQUEST_WRITE (0x1<<5)
+#define ISCSI_CMD_REQUEST_WRITE_SHIFT 5
+#define ISCSI_CMD_REQUEST_READ (0x1<<6)
+#define ISCSI_CMD_REQUEST_READ_SHIFT 6
+#define ISCSI_CMD_REQUEST_FINAL (0x1<<7)
+#define ISCSI_CMD_REQUEST_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 ud_buffer_offset;
+	u16 sd_buffer_offset;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sd_buffer_offset;
+	u16 ud_buffer_offset;
+#endif
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_CMD_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CMD_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CMD_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CMD_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CMD_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CMD_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+	u32 total_data_transfer_length;
+	u32 cmd_sn;
+	u32 reserved3;
+	u32 cdb[4];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 sd_start_bd_index;
+	u8 ud_start_bd_index;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 ud_start_bd_index;
+	u8 sd_start_bd_index;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * task statistics for write response
+ */
+struct bnx2i_write_resp_task_stat {
+	u32 num_data_ins;
+};
+
+/*
+ * task statistics for read response
+ */
+struct bnx2i_read_resp_task_stat {
+#if defined(__BIG_ENDIAN)
+	u16 num_data_outs;
+	u16 num_r2ts;
+#elif defined(__LITTLE_ENDIAN)
+	u16 num_r2ts;
+	u16 num_data_outs;
+#endif
+};
+
+/*
+ * task statistics for iSCSI cmd response
+ */
+union bnx2i_cmd_resp_task_stat {
+	struct bnx2i_write_resp_task_stat write_stat;
+	struct bnx2i_read_resp_task_stat read_stat;
+};
+
+/*
+ * SCSI Command CQE
+ */
+struct bnx2i_cmd_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_CMD_RESPONSE_RESERVED0 (0x1<<0)
+#define ISCSI_CMD_RESPONSE_RESERVED0_SHIFT 0
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW (0x1<<1)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW_SHIFT 1
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW (0x1<<2)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW_SHIFT 2
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW (0x1<<3)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW_SHIFT 3
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW (0x1<<4)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW_SHIFT 4
+#define ISCSI_CMD_RESPONSE_RESERVED1 (0x7<<5)
+#define ISCSI_CMD_RESPONSE_RESERVED1_SHIFT 5
+	u8 response;
+	u8 status;
+#elif defined(__LITTLE_ENDIAN)
+	u8 status;
+	u8 response;
+	u8 response_flags;
+#define ISCSI_CMD_RESPONSE_RESERVED0 (0x1<<0)
+#define ISCSI_CMD_RESPONSE_RESERVED0_SHIFT 0
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW (0x1<<1)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW_SHIFT 1
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW (0x1<<2)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW_SHIFT 2
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW (0x1<<3)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW_SHIFT 3
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW (0x1<<4)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW_SHIFT 4
+#define ISCSI_CMD_RESPONSE_RESERVED1 (0x7<<5)
+#define ISCSI_CMD_RESPONSE_RESERVED1_SHIFT 5
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved2;
+	u32 residual_count;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[5];
+	union bnx2i_cmd_resp_task_stat task_stat;
+	u32 reserved6;
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_CMD_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CMD_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CMD_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CMD_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CMD_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CMD_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+
+
+/*
+ * firmware middle-path request SQ WQE
+ */
+struct bnx2i_fw_mp_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+	u16 hdr_opaque1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 hdr_opaque1;
+	u8 op_attr;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 hdr_opaque2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved0;
+	u16 itt;
+#define ISCSI_FW_MP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_FW_MP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_FW_MP_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_FW_MP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_FW_MP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_FW_MP_REQUEST_TYPE_SHIFT 14
+	u16 reserved0;
+#endif
+	u32 hdr_opaque3[4];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_FW_MP_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_FW_MP_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_FW_MP_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 reserved3;
+	u8 flags;
+#define ISCSI_FW_MP_REQUEST_RESERVED1 (0x1<<0)
+#define ISCSI_FW_MP_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_FW_MP_REQUEST_RESERVED2 (0x1F<<3)
+#define ISCSI_FW_MP_REQUEST_RESERVED2_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_FW_MP_REQUEST_RESERVED1 (0x1<<0)
+#define ISCSI_FW_MP_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_FW_MP_REQUEST_RESERVED2 (0x1F<<3)
+#define ISCSI_FW_MP_REQUEST_RESERVED2_SHIFT 3
+	u8 reserved3;
+	u16 reserved4;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u8 reserved5;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * firmware response - CQE: used only by firmware
+ */
+struct bnx2i_fw_response {
+	u32 hdr_dword1[2];
+	u32 hdr_exp_cmd_sn;
+	u32 hdr_max_cmd_sn;
+	u32 hdr_ttt;
+	u32 hdr_res_cnt;
+	u32 cqe_flags;
+#define ISCSI_FW_RESPONSE_RESERVED2 (0xFF<<0)
+#define ISCSI_FW_RESPONSE_RESERVED2_SHIFT 0
+#define ISCSI_FW_RESPONSE_ERR_CODE (0xFF<<8)
+#define ISCSI_FW_RESPONSE_ERR_CODE_SHIFT 8
+#define ISCSI_FW_RESPONSE_RESERVED3 (0xFFFF<<16)
+#define ISCSI_FW_RESPONSE_RESERVED3_SHIFT 16
+	u32 stat_sn;
+	u32 hdr_dword2[2];
+	u32 hdr_dword3[2];
+	u32 task_stat;
+	u32 reserved0;
+	u32 hdr_itt;
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI KCQ CQE parameters
+ */
+union iscsi_kcqe_params {
+	u32 reserved0[4];
+};
+
+/*
+ * iSCSI KCQ CQE
+ */
+struct iscsi_kcqe {
+	u32 iscsi_conn_id;
+	u32 completion_status;
+	u32 iscsi_conn_context_id;
+	union iscsi_kcqe_params params;
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define ISCSI_KCQE_RESERVED0 (0xF<<0)
+#define ISCSI_KCQE_RESERVED0_SHIFT 0
+#define ISCSI_KCQE_LAYER_CODE (0x7<<4)
+#define ISCSI_KCQE_LAYER_CODE_SHIFT 4
+#define ISCSI_KCQE_RESERVED1 (0x1<<7)
+#define ISCSI_KCQE_RESERVED1_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define ISCSI_KCQE_RESERVED0 (0xF<<0)
+#define ISCSI_KCQE_RESERVED0_SHIFT 0
+#define ISCSI_KCQE_LAYER_CODE (0x7<<4)
+#define ISCSI_KCQE_LAYER_CODE_SHIFT 4
+#define ISCSI_KCQE_RESERVED1 (0x1<<7)
+#define ISCSI_KCQE_RESERVED1_SHIFT 7
+#endif
+};
+
+
+
+/*
+ * iSCSI KWQE header
+ */
+struct iscsi_kwqe_header {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define ISCSI_KWQE_HEADER_RESERVED0 (0xF<<0)
+#define ISCSI_KWQE_HEADER_RESERVED0_SHIFT 0
+#define ISCSI_KWQE_HEADER_LAYER_CODE (0x7<<4)
+#define ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT 4
+#define ISCSI_KWQE_HEADER_RESERVED1 (0x1<<7)
+#define ISCSI_KWQE_HEADER_RESERVED1_SHIFT 7
+	u8 op_code;
+#elif defined(__LITTLE_ENDIAN)
+	u8 op_code;
+	u8 flags;
+#define ISCSI_KWQE_HEADER_RESERVED0 (0xF<<0)
+#define ISCSI_KWQE_HEADER_RESERVED0_SHIFT 0
+#define ISCSI_KWQE_HEADER_LAYER_CODE (0x7<<4)
+#define ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT 4
+#define ISCSI_KWQE_HEADER_RESERVED1 (0x1<<7)
+#define ISCSI_KWQE_HEADER_RESERVED1_SHIFT 7
+#endif
+};
+
+/*
+ * iSCSI firmware init request 1
+ */
+struct iscsi_kwqe_init1 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u8 reserved0;
+	u8 num_cqs;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_cqs;
+	u8 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 dummy_buffer_addr_lo;
+	u32 dummy_buffer_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u16 num_ccells_per_conn;
+	u16 num_tasks_per_conn;
+#elif defined(__LITTLE_ENDIAN)
+	u16 num_tasks_per_conn;
+	u16 num_ccells_per_conn;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 sq_wqes_per_page;
+	u16 sq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sq_num_wqes;
+	u16 sq_wqes_per_page;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 cq_log_wqes_per_page;
+	u8 flags;
+#define ISCSI_KWQE_INIT1_PAGE_SIZE (0xF<<0)
+#define ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT 0
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE (0x1<<4)
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE_SHIFT 4
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE (0x1<<5)
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE_SHIFT 5
+#define ISCSI_KWQE_INIT1_RESERVED1 (0x3<<6)
+#define ISCSI_KWQE_INIT1_RESERVED1_SHIFT 6
+	u16 cq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 cq_num_wqes;
+	u8 flags;
+#define ISCSI_KWQE_INIT1_PAGE_SIZE (0xF<<0)
+#define ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT 0
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE (0x1<<4)
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE_SHIFT 4
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE (0x1<<5)
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE_SHIFT 5
+#define ISCSI_KWQE_INIT1_RESERVED1 (0x3<<6)
+#define ISCSI_KWQE_INIT1_RESERVED1_SHIFT 6
+	u8 cq_log_wqes_per_page;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cq_num_pages;
+	u16 sq_num_pages;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sq_num_pages;
+	u16 cq_num_pages;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 rq_buffer_size;
+	u16 rq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 rq_num_wqes;
+	u16 rq_buffer_size;
+#endif
+};
+
+/*
+ * iSCSI firmware init request 2
+ */
+struct iscsi_kwqe_init2 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 max_cq_sqn;
+#elif defined(__LITTLE_ENDIAN)
+	u16 max_cq_sqn;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 error_bit_map[2];
+	u32 reserved1[5];
+};
+
+/*
+ * Initial iSCSI connection offload request 1
+ */
+struct iscsi_kwqe_conn_offload1 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 iscsi_conn_id;
+#elif defined(__LITTLE_ENDIAN)
+	u16 iscsi_conn_id;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 sq_page_table_addr_lo;
+	u32 sq_page_table_addr_hi;
+	u32 cq_page_table_addr_lo;
+	u32 cq_page_table_addr_hi;
+	u32 reserved0[3];
+};
+
+/*
+ * iSCSI Page Table Entry (PTE)
+ */
+struct iscsi_pte {
+	u32 hi;
+	u32 lo;
+};
+
+/*
+ * Initial iSCSI connection offload request 2
+ */
+struct iscsi_kwqe_conn_offload2 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 rq_page_table_addr_lo;
+	u32 rq_page_table_addr_hi;
+	struct iscsi_pte sq_first_pte;
+	struct iscsi_pte cq_first_pte;
+	u32 num_additional_wqes;
+};
+
+
+/*
+ * Initial iSCSI connection offload request 3
+ */
+struct iscsi_kwqe_conn_offload3 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 reserved1;
+	struct iscsi_pte qp_first_pte[3];
+};
+
+
+/*
+ * iSCSI connection update request
+ */
+struct iscsi_kwqe_conn_update {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 session_error_recovery_level;
+	u8 max_outstanding_r2ts;
+	u8 reserved2;
+	u8 conn_flags;
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST (0x1<<0)
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST_SHIFT 0
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST (0x1<<1)
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST_SHIFT 1
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T (0x1<<2)
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T_SHIFT 2
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA (0x1<<3)
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA_SHIFT 3
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1 (0xF<<4)
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1_SHIFT 4
+#elif defined(__LITTLE_ENDIAN)
+	u8 conn_flags;
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST (0x1<<0)
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST_SHIFT 0
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST (0x1<<1)
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST_SHIFT 1
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T (0x1<<2)
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T_SHIFT 2
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA (0x1<<3)
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA_SHIFT 3
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1 (0xF<<4)
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1_SHIFT 4
+	u8 reserved2;
+	u8 max_outstanding_r2ts;
+	u8 session_error_recovery_level;
+#endif
+	u32 context_id;
+	u32 max_send_pdu_length;
+	u32 max_recv_pdu_length;
+	u32 first_burst_length;
+	u32 max_burst_length;
+	u32 exp_stat_sn;
+};
+
+/*
+ * iSCSI destroy connection request
+ */
+struct iscsi_kwqe_conn_destroy {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 context_id;
+	u32 reserved1[6];
+};
+
+/*
+ * iSCSI KWQ WQE
+ */
+union iscsi_kwqe {
+	struct iscsi_kwqe_init1 init1;
+	struct iscsi_kwqe_init2 init2;
+	struct iscsi_kwqe_conn_offload1 conn_offload1;
+	struct iscsi_kwqe_conn_offload2 conn_offload2;
+	struct iscsi_kwqe_conn_update conn_update;
+	struct iscsi_kwqe_conn_destroy conn_destroy;
+};
+
+/*
+ * iSCSI Login SQ WQE
+ */
+struct bnx2i_login_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_REQUEST_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_REQUEST_CONT (0x1<<6)
+#define ISCSI_LOGIN_REQUEST_CONT_SHIFT 6
+#define ISCSI_LOGIN_REQUEST_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_REQUEST_TRANSIT_SHIFT 7
+	u8 version_max;
+	u8 version_min;
+#elif defined(__LITTLE_ENDIAN)
+	u8 version_min;
+	u8 version_max;
+	u8 op_attr;
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_REQUEST_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_REQUEST_CONT (0x1<<6)
+#define ISCSI_LOGIN_REQUEST_CONT_SHIFT 6
+#define ISCSI_LOGIN_REQUEST_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_REQUEST_TRANSIT_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 isid_lo;
+#if defined(__BIG_ENDIAN)
+	u16 isid_hi;
+	u16 tsih;
+#elif defined(__LITTLE_ENDIAN)
+	u16 tsih;
+	u16 isid_hi;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_LOGIN_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGIN_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGIN_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGIN_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cid;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 cid;
+#endif
+	u32 cmd_sn;
+	u32 exp_stat_sn;
+	u32 reserved4;
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_LOGIN_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved8;
+	u8 reserved7;
+	u8 flags;
+#define ISCSI_LOGIN_REQUEST_RESERVED5 (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_RESERVED5_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED6 (0x1F<<3)
+#define ISCSI_LOGIN_REQUEST_RESERVED6_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_LOGIN_REQUEST_RESERVED5 (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_RESERVED5_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED6 (0x1F<<3)
+#define ISCSI_LOGIN_REQUEST_RESERVED6_SHIFT 3
+	u8 reserved7;
+	u16 reserved8;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved10;
+	u8 reserved9;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved9;
+	u8 reserved10;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Login CQE
+ */
+struct bnx2i_login_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_RESPONSE_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_RESPONSE_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_RESPONSE_CONT (0x1<<6)
+#define ISCSI_LOGIN_RESPONSE_CONT_SHIFT 6
+#define ISCSI_LOGIN_RESPONSE_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_RESPONSE_TRANSIT_SHIFT 7
+	u8 version_max;
+	u8 version_active;
+#elif defined(__LITTLE_ENDIAN)
+	u8 version_active;
+	u8 version_max;
+	u8 response_flags;
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_RESPONSE_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_RESPONSE_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_RESPONSE_CONT (0x1<<6)
+#define ISCSI_LOGIN_RESPONSE_CONT_SHIFT 6
+#define ISCSI_LOGIN_RESPONSE_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_RESPONSE_TRANSIT_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved1[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u8 err_code;
+	u8 reserved2;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved2;
+	u8 err_code;
+	u16 reserved3;
+#endif
+	u32 stat_sn;
+	u32 isid_lo;
+#if defined(__BIG_ENDIAN)
+	u16 isid_hi;
+	u16 tsih;
+#elif defined(__LITTLE_ENDIAN)
+	u16 tsih;
+	u16 isid_hi;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 status_class;
+	u8 status_detail;
+	u16 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved4;
+	u8 status_detail;
+	u8 status_class;
+#endif
+	u32 reserved5[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved6;
+	u16 itt;
+#define ISCSI_LOGIN_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGIN_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGIN_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGIN_RESPONSE_TYPE_SHIFT 14
+	u16 reserved6;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Logout SQ WQE
+ */
+struct bnx2i_logout_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_LOGOUT_REQUEST_REASON (0x7F<<0)
+#define ISCSI_LOGOUT_REQUEST_REASON_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_LOGOUT_REQUEST_REASON (0x7F<<0)
+#define ISCSI_LOGOUT_REQUEST_REASON_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 reserved1[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_LOGOUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGOUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cid;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 cid;
+#endif
+	u32 cmd_sn;
+	u32 reserved4[5];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u8 reserved5;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Logout CQE
+ */
+struct bnx2i_logout_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 response;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 response;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6[3];
+#if defined(__BIG_ENDIAN)
+	u16 time_to_wait;
+	u16 time_to_retain;
+#elif defined(__LITTLE_ENDIAN)
+	u16 time_to_retain;
+	u16 time_to_wait;
+#endif
+	u32 reserved7[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved8;
+	u16 itt;
+#define ISCSI_LOGOUT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGOUT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_RESPONSE_TYPE_SHIFT 14
+	u16 reserved8;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Nop-In CQE
+ */
+struct bnx2i_nop_in_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 ttt;
+	u32 reserved2;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5;
+	u32 lun[2];
+	u32 reserved6[4];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_NOP_IN_MSG_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_IN_MSG_INDEX_SHIFT 0
+#define ISCSI_NOP_IN_MSG_TYPE (0x3<<14)
+#define ISCSI_NOP_IN_MSG_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_NOP_IN_MSG_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_IN_MSG_INDEX_SHIFT 0
+#define ISCSI_NOP_IN_MSG_TYPE (0x3<<14)
+#define ISCSI_NOP_IN_MSG_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI NOP-OUT SQ WQE
+ */
+struct bnx2i_nop_out_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1 (0x7F<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1 (0x7F<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_NOP_OUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_NOP_OUT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_NOP_OUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_NOP_OUT_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+	u32 ttt;
+	u32 cmd_sn;
+	u32 reserved3[2];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_NOP_OUT_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_NOP_OUT_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u8 reserved6;
+	u8 flags;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4 (0x1<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL (0x3F<<2)
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL_SHIFT 2
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4 (0x1<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL (0x3F<<2)
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL_SHIFT 2
+	u8 reserved6;
+	u16 reserved7;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved9;
+	u8 reserved8;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved8;
+	u8 reserved9;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI Reject CQE
+ */
+struct bnx2i_reject_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 reason;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 reason;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[8];
+	u32 cq_req_sn;
+};
+
+/*
+ * bnx2i iSCSI TMF SQ WQE
+ */
+struct bnx2i_tmf_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_TMF_REQUEST_FUNCTION (0x7F<<0)
+#define ISCSI_TMF_REQUEST_FUNCTION_SHIFT 0
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_TMF_REQUEST_FUNCTION (0x7F<<0)
+#define ISCSI_TMF_REQUEST_FUNCTION_SHIFT 0
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved1;
+	u16 itt;
+#define ISCSI_TMF_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TMF_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TMF_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TMF_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TMF_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TMF_REQUEST_TYPE_SHIFT 14
+	u16 reserved1;
+#endif
+	u32 ref_itt;
+	u32 cmd_sn;
+	u32 reserved2;
+	u32 ref_cmd_sn;
+	u32 reserved3[3];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved5;
+	u8 reserved4;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved4;
+	u8 reserved5;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI Text SQ WQE
+ */
+struct bnx2i_text_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_TEXT_REQUEST_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_REQUEST_CONT (0x1<<6)
+#define ISCSI_TEXT_REQUEST_CONT_SHIFT 6
+#define ISCSI_TEXT_REQUEST_FINAL (0x1<<7)
+#define ISCSI_TEXT_REQUEST_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_TEXT_REQUEST_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_REQUEST_CONT (0x1<<6)
+#define ISCSI_TEXT_REQUEST_CONT_SHIFT 6
+#define ISCSI_TEXT_REQUEST_FINAL (0x1<<7)
+#define ISCSI_TEXT_REQUEST_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 itt;
+#define ISCSI_TEXT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TEXT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TEXT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TEXT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TEXT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TEXT_REQUEST_TYPE_SHIFT 14
+	u16 reserved3;
+#endif
+	u32 ttt;
+	u32 cmd_sn;
+	u32 reserved4[2];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_TEXT_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_TEXT_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_TEXT_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_TEXT_REQUEST_NUM_RESP_BDS_SHIFT 24
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved7;
+	u8 reserved6;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved6;
+	u8 reserved7;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI SQ WQE
+ */
+union iscsi_request {
+	struct bnx2i_cmd_request cmd;
+	struct bnx2i_tmf_request tmf;
+	struct bnx2i_nop_out_request nop_out;
+	struct bnx2i_login_request login_req;
+	struct bnx2i_text_request text;
+	struct bnx2i_logout_request logout_req;
+	struct bnx2i_cleanup_request cleanup;
+};
+
+
+/*
+ * iSCSI TMF CQE
+ */
+struct bnx2i_tmf_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 response;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 response;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6[7];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_TMF_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TMF_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TMF_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TMF_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TMF_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TMF_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+/*
+ * iSCSI Text CQE
+ */
+struct bnx2i_text_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_TEXT_RESPONSE_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_RESPONSE_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_CONT (0x1<<6)
+#define ISCSI_TEXT_RESPONSE_CONT_SHIFT 6
+#define ISCSI_TEXT_RESPONSE_FINAL (0x1<<7)
+#define ISCSI_TEXT_RESPONSE_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 response_flags;
+#define ISCSI_TEXT_RESPONSE_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_RESPONSE_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_CONT (0x1<<6)
+#define ISCSI_TEXT_RESPONSE_CONT_SHIFT 6
+#define ISCSI_TEXT_RESPONSE_FINAL (0x1<<7)
+#define ISCSI_TEXT_RESPONSE_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 ttt;
+	u32 reserved2;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5;
+	u32 lun[2];
+	u32 reserved6[4];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_TEXT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TEXT_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TEXT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TEXT_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+/*
+ * iSCSI CQE
+ */
+union iscsi_response {
+	struct bnx2i_cmd_response cmd;
+	struct bnx2i_tmf_response tmf;
+	struct bnx2i_login_response login_resp;
+	struct bnx2i_text_response text;
+	struct bnx2i_logout_response logout_resp;
+	struct bnx2i_cleanup_response cleanup;
+	struct bnx2i_reject_msg reject;
+	struct bnx2i_async_msg async;
+	struct bnx2i_nop_in_msg nop_in;
+};
+
+#endif /* __57XX_ISCSI_HSI_LINUX_LE__ */
diff --git a/drivers/scsi/bnx2i/Kconfig b/drivers/scsi/bnx2i/Kconfig
new file mode 100644
index 0000000..820d428
--- /dev/null
+++ b/drivers/scsi/bnx2i/Kconfig
@@ -0,0 +1,7 @@
+config SCSI_BNX2_ISCSI
+	tristate "Broadcom NetXtreme II iSCSI support"
+	select SCSI_ISCSI_ATTRS
+	select CNIC
+	---help---
+	This driver supports iSCSI offload for the Broadcom NetXtreme II
+	devices.
diff --git a/drivers/scsi/bnx2i/Makefile b/drivers/scsi/bnx2i/Makefile
new file mode 100644
index 0000000..b5802bd
--- /dev/null
+++ b/drivers/scsi/bnx2i/Makefile
@@ -0,0 +1,3 @@
+bnx2i-y := bnx2i_init.o bnx2i_hwi.o bnx2i_iscsi.o bnx2i_sysfs.o
+
+obj-$(CONFIG_SCSI_BNX2_ISCSI) += bnx2i.o
diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h
new file mode 100644
index 0000000..d7576f2
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i.h
@@ -0,0 +1,771 @@
+/* bnx2i.h: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#ifndef _BNX2I_H_
+#define _BNX2I_H_
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/kfifo.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/iscsi_proto.h>
+#include <scsi/libiscsi.h>
+#include <scsi/scsi_transport_iscsi.h>
+
+#include "../../net/cnic_if.h"
+#include "57xx_iscsi_hsi.h"
+#include "57xx_iscsi_constants.h"
+
+#define BNX2_ISCSI_DRIVER_NAME		"bnx2i"
+
+#define BNX2I_MAX_ADAPTERS		8
+
+#define ISCSI_MAX_CONNS_PER_HBA		128
+#define ISCSI_MAX_SESS_PER_HBA		ISCSI_MAX_CONNS_PER_HBA
+#define ISCSI_MAX_CMDS_PER_SESS		128
+
+/* Total active commands across all connections supported by devices */
+#define ISCSI_MAX_CMDS_PER_HBA_5708	(28 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+#define ISCSI_MAX_CMDS_PER_HBA_5709	(128 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+#define ISCSI_MAX_CMDS_PER_HBA_57710	(256 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+
+#define ISCSI_MAX_BDS_PER_CMD		32
+
+#define MAX_PAGES_PER_CTRL_STRUCT_POOL	8
+#define BNX2I_RESERVED_SLOW_PATH_CMD_SLOTS	4
+
+/* 5706/08 hardware has limit on maximum buffer size per BD it can handle */
+#define MAX_BD_LENGTH			65535
+#define BD_SPLIT_SIZE			32768
+
+/* min, max & default values for SQ/RQ/CQ size, configurable via' modparam */
+#define BNX2I_SQ_WQES_MIN 		16
+#define BNX2I_570X_SQ_WQES_MAX 		128
+#define BNX2I_5770X_SQ_WQES_MAX 	512
+#define BNX2I_570X_SQ_WQES_DEFAULT 	128
+#define BNX2I_5770X_SQ_WQES_DEFAULT 	256
+
+#define BNX2I_570X_CQ_WQES_MAX 		128
+#define BNX2I_5770X_CQ_WQES_MAX 	512
+
+#define BNX2I_RQ_WQES_MIN 		16
+#define BNX2I_RQ_WQES_MAX 		32
+#define BNX2I_RQ_WQES_DEFAULT 		16
+
+/* CCELLs per conn */
+#define BNX2I_CCELLS_MIN		16
+#define BNX2I_CCELLS_MAX		96
+#define BNX2I_CCELLS_DEFAULT		64
+
+#define ITT_INVALID_SIGNATURE		0xFFFF
+
+#define ISCSI_CMD_CLEANUP_TIMEOUT	100
+
+#define BNX2I_CONN_CTX_BUF_SIZE		16384
+
+#define BNX2I_SQ_WQE_SIZE		64
+#define BNX2I_RQ_WQE_SIZE		256
+#define BNX2I_CQE_SIZE			64
+
+#define MB_KERNEL_CTX_SHIFT		8
+#define MB_KERNEL_CTX_SIZE		(1 << MB_KERNEL_CTX_SHIFT)
+
+#define CTX_SHIFT			7
+#define GET_CID_NUM(cid_addr)		((cid_addr) >> CTX_SHIFT)
+
+#define CTX_OFFSET 			0x10000
+#define MAX_CID_CNT			0x4000
+
+/* 5709 context registers */
+#define BNX2_MQ_CONFIG2			0x00003d00
+#define BNX2_MQ_CONFIG2_CONT_SZ		(0x7L<<4)
+#define BNX2_MQ_CONFIG2_FIRST_L4L5	(0x1fL<<8)
+
+/* 57710's BAR2 is mapped to doorbell registers */
+#define BNX2X_DOORBELL_PCI_BAR		2
+#define BNX2X_MAX_CQS			8
+
+#define CNIC_ARM_CQE			1
+#define CNIC_DISARM_CQE			0
+
+#define REG_RD(__hba, offset)				\
+		readl(__hba->regview + offset)
+#define REG_WR(__hba, offset, val)			\
+		writel(val, __hba->regview + offset)
+
+
+/**
+ * struct generic_pdu_resc - login pdu resource structure
+ *
+ * @req_buf:            driver buffer used to stage payload associated with
+ *                      the login request
+ * @req_dma_addr:       dma address for iscsi login request payload buffer
+ * @req_buf_size:       actual login request payload length
+ * @req_wr_ptr:         pointer into login request buffer when next data is
+ *                      to be written
+ * @resp_hdr:           iscsi header where iscsi login response header is to
+ *                      be recreated
+ * @resp_buf:           buffer to stage login response payload
+ * @resp_dma_addr:      login response payload buffer dma address
+ * @resp_buf_size:      login response paylod length
+ * @resp_wr_ptr:        pointer into login response buffer when next data is
+ *                      to be written
+ * @req_bd_tbl:         iscsi login request payload BD table
+ * @req_bd_dma:         login request BD table dma address
+ * @resp_bd_tbl:        iscsi login response payload BD table
+ * @resp_bd_dma:        login request BD table dma address
+ *
+ * following structure defines buffer info for generic pdus such as iSCSI Login,
+ *	Logout and NOP
+ */
+struct generic_pdu_resc {
+	char *req_buf;
+	dma_addr_t req_dma_addr;
+	u32 req_buf_size;
+	char *req_wr_ptr;
+	struct iscsi_hdr resp_hdr;
+	char *resp_buf;
+	dma_addr_t resp_dma_addr;
+	u32 resp_buf_size;
+	char *resp_wr_ptr;
+	char *req_bd_tbl;
+	dma_addr_t req_bd_dma;
+	char *resp_bd_tbl;
+	dma_addr_t resp_bd_dma;
+};
+
+
+/**
+ * struct bd_resc_page - tracks DMA'able memory allocated for BD tables
+ *
+ * @link:               list head to link elements
+ * @max_ptrs:           maximun pointers that can be stored in this page
+ * @num_valid:          number of pointer valid in this page
+ * @page:               base addess for page pointer array
+ *
+ * structure to track DMA'able memory allocated for command BD tables
+ */
+struct bd_resc_page {
+	struct list_head link;
+	u32 max_ptrs;
+	u32 num_valid;
+	void *page[1];
+};
+
+
+/**
+ * struct io_bdt - I/O buffer destricptor table
+ *
+ * @bd_tbl:             BD table's virtual address
+ * @bd_tbl_dma:         BD table's dma address
+ * @bd_valid:           num valid BD entries
+ *
+ * IO BD table
+ */
+struct io_bdt {
+	struct iscsi_bd *bd_tbl;
+	dma_addr_t bd_tbl_dma;
+	u16 bd_valid;
+};
+
+
+/**
+ * bnx2i_cmd - iscsi command structure
+ *
+ * @scsi_cmd:           SCSI-ML task pointer corresponding to this iscsi cmd
+ * @sg:                 SG list
+ * @io_tbl:             buffer descriptor (BD) table
+ * @bd_tbl_dma:         buffer descriptor (BD) table's dma address
+ */
+struct bnx2i_cmd {
+	struct iscsi_hdr hdr;
+	struct bnx2i_conn *conn;
+	struct scsi_cmnd *scsi_cmd;
+	struct scatterlist *sg;
+	struct io_bdt io_tbl;
+	dma_addr_t bd_tbl_dma;
+	struct bnx2i_cmd_request req;
+};
+
+
+/**
+ * struct bnx2i_conn - iscsi connection structure
+ *
+ * @cls_conn:              pointer to iscsi cls conn
+ * @hba:                   adapter structure pointer
+ * @iscsi_conn_cid:        iscsi conn id
+ * @fw_cid:                firmware iscsi context id
+ * @ep:                    endpoint structure pointer
+ * @gen_pdu:               login/nopout/logout pdu resources
+ * @violation_notified:    bit mask used to track iscsi error/warning messages
+ *                         already printed out
+ *
+ * iSCSI connection structure
+ */
+struct bnx2i_conn {
+	struct iscsi_cls_conn *cls_conn;
+	struct bnx2i_hba *hba;
+	struct completion cmd_cleanup_cmpl;
+	int is_bound;
+
+	u32 iscsi_conn_cid;
+#define BNX2I_CID_RESERVED	0x5AFF
+	u32 fw_cid;
+
+	struct timer_list poll_timer;
+	/*
+	 * Queue Pair (QP) related structure elements.
+	 */
+	struct bnx2i_endpoint *ep;
+
+	/*
+	 * Buffer for login negotiation process
+	 */
+	struct generic_pdu_resc gen_pdu;
+	u64 violation_notified;
+};
+
+
+
+/**
+ * struct iscsi_cid_queue - Per adapter iscsi cid queue
+ *
+ * @cid_que_base:           queue base memory
+ * @cid_que:                queue memory pointer
+ * @cid_q_prod_idx:         produce index
+ * @cid_q_cons_idx:         consumer index
+ * @cid_q_max_idx:          max index. used to detect wrap around condition
+ * @cid_free_cnt:           queue size
+ * @conn_cid_tbl:           iscsi cid to conn structure mapping table
+ *
+ * Per adapter iSCSI CID Queue
+ */
+struct iscsi_cid_queue {
+	void *cid_que_base;
+	u32 *cid_que;
+	u32 cid_q_prod_idx;
+	u32 cid_q_cons_idx;
+	u32 cid_q_max_idx;
+	u32 cid_free_cnt;
+	struct bnx2i_conn **conn_cid_tbl;
+};
+
+/**
+ * struct bnx2i_hba - bnx2i adapter structure
+ *
+ * @link:                  list head to link elements
+ * @cnic:                  pointer to cnic device
+ * @pcidev:                pointer to pci dev
+ * @netdev:                pointer to netdev structure
+ * @regview:               mapped PCI register space
+ * @age:                   age, incremented by every recovery
+ * @cnic_dev_type:         cnic device type, 5706/5708/5709/57710
+ * @mail_queue_access:     mailbox queue access mode, applicable to 5709 only
+ * @reg_with_cnic:         indicates whether the device is register with CNIC
+ * @adapter_state:         adapter state, UP, GOING_DOWN, LINK_DOWN
+ * @mtu_supported:         Ethernet MTU supported
+ * @shost:                 scsi host pointer
+ * @max_sqes:              SQ size
+ * @max_rqes:              RQ size
+ * @max_cqes:              CQ size
+ * @num_ccell:             number of command cells per connection
+ * @ofld_conns_active:     active connection list
+ * @max_active_conns:      max offload connections supported by this device
+ * @cid_que:               iscsi cid queue
+ * @ep_rdwr_lock:          read / write lock to synchronize various ep lists
+ * @ep_ofld_list:          connection list for pending offload completion
+ * @ep_destroy_list:       connection list for pending offload completion
+ * @mp_bd_tbl:             BD table to be used with middle path requests
+ * @mp_bd_dma:             DMA address of 'mp_bd_tbl' memory buffer
+ * @dummy_buffer:          Dummy buffer to be used with zero length scsicmd reqs
+ * @dummy_buf_dma:         DMA address of 'dummy_buffer' memory buffer
+ * @lock:              	   lock to synchonize access to hba structure
+ * @pci_did:               PCI device ID
+ * @pci_vid:               PCI vendor ID
+ * @pci_sdid:              PCI subsystem device ID
+ * @pci_svid:              PCI subsystem vendor ID
+ * @pci_func:              PCI function number in system pci tree
+ * @pci_devno:             PCI device number in system pci tree
+ * @num_wqe_sent:          statistic counter, total wqe's sent
+ * @num_cqe_rcvd:          statistic counter, total cqe's received
+ * @num_intr_claimed:      statistic counter, total interrupts claimed
+ * @link_changed_count:    statistic counter, num of link change notifications
+ *                         received
+ * @ipaddr_changed_count:  statistic counter, num times IP address changed while
+ *                         at least one connection is offloaded
+ * @num_sess_opened:       statistic counter, total num sessions opened
+ * @num_conn_opened:       statistic counter, total num conns opened on this hba
+ * @ctx_ccell_tasks:       captures number of ccells and tasks supported by
+ *                         currently offloaded connection, used to decode
+ *                         context memory
+ *
+ * Adapter Data Structure
+ */
+struct bnx2i_hba {
+	struct list_head link;
+	struct cnic_dev *cnic;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	void __iomem *regview;
+
+	u32 age;
+	unsigned long cnic_dev_type;
+		#define BNX2I_NX2_DEV_5706		0x0
+		#define BNX2I_NX2_DEV_5708		0x1
+		#define BNX2I_NX2_DEV_5709		0x2
+		#define BNX2I_NX2_DEV_57710		0x3
+	u32 mail_queue_access;
+		#define BNX2I_MQ_KERNEL_MODE		0x0
+		#define BNX2I_MQ_KERNEL_BYPASS_MODE	0x1
+		#define BNX2I_MQ_BIN_MODE		0x2
+	unsigned long  reg_with_cnic;
+		#define BNX2I_CNIC_REGISTERED		1
+
+	unsigned long  adapter_state;
+		#define ADAPTER_STATE_UP		0
+		#define ADAPTER_STATE_GOING_DOWN	1
+		#define ADAPTER_STATE_LINK_DOWN		2
+		#define ADAPTER_STATE_INIT_FAILED	31
+	unsigned int mtu_supported;
+		#define BNX2I_MAX_MTU_SUPPORTED		1500
+
+	struct Scsi_Host *shost;
+
+	u32 max_sqes;
+	u32 max_rqes;
+	u32 max_cqes;
+	u32 num_ccell;
+
+	int ofld_conns_active;
+
+	int max_active_conns;
+	struct iscsi_cid_queue cid_que;
+
+	rwlock_t ep_rdwr_lock;
+	struct list_head ep_ofld_list;
+	struct list_head ep_destroy_list;
+
+	/*
+	 * BD table to be used with MP (Middle Path requests.
+	 */
+	char *mp_bd_tbl;
+	dma_addr_t mp_bd_dma;
+	char *dummy_buffer;
+	dma_addr_t dummy_buf_dma;
+
+	spinlock_t lock;	/* protects hba structure access */
+	struct mutex net_dev_lock;/* sync net device access */
+
+	/*
+	 * PCI related info.
+	 */
+	u16 pci_did;
+	u16 pci_vid;
+	u16 pci_sdid;
+	u16 pci_svid;
+	u16 pci_func;
+	u16 pci_devno;
+
+	/*
+	 * Following are a bunch of statistics useful during development
+	 * and later stage for score boarding.
+	 */
+	u32 num_wqe_sent;
+	u32 num_cqe_rcvd;
+	u32 num_intr_claimed;
+	u32 link_changed_count;
+	u32 ipaddr_changed_count;
+	u32 num_sess_opened;
+	u32 num_conn_opened;
+	unsigned int ctx_ccell_tasks;
+};
+
+
+/*******************************************************************************
+ * 	QP [ SQ / RQ / CQ ] info.
+ ******************************************************************************/
+
+/*
+ * SQ/RQ/CQ generic structure definition
+ */
+struct 	sqe {
+	u8 sqe_byte[BNX2I_SQ_WQE_SIZE];
+};
+
+struct 	rqe {
+	u8 rqe_byte[BNX2I_RQ_WQE_SIZE];
+};
+
+struct 	cqe {
+	u8 cqe_byte[BNX2I_CQE_SIZE];
+};
+
+
+enum {
+#if defined(__LITTLE_ENDIAN)
+	CNIC_EVENT_COAL_INDEX	= 0x0,
+	CNIC_SEND_DOORBELL	= 0x4,
+	CNIC_EVENT_CQ_ARM	= 0x7,
+	CNIC_RECV_DOORBELL	= 0x8
+#elif defined(__BIG_ENDIAN)
+	CNIC_EVENT_COAL_INDEX	= 0x2,
+	CNIC_SEND_DOORBELL	= 0x6,
+	CNIC_EVENT_CQ_ARM	= 0x4,
+	CNIC_RECV_DOORBELL	= 0xa
+#endif
+};
+
+
+/*
+ * CQ DB
+ */
+struct bnx2x_iscsi_cq_pend_cmpl {
+	/* CQ producer, updated by Ustorm */
+	u16 ustrom_prod;
+	/* CQ pending completion counter */
+	u16 pend_cntr;
+};
+
+
+struct bnx2i_5771x_cq_db {
+	struct bnx2x_iscsi_cq_pend_cmpl qp_pend_cmpl[BNX2X_MAX_CQS];
+	/* CQ pending completion ITT array */
+	u16 itt[BNX2X_MAX_CQS];
+	/* Cstorm CQ sequence to notify array, updated by driver */;
+	u16 sqn[BNX2X_MAX_CQS];
+	u32 reserved[4] /* 16 byte allignment */;
+};
+
+
+struct bnx2i_5771x_sq_rq_db {
+	u16 prod_idx;
+	u8 reserved0[14]; /* Pad structure size to 16 bytes */
+};
+
+
+struct bnx2i_5771x_dbell_hdr {
+	u8 header;
+	/* 1 for rx doorbell, 0 for tx doorbell */
+#define B577XX_DOORBELL_HDR_RX				(0x1<<0)
+#define B577XX_DOORBELL_HDR_RX_SHIFT			0
+	/* 0 for normal doorbell, 1 for advertise wnd doorbell */
+#define B577XX_DOORBELL_HDR_DB_TYPE			(0x1<<1)
+#define B577XX_DOORBELL_HDR_DB_TYPE_SHIFT		1
+	/* rdma tx only: DPM transaction size specifier (64/128/256/512B) */
+#define B577XX_DOORBELL_HDR_DPM_SIZE			(0x3<<2)
+#define B577XX_DOORBELL_HDR_DPM_SIZE_SHIFT		2
+	/* connection type */
+#define B577XX_DOORBELL_HDR_CONN_TYPE			(0xF<<4)
+#define B577XX_DOORBELL_HDR_CONN_TYPE_SHIFT		4
+};
+
+struct bnx2i_5771x_dbell {
+	struct bnx2i_5771x_dbell_hdr dbell;
+	u8 pad[3];
+
+};
+
+/**
+ * struct qp_info - QP (share queue region) atrributes structure
+ *
+ * @ctx_base:           ioremapped pci register base to access doorbell register
+ *                      pertaining to this offloaded connection
+ * @sq_virt:            virtual address of send queue (SQ) region
+ * @sq_phys:            DMA address of SQ memory region
+ * @sq_mem_size:        SQ size
+ * @sq_prod_qe:         SQ producer entry pointer
+ * @sq_cons_qe:         SQ consumer entry pointer
+ * @sq_first_qe:        virtaul address of first entry in SQ
+ * @sq_last_qe:         virtaul address of last entry in SQ
+ * @sq_prod_idx:        SQ producer index
+ * @sq_cons_idx:        SQ consumer index
+ * @sqe_left:           number sq entry left
+ * @sq_pgtbl_virt:      page table describing buffer consituting SQ region
+ * @sq_pgtbl_phys:      dma address of 'sq_pgtbl_virt'
+ * @sq_pgtbl_size:      SQ page table size
+ * @cq_virt:            virtual address of completion queue (CQ) region
+ * @cq_phys:            DMA address of RQ memory region
+ * @cq_mem_size:        CQ size
+ * @cq_prod_qe:         CQ producer entry pointer
+ * @cq_cons_qe:         CQ consumer entry pointer
+ * @cq_first_qe:        virtaul address of first entry in CQ
+ * @cq_last_qe:         virtaul address of last entry in CQ
+ * @cq_prod_idx:        CQ producer index
+ * @cq_cons_idx:        CQ consumer index
+ * @cqe_left:           number cq entry left
+ * @cqe_size:           size of each CQ entry
+ * @cqe_exp_seq_sn:     next expected CQE sequence number
+ * @cq_pgtbl_virt:      page table describing buffer consituting CQ region
+ * @cq_pgtbl_phys:      dma address of 'cq_pgtbl_virt'
+ * @cq_pgtbl_size:    	CQ page table size
+ * @rq_virt:            virtual address of receive queue (RQ) region
+ * @rq_phys:            DMA address of RQ memory region
+ * @rq_mem_size:        RQ size
+ * @rq_prod_qe:         RQ producer entry pointer
+ * @rq_cons_qe:         RQ consumer entry pointer
+ * @rq_first_qe:        virtaul address of first entry in RQ
+ * @rq_last_qe:         virtaul address of last entry in RQ
+ * @rq_prod_idx:        RQ producer index
+ * @rq_cons_idx:        RQ consumer index
+ * @rqe_left:           number rq entry left
+ * @rq_pgtbl_virt:      page table describing buffer consituting RQ region
+ * @rq_pgtbl_phys:      dma address of 'rq_pgtbl_virt'
+ * @rq_pgtbl_size:      RQ page table size
+ *
+ * queue pair (QP) is a per connection shared data structure which is used
+ *	to send work requests (SQ), receive completion notifications (CQ)
+ *	and receive asynchoronous / scsi sense info (RQ). 'qp_info' structure
+ *	below holds queue memory, consumer/producer indexes and page table
+ *	information
+ */
+struct qp_info {
+	void __iomem *ctx_base;
+#define DPM_TRIGER_TYPE			0x40
+
+#define BNX2I_570x_QUE_DB_SIZE		0
+#define BNX2I_5771x_QUE_DB_SIZE		16
+	struct sqe *sq_virt;
+	dma_addr_t sq_phys;
+	u32 sq_mem_size;
+
+	struct sqe *sq_prod_qe;
+	struct sqe *sq_cons_qe;
+	struct sqe *sq_first_qe;
+	struct sqe *sq_last_qe;
+	u16 sq_prod_idx;
+	u16 sq_cons_idx;
+	u32 sqe_left;
+
+	void *sq_pgtbl_virt;
+	dma_addr_t sq_pgtbl_phys;
+	u32 sq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+
+	struct cqe *cq_virt;
+	dma_addr_t cq_phys;
+	u32 cq_mem_size;
+
+	struct cqe *cq_prod_qe;
+	struct cqe *cq_cons_qe;
+	struct cqe *cq_first_qe;
+	struct cqe *cq_last_qe;
+	u16 cq_prod_idx;
+	u16 cq_cons_idx;
+	u32 cqe_left;
+	u32 cqe_size;
+	u32 cqe_exp_seq_sn;
+
+	void *cq_pgtbl_virt;
+	dma_addr_t cq_pgtbl_phys;
+	u32 cq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+
+	struct rqe *rq_virt;
+	dma_addr_t rq_phys;
+	u32 rq_mem_size;
+
+	struct rqe *rq_prod_qe;
+	struct rqe *rq_cons_qe;
+	struct rqe *rq_first_qe;
+	struct rqe *rq_last_qe;
+	u16 rq_prod_idx;
+	u16 rq_cons_idx;
+	u32 rqe_left;
+
+	void *rq_pgtbl_virt;
+	dma_addr_t rq_pgtbl_phys;
+	u32 rq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+};
+
+
+
+/*
+ * CID handles
+ */
+struct ep_handles {
+	u32 fw_cid;
+	u32 drv_iscsi_cid;
+	u16 pg_cid;
+	u16 rsvd;
+};
+
+
+enum {
+	EP_STATE_IDLE                   = 0x0,
+	EP_STATE_PG_OFLD_START          = 0x1,
+	EP_STATE_PG_OFLD_COMPL          = 0x2,
+	EP_STATE_OFLD_START             = 0x4,
+	EP_STATE_OFLD_COMPL             = 0x8,
+	EP_STATE_CONNECT_START          = 0x10,
+	EP_STATE_CONNECT_COMPL          = 0x20,
+	EP_STATE_ULP_UPDATE_START       = 0x40,
+	EP_STATE_ULP_UPDATE_COMPL       = 0x80,
+	EP_STATE_DISCONN_START          = 0x100,
+	EP_STATE_DISCONN_COMPL          = 0x200,
+	EP_STATE_CLEANUP_START          = 0x400,
+	EP_STATE_CLEANUP_CMPL           = 0x800,
+	EP_STATE_TCP_FIN_RCVD           = 0x1000,
+	EP_STATE_TCP_RST_RCVD           = 0x2000,
+	EP_STATE_PG_OFLD_FAILED         = 0x1000000,
+	EP_STATE_ULP_UPDATE_FAILED      = 0x2000000,
+	EP_STATE_CLEANUP_FAILED         = 0x4000000,
+	EP_STATE_OFLD_FAILED            = 0x8000000,
+	EP_STATE_CONNECT_FAILED         = 0x10000000,
+	EP_STATE_DISCONN_TIMEDOUT       = 0x20000000,
+};
+
+/**
+ * struct bnx2i_endpoint - representation of tcp connection in NX2 world
+ *
+ * @link:               list head to link elements
+ * @hba:                adapter to which this connection belongs
+ * @conn:               iscsi connection this EP is linked to
+ * @sess:               iscsi session this EP is linked to
+ * @cm_sk:              cnic sock struct
+ * @hba_age:            age to detect if 'iscsid' issues ep_disconnect()
+ *                      after HBA reset is completed by bnx2i/cnic/bnx2
+ *                      modules
+ * @state:              tracks offload connection state machine
+ * @teardown_mode:      indicates if conn teardown is abortive or orderly
+ * @qp:                 QP information
+ * @ids:                contains chip allocated *context id* & driver assigned
+ *                      *iscsi cid*
+ * @ofld_timer:         offload timer to detect timeout
+ * @ofld_wait:          wait queue
+ *
+ * Endpoint Structure - equivalent of tcp socket structure
+ */
+struct bnx2i_endpoint {
+	struct list_head link;
+	struct bnx2i_hba *hba;
+	struct bnx2i_conn *conn;
+	struct cnic_sock *cm_sk;
+	u32 hba_age;
+	u32 state;
+	unsigned long timestamp;
+	int num_active_cmds;
+
+	struct qp_info qp;
+	struct ep_handles ids;
+		#define ep_iscsi_cid	ids.drv_iscsi_cid
+		#define ep_cid		ids.fw_cid
+		#define ep_pg_cid	ids.pg_cid
+	struct timer_list ofld_timer;
+	wait_queue_head_t ofld_wait;
+};
+
+
+
+/* Global variables */
+extern unsigned int error_mask1, error_mask2;
+extern u64 iscsi_error_mask;
+extern unsigned int en_tcp_dack;
+extern unsigned int event_coal_div;
+
+extern struct scsi_transport_template *bnx2i_scsi_xport_template;
+extern struct iscsi_transport bnx2i_iscsi_transport;
+extern struct cnic_ulp_ops bnx2i_cnic_cb;
+
+extern unsigned int sq_size;
+extern unsigned int rq_size;
+
+extern struct device_attribute *bnx2i_dev_attributes[];
+
+
+
+/*
+ * Function Prototypes
+ */
+extern void bnx2i_identify_device(struct bnx2i_hba *hba);
+extern void bnx2i_register_device(struct bnx2i_hba *hba);
+
+extern void bnx2i_ulp_init(struct cnic_dev *dev);
+extern void bnx2i_ulp_exit(struct cnic_dev *dev);
+extern void bnx2i_start(void *handle);
+extern void bnx2i_stop(void *handle);
+extern void bnx2i_reg_dev_all(void);
+extern void bnx2i_unreg_dev_all(void);
+extern struct bnx2i_hba *get_adapter_list_head(void);
+
+struct bnx2i_conn *bnx2i_get_conn_from_id(struct bnx2i_hba *hba,
+					  u16 iscsi_cid);
+
+int bnx2i_alloc_ep_pool(void);
+void bnx2i_release_ep_pool(void);
+struct bnx2i_endpoint *bnx2i_ep_ofld_list_next(struct bnx2i_hba *hba);
+struct bnx2i_endpoint *bnx2i_ep_destroy_list_next(struct bnx2i_hba *hba);
+
+struct bnx2i_hba *bnx2i_find_hba_for_cnic(struct cnic_dev *cnic);
+
+struct bnx2i_hba *bnx2i_alloc_hba(struct cnic_dev *cnic);
+void bnx2i_free_hba(struct bnx2i_hba *hba);
+
+void bnx2i_get_rq_buf(struct bnx2i_conn *conn, char *ptr, int len);
+void bnx2i_put_rq_buf(struct bnx2i_conn *conn, int count);
+
+void bnx2i_iscsi_unmap_sg_list(struct bnx2i_cmd *cmd);
+
+void bnx2i_drop_session(struct iscsi_cls_session *session);
+
+extern int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba);
+extern int bnx2i_send_iscsi_login(struct bnx2i_conn *conn,
+				  struct iscsi_task *mtask);
+extern int bnx2i_send_iscsi_tmf(struct bnx2i_conn *conn,
+				  struct iscsi_task *mtask);
+extern int bnx2i_send_iscsi_scsicmd(struct bnx2i_conn *conn,
+				    struct bnx2i_cmd *cmnd);
+extern int bnx2i_send_iscsi_nopout(struct bnx2i_conn *conn,
+				   struct iscsi_task *mtask, u32 ttt,
+				   char *datap, int data_len, int unsol);
+extern int bnx2i_send_iscsi_logout(struct bnx2i_conn *conn,
+				   struct iscsi_task *mtask);
+extern void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba,
+				       struct bnx2i_cmd *cmd);
+extern void bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep);
+extern void bnx2i_update_iscsi_conn(struct iscsi_conn *conn);
+extern void bnx2i_send_conn_destroy(struct bnx2i_hba *hba,
+				    struct bnx2i_endpoint *ep);
+
+extern int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba,
+			       struct bnx2i_endpoint *ep);
+extern void bnx2i_free_qp_resc(struct bnx2i_hba *hba,
+			       struct bnx2i_endpoint *ep);
+extern void bnx2i_ep_ofld_timer(unsigned long data);
+extern struct bnx2i_endpoint *bnx2i_find_ep_in_ofld_list(
+		struct bnx2i_hba *hba, u32 iscsi_cid);
+extern struct bnx2i_endpoint *bnx2i_find_ep_in_destroy_list(
+		struct bnx2i_hba *hba, u32 iscsi_cid);
+
+extern int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep);
+extern void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action);
+
+/* Debug related function prototypes */
+extern void bnx2i_print_pend_cmd_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_active_cmd_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_xmit_pdu_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_recv_state(struct bnx2i_conn *conn);
+
+#endif
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
new file mode 100644
index 0000000..906cef5
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -0,0 +1,2405 @@
+/* bnx2i_hwi.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include <scsi/scsi_tcq.h>
+#include <scsi/libiscsi.h>
+#include "bnx2i.h"
+
+/**
+ * bnx2i_get_cid_num - get cid from ep
+ * @ep: 	endpoint pointer
+ *
+ * Only applicable to 57710 family of devices
+ */
+static u32 bnx2i_get_cid_num(struct bnx2i_endpoint *ep)
+{
+	u32 cid;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		cid = ep->ep_cid;
+	else
+		cid = GET_CID_NUM(ep->ep_cid);
+	return cid;
+}
+
+
+/**
+ * bnx2i_adjust_qp_size - Adjust SQ/RQ/CQ size for 57710 device type
+ * @hba: 		Adapter for which adjustments is to be made
+ *
+ * Only applicable to 57710 family of devices
+ */
+static void bnx2i_adjust_qp_size(struct bnx2i_hba *hba)
+{
+	u32 num_elements_per_pg;
+
+	if (test_bit(BNX2I_NX2_DEV_5706, &hba->cnic_dev_type) ||
+	    test_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type) ||
+	    test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type)) {
+		if (!is_power_of_2(hba->max_sqes))
+			hba->max_sqes = rounddown_pow_of_two(hba->max_sqes);
+
+		if (!is_power_of_2(hba->max_rqes))
+			hba->max_rqes = rounddown_pow_of_two(hba->max_rqes);
+	}
+
+	/* Adjust each queue size if the user selection does not
+	 * yield integral num of page buffers
+	 */
+	/* adjust SQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_SQ_WQE_SIZE;
+	if (hba->max_sqes < num_elements_per_pg)
+		hba->max_sqes = num_elements_per_pg;
+	else if (hba->max_sqes % num_elements_per_pg)
+		hba->max_sqes = (hba->max_sqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+
+	/* adjust CQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_CQE_SIZE;
+	if (hba->max_cqes < num_elements_per_pg)
+		hba->max_cqes = num_elements_per_pg;
+	else if (hba->max_cqes % num_elements_per_pg)
+		hba->max_cqes = (hba->max_cqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+
+	/* adjust RQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_RQ_WQE_SIZE;
+	if (hba->max_rqes < num_elements_per_pg)
+		hba->max_rqes = num_elements_per_pg;
+	else if (hba->max_rqes % num_elements_per_pg)
+		hba->max_rqes = (hba->max_rqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+}
+
+
+/**
+ * bnx2i_get_link_state - get network interface link state
+ * @hba:	adapter instance pointer
+ *
+ * updates adapter structure flag based on netdev state
+ */
+static void bnx2i_get_link_state(struct bnx2i_hba *hba)
+{
+	if (test_bit(__LINK_STATE_NOCARRIER, &hba->netdev->state))
+		set_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state);
+	else
+		clear_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state);
+}
+
+
+/**
+ * bnx2i_iscsi_license_error - displays iscsi license related error message
+ * @hba:		adapter instance pointer
+ * @error_code:		error classification
+ *
+ * Puts out an error log when driver is unable to offload iscsi connection
+ *	due to license restrictions
+ */
+static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code)
+{
+	if (error_code == ISCSI_KCQE_COMPLETION_STATUS_ISCSI_NOT_SUPPORTED)
+		/* iSCSI offload not supported on this device */
+		printk(KERN_ERR "bnx2i: iSCSI not supported, dev=%s\n",
+				hba->netdev->name);
+	if (error_code == ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED)
+		/* iSCSI offload not supported on this LOM device */
+		printk(KERN_ERR "bnx2i: LOM is not enable to "
+				"offload iSCSI connections, dev=%s\n",
+				hba->netdev->name);
+	set_bit(ADAPTER_STATE_INIT_FAILED, &hba->adapter_state);
+}
+
+
+/**
+ * bnx2i_arm_cq_event_coalescing - arms CQ to enable EQ notification
+ * @ep:		endpoint (transport indentifier) structure
+ * @action:	action, ARM or DISARM. For now only ARM_CQE is used
+ *
+ * Arm'ing CQ will enable chip to generate global EQ events inorder to interrupt
+ *	the driver. EQ event is generated CQ index is hit or at least 1 CQ is
+ *	outstanding and on chip timer expires
+ */
+void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
+{
+	struct bnx2i_5771x_cq_db *cq_db;
+	u16 cq_index;
+
+	if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		return;
+
+	if (action == CNIC_ARM_CQE) {
+		cq_index = ep->qp.cqe_exp_seq_sn +
+			   ep->num_active_cmds / event_coal_div;
+		cq_index %= (ep->qp.cqe_size * 2 + 1);
+		if (!cq_index) {
+			cq_index = 1;
+			cq_db = (struct bnx2i_5771x_cq_db *)
+					ep->qp.cq_pgtbl_virt;
+			cq_db->sqn[0] = cq_index;
+		}
+	}
+}
+
+
+/**
+ * bnx2i_get_rq_buf - copy RQ buffer contents to driver buffer
+ * @conn:		iscsi connection on which RQ event occured
+ * @ptr:		driver buffer to which RQ buffer contents is to
+ *			be copied
+ * @len:		length of valid data inside RQ buf
+ *
+ * Copies RQ buffer contents from shared (DMA'able) memory region to
+ *	driver buffer. RQ is used to DMA unsolicitated iscsi pdu's and
+ *	scsi sense info
+ */
+void bnx2i_get_rq_buf(struct bnx2i_conn *bnx2i_conn, char *ptr, int len)
+{
+	if (!bnx2i_conn->ep->qp.rqe_left)
+		return;
+
+	bnx2i_conn->ep->qp.rqe_left--;
+	memcpy(ptr, (u8 *) bnx2i_conn->ep->qp.rq_cons_qe, len);
+	if (bnx2i_conn->ep->qp.rq_cons_qe == bnx2i_conn->ep->qp.rq_last_qe) {
+		bnx2i_conn->ep->qp.rq_cons_qe = bnx2i_conn->ep->qp.rq_first_qe;
+		bnx2i_conn->ep->qp.rq_cons_idx = 0;
+	} else {
+		bnx2i_conn->ep->qp.rq_cons_qe++;
+		bnx2i_conn->ep->qp.rq_cons_idx++;
+	}
+}
+
+
+static void bnx2i_ring_577xx_doorbell(struct bnx2i_conn *conn)
+{
+	struct bnx2i_5771x_dbell dbell;
+	u32 msg;
+
+	memset(&dbell, 0, sizeof(dbell));
+	dbell.dbell.header = (B577XX_ISCSI_CONNECTION_TYPE <<
+			      B577XX_DOORBELL_HDR_CONN_TYPE_SHIFT);
+	msg = *((u32 *)&dbell);
+	/* TODO : get doorbell register mapping */
+	writel(cpu_to_le32(msg), conn->ep->qp.ctx_base);
+}
+
+
+/**
+ * bnx2i_put_rq_buf - Replenish RQ buffer, if required ring on chip doorbell
+ * @conn:	iscsi connection on which event to post
+ * @count:	number of RQ buffer being posted to chip
+ *
+ * No need to ring hardware doorbell for 57710 family of devices
+ */
+void bnx2i_put_rq_buf(struct bnx2i_conn *bnx2i_conn, int count)
+{
+	struct bnx2i_5771x_sq_rq_db *rq_db;
+	u16 hi_bit = (bnx2i_conn->ep->qp.rq_prod_idx & 0x8000);
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+
+	ep->qp.rqe_left += count;
+	ep->qp.rq_prod_idx &= 0x7FFF;
+	ep->qp.rq_prod_idx += count;
+
+	if (ep->qp.rq_prod_idx > bnx2i_conn->hba->max_rqes) {
+		ep->qp.rq_prod_idx %= bnx2i_conn->hba->max_rqes;
+		if (!hi_bit)
+			ep->qp.rq_prod_idx |= 0x8000;
+	} else
+		ep->qp.rq_prod_idx |= hi_bit;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		rq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.rq_pgtbl_virt;
+		rq_db->prod_idx = ep->qp.rq_prod_idx;
+		/* no need to ring hardware doorbell for 57710 */
+	} else {
+		writew(ep->qp.rq_prod_idx,
+		       ep->qp.ctx_base + CNIC_RECV_DOORBELL);
+	}
+	mmiowb();
+}
+
+
+/**
+ * bnx2i_ring_sq_dbell - Ring SQ doorbell to wake-up the processing engine
+ * @conn: 		iscsi connection to which new SQ entries belong
+ * @count: 		number of SQ WQEs to post
+ *
+ * SQ DB is updated in host memory and TX Doorbell is rung for 57710 family
+ *	of devices. For 5706/5708/5709 new SQ WQE count is written into the
+ *	doorbell register
+ */
+static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count)
+{
+	struct bnx2i_5771x_sq_rq_db *sq_db;
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+
+	ep->num_active_cmds++;
+	wmb();	/* flush SQ WQE memory before the doorbell is rung */
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		sq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.sq_pgtbl_virt;
+		sq_db->prod_idx = ep->qp.sq_prod_idx;
+		bnx2i_ring_577xx_doorbell(bnx2i_conn);
+	} else
+		writew(count, ep->qp.ctx_base + CNIC_SEND_DOORBELL);
+
+	mmiowb(); /* flush posted PCI writes */
+}
+
+
+/**
+ * bnx2i_ring_dbell_update_sq_params - update SQ driver parameters
+ * @conn:	iscsi connection to which new SQ entries belong
+ * @count:	number of SQ WQEs to post
+ *
+ * this routine will update SQ driver parameters and ring the doorbell
+ */
+static void bnx2i_ring_dbell_update_sq_params(struct bnx2i_conn *bnx2i_conn,
+					      int count)
+{
+	int tmp_cnt;
+
+	if (count == 1) {
+		if (bnx2i_conn->ep->qp.sq_prod_qe ==
+		    bnx2i_conn->ep->qp.sq_last_qe)
+			bnx2i_conn->ep->qp.sq_prod_qe =
+						bnx2i_conn->ep->qp.sq_first_qe;
+		else
+			bnx2i_conn->ep->qp.sq_prod_qe++;
+	} else {
+		if ((bnx2i_conn->ep->qp.sq_prod_qe + count) <=
+		    bnx2i_conn->ep->qp.sq_last_qe)
+			bnx2i_conn->ep->qp.sq_prod_qe += count;
+		else {
+			tmp_cnt = bnx2i_conn->ep->qp.sq_last_qe -
+				bnx2i_conn->ep->qp.sq_prod_qe;
+			bnx2i_conn->ep->qp.sq_prod_qe =
+				&bnx2i_conn->ep->qp.sq_first_qe[count -
+								(tmp_cnt + 1)];
+		}
+	}
+	bnx2i_conn->ep->qp.sq_prod_idx += count;
+	/* Ring the doorbell */
+	bnx2i_ring_sq_dbell(bnx2i_conn, bnx2i_conn->ep->qp.sq_prod_idx);
+}
+
+
+/**
+ * bnx2i_send_iscsi_login - post iSCSI login request MP WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI Login request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_login(struct bnx2i_conn *bnx2i_conn,
+			   struct iscsi_task *task)
+{
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_login_request *login_wqe;
+	struct iscsi_login *login_hdr;
+	u32 dword;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	login_hdr = (struct iscsi_login *)task->hdr;
+	login_wqe = (struct bnx2i_login_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+
+	login_wqe->op_code = login_hdr->opcode;
+	login_wqe->op_attr = login_hdr->flags;
+	login_wqe->version_max = login_hdr->max_version;
+	login_wqe->version_min = login_hdr->min_version;
+	login_wqe->data_length = ntoh24(login_hdr->dlength);
+	login_wqe->isid_lo = *((u32 *) login_hdr->isid);
+	login_wqe->isid_hi = *((u16 *) login_hdr->isid + 2);
+	login_wqe->tsih = login_hdr->tsih;
+	login_wqe->itt = task->itt |
+		(ISCSI_TASK_TYPE_MPATH << ISCSI_LOGIN_REQUEST_TYPE_SHIFT);
+	login_wqe->cid = login_hdr->cid;
+
+	login_wqe->cmd_sn = be32_to_cpu(login_hdr->cmdsn);
+	login_wqe->exp_stat_sn = be32_to_cpu(login_hdr->exp_statsn);
+
+	login_wqe->resp_bd_list_addr_lo = (u32) bnx2i_conn->gen_pdu.resp_bd_dma;
+	login_wqe->resp_bd_list_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.resp_bd_dma >> 32);
+
+	dword = ((1 << ISCSI_LOGIN_REQUEST_NUM_RESP_BDS_SHIFT) |
+		 (bnx2i_conn->gen_pdu.resp_buf_size <<
+		  ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH_SHIFT));
+	login_wqe->resp_buffer = dword;
+	login_wqe->flags = 0;
+	login_wqe->bd_list_addr_lo = (u32) bnx2i_conn->gen_pdu.req_bd_dma;
+	login_wqe->bd_list_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.req_bd_dma >> 32);
+	login_wqe->num_bds = 1;
+	login_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_tmf - post iSCSI task management request MP WQE to hardware
+ * @conn:	iscsi connection
+ * @mtask:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI Login request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_tmf(struct bnx2i_conn *bnx2i_conn,
+			 struct iscsi_task *mtask)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_tm *tmfabort_hdr;
+	struct scsi_cmnd *ref_sc;
+	struct iscsi_task *ctask;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_tmf_request *tmfabort_wqe;
+	u32 dword;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)mtask->dd_data;
+	tmfabort_hdr = (struct iscsi_tm *)mtask->hdr;
+	tmfabort_wqe = (struct bnx2i_tmf_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+
+	tmfabort_wqe->op_code = tmfabort_hdr->opcode;
+	tmfabort_wqe->op_attr = 0;
+	tmfabort_wqe->op_attr =
+		ISCSI_TMF_REQUEST_ALWAYS_ONE | ISCSI_TM_FUNC_ABORT_TASK;
+	tmfabort_wqe->lun[0] = be32_to_cpu(tmfabort_hdr->lun[0]);
+	tmfabort_wqe->lun[1] = be32_to_cpu(tmfabort_hdr->lun[1]);
+
+	tmfabort_wqe->itt = (mtask->itt | (ISCSI_TASK_TYPE_MPATH << 14));
+	tmfabort_wqe->reserved2 = 0;
+	tmfabort_wqe->cmd_sn = be32_to_cpu(tmfabort_hdr->cmdsn);
+
+	ctask = iscsi_itt_to_task(conn, tmfabort_hdr->rtt);
+	if (!ctask || ctask->sc)
+		/*
+		 * the iscsi layer must have completed the cmd while this
+		 * was starting up.
+		 */
+		return 0;
+	ref_sc = ctask->sc;
+
+	if (ref_sc->sc_data_direction == DMA_TO_DEVICE)
+		dword = (ISCSI_TASK_TYPE_WRITE << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	else
+		dword = (ISCSI_TASK_TYPE_READ << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	tmfabort_wqe->ref_itt = (dword | tmfabort_hdr->rtt);
+	tmfabort_wqe->ref_cmd_sn = be32_to_cpu(tmfabort_hdr->refcmdsn);
+
+	tmfabort_wqe->bd_list_addr_lo = (u32) bnx2i_conn->hba->mp_bd_dma;
+	tmfabort_wqe->bd_list_addr_hi = (u32)
+				((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+	tmfabort_wqe->num_bds = 1;
+	tmfabort_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_scsicmd - post iSCSI scsicmd request WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI SCSI-CMD request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_scsicmd(struct bnx2i_conn *bnx2i_conn,
+			     struct bnx2i_cmd *cmd)
+{
+	struct bnx2i_cmd_request *scsi_cmd_wqe;
+
+	scsi_cmd_wqe = (struct bnx2i_cmd_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+	memcpy(scsi_cmd_wqe, &cmd->req, sizeof(struct bnx2i_cmd_request));
+	scsi_cmd_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_nopout - post iSCSI NOPOUT request WQE to hardware
+ * @conn:		iscsi connection
+ * @cmd:		driver command structure which is requesting
+ *			a WQE to sent to chip for further processing
+ * @ttt:		TTT to be used when building pdu header
+ * @datap:		payload buffer pointer
+ * @data_len:		payload data length
+ * @unsol:		indicated whether nopout pdu is unsolicited pdu or
+ *			in response to target's NOPIN w/ TTT != FFFFFFFF
+ *
+ * prepare and post a nopout request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn,
+			    struct iscsi_task *task, u32 ttt,
+			    char *datap, int data_len, int unsol)
+{
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_nop_out_request *nopout_wqe;
+	struct iscsi_nopout *nopout_hdr;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	nopout_hdr = (struct iscsi_nopout *)task->hdr;
+	nopout_wqe = (struct bnx2i_nop_out_request *)ep->qp.sq_prod_qe;
+	nopout_wqe->op_code = nopout_hdr->opcode;
+	nopout_wqe->op_attr = ISCSI_FLAG_CMD_FINAL;
+	memcpy(nopout_wqe->lun, nopout_hdr->lun, 8);
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		u32 tmp = nopout_hdr->lun[0];
+		/* 57710 requires LUN field to be swapped */
+		nopout_hdr->lun[0] = nopout_hdr->lun[1];
+		nopout_hdr->lun[1] = tmp;
+	}
+
+	nopout_wqe->itt = ((u16)task->itt |
+			   (ISCSI_TASK_TYPE_MPATH <<
+			    ISCSI_TMF_REQUEST_TYPE_SHIFT));
+	nopout_wqe->ttt = ttt;
+	nopout_wqe->flags = 0;
+	if (!unsol)
+		nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
+	else if (nopout_hdr->itt == RESERVED_ITT)
+		nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
+
+	nopout_wqe->cmd_sn = be32_to_cpu(nopout_hdr->cmdsn);
+	nopout_wqe->data_length = data_len;
+	if (data_len) {
+		/* handle payload data, not required in first release */
+		printk(KERN_ALERT "NOPOUT: WARNING!! payload len != 0\n");
+	} else {
+		nopout_wqe->bd_list_addr_lo = (u32)
+					bnx2i_conn->hba->mp_bd_dma;
+		nopout_wqe->bd_list_addr_hi =
+			(u32) ((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+		nopout_wqe->num_bds = 1;
+	}
+	nopout_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+
+/**
+ * bnx2i_send_iscsi_logout - post iSCSI logout request WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post logout request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_logout(struct bnx2i_conn *bnx2i_conn,
+			    struct iscsi_task *task)
+{
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_logout_request *logout_wqe;
+	struct iscsi_logout *logout_hdr;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	logout_hdr = (struct iscsi_logout *)task->hdr;
+
+	logout_wqe = (struct bnx2i_logout_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+	memset(logout_wqe, 0x00, sizeof(struct bnx2i_logout_request));
+
+	logout_wqe->op_code = logout_hdr->opcode;
+	logout_wqe->cmd_sn = be32_to_cpu(logout_hdr->cmdsn);
+	logout_wqe->op_attr =
+			logout_hdr->flags | ISCSI_LOGOUT_REQUEST_ALWAYS_ONE;
+	logout_wqe->itt = ((u16)task->itt |
+			   (ISCSI_TASK_TYPE_MPATH <<
+			    ISCSI_LOGOUT_REQUEST_TYPE_SHIFT));
+	logout_wqe->data_length = 0;
+	logout_wqe->cid = 0;
+
+	logout_wqe->bd_list_addr_lo = (u32) bnx2i_conn->hba->mp_bd_dma;
+	logout_wqe->bd_list_addr_hi = (u32)
+				((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+	logout_wqe->num_bds = 1;
+	logout_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+
+/**
+ * bnx2i_update_iscsi_conn - post iSCSI logout request WQE to hardware
+ * @conn:	iscsi connection which requires iscsi parameter update
+ *
+ * sends down iSCSI Conn Update request to move iSCSI conn to FFP
+ */
+void bnx2i_update_iscsi_conn(struct iscsi_conn *conn)
+{
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_hba *hba = bnx2i_conn->hba;
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_update *update_wqe;
+	struct iscsi_kwqe_conn_update conn_update_kwqe;
+
+	update_wqe = &conn_update_kwqe;
+
+	update_wqe->hdr.op_code = ISCSI_KWQE_OPCODE_UPDATE_CONN;
+	update_wqe->hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	/* 5771x requires conn context id to be passed as is */
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_conn->ep->hba->cnic_dev_type))
+		update_wqe->context_id = bnx2i_conn->ep->ep_cid;
+	else
+		update_wqe->context_id = (bnx2i_conn->ep->ep_cid >> 7);
+	update_wqe->conn_flags = 0;
+	if (conn->hdrdgst_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST;
+	if (conn->datadgst_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST;
+	if (conn->session->initial_r2t_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T;
+	if (conn->session->imm_data_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA;
+
+	update_wqe->max_send_pdu_length = conn->max_xmit_dlength;
+	update_wqe->max_recv_pdu_length = conn->max_recv_dlength;
+	update_wqe->first_burst_length = conn->session->first_burst;
+	update_wqe->max_burst_length = conn->session->max_burst;
+	update_wqe->exp_stat_sn = conn->exp_statsn;
+	update_wqe->max_outstanding_r2ts = conn->session->max_r2t;
+	update_wqe->session_error_recovery_level = conn->session->erl;
+	iscsi_conn_printk(KERN_ALERT, conn,
+			  "bnx2i: conn update - MBL 0x%x FBL 0x%x"
+			  "MRDSL_I 0x%x MRDSL_T 0x%x \n",
+			  update_wqe->max_burst_length,
+			  update_wqe->first_burst_length,
+			  update_wqe->max_recv_pdu_length,
+			  update_wqe->max_send_pdu_length);
+
+	kwqe_arr[0] = (struct kwqe *) update_wqe;
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 1);
+}
+
+
+/**
+ * bnx2i_ep_ofld_timer - post iSCSI logout request WQE to hardware
+ * @data:	endpoint (transport handle) structure pointer
+ *
+ * routine to handle connection offload/destroy request timeout
+ */
+void bnx2i_ep_ofld_timer(unsigned long data)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) data;
+
+	if (ep->state == EP_STATE_OFLD_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_OFLD timeout\n");
+		ep->state = EP_STATE_OFLD_FAILED;
+	} else if (ep->state == EP_STATE_DISCONN_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_DISCON timeout\n");
+		ep->state = EP_STATE_DISCONN_TIMEDOUT;
+	} else if (ep->state == EP_STATE_CLEANUP_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_CLEANUP timeout\n");
+		ep->state = EP_STATE_CLEANUP_FAILED;
+	}
+
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+static int bnx2i_power_of2(u32 val)
+{
+	u32 power = 0;
+	if (val & (val - 1))
+		return power;
+	val--;
+	while (val) {
+		val = val >> 1;
+		power++;
+	}
+	return power;
+}
+
+
+/**
+ * bnx2i_send_cmd_cleanup_req - send iscsi cmd context clean-up request
+ * @hba:	adapter structure pointer
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
+{
+	struct bnx2i_cleanup_request *cmd_cleanup;
+
+	cmd_cleanup =
+		(struct bnx2i_cleanup_request *)cmd->conn->ep->qp.sq_prod_qe;
+	memset(cmd_cleanup, 0x00, sizeof(struct bnx2i_cleanup_request));
+
+	cmd_cleanup->op_code = ISCSI_OPCODE_CLEANUP_REQUEST;
+	cmd_cleanup->itt = cmd->req.itt;
+	cmd_cleanup->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(cmd->conn, 1);
+}
+
+
+/**
+ * bnx2i_send_conn_destroy - initiates iscsi connection teardown process
+ * @hba:	adapter structure pointer
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE to initiate
+ * 	iscsi connection context clean-up process
+ */
+void bnx2i_send_conn_destroy(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_destroy conn_cleanup;
+
+	memset(&conn_cleanup, 0x00, sizeof(struct iscsi_kwqe_conn_destroy));
+
+	conn_cleanup.hdr.op_code = ISCSI_KWQE_OPCODE_DESTROY_CONN;
+	conn_cleanup.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+	/* 5771x requires conn context id to be passed as is */
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		conn_cleanup.context_id = ep->ep_cid;
+	else
+		conn_cleanup.context_id = (ep->ep_cid >> 7);
+
+	conn_cleanup.reserved0 = (u16)ep->ep_iscsi_cid;
+
+	kwqe_arr[0] = (struct kwqe *) &conn_cleanup;
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 1);
+}
+
+
+/**
+ * bnx2i_570x_send_conn_ofld_req - initiates iscsi conn context setup process
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * 5706/5708/5709 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+static void bnx2i_570x_send_conn_ofld_req(struct bnx2i_hba *hba,
+					  struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_offload1 ofld_req1;
+	struct iscsi_kwqe_conn_offload2 ofld_req2;
+	dma_addr_t dma_addr;
+	int num_kwqes = 2;
+	u32 *ptbl;
+
+	ofld_req1.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN1;
+	ofld_req1.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	ofld_req1.iscsi_conn_id = (u16) ep->ep_iscsi_cid;
+
+	dma_addr = ep->qp.sq_pgtbl_phys;
+	ofld_req1.sq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.sq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	dma_addr = ep->qp.cq_pgtbl_phys;
+	ofld_req1.cq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.cq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ofld_req2.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN2;
+	ofld_req2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	dma_addr = ep->qp.rq_pgtbl_phys;
+	ofld_req2.rq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req2.rq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ptbl = (u32 *) ep->qp.sq_pgtbl_virt;
+
+	ofld_req2.sq_first_pte.hi = *ptbl++;
+	ofld_req2.sq_first_pte.lo = *ptbl;
+
+	ptbl = (u32 *) ep->qp.cq_pgtbl_virt;
+	ofld_req2.cq_first_pte.hi = *ptbl++;
+	ofld_req2.cq_first_pte.lo = *ptbl;
+
+	kwqe_arr[0] = (struct kwqe *) &ofld_req1;
+	kwqe_arr[1] = (struct kwqe *) &ofld_req2;
+	ofld_req2.num_additional_wqes = 0;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, num_kwqes);
+}
+
+
+/**
+ * bnx2i_5771x_send_conn_ofld_req - initiates iscsi connection context creation
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * 57710 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+static void bnx2i_5771x_send_conn_ofld_req(struct bnx2i_hba *hba,
+					   struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[5];
+	struct iscsi_kwqe_conn_offload1 ofld_req1;
+	struct iscsi_kwqe_conn_offload2 ofld_req2;
+	struct iscsi_kwqe_conn_offload3 ofld_req3[1];
+	dma_addr_t dma_addr;
+	int num_kwqes = 2;
+	u32 *ptbl;
+
+	ofld_req1.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN1;
+	ofld_req1.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	ofld_req1.iscsi_conn_id = (u16) ep->ep_iscsi_cid;
+
+	dma_addr = ep->qp.sq_pgtbl_phys + ISCSI_SQ_DB_SIZE;
+	ofld_req1.sq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.sq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	dma_addr = ep->qp.cq_pgtbl_phys + ISCSI_CQ_DB_SIZE;
+	ofld_req1.cq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.cq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ofld_req2.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN2;
+	ofld_req2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	dma_addr = ep->qp.rq_pgtbl_phys + ISCSI_RQ_DB_SIZE;
+	ofld_req2.rq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req2.rq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ptbl = (u32 *)((u8 *)ep->qp.sq_pgtbl_virt + ISCSI_SQ_DB_SIZE);
+	ofld_req2.sq_first_pte.hi = *ptbl++;
+	ofld_req2.sq_first_pte.lo = *ptbl;
+
+	ptbl = (u32 *)((u8 *)ep->qp.cq_pgtbl_virt + ISCSI_CQ_DB_SIZE);
+	ofld_req2.cq_first_pte.hi = *ptbl++;
+	ofld_req2.cq_first_pte.lo = *ptbl;
+
+	kwqe_arr[0] = (struct kwqe *) &ofld_req1;
+	kwqe_arr[1] = (struct kwqe *) &ofld_req2;
+
+	ofld_req2.num_additional_wqes = 1;
+	memset(ofld_req3, 0x00, sizeof(ofld_req3[0]));
+	ptbl = (u32 *)((u8 *)ep->qp.rq_pgtbl_virt + ISCSI_RQ_DB_SIZE);
+	ofld_req3[0].qp_first_pte[0].hi = *ptbl++;
+	ofld_req3[0].qp_first_pte[0].lo = *ptbl;
+
+	kwqe_arr[2] = (struct kwqe *) ofld_req3;
+	/* need if we decide to go with multiple KCQE's per conn */
+	num_kwqes += 1;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, num_kwqes);
+}
+
+/**
+ * bnx2i_send_conn_ofld_req - initiates iscsi connection context setup process
+ *
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+void bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		bnx2i_5771x_send_conn_ofld_req(hba, ep);
+	else
+		bnx2i_570x_send_conn_ofld_req(hba, ep);
+}
+
+
+/**
+ * setup_qp_page_tables - iscsi QP page table setup function
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * Sets up page tables for SQ/RQ/CQ, 1G/sec (5706/5708/5709) devices requires
+ * 	64-bit address in big endian format. Whereas 10G/sec (57710) requires
+ * 	PT in little endian format
+ */
+static void setup_qp_page_tables(struct bnx2i_endpoint *ep)
+{
+	int num_pages;
+	u32 *ptbl;
+	dma_addr_t page;
+	int cnic_dev_10g;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		cnic_dev_10g = 1;
+	else
+		cnic_dev_10g = 0;
+
+	/* SQ page table */
+	memset(ep->qp.sq_pgtbl_virt, 0, ep->qp.sq_pgtbl_size);
+	num_pages = ep->qp.sq_mem_size / PAGE_SIZE;
+	page = ep->qp.sq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.sq_pgtbl_virt + ISCSI_SQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.sq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+
+	/* RQ page table */
+	memset(ep->qp.rq_pgtbl_virt, 0, ep->qp.rq_pgtbl_size);
+	num_pages = ep->qp.rq_mem_size / PAGE_SIZE;
+	page = ep->qp.rq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.rq_pgtbl_virt + ISCSI_RQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.rq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+
+	/* CQ page table */
+	memset(ep->qp.cq_pgtbl_virt, 0, ep->qp.cq_pgtbl_size);
+	num_pages = ep->qp.cq_mem_size / PAGE_SIZE;
+	page = ep->qp.cq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.cq_pgtbl_virt + ISCSI_CQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.cq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+}
+
+
+/**
+ * bnx2i_alloc_qp_resc - allocates required resources for QP.
+ * @hba:	adapter structure pointer
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * Allocate QP (transport layer for iSCSI connection) resources, DMA'able
+ *	memory for SQ/RQ/CQ and page tables. EP structure elements such
+ *	as producer/consumer indexes/pointers, queue sizes and page table
+ *	contents are setup
+ */
+int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	struct bnx2i_5771x_cq_db *cq_db;
+
+	ep->hba = hba;
+	ep->conn = NULL;
+	ep->ep_cid = ep->ep_iscsi_cid = ep->ep_pg_cid = 0;
+
+	/* Allocate page table memory for SQ which is page aligned */
+	ep->qp.sq_mem_size = hba->max_sqes * BNX2I_SQ_WQE_SIZE;
+	ep->qp.sq_mem_size =
+		(ep->qp.sq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.sq_pgtbl_size =
+		(ep->qp.sq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.sq_pgtbl_size =
+		(ep->qp.sq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.sq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.sq_pgtbl_size,
+				   &ep->qp.sq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.sq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc SQ PT mem (%d)\n",
+				  ep->qp.sq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual SQ element */
+	ep->qp.sq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.sq_mem_size,
+				   &ep->qp.sq_phys, GFP_KERNEL);
+	if (!ep->qp.sq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc SQ BD memory %d\n",
+				  ep->qp.sq_mem_size);
+		goto mem_alloc_err;
+	}
+
+	memset(ep->qp.sq_virt, 0x00, ep->qp.sq_mem_size);
+	ep->qp.sq_first_qe = ep->qp.sq_virt;
+	ep->qp.sq_prod_qe = ep->qp.sq_first_qe;
+	ep->qp.sq_cons_qe = ep->qp.sq_first_qe;
+	ep->qp.sq_last_qe = &ep->qp.sq_first_qe[hba->max_sqes - 1];
+	ep->qp.sq_prod_idx = 0;
+	ep->qp.sq_cons_idx = 0;
+	ep->qp.sqe_left = hba->max_sqes;
+
+	/* Allocate page table memory for CQ which is page aligned */
+	ep->qp.cq_mem_size = hba->max_cqes * BNX2I_CQE_SIZE;
+	ep->qp.cq_mem_size =
+		(ep->qp.cq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.cq_pgtbl_size =
+		(ep->qp.cq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.cq_pgtbl_size =
+		(ep->qp.cq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.cq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.cq_pgtbl_size,
+				   &ep->qp.cq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.cq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc CQ PT memory %d\n",
+				  ep->qp.cq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual CQ element */
+	ep->qp.cq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.cq_mem_size,
+				   &ep->qp.cq_phys, GFP_KERNEL);
+	if (!ep->qp.cq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc CQ BD memory %d\n",
+				  ep->qp.cq_mem_size);
+		goto mem_alloc_err;
+	}
+	memset(ep->qp.cq_virt, 0x00, ep->qp.cq_mem_size);
+
+	ep->qp.cq_first_qe = ep->qp.cq_virt;
+	ep->qp.cq_prod_qe = ep->qp.cq_first_qe;
+	ep->qp.cq_cons_qe = ep->qp.cq_first_qe;
+	ep->qp.cq_last_qe = &ep->qp.cq_first_qe[hba->max_cqes - 1];
+	ep->qp.cq_prod_idx = 0;
+	ep->qp.cq_cons_idx = 0;
+	ep->qp.cqe_left = hba->max_cqes;
+	ep->qp.cqe_exp_seq_sn = ISCSI_INITIAL_SN;
+	ep->qp.cqe_size = hba->max_cqes;
+
+	/* Invalidate all EQ CQE index, req only for 57710 */
+	cq_db = (struct bnx2i_5771x_cq_db *) ep->qp.cq_pgtbl_virt;
+	memset(cq_db->sqn, 0xFF, sizeof(cq_db->sqn[0]) * BNX2X_MAX_CQS);
+
+	/* Allocate page table memory for RQ which is page aligned */
+	ep->qp.rq_mem_size = hba->max_rqes * BNX2I_RQ_WQE_SIZE;
+	ep->qp.rq_mem_size =
+		(ep->qp.rq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.rq_pgtbl_size =
+		(ep->qp.rq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.rq_pgtbl_size =
+		(ep->qp.rq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.rq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.rq_pgtbl_size,
+				   &ep->qp.rq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.rq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc RQ PT mem %d\n",
+				  ep->qp.rq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual RQ element */
+	ep->qp.rq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.rq_mem_size,
+				   &ep->qp.rq_phys, GFP_KERNEL);
+	if (!ep->qp.rq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc RQ BD memory %d\n",
+				  ep->qp.rq_mem_size);
+		goto mem_alloc_err;
+	}
+
+	ep->qp.rq_first_qe = ep->qp.rq_virt;
+	ep->qp.rq_prod_qe = ep->qp.rq_first_qe;
+	ep->qp.rq_cons_qe = ep->qp.rq_first_qe;
+	ep->qp.rq_last_qe = &ep->qp.rq_first_qe[hba->max_rqes - 1];
+	ep->qp.rq_prod_idx = 0x8000;
+	ep->qp.rq_cons_idx = 0;
+	ep->qp.rqe_left = hba->max_rqes;
+
+	setup_qp_page_tables(ep);
+
+	return 0;
+
+mem_alloc_err:
+	bnx2i_free_qp_resc(hba, ep);
+	return -ENOMEM;
+}
+
+
+
+/**
+ * bnx2i_free_qp_resc - free memory resources held by QP
+ * @hba:	adapter structure pointer
+ * @ep:	endpoint (transport indentifier) structure
+ *
+ * Free QP resources - SQ/RQ/CQ memory and page tables.
+ */
+void bnx2i_free_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	if (ep->qp.ctx_base) {
+		iounmap(ep->qp.ctx_base);
+		ep->qp.ctx_base = NULL;
+	}
+	/* Free SQ mem */
+	if (ep->qp.sq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.sq_pgtbl_size,
+				  ep->qp.sq_pgtbl_virt, ep->qp.sq_pgtbl_phys);
+		ep->qp.sq_pgtbl_virt = NULL;
+		ep->qp.sq_pgtbl_phys = 0;
+	}
+	if (ep->qp.sq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.sq_mem_size,
+				  ep->qp.sq_virt, ep->qp.sq_phys);
+		ep->qp.sq_virt = NULL;
+		ep->qp.sq_phys = 0;
+	}
+
+	/* Free RQ mem */
+	if (ep->qp.rq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.rq_pgtbl_size,
+				  ep->qp.rq_pgtbl_virt, ep->qp.rq_pgtbl_phys);
+		ep->qp.rq_pgtbl_virt = NULL;
+		ep->qp.rq_pgtbl_phys = 0;
+	}
+	if (ep->qp.rq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.rq_mem_size,
+				  ep->qp.rq_virt, ep->qp.rq_phys);
+		ep->qp.rq_virt = NULL;
+		ep->qp.rq_phys = 0;
+	}
+
+	/* Free CQ mem */
+	if (ep->qp.cq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.cq_pgtbl_size,
+				  ep->qp.cq_pgtbl_virt, ep->qp.cq_pgtbl_phys);
+		ep->qp.cq_pgtbl_virt = NULL;
+		ep->qp.cq_pgtbl_phys = 0;
+	}
+	if (ep->qp.cq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.cq_mem_size,
+				  ep->qp.cq_virt, ep->qp.cq_phys);
+		ep->qp.cq_virt = NULL;
+		ep->qp.cq_phys = 0;
+	}
+}
+
+
+/**
+ * bnx2i_send_fw_iscsi_init_msg - initiates initial handshake with iscsi f/w
+ * @hba:	adapter structure pointer
+ *
+ * Send down iscsi_init KWQEs which initiates the initial handshake with the f/w
+ * 	This results in iSCSi support validation and on-chip context manager
+ * 	initialization.  Firmware completes this handshake with a CQE carrying
+ * 	the result of iscsi support validation. Parameter carried by
+ * 	iscsi init request determines the number of offloaded connection and
+ * 	tolerance level for iscsi protocol violation this hba/chip can support
+ */
+int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
+{
+	struct kwqe *kwqe_arr[3];
+	struct iscsi_kwqe_init1 iscsi_init;
+	struct iscsi_kwqe_init2 iscsi_init2;
+	int rc = 0;
+	u64 mask64;
+
+	bnx2i_adjust_qp_size(hba);
+
+	iscsi_init.flags =
+		ISCSI_PAGE_SIZE_4K << ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT;
+	if (en_tcp_dack)
+		iscsi_init.flags |= ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE;
+	iscsi_init.reserved0 = 0;
+	iscsi_init.num_cqs = 1;
+	iscsi_init.hdr.op_code = ISCSI_KWQE_OPCODE_INIT1;
+	iscsi_init.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	iscsi_init.dummy_buffer_addr_lo = (u32) hba->dummy_buf_dma;
+	iscsi_init.dummy_buffer_addr_hi =
+		(u32) ((u64) hba->dummy_buf_dma >> 32);
+
+	hba->ctx_ccell_tasks =
+			((hba->num_ccell & 0xFFFF) | (hba->max_sqes << 16));
+	iscsi_init.num_ccells_per_conn = hba->num_ccell;
+	iscsi_init.num_tasks_per_conn = hba->max_sqes;
+	iscsi_init.sq_wqes_per_page = PAGE_SIZE / BNX2I_SQ_WQE_SIZE;
+	iscsi_init.sq_num_wqes = hba->max_sqes;
+	iscsi_init.cq_log_wqes_per_page =
+		(u8) bnx2i_power_of2(PAGE_SIZE / BNX2I_CQE_SIZE);
+	iscsi_init.cq_num_wqes = hba->max_cqes;
+	iscsi_init.cq_num_pages = (hba->max_cqes * BNX2I_CQE_SIZE +
+				   (PAGE_SIZE - 1)) / PAGE_SIZE;
+	iscsi_init.sq_num_pages = (hba->max_sqes * BNX2I_SQ_WQE_SIZE +
+				   (PAGE_SIZE - 1)) / PAGE_SIZE;
+	iscsi_init.rq_buffer_size = BNX2I_RQ_WQE_SIZE;
+	iscsi_init.rq_num_wqes = hba->max_rqes;
+
+
+	iscsi_init2.hdr.op_code = ISCSI_KWQE_OPCODE_INIT2;
+	iscsi_init2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+	iscsi_init2.max_cq_sqn = hba->max_cqes * 2 + 1;
+	mask64 = 0x0ULL;
+	mask64 |= (
+		/* CISCO MDS */
+		(1UL <<
+		  ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV) |
+		/* HP MSA1510i */
+		(1UL <<
+		  ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN) |
+		/* EMC */
+		(1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN));
+	if (error_mask1)
+		iscsi_init2.error_bit_map[0] = error_mask1;
+	else
+		iscsi_init2.error_bit_map[0] = (u32) mask64;
+
+	if (error_mask2)
+		iscsi_init2.error_bit_map[1] = error_mask2;
+	else
+		iscsi_init2.error_bit_map[1] = (u32) (mask64 >> 32);
+
+	iscsi_error_mask = mask64;
+
+	kwqe_arr[0] = (struct kwqe *) &iscsi_init;
+	kwqe_arr[1] = (struct kwqe *) &iscsi_init2;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		rc = hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 2);
+	return rc;
+}
+
+
+/**
+ * bnx2i_process_scsi_cmd_resp - this function handles scsi cmd completion.
+ * @conn:	iscsi connection
+ * @cqe:	pointer to newly DMA'ed CQE entry for processing
+ *
+ * process SCSI CMD Response CQE & complete the request to SCSI-ML
+ */
+static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
+				       struct bnx2i_conn *bnx2i_conn,
+				       struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_cmd_response *resp_cqe;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct iscsi_task *task;
+	struct iscsi_cmd_rsp *hdr;
+	u32 datalen = 0;
+
+	resp_cqe = (struct bnx2i_cmd_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 resp_cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
+	if (!task)
+		goto fail;
+
+	bnx2i_cmd = task->dd_data;
+
+	if (bnx2i_cmd->req.op_attr & ISCSI_CMD_REQUEST_READ) {
+		conn->datain_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_data_outs;
+		conn->rxdata_octets +=
+			bnx2i_cmd->req.total_data_transfer_length;
+	} else {
+		conn->dataout_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_data_outs;
+		conn->r2t_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_r2ts;
+		conn->txdata_octets +=
+			bnx2i_cmd->req.total_data_transfer_length;
+	}
+	bnx2i_iscsi_unmap_sg_list(bnx2i_cmd);
+
+	hdr = (struct iscsi_cmd_rsp *)task->hdr;
+	resp_cqe = (struct bnx2i_cmd_response *)cqe;
+	hdr->opcode = resp_cqe->op_code;
+	hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(resp_cqe->exp_cmd_sn);
+	hdr->response = resp_cqe->response;
+	hdr->cmd_status = resp_cqe->status;
+	hdr->flags = resp_cqe->response_flags;
+	hdr->residual_count = cpu_to_be32(resp_cqe->residual_count);
+
+	if (resp_cqe->op_code == ISCSI_OP_SCSI_DATA_IN)
+		goto done;
+
+	if (resp_cqe->status == SAM_STAT_CHECK_CONDITION) {
+		datalen = resp_cqe->data_length;
+		if (datalen < 2)
+			goto done;
+
+		if (datalen > BNX2I_RQ_WQE_SIZE) {
+			iscsi_conn_printk(KERN_ERR, conn,
+					  "sense data len %d > RQ sz\n",
+					  datalen);
+			datalen = BNX2I_RQ_WQE_SIZE;
+		} else if (datalen > ISCSI_DEF_MAX_RECV_SEG_LEN) {
+			iscsi_conn_printk(KERN_ERR, conn,
+					  "sense data len %d > conn data\n",
+					  datalen);
+			datalen = ISCSI_DEF_MAX_RECV_SEG_LEN;
+		}
+
+		bnx2i_get_rq_buf(bnx2i_cmd->conn, conn->data, datalen);
+		bnx2i_put_rq_buf(bnx2i_cmd->conn, 1);
+	}
+
+done:
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr,
+			     conn->data, datalen);
+fail:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_process_login_resp - this function handles iscsi login response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process Login Response CQE & complete it to open-iscsi user daemon
+ */
+static int bnx2i_process_login_resp(struct iscsi_session *session,
+				    struct bnx2i_conn *bnx2i_conn,
+				    struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_login_response *login;
+	struct iscsi_login_rsp *resp_hdr;
+	int pld_len;
+	int pad_len;
+
+	login = (struct bnx2i_login_response *) cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 login->itt & ISCSI_LOGIN_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_login_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = login->op_code;
+	resp_hdr->flags = login->response_flags;
+	resp_hdr->max_version = login->version_max;
+	resp_hdr->active_version = login->version_active;;
+	resp_hdr->hlength = 0;
+
+	hton24(resp_hdr->dlength, login->data_length);
+	memcpy(resp_hdr->isid, &login->isid_lo, 6);
+	resp_hdr->tsih = cpu_to_be16(login->tsih);
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->statsn = cpu_to_be32(login->stat_sn);
+	resp_hdr->exp_cmdsn = cpu_to_be32(login->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(login->max_cmd_sn);
+	resp_hdr->status_class = login->status_class;
+	resp_hdr->status_detail = login->status_detail;
+	pld_len = login->data_length;
+	bnx2i_conn->gen_pdu.resp_wr_ptr =
+					bnx2i_conn->gen_pdu.resp_buf + pld_len;
+
+	pad_len = 0;
+	if (pld_len & 0x3)
+		pad_len = 4 - (pld_len % 4);
+
+	if (pad_len) {
+		int i = 0;
+		for (i = 0; i < pad_len; i++) {
+			bnx2i_conn->gen_pdu.resp_wr_ptr[0] = 0;
+			bnx2i_conn->gen_pdu.resp_wr_ptr++;
+		}
+	}
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr,
+		bnx2i_conn->gen_pdu.resp_buf,
+		bnx2i_conn->gen_pdu.resp_wr_ptr - bnx2i_conn->gen_pdu.resp_buf);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_tmf_resp - this function handles iscsi TMF response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI TMF Response CQE and wake up the driver eh thread.
+ */
+static int bnx2i_process_tmf_resp(struct iscsi_session *session,
+				  struct bnx2i_conn *bnx2i_conn,
+				  struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_tmf_response *tmf_cqe;
+	struct iscsi_tm_rsp *resp_hdr;
+
+	tmf_cqe = (struct bnx2i_tmf_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 tmf_cqe->itt & ISCSI_TMF_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_tm_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = tmf_cqe->op_code;
+	resp_hdr->max_cmdsn = cpu_to_be32(tmf_cqe->max_cmd_sn);
+	resp_hdr->exp_cmdsn = cpu_to_be32(tmf_cqe->exp_cmd_sn);
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->response = tmf_cqe->response;
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr, NULL, 0);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_logout_resp - this function handles iscsi logout response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI Logout Response CQE & make function call to
+ * notify the user daemon.
+ */
+static int bnx2i_process_logout_resp(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_logout_response *logout;
+	struct iscsi_logout_rsp *resp_hdr;
+
+	logout = (struct bnx2i_logout_response *) cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 logout->itt & ISCSI_LOGOUT_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_logout_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = logout->op_code;
+	resp_hdr->flags = logout->response;
+	resp_hdr->hlength = 0;
+
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->statsn = task->hdr->exp_statsn;
+	resp_hdr->exp_cmdsn = cpu_to_be32(logout->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(logout->max_cmd_sn);
+
+	resp_hdr->t2wait = cpu_to_be32(logout->time_to_wait);
+	resp_hdr->t2retain = cpu_to_be32(logout->time_to_retain);
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr, NULL, 0);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_nopin_local_cmpl - this function handles iscsi nopin CQE
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI NOPIN local completion CQE, frees IIT and command structures
+ */
+static void bnx2i_process_nopin_local_cmpl(struct iscsi_session *session,
+					   struct bnx2i_conn *bnx2i_conn,
+					   struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_nop_in_msg *nop_in;
+	struct iscsi_task *task;
+
+	nop_in = (struct bnx2i_nop_in_msg *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 nop_in->itt & ISCSI_NOP_IN_MSG_INDEX);
+	if (task)
+		iscsi_put_task(task);
+	spin_unlock(&session->lock);
+}
+
+/**
+ * bnx2i_unsol_pdu_adjust_rq - makes adjustments to RQ after unsol pdu is recvd
+ * @conn:	iscsi connection
+ *
+ * Firmware advances RQ producer index for every unsolicited PDU even if
+ *	payload data length is '0'. This function makes corresponding
+ *	adjustments on the driver side to match this f/w behavior
+ */
+static void bnx2i_unsol_pdu_adjust_rq(struct bnx2i_conn *bnx2i_conn)
+{
+	char dummy_rq_data[2];
+	bnx2i_get_rq_buf(bnx2i_conn, dummy_rq_data, 1);
+	bnx2i_put_rq_buf(bnx2i_conn, 1);
+}
+
+
+/**
+ * bnx2i_process_nopin_mesg - this function handles iscsi nopin CQE
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI target's proactive iSCSI NOPIN request
+ */
+static int bnx2i_process_nopin_mesg(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_nop_in_msg *nop_in;
+	struct iscsi_nopin *hdr;
+	u32 itt;
+	int tgt_async_nop = 0;
+
+	nop_in = (struct bnx2i_nop_in_msg *)cqe;
+	itt = nop_in->itt & ISCSI_NOP_IN_MSG_INDEX;
+
+	spin_lock(&session->lock);
+	hdr = (struct iscsi_nopin *)&bnx2i_conn->gen_pdu.resp_hdr;
+	memset(hdr, 0, sizeof(struct iscsi_hdr));
+	hdr->opcode = nop_in->op_code;
+	hdr->max_cmdsn = cpu_to_be32(nop_in->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(nop_in->exp_cmd_sn);
+	hdr->ttt = cpu_to_be32(nop_in->ttt);
+
+	if (itt == (u16) RESERVED_ITT) {
+		bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+		hdr->itt = RESERVED_ITT;
+		tgt_async_nop = 1;
+		goto done;
+	}
+
+	/* this is a response to one of our nop-outs */
+	task = iscsi_itt_to_task(conn, itt);
+	if (task) {
+		hdr->flags = ISCSI_FLAG_CMD_FINAL;
+		hdr->itt = task->hdr->itt;
+		hdr->ttt = cpu_to_be32(nop_in->ttt);
+		memcpy(hdr->lun, nop_in->lun, 8);
+	}
+done:
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, NULL, 0);
+	spin_unlock(&session->lock);
+
+	return tgt_async_nop;
+}
+
+
+/**
+ * bnx2i_process_async_mesg - this function handles iscsi async message
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI ASYNC Message
+ */
+static void bnx2i_process_async_mesg(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct bnx2i_async_msg *async_cqe;
+	struct iscsi_async *resp_hdr;
+	u8 async_event;
+
+	bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+
+	async_cqe = (struct bnx2i_async_msg *)cqe;
+	async_event = async_cqe->async_event;
+
+	if (async_event == ISCSI_ASYNC_MSG_SCSI_EVENT) {
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				  "async: scsi events not supported\n");
+		return;
+	}
+
+	spin_lock(&session->lock);
+	resp_hdr = (struct iscsi_async *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = async_cqe->op_code;
+	resp_hdr->flags = 0x80;
+
+	memcpy(resp_hdr->lun, async_cqe->lun, 8);
+	resp_hdr->exp_cmdsn = cpu_to_be32(async_cqe->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(async_cqe->max_cmd_sn);
+
+	resp_hdr->async_event = async_cqe->async_event;
+	resp_hdr->async_vcode = async_cqe->async_vcode;
+
+	resp_hdr->param1 = cpu_to_be16(async_cqe->param1);
+	resp_hdr->param2 = cpu_to_be16(async_cqe->param2);
+	resp_hdr->param3 = cpu_to_be16(async_cqe->param3);
+
+	__iscsi_complete_pdu(bnx2i_conn->cls_conn->dd_data,
+			     (struct iscsi_hdr *)resp_hdr, NULL, 0);
+	spin_unlock(&session->lock);
+}
+
+
+/**
+ * bnx2i_process_reject_mesg - process iscsi reject pdu
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI REJECT message
+ */
+static void bnx2i_process_reject_mesg(struct iscsi_session *session,
+				      struct bnx2i_conn *bnx2i_conn,
+				      struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_reject_msg *reject;
+	struct iscsi_reject *hdr;
+
+	reject = (struct bnx2i_reject_msg *) cqe;
+	if (reject->data_length) {
+		bnx2i_get_rq_buf(bnx2i_conn, conn->data, reject->data_length);
+		bnx2i_put_rq_buf(bnx2i_conn, 1);
+	} else
+		bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+
+	spin_lock(&session->lock);
+	hdr = (struct iscsi_reject *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(hdr, 0, sizeof(struct iscsi_hdr));
+	hdr->opcode = reject->op_code;
+	hdr->reason = reject->reason;
+	hton24(hdr->dlength, reject->data_length);
+	hdr->max_cmdsn = cpu_to_be32(reject->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(reject->exp_cmd_sn);
+	hdr->ffffffff = cpu_to_be32(RESERVED_ITT);
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, conn->data,
+			     reject->data_length);
+	spin_unlock(&session->lock);
+}
+
+/**
+ * bnx2i_process_cmd_cleanup_resp - process scsi command clean-up completion
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process command cleanup response CQE during conn shutdown or error recovery
+ */
+static void bnx2i_process_cmd_cleanup_resp(struct iscsi_session *session,
+					   struct bnx2i_conn *bnx2i_conn,
+					   struct cqe *cqe)
+{
+	struct bnx2i_cleanup_response *cmd_clean_rsp;
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+
+	cmd_clean_rsp = (struct bnx2i_cleanup_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+			cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
+	if (!task)
+		printk(KERN_ALERT "bnx2i: cmd clean ITT %x not active\n",
+			cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
+	spin_unlock(&session->lock);
+	complete(&bnx2i_conn->cmd_cleanup_cmpl);
+}
+
+
+
+/**
+ * bnx2i_process_new_cqes - process newly DMA'ed CQE's
+ * @bnx2i_conn:		iscsi connection
+ *
+ * this function is called by generic KCQ handler to process all pending CQE's
+ */
+static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_session *session = conn->session;
+	struct qp_info *qp = &bnx2i_conn->ep->qp;
+	struct bnx2i_nop_in_msg *nopin;
+	int tgt_async_msg;
+
+	while (1) {
+		nopin = (struct bnx2i_nop_in_msg *) qp->cq_cons_qe;
+		if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
+			break;
+
+		if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx)))
+			break;
+
+		tgt_async_msg = 0;
+
+		switch (nopin->op_code) {
+		case ISCSI_OP_SCSI_CMD_RSP:
+		case ISCSI_OP_SCSI_DATA_IN:
+			bnx2i_process_scsi_cmd_resp(session, bnx2i_conn,
+						    qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_LOGIN_RSP:
+			bnx2i_process_login_resp(session, bnx2i_conn,
+						 qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_SCSI_TMFUNC_RSP:
+			bnx2i_process_tmf_resp(session, bnx2i_conn,
+					       qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_LOGOUT_RSP:
+			bnx2i_process_logout_resp(session, bnx2i_conn,
+						  qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_NOOP_IN:
+			if (bnx2i_process_nopin_mesg(session, bnx2i_conn,
+						     qp->cq_cons_qe))
+				tgt_async_msg = 1;
+			break;
+		case ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION:
+			bnx2i_process_nopin_local_cmpl(session, bnx2i_conn,
+						       qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_ASYNC_EVENT:
+			bnx2i_process_async_mesg(session, bnx2i_conn,
+						 qp->cq_cons_qe);
+			tgt_async_msg = 1;
+			break;
+		case ISCSI_OP_REJECT:
+			bnx2i_process_reject_mesg(session, bnx2i_conn,
+						  qp->cq_cons_qe);
+			break;
+		case ISCSI_OPCODE_CLEANUP_RESPONSE:
+			bnx2i_process_cmd_cleanup_resp(session, bnx2i_conn,
+						       qp->cq_cons_qe);
+			break;
+		default:
+			printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
+					  nopin->op_code);
+		}
+
+		if (!tgt_async_msg)
+			bnx2i_conn->ep->num_active_cmds--;
+
+		/* clear out in production version only, till beta keep opcode
+		 * field intact, will be helpful in debugging (context dump)
+		 * nopin->op_code = 0;
+		 */
+		qp->cqe_exp_seq_sn++;
+		if (qp->cqe_exp_seq_sn == (qp->cqe_size * 2 + 1))
+			qp->cqe_exp_seq_sn = ISCSI_INITIAL_SN;
+
+		if (qp->cq_cons_qe == qp->cq_last_qe) {
+			qp->cq_cons_qe = qp->cq_first_qe;
+			qp->cq_cons_idx = 0;
+		} else {
+			qp->cq_cons_qe++;
+			qp->cq_cons_idx++;
+		}
+	}
+	bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
+}
+
+/**
+ * bnx2i_fastpath_notification - process global event queue (KCQ)
+ * @hba:		adapter structure pointer
+ * @new_cqe_kcqe:	pointer to newly DMA'ed KCQE entry
+ *
+ * Fast path event notification handler, KCQ entry carries context id
+ *	of the connection that has 1 or more pending CQ entries
+ */
+static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
+					struct iscsi_kcqe *new_cqe_kcqe)
+{
+	struct bnx2i_conn *conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = new_cqe_kcqe->iscsi_conn_id;
+	conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!conn) {
+		printk(KERN_ALERT "cid #%x not valid\n", iscsi_cid);
+		return;
+	}
+	if (!conn->ep) {
+		printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid);
+		return;
+	}
+
+	bnx2i_process_new_cqes(conn);
+}
+
+
+/**
+ * bnx2i_process_update_conn_cmpl - process iscsi conn update completion KCQE
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * CONN_UPDATE completion handler, this completes iSCSI connection FFP migration
+ */
+static void bnx2i_process_update_conn_cmpl(struct bnx2i_hba *hba,
+					   struct iscsi_kcqe *update_kcqe)
+{
+	struct bnx2i_conn *conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = update_kcqe->iscsi_conn_id;
+	conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!conn) {
+		printk(KERN_ALERT "conn_update: cid %x not valid\n", iscsi_cid);
+		return;
+	}
+	if (!conn->ep) {
+		printk(KERN_ALERT "cid %x does not have ep bound\n", iscsi_cid);
+		return;
+	}
+
+	if (update_kcqe->completion_status) {
+		printk(KERN_ALERT "request failed cid %x\n", iscsi_cid);
+		conn->ep->state = EP_STATE_ULP_UPDATE_FAILED;
+	} else
+		conn->ep->state = EP_STATE_ULP_UPDATE_COMPL;
+
+	wake_up_interruptible(&conn->ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_recovery_que_add_conn - add connection to recovery queue
+ * @hba:		adapter structure pointer
+ * @bnx2i_conn:		iscsi connection
+ *
+ * Add connection to recovery queue and schedule adapter eh worker
+ */
+static void bnx2i_recovery_que_add_conn(struct bnx2i_hba *hba,
+					struct bnx2i_conn *bnx2i_conn)
+{
+	iscsi_conn_failure(bnx2i_conn->cls_conn->dd_data,
+			   ISCSI_ERR_CONN_FAILED);
+}
+
+
+/**
+ * bnx2i_process_tcp_error - process error notification on a given connection
+ *
+ * @hba: 		adapter structure pointer
+ * @tcp_err: 		tcp error kcqe pointer
+ *
+ * handles tcp level error notifications from FW.
+ */
+static void bnx2i_process_tcp_error(struct bnx2i_hba *hba,
+				    struct iscsi_kcqe *tcp_err)
+{
+	struct bnx2i_conn *bnx2i_conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = tcp_err->iscsi_conn_id;
+	bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!bnx2i_conn) {
+		printk(KERN_ALERT "bnx2i - cid 0x%x not valid\n", iscsi_cid);
+		return;
+	}
+
+	printk(KERN_ALERT "bnx2i - cid 0x%x had TCP errors, error code 0x%x\n",
+			  iscsi_cid, tcp_err->completion_status);
+	bnx2i_recovery_que_add_conn(bnx2i_conn->hba, bnx2i_conn);
+}
+
+
+/**
+ * bnx2i_process_iscsi_error - process error notification on a given connection
+ * @hba:		adapter structure pointer
+ * @iscsi_err:		iscsi error kcqe pointer
+ *
+ * handles iscsi error notifications from the FW. Firmware based in initial
+ *	handshake classifies iscsi protocol / TCP rfc violation into either
+ *	warning or error indications. If indication is of "Error" type, driver
+ *	will initiate session recovery for that connection/session. For
+ *	"Warning" type indication, driver will put out a system log message
+ *	(there will be only one message for each type for the life of the
+ *	session, this is to avoid un-necessarily overloading the system)
+ */
+static void bnx2i_process_iscsi_error(struct bnx2i_hba *hba,
+				      struct iscsi_kcqe *iscsi_err)
+{
+	struct bnx2i_conn *bnx2i_conn;
+	u32 iscsi_cid;
+	char warn_notice[] = "iscsi_warning";
+	char error_notice[] = "iscsi_error";
+	char additional_notice[64];
+	char *message;
+	int need_recovery;
+	u64 err_mask64;
+
+	iscsi_cid = iscsi_err->iscsi_conn_id;
+	bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+	if (!bnx2i_conn) {
+		printk(KERN_ALERT "bnx2i - cid 0x%x not valid\n", iscsi_cid);
+		return;
+	}
+
+	err_mask64 = (0x1ULL << iscsi_err->completion_status);
+
+	if (err_mask64 & iscsi_error_mask) {
+		need_recovery = 0;
+		message = warn_notice;
+	} else {
+		need_recovery = 1;
+		message = error_notice;
+	}
+
+	switch (iscsi_err->completion_status) {
+	case ISCSI_KCQE_COMPLETION_STATUS_HDR_DIG_ERR:
+		strcpy(additional_notice, "hdr digest err");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_DATA_DIG_ERR:
+		strcpy(additional_notice, "data digest err");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_OPCODE:
+		strcpy(additional_notice, "wrong opcode rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_AHS_LEN:
+		strcpy(additional_notice, "AHS len > 0 rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ITT:
+		strcpy(additional_notice, "invalid ITT rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_STATSN:
+		strcpy(additional_notice, "wrong StatSN rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN:
+		strcpy(additional_notice, "wrong DataSN rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T:
+		strcpy(additional_notice, "pend R2T violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_0:
+		strcpy(additional_notice, "ERL0, UO");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_1:
+		strcpy(additional_notice, "ERL0, U1");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_2:
+		strcpy(additional_notice, "ERL0, U2");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_3:
+		strcpy(additional_notice, "ERL0, U3");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_4:
+		strcpy(additional_notice, "ERL0, U4");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_5:
+		strcpy(additional_notice, "ERL0, U5");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_6:
+		strcpy(additional_notice, "ERL0, U6");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_RCV_LEN:
+		strcpy(additional_notice, "invalid resi len");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_RCV_PDU_LEN:
+		strcpy(additional_notice, "MRDSL violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_F_BIT_ZERO:
+		strcpy(additional_notice, "F-bit not set");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV:
+		strcpy(additional_notice, "invalid TTT");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATASN:
+		strcpy(additional_notice, "invalid DataSN");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_BURST_LEN:
+		strcpy(additional_notice, "burst len violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_BUFFER_OFF:
+		strcpy(additional_notice, "buf offset violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN:
+		strcpy(additional_notice, "invalid LUN field");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_R2TSN:
+		strcpy(additional_notice, "invalid R2TSN field");
+		break;
+#define BNX2I_ERR_DESIRED_DATA_TRNS_LEN_0 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0
+	case BNX2I_ERR_DESIRED_DATA_TRNS_LEN_0:
+		strcpy(additional_notice, "invalid cmd len1");
+		break;
+#define BNX2I_ERR_DESIRED_DATA_TRNS_LEN_1 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1
+	case BNX2I_ERR_DESIRED_DATA_TRNS_LEN_1:
+		strcpy(additional_notice, "invalid cmd len2");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_EXCEED:
+		strcpy(additional_notice,
+		       "pend r2t exceeds MaxOutstandingR2T value");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_IS_RSRV:
+		strcpy(additional_notice, "TTT is rsvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_BURST_LEN:
+		strcpy(additional_notice, "MBL violation");
+		break;
+#define BNX2I_ERR_DATA_SEG_LEN_NOT_ZERO 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_NOT_ZERO
+	case BNX2I_ERR_DATA_SEG_LEN_NOT_ZERO:
+		strcpy(additional_notice, "data seg len != 0");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REJECT_PDU_LEN:
+		strcpy(additional_notice, "reject pdu len error");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ASYNC_PDU_LEN:
+		strcpy(additional_notice, "async pdu len error");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_NOPIN_PDU_LEN:
+		strcpy(additional_notice, "nopin pdu len error");
+		break;
+#define BNX2_ERR_PEND_R2T_IN_CLEANUP			\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_IN_CLEANUP
+	case BNX2_ERR_PEND_R2T_IN_CLEANUP:
+		strcpy(additional_notice, "pend r2t in cleanup");
+		break;
+
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_FRAGMENT:
+		strcpy(additional_notice, "IP fragments rcvd");
+		break;
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_OPTIONS:
+		strcpy(additional_notice, "IP options error");
+		break;
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_URGENT_FLAG:
+		strcpy(additional_notice, "urgent flag error");
+		break;
+	default:
+		printk(KERN_ALERT "iscsi_err - unknown err %x\n",
+				  iscsi_err->completion_status);
+	}
+
+	if (need_recovery) {
+		iscsi_conn_printk(KERN_ALERT,
+				  bnx2i_conn->cls_conn->dd_data,
+				  "bnx2i: %s - %s\n",
+				  message, additional_notice);
+
+		iscsi_conn_printk(KERN_ALERT,
+				  bnx2i_conn->cls_conn->dd_data,
+				  "conn_err - hostno %d conn %p, "
+				  "iscsi_cid %x cid %x\n",
+				  bnx2i_conn->hba->shost->host_no,
+				  bnx2i_conn, bnx2i_conn->ep->ep_iscsi_cid,
+				  bnx2i_conn->ep->ep_cid);
+		bnx2i_recovery_que_add_conn(bnx2i_conn->hba, bnx2i_conn);
+	} else
+		if (!test_and_set_bit(iscsi_err->completion_status,
+				      (void *) &bnx2i_conn->violation_notified))
+			iscsi_conn_printk(KERN_ALERT,
+					  bnx2i_conn->cls_conn->dd_data,
+					  "bnx2i: %s - %s\n",
+					  message, additional_notice);
+}
+
+
+/**
+ * bnx2i_process_conn_destroy_cmpl - process iscsi conn destroy completion
+ * @hba:		adapter structure pointer
+ * @conn_destroy:	conn destroy kcqe pointer
+ *
+ * handles connection destroy completion request.
+ */
+static void bnx2i_process_conn_destroy_cmpl(struct bnx2i_hba *hba,
+					    struct iscsi_kcqe *conn_destroy)
+{
+	struct bnx2i_endpoint *ep;
+
+	ep = bnx2i_find_ep_in_destroy_list(hba, conn_destroy->iscsi_conn_id);
+	if (!ep) {
+		printk(KERN_ALERT "bnx2i_conn_destroy_cmpl: no pending "
+				  "offload request, unexpected complection\n");
+		return;
+	}
+
+	if (hba != ep->hba) {
+		printk(KERN_ALERT "conn destroy- error hba mis-match\n");
+		return;
+	}
+
+	if (conn_destroy->completion_status) {
+		printk(KERN_ALERT "conn_destroy_cmpl: op failed\n");
+		ep->state = EP_STATE_CLEANUP_FAILED;
+	} else
+		ep->state = EP_STATE_CLEANUP_CMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_process_ofld_cmpl - process initial iscsi conn offload completion
+ * @hba:		adapter structure pointer
+ * @ofld_kcqe:		conn offload kcqe pointer
+ *
+ * handles initial connection offload completion, ep_connect() thread is
+ *	woken-up to continue with LLP connect process
+ */
+static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
+				    struct iscsi_kcqe *ofld_kcqe)
+{
+	u32 cid_addr;
+	struct bnx2i_endpoint *ep;
+	u32 cid_num;
+
+	ep = bnx2i_find_ep_in_ofld_list(hba, ofld_kcqe->iscsi_conn_id);
+	if (!ep) {
+		printk(KERN_ALERT "ofld_cmpl: no pend offload request\n");
+		return;
+	}
+
+	if (hba != ep->hba) {
+		printk(KERN_ALERT "ofld_cmpl: error hba mis-match\n");
+		return;
+	}
+
+	if (ofld_kcqe->completion_status) {
+		if (ofld_kcqe->completion_status ==
+		    ISCSI_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE)
+			printk(KERN_ALERT "bnx2i: unable to allocate"
+					  " iSCSI context resources\n");
+		ep->state = EP_STATE_OFLD_FAILED;
+	} else {
+		ep->state = EP_STATE_OFLD_COMPL;
+		cid_addr = ofld_kcqe->iscsi_conn_context_id;
+		cid_num = bnx2i_get_cid_num(ep);
+		ep->ep_cid = cid_addr;
+		ep->qp.ctx_base = NULL;
+	}
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+/**
+ * bnx2i_indicate_kcqe - process iscsi conn update completion KCQE
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * Generic KCQ event handler/dispatcher
+ */
+static void bnx2i_indicate_kcqe(void *context, struct kcqe *kcqe[],
+				u32 num_cqe)
+{
+	struct bnx2i_hba *hba = context;
+	int i = 0;
+	struct iscsi_kcqe *ikcqe = NULL;
+
+	while (i < num_cqe) {
+		ikcqe = (struct iscsi_kcqe *) kcqe[i++];
+
+		if (ikcqe->op_code ==
+		    ISCSI_KCQE_OPCODE_CQ_EVENT_NOTIFICATION)
+			bnx2i_fastpath_notification(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_OFFLOAD_CONN)
+			bnx2i_process_ofld_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_UPDATE_CONN)
+			bnx2i_process_update_conn_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_INIT) {
+			if (ikcqe->completion_status !=
+			    ISCSI_KCQE_COMPLETION_STATUS_SUCCESS)
+				bnx2i_iscsi_license_error(hba, ikcqe->\
+							  completion_status);
+			else {
+				set_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+				bnx2i_get_link_state(hba);
+				printk(KERN_INFO "bnx2i [%.2x:%.2x.%.2x]: "
+						 "ISCSI_INIT passed\n",
+						 (u8)hba->pcidev->bus->number,
+						 hba->pci_devno,
+						 (u8)hba->pci_func);
+
+
+			}
+		} else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_DESTROY_CONN)
+			bnx2i_process_conn_destroy_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_ISCSI_ERROR)
+			bnx2i_process_iscsi_error(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_TCP_ERROR)
+			bnx2i_process_tcp_error(hba, ikcqe);
+		else
+			printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
+					  ikcqe->op_code);
+	}
+}
+
+
+/**
+ * bnx2i_indicate_netevent - Generic netdev event handler
+ * @context:	adapter structure pointer
+ * @event:	event type
+ *
+ * Handles four netdev events, NETDEV_UP, NETDEV_DOWN,
+ *	NETDEV_GOING_DOWN and NETDEV_CHANGE
+ */
+static void bnx2i_indicate_netevent(void *context, unsigned long event)
+{
+	struct bnx2i_hba *hba = context;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state))
+			bnx2i_send_fw_iscsi_init_msg(hba);
+		break;
+	case NETDEV_DOWN:
+		clear_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state);
+		clear_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+		break;
+	case NETDEV_GOING_DOWN:
+		set_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state);
+		iscsi_host_for_each_session(hba->shost,
+					    bnx2i_drop_session);
+		break;
+	case NETDEV_CHANGE:
+		bnx2i_get_link_state(hba);
+		break;
+	default:
+		;
+	}
+}
+
+
+/**
+ * bnx2i_cm_connect_cmpl - process iscsi conn establishment completion
+ * @cm_sk: 		cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 TCP connect request.
+ */
+static void bnx2i_cm_connect_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &ep->hba->adapter_state))
+		ep->state = EP_STATE_CONNECT_FAILED;
+	else if (test_bit(SK_F_OFFLD_COMPLETE, &cm_sk->flags))
+		ep->state = EP_STATE_CONNECT_COMPL;
+	else
+		ep->state = EP_STATE_CONNECT_FAILED;
+
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_close_cmpl - process tcp conn close completion
+ * @cm_sk:	cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 graceful TCP connect shutdown
+ */
+static void bnx2i_cm_close_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_DISCONN_COMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_abort_cmpl - process abortive tcp conn teardown completion
+ * @cm_sk:	cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 abortive TCP connect termination
+ */
+static void bnx2i_cm_abort_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_DISCONN_COMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_remote_close - process received TCP FIN
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to indicate
+ *	async TCP events such as FIN
+ */
+static void bnx2i_cm_remote_close(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_TCP_FIN_RCVD;
+	if (ep->conn)
+		bnx2i_recovery_que_add_conn(ep->hba, ep->conn);
+}
+
+/**
+ * bnx2i_cm_remote_abort - process TCP RST and start conn cleanup
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate async TCP events (RST) sent by the peer.
+ */
+static void bnx2i_cm_remote_abort(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_TCP_RST_RCVD;
+	if (ep->conn)
+		bnx2i_recovery_que_add_conn(ep->hba, ep->conn);
+}
+
+
+static void bnx2i_send_nl_mesg(struct cnic_dev *dev, u32 msg_type,
+			       char *buf, u16 buflen)
+{
+	struct bnx2i_hba *hba;
+
+	hba = bnx2i_find_hba_for_cnic(dev);
+	if (!hba)
+		return;
+
+	if (iscsi_offload_mesg(hba->shost, &bnx2i_iscsi_transport,
+				   msg_type, buf, buflen))
+		printk(KERN_ALERT "bnx2i: private nl message send error\n");
+
+}
+
+
+/**
+ * bnx2i_cnic_cb - global template of bnx2i - cnic driver interface structure
+ *			carrying callback function pointers
+ *
+ */
+struct cnic_ulp_ops bnx2i_cnic_cb = {
+	.cnic_init = bnx2i_ulp_init,
+	.cnic_exit = bnx2i_ulp_exit,
+	.cnic_start = bnx2i_start,
+	.cnic_stop = bnx2i_stop,
+	.indicate_kcqes = bnx2i_indicate_kcqe,
+	.indicate_netevent = bnx2i_indicate_netevent,
+	.cm_connect_complete = bnx2i_cm_connect_cmpl,
+	.cm_close_complete = bnx2i_cm_close_cmpl,
+	.cm_abort_complete = bnx2i_cm_abort_cmpl,
+	.cm_remote_close = bnx2i_cm_remote_close,
+	.cm_remote_abort = bnx2i_cm_remote_abort,
+	.iscsi_nl_send_msg = bnx2i_send_nl_mesg,
+	.owner = THIS_MODULE
+};
+
+
+/**
+ * bnx2i_map_ep_dbell_regs - map connection doorbell registers
+ * @ep: bnx2i endpoint
+ *
+ * maps connection's SQ and RQ doorbell registers, 5706/5708/5709 hosts these
+ *	register in BAR #0. Whereas in 57710 these register are accessed by
+ *	mapping BAR #1
+ */
+int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep)
+{
+	u32 cid_num;
+	u32 reg_off;
+	u32 first_l4l5;
+	u32 ctx_sz;
+	u32 config2;
+	resource_size_t reg_base;
+
+	cid_num = bnx2i_get_cid_num(ep);
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		reg_base = pci_resource_start(ep->hba->pcidev,
+					      BNX2X_DOORBELL_PCI_BAR);
+		reg_off = PAGE_SIZE * (cid_num & 0x1FFFF) + DPM_TRIGER_TYPE;
+		ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off, 4);
+		goto arm_cq;
+	}
+
+	reg_base = ep->hba->netdev->base_addr;
+	if ((test_bit(BNX2I_NX2_DEV_5709, &ep->hba->cnic_dev_type)) &&
+	    (ep->hba->mail_queue_access == BNX2I_MQ_BIN_MODE)) {
+		config2 = REG_RD(ep->hba, BNX2_MQ_CONFIG2);
+		first_l4l5 = config2 & BNX2_MQ_CONFIG2_FIRST_L4L5;
+		ctx_sz = (config2 & BNX2_MQ_CONFIG2_CONT_SZ) >> 3;
+		if (ctx_sz)
+			reg_off = CTX_OFFSET + MAX_CID_CNT * MB_KERNEL_CTX_SIZE
+				  + PAGE_SIZE *
+				  (((cid_num - first_l4l5) / ctx_sz) + 256);
+		else
+			reg_off = CTX_OFFSET + (MB_KERNEL_CTX_SIZE * cid_num);
+	} else
+		/* 5709 device in normal node and 5706/5708 devices */
+		reg_off = CTX_OFFSET + (MB_KERNEL_CTX_SIZE * cid_num);
+
+	ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off,
+					  MB_KERNEL_CTX_SIZE);
+	if (!ep->qp.ctx_base)
+		return -ENOMEM;
+
+arm_cq:
+	bnx2i_arm_cq_event_coalescing(ep, CNIC_ARM_CQE);
+	return 0;
+}
diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
new file mode 100644
index 0000000..ae4b2d5
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -0,0 +1,438 @@
+/* bnx2i.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include "bnx2i.h"
+
+static struct list_head adapter_list = LIST_HEAD_INIT(adapter_list);
+static u32 adapter_count;
+static int bnx2i_reg_device;
+
+#define DRV_MODULE_NAME		"bnx2i"
+#define DRV_MODULE_VERSION	"2.0.1d"
+#define DRV_MODULE_RELDATE	"Mar 25, 2009"
+
+static char version[] __devinitdata =
+		"Broadcom NetXtreme II iSCSI Driver " DRV_MODULE_NAME \
+		" v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+
+MODULE_AUTHOR("Anil Veerabhadrappa <anilgv@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom NetXtreme II BCM5706/5708/5709 iSCSI Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static DEFINE_RWLOCK(bnx2i_dev_lock);
+
+unsigned int event_coal_div = 1;
+module_param(event_coal_div, int, 0664);
+MODULE_PARM_DESC(event_coal_div, "Event Coalescing Divide Factor");
+
+unsigned int en_tcp_dack = 1;
+module_param(en_tcp_dack, int, 0664);
+MODULE_PARM_DESC(en_tcp_dack, "Enable TCP Delayed ACK");
+
+unsigned int error_mask1 = 0x00;
+module_param(error_mask1, int, 0664);
+MODULE_PARM_DESC(error_mask1, "Config FW iSCSI Error Mask #1");
+
+unsigned int error_mask2 = 0x00;
+module_param(error_mask2, int, 0664);
+MODULE_PARM_DESC(error_mask2, "Config FW iSCSI Error Mask #2");
+
+unsigned int sq_size;
+module_param(sq_size, int, 0664);
+MODULE_PARM_DESC(sq_size, "Configure SQ size");
+
+unsigned int rq_size = BNX2I_RQ_WQES_DEFAULT;
+module_param(rq_size, int, 0664);
+MODULE_PARM_DESC(rq_size, "Configure RQ size");
+
+u64 iscsi_error_mask = 0x00;
+
+static void bnx2i_unreg_one_device(struct bnx2i_hba *hba) ;
+
+
+/**
+ * bnx2i_identify_device - identifies NetXtreme II device type
+ * @hba: 		Adapter structure pointer
+ *
+ * This function identifies the NX2 device type and sets appropriate
+ *	queue mailbox register access method, 5709 requires driver to
+ *	access MBOX regs using *bin* mode
+ */
+void bnx2i_identify_device(struct bnx2i_hba *hba)
+{
+	hba->cnic_dev_type = 0;
+	if ((hba->pci_did == PCI_DEVICE_ID_NX2_5706) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5706S))
+		set_bit(BNX2I_NX2_DEV_5706, &hba->cnic_dev_type);
+	else if ((hba->pci_did == PCI_DEVICE_ID_NX2_5708) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5708S))
+		set_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type);
+	else if ((hba->pci_did == PCI_DEVICE_ID_NX2_5709) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5709S)) {
+		set_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type);
+		hba->mail_queue_access = BNX2I_MQ_BIN_MODE;
+	} else if (hba->pci_did == PCI_DEVICE_ID_NX2_57710 ||
+		   hba->pci_did == PCI_DEVICE_ID_NX2_57711)
+		set_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type);
+}
+
+
+/**
+ * get_adapter_list_head - returns head of adapter list
+ */
+struct bnx2i_hba *get_adapter_list_head(void)
+{
+	struct bnx2i_hba *hba = NULL;
+	struct bnx2i_hba *tmp_hba;
+
+	if (!adapter_count)
+		goto hba_not_found;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry(tmp_hba, &adapter_list, link) {
+		if (tmp_hba->cnic && tmp_hba->cnic->cm_select_dev) {
+			hba = tmp_hba;
+			break;
+		}
+	}
+	read_unlock(&bnx2i_dev_lock);
+hba_not_found:
+	return hba;
+}
+
+
+/**
+ * bnx2i_find_hba_for_cnic - maps cnic device instance to bnx2i adapter instance
+ * @cnic:	pointer to cnic device instance
+ *
+ */
+struct bnx2i_hba *bnx2i_find_hba_for_cnic(struct cnic_dev *cnic)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link) {
+		if (hba->cnic == cnic) {
+			read_unlock(&bnx2i_dev_lock);
+			return hba;
+		}
+	}
+	read_unlock(&bnx2i_dev_lock);
+	return NULL;
+}
+
+
+/**
+ * bnx2i_start - cnic callback to initialize & start adapter instance
+ * @handle:	transparent handle pointing to adapter structure
+ *
+ * This function maps adapter structure to pcidev structure and initiates
+ *	firmware handshake to enable/initialize on chip iscsi components
+ * 	This bnx2i - cnic interface api callback is issued after following
+ *	2 conditions are met -
+ *	  a) underlying network interface is up (marked by event 'NETDEV_UP'
+ *		from netdev
+ *	  b) bnx2i adapter instance is registered
+ */
+void bnx2i_start(void *handle)
+{
+#define BNX2I_INIT_POLL_TIME	(1000 / HZ)
+	struct bnx2i_hba *hba = handle;
+	int i = HZ;
+
+	bnx2i_send_fw_iscsi_init_msg(hba);
+	while (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state) && i--)
+		msleep(BNX2I_INIT_POLL_TIME);
+}
+
+
+/**
+ * bnx2i_stop - cnic callback to shutdown adapter instance
+ * @handle:	transparent handle pointing to adapter structure
+ *
+ * driver checks if adapter is already in shutdown mode, if not start
+ *	the shutdown process
+ */
+void bnx2i_stop(void *handle)
+{
+	struct bnx2i_hba *hba = handle;
+
+	/* check if cleanup happened in GOING_DOWN context */
+	clear_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+	if (!test_and_clear_bit(ADAPTER_STATE_GOING_DOWN,
+				&hba->adapter_state))
+		iscsi_host_for_each_session(hba->shost,
+					    bnx2i_drop_session);
+}
+
+/**
+ * bnx2i_register_device - register bnx2i adapter instance with the cnic driver
+ * @hba:	Adapter instance to register
+ *
+ * registers bnx2i adapter instance with the cnic driver while holding the
+ *	adapter structure lock
+ */
+void bnx2i_register_device(struct bnx2i_hba *hba)
+{
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state) ||
+	    test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		return;
+	}
+
+	hba->cnic->register_device(hba->cnic, CNIC_ULP_ISCSI, hba);
+
+	spin_lock(&hba->lock);
+	bnx2i_reg_device++;
+	spin_unlock(&hba->lock);
+
+	set_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+}
+
+
+/**
+ * bnx2i_reg_dev_all - registers all adapter instances with the cnic driver
+ *
+ * registers all bnx2i adapter instances with the cnic driver while holding
+ *	the global resource lock
+ */
+void bnx2i_reg_dev_all(void)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link)
+		bnx2i_register_device(hba);
+	read_unlock(&bnx2i_dev_lock);
+}
+
+
+/**
+ * bnx2i_unreg_one_device - unregister adapter instance with the cnic driver
+ * @hba:	Adapter instance to unregister
+ *
+ * registers bnx2i adapter instance with the cnic driver while holding
+ *	the adapter structure lock
+ */
+static void bnx2i_unreg_one_device(struct bnx2i_hba *hba)
+{
+	if (hba->ofld_conns_active ||
+	    !test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic) ||
+	    test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state))
+		return;
+
+	hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+
+	spin_lock(&hba->lock);
+	bnx2i_reg_device--;
+	spin_unlock(&hba->lock);
+
+	/* ep_disconnect could come before NETDEV_DOWN, driver won't
+	 * see NETDEV_DOWN as it already unregistered itself.
+	 */
+	hba->adapter_state = 0;
+	clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+}
+
+/**
+ * bnx2i_unreg_dev_all - unregisters all bnx2i instances with the cnic driver
+ *
+ * unregisters all bnx2i adapter instances with the cnic driver while holding
+ *	the global resource lock
+ */
+void bnx2i_unreg_dev_all(void)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link)
+		bnx2i_unreg_one_device(hba);
+	read_unlock(&bnx2i_dev_lock);
+}
+
+
+/**
+ * bnx2i_init_one - initialize an adapter instance and allocate memory resources
+ * @hba:	bnx2i adapter instance
+ * @cnic:	cnic device handle
+ *
+ * Global resource lock and host adapter lock is held during critical sections
+ *	below. This routine is called from cnic_register_driver() context and
+ *	work horse thread which does majority of device specific initialization
+ */
+static int bnx2i_init_one(struct bnx2i_hba *hba, struct cnic_dev *cnic)
+{
+	int rc;
+
+	read_lock(&bnx2i_dev_lock);
+	if (bnx2i_reg_device &&
+	    !test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		rc = cnic->register_device(cnic, CNIC_ULP_ISCSI, hba);
+		if (rc)		/* duplicate registration */
+			printk(KERN_ERR "bnx2i- dev reg failed\n");
+
+		spin_lock(&hba->lock);
+		bnx2i_reg_device++;
+		hba->age++;
+		spin_unlock(&hba->lock);
+
+		set_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+	}
+	read_unlock(&bnx2i_dev_lock);
+
+	write_lock(&bnx2i_dev_lock);
+	list_add_tail(&hba->link, &adapter_list);
+	adapter_count++;
+	write_unlock(&bnx2i_dev_lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_ulp_init - initialize an adapter instance
+ * @dev:	cnic device handle
+ *
+ * Called from cnic_register_driver() context to initialize all enumerated
+ *	cnic devices. This routine allocate adapter structure and other
+ *	device specific resources.
+ */
+void bnx2i_ulp_init(struct cnic_dev *dev)
+{
+	struct bnx2i_hba *hba;
+
+	/* Allocate a HBA structure for this device */
+	hba = bnx2i_alloc_hba(dev);
+	if (!hba) {
+		printk(KERN_ERR "bnx2i init: hba initialization failed\n");
+		return;
+	}
+
+	/* Get PCI related information and update hba struct members */
+	clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+	if (bnx2i_init_one(hba, dev)) {
+		printk(KERN_ERR "bnx2i - hba %p init failed\n", hba);
+		bnx2i_free_hba(hba);
+	} else
+		hba->cnic = dev;
+}
+
+
+/**
+ * bnx2i_ulp_exit - shuts down adapter instance and frees all resources
+ * @dev:	cnic device handle
+ *
+ */
+void bnx2i_ulp_exit(struct cnic_dev *dev)
+{
+	struct bnx2i_hba *hba;
+
+	hba = bnx2i_find_hba_for_cnic(dev);
+	if (!hba) {
+		printk(KERN_INFO "bnx2i_ulp_exit: hba not "
+				 "found, dev 0x%p\n", dev);
+		return;
+	}
+	write_lock(&bnx2i_dev_lock);
+	list_del_init(&hba->link);
+	adapter_count--;
+
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+		clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+
+		spin_lock(&hba->lock);
+		bnx2i_reg_device--;
+		spin_unlock(&hba->lock);
+	}
+	write_unlock(&bnx2i_dev_lock);
+
+	bnx2i_free_hba(hba);
+}
+
+
+/**
+ * bnx2i_mod_init - module init entry point
+ *
+ * initialize any driver wide global data structures such as endpoint pool,
+ *	tcp port manager/queue, sysfs. finally driver will register itself
+ *	with the cnic module
+ */
+static int __init bnx2i_mod_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "%s", version);
+
+	if (!is_power_of_2(sq_size))
+		sq_size = roundup_pow_of_two(sq_size);
+
+	bnx2i_scsi_xport_template =
+			iscsi_register_transport(&bnx2i_iscsi_transport);
+	if (!bnx2i_scsi_xport_template) {
+		printk(KERN_ERR "Could not register bnx2i transport.\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = cnic_register_driver(CNIC_ULP_ISCSI, &bnx2i_cnic_cb);
+	if (err) {
+		printk(KERN_ERR "Could not register bnx2i cnic driver.\n");
+		goto unreg_xport;
+	}
+
+	return 0;
+
+unreg_xport:
+	iscsi_unregister_transport(&bnx2i_iscsi_transport);
+out:
+	return err;
+}
+
+
+/**
+ * bnx2i_mod_exit - module cleanup/exit entry point
+ *
+ * Global resource lock and host adapter lock is held during critical sections
+ *	in this function. Driver will browse through the adapter list, cleans-up
+ *	each instance, unregisters iscsi transport name and finally driver will
+ *	unregister itself with the cnic module
+ */
+static void __exit bnx2i_mod_exit(void)
+{
+	struct bnx2i_hba *hba;
+
+	write_lock(&bnx2i_dev_lock);
+	while (!list_empty(&adapter_list)) {
+		hba = list_entry(adapter_list.next, struct bnx2i_hba, link);
+		list_del(&hba->link);
+		adapter_count--;
+
+		if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+			hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+			clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+			bnx2i_reg_device--;
+		}
+
+		write_unlock(&bnx2i_dev_lock);
+		bnx2i_free_hba(hba);
+		write_lock(&bnx2i_dev_lock);
+	}
+	write_unlock(&bnx2i_dev_lock);
+
+	iscsi_unregister_transport(&bnx2i_iscsi_transport);
+	cnic_unregister_driver(CNIC_ULP_ISCSI);
+}
+
+module_init(bnx2i_mod_init);
+module_exit(bnx2i_mod_exit);
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
new file mode 100644
index 0000000..f741219
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -0,0 +1,2064 @@
+/*
+ * bnx2i_iscsi.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include <scsi/scsi_tcq.h>
+#include <scsi/libiscsi.h>
+#include "bnx2i.h"
+
+struct scsi_transport_template *bnx2i_scsi_xport_template;
+struct iscsi_transport bnx2i_iscsi_transport;
+static struct scsi_host_template bnx2i_host_template;
+
+/*
+ * Global endpoint resource info
+ */
+static DEFINE_SPINLOCK(bnx2i_resc_lock); /* protects global resources */
+
+
+static int bnx2i_adapter_ready(struct bnx2i_hba *hba)
+{
+	int retval = 0;
+
+	if (!hba || !test_bit(ADAPTER_STATE_UP, &hba->adapter_state) ||
+	    test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state) ||
+	    test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
+		retval = -EPERM;
+	return retval;
+}
+
+/**
+ * bnx2i_get_write_cmd_bd_idx - identifies various BD bookmarks
+ * @cmd:		iscsi cmd struct pointer
+ * @buf_off:		absolute buffer offset
+ * @start_bd_off:	u32 pointer to return the offset within the BD
+ *			indicated by 'start_bd_idx' on which 'buf_off' falls
+ * @start_bd_idx:	index of the BD on which 'buf_off' falls
+ *
+ * identifies & marks various bd info for scsi command's imm data,
+ * unsolicited data and the first solicited data seq.
+ */
+static void bnx2i_get_write_cmd_bd_idx(struct bnx2i_cmd *cmd, u32 buf_off,
+				       u32 *start_bd_off, u32 *start_bd_idx)
+{
+	struct iscsi_bd *bd_tbl = cmd->io_tbl.bd_tbl;
+	u32 cur_offset = 0;
+	u32 cur_bd_idx = 0;
+
+	if (buf_off) {
+		while (buf_off >= (cur_offset + bd_tbl->buffer_length)) {
+			cur_offset += bd_tbl->buffer_length;
+			cur_bd_idx++;
+			bd_tbl++;
+		}
+	}
+
+	*start_bd_off = buf_off - cur_offset;
+	*start_bd_idx = cur_bd_idx;
+}
+
+/**
+ * bnx2i_setup_write_cmd_bd_info - sets up BD various information
+ * @task:	transport layer's cmd struct pointer
+ *
+ * identifies & marks various bd info for scsi command's immediate data,
+ * unsolicited data and first solicited data seq which includes BD start
+ * index & BD buf off. his function takes into account iscsi parameter such
+ * as immediate data and unsolicited data is support on this connection.
+ */
+static void bnx2i_setup_write_cmd_bd_info(struct iscsi_task *task)
+{
+	struct bnx2i_cmd *cmd = task->dd_data;
+	u32 start_bd_offset;
+	u32 start_bd_idx;
+	u32 buffer_offset = 0;
+	u32 cmd_len = cmd->req.total_data_transfer_length;
+
+	/* if ImmediateData is turned off & IntialR2T is turned on,
+	 * there will be no immediate or unsolicited data, just return.
+	 */
+	if (!iscsi_task_has_unsol_data(task) && !task->imm_count)
+		return;
+
+	/* Immediate data */
+	buffer_offset += task->imm_count;
+	if (task->imm_count == cmd_len)
+		return;
+
+	if (iscsi_task_has_unsol_data(task)) {
+		bnx2i_get_write_cmd_bd_idx(cmd, buffer_offset,
+					   &start_bd_offset, &start_bd_idx);
+		cmd->req.ud_buffer_offset = start_bd_offset;
+		cmd->req.ud_start_bd_index = start_bd_idx;
+		buffer_offset += task->unsol_r2t.data_length;
+	}
+
+	if (buffer_offset != cmd_len) {
+		bnx2i_get_write_cmd_bd_idx(cmd, buffer_offset,
+					   &start_bd_offset, &start_bd_idx);
+		if ((start_bd_offset > task->conn->session->first_burst) ||
+		    (start_bd_idx > scsi_sg_count(cmd->scsi_cmd))) {
+			int i = 0;
+
+			iscsi_conn_printk(KERN_ALERT, task->conn,
+					  "bnx2i- error, buf offset 0x%x "
+					  "bd_valid %d use_sg %d\n",
+					  buffer_offset, cmd->io_tbl.bd_valid,
+					  scsi_sg_count(cmd->scsi_cmd));
+			for (i = 0; i < cmd->io_tbl.bd_valid; i++)
+				iscsi_conn_printk(KERN_ALERT, task->conn,
+						  "bnx2i err, bd[%d]: len %x\n",
+						  i, cmd->io_tbl.bd_tbl[i].\
+						  buffer_length);
+		}
+		cmd->req.sd_buffer_offset = start_bd_offset;
+		cmd->req.sd_start_bd_index = start_bd_idx;
+	}
+}
+
+
+
+/**
+ * bnx2i_map_scsi_sg - maps IO buffer and prepares the BD table
+ * @hba:	adapter instance
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * map SG list
+ */
+static int bnx2i_map_scsi_sg(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
+{
+	struct scsi_cmnd *sc = cmd->scsi_cmd;
+	struct iscsi_bd *bd = cmd->io_tbl.bd_tbl;
+	struct scatterlist *sg;
+	int byte_count = 0;
+	int bd_count = 0;
+	int sg_count;
+	int sg_len;
+	u64 addr;
+	int i;
+
+	BUG_ON(scsi_sg_count(sc) > ISCSI_MAX_BDS_PER_CMD);
+
+	sg_count = scsi_dma_map(sc);
+
+	scsi_for_each_sg(sc, sg, sg_count, i) {
+		sg_len = sg_dma_len(sg);
+		addr = (u64) sg_dma_address(sg);
+		bd[bd_count].buffer_addr_lo = addr & 0xffffffff;
+		bd[bd_count].buffer_addr_hi = addr >> 32;
+		bd[bd_count].buffer_length = sg_len;
+		bd[bd_count].flags = 0;
+		if (bd_count == 0)
+			bd[bd_count].flags = ISCSI_BD_FIRST_IN_BD_CHAIN;
+
+		byte_count += sg_len;
+		bd_count++;
+	}
+
+	if (bd_count)
+		bd[bd_count - 1].flags |= ISCSI_BD_LAST_IN_BD_CHAIN;
+
+	BUG_ON(byte_count != scsi_bufflen(sc));
+	return bd_count;
+}
+
+/**
+ * bnx2i_iscsi_map_sg_list - maps SG list
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * creates BD list table for the command
+ */
+static void bnx2i_iscsi_map_sg_list(struct bnx2i_cmd *cmd)
+{
+	int bd_count;
+
+	bd_count  = bnx2i_map_scsi_sg(cmd->conn->hba, cmd);
+	if (!bd_count) {
+		struct iscsi_bd *bd = cmd->io_tbl.bd_tbl;
+
+		bd[0].buffer_addr_lo = bd[0].buffer_addr_hi = 0;
+		bd[0].buffer_length = bd[0].flags = 0;
+	}
+	cmd->io_tbl.bd_valid = bd_count;
+}
+
+
+/**
+ * bnx2i_iscsi_unmap_sg_list - unmaps SG list
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * unmap IO buffers and invalidate the BD table
+ */
+void bnx2i_iscsi_unmap_sg_list(struct bnx2i_cmd *cmd)
+{
+	struct scsi_cmnd *sc = cmd->scsi_cmd;
+
+	if (cmd->io_tbl.bd_valid && sc) {
+		scsi_dma_unmap(sc);
+		cmd->io_tbl.bd_valid = 0;
+	}
+}
+
+static void bnx2i_setup_cmd_wqe_template(struct bnx2i_cmd *cmd)
+{
+	memset(&cmd->req, 0x00, sizeof(cmd->req));
+	cmd->req.op_code = 0xFF;
+	cmd->req.bd_list_addr_lo = (u32) cmd->io_tbl.bd_tbl_dma;
+	cmd->req.bd_list_addr_hi =
+		(u32) ((u64) cmd->io_tbl.bd_tbl_dma >> 32);
+
+}
+
+
+/**
+ * bnx2i_bind_conn_to_iscsi_cid - bind conn structure to 'iscsi_cid'
+ * @hba:	pointer to adapter instance
+ * @conn:	pointer to iscsi connection
+ * @iscsi_cid:	iscsi context ID, range 0 - (MAX_CONN - 1)
+ *
+ * update iscsi cid table entry with connection pointer. This enables
+ *	driver to quickly get hold of connection structure pointer in
+ *	completion/interrupt thread using iscsi context ID
+ */
+static int bnx2i_bind_conn_to_iscsi_cid(struct bnx2i_hba *hba,
+					struct bnx2i_conn *bnx2i_conn,
+					u32 iscsi_cid)
+{
+	if (hba && hba->cid_que.conn_cid_tbl[iscsi_cid]) {
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				 "conn bind - entry #%d not free\n", iscsi_cid);
+		return -EBUSY;
+	}
+
+	hba->cid_que.conn_cid_tbl[iscsi_cid] = bnx2i_conn;
+	return 0;
+}
+
+
+/**
+ * bnx2i_get_conn_from_id - maps an iscsi cid to corresponding conn ptr
+ * @hba:	pointer to adapter instance
+ * @iscsi_cid:	iscsi context ID, range 0 - (MAX_CONN - 1)
+ */
+struct bnx2i_conn *bnx2i_get_conn_from_id(struct bnx2i_hba *hba,
+					  u16 iscsi_cid)
+{
+	if (!hba->cid_que.conn_cid_tbl) {
+		printk(KERN_ERR "bnx2i: ERROR - missing conn<->cid table\n");
+		return NULL;
+
+	} else if (iscsi_cid >= hba->max_active_conns) {
+		printk(KERN_ERR "bnx2i: wrong cid #%d\n", iscsi_cid);
+		return NULL;
+	}
+	return hba->cid_que.conn_cid_tbl[iscsi_cid];
+}
+
+
+/**
+ * bnx2i_alloc_iscsi_cid - allocates a iscsi_cid from free pool
+ * @hba:	pointer to adapter instance
+ */
+static u32 bnx2i_alloc_iscsi_cid(struct bnx2i_hba *hba)
+{
+	int idx;
+
+	if (!hba->cid_que.cid_free_cnt)
+		return -1;
+
+	idx = hba->cid_que.cid_q_cons_idx;
+	hba->cid_que.cid_q_cons_idx++;
+	if (hba->cid_que.cid_q_cons_idx == hba->cid_que.cid_q_max_idx)
+		hba->cid_que.cid_q_cons_idx = 0;
+
+	hba->cid_que.cid_free_cnt--;
+	return hba->cid_que.cid_que[idx];
+}
+
+
+/**
+ * bnx2i_free_iscsi_cid - returns tcp port to free list
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to free
+ */
+static void bnx2i_free_iscsi_cid(struct bnx2i_hba *hba, u16 iscsi_cid)
+{
+	int idx;
+
+	if (iscsi_cid == (u16) -1)
+		return;
+
+	hba->cid_que.cid_free_cnt++;
+
+	idx = hba->cid_que.cid_q_prod_idx;
+	hba->cid_que.cid_que[idx] = iscsi_cid;
+	hba->cid_que.conn_cid_tbl[iscsi_cid] = NULL;
+	hba->cid_que.cid_q_prod_idx++;
+	if (hba->cid_que.cid_q_prod_idx == hba->cid_que.cid_q_max_idx)
+		hba->cid_que.cid_q_prod_idx = 0;
+}
+
+
+/**
+ * bnx2i_setup_free_cid_que - sets up free iscsi cid queue
+ * @hba:	pointer to adapter instance
+ *
+ * allocates memory for iscsi cid queue & 'cid - conn ptr' mapping table,
+ * 	and initialize table attributes
+ */
+static int bnx2i_setup_free_cid_que(struct bnx2i_hba *hba)
+{
+	int mem_size;
+	int i;
+
+	mem_size = hba->max_active_conns * sizeof(u32);
+	mem_size = (mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	hba->cid_que.cid_que_base = kmalloc(mem_size, GFP_KERNEL);
+	if (!hba->cid_que.cid_que_base)
+		return -ENOMEM;
+
+	mem_size = hba->max_active_conns * sizeof(struct bnx2i_conn *);
+	mem_size = (mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	hba->cid_que.conn_cid_tbl = kmalloc(mem_size, GFP_KERNEL);
+	if (!hba->cid_que.conn_cid_tbl) {
+		kfree(hba->cid_que.cid_que_base);
+		hba->cid_que.cid_que_base = NULL;
+		return -ENOMEM;
+	}
+
+	hba->cid_que.cid_que = (u32 *)hba->cid_que.cid_que_base;
+	hba->cid_que.cid_q_prod_idx = 0;
+	hba->cid_que.cid_q_cons_idx = 0;
+	hba->cid_que.cid_q_max_idx = hba->max_active_conns;
+	hba->cid_que.cid_free_cnt = hba->max_active_conns;
+
+	for (i = 0; i < hba->max_active_conns; i++) {
+		hba->cid_que.cid_que[i] = i;
+		hba->cid_que.conn_cid_tbl[i] = NULL;
+	}
+	return 0;
+}
+
+
+/**
+ * bnx2i_release_free_cid_que - releases 'iscsi_cid' queue resources
+ * @hba:	pointer to adapter instance
+ */
+static void bnx2i_release_free_cid_que(struct bnx2i_hba *hba)
+{
+	kfree(hba->cid_que.cid_que_base);
+	hba->cid_que.cid_que_base = NULL;
+
+	kfree(hba->cid_que.conn_cid_tbl);
+	hba->cid_que.conn_cid_tbl = NULL;
+}
+
+
+/**
+ * bnx2i_alloc_ep - allocates ep structure from global pool
+ * @hba:	pointer to adapter instance
+ *
+ * routine allocates a free endpoint structure from global pool and
+ *	a tcp port to be used for this connection.  Global resource lock,
+ *	'bnx2i_resc_lock' is held while accessing shared global data structures
+ */
+static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba)
+{
+	struct iscsi_endpoint *ep;
+	struct bnx2i_endpoint *bnx2i_ep;
+
+	ep = iscsi_create_endpoint(sizeof(*bnx2i_ep));
+	if (!ep) {
+		printk(KERN_ERR "bnx2i: Could not allocate ep\n");
+		return NULL;
+	}
+
+	bnx2i_ep = ep->dd_data;
+	INIT_LIST_HEAD(&bnx2i_ep->link);
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->hba = hba;
+	bnx2i_ep->hba_age = hba->age;
+	hba->ofld_conns_active++;
+	init_waitqueue_head(&bnx2i_ep->ofld_wait);
+	return ep;
+}
+
+
+/**
+ * bnx2i_free_ep - free endpoint
+ * @ep:		pointer to iscsi endpoint structure
+ */
+static void bnx2i_free_ep(struct iscsi_endpoint *ep)
+{
+	struct bnx2i_endpoint *bnx2i_ep = ep->dd_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnx2i_resc_lock, flags);
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->hba->ofld_conns_active--;
+
+	bnx2i_free_iscsi_cid(bnx2i_ep->hba, bnx2i_ep->ep_iscsi_cid);
+	if (bnx2i_ep->conn) {
+		bnx2i_ep->conn->ep = NULL;
+		bnx2i_ep->conn = NULL;
+	}
+
+	bnx2i_ep->hba = NULL;
+	spin_unlock_irqrestore(&bnx2i_resc_lock, flags);
+	iscsi_destroy_endpoint(ep);
+}
+
+
+/**
+ * bnx2i_alloc_bdt - allocates buffer descriptor (BD) table for the command
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ * @cmd:	iscsi command structure
+ */
+static int bnx2i_alloc_bdt(struct bnx2i_hba *hba, struct iscsi_session *session,
+			   struct bnx2i_cmd *cmd)
+{
+	struct io_bdt *io = &cmd->io_tbl;
+	struct iscsi_bd *bd;
+
+	io->bd_tbl = dma_alloc_coherent(&hba->pcidev->dev,
+					ISCSI_MAX_BDS_PER_CMD * sizeof(*bd),
+					&io->bd_tbl_dma, GFP_KERNEL);
+	if (!io->bd_tbl) {
+		iscsi_session_printk(KERN_ERR, session, "Could not "
+				     "allocate bdt.\n");
+		return -ENOMEM;
+	}
+	io->bd_valid = 0;
+	return 0;
+}
+
+/**
+ * bnx2i_destroy_cmd_pool - destroys iscsi command pool and release BD table
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ * @cmd:	iscsi command structure
+ */
+static void bnx2i_destroy_cmd_pool(struct bnx2i_hba *hba,
+				   struct iscsi_session *session)
+{
+	int i;
+
+	for (i = 0; i < session->cmds_max; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct bnx2i_cmd *cmd = task->dd_data;
+
+		if (cmd->io_tbl.bd_tbl)
+			dma_free_coherent(&hba->pcidev->dev,
+					  ISCSI_MAX_BDS_PER_CMD *
+					  sizeof(struct iscsi_bd),
+					  cmd->io_tbl.bd_tbl,
+					  cmd->io_tbl.bd_tbl_dma);
+	}
+
+}
+
+
+/**
+ * bnx2i_setup_cmd_pool - sets up iscsi command pool for the session
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ */
+static int bnx2i_setup_cmd_pool(struct bnx2i_hba *hba,
+				struct iscsi_session *session)
+{
+	int i;
+
+	for (i = 0; i < session->cmds_max; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct bnx2i_cmd *cmd = task->dd_data;
+
+		/* Anil */
+		task->hdr = &cmd->hdr;
+		task->hdr_max = sizeof(struct iscsi_hdr);
+
+		if (bnx2i_alloc_bdt(hba, session, cmd))
+			goto free_bdts;
+	}
+
+	return 0;
+
+free_bdts:
+	bnx2i_destroy_cmd_pool(hba, session);
+	return -ENOMEM;
+}
+
+
+/**
+ * bnx2i_setup_mp_bdt - allocate BD table resources
+ * @hba:	pointer to adapter structure
+ *
+ * Allocate memory for dummy buffer and associated BD
+ * table to be used by middle path (MP) requests
+ */
+static int bnx2i_setup_mp_bdt(struct bnx2i_hba *hba)
+{
+	int rc = 0;
+	struct iscsi_bd *mp_bdt;
+	u64 addr;
+
+	hba->mp_bd_tbl = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+					    &hba->mp_bd_dma, GFP_KERNEL);
+	if (!hba->mp_bd_tbl) {
+		printk(KERN_ERR "unable to allocate Middle Path BDT\n");
+		rc = -1;
+		goto out;
+	}
+
+	hba->dummy_buffer = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+					       &hba->dummy_buf_dma, GFP_KERNEL);
+	if (!hba->dummy_buffer) {
+		printk(KERN_ERR "unable to alloc Middle Path Dummy Buffer\n");
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->mp_bd_tbl, hba->mp_bd_dma);
+		hba->mp_bd_tbl = NULL;
+		rc = -1;
+		goto out;
+	}
+
+	mp_bdt = (struct iscsi_bd *) hba->mp_bd_tbl;
+	addr = (unsigned long) hba->dummy_buf_dma;
+	mp_bdt->buffer_addr_lo = addr & 0xffffffff;
+	mp_bdt->buffer_addr_hi = addr >> 32;
+	mp_bdt->buffer_length = PAGE_SIZE;
+	mp_bdt->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+out:
+	return rc;
+}
+
+
+/**
+ * bnx2i_free_mp_bdt - releases ITT back to free pool
+ * @hba:	pointer to adapter instance
+ *
+ * free MP dummy buffer and associated BD table
+ */
+static void bnx2i_free_mp_bdt(struct bnx2i_hba *hba)
+{
+	if (hba->mp_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->mp_bd_tbl, hba->mp_bd_dma);
+		hba->mp_bd_tbl = NULL;
+	}
+	if (hba->dummy_buffer) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->dummy_buffer, hba->dummy_buf_dma);
+		hba->dummy_buffer = NULL;
+	}
+		return;
+}
+
+/**
+ * bnx2i_drop_session - notifies iscsid of connection error.
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ *
+ * This notifies iscsid that there is a error, so it can initiate
+ * recovery.
+ *
+ * This relies on caller using the iscsi class iterator so the object
+ * is refcounted and does not disapper from under us.
+ */
+void bnx2i_drop_session(struct iscsi_cls_session *cls_session)
+{
+	iscsi_session_failure(cls_session->dd_data, ISCSI_ERR_CONN_FAILED);
+}
+
+/**
+ * bnx2i_ep_destroy_list_add - add an entry to EP destroy list
+ * @hba:	pointer to adapter instance
+ * @ep:		pointer to endpoint (transport indentifier) structure
+ *
+ * EP destroy queue manager
+ */
+static int bnx2i_ep_destroy_list_add(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_add_tail(&ep->link, &hba->ep_destroy_list);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+/**
+ * bnx2i_ep_destroy_list_del - add an entry to EP destroy list
+ *
+ * @hba: 		pointer to adapter instance
+ * @ep: 		pointer to endpoint (transport indentifier) structure
+ *
+ * EP destroy queue manager
+ */
+static int bnx2i_ep_destroy_list_del(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_del_init(&ep->link);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+
+	return 0;
+}
+
+/**
+ * bnx2i_ep_ofld_list_add - add an entry to ep offload pending list
+ * @hba:	pointer to adapter instance
+ * @ep:		pointer to endpoint (transport indentifier) structure
+ *
+ * pending conn offload completion queue manager
+ */
+static int bnx2i_ep_ofld_list_add(struct bnx2i_hba *hba,
+				  struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_add_tail(&ep->link, &hba->ep_ofld_list);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+/**
+ * bnx2i_ep_ofld_list_del - add an entry to ep offload pending list
+ * @hba: 		pointer to adapter instance
+ * @ep: 		pointer to endpoint (transport indentifier) structure
+ *
+ * pending conn offload completion queue manager
+ */
+static int bnx2i_ep_ofld_list_del(struct bnx2i_hba *hba,
+				  struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_del_init(&ep->link);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_find_ep_in_ofld_list - find iscsi_cid in pending list of endpoints
+ *
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to find
+ *
+ */
+struct bnx2i_endpoint *
+bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32 iscsi_cid)
+{
+	struct list_head *list;
+	struct list_head *tmp;
+	struct bnx2i_endpoint *ep;
+
+	read_lock_bh(&hba->ep_rdwr_lock);
+	list_for_each_safe(list, tmp, &hba->ep_ofld_list) {
+		ep = (struct bnx2i_endpoint *)list;
+
+		if (ep->ep_iscsi_cid == iscsi_cid)
+			break;
+		ep = NULL;
+	}
+	read_unlock_bh(&hba->ep_rdwr_lock);
+
+	if (!ep)
+		printk(KERN_ERR "l5 cid %d not found\n", iscsi_cid);
+	return ep;
+}
+
+
+/**
+ * bnx2i_find_ep_in_destroy_list - find iscsi_cid in destroy list
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to find
+ *
+ */
+struct bnx2i_endpoint *
+bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, u32 iscsi_cid)
+{
+	struct list_head *list;
+	struct list_head *tmp;
+	struct bnx2i_endpoint *ep;
+
+	read_lock_bh(&hba->ep_rdwr_lock);
+	list_for_each_safe(list, tmp, &hba->ep_destroy_list) {
+		ep = (struct bnx2i_endpoint *)list;
+
+		if (ep->ep_iscsi_cid == iscsi_cid)
+			break;
+		ep = NULL;
+	}
+	read_unlock_bh(&hba->ep_rdwr_lock);
+
+	if (!ep)
+		printk(KERN_ERR "l5 cid %d not found\n", iscsi_cid);
+
+	return ep;
+}
+
+/**
+ * bnx2i_setup_host_queue_size - assigns shost->can_queue param
+ * @hba:	pointer to adapter instance
+ * @shost:	scsi host pointer
+ *
+ * Initializes 'can_queue' parameter based on how many outstanding commands
+ * 	the device can handle. Each device 5708/5709/57710 has different
+ *	capabilities
+ */
+static void bnx2i_setup_host_queue_size(struct bnx2i_hba *hba,
+					struct Scsi_Host *shost)
+{
+	if (test_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5708;
+	else if (test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5709;
+	else if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_57710;
+	else
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5708;
+}
+
+
+/**
+ * bnx2i_alloc_hba - allocate and init adapter instance
+ * @cnic:	cnic device pointer
+ *
+ * allocate & initialize adapter structure and call other
+ *	support routines to do per adapter initialization
+ */
+struct bnx2i_hba *bnx2i_alloc_hba(struct cnic_dev *cnic)
+{
+	struct Scsi_Host *shost;
+	struct bnx2i_hba *hba;
+
+	shost = iscsi_host_alloc(&bnx2i_host_template, sizeof(*hba), 0);
+	if (!shost)
+		return NULL;
+	shost->dma_boundary = cnic->pcidev->dma_mask;
+	shost->transportt = bnx2i_scsi_xport_template;
+	shost->max_id = ISCSI_MAX_CONNS_PER_HBA;
+	shost->max_channel = 0;
+	shost->max_lun = 512;
+	shost->max_cmd_len = 16;
+
+	hba = iscsi_host_priv(shost);
+	hba->shost = shost;
+	hba->netdev = cnic->netdev;
+	/* Get PCI related information and update hba struct members */
+	hba->pcidev = cnic->pcidev;
+	pci_dev_get(hba->pcidev);
+	hba->pci_did = hba->pcidev->device;
+	hba->pci_vid = hba->pcidev->vendor;
+	hba->pci_sdid = hba->pcidev->subsystem_device;
+	hba->pci_svid = hba->pcidev->subsystem_vendor;
+	hba->pci_func = PCI_FUNC(hba->pcidev->devfn);
+	hba->pci_devno = PCI_SLOT(hba->pcidev->devfn);
+	bnx2i_identify_device(hba);
+
+	bnx2i_identify_device(hba);
+	bnx2i_setup_host_queue_size(hba, shost);
+
+	if (test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type)) {
+		hba->regview = ioremap_nocache(hba->netdev->base_addr,
+					       BNX2_MQ_CONFIG2);
+		if (!hba->regview)
+			goto ioreg_map_err;
+	} else if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		hba->regview = ioremap_nocache(hba->netdev->base_addr, 4096);
+		if (!hba->regview)
+			goto ioreg_map_err;
+	}
+
+	if (bnx2i_setup_mp_bdt(hba))
+		goto mp_bdt_mem_err;
+
+	INIT_LIST_HEAD(&hba->ep_ofld_list);
+	INIT_LIST_HEAD(&hba->ep_destroy_list);
+	rwlock_init(&hba->ep_rdwr_lock);
+
+	hba->mtu_supported = BNX2I_MAX_MTU_SUPPORTED;
+
+	/* different values for 5708/5709/57710 */
+	hba->max_active_conns = ISCSI_MAX_CONNS_PER_HBA;
+
+	if (bnx2i_setup_free_cid_que(hba))
+		goto cid_que_err;
+
+	/* SQ/RQ/CQ size can be changed via sysfx interface */
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		if (sq_size && sq_size <= BNX2I_5770X_SQ_WQES_MAX)
+			hba->max_sqes = sq_size;
+		else
+			hba->max_sqes = BNX2I_5770X_SQ_WQES_DEFAULT;
+	} else {	/* 5706/5708/5709 */
+		if (sq_size && sq_size <= BNX2I_570X_SQ_WQES_MAX)
+			hba->max_sqes = sq_size;
+		else
+			hba->max_sqes = BNX2I_570X_SQ_WQES_DEFAULT;
+	}
+
+	hba->max_rqes = rq_size;
+	hba->max_cqes = hba->max_sqes + rq_size;
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		if (hba->max_cqes > BNX2I_5770X_CQ_WQES_MAX)
+			hba->max_cqes = BNX2I_5770X_CQ_WQES_MAX;
+	} else if (hba->max_cqes > BNX2I_570X_CQ_WQES_MAX)
+		hba->max_cqes = BNX2I_570X_CQ_WQES_MAX;
+
+	hba->num_ccell = hba->max_sqes / 2;
+
+	spin_lock_init(&hba->lock);
+	mutex_init(&hba->net_dev_lock);
+
+	if (iscsi_host_add(shost, &hba->pcidev->dev))
+		goto free_dump_mem;
+	return hba;
+
+free_dump_mem:
+	bnx2i_release_free_cid_que(hba);
+cid_que_err:
+	bnx2i_free_mp_bdt(hba);
+mp_bdt_mem_err:
+	if (hba->regview) {
+		iounmap(hba->regview);
+		hba->regview = NULL;
+	}
+ioreg_map_err:
+	pci_dev_put(hba->pcidev);
+	scsi_host_put(shost);
+	return NULL;
+}
+
+/**
+ * bnx2i_free_hba- releases hba structure and resources held by the adapter
+ * @hba:	pointer to adapter instance
+ *
+ * free adapter structure and call various cleanup routines.
+ */
+void bnx2i_free_hba(struct bnx2i_hba *hba)
+{
+	struct Scsi_Host *shost = hba->shost;
+
+	iscsi_host_remove(shost);
+	INIT_LIST_HEAD(&hba->ep_ofld_list);
+	INIT_LIST_HEAD(&hba->ep_destroy_list);
+	pci_dev_put(hba->pcidev);
+
+	if (hba->regview) {
+		iounmap(hba->regview);
+		hba->regview = NULL;
+	}
+	bnx2i_free_mp_bdt(hba);
+	bnx2i_release_free_cid_que(hba);
+	iscsi_host_free(shost);
+}
+
+/**
+ * bnx2i_conn_free_login_resources - free DMA resources used for login process
+ * @hba:		pointer to adapter instance
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Login related resources, mostly BDT & payload DMA memory is freed
+ */
+static void bnx2i_conn_free_login_resources(struct bnx2i_hba *hba,
+					    struct bnx2i_conn *bnx2i_conn)
+{
+	if (bnx2i_conn->gen_pdu.resp_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  bnx2i_conn->gen_pdu.resp_bd_tbl,
+				  bnx2i_conn->gen_pdu.resp_bd_dma);
+		bnx2i_conn->gen_pdu.resp_bd_tbl = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.req_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  bnx2i_conn->gen_pdu.req_bd_tbl,
+				  bnx2i_conn->gen_pdu.req_bd_dma);
+		bnx2i_conn->gen_pdu.req_bd_tbl = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.resp_buf) {
+		dma_free_coherent(&hba->pcidev->dev,
+				  ISCSI_DEF_MAX_RECV_SEG_LEN,
+				  bnx2i_conn->gen_pdu.resp_buf,
+				  bnx2i_conn->gen_pdu.resp_dma_addr);
+		bnx2i_conn->gen_pdu.resp_buf = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.req_buf) {
+		dma_free_coherent(&hba->pcidev->dev,
+				  ISCSI_DEF_MAX_RECV_SEG_LEN,
+				  bnx2i_conn->gen_pdu.req_buf,
+				  bnx2i_conn->gen_pdu.req_dma_addr);
+		bnx2i_conn->gen_pdu.req_buf = NULL;
+	}
+}
+
+/**
+ * bnx2i_conn_alloc_login_resources - alloc DMA resources for login/nop.
+ * @hba:		pointer to adapter instance
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Mgmt task DNA resources are allocated in this routine.
+ */
+static int bnx2i_conn_alloc_login_resources(struct bnx2i_hba *hba,
+					    struct bnx2i_conn *bnx2i_conn)
+{
+	/* Allocate memory for login request/response buffers */
+	bnx2i_conn->gen_pdu.req_buf =
+		dma_alloc_coherent(&hba->pcidev->dev,
+				   ISCSI_DEF_MAX_RECV_SEG_LEN,
+				   &bnx2i_conn->gen_pdu.req_dma_addr,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.req_buf == NULL)
+		goto login_req_buf_failure;
+
+	bnx2i_conn->gen_pdu.req_buf_size = 0;
+	bnx2i_conn->gen_pdu.req_wr_ptr = bnx2i_conn->gen_pdu.req_buf;
+
+	bnx2i_conn->gen_pdu.resp_buf =
+		dma_alloc_coherent(&hba->pcidev->dev,
+				   ISCSI_DEF_MAX_RECV_SEG_LEN,
+				   &bnx2i_conn->gen_pdu.resp_dma_addr,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.resp_buf == NULL)
+		goto login_resp_buf_failure;
+
+	bnx2i_conn->gen_pdu.resp_buf_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
+	bnx2i_conn->gen_pdu.resp_wr_ptr = bnx2i_conn->gen_pdu.resp_buf;
+
+	bnx2i_conn->gen_pdu.req_bd_tbl =
+		dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				   &bnx2i_conn->gen_pdu.req_bd_dma, GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.req_bd_tbl == NULL)
+		goto login_req_bd_tbl_failure;
+
+	bnx2i_conn->gen_pdu.resp_bd_tbl =
+		dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				   &bnx2i_conn->gen_pdu.resp_bd_dma,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.resp_bd_tbl == NULL)
+		goto login_resp_bd_tbl_failure;
+
+	return 0;
+
+login_resp_bd_tbl_failure:
+	dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+			  bnx2i_conn->gen_pdu.req_bd_tbl,
+			  bnx2i_conn->gen_pdu.req_bd_dma);
+	bnx2i_conn->gen_pdu.req_bd_tbl = NULL;
+
+login_req_bd_tbl_failure:
+	dma_free_coherent(&hba->pcidev->dev, ISCSI_DEF_MAX_RECV_SEG_LEN,
+			  bnx2i_conn->gen_pdu.resp_buf,
+			  bnx2i_conn->gen_pdu.resp_dma_addr);
+	bnx2i_conn->gen_pdu.resp_buf = NULL;
+login_resp_buf_failure:
+	dma_free_coherent(&hba->pcidev->dev, ISCSI_DEF_MAX_RECV_SEG_LEN,
+			  bnx2i_conn->gen_pdu.req_buf,
+			  bnx2i_conn->gen_pdu.req_dma_addr);
+	bnx2i_conn->gen_pdu.req_buf = NULL;
+login_req_buf_failure:
+	iscsi_conn_printk(KERN_ERR, bnx2i_conn->cls_conn->dd_data,
+			  "login resource alloc failed!!\n");
+	return -ENOMEM;
+
+}
+
+
+/**
+ * bnx2i_iscsi_prep_generic_pdu_bd - prepares BD table.
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Allocates buffers and BD tables before shipping requests to cnic
+ *	for PDUs prepared by 'iscsid' daemon
+ */
+static void bnx2i_iscsi_prep_generic_pdu_bd(struct bnx2i_conn *bnx2i_conn)
+{
+	struct iscsi_bd *bd_tbl;
+
+	bd_tbl = (struct iscsi_bd *) bnx2i_conn->gen_pdu.req_bd_tbl;
+
+	bd_tbl->buffer_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.req_dma_addr >> 32);
+	bd_tbl->buffer_addr_lo = (u32) bnx2i_conn->gen_pdu.req_dma_addr;
+	bd_tbl->buffer_length = bnx2i_conn->gen_pdu.req_wr_ptr -
+				bnx2i_conn->gen_pdu.req_buf;
+	bd_tbl->reserved0 = 0;
+	bd_tbl->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+
+	bd_tbl = (struct iscsi_bd  *) bnx2i_conn->gen_pdu.resp_bd_tbl;
+	bd_tbl->buffer_addr_hi = (u64) bnx2i_conn->gen_pdu.resp_dma_addr >> 32;
+	bd_tbl->buffer_addr_lo = (u32) bnx2i_conn->gen_pdu.resp_dma_addr;
+	bd_tbl->buffer_length = ISCSI_DEF_MAX_RECV_SEG_LEN;
+	bd_tbl->reserved0 = 0;
+	bd_tbl->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+}
+
+
+/**
+ * bnx2i_iscsi_send_generic_request - called to send mgmt tasks.
+ * @task:	transport layer task pointer
+ *
+ * called to transmit PDUs prepared by the 'iscsid' daemon. iSCSI login,
+ *	Nop-out and Logout requests flow through this path.
+ */
+static int bnx2i_iscsi_send_generic_request(struct iscsi_task *task)
+{
+	struct bnx2i_cmd *cmd = task->dd_data;
+	struct bnx2i_conn *bnx2i_conn = cmd->conn;
+	int rc = 0;
+	char *buf;
+	int data_len;
+
+	bnx2i_iscsi_prep_generic_pdu_bd(bnx2i_conn);
+	switch (task->hdr->opcode & ISCSI_OPCODE_MASK) {
+	case ISCSI_OP_LOGIN:
+		bnx2i_send_iscsi_login(bnx2i_conn, task);
+		break;
+	case ISCSI_OP_NOOP_OUT:
+		data_len = bnx2i_conn->gen_pdu.req_buf_size;
+		buf = bnx2i_conn->gen_pdu.req_buf;
+		if (data_len)
+			rc = bnx2i_send_iscsi_nopout(bnx2i_conn, task,
+						     RESERVED_ITT,
+						     buf, data_len, 1);
+		else
+			rc = bnx2i_send_iscsi_nopout(bnx2i_conn, task,
+						     RESERVED_ITT,
+						     NULL, 0, 1);
+		break;
+	case ISCSI_OP_LOGOUT:
+		rc = bnx2i_send_iscsi_logout(bnx2i_conn, task);
+		break;
+	case ISCSI_OP_SCSI_TMFUNC:
+		rc = bnx2i_send_iscsi_tmf(bnx2i_conn, task);
+		break;
+	default:
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				  "send_gen: unsupported op 0x%x\n",
+				  task->hdr->opcode);
+	}
+	return rc;
+}
+
+
+/**********************************************************************
+ *		SCSI-ML Interface
+ **********************************************************************/
+
+/**
+ * bnx2i_cpy_scsi_cdb - copies LUN & CDB fields in required format to sq wqe
+ * @sc:		SCSI-ML command pointer
+ * @cmd:	iscsi cmd pointer
+ */
+static void bnx2i_cpy_scsi_cdb(struct scsi_cmnd *sc, struct bnx2i_cmd *cmd)
+{
+	u32 dword;
+	int lpcnt;
+	u8 *srcp;
+	u32 *dstp;
+	u32 scsi_lun[2];
+
+	int_to_scsilun(sc->device->lun, (struct scsi_lun *) scsi_lun);
+	cmd->req.lun[0] = be32_to_cpu(scsi_lun[0]);
+	cmd->req.lun[1] = be32_to_cpu(scsi_lun[1]);
+
+	lpcnt = cmd->scsi_cmd->cmd_len / sizeof(dword);
+	srcp = (u8 *) sc->cmnd;
+	dstp = (u32 *) cmd->req.cdb;
+	while (lpcnt--) {
+		memcpy(&dword, (const void *) srcp, 4);
+		*dstp = cpu_to_be32(dword);
+		srcp += 4;
+		dstp++;
+	}
+	if (sc->cmd_len & 0x3) {
+		dword = (u32) srcp[0] | ((u32) srcp[1] << 8);
+		*dstp = cpu_to_be32(dword);
+	}
+}
+
+static void bnx2i_cleanup_task(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_hba *hba = bnx2i_conn->hba;
+
+	/*
+	 * mgmt task or cmd was never sent to us to transmit.
+	 */
+	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+		return;
+	/*
+	 * need to clean-up task context to claim dma buffers
+	 */
+	if (task->state == ISCSI_TASK_ABRT_TMF) {
+		bnx2i_send_cmd_cleanup_req(hba, task->dd_data);
+
+		spin_unlock_bh(&conn->session->lock);
+		wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
+				msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
+		spin_lock_bh(&conn->session->lock);
+	}
+	bnx2i_iscsi_unmap_sg_list(task->dd_data);
+}
+
+/**
+ * bnx2i_mtask_xmit - transmit mtask to chip for further processing
+ * @conn:	transport layer conn structure pointer
+ * @task:	transport layer command structure pointer
+ */
+static int
+bnx2i_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
+{
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_cmd *cmd = task->dd_data;
+
+	memset(bnx2i_conn->gen_pdu.req_buf, 0, ISCSI_DEF_MAX_RECV_SEG_LEN);
+
+	bnx2i_setup_cmd_wqe_template(cmd);
+	bnx2i_conn->gen_pdu.req_buf_size = task->data_count;
+	if (task->data_count) {
+		memcpy(bnx2i_conn->gen_pdu.req_buf, task->data,
+		       task->data_count);
+		bnx2i_conn->gen_pdu.req_wr_ptr =
+			bnx2i_conn->gen_pdu.req_buf + task->data_count;
+	}
+	cmd->conn = conn->dd_data;
+	cmd->scsi_cmd = NULL;
+	return bnx2i_iscsi_send_generic_request(task);
+}
+
+/**
+ * bnx2i_task_xmit - transmit iscsi command to chip for further processing
+ * @task:	transport layer command structure pointer
+ *
+ * maps SG buffers and send request to chip/firmware in the form of SQ WQE
+ */
+static int bnx2i_task_xmit(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct iscsi_session *session = conn->session;
+	struct Scsi_Host *shost = iscsi_session_to_shost(session->cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct scsi_cmnd *sc = task->sc;
+	struct bnx2i_cmd *cmd = task->dd_data;
+	struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
+
+	if (test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
+		return -ENOTCONN;
+
+	if (!bnx2i_conn->is_bound)
+		return -ENOTCONN;
+
+	/*
+	 * If there is no scsi_cmnd this must be a mgmt task
+	 */
+	if (!sc)
+		return bnx2i_mtask_xmit(conn, task);
+
+	bnx2i_setup_cmd_wqe_template(cmd);
+	cmd->req.op_code = ISCSI_OP_SCSI_CMD;
+	cmd->conn = bnx2i_conn;
+	cmd->scsi_cmd = sc;
+	cmd->req.total_data_transfer_length = scsi_bufflen(sc);
+	cmd->req.cmd_sn = be32_to_cpu(hdr->cmdsn);
+
+	bnx2i_iscsi_map_sg_list(cmd);
+	bnx2i_cpy_scsi_cdb(sc, cmd);
+
+	cmd->req.op_attr = ISCSI_ATTR_SIMPLE;
+	if (sc->sc_data_direction == DMA_TO_DEVICE) {
+		cmd->req.op_attr |= ISCSI_CMD_REQUEST_WRITE;
+		cmd->req.itt = task->itt |
+			(ISCSI_TASK_TYPE_WRITE << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+		bnx2i_setup_write_cmd_bd_info(task);
+	} else {
+		if (scsi_bufflen(sc))
+			cmd->req.op_attr |= ISCSI_CMD_REQUEST_READ;
+		cmd->req.itt = task->itt |
+			(ISCSI_TASK_TYPE_READ << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	}
+
+	cmd->req.num_bds = cmd->io_tbl.bd_valid;
+	if (!cmd->io_tbl.bd_valid) {
+		cmd->req.bd_list_addr_lo = (u32) hba->mp_bd_dma;
+		cmd->req.bd_list_addr_hi = (u32) ((u64) hba->mp_bd_dma >> 32);
+		cmd->req.num_bds = 1;
+	}
+
+	bnx2i_send_iscsi_scsicmd(bnx2i_conn, cmd);
+	return 0;
+}
+
+/**
+ * bnx2i_session_create - create a new iscsi session
+ * @cmds_max:		max commands supported
+ * @qdepth:		scsi queue depth to support
+ * @initial_cmdsn:	initial iscsi CMDSN to be used for this session
+ *
+ * Creates a new iSCSI session instance on given device.
+ */
+static struct iscsi_cls_session *
+bnx2i_session_create(struct iscsi_endpoint *ep,
+		     uint16_t cmds_max, uint16_t qdepth,
+		     uint32_t initial_cmdsn)
+{
+	struct Scsi_Host *shost;
+	struct iscsi_cls_session *cls_session;
+	struct bnx2i_hba *hba;
+	struct bnx2i_endpoint *bnx2i_ep;
+
+	if (!ep) {
+		printk(KERN_ERR "bnx2i: missing ep.\n");
+		return NULL;
+	}
+
+	bnx2i_ep = ep->dd_data;
+	shost = bnx2i_ep->hba->shost;
+	hba = iscsi_host_priv(shost);
+	if (bnx2i_adapter_ready(hba))
+		return NULL;
+
+	/*
+	 * user can override hw limit as long as it is within
+	 * the min/max.
+	 */
+	if (cmds_max > hba->max_sqes)
+		cmds_max = hba->max_sqes;
+	else if (cmds_max < BNX2I_SQ_WQES_MIN)
+		cmds_max = BNX2I_SQ_WQES_MIN;
+
+	cls_session = iscsi_session_setup(&bnx2i_iscsi_transport, shost,
+					  cmds_max, sizeof(struct bnx2i_cmd),
+					  initial_cmdsn, ISCSI_MAX_TARGET);
+	if (!cls_session)
+		return NULL;
+
+	if (bnx2i_setup_cmd_pool(hba, cls_session->dd_data))
+		goto session_teardown;
+	return cls_session;
+
+session_teardown:
+	iscsi_session_teardown(cls_session);
+	return NULL;
+}
+
+
+/**
+ * bnx2i_session_destroy - destroys iscsi session
+ * @cls_session:	pointer to iscsi cls session
+ *
+ * Destroys previously created iSCSI session instance and releases
+ *	all resources held by it
+ */
+static void bnx2i_session_destroy(struct iscsi_cls_session *cls_session)
+{
+	struct iscsi_session *session = cls_session->dd_data;
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+
+	bnx2i_destroy_cmd_pool(hba, session);
+	iscsi_session_teardown(cls_session);
+}
+
+
+/**
+ * bnx2i_conn_create - create iscsi connection instance
+ * @cls_session:	pointer to iscsi cls session
+ * @cid:		iscsi cid as per rfc (not NX2's CID terminology)
+ *
+ * Creates a new iSCSI connection instance for a given session
+ */
+static struct iscsi_cls_conn *
+bnx2i_conn_create(struct iscsi_cls_session *cls_session, uint32_t cid)
+{
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_conn *bnx2i_conn;
+	struct iscsi_cls_conn *cls_conn;
+	struct iscsi_conn *conn;
+
+	cls_conn = iscsi_conn_setup(cls_session, sizeof(*bnx2i_conn),
+				    cid);
+	if (!cls_conn)
+		return NULL;
+	conn = cls_conn->dd_data;
+
+	bnx2i_conn = conn->dd_data;
+	bnx2i_conn->cls_conn = cls_conn;
+	bnx2i_conn->hba = hba;
+	/* 'ep' ptr will be assigned in bind() call */
+	bnx2i_conn->ep = NULL;
+	init_completion(&bnx2i_conn->cmd_cleanup_cmpl);
+
+	if (bnx2i_conn_alloc_login_resources(hba, bnx2i_conn)) {
+		iscsi_conn_printk(KERN_ALERT, conn,
+				  "conn_new: login resc alloc failed!!\n");
+		goto free_conn;
+	}
+
+	return cls_conn;
+
+free_conn:
+	iscsi_conn_teardown(cls_conn);
+	return NULL;
+}
+
+/**
+ * bnx2i_conn_bind - binds iscsi sess, conn and ep objects together
+ * @cls_session:	pointer to iscsi cls session
+ * @cls_conn:		pointer to iscsi cls conn
+ * @transport_fd:	64-bit EP handle
+ * @is_leading:		leading connection on this session?
+ *
+ * Binds together iSCSI session instance, iSCSI connection instance
+ *	and the TCP connection. This routine returns error code if
+ *	TCP connection does not belong on the device iSCSI sess/conn
+ *	is bound
+ */
+static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session,
+			   struct iscsi_cls_conn *cls_conn,
+			   uint64_t transport_fd, int is_leading)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct iscsi_endpoint *ep;
+	int ret_code;
+
+	ep = iscsi_lookup_endpoint(transport_fd);
+	if (!ep)
+		return -EINVAL;
+
+	bnx2i_ep = ep->dd_data;
+	if ((bnx2i_ep->state == EP_STATE_TCP_FIN_RCVD) ||
+	    (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD))
+		/* Peer disconnect via' FIN or RST */
+		return -EINVAL;
+
+	if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
+		return -EINVAL;
+
+	if (bnx2i_ep->hba != hba) {
+		/* Error - TCP connection does not belong to this device
+		 */
+		iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data,
+				  "conn bind, ep=0x%p (%s) does not",
+				  bnx2i_ep, bnx2i_ep->hba->netdev->name);
+		iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data,
+				  "belong to hba (%s)\n",
+				  hba->netdev->name);
+		return -EEXIST;
+	}
+
+	bnx2i_ep->conn = bnx2i_conn;
+	bnx2i_conn->ep = bnx2i_ep;
+	bnx2i_conn->iscsi_conn_cid = bnx2i_ep->ep_iscsi_cid;
+	bnx2i_conn->fw_cid = bnx2i_ep->ep_cid;
+	bnx2i_conn->is_bound = 1;
+
+	ret_code = bnx2i_bind_conn_to_iscsi_cid(hba, bnx2i_conn,
+						bnx2i_ep->ep_iscsi_cid);
+
+	/* 5706/5708/5709 FW takes RQ as full when initiated, but for 57710
+	 * driver needs to explicitly replenish RQ index during setup.
+	 */
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_ep->hba->cnic_dev_type))
+		bnx2i_put_rq_buf(bnx2i_conn, 0);
+
+	bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
+	return ret_code;
+}
+
+
+/**
+ * bnx2i_conn_destroy - destroy iscsi connection instance & release resources
+ * @cls_conn:	pointer to iscsi cls conn
+ *
+ * Destroy an iSCSI connection instance and release memory resources held by
+ *	this connection
+ */
+static void bnx2i_conn_destroy(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct Scsi_Host *shost;
+	struct bnx2i_hba *hba;
+
+	shost = iscsi_session_to_shost(iscsi_conn_to_session(cls_conn));
+	hba = iscsi_host_priv(shost);
+
+	bnx2i_conn_free_login_resources(hba, bnx2i_conn);
+	iscsi_conn_teardown(cls_conn);
+}
+
+
+/**
+ * bnx2i_conn_get_param - return iscsi connection parameter to caller
+ * @cls_conn:	pointer to iscsi cls conn
+ * @param:	parameter type identifier
+ * @buf: 	buffer pointer
+ *
+ * returns iSCSI connection parameters
+ */
+static int bnx2i_conn_get_param(struct iscsi_cls_conn *cls_conn,
+				enum iscsi_param param, char *buf)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	int len = 0;
+
+	switch (param) {
+	case ISCSI_PARAM_CONN_PORT:
+		if (bnx2i_conn->ep)
+			len = sprintf(buf, "%hu\n",
+				      bnx2i_conn->ep->cm_sk->dst_port);
+		break;
+	case ISCSI_PARAM_CONN_ADDRESS:
+		if (bnx2i_conn->ep)
+			len = sprintf(buf, NIPQUAD_FMT "\n",
+				      NIPQUAD(bnx2i_conn->ep->cm_sk->dst_ip));
+		break;
+	default:
+		return iscsi_conn_get_param(cls_conn, param, buf);
+	}
+
+	return len;
+}
+
+/**
+ * bnx2i_host_get_param - returns host (adapter) related parameters
+ * @shost:	scsi host pointer
+ * @param:	parameter type identifier
+ * @buf:	buffer pointer
+ */
+static int bnx2i_host_get_param(struct Scsi_Host *shost,
+				enum iscsi_host_param param, char *buf)
+{
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	int len = 0;
+
+	switch (param) {
+	case ISCSI_HOST_PARAM_HWADDRESS:
+		len = sysfs_format_mac(buf, hba->cnic->mac_addr, 6);
+		break;
+	case ISCSI_HOST_PARAM_NETDEV_NAME:
+		len = sprintf(buf, "%s\n", hba->netdev->name);
+		break;
+	default:
+		return iscsi_host_get_param(shost, param, buf);
+	}
+	return len;
+}
+
+/**
+ * bnx2i_conn_start - completes iscsi connection migration to FFP
+ * @cls_conn:	pointer to iscsi cls conn
+ *
+ * last call in FFP migration to handover iscsi conn to the driver
+ */
+static int bnx2i_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+
+	bnx2i_conn->ep->state = EP_STATE_ULP_UPDATE_START;
+	bnx2i_update_iscsi_conn(conn);
+
+	/*
+	 * this should normally not sleep for a long time so it should
+	 * not disrupt the caller.
+	 */
+	bnx2i_conn->ep->ofld_timer.expires = 1 * HZ + jiffies;
+	bnx2i_conn->ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_conn->ep->ofld_timer.data = (unsigned long) bnx2i_conn->ep;
+	add_timer(&bnx2i_conn->ep->ofld_timer);
+	/* update iSCSI context for this conn, wait for CNIC to complete */
+	wait_event_interruptible(bnx2i_conn->ep->ofld_wait,
+			bnx2i_conn->ep->state != EP_STATE_ULP_UPDATE_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_conn->ep->ofld_timer);
+
+	iscsi_conn_start(cls_conn);
+	return 0;
+}
+
+
+/**
+ * bnx2i_conn_get_stats - returns iSCSI stats
+ * @cls_conn:	pointer to iscsi cls conn
+ * @stats:	pointer to iscsi statistic struct
+ */
+static void bnx2i_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+				 struct iscsi_stats *stats)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+
+	stats->txdata_octets = conn->txdata_octets;
+	stats->rxdata_octets = conn->rxdata_octets;
+	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
+	stats->dataout_pdus = conn->dataout_pdus_cnt;
+	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
+	stats->datain_pdus = conn->datain_pdus_cnt;
+	stats->r2t_pdus = conn->r2t_pdus_cnt;
+	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
+	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
+	stats->custom_length = 3;
+	strcpy(stats->custom[2].desc, "eh_abort_cnt");
+	stats->custom[2].value = conn->eh_abort_cnt;
+	stats->digest_err = 0;
+	stats->timeout_err = 0;
+	stats->custom_length = 0;
+}
+
+
+/**
+ * bnx2i_check_route - checks if target IP route belongs to one of NX2 devices
+ * @dst_addr:	target IP address
+ *
+ * check if route resolves to BNX2 device
+ */
+static struct bnx2i_hba *bnx2i_check_route(struct sockaddr *dst_addr)
+{
+	struct sockaddr_in *desti = (struct sockaddr_in *) dst_addr;
+	struct bnx2i_hba *hba;
+	struct cnic_dev *cnic = NULL;
+
+	bnx2i_reg_dev_all();
+
+	hba = get_adapter_list_head();
+	if (hba && hba->cnic)
+		cnic = hba->cnic->cm_select_dev(desti, CNIC_ULP_ISCSI);
+	if (!cnic) {
+		printk(KERN_ALERT "bnx2i: no route,"
+		       "can't connect using cnic\n");
+		goto no_nx2_route;
+	}
+	hba = bnx2i_find_hba_for_cnic(cnic);
+	if (!hba)
+		goto no_nx2_route;
+
+	if (bnx2i_adapter_ready(hba)) {
+		printk(KERN_ALERT "bnx2i: check route, hba not found\n");
+		goto no_nx2_route;
+	}
+	if (hba->netdev->mtu > hba->mtu_supported) {
+		printk(KERN_ALERT "bnx2i: %s network i/f mtu is set to %d\n",
+				  hba->netdev->name, hba->netdev->mtu);
+		printk(KERN_ALERT "bnx2i: iSCSI HBA can support mtu of %d\n",
+				  hba->mtu_supported);
+		goto no_nx2_route;
+	}
+	return hba;
+no_nx2_route:
+	return NULL;
+}
+
+
+/**
+ * bnx2i_tear_down_conn - tear down iscsi/tcp connection and free resources
+ * @hba:	pointer to adapter instance
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * destroys cm_sock structure and on chip iscsi context
+ */
+static int bnx2i_tear_down_conn(struct bnx2i_hba *hba,
+				 struct bnx2i_endpoint *ep)
+{
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic))
+		hba->cnic->cm_destroy(ep->cm_sk);
+
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &ep->hba->adapter_state))
+		ep->state = EP_STATE_DISCONN_COMPL;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type) &&
+	    ep->state == EP_STATE_DISCONN_TIMEDOUT) {
+		printk(KERN_ALERT "bnx2i - ERROR - please submit GRC Dump,"
+				  " NW/PCIe trace, driver msgs to developers"
+				  " for analysis\n");
+		return 1;
+	}
+
+	ep->state = EP_STATE_CLEANUP_START;
+	init_timer(&ep->ofld_timer);
+	ep->ofld_timer.expires = 10*HZ + jiffies;
+	ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	ep->ofld_timer.data = (unsigned long) ep;
+	add_timer(&ep->ofld_timer);
+
+	bnx2i_ep_destroy_list_add(hba, ep);
+
+	/* destroy iSCSI context, wait for it to complete */
+	bnx2i_send_conn_destroy(hba, ep);
+	wait_event_interruptible(ep->ofld_wait,
+				 (ep->state != EP_STATE_CLEANUP_START));
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&ep->ofld_timer);
+
+	bnx2i_ep_destroy_list_del(hba, ep);
+
+	if (ep->state != EP_STATE_CLEANUP_CMPL)
+		/* should never happen */
+		printk(KERN_ALERT "bnx2i - conn destroy failed\n");
+
+	return 0;
+}
+
+
+/**
+ * bnx2i_ep_connect - establish TCP connection to target portal
+ * @shost:		scsi host
+ * @dst_addr:		target IP address
+ * @non_blocking:	blocking or non-blocking call
+ *
+ * this routine initiates the TCP/IP connection by invoking Option-2 i/f
+ *	with l5_core and the CNIC. This is a multi-step process of resolving
+ *	route to target, create a iscsi connection context, handshaking with
+ *	CNIC module to create/initialize the socket struct and finally
+ *	sending down option-2 request to complete TCP 3-way handshake
+ */
+static struct iscsi_endpoint *bnx2i_ep_connect(struct Scsi_Host *shost,
+					       struct sockaddr *dst_addr,
+					       int non_blocking)
+{
+	u32 iscsi_cid = BNX2I_CID_RESERVED;
+	struct sockaddr_in *desti = (struct sockaddr_in *) dst_addr;
+	struct sockaddr_in6 *desti6;
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct bnx2i_hba *hba;
+	struct cnic_dev *cnic;
+	struct cnic_sockaddr saddr;
+	struct iscsi_endpoint *ep;
+	int rc = 0;
+
+	if (shost)
+		/* driver is given scsi host to work with */
+		hba = iscsi_host_priv(shost);
+	else
+		/*
+		 * check if the given destination can be reached through
+		 * a iscsi capable NetXtreme2 device
+		 */
+		hba = bnx2i_check_route(dst_addr);
+	if (!hba) {
+		rc = -ENOMEM;
+		goto check_busy;
+	}
+
+	cnic = hba->cnic;
+	ep = bnx2i_alloc_ep(hba);
+	if (!ep) {
+		rc = -ENOMEM;
+		goto check_busy;
+	}
+	bnx2i_ep = ep->dd_data;
+
+	mutex_lock(&hba->net_dev_lock);
+	if (bnx2i_adapter_ready(hba)) {
+		rc = -EPERM;
+		goto net_if_down;
+	}
+
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->ep_iscsi_cid = (u16) -1;
+	bnx2i_ep->num_active_cmds = 0;
+	iscsi_cid = bnx2i_alloc_iscsi_cid(hba);
+	if (iscsi_cid == -1) {
+		printk(KERN_ALERT "alloc_ep: unable to allocate iscsi cid\n");
+		rc = -ENOMEM;
+		goto iscsi_cid_err;
+	}
+	bnx2i_ep->hba_age = hba->age;
+
+	rc = bnx2i_alloc_qp_resc(hba, bnx2i_ep);
+	if (rc != 0) {
+		printk(KERN_ALERT "bnx2i: ep_conn, alloc QP resc error\n");
+		rc = -ENOMEM;
+		goto qp_resc_err;
+	}
+
+	bnx2i_ep->ep_iscsi_cid = (u16)iscsi_cid;
+	bnx2i_ep->state = EP_STATE_OFLD_START;
+	bnx2i_ep_ofld_list_add(hba, bnx2i_ep);
+
+	init_timer(&bnx2i_ep->ofld_timer);
+	bnx2i_ep->ofld_timer.expires = 2 * HZ + jiffies;
+	bnx2i_ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_ep->ofld_timer.data = (unsigned long) bnx2i_ep;
+	add_timer(&bnx2i_ep->ofld_timer);
+
+	bnx2i_send_conn_ofld_req(hba, bnx2i_ep);
+
+	/* Wait for CNIC hardware to setup conn context and return 'cid' */
+	wait_event_interruptible(bnx2i_ep->ofld_wait,
+				 bnx2i_ep->state != EP_STATE_OFLD_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_ep->ofld_timer);
+
+	bnx2i_ep_ofld_list_del(hba, bnx2i_ep);
+
+	if (bnx2i_ep->state != EP_STATE_OFLD_COMPL) {
+		rc = -ENOSPC;
+		goto conn_failed;
+	}
+
+	rc = cnic->cm_create(cnic, CNIC_ULP_ISCSI, bnx2i_ep->ep_cid,
+			     iscsi_cid, &bnx2i_ep->cm_sk, bnx2i_ep);
+	if (rc) {
+		rc = -EINVAL;
+		goto conn_failed;
+	}
+
+	bnx2i_ep->cm_sk->rcv_buf = 256 * 1024;
+	bnx2i_ep->cm_sk->snd_buf = 256 * 1024;
+	clear_bit(SK_TCP_TIMESTAMP, &bnx2i_ep->cm_sk->tcp_flags);
+
+	memset(&saddr, 0, sizeof(saddr));
+	if (dst_addr->sa_family == AF_INET) {
+		desti = (struct sockaddr_in *) dst_addr;
+		saddr.remote.v4 = *desti;
+		saddr.local.v4.sin_family = desti->sin_family;
+	} else if (dst_addr->sa_family == AF_INET6) {
+		desti6 = (struct sockaddr_in6 *) dst_addr;
+		saddr.remote.v6 = *desti6;
+		saddr.local.v6.sin6_family = desti6->sin6_family;
+	}
+
+	bnx2i_ep->timestamp = jiffies;
+	bnx2i_ep->state = EP_STATE_CONNECT_START;
+	if (!test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		rc = -EINVAL;
+		goto conn_failed;
+	} else
+		rc = cnic->cm_connect(bnx2i_ep->cm_sk, &saddr);
+
+	if (rc)
+		goto release_ep;
+
+	if (bnx2i_map_ep_dbell_regs(bnx2i_ep))
+		goto release_ep;
+	mutex_unlock(&hba->net_dev_lock);
+	return ep;
+
+release_ep:
+	if (bnx2i_tear_down_conn(hba, bnx2i_ep)) {
+		mutex_unlock(&hba->net_dev_lock);
+		return ERR_PTR(rc);
+	}
+conn_failed:
+net_if_down:
+iscsi_cid_err:
+	bnx2i_free_qp_resc(hba, bnx2i_ep);
+qp_resc_err:
+	bnx2i_free_ep(ep);
+	mutex_unlock(&hba->net_dev_lock);
+check_busy:
+	bnx2i_unreg_dev_all();
+	return ERR_PTR(rc);
+}
+
+
+/**
+ * bnx2i_ep_poll - polls for TCP connection establishement
+ * @ep:			TCP connection (endpoint) handle
+ * @timeout_ms:		timeout value in milli secs
+ *
+ * polls for TCP connect request to complete
+ */
+static int bnx2i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
+{
+	struct bnx2i_endpoint *bnx2i_ep;
+	int rc = 0;
+
+	bnx2i_ep = ep->dd_data;
+	if ((bnx2i_ep->state == EP_STATE_IDLE) ||
+	    (bnx2i_ep->state == EP_STATE_CONNECT_FAILED) ||
+	    (bnx2i_ep->state == EP_STATE_OFLD_FAILED))
+		return -1;
+	if (bnx2i_ep->state == EP_STATE_CONNECT_COMPL)
+		return 1;
+
+	rc = wait_event_interruptible_timeout(bnx2i_ep->ofld_wait,
+					      ((bnx2i_ep->state ==
+						EP_STATE_OFLD_FAILED) ||
+					       (bnx2i_ep->state ==
+						EP_STATE_CONNECT_FAILED) ||
+					       (bnx2i_ep->state ==
+						EP_STATE_CONNECT_COMPL)),
+					      msecs_to_jiffies(timeout_ms));
+	if (!rc || (bnx2i_ep->state == EP_STATE_OFLD_FAILED))
+		rc = -1;
+
+	if (rc > 0)
+		return 1;
+	else if (!rc)
+		return 0;	/* timeout */
+	else
+		return rc;
+}
+
+
+/**
+ * bnx2i_ep_tcp_conn_active - check EP state transition
+ * @ep:		endpoint pointer
+ *
+ * check if underlying TCP connection is active
+ */
+static int bnx2i_ep_tcp_conn_active(struct bnx2i_endpoint *bnx2i_ep)
+{
+	int ret;
+	int cnic_dev_10g = 0;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_ep->hba->cnic_dev_type))
+		cnic_dev_10g = 1;
+
+	switch (bnx2i_ep->state) {
+	case EP_STATE_CONNECT_START:
+	case EP_STATE_CLEANUP_FAILED:
+	case EP_STATE_OFLD_FAILED:
+	case EP_STATE_DISCONN_TIMEDOUT:
+		ret = 0;
+		break;
+	case EP_STATE_CONNECT_COMPL:
+	case EP_STATE_ULP_UPDATE_START:
+	case EP_STATE_ULP_UPDATE_COMPL:
+	case EP_STATE_TCP_FIN_RCVD:
+	case EP_STATE_ULP_UPDATE_FAILED:
+		ret = 1;
+		break;
+	case EP_STATE_TCP_RST_RCVD:
+		ret = 0;
+		break;
+	case EP_STATE_CONNECT_FAILED:
+		if (cnic_dev_10g)
+			ret = 1;
+		else
+			ret = 0;
+		break;
+	default:
+		ret = 0;
+	}
+
+	return ret;
+}
+
+
+/**
+ * bnx2i_ep_disconnect - executes TCP connection teardown process
+ * @ep:		TCP connection (endpoint) handle
+ *
+ * executes  TCP connection teardown process
+ */
+static void bnx2i_ep_disconnect(struct iscsi_endpoint *ep)
+{
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct bnx2i_conn *bnx2i_conn = NULL;
+	struct iscsi_session *session = NULL;
+	struct iscsi_conn *conn;
+	struct cnic_dev *cnic;
+	struct bnx2i_hba *hba;
+
+	bnx2i_ep = ep->dd_data;
+
+	/* driver should not attempt connection cleanup untill TCP_CONNECT
+	 * completes either successfully or fails. Timeout is 9-secs, so
+	 * wait for it to complete
+	 */
+	while ((bnx2i_ep->state == EP_STATE_CONNECT_START) &&
+		!time_after(jiffies, bnx2i_ep->timestamp + (12 * HZ)))
+		msleep(250);
+
+	if (bnx2i_ep->conn) {
+		bnx2i_conn = bnx2i_ep->conn;
+		conn = bnx2i_conn->cls_conn->dd_data;
+		session = conn->session;
+
+		spin_lock_bh(&session->lock);
+		bnx2i_conn->is_bound = 0;
+		spin_unlock_bh(&session->lock);
+	}
+
+	hba = bnx2i_ep->hba;
+	if (bnx2i_ep->state == EP_STATE_IDLE)
+		goto return_bnx2i_ep;
+	cnic = hba->cnic;
+
+	mutex_lock(&hba->net_dev_lock);
+
+	if (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state))
+		goto free_resc;
+	if (bnx2i_ep->hba_age != hba->age)
+		goto free_resc;
+
+	if (!bnx2i_ep_tcp_conn_active(bnx2i_ep))
+		goto destory_conn;
+
+	bnx2i_ep->state = EP_STATE_DISCONN_START;
+
+	init_timer(&bnx2i_ep->ofld_timer);
+	bnx2i_ep->ofld_timer.expires = 10*HZ + jiffies;
+	bnx2i_ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_ep->ofld_timer.data = (unsigned long) bnx2i_ep;
+	add_timer(&bnx2i_ep->ofld_timer);
+
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		int close = 0;
+
+		if (session) {
+			spin_lock_bh(&session->lock);
+			if (session->state == ISCSI_STATE_LOGGING_OUT)
+				close = 1;
+			spin_unlock_bh(&session->lock);
+		}
+		if (close)
+			cnic->cm_close(bnx2i_ep->cm_sk);
+		else
+			cnic->cm_abort(bnx2i_ep->cm_sk);
+	} else
+		goto free_resc;
+
+	/* wait for option-2 conn teardown */
+	wait_event_interruptible(bnx2i_ep->ofld_wait,
+				 bnx2i_ep->state != EP_STATE_DISCONN_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_ep->ofld_timer);
+
+destory_conn:
+	if (bnx2i_tear_down_conn(hba, bnx2i_ep)) {
+		mutex_unlock(&hba->net_dev_lock);
+		return;
+	}
+free_resc:
+	mutex_unlock(&hba->net_dev_lock);
+	bnx2i_free_qp_resc(hba, bnx2i_ep);
+return_bnx2i_ep:
+	if (bnx2i_conn)
+		bnx2i_conn->ep = NULL;
+
+	bnx2i_free_ep(ep);
+
+	if (!hba->ofld_conns_active)
+		bnx2i_unreg_dev_all();
+}
+
+
+/**
+ * bnx2i_nl_set_path - ISCSI_UEVENT_PATH_UPDATE user message handler
+ * @buf:	pointer to buffer containing iscsi path message
+ *
+ */
+static int bnx2i_nl_set_path(struct Scsi_Host *shost, struct iscsi_path *params)
+{
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	char *buf = (char *) params;
+	u16 len = sizeof(*params);
+
+	/* handled by cnic driver */
+	hba->cnic->iscsi_nl_msg_recv(hba->cnic, ISCSI_UEVENT_PATH_UPDATE, buf,
+				     len);
+
+	return 0;
+}
+
+
+/*
+ * 'Scsi_Host_Template' structure and 'iscsi_tranport' structure template
+ * used while registering with the scsi host and iSCSI transport module.
+ */
+static struct scsi_host_template bnx2i_host_template = {
+	.module			= THIS_MODULE,
+	.name			= "Broadcom Offload iSCSI Initiator",
+	.proc_name		= "bnx2i",
+	.queuecommand		= iscsi_queuecommand,
+	.eh_abort_handler	= iscsi_eh_abort,
+	.eh_device_reset_handler = iscsi_eh_device_reset,
+	.eh_target_reset_handler = iscsi_eh_target_reset,
+	.can_queue		= 1024,
+	.max_sectors		= 127,
+	.cmd_per_lun		= 32,
+	.this_id		= -1,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.sg_tablesize		= ISCSI_MAX_BDS_PER_CMD,
+	.shost_attrs		= bnx2i_dev_attributes,
+};
+
+struct iscsi_transport bnx2i_iscsi_transport = {
+	.owner			= THIS_MODULE,
+	.name			= "bnx2i",
+	.caps			= CAP_RECOVERY_L0 | CAP_HDRDGST |
+				  CAP_MULTI_R2T | CAP_DATADGST |
+				  CAP_DATA_PATH_OFFLOAD,
+	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
+				  ISCSI_MAX_XMIT_DLENGTH |
+				  ISCSI_HDRDGST_EN |
+				  ISCSI_DATADGST_EN |
+				  ISCSI_INITIAL_R2T_EN |
+				  ISCSI_MAX_R2T |
+				  ISCSI_IMM_DATA_EN |
+				  ISCSI_FIRST_BURST |
+				  ISCSI_MAX_BURST |
+				  ISCSI_PDU_INORDER_EN |
+				  ISCSI_DATASEQ_INORDER_EN |
+				  ISCSI_ERL |
+				  ISCSI_CONN_PORT |
+				  ISCSI_CONN_ADDRESS |
+				  ISCSI_EXP_STATSN |
+				  ISCSI_PERSISTENT_PORT |
+				  ISCSI_PERSISTENT_ADDRESS |
+				  ISCSI_TARGET_NAME | ISCSI_TPGT |
+				  ISCSI_USERNAME | ISCSI_PASSWORD |
+				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
+				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
+				  ISCSI_LU_RESET_TMO |
+				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
+				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
+	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME,
+	.create_session		= bnx2i_session_create,
+	.destroy_session	= bnx2i_session_destroy,
+	.create_conn		= bnx2i_conn_create,
+	.bind_conn		= bnx2i_conn_bind,
+	.destroy_conn		= bnx2i_conn_destroy,
+	.set_param		= iscsi_set_param,
+	.get_conn_param		= bnx2i_conn_get_param,
+	.get_session_param	= iscsi_session_get_param,
+	.get_host_param		= bnx2i_host_get_param,
+	.start_conn		= bnx2i_conn_start,
+	.stop_conn		= iscsi_conn_stop,
+	.send_pdu		= iscsi_conn_send_pdu,
+	.xmit_task		= bnx2i_task_xmit,
+	.get_stats		= bnx2i_conn_get_stats,
+	/* TCP connect - disconnect - option-2 interface calls */
+	.ep_connect		= bnx2i_ep_connect,
+	.ep_poll		= bnx2i_ep_poll,
+	.ep_disconnect		= bnx2i_ep_disconnect,
+	.set_path		= bnx2i_nl_set_path,
+	/* Error recovery timeout call */
+	.session_recovery_timedout = iscsi_session_recovery_timedout,
+	.cleanup_task		= bnx2i_cleanup_task,
+};
diff --git a/drivers/scsi/bnx2i/bnx2i_sysfs.c b/drivers/scsi/bnx2i/bnx2i_sysfs.c
new file mode 100644
index 0000000..96426b7
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_sysfs.c
@@ -0,0 +1,142 @@
+/* bnx2i_sysfs.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2004 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include "bnx2i.h"
+
+/**
+ * bnx2i_dev_to_hba - maps dev pointer to adapter struct
+ * @dev:	device pointer
+ *
+ * Map device to hba structure
+ */
+static inline struct bnx2i_hba *bnx2i_dev_to_hba(struct device *dev)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	return iscsi_host_priv(shost);
+}
+
+
+/**
+ * bnx2i_show_sq_info - return(s currently configured send queue (SQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ *
+ * Returns current SQ size parameter, this paramater determines the number
+ * outstanding iSCSI commands supported on a connection
+ */
+static ssize_t bnx2i_show_sq_info(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	return sprintf(buf, "0x%x\n", hba->max_sqes);
+}
+
+
+/**
+ * bnx2i_set_sq_info - update send queue (SQ) size parameter
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ * @count:	parameter buffer size
+ *
+ * Interface for user to change shared queue size allocated for each conn
+ * Must be within SQ limits and a power of 2. For the latter this is needed
+ * because of how libiscsi preallocates tasks.
+ */
+static ssize_t bnx2i_set_sq_info(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+	u32 val;
+	int max_sq_size;
+
+	if (hba->ofld_conns_active)
+		goto skip_config;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		max_sq_size = BNX2I_5770X_SQ_WQES_MAX;
+	else
+		max_sq_size = BNX2I_570X_SQ_WQES_MAX;
+
+	if (sscanf(buf, " 0x%x ", &val) > 0) {
+		if ((val >= BNX2I_SQ_WQES_MIN) && (val <= max_sq_size) &&
+		    (is_power_of_2(val)))
+			hba->max_sqes = val;
+	}
+
+	return count;
+
+skip_config:
+	printk(KERN_ERR "bnx2i: device busy, cannot change SQ size\n");
+	return 0;
+}
+
+
+/**
+ * bnx2i_show_ccell_info - returns command cell (HQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ *
+ * returns per-connection TCP history queue size parameter
+ */
+static ssize_t bnx2i_show_ccell_info(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	return sprintf(buf, "0x%x\n", hba->num_ccell);
+}
+
+
+/**
+ * bnx2i_get_link_state - set command cell (HQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ * @count:	parameter buffer size
+ *
+ * updates per-connection TCP history queue size parameter
+ */
+static ssize_t bnx2i_set_ccell_info(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	u32 val;
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	if (hba->ofld_conns_active)
+		goto skip_config;
+
+	if (sscanf(buf, " 0x%x ", &val) > 0) {
+		if ((val >= BNX2I_CCELLS_MIN) &&
+		    (val <= BNX2I_CCELLS_MAX)) {
+			hba->num_ccell = val;
+		}
+	}
+
+	return count;
+
+skip_config:
+	printk(KERN_ERR "bnx2i: device busy, cannot change CCELL size\n");
+	return 0;
+}
+
+
+static DEVICE_ATTR(sq_size, S_IRUGO | S_IWUSR,
+		   bnx2i_show_sq_info, bnx2i_set_sq_info);
+static DEVICE_ATTR(num_ccell, S_IRUGO | S_IWUSR,
+		   bnx2i_show_ccell_info, bnx2i_set_ccell_info);
+
+struct device_attribute *bnx2i_dev_attributes[] = {
+	&dev_attr_sq_size,
+	&dev_attr_num_ccell,
+	NULL
+};
diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h
index 59b0958d..e3133b5 100644
--- a/drivers/scsi/cxgb3i/cxgb3i.h
+++ b/drivers/scsi/cxgb3i/cxgb3i.h
@@ -144,7 +144,6 @@
 void cxgb3i_adapter_open(struct t3cdev *);
 void cxgb3i_adapter_close(struct t3cdev *);
 
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
 struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
 				       struct net_device *);
 void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
index 9212400..74369a3 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -13,6 +13,7 @@
 
 #include <linux/inet.h>
 #include <linux/crypto.h>
+#include <net/dst.h>
 #include <net/tcp.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
@@ -178,7 +179,7 @@
  * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure via net_device
  * @t3dev: t3cdev adapter
  */
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
+static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
 {
 	struct cxgb3i_adapter *snic;
 	int i;
@@ -261,20 +262,27 @@
 
 /**
  * cxgb3i_ep_connect - establish TCP connection to target portal
+ * @shost:		scsi host to use
  * @dst_addr:		target IP address
  * @non_blocking:	blocking or non-blocking call
  *
  * Initiates a TCP/IP connection to the dst_addr
  */
-static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
+static struct iscsi_endpoint *cxgb3i_ep_connect(struct Scsi_Host *shost,
+						struct sockaddr *dst_addr,
 						int non_blocking)
 {
 	struct iscsi_endpoint *ep;
 	struct cxgb3i_endpoint *cep;
-	struct cxgb3i_hba *hba;
+	struct cxgb3i_hba *hba = NULL;
 	struct s3_conn *c3cn = NULL;
 	int err = 0;
 
+	if (shost)
+		hba = iscsi_host_priv(shost);
+
+	cxgb3i_api_debug("shost 0x%p, hba 0x%p.\n", shost, hba);
+
 	c3cn = cxgb3i_c3cn_create();
 	if (!c3cn) {
 		cxgb3i_log_info("ep connect OOM.\n");
@@ -282,17 +290,27 @@
 		goto release_conn;
 	}
 
-	err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr);
+	err = cxgb3i_c3cn_connect(hba ? hba->ndev : NULL, c3cn,
+				 (struct sockaddr_in *)dst_addr);
 	if (err < 0) {
 		cxgb3i_log_info("ep connect failed.\n");
 		goto release_conn;
 	}
+
 	hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev);
 	if (!hba) {
 		err = -ENOSPC;
 		cxgb3i_log_info("NOT going through cxgbi device.\n");
 		goto release_conn;
 	}
+
+	if (shost && hba != iscsi_host_priv(shost)) {
+		err = -ENOSPC;
+		cxgb3i_log_info("Could not connect through request host%u\n",
+				shost->host_no);
+		goto release_conn;
+	}
+
 	if (c3cn_is_closing(c3cn)) {
 		err = -ENOSPC;
 		cxgb3i_log_info("ep connect unable to connect.\n");
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index e11c9c1..c1d5be4 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1479,12 +1479,13 @@
 	return NULL;
 }
 
-static struct rtable *find_route(__be32 saddr, __be32 daddr,
+static struct rtable *find_route(struct net_device *dev,
+				 __be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
 	struct rtable *rt;
 	struct flowi fl = {
-		.oif = 0,
+		.oif = dev ? dev->ifindex : 0,
 		.nl_u = {
 			 .ip4_u = {
 				   .daddr = daddr,
@@ -1573,36 +1574,40 @@
  *
  * return 0 if active open request is sent, < 0 otherwise.
  */
-int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin)
+int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
+			struct sockaddr_in *usin)
 {
 	struct rtable *rt;
-	struct net_device *dev;
 	struct cxgb3i_sdev_data *cdata;
 	struct t3cdev *cdev;
 	__be32 sipv4;
 	int err;
 
+	c3cn_conn_debug("c3cn 0x%p, dev 0x%p.\n", c3cn, dev);
+
 	if (usin->sin_family != AF_INET)
 		return -EAFNOSUPPORT;
 
 	c3cn->daddr.sin_port = usin->sin_port;
 	c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr;
 
-	rt = find_route(c3cn->saddr.sin_addr.s_addr,
+	rt = find_route(dev, c3cn->saddr.sin_addr.s_addr,
 			c3cn->daddr.sin_addr.s_addr,
 			c3cn->saddr.sin_port,
 			c3cn->daddr.sin_port);
 	if (rt == NULL) {
-		c3cn_conn_debug("NO route to 0x%x, port %u.\n",
+		c3cn_conn_debug("NO route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		return -ENETUNREACH;
 	}
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
-		c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n",
+		c3cn_conn_debug("multi-cast route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		ip_rt_put(rt);
 		return -ENETUNREACH;
 	}
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h
index ebfca96..6a1d86b 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.h
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h
@@ -169,7 +169,8 @@
 void cxgb3i_sdev_remove(struct t3cdev *);
 
 struct s3_conn *cxgb3i_c3cn_create(void);
-int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *);
+int cxgb3i_c3cn_connect(struct net_device *, struct s3_conn *,
+			struct sockaddr_in *);
 void cxgb3i_c3cn_rx_credits(struct s3_conn *, int);
 int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *);
 void cxgb3i_c3cn_release(struct s3_conn *);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 43b8c51..fd0544f 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -561,6 +561,12 @@
 	struct rdac_dh_data *h = get_rdac_data(sdev);
 	switch (sense_hdr->sense_key) {
 	case NOT_READY:
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01)
+			/* LUN Not Ready - Logical Unit Not Ready and is in
+			* the process of becoming ready
+			* Just retry.
+			*/
+			return ADD_TO_MLQUEUE;
 		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x81)
 			/* LUN Not Ready - Storage firmware incompatible
 			 * Manual code synchonisation required.
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index be5099d..c7076ce 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1825,7 +1825,7 @@
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type)) {
 		ha->cp_stat[i] = READY;
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 0);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 0);
 		return 0;
 	}
 
@@ -2144,13 +2144,13 @@
 		if (!cpp->din)
 			input_only = 0;
 
-		if (SCpnt->request->sector < minsec)
-			minsec = SCpnt->request->sector;
-		if (SCpnt->request->sector > maxsec)
-			maxsec = SCpnt->request->sector;
+		if (blk_rq_pos(SCpnt->request) < minsec)
+			minsec = blk_rq_pos(SCpnt->request);
+		if (blk_rq_pos(SCpnt->request) > maxsec)
+			maxsec = blk_rq_pos(SCpnt->request);
 
-		sl[n] = SCpnt->request->sector;
-		ioseek += SCpnt->request->nr_sectors;
+		sl[n] = blk_rq_pos(SCpnt->request);
+		ioseek += blk_rq_sectors(SCpnt->request);
 
 		if (!n)
 			continue;
@@ -2190,7 +2190,7 @@
 			k = il[n];
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
-			ll[n] = SCpnt->request->nr_sectors;
+			ll[n] = blk_rq_sectors(SCpnt->request);
 			pl[n] = SCpnt->serial_number;
 
 			if (!n)
@@ -2236,12 +2236,12 @@
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
 			scmd_printk(KERN_INFO, SCpnt,
-			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %ld"
+			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %u"
 			     " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
 			     (ihdlr ? "ihdlr" : "qcomm"),
 			     SCpnt->serial_number, k, flushcount,
-			     n_ready, SCpnt->request->sector,
-			     SCpnt->request->nr_sectors, cursec, YESNO(s),
+			     n_ready, blk_rq_pos(SCpnt->request),
+			     blk_rq_sectors(SCpnt->request), cursec, YESNO(s),
 			     YESNO(r), YESNO(rev), YESNO(input_only),
 			     YESNO(overlap), cpp->din);
 		}
@@ -2408,7 +2408,7 @@
 
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type))
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 1);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 1);
 
 	tstatus = status_byte(spp->target_status);
 
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 03e1926..e606b48 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -54,7 +54,6 @@
 /* fcoe host list */
 LIST_HEAD(fcoe_hostlist);
 DEFINE_RWLOCK(fcoe_hostlist_lock);
-DEFINE_TIMER(fcoe_timer, NULL, 0, 0);
 DEFINE_PER_CPU(struct fcoe_percpu_s, fcoe_percpu);
 
 /* Function Prototypes */
@@ -71,7 +70,7 @@
 static int fcoe_hostlist_add(const struct fc_lport *);
 static int fcoe_hostlist_remove(const struct fc_lport *);
 
-static int fcoe_check_wait_queue(struct fc_lport *);
+static void fcoe_check_wait_queue(struct fc_lport *, struct sk_buff *);
 static int fcoe_device_notification(struct notifier_block *, ulong, void *);
 static void fcoe_dev_setup(void);
 static void fcoe_dev_cleanup(void);
@@ -146,6 +145,7 @@
 	lp->link_up = 0;
 	lp->qfull = 0;
 	lp->max_retry_count = 3;
+	lp->max_rport_retry_count = 3;
 	lp->e_d_tov = 2 * 1000;	/* FC-FS default */
 	lp->r_a_tov = 2 * 2 * 1000;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
@@ -167,6 +167,18 @@
 }
 
 /**
+ * fcoe_queue_timer() - fcoe queue timer
+ * @lp: the fc_lport pointer
+ *
+ * Calls fcoe_check_wait_queue on timeout
+ *
+ */
+static void fcoe_queue_timer(ulong lp)
+{
+	fcoe_check_wait_queue((struct fc_lport *)lp, NULL);
+}
+
+/**
  * fcoe_netdev_config() - Set up netdev for SW FCoE
  * @lp : ptr to the fc_lport
  * @netdev : ptr to the associated netdevice struct
@@ -236,6 +248,7 @@
 	}
 	skb_queue_head_init(&fc->fcoe_pending_queue);
 	fc->fcoe_pending_queue_active = 0;
+	setup_timer(&fc->timer, fcoe_queue_timer, (unsigned long)lp);
 
 	/* setup Source Mac Address */
 	memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr,
@@ -386,6 +399,9 @@
 	/* Free existing skbs */
 	fcoe_clean_pending_queue(lp);
 
+	/* Stop the timer */
+	del_timer_sync(&fc->timer);
+
 	/* Free memory used by statistical counters */
 	fc_lport_free_stats(lp);
 
@@ -988,7 +1004,7 @@
  */
 int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp)
 {
-	int wlen, rc = 0;
+	int wlen;
 	u32 crc;
 	struct ethhdr *eh;
 	struct fcoe_crc_eof *cp;
@@ -1021,8 +1037,7 @@
 	sof = fr_sof(fp);
 	eof = fr_eof(fp);
 
-	elen = (fc->real_dev->priv_flags & IFF_802_1Q_VLAN) ?
-		sizeof(struct vlan_ethhdr) : sizeof(struct ethhdr);
+	elen = sizeof(struct ethhdr);
 	hlen = sizeof(struct fcoe_hdr);
 	tlen = sizeof(struct fcoe_crc_eof);
 	wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE;
@@ -1107,18 +1122,9 @@
 	/* send down to lld */
 	fr_dev(fp) = lp;
 	if (fc->fcoe_pending_queue.qlen)
-		rc = fcoe_check_wait_queue(lp);
-
-	if (rc == 0)
-		rc = fcoe_start_io(skb);
-
-	if (rc) {
-		spin_lock_bh(&fc->fcoe_pending_queue.lock);
-		__skb_queue_tail(&fc->fcoe_pending_queue, skb);
-		spin_unlock_bh(&fc->fcoe_pending_queue.lock);
-		if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
-			lp->qfull = 1;
-	}
+		fcoe_check_wait_queue(lp, skb);
+	else if (fcoe_start_io(skb))
+		fcoe_check_wait_queue(lp, skb);
 
 	return 0;
 }
@@ -1268,32 +1274,6 @@
 }
 
 /**
- * fcoe_watchdog() - fcoe timer callback
- * @vp:
- *
- * This checks the pending queue length for fcoe and set lport qfull
- * if the FCOE_MAX_QUEUE_DEPTH is reached. This is done for all fc_lport on the
- * fcoe_hostlist.
- *
- * Returns: 0 for success
- */
-void fcoe_watchdog(ulong vp)
-{
-	struct fcoe_softc *fc;
-
-	read_lock(&fcoe_hostlist_lock);
-	list_for_each_entry(fc, &fcoe_hostlist, list) {
-		if (fc->ctlr.lp)
-			fcoe_check_wait_queue(fc->ctlr.lp);
-	}
-	read_unlock(&fcoe_hostlist_lock);
-
-	fcoe_timer.expires = jiffies + (1 * HZ);
-	add_timer(&fcoe_timer);
-}
-
-
-/**
  * fcoe_check_wait_queue() - attempt to clear the transmit backlog
  * @lp: the fc_lport
  *
@@ -1305,16 +1285,17 @@
  * The wait_queue is used when the skb transmit fails. skb will go
  * in the wait_queue which will be emptied by the timer function or
  * by the next skb transmit.
- *
- * Returns: 0 for success
  */
-static int fcoe_check_wait_queue(struct fc_lport *lp)
+static void fcoe_check_wait_queue(struct fc_lport *lp, struct sk_buff *skb)
 {
 	struct fcoe_softc *fc = lport_priv(lp);
-	struct sk_buff *skb;
-	int rc = -1;
+	int rc;
 
 	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+
+	if (skb)
+		__skb_queue_tail(&fc->fcoe_pending_queue, skb);
+
 	if (fc->fcoe_pending_queue_active)
 		goto out;
 	fc->fcoe_pending_queue_active = 1;
@@ -1340,23 +1321,26 @@
 
 	if (fc->fcoe_pending_queue.qlen < FCOE_LOW_QUEUE_DEPTH)
 		lp->qfull = 0;
+	if (fc->fcoe_pending_queue.qlen && !timer_pending(&fc->timer))
+		mod_timer(&fc->timer, jiffies + 2);
 	fc->fcoe_pending_queue_active = 0;
-	rc = fc->fcoe_pending_queue.qlen;
 out:
+	if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
+		lp->qfull = 1;
 	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
-	return rc;
+	return;
 }
 
 /**
  * fcoe_dev_setup() - setup link change notification interface
  */
-static void fcoe_dev_setup()
+static void fcoe_dev_setup(void)
 {
 	register_netdevice_notifier(&fcoe_notifier);
 }
 
 /**
- * fcoe_dev_setup() - cleanup link change notification interface
+ * fcoe_dev_cleanup() - cleanup link change notification interface
  */
 static void fcoe_dev_cleanup(void)
 {
@@ -1815,10 +1799,6 @@
 	/* Setup link change notification */
 	fcoe_dev_setup();
 
-	setup_timer(&fcoe_timer, fcoe_watchdog, 0);
-
-	mod_timer(&fcoe_timer, jiffies + (10 * HZ));
-
 	fcoe_if_init();
 
 	return 0;
@@ -1844,9 +1824,6 @@
 
 	fcoe_dev_cleanup();
 
-	/* Stop the timer */
-	del_timer_sync(&fcoe_timer);
-
 	/* releases the associated fcoe hosts */
 	list_for_each_entry_safe(fc, tmp, &fcoe_hostlist, list)
 		fcoe_if_destroy(fc->real_dev);
diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h
index 917aae8..a1eb8c1 100644
--- a/drivers/scsi/fcoe/fcoe.h
+++ b/drivers/scsi/fcoe/fcoe.h
@@ -61,6 +61,7 @@
 	struct packet_type  fip_packet_type;
 	struct sk_buff_head fcoe_pending_queue;
 	u8	fcoe_pending_queue_active;
+	struct timer_list timer;		/* queue timer */
 	struct fcoe_ctlr ctlr;
 };
 
diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index 62ba0f3..9294118 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -213,7 +213,7 @@
 	sol->desc.size.fd_size = htons(fcoe_size);
 
 	skb_put(skb, sizeof(*sol));
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	fip->send(fip, skb);
@@ -365,7 +365,7 @@
 	}
 
 	skb_put(skb, len);
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	fip->send(fip, skb);
@@ -424,7 +424,7 @@
 	if (dtype != ELS_FLOGI)
 		memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN);
 
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	return 0;
@@ -447,14 +447,10 @@
 	u16 old_xid;
 	u8 op;
 
-	if (fip->state == FIP_ST_NON_FIP)
-		return 0;
-
 	fh = (struct fc_frame_header *)skb->data;
 	op = *(u8 *)(fh + 1);
 
-	switch (op) {
-	case ELS_FLOGI:
+	if (op == ELS_FLOGI) {
 		old_xid = fip->flogi_oxid;
 		fip->flogi_oxid = ntohs(fh->fh_ox_id);
 		if (fip->state == FIP_ST_AUTO) {
@@ -466,6 +462,15 @@
 			fip->map_dest = 1;
 			return 0;
 		}
+		if (fip->state == FIP_ST_NON_FIP)
+			fip->map_dest = 1;
+	}
+
+	if (fip->state == FIP_ST_NON_FIP)
+		return 0;
+
+	switch (op) {
+	case ELS_FLOGI:
 		op = FIP_DT_FLOGI;
 		break;
 	case ELS_FDISC:
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 32ef6b8..a840728 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -680,6 +680,7 @@
 	}
 
 	lp->max_retry_count = fnic->config.flogi_retries;
+	lp->max_rport_retry_count = fnic->config.plogi_retries;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
 			      FCP_SPPF_CONF_COMPL);
 	if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR)
diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c
index 59349a3..1258da3 100644
--- a/drivers/scsi/gdth_proc.c
+++ b/drivers/scsi/gdth_proc.c
@@ -152,6 +152,7 @@
                          struct Scsi_Host *host, gdth_ha_str *ha)
 {
     int size = 0,len = 0;
+    int hlen;
     off_t begin = 0,pos = 0;
     int id, i, j, k, sec, flag;
     int no_mdrv = 0, drv_no, is_mirr;
@@ -192,11 +193,11 @@
     if (reserve_list[0] == 0xff)
         strcpy(hrec, "--");
     else {
-        sprintf(hrec, "%d", reserve_list[0]);
+        hlen = sprintf(hrec, "%d", reserve_list[0]);
         for (i = 1;  i < MAX_RES_ARGS; i++) {
             if (reserve_list[i] == 0xff) 
                 break;
-            sprintf(hrec,"%s,%d", hrec, reserve_list[i]);
+            hlen += snprintf(hrec + hlen , 161 - hlen, ",%d", reserve_list[i]);
         }
     }
     size = sprintf(buffer+len,
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ea4abee..b4b805e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -110,7 +110,7 @@
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_XPORT_DEAD, DID_ERROR, 0, 1, "transport dead" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_CONFIG_ERROR, DID_ERROR, 1, 1, "configuration error" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_NAME_SERVER_FAIL, DID_ERROR, 1, 1, "name server failure" },
-	{ IBMVFC_FABRIC_MAPPED, IBMVFC_LINK_HALTED, DID_REQUEUE, 0, 0, "link halted" },
+	{ IBMVFC_FABRIC_MAPPED, IBMVFC_LINK_HALTED, DID_REQUEUE, 1, 0, "link halted" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_XPORT_GENERAL, DID_OK, 1, 0, "general transport error" },
 
 	{ IBMVFC_VIOS_FAILURE, IBMVFC_CRQ_FAILURE, DID_REQUEUE, 1, 1, "CRQ failure" },
@@ -143,6 +143,7 @@
 static void ibmvfc_tgt_send_prli(struct ibmvfc_target *);
 static void ibmvfc_tgt_send_plogi(struct ibmvfc_target *);
 static void ibmvfc_tgt_query_target(struct ibmvfc_target *);
+static void ibmvfc_npiv_logout(struct ibmvfc_host *);
 
 static const char *unknown_error = "unknown error";
 
@@ -275,7 +276,7 @@
 	int fc_rsp_len = rsp->fcp_rsp_len;
 
 	if ((rsp->flags & FCP_RSP_LEN_VALID) &&
-	    ((!fc_rsp_len && fc_rsp_len != 4 && fc_rsp_len != 8) ||
+	    ((fc_rsp_len && fc_rsp_len != 4 && fc_rsp_len != 8) ||
 	     rsp->data.info.rsp_code))
 		return DID_ERROR << 16;
 
@@ -431,6 +432,8 @@
 	case IBMVFC_TGT_ACTION_DEL_RPORT:
 		break;
 	default:
+		if (action == IBMVFC_TGT_ACTION_DEL_RPORT)
+			tgt->add_rport = 0;
 		tgt->action = action;
 		break;
 	}
@@ -475,6 +478,10 @@
 		if (vhost->action == IBMVFC_HOST_ACTION_INIT_WAIT)
 			vhost->action = action;
 		break;
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
+		if (vhost->action == IBMVFC_HOST_ACTION_LOGO)
+			vhost->action = action;
+		break;
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
 		if (vhost->action == IBMVFC_HOST_ACTION_INIT)
 			vhost->action = action;
@@ -483,7 +490,7 @@
 		switch (vhost->action) {
 		case IBMVFC_HOST_ACTION_INIT_WAIT:
 		case IBMVFC_HOST_ACTION_NONE:
-		case IBMVFC_HOST_ACTION_TGT_ADD:
+		case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
 			vhost->action = action;
 			break;
 		default:
@@ -494,11 +501,11 @@
 		if (vhost->action == IBMVFC_HOST_ACTION_ALLOC_TGTS)
 			vhost->action = action;
 		break;
+	case IBMVFC_HOST_ACTION_LOGO:
 	case IBMVFC_HOST_ACTION_INIT:
 	case IBMVFC_HOST_ACTION_TGT_DEL:
 	case IBMVFC_HOST_ACTION_QUERY_TGTS:
 	case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
-	case IBMVFC_HOST_ACTION_TGT_ADD:
 	case IBMVFC_HOST_ACTION_NONE:
 	default:
 		vhost->action = action;
@@ -576,7 +583,7 @@
 		}
 
 		list_for_each_entry(tgt, &vhost->targets, queue)
-			tgt->need_login = 1;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		scsi_block_requests(vhost->host);
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
 		vhost->job_step = ibmvfc_npiv_login;
@@ -646,6 +653,7 @@
 	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
 	vhost->state = IBMVFC_NO_CRQ;
+	vhost->logged_in = 0;
 	dma_unmap_single(vhost->dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
 	free_page((unsigned long)crq->msgs);
 }
@@ -692,6 +700,7 @@
 	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
 	vhost->state = IBMVFC_NO_CRQ;
+	vhost->logged_in = 0;
 	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
 
 	/* Clean out the queue */
@@ -807,10 +816,10 @@
 }
 
 /**
- * __ibmvfc_reset_host - Reset the connection to the server (no locking)
+ * ibmvfc_hard_reset_host - Reset the connection to the server by breaking the CRQ
  * @vhost:	struct ibmvfc host to reset
  **/
-static void __ibmvfc_reset_host(struct ibmvfc_host *vhost)
+static void ibmvfc_hard_reset_host(struct ibmvfc_host *vhost)
 {
 	int rc;
 
@@ -826,9 +835,25 @@
 }
 
 /**
- * ibmvfc_reset_host - Reset the connection to the server
+ * __ibmvfc_reset_host - Reset the connection to the server (no locking)
  * @vhost:	struct ibmvfc host to reset
  **/
+static void __ibmvfc_reset_host(struct ibmvfc_host *vhost)
+{
+	if (vhost->logged_in && vhost->action != IBMVFC_HOST_ACTION_LOGO_WAIT &&
+	    !ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
+		scsi_block_requests(vhost->host);
+		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_LOGO);
+		vhost->job_step = ibmvfc_npiv_logout;
+		wake_up(&vhost->work_wait_q);
+	} else
+		ibmvfc_hard_reset_host(vhost);
+}
+
+/**
+ * ibmvfc_reset_host - Reset the connection to the server
+ * @vhost:	ibmvfc host struct
+ **/
 static void ibmvfc_reset_host(struct ibmvfc_host *vhost)
 {
 	unsigned long flags;
@@ -842,9 +867,13 @@
  * ibmvfc_retry_host_init - Retry host initialization if allowed
  * @vhost:	ibmvfc host struct
  *
+ * Returns: 1 if init will be retried / 0 if not
+ *
  **/
-static void ibmvfc_retry_host_init(struct ibmvfc_host *vhost)
+static int ibmvfc_retry_host_init(struct ibmvfc_host *vhost)
 {
+	int retry = 0;
+
 	if (vhost->action == IBMVFC_HOST_ACTION_INIT_WAIT) {
 		vhost->delay_init = 1;
 		if (++vhost->init_retries > IBMVFC_MAX_HOST_INIT_RETRIES) {
@@ -853,11 +882,14 @@
 			ibmvfc_link_down(vhost, IBMVFC_HOST_OFFLINE);
 		} else if (vhost->init_retries == IBMVFC_MAX_HOST_INIT_RETRIES)
 			__ibmvfc_reset_host(vhost);
-		else
+		else {
 			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
+			retry = 1;
+		}
 	}
 
 	wake_up(&vhost->work_wait_q);
+	return retry;
 }
 
 /**
@@ -1137,8 +1169,9 @@
 	login_info->partition_num = vhost->partition_number;
 	login_info->vfc_frame_version = 1;
 	login_info->fcp_version = 3;
+	login_info->flags = IBMVFC_FLUSH_ON_HALT;
 	if (vhost->client_migrated)
-		login_info->flags = IBMVFC_CLIENT_MIGRATED;
+		login_info->flags |= IBMVFC_CLIENT_MIGRATED;
 
 	login_info->max_cmds = max_requests + IBMVFC_NUM_INTERNAL_REQ;
 	login_info->capabilities = IBMVFC_CAN_MIGRATE;
@@ -1452,6 +1485,27 @@
 }
 
 /**
+ * ibmvfc_relogin - Log back into the specified device
+ * @sdev:	scsi device struct
+ *
+ **/
+static void ibmvfc_relogin(struct scsi_device *sdev)
+{
+	struct ibmvfc_host *vhost = shost_priv(sdev->host);
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	struct ibmvfc_target *tgt;
+
+	list_for_each_entry(tgt, &vhost->targets, queue) {
+		if (rport == tgt->rport) {
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+			break;
+		}
+	}
+
+	ibmvfc_reinit_host(vhost);
+}
+
+/**
  * ibmvfc_scsi_done - Handle responses from commands
  * @evt:	ibmvfc event to be handled
  *
@@ -1483,7 +1537,7 @@
 			if ((rsp->flags & FCP_SNS_LEN_VALID) && rsp->fcp_sense_len && rsp_len <= 8)
 				memcpy(cmnd->sense_buffer, rsp->data.sense + rsp_len, sense_len);
 			if ((vfc_cmd->status & IBMVFC_VIOS_FAILURE) && (vfc_cmd->error == IBMVFC_PLOGI_REQUIRED))
-				ibmvfc_reinit_host(evt->vhost);
+				ibmvfc_relogin(cmnd->device);
 
 			if (!cmnd->result && (!scsi_get_resid(cmnd) || (rsp->flags & FCP_RESID_OVER)))
 				cmnd->result = (DID_ERROR << 16);
@@ -2148,13 +2202,31 @@
 				struct ibmvfc_host *vhost)
 {
 	const char *desc = ibmvfc_get_ae_desc(crq->event);
+	struct ibmvfc_target *tgt;
 
 	ibmvfc_log(vhost, 3, "%s event received. scsi_id: %llx, wwpn: %llx,"
 		   " node_name: %llx\n", desc, crq->scsi_id, crq->wwpn, crq->node_name);
 
 	switch (crq->event) {
-	case IBMVFC_AE_LINK_UP:
 	case IBMVFC_AE_RESUME:
+		switch (crq->link_state) {
+		case IBMVFC_AE_LS_LINK_DOWN:
+			ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+			break;
+		case IBMVFC_AE_LS_LINK_DEAD:
+			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+			break;
+		case IBMVFC_AE_LS_LINK_UP:
+		case IBMVFC_AE_LS_LINK_BOUNCED:
+		default:
+			vhost->events_to_log |= IBMVFC_AE_LINKUP;
+			vhost->delay_init = 1;
+			__ibmvfc_reset_host(vhost);
+			break;
+		};
+
+		break;
+	case IBMVFC_AE_LINK_UP:
 		vhost->events_to_log |= IBMVFC_AE_LINKUP;
 		vhost->delay_init = 1;
 		__ibmvfc_reset_host(vhost);
@@ -2168,9 +2240,23 @@
 	case IBMVFC_AE_SCN_NPORT:
 	case IBMVFC_AE_SCN_GROUP:
 		vhost->events_to_log |= IBMVFC_AE_RSCN;
+		ibmvfc_reinit_host(vhost);
+		break;
 	case IBMVFC_AE_ELS_LOGO:
 	case IBMVFC_AE_ELS_PRLO:
 	case IBMVFC_AE_ELS_PLOGI:
+		list_for_each_entry(tgt, &vhost->targets, queue) {
+			if (!crq->scsi_id && !crq->wwpn && !crq->node_name)
+				break;
+			if (crq->scsi_id && tgt->scsi_id != crq->scsi_id)
+				continue;
+			if (crq->wwpn && tgt->ids.port_name != crq->wwpn)
+				continue;
+			if (crq->node_name && tgt->ids.node_name != crq->node_name)
+				continue;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+		}
+
 		ibmvfc_reinit_host(vhost);
 		break;
 	case IBMVFC_AE_LINK_DOWN:
@@ -2222,6 +2308,7 @@
 		return;
 	case IBMVFC_CRQ_XPORT_EVENT:
 		vhost->state = IBMVFC_NO_CRQ;
+		vhost->logged_in = 0;
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
 		if (crq->format == IBMVFC_PARTITION_MIGRATED) {
 			/* We need to re-setup the interpartition connection */
@@ -2299,7 +2386,7 @@
 		done = 1;
 	}
 
-	if (vhost->state != IBMVFC_NO_CRQ && vhost->action == IBMVFC_HOST_ACTION_NONE)
+	if (vhost->scan_complete)
 		done = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	return done;
@@ -2434,14 +2521,6 @@
 			vhost->login_buf->resp.partition_name);
 }
 
-static struct device_attribute ibmvfc_host_partition_name = {
-	.attr = {
-		.name = "partition_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_partition_name,
-};
-
 static ssize_t ibmvfc_show_host_device_name(struct device *dev,
 					    struct device_attribute *attr, char *buf)
 {
@@ -2452,14 +2531,6 @@
 			vhost->login_buf->resp.device_name);
 }
 
-static struct device_attribute ibmvfc_host_device_name = {
-	.attr = {
-		.name = "device_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_device_name,
-};
-
 static ssize_t ibmvfc_show_host_loc_code(struct device *dev,
 					 struct device_attribute *attr, char *buf)
 {
@@ -2470,14 +2541,6 @@
 			vhost->login_buf->resp.port_loc_code);
 }
 
-static struct device_attribute ibmvfc_host_loc_code = {
-	.attr = {
-		.name = "port_loc_code",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_loc_code,
-};
-
 static ssize_t ibmvfc_show_host_drc_name(struct device *dev,
 					 struct device_attribute *attr, char *buf)
 {
@@ -2488,14 +2551,6 @@
 			vhost->login_buf->resp.drc_name);
 }
 
-static struct device_attribute ibmvfc_host_drc_name = {
-	.attr = {
-		.name = "drc_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_drc_name,
-};
-
 static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
 					     struct device_attribute *attr, char *buf)
 {
@@ -2504,13 +2559,13 @@
 	return snprintf(buf, PAGE_SIZE, "%d\n", vhost->login_buf->resp.version);
 }
 
-static struct device_attribute ibmvfc_host_npiv_version = {
-	.attr = {
-		.name = "npiv_version",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_npiv_version,
-};
+static ssize_t ibmvfc_show_host_capabilities(struct device *dev,
+					     struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvfc_host *vhost = shost_priv(shost);
+	return snprintf(buf, PAGE_SIZE, "%llx\n", vhost->login_buf->resp.capabilities);
+}
 
 /**
  * ibmvfc_show_log_level - Show the adapter's error logging level
@@ -2556,14 +2611,14 @@
 	return strlen(buf);
 }
 
-static struct device_attribute ibmvfc_log_level_attr = {
-	.attr = {
-		.name =		"log_level",
-		.mode =		S_IRUGO | S_IWUSR,
-	},
-	.show = ibmvfc_show_log_level,
-	.store = ibmvfc_store_log_level
-};
+static DEVICE_ATTR(partition_name, S_IRUGO, ibmvfc_show_host_partition_name, NULL);
+static DEVICE_ATTR(device_name, S_IRUGO, ibmvfc_show_host_device_name, NULL);
+static DEVICE_ATTR(port_loc_code, S_IRUGO, ibmvfc_show_host_loc_code, NULL);
+static DEVICE_ATTR(drc_name, S_IRUGO, ibmvfc_show_host_drc_name, NULL);
+static DEVICE_ATTR(npiv_version, S_IRUGO, ibmvfc_show_host_npiv_version, NULL);
+static DEVICE_ATTR(capabilities, S_IRUGO, ibmvfc_show_host_capabilities, NULL);
+static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
+		   ibmvfc_show_log_level, ibmvfc_store_log_level);
 
 #ifdef CONFIG_SCSI_IBMVFC_TRACE
 /**
@@ -2612,12 +2667,13 @@
 #endif
 
 static struct device_attribute *ibmvfc_attrs[] = {
-	&ibmvfc_host_partition_name,
-	&ibmvfc_host_device_name,
-	&ibmvfc_host_loc_code,
-	&ibmvfc_host_drc_name,
-	&ibmvfc_host_npiv_version,
-	&ibmvfc_log_level_attr,
+	&dev_attr_partition_name,
+	&dev_attr_device_name,
+	&dev_attr_port_loc_code,
+	&dev_attr_drc_name,
+	&dev_attr_npiv_version,
+	&dev_attr_capabilities,
+	&dev_attr_log_level,
 	NULL
 };
 
@@ -2774,15 +2830,19 @@
  * @tgt:		ibmvfc target struct
  * @job_step:	initialization job step
  *
+ * Returns: 1 if step will be retried / 0 if not
+ *
  **/
-static void ibmvfc_retry_tgt_init(struct ibmvfc_target *tgt,
+static int ibmvfc_retry_tgt_init(struct ibmvfc_target *tgt,
 				  void (*job_step) (struct ibmvfc_target *))
 {
 	if (++tgt->init_retries > IBMVFC_MAX_TGT_INIT_RETRIES) {
 		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		wake_up(&tgt->vhost->work_wait_q);
+		return 0;
 	} else
 		ibmvfc_init_tgt(tgt, job_step);
+	return 1;
 }
 
 /* Defined in FC-LS */
@@ -2831,7 +2891,7 @@
 	struct ibmvfc_process_login *rsp = &evt->xfer_iu->prli;
 	struct ibmvfc_prli_svc_parms *parms = &rsp->parms;
 	u32 status = rsp->common.status;
-	int index;
+	int index, level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -2850,7 +2910,7 @@
 						tgt->ids.roles |= FC_PORT_ROLE_FCP_TARGET;
 					if (parms->service_parms & IBMVFC_PRLI_INITIATOR_FUNC)
 						tgt->ids.roles |= FC_PORT_ROLE_FCP_INITIATOR;
-					ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_ADD_RPORT);
+					tgt->add_rport = 1;
 				} else
 					ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 			} else if (prli_rsp[index].retry)
@@ -2867,13 +2927,14 @@
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Process Login failed: %s (%x:%x) rc=0x%02X\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error),
-			rsp->status, rsp->error, status);
 		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_prli);
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_prli);
 		else
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Process Login failed: %s (%x:%x) rc=0x%02X\n",
+			ibmvfc_get_cmd_error(rsp->status, rsp->error),
+			rsp->status, rsp->error, status);
 		break;
 	};
 
@@ -2932,6 +2993,7 @@
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_port_login *rsp = &evt->xfer_iu->plogi;
 	u32 status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -2960,15 +3022,15 @@
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
+		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_plogi);
+		else
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
 			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
 			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
 			ibmvfc_get_ls_explain(rsp->fc_explain), rsp->fc_explain, status);
-
-		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_plogi);
-		else
-			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		break;
 	};
 
@@ -3129,13 +3191,13 @@
 	case IBMVFC_MAD_SUCCESS:
 		tgt_dbg(tgt, "ADISC succeeded\n");
 		if (ibmvfc_adisc_needs_plogi(mad, tgt))
-			tgt->need_login = 1;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		break;
 	case IBMVFC_MAD_DRIVER_FAILED:
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt->need_login = 1;
+		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		fc_reason = (mad->fc_iu.response[1] & 0x00ff0000) >> 16;
 		fc_explain = (mad->fc_iu.response[1] & 0x0000ff00) >> 8;
 		tgt_info(tgt, "ADISC failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
@@ -3322,6 +3384,7 @@
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_query_tgt *rsp = &evt->xfer_iu->query_tgt;
 	u32 status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -3341,19 +3404,19 @@
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
-			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
-			ibmvfc_get_gs_explain(rsp->fc_explain), rsp->fc_explain, status);
-
 		if ((rsp->status & IBMVFC_FABRIC_MAPPED) == IBMVFC_FABRIC_MAPPED &&
 		    rsp->error == IBMVFC_UNABLE_TO_PERFORM_REQ &&
 		    rsp->fc_explain == IBMVFC_PORT_NAME_NOT_REG)
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		else if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_query_target);
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_query_target);
 		else
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
+			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
+			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
+			ibmvfc_get_gs_explain(rsp->fc_explain), rsp->fc_explain, status);
 		break;
 	};
 
@@ -3420,7 +3483,7 @@
 	}
 	spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
-	tgt = mempool_alloc(vhost->tgt_pool, GFP_KERNEL);
+	tgt = mempool_alloc(vhost->tgt_pool, GFP_NOIO);
 	if (!tgt) {
 		dev_err(vhost->dev, "Target allocation failure for scsi id %08llx\n",
 			scsi_id);
@@ -3472,6 +3535,7 @@
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_discover_targets *rsp = &evt->xfer_iu->discover_targets;
 	u32 mad_status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
@@ -3480,9 +3544,9 @@
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS);
 		break;
 	case IBMVFC_MAD_FAILED:
-		dev_err(vhost->dev, "Discover Targets failed: %s (%x:%x)\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
-		ibmvfc_retry_host_init(vhost);
+		level += ibmvfc_retry_host_init(vhost);
+		ibmvfc_log(vhost, level, "Discover Targets failed: %s (%x:%x)\n",
+			   ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		break;
 	case IBMVFC_MAD_DRIVER_FAILED:
 		break;
@@ -3534,18 +3598,19 @@
 	u32 mad_status = evt->xfer_iu->npiv_login.common.status;
 	struct ibmvfc_npiv_login_resp *rsp = &vhost->login_buf->resp;
 	unsigned int npiv_max_sectors;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
 		ibmvfc_free_event(evt);
 		break;
 	case IBMVFC_MAD_FAILED:
-		dev_err(vhost->dev, "NPIV Login failed: %s (%x:%x)\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_host_init(vhost);
+			level += ibmvfc_retry_host_init(vhost);
 		else
 			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+		ibmvfc_log(vhost, level, "NPIV Login failed: %s (%x:%x)\n",
+			   ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		ibmvfc_free_event(evt);
 		return;
 	case IBMVFC_MAD_CRQ_ERROR:
@@ -3578,6 +3643,7 @@
 		return;
 	}
 
+	vhost->logged_in = 1;
 	npiv_max_sectors = min((uint)(rsp->max_dma_len >> 9), IBMVFC_MAX_SECTORS);
 	dev_info(vhost->dev, "Host partition: %s, device: %s %s %s max sectors %u\n",
 		 rsp->partition_name, rsp->device_name, rsp->port_loc_code,
@@ -3636,6 +3702,65 @@
 };
 
 /**
+ * ibmvfc_npiv_logout_done - Completion handler for NPIV Logout
+ * @vhost:		ibmvfc host struct
+ *
+ **/
+static void ibmvfc_npiv_logout_done(struct ibmvfc_event *evt)
+{
+	struct ibmvfc_host *vhost = evt->vhost;
+	u32 mad_status = evt->xfer_iu->npiv_logout.common.status;
+
+	ibmvfc_free_event(evt);
+
+	switch (mad_status) {
+	case IBMVFC_MAD_SUCCESS:
+		if (list_empty(&vhost->sent) &&
+		    vhost->action == IBMVFC_HOST_ACTION_LOGO_WAIT) {
+			ibmvfc_init_host(vhost, 0);
+			return;
+		}
+		break;
+	case IBMVFC_MAD_FAILED:
+	case IBMVFC_MAD_NOT_SUPPORTED:
+	case IBMVFC_MAD_CRQ_ERROR:
+	case IBMVFC_MAD_DRIVER_FAILED:
+	default:
+		ibmvfc_dbg(vhost, "NPIV Logout failed. 0x%X\n", mad_status);
+		break;
+	}
+
+	ibmvfc_hard_reset_host(vhost);
+}
+
+/**
+ * ibmvfc_npiv_logout - Issue an NPIV Logout
+ * @vhost:		ibmvfc host struct
+ *
+ **/
+static void ibmvfc_npiv_logout(struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_npiv_logout_mad *mad;
+	struct ibmvfc_event *evt;
+
+	evt = ibmvfc_get_event(vhost);
+	ibmvfc_init_event(evt, ibmvfc_npiv_logout_done, IBMVFC_MAD_FORMAT);
+
+	mad = &evt->iu.npiv_logout;
+	memset(mad, 0, sizeof(*mad));
+	mad->common.version = 1;
+	mad->common.opcode = IBMVFC_NPIV_LOGOUT;
+	mad->common.length = sizeof(struct ibmvfc_npiv_logout_mad);
+
+	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_LOGO_WAIT);
+
+	if (!ibmvfc_send_event(evt, vhost, default_timeout))
+		ibmvfc_dbg(vhost, "Sent NPIV logout\n");
+	else
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+}
+
+/**
  * ibmvfc_dev_init_to_do - Is there target initialization work to do?
  * @vhost:		ibmvfc host struct
  *
@@ -3671,6 +3796,7 @@
 	switch (vhost->action) {
 	case IBMVFC_HOST_ACTION_NONE:
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
 		return 0;
 	case IBMVFC_HOST_ACTION_TGT_INIT:
 	case IBMVFC_HOST_ACTION_QUERY_TGTS:
@@ -3683,9 +3809,9 @@
 			if (tgt->action == IBMVFC_TGT_ACTION_INIT_WAIT)
 				return 0;
 		return 1;
+	case IBMVFC_HOST_ACTION_LOGO:
 	case IBMVFC_HOST_ACTION_INIT:
 	case IBMVFC_HOST_ACTION_ALLOC_TGTS:
-	case IBMVFC_HOST_ACTION_TGT_ADD:
 	case IBMVFC_HOST_ACTION_TGT_DEL:
 	case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
 	case IBMVFC_HOST_ACTION_QUERY:
@@ -3740,25 +3866,26 @@
 static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt)
 {
 	struct ibmvfc_host *vhost = tgt->vhost;
-	struct fc_rport *rport = tgt->rport;
+	struct fc_rport *rport;
 	unsigned long flags;
 
-	if (rport) {
-		tgt_dbg(tgt, "Setting rport roles\n");
-		fc_remote_port_rolechg(rport, tgt->ids.roles);
-		spin_lock_irqsave(vhost->host->host_lock, flags);
-		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
-		spin_unlock_irqrestore(vhost->host->host_lock, flags);
-		return;
-	}
-
 	tgt_dbg(tgt, "Adding rport\n");
 	rport = fc_remote_port_add(vhost->host, 0, &tgt->ids);
 	spin_lock_irqsave(vhost->host->host_lock, flags);
-	tgt->rport = rport;
-	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
+
+	if (rport && tgt->action == IBMVFC_TGT_ACTION_DEL_RPORT) {
+		tgt_dbg(tgt, "Deleting rport\n");
+		list_del(&tgt->queue);
+		spin_unlock_irqrestore(vhost->host->host_lock, flags);
+		fc_remote_port_delete(rport);
+		del_timer_sync(&tgt->timer);
+		kref_put(&tgt->kref, ibmvfc_release_tgt);
+		return;
+	}
+
 	if (rport) {
 		tgt_dbg(tgt, "rport add succeeded\n");
+		tgt->rport = rport;
 		rport->maxframe_size = tgt->service_parms.common.bb_rcv_sz & 0x0fff;
 		rport->supported_classes = 0;
 		tgt->target_id = rport->scsi_target_id;
@@ -3789,8 +3916,12 @@
 	vhost->events_to_log = 0;
 	switch (vhost->action) {
 	case IBMVFC_HOST_ACTION_NONE:
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
 		break;
+	case IBMVFC_HOST_ACTION_LOGO:
+		vhost->job_step(vhost);
+		break;
 	case IBMVFC_HOST_ACTION_INIT:
 		BUG_ON(vhost->state != IBMVFC_INITIALIZING);
 		if (vhost->delay_init) {
@@ -3836,11 +3967,21 @@
 
 		if (vhost->state == IBMVFC_INITIALIZING) {
 			if (vhost->action == IBMVFC_HOST_ACTION_TGT_DEL_FAILED) {
-				ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
-				ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_ADD);
-				vhost->init_retries = 0;
-				spin_unlock_irqrestore(vhost->host->host_lock, flags);
-				scsi_unblock_requests(vhost->host);
+				if (vhost->reinit) {
+					vhost->reinit = 0;
+					scsi_block_requests(vhost->host);
+					ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+				} else {
+					ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
+					ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
+					wake_up(&vhost->init_wait_q);
+					schedule_work(&vhost->rport_add_work_q);
+					vhost->init_retries = 0;
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					scsi_unblock_requests(vhost->host);
+				}
+
 				return;
 			} else {
 				ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
@@ -3871,24 +4012,6 @@
 		if (!ibmvfc_dev_init_to_do(vhost))
 			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_DEL_FAILED);
 		break;
-	case IBMVFC_HOST_ACTION_TGT_ADD:
-		list_for_each_entry(tgt, &vhost->targets, queue) {
-			if (tgt->action == IBMVFC_TGT_ACTION_ADD_RPORT) {
-				spin_unlock_irqrestore(vhost->host->host_lock, flags);
-				ibmvfc_tgt_add_rport(tgt);
-				return;
-			}
-		}
-
-		if (vhost->reinit && !ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
-			vhost->reinit = 0;
-			scsi_block_requests(vhost->host);
-			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
-		} else {
-			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
-			wake_up(&vhost->init_wait_q);
-		}
-		break;
 	default:
 		break;
 	};
@@ -4118,6 +4241,56 @@
 }
 
 /**
+ * ibmvfc_rport_add_thread - Worker thread for rport adds
+ * @work:	work struct
+ *
+ **/
+static void ibmvfc_rport_add_thread(struct work_struct *work)
+{
+	struct ibmvfc_host *vhost = container_of(work, struct ibmvfc_host,
+						 rport_add_work_q);
+	struct ibmvfc_target *tgt;
+	struct fc_rport *rport;
+	unsigned long flags;
+	int did_work;
+
+	ENTER;
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+	do {
+		did_work = 0;
+		if (vhost->state != IBMVFC_ACTIVE)
+			break;
+
+		list_for_each_entry(tgt, &vhost->targets, queue) {
+			if (tgt->add_rport) {
+				did_work = 1;
+				tgt->add_rport = 0;
+				kref_get(&tgt->kref);
+				rport = tgt->rport;
+				if (!rport) {
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					ibmvfc_tgt_add_rport(tgt);
+				} else if (get_device(&rport->dev)) {
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					tgt_dbg(tgt, "Setting rport roles\n");
+					fc_remote_port_rolechg(rport, tgt->ids.roles);
+					put_device(&rport->dev);
+				}
+
+				kref_put(&tgt->kref, ibmvfc_release_tgt);
+				spin_lock_irqsave(vhost->host->host_lock, flags);
+				break;
+			}
+		}
+	} while(did_work);
+
+	if (vhost->state == IBMVFC_ACTIVE)
+		vhost->scan_complete = 1;
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+	LEAVE;
+}
+
+/**
  * ibmvfc_probe - Adapter hot plug add entry point
  * @vdev:	vio device struct
  * @id:	vio device id struct
@@ -4160,6 +4333,7 @@
 	strcpy(vhost->partition_name, "UNKNOWN");
 	init_waitqueue_head(&vhost->work_wait_q);
 	init_waitqueue_head(&vhost->init_wait_q);
+	INIT_WORK(&vhost->rport_add_work_q, ibmvfc_rport_add_thread);
 
 	if ((rc = ibmvfc_alloc_mem(vhost)))
 		goto free_scsi_host;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index ca1dcf7..c2668d7 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -29,8 +29,8 @@
 #include "viosrp.h"
 
 #define IBMVFC_NAME	"ibmvfc"
-#define IBMVFC_DRIVER_VERSION		"1.0.5"
-#define IBMVFC_DRIVER_DATE		"(March 19, 2009)"
+#define IBMVFC_DRIVER_VERSION		"1.0.6"
+#define IBMVFC_DRIVER_DATE		"(May 28, 2009)"
 
 #define IBMVFC_DEFAULT_TIMEOUT	60
 #define IBMVFC_ADISC_CANCEL_TIMEOUT	45
@@ -57,9 +57,10 @@
  * Ensure we have resources for ERP and initialization:
  * 1 for ERP
  * 1 for initialization
+ * 1 for NPIV Logout
  * 2 for each discovery thread
  */
-#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + (disc_threads * 2))
+#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + 1 + (disc_threads * 2))
 
 #define IBMVFC_MAD_SUCCESS		0x00
 #define IBMVFC_MAD_NOT_SUPPORTED	0xF1
@@ -127,6 +128,7 @@
 	IBMVFC_IMPLICIT_LOGOUT	= 0x0040,
 	IBMVFC_PASSTHRU		= 0x0200,
 	IBMVFC_TMF_MAD		= 0x0100,
+	IBMVFC_NPIV_LOGOUT	= 0x0800,
 };
 
 struct ibmvfc_mad_common {
@@ -143,6 +145,10 @@
 	struct srp_direct_buf buffer;
 }__attribute__((packed, aligned (8)));
 
+struct ibmvfc_npiv_logout_mad {
+	struct ibmvfc_mad_common common;
+}__attribute__((packed, aligned (8)));
+
 #define IBMVFC_MAX_NAME 256
 
 struct ibmvfc_npiv_login {
@@ -201,7 +207,8 @@
 #define IBMVFC_NATIVE_FC		0x01
 #define IBMVFC_CAN_FLUSH_ON_HALT	0x08
 	u32 reserved;
-	u64 capabilites;
+	u64 capabilities;
+#define IBMVFC_CAN_FLUSH_ON_HALT	0x08
 	u32 max_cmds;
 	u32 scsi_id_sz;
 	u64 max_dma_len;
@@ -541,9 +548,17 @@
 	dma_addr_t msg_token;
 };
 
+enum ibmvfc_ae_link_state {
+	IBMVFC_AE_LS_LINK_UP		= 0x01,
+	IBMVFC_AE_LS_LINK_BOUNCED	= 0x02,
+	IBMVFC_AE_LS_LINK_DOWN		= 0x04,
+	IBMVFC_AE_LS_LINK_DEAD		= 0x08,
+};
+
 struct ibmvfc_async_crq {
 	volatile u8 valid;
-	u8 pad[3];
+	u8 link_state;
+	u8 pad[2];
 	u32 pad2;
 	volatile u64 event;
 	volatile u64 scsi_id;
@@ -561,6 +576,7 @@
 union ibmvfc_iu {
 	struct ibmvfc_mad_common mad_common;
 	struct ibmvfc_npiv_login_mad npiv_login;
+	struct ibmvfc_npiv_logout_mad npiv_logout;
 	struct ibmvfc_discover_targets discover_targets;
 	struct ibmvfc_port_login plogi;
 	struct ibmvfc_process_login prli;
@@ -575,7 +591,6 @@
 	IBMVFC_TGT_ACTION_NONE = 0,
 	IBMVFC_TGT_ACTION_INIT,
 	IBMVFC_TGT_ACTION_INIT_WAIT,
-	IBMVFC_TGT_ACTION_ADD_RPORT,
 	IBMVFC_TGT_ACTION_DEL_RPORT,
 };
 
@@ -588,6 +603,7 @@
 	int target_id;
 	enum ibmvfc_target_action action;
 	int need_login;
+	int add_rport;
 	int init_retries;
 	u32 cancel_key;
 	struct ibmvfc_service_parms service_parms;
@@ -627,6 +643,8 @@
 
 enum ibmvfc_host_action {
 	IBMVFC_HOST_ACTION_NONE = 0,
+	IBMVFC_HOST_ACTION_LOGO,
+	IBMVFC_HOST_ACTION_LOGO_WAIT,
 	IBMVFC_HOST_ACTION_INIT,
 	IBMVFC_HOST_ACTION_INIT_WAIT,
 	IBMVFC_HOST_ACTION_QUERY,
@@ -635,7 +653,6 @@
 	IBMVFC_HOST_ACTION_ALLOC_TGTS,
 	IBMVFC_HOST_ACTION_TGT_INIT,
 	IBMVFC_HOST_ACTION_TGT_DEL_FAILED,
-	IBMVFC_HOST_ACTION_TGT_ADD,
 };
 
 enum ibmvfc_host_state {
@@ -682,6 +699,8 @@
 	int client_migrated;
 	int reinit;
 	int delay_init;
+	int scan_complete;
+	int logged_in;
 	int events_to_log;
 #define IBMVFC_AE_LINKUP	0x0001
 #define IBMVFC_AE_LINKDOWN	0x0002
@@ -692,6 +711,7 @@
 	void (*job_step) (struct ibmvfc_host *);
 	struct task_struct *work_thread;
 	struct tasklet_struct tasklet;
+	struct work_struct rport_add_work_q;
 	wait_queue_head_t init_wait_q;
 	wait_queue_head_t work_wait_q;
 };
@@ -707,6 +727,12 @@
 #define tgt_err(t, fmt, ...)		\
 	dev_err((t)->vhost->dev, "%llX: " fmt, (t)->scsi_id, ##__VA_ARGS__)
 
+#define tgt_log(t, level, fmt, ...) \
+	do { \
+		if ((t)->vhost->log_level >= level) \
+			tgt_err(t, fmt, ##__VA_ARGS__); \
+	} while (0)
+
 #define ibmvfc_dbg(vhost, ...) \
 	DBG_CMD(dev_info((vhost)->dev, ##__VA_ARGS__))
 
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index c9aa761..11d2602 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -70,6 +70,7 @@
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 #include <asm/firmware.h>
 #include <asm/vio.h>
 #include <asm/firmware.h>
@@ -87,9 +88,15 @@
  */
 static int max_id = 64;
 static int max_channel = 3;
-static int init_timeout = 5;
+static int init_timeout = 300;
+static int login_timeout = 60;
+static int info_timeout = 30;
+static int abort_timeout = 60;
+static int reset_timeout = 60;
 static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT;
 static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2;
+static int fast_fail = 1;
+static int client_reserve = 1;
 
 static struct scsi_transport_template *ibmvscsi_transport_template;
 
@@ -110,6 +117,10 @@
 MODULE_PARM_DESC(init_timeout, "Initialization timeout in seconds");
 module_param_named(max_requests, max_requests, int, S_IRUGO);
 MODULE_PARM_DESC(max_requests, "Maximum requests for this adapter");
+module_param_named(fast_fail, fast_fail, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_fail, "Enable fast fail. [Default=1]");
+module_param_named(client_reserve, client_reserve, int, S_IRUGO );
+MODULE_PARM_DESC(client_reserve, "Attempt client managed reserve/release");
 
 /* ------------------------------------------------------------
  * Routines for the event pool and event structs
@@ -781,105 +792,53 @@
 /* ------------------------------------------------------------
  * Routines for driver initialization
  */
-/**
- * adapter_info_rsp: - Handle response to MAD adapter info request
- * @evt_struct:	srp_event_struct with the response
- *
- * Used as a "done" callback by when sending adapter_info. Gets called
- * by ibmvscsi_handle_crq()
-*/
-static void adapter_info_rsp(struct srp_event_struct *evt_struct)
-{
-	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
-	dma_unmap_single(hostdata->dev,
-			 evt_struct->iu.mad.adapter_info.buffer,
-			 evt_struct->iu.mad.adapter_info.common.length,
-			 DMA_BIDIRECTIONAL);
 
-	if (evt_struct->xfer_iu->mad.adapter_info.common.status) {
-		dev_err(hostdata->dev, "error %d getting adapter info\n",
-			evt_struct->xfer_iu->mad.adapter_info.common.status);
-	} else {
-		dev_info(hostdata->dev, "host srp version: %s, "
-			 "host partition %s (%d), OS %d, max io %u\n",
-			 hostdata->madapter_info.srp_version,
-			 hostdata->madapter_info.partition_name,
-			 hostdata->madapter_info.partition_number,
-			 hostdata->madapter_info.os_type,
-			 hostdata->madapter_info.port_max_txu[0]);
-		
-		if (hostdata->madapter_info.port_max_txu[0]) 
-			hostdata->host->max_sectors = 
-				hostdata->madapter_info.port_max_txu[0] >> 9;
-		
-		if (hostdata->madapter_info.os_type == 3 &&
-		    strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
-			dev_err(hostdata->dev, "host (Ver. %s) doesn't support large transfers\n",
-				hostdata->madapter_info.srp_version);
-			dev_err(hostdata->dev, "limiting scatterlists to %d\n",
-				MAX_INDIRECT_BUFS);
-			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
-		}
+/**
+ * map_persist_bufs: - Pre-map persistent data for adapter logins
+ * @hostdata:   ibmvscsi_host_data of host
+ *
+ * Map the capabilities and adapter info DMA buffers to avoid runtime failures.
+ * Return 1 on error, 0 on success.
+ */
+static int map_persist_bufs(struct ibmvscsi_host_data *hostdata)
+{
+
+	hostdata->caps_addr = dma_map_single(hostdata->dev, &hostdata->caps,
+					     sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(hostdata->dev, hostdata->caps_addr)) {
+		dev_err(hostdata->dev, "Unable to map capabilities buffer!\n");
+		return 1;
 	}
+
+	hostdata->adapter_info_addr = dma_map_single(hostdata->dev,
+						     &hostdata->madapter_info,
+						     sizeof(hostdata->madapter_info),
+						     DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(hostdata->dev, hostdata->adapter_info_addr)) {
+		dev_err(hostdata->dev, "Unable to map adapter info buffer!\n");
+		dma_unmap_single(hostdata->dev, hostdata->caps_addr,
+				 sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
+		return 1;
+	}
+
+	return 0;
 }
 
 /**
- * send_mad_adapter_info: - Sends the mad adapter info request
- *      and stores the result so it can be retrieved with
- *      sysfs.  We COULD consider causing a failure if the
- *      returned SRP version doesn't match ours.
- * @hostdata:	ibmvscsi_host_data of host
- * 
- * Returns zero if successful.
-*/
-static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
+ * unmap_persist_bufs: - Unmap persistent data needed for adapter logins
+ * @hostdata:   ibmvscsi_host_data of host
+ *
+ * Unmap the capabilities and adapter info DMA buffers
+ */
+static void unmap_persist_bufs(struct ibmvscsi_host_data *hostdata)
 {
-	struct viosrp_adapter_info *req;
-	struct srp_event_struct *evt_struct;
-	unsigned long flags;
-	dma_addr_t addr;
+	dma_unmap_single(hostdata->dev, hostdata->caps_addr,
+			 sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
 
-	evt_struct = get_event_struct(&hostdata->pool);
-	if (!evt_struct) {
-		dev_err(hostdata->dev,
-			"couldn't allocate an event for ADAPTER_INFO_REQ!\n");
-		return;
-	}
-
-	init_event_struct(evt_struct,
-			  adapter_info_rsp,
-			  VIOSRP_MAD_FORMAT,
-			  init_timeout);
-	
-	req = &evt_struct->iu.mad.adapter_info;
-	memset(req, 0x00, sizeof(*req));
-	
-	req->common.type = VIOSRP_ADAPTER_INFO_TYPE;
-	req->common.length = sizeof(hostdata->madapter_info);
-	req->buffer = addr = dma_map_single(hostdata->dev,
-					    &hostdata->madapter_info,
-					    sizeof(hostdata->madapter_info),
-					    DMA_BIDIRECTIONAL);
-
-	if (dma_mapping_error(hostdata->dev, req->buffer)) {
-		if (!firmware_has_feature(FW_FEATURE_CMO))
-			dev_err(hostdata->dev,
-			        "Unable to map request_buffer for "
-			        "adapter_info!\n");
-		free_event_struct(&hostdata->pool, evt_struct);
-		return;
-	}
-	
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) {
-		dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
-		dma_unmap_single(hostdata->dev,
-				 addr,
-				 sizeof(hostdata->madapter_info),
-				 DMA_BIDIRECTIONAL);
-	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-};
+	dma_unmap_single(hostdata->dev, hostdata->adapter_info_addr,
+			 sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL);
+}
 
 /**
  * login_rsp: - Handle response to SRP login request
@@ -909,9 +868,7 @@
 	}
 
 	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
-
-	if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0)
-		dev_err(hostdata->dev, "Invalid request_limit.\n");
+	hostdata->client_migrated = 0;
 
 	/* Now we know what the real request-limit is.
 	 * This value is set rather than added to request_limit because
@@ -922,15 +879,12 @@
 
 	/* If we had any pending I/Os, kick them */
 	scsi_unblock_requests(hostdata->host);
-
-	send_mad_adapter_info(hostdata);
-	return;
 }
 
 /**
  * send_srp_login: - Sends the srp login
  * @hostdata:	ibmvscsi_host_data of host
- * 
+ *
  * Returns zero if successful.
 */
 static int send_srp_login(struct ibmvscsi_host_data *hostdata)
@@ -939,22 +893,17 @@
 	unsigned long flags;
 	struct srp_login_req *login;
 	struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool);
-	if (!evt_struct) {
-		dev_err(hostdata->dev, "couldn't allocate an event for login req!\n");
-		return FAILED;
-	}
 
-	init_event_struct(evt_struct,
-			  login_rsp,
-			  VIOSRP_SRP_FORMAT,
-			  init_timeout);
+	BUG_ON(!evt_struct);
+	init_event_struct(evt_struct, login_rsp,
+			  VIOSRP_SRP_FORMAT, login_timeout);
 
 	login = &evt_struct->iu.srp.login_req;
-	memset(login, 0x00, sizeof(struct srp_login_req));
+	memset(login, 0, sizeof(*login));
 	login->opcode = SRP_LOGIN_REQ;
 	login->req_it_iu_len = sizeof(union srp_iu);
 	login->req_buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
-	
+
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
@@ -962,13 +911,241 @@
 	 */
 	atomic_set(&hostdata->request_limit, 0);
 
-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
 	return rc;
 };
 
 /**
+ * capabilities_rsp: - Handle response to MAD adapter capabilities request
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending adapter_info.
+ */
+static void capabilities_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+
+	if (evt_struct->xfer_iu->mad.capabilities.common.status) {
+		dev_err(hostdata->dev, "error 0x%X getting capabilities info\n",
+			evt_struct->xfer_iu->mad.capabilities.common.status);
+	} else {
+		if (hostdata->caps.migration.common.server_support != SERVER_SUPPORTS_CAP)
+			dev_info(hostdata->dev, "Partition migration not supported\n");
+
+		if (client_reserve) {
+			if (hostdata->caps.reserve.common.server_support ==
+			    SERVER_SUPPORTS_CAP)
+				dev_info(hostdata->dev, "Client reserve enabled\n");
+			else
+				dev_info(hostdata->dev, "Client reserve not supported\n");
+		}
+	}
+
+	send_srp_login(hostdata);
+}
+
+/**
+ * send_mad_capabilities: - Sends the mad capabilities request
+ *      and stores the result so it can be retrieved with
+ * @hostdata:	ibmvscsi_host_data of host
+ */
+static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata)
+{
+	struct viosrp_capabilities *req;
+	struct srp_event_struct *evt_struct;
+	unsigned long flags;
+	struct device_node *of_node = hostdata->dev->archdata.of_node;
+	const char *location;
+
+	evt_struct = get_event_struct(&hostdata->pool);
+	BUG_ON(!evt_struct);
+
+	init_event_struct(evt_struct, capabilities_rsp,
+			  VIOSRP_MAD_FORMAT, info_timeout);
+
+	req = &evt_struct->iu.mad.capabilities;
+	memset(req, 0, sizeof(*req));
+
+	hostdata->caps.flags = CAP_LIST_SUPPORTED;
+	if (hostdata->client_migrated)
+		hostdata->caps.flags |= CLIENT_MIGRATED;
+
+	strncpy(hostdata->caps.name, dev_name(&hostdata->host->shost_gendev),
+		sizeof(hostdata->caps.name));
+	hostdata->caps.name[sizeof(hostdata->caps.name) - 1] = '\0';
+
+	location = of_get_property(of_node, "ibm,loc-code", NULL);
+	location = location ? location : dev_name(hostdata->dev);
+	strncpy(hostdata->caps.loc, location, sizeof(hostdata->caps.loc));
+	hostdata->caps.loc[sizeof(hostdata->caps.loc) - 1] = '\0';
+
+	req->common.type = VIOSRP_CAPABILITIES_TYPE;
+	req->buffer = hostdata->caps_addr;
+
+	hostdata->caps.migration.common.cap_type = MIGRATION_CAPABILITIES;
+	hostdata->caps.migration.common.length = sizeof(hostdata->caps.migration);
+	hostdata->caps.migration.common.server_support = SERVER_SUPPORTS_CAP;
+	hostdata->caps.migration.ecl = 1;
+
+	if (client_reserve) {
+		hostdata->caps.reserve.common.cap_type = RESERVATION_CAPABILITIES;
+		hostdata->caps.reserve.common.length = sizeof(hostdata->caps.reserve);
+		hostdata->caps.reserve.common.server_support = SERVER_SUPPORTS_CAP;
+		hostdata->caps.reserve.type = CLIENT_RESERVE_SCSI_2;
+		req->common.length = sizeof(hostdata->caps);
+	} else
+		req->common.length = sizeof(hostdata->caps) - sizeof(hostdata->caps.reserve);
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
+		dev_err(hostdata->dev, "couldn't send CAPABILITIES_REQ!\n");
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+};
+
+/**
+ * fast_fail_rsp: - Handle response to MAD enable fast fail
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending enable fast fail. Gets called
+ * by ibmvscsi_handle_crq()
+ */
+static void fast_fail_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+	u8 status = evt_struct->xfer_iu->mad.fast_fail.common.status;
+
+	if (status == VIOSRP_MAD_NOT_SUPPORTED)
+		dev_err(hostdata->dev, "fast_fail not supported in server\n");
+	else if (status == VIOSRP_MAD_FAILED)
+		dev_err(hostdata->dev, "fast_fail request failed\n");
+	else if (status != VIOSRP_MAD_SUCCESS)
+		dev_err(hostdata->dev, "error 0x%X enabling fast_fail\n", status);
+
+	send_mad_capabilities(hostdata);
+}
+
+/**
+ * init_host - Start host initialization
+ * @hostdata:	ibmvscsi_host_data of host
+ *
+ * Returns zero if successful.
+ */
+static int enable_fast_fail(struct ibmvscsi_host_data *hostdata)
+{
+	int rc;
+	unsigned long flags;
+	struct viosrp_fast_fail *fast_fail_mad;
+	struct srp_event_struct *evt_struct;
+
+	if (!fast_fail) {
+		send_mad_capabilities(hostdata);
+		return 0;
+	}
+
+	evt_struct = get_event_struct(&hostdata->pool);
+	BUG_ON(!evt_struct);
+
+	init_event_struct(evt_struct, fast_fail_rsp, VIOSRP_MAD_FORMAT, info_timeout);
+
+	fast_fail_mad = &evt_struct->iu.mad.fast_fail;
+	memset(fast_fail_mad, 0, sizeof(*fast_fail_mad));
+	fast_fail_mad->common.type = VIOSRP_ENABLE_FAST_FAIL;
+	fast_fail_mad->common.length = sizeof(*fast_fail_mad);
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+	return rc;
+}
+
+/**
+ * adapter_info_rsp: - Handle response to MAD adapter info request
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending adapter_info. Gets called
+ * by ibmvscsi_handle_crq()
+*/
+static void adapter_info_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+
+	if (evt_struct->xfer_iu->mad.adapter_info.common.status) {
+		dev_err(hostdata->dev, "error %d getting adapter info\n",
+			evt_struct->xfer_iu->mad.adapter_info.common.status);
+	} else {
+		dev_info(hostdata->dev, "host srp version: %s, "
+			 "host partition %s (%d), OS %d, max io %u\n",
+			 hostdata->madapter_info.srp_version,
+			 hostdata->madapter_info.partition_name,
+			 hostdata->madapter_info.partition_number,
+			 hostdata->madapter_info.os_type,
+			 hostdata->madapter_info.port_max_txu[0]);
+		
+		if (hostdata->madapter_info.port_max_txu[0]) 
+			hostdata->host->max_sectors = 
+				hostdata->madapter_info.port_max_txu[0] >> 9;
+		
+		if (hostdata->madapter_info.os_type == 3 &&
+		    strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
+			dev_err(hostdata->dev, "host (Ver. %s) doesn't support large transfers\n",
+				hostdata->madapter_info.srp_version);
+			dev_err(hostdata->dev, "limiting scatterlists to %d\n",
+				MAX_INDIRECT_BUFS);
+			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
+		}
+	}
+
+	enable_fast_fail(hostdata);
+}
+
+/**
+ * send_mad_adapter_info: - Sends the mad adapter info request
+ *      and stores the result so it can be retrieved with
+ *      sysfs.  We COULD consider causing a failure if the
+ *      returned SRP version doesn't match ours.
+ * @hostdata:	ibmvscsi_host_data of host
+ * 
+ * Returns zero if successful.
+*/
+static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
+{
+	struct viosrp_adapter_info *req;
+	struct srp_event_struct *evt_struct;
+	unsigned long flags;
+
+	evt_struct = get_event_struct(&hostdata->pool);
+	BUG_ON(!evt_struct);
+
+	init_event_struct(evt_struct,
+			  adapter_info_rsp,
+			  VIOSRP_MAD_FORMAT,
+			  info_timeout);
+	
+	req = &evt_struct->iu.mad.adapter_info;
+	memset(req, 0x00, sizeof(*req));
+	
+	req->common.type = VIOSRP_ADAPTER_INFO_TYPE;
+	req->common.length = sizeof(hostdata->madapter_info);
+	req->buffer = hostdata->adapter_info_addr;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
+		dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+};
+
+/**
+ * init_adapter: Start virtual adapter initialization sequence
+ *
+ */
+static void init_adapter(struct ibmvscsi_host_data *hostdata)
+{
+	send_mad_adapter_info(hostdata);
+}
+
+/**
  * sync_completion: Signal that a synchronous command has completed
  * Note that after returning from this call, the evt_struct is freed.
  * the caller waiting on this completion shouldn't touch the evt_struct
@@ -1029,7 +1206,7 @@
 		init_event_struct(evt,
 				  sync_completion,
 				  VIOSRP_SRP_FORMAT,
-				  init_timeout);
+				  abort_timeout);
 
 		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 	
@@ -1043,7 +1220,7 @@
 		evt->sync_srp = &srp_rsp;
 
 		init_completion(&evt->comp);
-		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, abort_timeout * 2);
 
 		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
 			break;
@@ -1152,7 +1329,7 @@
 		init_event_struct(evt,
 				  sync_completion,
 				  VIOSRP_SRP_FORMAT,
-				  init_timeout);
+				  reset_timeout);
 
 		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 
@@ -1165,7 +1342,7 @@
 		evt->sync_srp = &srp_rsp;
 
 		init_completion(&evt->comp);
-		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, reset_timeout * 2);
 
 		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
 			break;
@@ -1281,7 +1458,7 @@
 			if ((rc = ibmvscsi_ops->send_crq(hostdata,
 							 0xC002000000000000LL, 0)) == 0) {
 				/* Now login */
-				send_srp_login(hostdata);
+				init_adapter(hostdata);
 			} else {
 				dev_err(hostdata->dev, "Unable to send init rsp. rc=%ld\n", rc);
 			}
@@ -1291,7 +1468,7 @@
 			dev_info(hostdata->dev, "partner initialization complete\n");
 
 			/* Now login */
-			send_srp_login(hostdata);
+			init_adapter(hostdata);
 			break;
 		default:
 			dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format);
@@ -1303,6 +1480,7 @@
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
+			hostdata->client_migrated = 1;
 			purge_requests(hostdata, DID_REQUEUE);
 			if ((ibmvscsi_ops->reenable_crq_queue(&hostdata->queue,
 							      hostdata)) ||
@@ -1397,7 +1575,7 @@
 	init_event_struct(evt_struct,
 			  sync_completion,
 			  VIOSRP_MAD_FORMAT,
-			  init_timeout);
+			  info_timeout);
 
 	host_config = &evt_struct->iu.mad.host_config;
 
@@ -1419,7 +1597,7 @@
 
 	init_completion(&evt_struct->comp);
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	if (rc == 0)
 		wait_for_completion(&evt_struct->comp);
@@ -1444,7 +1622,7 @@
 	spin_lock_irqsave(shost->host_lock, lock_flags);
 	if (sdev->type == TYPE_DISK) {
 		sdev->allow_restart = 1;
-		blk_queue_rq_timeout(sdev->request_queue, 60 * HZ);
+		blk_queue_rq_timeout(sdev->request_queue, 120 * HZ);
 	}
 	scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun);
 	spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -1471,6 +1649,46 @@
 /* ------------------------------------------------------------
  * sysfs attributes
  */
+static ssize_t show_host_vhost_loc(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvscsi_host_data *hostdata = shost_priv(shost);
+	int len;
+
+	len = snprintf(buf, sizeof(hostdata->caps.loc), "%s\n",
+		       hostdata->caps.loc);
+	return len;
+}
+
+static struct device_attribute ibmvscsi_host_vhost_loc = {
+	.attr = {
+		 .name = "vhost_loc",
+		 .mode = S_IRUGO,
+		 },
+	.show = show_host_vhost_loc,
+};
+
+static ssize_t show_host_vhost_name(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvscsi_host_data *hostdata = shost_priv(shost);
+	int len;
+
+	len = snprintf(buf, sizeof(hostdata->caps.name), "%s\n",
+		       hostdata->caps.name);
+	return len;
+}
+
+static struct device_attribute ibmvscsi_host_vhost_name = {
+	.attr = {
+		 .name = "vhost_name",
+		 .mode = S_IRUGO,
+		 },
+	.show = show_host_vhost_name,
+};
+
 static ssize_t show_host_srp_version(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
@@ -1594,6 +1812,8 @@
 };
 
 static struct device_attribute *ibmvscsi_attrs[] = {
+	&ibmvscsi_host_vhost_loc,
+	&ibmvscsi_host_vhost_name,
 	&ibmvscsi_host_srp_version,
 	&ibmvscsi_host_partition_name,
 	&ibmvscsi_host_partition_number,
@@ -1674,6 +1894,11 @@
 	atomic_set(&hostdata->request_limit, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
+	if (map_persist_bufs(hostdata)) {
+		dev_err(&vdev->dev, "couldn't map persistent buffers\n");
+		goto persist_bufs_failed;
+	}
+
 	rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_events);
 	if (rc != 0 && rc != H_RESOURCE) {
 		dev_err(&vdev->dev, "couldn't initialize crq. rc=%d\n", rc);
@@ -1687,6 +1912,7 @@
 	host->max_lun = 8;
 	host->max_id = max_id;
 	host->max_channel = max_channel;
+	host->max_cmd_len = 16;
 
 	if (scsi_add_host(hostdata->host, hostdata->dev))
 		goto add_host_failed;
@@ -1733,6 +1959,8 @@
       init_pool_failed:
 	ibmvscsi_ops->release_crq_queue(&hostdata->queue, hostdata, max_events);
       init_crq_failed:
+	unmap_persist_bufs(hostdata);
+      persist_bufs_failed:
 	scsi_host_put(host);
       scsi_host_alloc_failed:
 	return -1;
@@ -1741,6 +1969,7 @@
 static int ibmvscsi_remove(struct vio_dev *vdev)
 {
 	struct ibmvscsi_host_data *hostdata = vdev->dev.driver_data;
+	unmap_persist_bufs(hostdata);
 	release_event_pool(&hostdata->pool, hostdata);
 	ibmvscsi_ops->release_crq_queue(&hostdata->queue, hostdata,
 					max_events);
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 2d4339d..7642530 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -90,6 +90,7 @@
 /* all driver data associated with a host adapter */
 struct ibmvscsi_host_data {
 	atomic_t request_limit;
+	int client_migrated;
 	struct device *dev;
 	struct event_pool pool;
 	struct crq_queue queue;
@@ -97,6 +98,9 @@
 	struct list_head sent;
 	struct Scsi_Host *host;
 	struct mad_adapter_info_data madapter_info;
+	struct capabilities caps;
+	dma_addr_t caps_addr;
+	dma_addr_t adapter_info_addr;
 };
 
 /* routines for managing a command/response queue */
diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h
index 2046045..2cd735d 100644
--- a/drivers/scsi/ibmvscsi/viosrp.h
+++ b/drivers/scsi/ibmvscsi/viosrp.h
@@ -37,6 +37,7 @@
 
 #define SRP_VERSION "16.a"
 #define SRP_MAX_IU_LEN	256
+#define SRP_MAX_LOC_LEN 32
 
 union srp_iu {
 	struct srp_login_req login_req;
@@ -86,7 +87,37 @@
 	VIOSRP_EMPTY_IU_TYPE = 0x01,
 	VIOSRP_ERROR_LOG_TYPE = 0x02,
 	VIOSRP_ADAPTER_INFO_TYPE = 0x03,
-	VIOSRP_HOST_CONFIG_TYPE = 0x04
+	VIOSRP_HOST_CONFIG_TYPE = 0x04,
+	VIOSRP_CAPABILITIES_TYPE = 0x05,
+	VIOSRP_ENABLE_FAST_FAIL = 0x08,
+};
+
+enum viosrp_mad_status {
+	VIOSRP_MAD_SUCCESS = 0x00,
+	VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
+	VIOSRP_MAD_FAILED = 0xF7,
+};
+
+enum viosrp_capability_type {
+	MIGRATION_CAPABILITIES = 0x01,
+	RESERVATION_CAPABILITIES = 0x02,
+};
+
+enum viosrp_capability_support {
+	SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
+	SERVER_SUPPORTS_CAP = 0x01,
+	SERVER_CAP_DATA = 0x02,
+};
+
+enum viosrp_reserve_type {
+	CLIENT_RESERVE_SCSI_2 = 0x01,
+};
+
+enum viosrp_capability_flag {
+	CLIENT_MIGRATED = 0x01,
+	CLIENT_RECONNECT = 0x02,
+	CAP_LIST_SUPPORTED = 0x04,
+	CAP_LIST_DATA = 0x08,
 };
 
 /* 
@@ -127,11 +158,46 @@
 	u64 buffer;
 };
 
+struct viosrp_fast_fail {
+	struct mad_common common;
+};
+
+struct viosrp_capabilities {
+	struct mad_common common;
+	u64 buffer;
+};
+
+struct mad_capability_common {
+	u32 cap_type;
+	u16 length;
+	u16 server_support;
+};
+
+struct mad_reserve_cap {
+	struct mad_capability_common common;
+	u32 type;
+};
+
+struct mad_migration_cap {
+	struct mad_capability_common common;
+	u32 ecl;
+};
+
+struct capabilities{
+	u32 flags;
+	char name[SRP_MAX_LOC_LEN];
+	char loc[SRP_MAX_LOC_LEN];
+	struct mad_migration_cap migration;
+	struct mad_reserve_cap reserve;
+};
+
 union mad_iu {
 	struct viosrp_empty_iu empty_iu;
 	struct viosrp_error_log error_log;
 	struct viosrp_adapter_info adapter_info;
 	struct viosrp_host_config host_config;
+	struct viosrp_fast_fail fast_fail;
+	struct viosrp_capabilities capabilities;
 };
 
 union viosrp_iu {
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index dd689de..0f8bc77 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7003,6 +7003,7 @@
 		ioa_cfg->sdt_state = ABORT_DUMP;
 	ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES;
 	ioa_cfg->in_ioa_bringdown = 1;
+	ioa_cfg->allow_cmds = 0;
 	ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
@@ -7688,7 +7689,7 @@
  * Return value:
  * 	none
  **/
-static void ipr_remove(struct pci_dev *pdev)
+static void __devexit ipr_remove(struct pci_dev *pdev)
 {
 	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
 
@@ -7864,7 +7865,7 @@
 	.name = IPR_NAME,
 	.id_table = ipr_pci_table,
 	.probe = ipr_probe,
-	.remove = ipr_remove,
+	.remove = __devexit_p(ipr_remove),
 	.shutdown = ipr_shutdown,
 	.err_handler = &ipr_err_handler,
 };
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 992af05..7af9bce 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -1159,6 +1159,10 @@
 		atomic_inc(&mp->stats.xid_not_found);
 		goto out;
 	}
+	if (ep->esb_stat & ESB_ST_COMPLETE) {
+		atomic_inc(&mp->stats.xid_not_found);
+		goto out;
+	}
 	if (ep->rxid == FC_XID_UNKNOWN)
 		ep->rxid = ntohs(fh->fh_rx_id);
 	if (ep->sid != 0 && ep->sid != ntoh24(fh->fh_d_id)) {
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 521f996..ad8b747 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1896,7 +1896,7 @@
 		sc_cmd->result = (DID_ERROR << 16) | fsp->cdb_status;
 		break;
 	case FC_CMD_ABORTED:
-		sc_cmd->result = (DID_ABORT << 16) | fsp->io_status;
+		sc_cmd->result = (DID_ERROR << 16) | fsp->io_status;
 		break;
 	case FC_CMD_TIME_OUT:
 		sc_cmd->result = (DID_BUS_BUSY << 16) | fsp->io_status;
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 747d73c..7bfbff7 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -478,7 +478,7 @@
 	if (PTR_ERR(fp) == -FC_EX_CLOSED)
 		return fc_rport_error(rport, fp);
 
-	if (rdata->retries < rdata->local_port->max_retry_count) {
+	if (rdata->retries < rdata->local_port->max_rport_retry_count) {
 		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n",
 			       PTR_ERR(fp), fc_rport_state(rport));
 		rdata->retries++;
@@ -1330,7 +1330,7 @@
 }
 EXPORT_SYMBOL(fc_rport_init);
 
-int fc_setup_rport()
+int fc_setup_rport(void)
 {
 	rport_event_queue = create_singlethread_workqueue("fc_rport_eq");
 	if (!rport_event_queue)
@@ -1339,7 +1339,7 @@
 }
 EXPORT_SYMBOL(fc_setup_rport);
 
-void fc_destroy_rport()
+void fc_destroy_rport(void)
 {
 	destroy_workqueue(rport_event_queue);
 }
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index e72b4ad..59908ae 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -81,7 +81,8 @@
 	struct Scsi_Host *shost = conn->session->host;
 	struct iscsi_host *ihost = shost_priv(shost);
 
-	queue_work(ihost->workq, &conn->xmitwork);
+	if (ihost->workq)
+		queue_work(ihost->workq, &conn->xmitwork);
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_queue_work);
 
@@ -109,11 +110,9 @@
 		 * if the window closed with IO queued, then kick the
 		 * xmit thread
 		 */
-		if (!list_empty(&session->leadconn->xmitqueue) ||
-		    !list_empty(&session->leadconn->mgmtqueue)) {
-			if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-				iscsi_conn_queue_work(session->leadconn);
-		}
+		if (!list_empty(&session->leadconn->cmdqueue) ||
+		    !list_empty(&session->leadconn->mgmtqueue))
+			iscsi_conn_queue_work(session->leadconn);
 	}
 }
 EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
@@ -257,9 +256,11 @@
 	itt_t itt;
 	int rc;
 
-	rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
-	if (rc)
-		return rc;
+	if (conn->session->tt->alloc_pdu) {
+		rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
+		if (rc)
+			return rc;
+	}
 	hdr = (struct iscsi_cmd *) task->hdr;
 	itt = hdr->itt;
 	memset(hdr, 0, sizeof(*hdr));
@@ -364,7 +365,6 @@
 		return -EIO;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->run_list);
 
 	conn->scsicmd_pdus_cnt++;
 	ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x "
@@ -380,26 +380,25 @@
 }
 
 /**
- * iscsi_complete_command - finish a task
+ * iscsi_free_task - free a task
  * @task: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml or cleans
  * up mgmt tasks then returns the task to the pool.
  */
-static void iscsi_complete_command(struct iscsi_task *task)
+static void iscsi_free_task(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
-	session->tt->cleanup_task(task);
-	list_del_init(&task->running);
-	task->state = ISCSI_TASK_COMPLETED;
-	task->sc = NULL;
+	ISCSI_DBG_SESSION(session, "freeing task itt 0x%x state %d sc %p\n",
+			  task->itt, task->state, task->sc);
 
-	if (conn->task == task)
-		conn->task = NULL;
+	session->tt->cleanup_task(task);
+	task->state = ISCSI_TASK_FREE;
+	task->sc = NULL;
 	/*
 	 * login task is preallocated so do not free
 	 */
@@ -408,9 +407,6 @@
 
 	__kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
 
-	if (conn->ping_task == task)
-		conn->ping_task = NULL;
-
 	if (sc) {
 		task->sc = NULL;
 		/* SCSI eh reuses commands to verify us */
@@ -433,7 +429,7 @@
 static void __iscsi_put_task(struct iscsi_task *task)
 {
 	if (atomic_dec_and_test(&task->refcount))
-		iscsi_complete_command(task);
+		iscsi_free_task(task);
 }
 
 void iscsi_put_task(struct iscsi_task *task)
@@ -446,26 +442,74 @@
 }
 EXPORT_SYMBOL_GPL(iscsi_put_task);
 
-/*
- * session lock must be held
+/**
+ * iscsi_complete_task - finish a task
+ * @task: iscsi cmd task
+ * @state: state to complete task with
+ *
+ * Must be called with session lock.
  */
-static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
-			 int err)
+static void iscsi_complete_task(struct iscsi_task *task, int state)
 {
-	struct scsi_cmnd *sc;
+	struct iscsi_conn *conn = task->conn;
 
+	ISCSI_DBG_SESSION(conn->session,
+			  "complete task itt 0x%x state %d sc %p\n",
+			  task->itt, task->state, task->sc);
+	if (task->state == ISCSI_TASK_COMPLETED ||
+	    task->state == ISCSI_TASK_ABRT_TMF ||
+	    task->state == ISCSI_TASK_ABRT_SESS_RECOV)
+		return;
+	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
+	task->state = state;
+
+	if (!list_empty(&task->running))
+		list_del_init(&task->running);
+
+	if (conn->task == task)
+		conn->task = NULL;
+
+	if (conn->ping_task == task)
+		conn->ping_task = NULL;
+
+	/* release get from queueing */
+	__iscsi_put_task(task);
+}
+
+/*
+ * session lock must be held and if not called for a task that is
+ * still pending or from the xmit thread, then xmit thread must
+ * be suspended.
+ */
+static void fail_scsi_task(struct iscsi_task *task, int err)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct scsi_cmnd *sc;
+	int state;
+
+	/*
+	 * if a command completes and we get a successful tmf response
+	 * we will hit this because the scsi eh abort code does not take
+	 * a ref to the task.
+	 */
 	sc = task->sc;
 	if (!sc)
 		return;
 
-	if (task->state == ISCSI_TASK_PENDING)
+	if (task->state == ISCSI_TASK_PENDING) {
 		/*
 		 * cmd never made it to the xmit thread, so we should not count
 		 * the cmd in the sequencing
 		 */
 		conn->session->queued_cmdsn--;
+		/* it was never sent so just complete like normal */
+		state = ISCSI_TASK_COMPLETED;
+	} else if (err == DID_TRANSPORT_DISRUPTED)
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+	else
+		state = ISCSI_TASK_ABRT_TMF;
 
-	sc->result = err;
+	sc->result = err << 16;
 	if (!scsi_bidi_cmnd(sc))
 		scsi_set_resid(sc, scsi_bufflen(sc));
 	else {
@@ -473,10 +517,7 @@
 		scsi_in(sc)->resid = scsi_in(sc)->length;
 	}
 
-	if (conn->task == task)
-		conn->task = NULL;
-	/* release ref from queuecommand */
-	__iscsi_put_task(task);
+	iscsi_complete_task(task, state);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
@@ -516,7 +557,6 @@
 		session->state = ISCSI_STATE_LOGGING_OUT;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->mgmt_run_list);
 	ISCSI_DBG_SESSION(session, "mgmtpdu [op 0x%x hdr->itt 0x%x "
 			  "datalen %d]\n", hdr->opcode & ISCSI_OPCODE_MASK,
 			  hdr->itt, task->data_count);
@@ -528,6 +568,7 @@
 		      char *data, uint32_t data_size)
 {
 	struct iscsi_session *session = conn->session;
+	struct iscsi_host *ihost = shost_priv(session->host);
 	struct iscsi_task *task;
 	itt_t itt;
 
@@ -544,6 +585,9 @@
 		 */
 		task = conn->login_task;
 	else {
+		if (session->state != ISCSI_STATE_LOGGED_IN)
+			return NULL;
+
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
@@ -559,6 +603,8 @@
 	atomic_set(&task->refcount, 1);
 	task->conn = conn;
 	task->sc = NULL;
+	INIT_LIST_HEAD(&task->running);
+	task->state = ISCSI_TASK_PENDING;
 
 	if (data_size) {
 		memcpy(task->data, data, data_size);
@@ -566,11 +612,14 @@
 	} else
 		task->data_count = 0;
 
-	if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
-		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
-				 "pdu for mgmt task.\n");
-		goto requeue_task;
+	if (conn->session->tt->alloc_pdu) {
+		if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
+			iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
+					 "pdu for mgmt task.\n");
+			goto free_task;
+		}
 	}
+
 	itt = task->hdr->itt;
 	task->hdr_len = sizeof(struct iscsi_hdr);
 	memcpy(task->hdr, hdr, sizeof(struct iscsi_hdr));
@@ -583,30 +632,22 @@
 						   task->conn->session->age);
 	}
 
-	INIT_LIST_HEAD(&task->running);
-	list_add_tail(&task->running, &conn->mgmtqueue);
-
-	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
+	if (!ihost->workq) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
 
 		if (session->tt->xmit_task(task))
 			goto free_task;
-
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->mgmtqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	return task;
 
 free_task:
 	__iscsi_put_task(task);
 	return NULL;
-
-requeue_task:
-	if (task != conn->login_task)
-		__kfifo_put(session->cmdpool.queue, (void*)&task,
-			    sizeof(void*));
-	return NULL;
 }
 
 int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
@@ -701,11 +742,10 @@
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 out:
-	ISCSI_DBG_SESSION(session, "done [sc %p res %d itt 0x%x]\n",
+	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-
-	__iscsi_put_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 /**
@@ -724,6 +764,7 @@
 	if (!(rhdr->flags & ISCSI_FLAG_DATA_STATUS))
 		return;
 
+	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin *)hdr);
 	sc->result = (DID_OK << 16) | rhdr->cmd_status;
 	conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
 	if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
@@ -738,8 +779,11 @@
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 
+	ISCSI_DBG_SESSION(conn->session, "data in with status done "
+			  "[sc %p res %d itt 0x%x]\n",
+			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -823,7 +867,7 @@
  *
  * The session lock must be held.
  */
-static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
+struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 {
 	struct iscsi_session *session = conn->session;
 	int i;
@@ -840,6 +884,7 @@
 
 	return session->cmds[i];
 }
+EXPORT_SYMBOL_GPL(iscsi_itt_to_task);
 
 /**
  * __iscsi_complete_pdu - complete pdu
@@ -959,7 +1004,7 @@
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -977,7 +1022,7 @@
 			goto recv_pdu;
 
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	default:
 		rc = ISCSI_ERR_BAD_OPCODE;
@@ -989,7 +1034,7 @@
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -1166,7 +1211,12 @@
 {
 	struct iscsi_conn *conn = task->conn;
 
-	list_move_tail(&task->running, &conn->requeue);
+	/*
+	 * this may be on the requeue list already if the xmit_task callout
+	 * is handling the r2ts while we are adding new ones
+	 */
+	if (list_empty(&task->running))
+		list_add_tail(&task->running, &conn->requeue);
 	iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1206,6 +1256,7 @@
 	while (!list_empty(&conn->mgmtqueue)) {
 		conn->task = list_entry(conn->mgmtqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (iscsi_prep_mgmt_task(conn, conn->task)) {
 			__iscsi_put_task(conn->task);
 			conn->task = NULL;
@@ -1217,23 +1268,26 @@
 	}
 
 	/* process pending command queue */
-	while (!list_empty(&conn->xmitqueue)) {
+	while (!list_empty(&conn->cmdqueue)) {
 		if (conn->tmf_state == TMF_QUEUED)
 			break;
 
-		conn->task = list_entry(conn->xmitqueue.next,
+		conn->task = list_entry(conn->cmdqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_command(conn, conn->task, DID_IMM_RETRY << 16);
+			fail_scsi_task(conn->task, DID_IMM_RETRY);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
 		if (rc) {
 			if (rc == -ENOMEM) {
+				list_add_tail(&conn->task->running,
+					      &conn->cmdqueue);
 				conn->task = NULL;
 				goto again;
 			} else
-				fail_command(conn, conn->task, DID_ABORT << 16);
+				fail_scsi_task(conn->task, DID_ABORT);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1260,8 +1314,8 @@
 
 		conn->task = list_entry(conn->requeue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		conn->task->state = ISCSI_TASK_RUNNING;
-		list_move_tail(conn->requeue.next, &conn->run_list);
 		rc = iscsi_xmit_task(conn);
 		if (rc)
 			goto again;
@@ -1328,6 +1382,7 @@
 {
 	struct iscsi_cls_session *cls_session;
 	struct Scsi_Host *host;
+	struct iscsi_host *ihost;
 	int reason = 0;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
@@ -1338,6 +1393,7 @@
 	sc->SCp.ptr = NULL;
 
 	host = sc->device->host;
+	ihost = shost_priv(host);
 	spin_unlock(host->host_lock);
 
 	cls_session = starget_to_session(scsi_target(sc->device));
@@ -1350,13 +1406,7 @@
 		goto fault;
 	}
 
-	/*
-	 * ISCSI_STATE_FAILED is a temp. state. The recovery
-	 * code will decide what is best to do with command queued
-	 * during this time
-	 */
-	if (session->state != ISCSI_STATE_LOGGED_IN &&
-	    session->state != ISCSI_STATE_FAILED) {
+	if (session->state != ISCSI_STATE_LOGGED_IN) {
 		/*
 		 * to handle the race between when we set the recovery state
 		 * and block the session we requeue here (commands could
@@ -1364,12 +1414,15 @@
 		 * up because the block code is not locked)
 		 */
 		switch (session->state) {
+		case ISCSI_STATE_FAILED:
 		case ISCSI_STATE_IN_RECOVERY:
 			reason = FAILURE_SESSION_IN_RECOVERY;
-			goto reject;
+			sc->result = DID_IMM_RETRY << 16;
+			break;
 		case ISCSI_STATE_LOGGING_OUT:
 			reason = FAILURE_SESSION_LOGGING_OUT;
-			goto reject;
+			sc->result = DID_IMM_RETRY << 16;
+			break;
 		case ISCSI_STATE_RECOVERY_FAILED:
 			reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
 			sc->result = DID_TRANSPORT_FAILFAST << 16;
@@ -1402,9 +1455,8 @@
 		reason = FAILURE_OOM;
 		goto reject;
 	}
-	list_add_tail(&task->running, &conn->xmitqueue);
 
-	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
+	if (!ihost->workq) {
 		reason = iscsi_prep_scsi_cmd_pdu(task);
 		if (reason) {
 			if (reason == -ENOMEM) {
@@ -1419,8 +1471,10 @@
 			reason = FAILURE_SESSION_NOT_READY;
 			goto prepd_reject;
 		}
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->cmdqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	session->queued_cmdsn++;
 	spin_unlock(&session->lock);
@@ -1429,7 +1483,7 @@
 
 prepd_reject:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 reject:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1439,7 +1493,7 @@
 
 prepd_fault:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 fault:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -1608,44 +1662,24 @@
  * Fail commands. session lock held and recv side suspended and xmit
  * thread flushed
  */
-static void fail_all_commands(struct iscsi_conn *conn, unsigned lun,
-			      int error)
+static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
+			    int error)
 {
-	struct iscsi_task *task, *tmp;
+	struct iscsi_task *task;
+	int i;
 
-	if (conn->task) {
-		if (lun == -1 ||
-		    (conn->task->sc && conn->task->sc->device->lun == lun))
-			conn->task = NULL;
-	}
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (!task->sc || task->state == ISCSI_TASK_FREE)
+			continue;
 
-	/* flush pending */
-	list_for_each_entry_safe(task, tmp, &conn->xmitqueue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing pending sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
+		if (lun != -1 && lun != task->sc->device->lun)
+			continue;
 
-	list_for_each_entry_safe(task, tmp, &conn->requeue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing requeued sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
-
-	/* fail all other running */
-	list_for_each_entry_safe(task, tmp, &conn->run_list, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					 "failing in progress sc %p itt 0x%x\n",
-					 task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing sc %p itt 0x%x state %d\n",
+				  task->sc, task->itt, task->state);
+		fail_scsi_task(task, error);
 	}
 }
 
@@ -1655,7 +1689,7 @@
 	struct iscsi_host *ihost = shost_priv(shost);
 
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-	if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
+	if (ihost->workq)
 		flush_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
@@ -1663,8 +1697,23 @@
 static void iscsi_start_tx(struct iscsi_conn *conn)
 {
 	clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-	if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-		iscsi_conn_queue_work(conn);
+	iscsi_conn_queue_work(conn);
+}
+
+/*
+ * We want to make sure a ping is in flight. It has timed out.
+ * And we are not busy processing a pdu that is making
+ * progress but got started before the ping and is taking a while
+ * to complete so the ping is just stuck behind it in a queue.
+ */
+static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
+{
+	if (conn->ping_task &&
+	    time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
+			   (conn->ping_timeout * HZ), jiffies))
+		return 1;
+	else
+		return 0;
 }
 
 static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
@@ -1702,16 +1751,20 @@
 	 * if the ping timedout then we are in the middle of cleaning up
 	 * and can let the iscsi eh handle it
 	 */
-	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
-			    (conn->ping_timeout * HZ), jiffies))
+	if (iscsi_has_ping_timed_out(conn)) {
 		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
 	/*
 	 * if we are about to check the transport then give the command
 	 * more time
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
-			   jiffies))
+			   jiffies)) {
 		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
+
 	/* if in the middle of checking the transport then give us more time */
 	if (conn->ping_task)
 		rc = BLK_EH_RESET_TIMER;
@@ -1738,13 +1791,13 @@
 
 	recv_timeout *= HZ;
 	last_recv = conn->last_recv;
-	if (conn->ping_task &&
-	    time_before_eq(conn->last_ping + (conn->ping_timeout * HZ),
-			   jiffies)) {
+
+	if (iscsi_has_ping_timed_out(conn)) {
 		iscsi_conn_printk(KERN_ERR, conn, "ping timeout of %d secs "
-				  "expired, last rx %lu, last ping %lu, "
-				  "now %lu\n", conn->ping_timeout, last_recv,
-				  conn->last_ping, jiffies);
+				  "expired, recv timeout %d, last rx %lu, "
+				  "last ping %lu, now %lu\n",
+				  conn->ping_timeout, conn->recv_timeout,
+				  last_recv, conn->last_ping, jiffies);
 		spin_unlock(&session->lock);
 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 		return;
@@ -1788,6 +1841,8 @@
 	cls_session = starget_to_session(scsi_target(sc->device));
 	session = cls_session->dd_data;
 
+	ISCSI_DBG_SESSION(session, "aborting sc %p\n", sc);
+
 	mutex_lock(&session->eh_mutex);
 	spin_lock_bh(&session->lock);
 	/*
@@ -1810,6 +1865,8 @@
 	    sc->SCp.phase != session->age) {
 		spin_unlock_bh(&session->lock);
 		mutex_unlock(&session->eh_mutex);
+		ISCSI_DBG_SESSION(session, "failing abort due to dropped "
+				  "session.\n");
 		return FAILED;
 	}
 
@@ -1829,7 +1886,7 @@
 	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		goto success;
 	}
 
@@ -1860,7 +1917,7 @@
 		 * then sent more data for the cmd.
 		 */
 		spin_lock(&session->lock);
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		conn->tmf_state = TMF_INITIAL;
 		spin_unlock(&session->lock);
 		iscsi_start_tx(conn);
@@ -1967,7 +2024,7 @@
 	iscsi_suspend_tx(conn);
 
 	spin_lock_bh(&session->lock);
-	fail_all_commands(conn, sc->device->lun, DID_ERROR);
+	fail_scsi_tasks(conn, sc->device->lun, DID_ERROR);
 	conn->tmf_state = TMF_INITIAL;
 	spin_unlock_bh(&session->lock);
 
@@ -2274,6 +2331,7 @@
 		if (cmd_task_size)
 			task->dd_data = &task[1];
 		task->itt = cmd_i;
+		task->state = ISCSI_TASK_FREE;
 		INIT_LIST_HEAD(&task->running);
 	}
 
@@ -2360,10 +2418,8 @@
 	conn->transport_timer.data = (unsigned long)conn;
 	conn->transport_timer.function = iscsi_check_transport_timeouts;
 
-	INIT_LIST_HEAD(&conn->run_list);
-	INIT_LIST_HEAD(&conn->mgmt_run_list);
 	INIT_LIST_HEAD(&conn->mgmtqueue);
-	INIT_LIST_HEAD(&conn->xmitqueue);
+	INIT_LIST_HEAD(&conn->cmdqueue);
 	INIT_LIST_HEAD(&conn->requeue);
 	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
@@ -2531,27 +2587,28 @@
 EXPORT_SYMBOL_GPL(iscsi_conn_start);
 
 static void
-flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn)
+fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
-	struct iscsi_task *task, *tmp;
+	struct iscsi_task *task;
+	int i, state;
 
-	/* handle pending */
-	list_for_each_entry_safe(task, tmp, &conn->mgmtqueue, running) {
-		ISCSI_DBG_SESSION(session, "flushing pending mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (task->sc)
+			continue;
+
+		if (task->state == ISCSI_TASK_FREE)
+			continue;
+
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing mgmt itt 0x%x state %d\n",
+				  task->itt, task->state);
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+		if (task->state == ISCSI_TASK_PENDING)
+			state = ISCSI_TASK_COMPLETED;
+		iscsi_complete_task(task, state);
+
 	}
-
-	/* handle running */
-	list_for_each_entry_safe(task, tmp, &conn->mgmt_run_list, running) {
-		ISCSI_DBG_SESSION(session, "flushing running mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
-	}
-
-	conn->task = NULL;
 }
 
 static void iscsi_start_session_recovery(struct iscsi_session *session,
@@ -2559,8 +2616,6 @@
 {
 	int old_stop_stage;
 
-	del_timer_sync(&conn->transport_timer);
-
 	mutex_lock(&session->eh_mutex);
 	spin_lock_bh(&session->lock);
 	if (conn->stop_stage == STOP_CONN_TERM) {
@@ -2578,13 +2633,17 @@
 		session->state = ISCSI_STATE_TERMINATE;
 	else if (conn->stop_stage != STOP_CONN_RECOVER)
 		session->state = ISCSI_STATE_IN_RECOVERY;
+	spin_unlock_bh(&session->lock);
 
+	del_timer_sync(&conn->transport_timer);
+	iscsi_suspend_tx(conn);
+
+	spin_lock_bh(&session->lock);
 	old_stop_stage = conn->stop_stage;
 	conn->stop_stage = flag;
 	conn->c_stage = ISCSI_CONN_STOPPED;
 	spin_unlock_bh(&session->lock);
 
-	iscsi_suspend_tx(conn);
 	/*
 	 * for connection level recovery we should not calculate
 	 * header digest. conn->hdr_size used for optimization
@@ -2605,11 +2664,8 @@
 	 * flush queues.
 	 */
 	spin_lock_bh(&session->lock);
-	if (flag == STOP_CONN_RECOVER)
-		fail_all_commands(conn, -1, DID_TRANSPORT_DISRUPTED);
-	else
-		fail_all_commands(conn, -1, DID_ERROR);
-	flush_control_queues(session, conn);
+	fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
+	fail_mgmt_tasks(session, conn);
 	spin_unlock_bh(&session->lock);
 	mutex_unlock(&session->eh_mutex);
 }
@@ -2651,6 +2707,23 @@
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_bind);
 
+static int iscsi_switch_str_param(char **param, char *new_val_buf)
+{
+	char *new_val;
+
+	if (*param) {
+		if (!strcmp(*param, new_val_buf))
+			return 0;
+	}
+
+	new_val = kstrdup(new_val_buf, GFP_NOIO);
+	if (!new_val)
+		return -ENOMEM;
+
+	kfree(*param);
+	*param = new_val;
+	return 0;
+}
 
 int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
 		    enum iscsi_param param, char *buf, int buflen)
@@ -2723,38 +2796,15 @@
 		sscanf(buf, "%u", &conn->exp_statsn);
 		break;
 	case ISCSI_PARAM_USERNAME:
-		kfree(session->username);
-		session->username = kstrdup(buf, GFP_KERNEL);
-		if (!session->username)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->username, buf);
 	case ISCSI_PARAM_USERNAME_IN:
-		kfree(session->username_in);
-		session->username_in = kstrdup(buf, GFP_KERNEL);
-		if (!session->username_in)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->username_in, buf);
 	case ISCSI_PARAM_PASSWORD:
-		kfree(session->password);
-		session->password = kstrdup(buf, GFP_KERNEL);
-		if (!session->password)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->password, buf);
 	case ISCSI_PARAM_PASSWORD_IN:
-		kfree(session->password_in);
-		session->password_in = kstrdup(buf, GFP_KERNEL);
-		if (!session->password_in)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->password_in, buf);
 	case ISCSI_PARAM_TARGET_NAME:
-		/* this should not change between logins */
-		if (session->targetname)
-			break;
-
-		session->targetname = kstrdup(buf, GFP_KERNEL);
-		if (!session->targetname)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->targetname, buf);
 	case ISCSI_PARAM_TPGT:
 		sscanf(buf, "%d", &session->tpgt);
 		break;
@@ -2762,25 +2812,11 @@
 		sscanf(buf, "%d", &conn->persistent_port);
 		break;
 	case ISCSI_PARAM_PERSISTENT_ADDRESS:
-		/*
-		 * this is the address returned in discovery so it should
-		 * not change between logins.
-		 */
-		if (conn->persistent_address)
-			break;
-
-		conn->persistent_address = kstrdup(buf, GFP_KERNEL);
-		if (!conn->persistent_address)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&conn->persistent_address, buf);
 	case ISCSI_PARAM_IFACE_NAME:
-		if (!session->ifacename)
-			session->ifacename = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&session->ifacename, buf);
 	case ISCSI_PARAM_INITIATOR_NAME:
-		if (!session->initiatorname)
-			session->initiatorname = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&session->initiatorname, buf);
 	default:
 		return -ENOSYS;
 	}
@@ -2851,10 +2887,7 @@
 		len = sprintf(buf, "%s\n", session->ifacename);
 		break;
 	case ISCSI_PARAM_INITIATOR_NAME:
-		if (!session->initiatorname)
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n", session->initiatorname);
+		len = sprintf(buf, "%s\n", session->initiatorname);
 		break;
 	default:
 		return -ENOSYS;
@@ -2920,29 +2953,16 @@
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_NETDEV_NAME:
-		if (!ihost->netdev)
-			len = sprintf(buf, "%s\n", "default");
-		else
-			len = sprintf(buf, "%s\n", ihost->netdev);
+		len = sprintf(buf, "%s\n", ihost->netdev);
 		break;
 	case ISCSI_HOST_PARAM_HWADDRESS:
-		if (!ihost->hwaddress)
-			len = sprintf(buf, "%s\n", "default");
-		else
-			len = sprintf(buf, "%s\n", ihost->hwaddress);
+		len = sprintf(buf, "%s\n", ihost->hwaddress);
 		break;
 	case ISCSI_HOST_PARAM_INITIATOR_NAME:
-		if (!ihost->initiatorname)
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n", ihost->initiatorname);
+		len = sprintf(buf, "%s\n", ihost->initiatorname);
 		break;
 	case ISCSI_HOST_PARAM_IPADDRESS:
-		if (!strlen(ihost->local_address))
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n",
-				      ihost->local_address);
+		len = sprintf(buf, "%s\n", ihost->local_address);
 		break;
 	default:
 		return -ENOSYS;
@@ -2959,17 +2979,11 @@
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_NETDEV_NAME:
-		if (!ihost->netdev)
-			ihost->netdev = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->netdev, buf);
 	case ISCSI_HOST_PARAM_HWADDRESS:
-		if (!ihost->hwaddress)
-			ihost->hwaddress = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->hwaddress, buf);
 	case ISCSI_HOST_PARAM_INITIATOR_NAME:
-		if (!ihost->initiatorname)
-			ihost->initiatorname = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->initiatorname, buf);
 	default:
 		return -ENOSYS;
 	}
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index b579ca9..2bc0709 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -440,8 +440,8 @@
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_r2t_info *r2t;
 
-	/* nothing to do for mgmt or pending tasks */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* nothing to do for mgmt */
+	if (!task->sc)
 		return;
 
 	/* flush task's r2t queues */
@@ -473,7 +473,13 @@
 	int datasn = be32_to_cpu(rhdr->datasn);
 	unsigned total_in_length = scsi_in(task->sc)->length;
 
-	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
+	/*
+	 * lib iscsi will update this in the completion handling if there
+	 * is status.
+	 */
+	if (!(rhdr->flags & ISCSI_FLAG_DATA_STATUS))
+		iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
+
 	if (tcp_conn->in.datalen == 0)
 		return 0;
 
@@ -857,6 +863,12 @@
 	int rc = 0;
 
 	ISCSI_DBG_TCP(conn, "in %d bytes\n", skb->len - offset);
+	/*
+	 * Update for each skb instead of pdu, because over slow networks a
+	 * data_in's data could take a while to read in. We also want to
+	 * account for r2ts.
+	 */
+	conn->last_recv = jiffies;
 
 	if (unlikely(conn->suspend_rx)) {
 		ISCSI_DBG_TCP(conn, "Rx suspended!\n");
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 3da02e4..54fa1e4 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1927,21 +1927,21 @@
 	/* do we need to support multiple segments? */
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk("%s: multiple segments req %u %u, rsp %u %u\n",
-		       __func__, req->bio->bi_vcnt, req->data_len,
-		       rsp->bio->bi_vcnt, rsp->data_len);
+		       __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
+		       rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
-	ret = smp_execute_task(dev, bio_data(req->bio), req->data_len,
-			       bio_data(rsp->bio), rsp->data_len);
+	ret = smp_execute_task(dev, bio_data(req->bio), blk_rq_bytes(req),
+			       bio_data(rsp->bio), blk_rq_bytes(rsp));
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
-		rsp->data_len = ret;
-		req->data_len = 0;
+		rsp->resid_len = ret;
+		req->resid_len = 0;
 		ret = 0;
 	} else if (ret == 0) {
-		rsp->data_len = 0;
-		req->data_len = 0;
+		rsp->resid_len = 0;
+		req->resid_len = 0;
 	}
 
 	return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index d110a36..1bc3b75 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -134,24 +134,24 @@
 {
 	u8 *req_data = NULL, *resp_data = NULL, *buf;
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
-	int error = -EINVAL, resp_data_len = rsp->data_len;
+	int error = -EINVAL;
 
 	/* eight is the minimum size for request and response frames */
-	if (req->data_len < 8 || rsp->data_len < 8)
+	if (blk_rq_bytes(req) < 8 || blk_rq_bytes(rsp) < 8)
 		goto out;
 
-	if (bio_offset(req->bio) + req->data_len > PAGE_SIZE ||
-	    bio_offset(rsp->bio) + rsp->data_len > PAGE_SIZE) {
+	if (bio_offset(req->bio) + blk_rq_bytes(req) > PAGE_SIZE ||
+	    bio_offset(rsp->bio) + blk_rq_bytes(rsp) > PAGE_SIZE) {
 		shost_printk(KERN_ERR, shost,
 			"SMP request/response frame crosses page boundary");
 		goto out;
 	}
 
-	req_data = kzalloc(req->data_len, GFP_KERNEL);
+	req_data = kzalloc(blk_rq_bytes(req), GFP_KERNEL);
 
 	/* make sure frame can always be built ... we copy
 	 * back only the requested length */
-	resp_data = kzalloc(max(rsp->data_len, 128U), GFP_KERNEL);
+	resp_data = kzalloc(max(blk_rq_bytes(rsp), 128U), GFP_KERNEL);
 
 	if (!req_data || !resp_data) {
 		error = -ENOMEM;
@@ -160,7 +160,7 @@
 
 	local_irq_disable();
 	buf = kmap_atomic(bio_page(req->bio), KM_USER0) + bio_offset(req->bio);
-	memcpy(req_data, buf, req->data_len);
+	memcpy(req_data, buf, blk_rq_bytes(req));
 	kunmap_atomic(buf - bio_offset(req->bio), KM_USER0);
 	local_irq_enable();
 
@@ -178,15 +178,15 @@
 
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
-		req->data_len -= 8;
-		resp_data_len -= 32;
+		req->resid_len -= 8;
+		rsp->resid_len -= 32;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		resp_data[9] = sas_ha->num_phys;
 		break;
 
 	case SMP_REPORT_MANUF_INFO:
-		req->data_len -= 8;
-		resp_data_len -= 64;
+		req->resid_len -= 8;
+		rsp->resid_len -= 64;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		memcpy(resp_data + 12, shost->hostt->name,
 		       SAS_EXPANDER_VENDOR_ID_LEN);
@@ -199,13 +199,13 @@
 		break;
 
 	case SMP_DISCOVER:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 56;
+		rsp->resid_len -= 56;
 		sas_host_smp_discover(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -215,13 +215,13 @@
 		break;
 
 	case SMP_REPORT_PHY_SATA:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 60;
+		rsp->resid_len -= 60;
 		sas_report_phy_sata(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -238,13 +238,13 @@
 		break;
 
 	case SMP_PHY_CONTROL:
-		req->data_len -= 44;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 44;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 8;
+		rsp->resid_len -= 8;
 		sas_phy_control(sas_ha, req_data[9], req_data[10],
 				req_data[32] >> 4, req_data[33] >> 4,
 				resp_data);
@@ -261,11 +261,10 @@
 
 	local_irq_disable();
 	buf = kmap_atomic(bio_page(rsp->bio), KM_USER0) + bio_offset(rsp->bio);
-	memcpy(buf, resp_data, rsp->data_len);
+	memcpy(buf, resp_data, blk_rq_bytes(rsp));
 	flush_kernel_dcache_page(bio_page(rsp->bio));
 	kunmap_atomic(buf - bio_offset(rsp->bio), KM_USER0);
 	local_irq_enable();
-	rsp->data_len = resp_data_len;
 
  out:
 	kfree(req_data);
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 1105f9a..5405698 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -23,6 +23,13 @@
 
 struct lpfc_sli2_slim;
 
+#define LPFC_PCI_DEV_LP		0x1
+#define LPFC_PCI_DEV_OC		0x2
+
+#define LPFC_SLI_REV2		2
+#define LPFC_SLI_REV3		3
+#define LPFC_SLI_REV4		4
+
 #define LPFC_MAX_TARGET		4096	/* max number of targets supported */
 #define LPFC_MAX_DISC_THREADS	64	/* max outstanding discovery els
 					   requests */
@@ -98,9 +105,11 @@
 };
 
 struct hbq_dmabuf {
+	struct lpfc_dmabuf hbuf;
 	struct lpfc_dmabuf dbuf;
 	uint32_t size;
 	uint32_t tag;
+	struct lpfc_rcqe rcqe;
 };
 
 /* Priority bit.  Set value to exceed low water mark in lpfc_mem. */
@@ -134,7 +143,10 @@
 	} rev;
 	struct {
 #ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rsvd2  :24;  /* Reserved                             */
+		uint32_t rsvd3  :19;  /* Reserved                             */
+		uint32_t cdss	: 1;  /* Configure Data Security SLI          */
+		uint32_t rsvd2	: 3;  /* Reserved                             */
+		uint32_t cbg	: 1;  /* Configure BlockGuard                 */
 		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
 		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
 		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
@@ -152,7 +164,10 @@
 		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
 		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
 		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
-		uint32_t rsvd2  :24;  /* Reserved                             */
+		uint32_t cbg	: 1;  /* Configure BlockGuard                 */
+		uint32_t rsvd2	: 3;  /* Reserved                             */
+		uint32_t cdss	: 1;  /* Configure Data Security SLI          */
+		uint32_t rsvd3  :19;  /* Reserved                             */
 #endif
 	} sli3Feat;
 } lpfc_vpd_t;
@@ -264,8 +279,8 @@
 };
 
 struct lpfc_vport {
-	struct list_head listentry;
 	struct lpfc_hba *phba;
+	struct list_head listentry;
 	uint8_t port_type;
 #define LPFC_PHYSICAL_PORT 1
 #define LPFC_NPIV_PORT  2
@@ -273,6 +288,9 @@
 	enum discovery_state port_state;
 
 	uint16_t vpi;
+	uint16_t vfi;
+	uint8_t vfi_state;
+#define LPFC_VFI_REGISTERED	0x1
 
 	uint32_t fc_flag;	/* FC flags */
 /* Several of these flags are HBA centric and should be moved to
@@ -385,6 +403,9 @@
 #endif
 	uint8_t stat_data_enabled;
 	uint8_t stat_data_blocked;
+	struct list_head rcv_buffer_list;
+	uint32_t vport_flag;
+#define STATIC_VPORT	1
 };
 
 struct hbq_s {
@@ -420,8 +441,66 @@
 };
 
 struct lpfc_hba {
+	/* SCSI interface function jump table entries */
+	int (*lpfc_new_scsi_buf)
+		(struct lpfc_vport *, int);
+	struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
+		(struct lpfc_hba *);
+	int (*lpfc_scsi_prep_dma_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_scsi_unprep_dma_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_release_scsi_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_rampdown_queue_depth)
+		(struct lpfc_hba *);
+	void (*lpfc_scsi_prep_cmnd)
+		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		 struct lpfc_nodelist *);
+	int (*lpfc_scsi_prep_task_mgmt_cmd)
+		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		 unsigned int, uint8_t);
+
+	/* IOCB interface function jump table entries */
+	int (*__lpfc_sli_issue_iocb)
+		(struct lpfc_hba *, uint32_t,
+		 struct lpfc_iocbq *, uint32_t);
+	void (*__lpfc_sli_release_iocbq)(struct lpfc_hba *,
+			 struct lpfc_iocbq *);
+	int (*lpfc_hba_down_post)(struct lpfc_hba *phba);
+
+
+	IOCB_t * (*lpfc_get_iocb_from_iocbq)
+		(struct lpfc_iocbq *);
+	void (*lpfc_scsi_cmd_iocb_cmpl)
+		(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *);
+
+	/* MBOX interface function jump table entries */
+	int (*lpfc_sli_issue_mbox)
+		(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
+	/* Slow-path IOCB process function jump table entries */
+	void (*lpfc_sli_handle_slow_ring_event)
+		(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+		 uint32_t mask);
+	/* INIT device interface function jump table entries */
+	int (*lpfc_sli_hbq_to_firmware)
+		(struct lpfc_hba *, uint32_t, struct hbq_dmabuf *);
+	int (*lpfc_sli_brdrestart)
+		(struct lpfc_hba *);
+	int (*lpfc_sli_brdready)
+		(struct lpfc_hba *, uint32_t);
+	void (*lpfc_handle_eratt)
+		(struct lpfc_hba *);
+	void (*lpfc_stop_port)
+		(struct lpfc_hba *);
+
+
+	/* SLI4 specific HBA data structure */
+	struct lpfc_sli4_hba sli4_hba;
+
 	struct lpfc_sli sli;
-	uint32_t sli_rev;		/* SLI2 or SLI3 */
+	uint8_t pci_dev_grp;	/* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
+	uint32_t sli_rev;		/* SLI2, SLI3, or SLI4 */
 	uint32_t sli3_options;		/* Mask of enabled SLI3 options */
 #define LPFC_SLI3_HBQ_ENABLED		0x01
 #define LPFC_SLI3_NPIV_ENABLED		0x02
@@ -429,6 +508,7 @@
 #define LPFC_SLI3_CRP_ENABLED		0x08
 #define LPFC_SLI3_INB_ENABLED		0x10
 #define LPFC_SLI3_BG_ENABLED		0x20
+#define LPFC_SLI3_DSS_ENABLED		0x40
 	uint32_t iocb_cmd_size;
 	uint32_t iocb_rsp_size;
 
@@ -442,8 +522,13 @@
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */
-
-#define DEFER_ERATT		0x4 /* Deferred error attention in progress */
+#define DEFER_ERATT		0x2 /* Deferred error attention in progress */
+#define HBA_FCOE_SUPPORT	0x4 /* HBA function supports FCOE */
+#define HBA_RECEIVE_BUFFER	0x8 /* Rcv buffer posted to worker thread */
+#define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */
+#define FCP_XRI_ABORT_EVENT	0x20
+#define ELS_XRI_ABORT_EVENT	0x40
+#define ASYNC_EVENT		0x80
 	struct lpfc_dmabuf slim2p;
 
 	MAILBOX_t *mbox;
@@ -502,6 +587,9 @@
 	uint32_t cfg_poll;
 	uint32_t cfg_poll_tmo;
 	uint32_t cfg_use_msi;
+	uint32_t cfg_fcp_imax;
+	uint32_t cfg_fcp_wq_count;
+	uint32_t cfg_fcp_eq_count;
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_prot_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
@@ -511,6 +599,8 @@
 	uint32_t cfg_enable_hba_reset;
 	uint32_t cfg_enable_hba_heartbeat;
 	uint32_t cfg_enable_bg;
+	uint32_t cfg_enable_fip;
+	uint32_t cfg_log_verbose;
 
 	lpfc_vpd_t vpd;		/* vital product data */
 
@@ -526,11 +616,12 @@
 	unsigned long data_flags;
 
 	uint32_t hbq_in_use;		/* HBQs in use flag */
-	struct list_head hbqbuf_in_list;  /* in-fly hbq buffer list */
+	struct list_head rb_pend_list;  /* Received buffers to be processed */
 	uint32_t hbq_count;	        /* Count of configured HBQs */
 	struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies  */
 
 	unsigned long pci_bar0_map;     /* Physical address for PCI BAR0 */
+	unsigned long pci_bar1_map;     /* Physical address for PCI BAR1 */
 	unsigned long pci_bar2_map;     /* Physical address for PCI BAR2 */
 	void __iomem *slim_memmap_p;	/* Kernel memory mapped address for
 					   PCI BAR0 */
@@ -593,7 +684,8 @@
 	/* pci_mem_pools */
 	struct pci_pool *lpfc_scsi_dma_buf_pool;
 	struct pci_pool *lpfc_mbuf_pool;
-	struct pci_pool *lpfc_hbq_pool;
+	struct pci_pool *lpfc_hrb_pool;	/* header receive buffer pool */
+	struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */
 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
 
 	mempool_t *mbox_mem_pool;
@@ -609,6 +701,14 @@
 	struct lpfc_vport *pport;	/* physical lpfc_vport pointer */
 	uint16_t max_vpi;		/* Maximum virtual nports */
 #define LPFC_MAX_VPI 0xFFFF		/* Max number of VPI supported */
+	uint16_t max_vports;            /*
+					 * For IOV HBAs max_vpi can change
+					 * after a reset. max_vports is max
+					 * number of vports present. This can
+					 * be greater than max_vpi.
+					 */
+	uint16_t vpi_base;
+	uint16_t vfi_base;
 	unsigned long *vpi_bmask;	/* vpi allocation table */
 
 	/* Data structure used by fabric iocb scheduler */
@@ -667,6 +767,11 @@
 /* Maximum number of events that can be outstanding at any time*/
 #define LPFC_MAX_EVT_COUNT 512
 	atomic_t fast_event_count;
+	struct lpfc_fcf fcf;
+	uint8_t fc_map[3];
+	uint8_t valid_vlan;
+	uint16_t vlan_id;
+	struct list_head fcf_conn_rec_list;
 };
 
 static inline struct Scsi_Host *
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index c14f0cb..d73e677 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -30,8 +30,10 @@
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -505,12 +507,14 @@
 		return -ENOMEM;
 
 	memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
-	pmboxq->mb.mbxCommand = MBX_DOWN_LINK;
-	pmboxq->mb.mbxOwner = OWN_HOST;
+	pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK;
+	pmboxq->u.mb.mbxOwner = OWN_HOST;
 
 	mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
 
-	if ((mbxstatus == MBX_SUCCESS) && (pmboxq->mb.mbxStatus == 0)) {
+	if ((mbxstatus == MBX_SUCCESS) &&
+	    (pmboxq->u.mb.mbxStatus == 0 ||
+	     pmboxq->u.mb.mbxStatus == MBXERR_LINK_DOWN)) {
 		memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 		lpfc_init_link(phba, pmboxq, phba->cfg_topology,
 			       phba->cfg_link_speed);
@@ -789,7 +793,8 @@
 		  uint32_t *mrpi, uint32_t *arpi,
 		  uint32_t *mvpi, uint32_t *avpi)
 {
-	struct lpfc_sli   *psli = &phba->sli;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_mbx_read_config *rd_config;
 	LPFC_MBOXQ_t *pmboxq;
 	MAILBOX_t *pmb;
 	int rc = 0;
@@ -800,7 +805,7 @@
 	 */
 	if (phba->link_state < LPFC_LINK_DOWN ||
 	    !phba->mbox_mem_pool ||
-	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
+	    (phba->sli.sli_flag & LPFC_SLI_ACTIVE) == 0)
 		return 0;
 
 	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
@@ -811,13 +816,13 @@
 		return 0;
 	memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_CONFIG;
 	pmb->mbxOwner = OWN_HOST;
 	pmboxq->context1 = NULL;
 
 	if ((phba->pport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = MBX_NOT_FINISHED;
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -828,18 +833,37 @@
 		return 0;
 	}
 
-	if (mrpi)
-		*mrpi = pmb->un.varRdConfig.max_rpi;
-	if (arpi)
-		*arpi = pmb->un.varRdConfig.avail_rpi;
-	if (mxri)
-		*mxri = pmb->un.varRdConfig.max_xri;
-	if (axri)
-		*axri = pmb->un.varRdConfig.avail_xri;
-	if (mvpi)
-		*mvpi = pmb->un.varRdConfig.max_vpi;
-	if (avpi)
-		*avpi = pmb->un.varRdConfig.avail_vpi;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rd_config = &pmboxq->u.mqe.un.rd_config;
+		if (mrpi)
+			*mrpi = bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config);
+		if (arpi)
+			*arpi = bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.rpi_used;
+		if (mxri)
+			*mxri = bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+		if (axri)
+			*axri = bf_get(lpfc_mbx_rd_conf_xri_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.xri_used;
+		if (mvpi)
+			*mvpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config);
+		if (avpi)
+			*avpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.vpi_used;
+	} else {
+		if (mrpi)
+			*mrpi = pmb->un.varRdConfig.max_rpi;
+		if (arpi)
+			*arpi = pmb->un.varRdConfig.avail_rpi;
+		if (mxri)
+			*mxri = pmb->un.varRdConfig.max_xri;
+		if (axri)
+			*axri = pmb->un.varRdConfig.avail_xri;
+		if (mvpi)
+			*mvpi = pmb->un.varRdConfig.max_vpi;
+		if (avpi)
+			*avpi = pmb->un.varRdConfig.avail_vpi;
+	}
 
 	mempool_free(pmboxq, phba->mbox_mem_pool);
 	return 1;
@@ -2021,22 +2045,9 @@
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
 # deluged with LOTS of information.
 # You can set a bit mask to record specific types of verbose messages:
-#
-# LOG_ELS                       0x1        ELS events
-# LOG_DISCOVERY                 0x2        Link discovery events
-# LOG_MBOX                      0x4        Mailbox events
-# LOG_INIT                      0x8        Initialization events
-# LOG_LINK_EVENT                0x10       Link events
-# LOG_FCP                       0x40       FCP traffic history
-# LOG_NODE                      0x80       Node table events
-# LOG_BG                        0x200      BlockBuard events
-# LOG_MISC                      0x400      Miscellaneous events
-# LOG_SLI                       0x800      SLI events
-# LOG_FCP_ERROR                 0x1000     Only log FCP errors
-# LOG_LIBDFC                    0x2000     LIBDFC events
-# LOG_ALL_MSG                   0xffff     LOG all messages
+# See lpfc_logmsh.h for definitions.
 */
-LPFC_VPORT_ATTR_HEX_RW(log_verbose, 0x0, 0x0, 0xffff,
+LPFC_VPORT_ATTR_HEX_RW(log_verbose, 0x0, 0x0, 0xffffffff,
 		       "Verbose logging bit-mask");
 
 /*
@@ -2266,6 +2277,36 @@
 static DEVICE_ATTR(lpfc_topology, S_IRUGO | S_IWUSR,
 		lpfc_topology_show, lpfc_topology_store);
 
+/**
+ * lpfc_static_vport_show: Read callback function for
+ *   lpfc_static_vport sysfs file.
+ * @dev: Pointer to class device object.
+ * @attr: device attribute structure.
+ * @buf: Data buffer.
+ *
+ * This function is the read call back function for
+ * lpfc_static_vport sysfs file. The lpfc_static_vport
+ * sysfs file report the mageability of the vport.
+ **/
+static ssize_t
+lpfc_static_vport_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct Scsi_Host  *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+	if (vport->vport_flag & STATIC_VPORT)
+		sprintf(buf, "1\n");
+	else
+		sprintf(buf, "0\n");
+
+	return strlen(buf);
+}
+
+/*
+ * Sysfs attribute to control the statistical data collection.
+ */
+static DEVICE_ATTR(lpfc_static_vport, S_IRUGO,
+		   lpfc_static_vport_show, NULL);
 
 /**
  * lpfc_stat_data_ctrl_store - write call back for lpfc_stat_data_ctrl sysfs file
@@ -2341,7 +2382,7 @@
 		if (vports == NULL)
 			return -ENOMEM;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(v_shost->host_lock);
 			/* Block and reset data collection */
@@ -2356,7 +2397,7 @@
 		phba->bucket_base = base;
 		phba->bucket_step = step;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 
 			/* Unblock data collection */
@@ -2373,7 +2414,7 @@
 		if (vports == NULL)
 			return -ENOMEM;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
 			vports[i]->stat_data_blocked = 1;
@@ -2844,15 +2885,39 @@
 /*
 # lpfc_use_msi: Use MSI (Message Signaled Interrupts) in systems that
 #		support this feature
-#       0  = MSI disabled
+#       0  = MSI disabled (default)
 #       1  = MSI enabled
-#       2  = MSI-X enabled (default)
-# Value range is [0,2]. Default value is 2.
+#       2  = MSI-X enabled
+# Value range is [0,2]. Default value is 0.
 */
-LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or "
+LPFC_ATTR_R(use_msi, 0, 0, 2, "Use Message Signaled Interrupts (1) or "
 	    "MSI-X (2), if possible");
 
 /*
+# lpfc_fcp_imax: Set the maximum number of fast-path FCP interrupts per second
+#
+# Value range is [636,651042]. Default value is 10000.
+*/
+LPFC_ATTR_R(fcp_imax, LPFC_FP_DEF_IMAX, LPFC_MIM_IMAX, LPFC_DMULT_CONST,
+	    "Set the maximum number of fast-path FCP interrupts per second");
+
+/*
+# lpfc_fcp_wq_count: Set the number of fast-path FCP work queues
+#
+# Value range is [1,31]. Default value is 4.
+*/
+LPFC_ATTR_R(fcp_wq_count, LPFC_FP_WQN_DEF, LPFC_FP_WQN_MIN, LPFC_FP_WQN_MAX,
+	    "Set the number of fast-path FCP work queues, if possible");
+
+/*
+# lpfc_fcp_eq_count: Set the number of fast-path FCP event queues
+#
+# Value range is [1,7]. Default value is 1.
+*/
+LPFC_ATTR_R(fcp_eq_count, LPFC_FP_EQN_DEF, LPFC_FP_EQN_MIN, LPFC_FP_EQN_MAX,
+	    "Set the number of fast-path FCP event queues, if possible");
+
+/*
 # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
 #       0  = HBA resets disabled
 #       1  = HBA resets enabled (default)
@@ -2876,6 +2941,14 @@
 */
 LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
 
+/*
+# lpfc_enable_fip: When set, FIP is required to start discovery. If not
+# set, the driver will add an FCF record manually if the port has no
+# FCF records available and start discovery.
+# Value range is [0,1]. Default value is 1 (enabled)
+*/
+LPFC_ATTR_RW(enable_fip, 0, 0, 1, "Enable FIP Discovery");
+
 
 /*
 # lpfc_prot_mask: i
@@ -2942,6 +3015,7 @@
 	&dev_attr_lpfc_peer_port_login,
 	&dev_attr_lpfc_nodev_tmo,
 	&dev_attr_lpfc_devloss_tmo,
+	&dev_attr_lpfc_enable_fip,
 	&dev_attr_lpfc_fcp_class,
 	&dev_attr_lpfc_use_adisc,
 	&dev_attr_lpfc_ack0,
@@ -2969,6 +3043,9 @@
 	&dev_attr_lpfc_poll,
 	&dev_attr_lpfc_poll_tmo,
 	&dev_attr_lpfc_use_msi,
+	&dev_attr_lpfc_fcp_imax,
+	&dev_attr_lpfc_fcp_wq_count,
+	&dev_attr_lpfc_fcp_eq_count,
 	&dev_attr_lpfc_enable_bg,
 	&dev_attr_lpfc_soft_wwnn,
 	&dev_attr_lpfc_soft_wwpn,
@@ -2991,6 +3068,7 @@
 	&dev_attr_lpfc_lun_queue_depth,
 	&dev_attr_lpfc_nodev_tmo,
 	&dev_attr_lpfc_devloss_tmo,
+	&dev_attr_lpfc_enable_fip,
 	&dev_attr_lpfc_hba_queue_depth,
 	&dev_attr_lpfc_peer_port_login,
 	&dev_attr_lpfc_restrict_login,
@@ -3003,6 +3081,7 @@
 	&dev_attr_lpfc_enable_da_id,
 	&dev_attr_lpfc_max_scsicmpl_time,
 	&dev_attr_lpfc_stat_data_ctrl,
+	&dev_attr_lpfc_static_vport,
 	NULL,
 };
 
@@ -3199,7 +3278,7 @@
 		}
 	}
 
-	memcpy((uint8_t *) & phba->sysfs_mbox.mbox->mb + off,
+	memcpy((uint8_t *) &phba->sysfs_mbox.mbox->u.mb + off,
 	       buf, count);
 
 	phba->sysfs_mbox.offset = off + count;
@@ -3241,6 +3320,7 @@
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
 	int rc;
+	MAILBOX_t *pmb;
 
 	if (off > MAILBOX_CMD_SIZE)
 		return -ERANGE;
@@ -3265,8 +3345,8 @@
 	if (off == 0 &&
 	    phba->sysfs_mbox.state  == SMBOX_WRITING &&
 	    phba->sysfs_mbox.offset >= 2 * sizeof(uint32_t)) {
-
-		switch (phba->sysfs_mbox.mbox->mb.mbxCommand) {
+		pmb = &phba->sysfs_mbox.mbox->u.mb;
+		switch (pmb->mbxCommand) {
 			/* Offline only */
 		case MBX_INIT_LINK:
 		case MBX_DOWN_LINK:
@@ -3283,7 +3363,7 @@
 			if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
 				printk(KERN_WARNING "mbox_read:Command 0x%x "
 				       "is illegal in on-line state\n",
-				       phba->sysfs_mbox.mbox->mb.mbxCommand);
+				       pmb->mbxCommand);
 				sysfs_mbox_idle(phba);
 				spin_unlock_irq(&phba->hbalock);
 				return -EPERM;
@@ -3319,13 +3399,13 @@
 		case MBX_CONFIG_PORT:
 		case MBX_RUN_BIU_DIAG:
 			printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n",
-			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+			       pmb->mbxCommand);
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(&phba->hbalock);
 			return -EPERM;
 		default:
 			printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n",
-			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+			       pmb->mbxCommand);
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(&phba->hbalock);
 			return -EPERM;
@@ -3335,14 +3415,14 @@
 		 * or RESTART mailbox commands until the HBA is restarted.
 		 */
 		if (phba->pport->stopped &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_DUMP_MEMORY &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_RESTART &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_WRITE_VPARMS &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_WRITE_WWN)
+		    pmb->mbxCommand != MBX_DUMP_MEMORY &&
+		    pmb->mbxCommand != MBX_RESTART &&
+		    pmb->mbxCommand != MBX_WRITE_VPARMS &&
+		    pmb->mbxCommand != MBX_WRITE_WWN)
 			lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
 					"1259 mbox: Issued mailbox cmd "
 					"0x%x while in stopped state.\n",
-					phba->sysfs_mbox.mbox->mb.mbxCommand);
+					pmb->mbxCommand);
 
 		phba->sysfs_mbox.mbox->vport = vport;
 
@@ -3356,7 +3436,7 @@
 		}
 
 		if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		    (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){
+		    (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE))) {
 
 			spin_unlock_irq(&phba->hbalock);
 			rc = lpfc_sli_issue_mbox (phba,
@@ -3368,8 +3448,7 @@
 			spin_unlock_irq(&phba->hbalock);
 			rc = lpfc_sli_issue_mbox_wait (phba,
 						       phba->sysfs_mbox.mbox,
-				lpfc_mbox_tmo_val(phba,
-				    phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ);
+				lpfc_mbox_tmo_val(phba, pmb->mbxCommand) * HZ);
 			spin_lock_irq(&phba->hbalock);
 		}
 
@@ -3391,7 +3470,7 @@
 		return -EAGAIN;
 	}
 
-	memcpy(buf, (uint8_t *) & phba->sysfs_mbox.mbox->mb + off, count);
+	memcpy(buf, (uint8_t *) &pmb + off, count);
 
 	phba->sysfs_mbox.offset = off + count;
 
@@ -3585,6 +3664,9 @@
 			case LA_8GHZ_LINK:
 				fc_host_speed(shost) = FC_PORTSPEED_8GBIT;
 			break;
+			case LA_10GHZ_LINK:
+				fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+			break;
 			default:
 				fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
 			break;
@@ -3652,7 +3734,7 @@
 	 */
 	if (phba->link_state < LPFC_LINK_DOWN ||
 	    !phba->mbox_mem_pool ||
-	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
+	    (phba->sli.sli_flag & LPFC_SLI_ACTIVE) == 0)
 		return NULL;
 
 	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
@@ -3663,14 +3745,14 @@
 		return NULL;
 	memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_STATUS;
 	pmb->mbxOwner = OWN_HOST;
 	pmboxq->context1 = NULL;
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3695,7 +3777,7 @@
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+	    (!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3769,7 +3851,7 @@
 		return;
 	memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_STATUS;
 	pmb->mbxOwner = OWN_HOST;
 	pmb->un.varWords[0] = 0x1; /* reset request */
@@ -3777,7 +3859,7 @@
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3795,7 +3877,7 @@
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+	    (!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3962,6 +4044,21 @@
 		lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0);
 }
 
+/**
+ * lpfc_hba_log_verbose_init - Set hba's log verbose level
+ * @phba: Pointer to lpfc_hba struct.
+ *
+ * This function is called by the lpfc_get_cfgparam() routine to set the
+ * module lpfc_log_verbose into the @phba cfg_log_verbose for use with
+ * log messsage according to the module's lpfc_log_verbose parameter setting
+ * before hba port or vport created.
+ **/
+static void
+lpfc_hba_log_verbose_init(struct lpfc_hba *phba, uint32_t verbose)
+{
+	phba->cfg_log_verbose = verbose;
+}
+
 struct fc_function_template lpfc_transport_functions = {
 	/* fixed attributes the driver supports */
 	.show_host_node_name = 1,
@@ -4105,6 +4202,9 @@
 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
 	lpfc_enable_npiv_init(phba, lpfc_enable_npiv);
 	lpfc_use_msi_init(phba, lpfc_use_msi);
+	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
+	lpfc_fcp_wq_count_init(phba, lpfc_fcp_wq_count);
+	lpfc_fcp_eq_count_init(phba, lpfc_fcp_eq_count);
 	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
 	lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
 	lpfc_enable_bg_init(phba, lpfc_enable_bg);
@@ -4113,26 +4213,10 @@
 	phba->cfg_soft_wwpn = 0L;
 	lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt);
 	lpfc_prot_sg_seg_cnt_init(phba, lpfc_prot_sg_seg_cnt);
-	/*
-	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
-	 * used to create the sg_dma_buf_pool must be dynamically calculated.
-	 * 2 segments are added since the IOCB needs a command and response bde.
-	 */
-	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
-			sizeof(struct fcp_rsp) +
-			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
-
-	if (phba->cfg_enable_bg) {
-		phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT;
-		phba->cfg_sg_dma_buf_size +=
-			phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
-	}
-
-	/* Also reinitialize the host templates with new values. */
-	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-
 	lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth);
+	lpfc_enable_fip_init(phba, lpfc_enable_fip);
+	lpfc_hba_log_verbose_init(phba, lpfc_log_verbose);
+
 	return;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index f88ce3f..d2a9229 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -23,6 +23,8 @@
 struct fc_rport;
 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
 void lpfc_dump_wakeup_param(struct lpfc_hba *, LPFC_MBOXQ_t *);
+void lpfc_dump_static_vport(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
+int lpfc_dump_fcoe_param(struct lpfc_hba *, struct lpfcMboxq *);
 void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_config_async(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 
@@ -35,17 +37,19 @@
 int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int);
 void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *);
-int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
-		   LPFC_MBOXQ_t *, uint32_t);
+int lpfc_reg_rpi(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
+		 LPFC_MBOXQ_t *, uint32_t);
 void lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
 void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
-void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
+void lpfc_reg_vpi(struct lpfc_vport *, LPFC_MBOXQ_t *);
 void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *);
 void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
+void lpfc_request_features(struct lpfc_hba *, struct lpfcMboxq *);
 
 struct lpfc_vport *lpfc_find_vport_by_did(struct lpfc_hba *, uint32_t);
 void lpfc_cleanup_rpis(struct lpfc_vport *, int);
 int lpfc_linkdown(struct lpfc_hba *);
+void lpfc_linkdown_port(struct lpfc_vport *);
 void lpfc_port_link_failure(struct lpfc_vport *);
 void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
 
@@ -54,6 +58,7 @@
 void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+void lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_enqueue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 struct lpfc_nodelist *lpfc_enable_node(struct lpfc_vport *,
@@ -105,6 +110,7 @@
 int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
 int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *);
 int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t);
+int lpfc_issue_fabric_reglogin(struct lpfc_vport *);
 int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
@@ -149,15 +155,19 @@
 void lpfc_unblock_mgmt_io(struct lpfc_hba *);
 void lpfc_offline_prep(struct lpfc_hba *);
 void lpfc_offline(struct lpfc_hba *);
+void lpfc_reset_hba(struct lpfc_hba *);
 
 int lpfc_sli_setup(struct lpfc_hba *);
 int lpfc_sli_queue_setup(struct lpfc_hba *);
 
 void lpfc_handle_eratt(struct lpfc_hba *);
 void lpfc_handle_latt(struct lpfc_hba *);
-irqreturn_t lpfc_intr_handler(int, void *);
-irqreturn_t lpfc_sp_intr_handler(int, void *);
-irqreturn_t lpfc_fp_intr_handler(int, void *);
+irqreturn_t lpfc_sli_intr_handler(int, void *);
+irqreturn_t lpfc_sli_sp_intr_handler(int, void *);
+irqreturn_t lpfc_sli_fp_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_sp_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_fp_intr_handler(int, void *);
 
 void lpfc_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_config_ring(struct lpfc_hba *, int, LPFC_MBOXQ_t *);
@@ -165,16 +175,32 @@
 void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
 LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
+void __lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_mbox_cmd_check(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_mbox_dev_check(struct lpfc_hba *);
 int lpfc_mbox_tmo_val(struct lpfc_hba *, int);
+void lpfc_init_vfi(struct lpfcMboxq *, struct lpfc_vport *);
+void lpfc_reg_vfi(struct lpfcMboxq *, struct lpfc_vport *, dma_addr_t);
+void lpfc_init_vpi(struct lpfcMboxq *, uint16_t);
+void lpfc_unreg_vfi(struct lpfcMboxq *, uint16_t);
+void lpfc_reg_fcfi(struct lpfc_hba *, struct lpfcMboxq *);
+void lpfc_unreg_fcfi(struct lpfcMboxq *, uint16_t);
+void lpfc_resume_rpi(struct lpfcMboxq *, struct lpfc_nodelist *);
 
 void lpfc_config_hbq(struct lpfc_hba *, uint32_t, struct lpfc_hbq_init *,
 	uint32_t , LPFC_MBOXQ_t *);
 struct hbq_dmabuf *lpfc_els_hbq_alloc(struct lpfc_hba *);
 void lpfc_els_hbq_free(struct lpfc_hba *, struct hbq_dmabuf *);
+struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
+void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
+void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
+			uint16_t);
+void lpfc_unregister_unused_fcf(struct lpfc_hba *);
 
-int lpfc_mem_alloc(struct lpfc_hba *);
+int lpfc_mem_alloc(struct lpfc_hba *, int align);
 void lpfc_mem_free(struct lpfc_hba *);
+void lpfc_mem_free_all(struct lpfc_hba *);
 void lpfc_stop_vport_timers(struct lpfc_vport *);
 
 void lpfc_poll_timeout(unsigned long ptr);
@@ -186,6 +212,7 @@
 uint16_t lpfc_sli_next_iotag(struct lpfc_hba *, struct lpfc_iocbq *);
 void lpfc_sli_cancel_iocbs(struct lpfc_hba *, struct list_head *, uint32_t,
 			   uint32_t);
+void lpfc_sli_wake_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *);
 
 void lpfc_reset_barrier(struct lpfc_hba * phba);
 int lpfc_sli_brdready(struct lpfc_hba *, uint32_t);
@@ -198,12 +225,13 @@
 int lpfc_sli_hba_down(struct lpfc_hba *);
 int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 int lpfc_sli_handle_mb_event(struct lpfc_hba *);
-int lpfc_sli_flush_mbox_queue(struct lpfc_hba *);
+void lpfc_sli_mbox_sys_shutdown(struct lpfc_hba *);
 int lpfc_sli_check_eratt(struct lpfc_hba *);
-int lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
+void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
 				    struct lpfc_sli_ring *, uint32_t);
+int lpfc_sli4_handle_received_buffer(struct lpfc_hba *);
 void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
-int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *,
+int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t,
 			struct lpfc_iocbq *, uint32_t);
 void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
@@ -237,7 +265,7 @@
 
 int lpfc_sli_issue_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 
-int lpfc_sli_issue_iocb_wait(struct lpfc_hba *, struct lpfc_sli_ring *,
+int lpfc_sli_issue_iocb_wait(struct lpfc_hba *, uint32_t,
 			     struct lpfc_iocbq *, struct lpfc_iocbq *,
 			     uint32_t);
 void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *, struct lpfc_iocbq *,
@@ -254,6 +282,12 @@
 const char* lpfc_info(struct Scsi_Host *);
 int lpfc_scan_finished(struct Scsi_Host *, unsigned long);
 
+int lpfc_init_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_sli_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_scsi_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_mbox_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_api_table_setup(struct lpfc_hba *, uint8_t);
+
 void lpfc_get_cfgparam(struct lpfc_hba *);
 void lpfc_get_vport_cfgparam(struct lpfc_vport *);
 int lpfc_alloc_sysfs_attr(struct lpfc_vport *);
@@ -314,8 +348,15 @@
 				struct lpfc_iocbq *);
 struct lpfc_fast_path_event *lpfc_alloc_fast_evt(struct lpfc_hba *);
 void lpfc_free_fast_evt(struct lpfc_hba *, struct lpfc_fast_path_event *);
+void lpfc_create_static_vport(struct lpfc_hba *);
+void lpfc_stop_hba_timers(struct lpfc_hba *);
+void lpfc_stop_port(struct lpfc_hba *);
+void lpfc_parse_fcoe_conf(struct lpfc_hba *, uint8_t *, uint32_t);
+int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
+void lpfc_start_fdiscs(struct lpfc_hba *phba);
 
 #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
 #define HBA_EVENT_RSCN                   5
 #define HBA_EVENT_LINK_UP                2
 #define HBA_EVENT_LINK_DOWN              3
+
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 896c7b0..1dbccfd 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -32,8 +32,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -267,8 +269,6 @@
 	     uint32_t tmo, uint8_t retry)
 {
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli  *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	IOCB_t *icmd;
 	struct lpfc_iocbq *geniocb;
 	int rc;
@@ -331,7 +331,7 @@
 	geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT;
 	geniocb->vport = vport;
 	geniocb->retry = retry;
-	rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, geniocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, geniocb);
@@ -1578,6 +1578,9 @@
 				case LA_8GHZ_LINK:
 					ae->un.PortSpeed = HBA_PORTSPEED_8GBIT;
 				break;
+				case LA_10GHZ_LINK:
+					ae->un.PortSpeed = HBA_PORTSPEED_10GBIT;
+				break;
 				default:
 					ae->un.PortSpeed =
 						HBA_PORTSPEED_UNKNOWN;
@@ -1730,7 +1733,7 @@
 	uint8_t *fwname;
 
 	if (vp->rev.rBit) {
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE)
+		if (psli->sli_flag & LPFC_SLI_ACTIVE)
 			rev = vp->rev.sli2FwRev;
 		else
 			rev = vp->rev.sli1FwRev;
@@ -1756,7 +1759,7 @@
 		}
 		b4 = (rev & 0x0000000f);
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE)
+		if (psli->sli_flag & LPFC_SLI_ACTIVE)
 			fwname = vp->rev.sli2FwName;
 		else
 			fwname = vp->rev.sli1FwName;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 52be564..2b02b1f 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2007-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2007-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -33,8 +33,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -280,6 +282,8 @@
 	struct lpfc_dmabuf *d_buf;
 	struct hbq_dmabuf *hbq_buf;
 
+	if (phba->sli_rev != 3)
+		return 0;
 	cnt = LPFC_HBQINFO_SIZE;
 	spin_lock_irq(&phba->hbalock);
 
@@ -489,12 +493,15 @@
 				 pring->next_cmdidx, pring->local_getidx,
 				 pring->flag, pgpp->rspPutInx, pring->numRiocb);
 	}
-	word0 = readl(phba->HAregaddr);
-	word1 = readl(phba->CAregaddr);
-	word2 = readl(phba->HSregaddr);
-	word3 = readl(phba->HCregaddr);
-	len +=  snprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x HC:%08x\n",
-	word0, word1, word2, word3);
+
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		word0 = readl(phba->HAregaddr);
+		word1 = readl(phba->CAregaddr);
+		word2 = readl(phba->HSregaddr);
+		word3 = readl(phba->HCregaddr);
+		len +=  snprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x "
+				 "HC:%08x\n", word0, word1, word2, word3);
+	}
 	spin_unlock_irq(&phba->hbalock);
 	return len;
 }
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index ffd1089..1142070 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -135,6 +135,7 @@
 #define NLP_NODEV_REMOVE   0x08000000	/* Defer removal till discovery ends */
 #define NLP_TARGET_REMOVE  0x10000000   /* Target remove in process */
 #define NLP_SC_REQ         0x20000000	/* Target requires authentication */
+#define NLP_RPI_VALID      0x80000000	/* nlp_rpi is valid */
 
 /* ndlp usage management macros */
 #define NLP_CHK_NODE_ACT(ndlp)		(((ndlp)->nlp_usg_map \
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index b8b34cf..6bdeb14 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -28,8 +28,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -84,7 +86,8 @@
 	uint32_t ha_copy;
 
 	if (vport->port_state >= LPFC_VPORT_READY ||
-	    phba->link_state == LPFC_LINK_DOWN)
+	    phba->link_state == LPFC_LINK_DOWN ||
+	    phba->sli_rev > LPFC_SLI_REV3)
 		return 0;
 
 	/* Read the HBA Host Attention Register */
@@ -219,7 +222,7 @@
 		icmd->un.elsreq64.myID = vport->fc_myDID;
 
 		/* For ELS_REQUEST64_CR, use the VPI by default */
-		icmd->ulpContext = vport->vpi;
+		icmd->ulpContext = vport->vpi + phba->vpi_base;
 		icmd->ulpCt_h = 0;
 		/* The CT field must be 0=INVALID_RPI for the ECHO cmd */
 		if (elscmd == ELS_CMD_ECHO)
@@ -305,7 +308,7 @@
  *   0 - successfully issued fabric registration login for @vport
  *   -ENXIO -- failed to issue fabric registration login for @vport
  **/
-static int
+int
 lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
 {
 	struct lpfc_hba  *phba = vport->phba;
@@ -345,8 +348,7 @@
 		err = 4;
 		goto fail;
 	}
-	rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox,
-			    0);
+	rc = lpfc_reg_rpi(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox, 0);
 	if (rc) {
 		err = 5;
 		goto fail_free_mbox;
@@ -386,6 +388,75 @@
 }
 
 /**
+ * lpfc_issue_reg_vfi - Register VFI for this vport's fabric login
+ * @vport: pointer to a host virtual N_Port data structure.
+ *
+ * This routine issues a REG_VFI mailbox for the vfi, vpi, fcfi triplet for
+ * the @vport. This mailbox command is necessary for FCoE only.
+ *
+ * Return code
+ *   0 - successfully issued REG_VFI for @vport
+ *   A failure code otherwise.
+ **/
+static int
+lpfc_issue_reg_vfi(struct lpfc_vport *vport)
+{
+	struct lpfc_hba  *phba = vport->phba;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_nodelist *ndlp;
+	struct serv_parm *sp;
+	struct lpfc_dmabuf *dmabuf;
+	int rc = 0;
+
+	sp = &phba->fc_fabparam;
+	ndlp = lpfc_findnode_did(vport, Fabric_DID);
+	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
+		rc = -ENODEV;
+		goto fail;
+	}
+
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	dmabuf->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &dmabuf->phys);
+	if (!dmabuf->virt) {
+		rc = -ENOMEM;
+		goto fail_free_dmabuf;
+	}
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		rc = -ENOMEM;
+		goto fail_free_coherent;
+	}
+	vport->port_state = LPFC_FABRIC_CFG_LINK;
+	memcpy(dmabuf->virt, &phba->fc_fabparam, sizeof(vport->fc_sparam));
+	lpfc_reg_vfi(mboxq, vport, dmabuf->phys);
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_reg_vfi;
+	mboxq->vport = vport;
+	mboxq->context1 = dmabuf;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		rc = -ENXIO;
+		goto fail_free_mbox;
+	}
+	return 0;
+
+fail_free_mbox:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+fail_free_coherent:
+	lpfc_mbuf_free(phba, dmabuf->virt, dmabuf->phys);
+fail_free_dmabuf:
+	kfree(dmabuf);
+fail:
+	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+	lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+		"0289 Issue Register VFI failed: Err %d\n", rc);
+	return rc;
+}
+
+/**
  * lpfc_cmpl_els_flogi_fabric - Completion function for flogi to a fabric port
  * @vport: pointer to a host virtual N_Port data structure.
  * @ndlp: pointer to a node-list data structure.
@@ -497,17 +568,24 @@
 		}
 	}
 
-	lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
-
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
-	    vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) {
-		lpfc_register_new_vport(phba, vport, ndlp);
-		return 0;
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
+		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
+		    vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
+			lpfc_register_new_vport(phba, vport, ndlp);
+		else
+			lpfc_issue_fabric_reglogin(vport);
+	} else {
+		ndlp->nlp_type |= NLP_FABRIC;
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+		if (vport->vfi_state & LPFC_VFI_REGISTERED) {
+			lpfc_start_fdiscs(phba);
+			lpfc_do_scr_ns_plogi(phba, vport);
+		} else
+			lpfc_issue_reg_vfi(vport);
 	}
-	lpfc_issue_fabric_reglogin(vport);
 	return 0;
 }
-
 /**
  * lpfc_cmpl_els_flogi_nport - Completion function for flogi to an N_Port
  * @vport: pointer to a host virtual N_Port data structure.
@@ -815,9 +893,14 @@
 	if (sp->cmn.fcphHigh < FC_PH3)
 		sp->cmn.fcphHigh = FC_PH3;
 
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
+	if  (phba->sli_rev == LPFC_SLI_REV4) {
+		elsiocb->iocb.ulpCt_h = ((SLI4_CT_FCFI >> 1) & 1);
+		elsiocb->iocb.ulpCt_l = (SLI4_CT_FCFI & 1);
+		/* FLOGI needs to be 3 for WQE FCFI */
+		/* Set the fcfi to the fcfi we registered with */
+		elsiocb->iocb.ulpContext = phba->fcf.fcfi;
+	} else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
 		sp->cmn.request_multiple_Nport = 1;
-
 		/* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */
 		icmd->ulpCt_h = 1;
 		icmd->ulpCt_l = 0;
@@ -930,6 +1013,8 @@
 		if (!ndlp)
 			return 0;
 		lpfc_nlp_init(vport, ndlp, Fabric_DID);
+		/* Set the node type */
+		ndlp->nlp_type |= NLP_FABRIC;
 		/* Put ndlp onto node list */
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -1350,14 +1435,12 @@
 	IOCB_t *icmd;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int ret;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 
 	ndlp = lpfc_findnode_did(vport, did);
 	if (ndlp && !NLP_CHK_NODE_ACT(ndlp))
@@ -1391,7 +1474,7 @@
 
 	phba->fc_stat.elsXmitPLOGI++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi;
-	ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (ret == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
@@ -1501,14 +1584,9 @@
 	PRLI *npr;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 
-	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
-
 	cmdsize = (sizeof(uint32_t) + sizeof(PRLI));
 	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
 				     ndlp->nlp_DID, ELS_CMD_PRLI);
@@ -1550,7 +1628,8 @@
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_PRLI_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_PRLI_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -1608,7 +1687,8 @@
 	 * and continue discovery.
 	 */
 	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
-	    !(vport->fc_flag & FC_RSCN_MODE)) {
+	    !(vport->fc_flag & FC_RSCN_MODE) &&
+	    (phba->sli_rev < LPFC_SLI_REV4)) {
 		lpfc_issue_reg_vpi(phba, vport);
 		return;
 	}
@@ -1788,8 +1868,6 @@
 	ADISC *ap;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 
@@ -1822,7 +1900,8 @@
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_ADISC_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_ADISC_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -1937,15 +2016,10 @@
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
-	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];
-
 	spin_lock_irq(shost->host_lock);
 	if (ndlp->nlp_flag & NLP_LOGO_SND) {
 		spin_unlock_irq(shost->host_lock);
@@ -1978,7 +2052,7 @@
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_LOGO_SND;
 	spin_unlock_irq(shost->host_lock);
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
@@ -2058,14 +2132,12 @@
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	struct lpfc_nodelist *ndlp;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	cmdsize = (sizeof(uint32_t) + sizeof(SCR));
 
 	ndlp = lpfc_findnode_did(vport, nportid);
@@ -2108,7 +2180,8 @@
 
 	phba->fc_stat.elsXmitSCR++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		/* The additional lpfc_nlp_put will cause the following
 		 * lpfc_els_free_iocb routine to trigger the rlease of
 		 * the node.
@@ -2152,7 +2225,6 @@
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	FARP *fp;
 	uint8_t *pcmd;
@@ -2162,7 +2234,6 @@
 	struct lpfc_nodelist *ndlp;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	cmdsize = (sizeof(uint32_t) + sizeof(FARP));
 
 	ndlp = lpfc_findnode_did(vport, nportid);
@@ -2219,7 +2290,8 @@
 
 	phba->fc_stat.elsXmitFARPR++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		/* The additional lpfc_nlp_put will cause the following
 		 * lpfc_els_free_iocb routine to trigger the release of
 		 * the node.
@@ -2949,6 +3021,14 @@
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 
+	/*
+	 * This routine is used to register and unregister in previous SLI
+	 * modes.
+	 */
+	if ((pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) &&
+	    (phba->sli_rev == LPFC_SLI_REV4))
+		lpfc_sli4_free_rpi(phba, pmb->u.mb.un.varUnregLogin.rpi);
+
 	pmb->context1 = NULL;
 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
 	kfree(mp);
@@ -2961,6 +3041,7 @@
 		 */
 		lpfc_nlp_not_used(ndlp);
 	}
+
 	return;
 }
 
@@ -3170,7 +3251,6 @@
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
@@ -3178,7 +3258,6 @@
 	ELS_PKT *els_pkt_ptr;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	oldcmd = &oldiocb->iocb;
 
 	switch (flag) {
@@ -3266,7 +3345,7 @@
 	}
 
 	phba->fc_stat.elsXmitACC++;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3305,15 +3384,12 @@
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
-
 	cmdsize = 2 * sizeof(uint32_t);
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
 				     ndlp->nlp_DID, ELS_CMD_LS_RJT);
@@ -3346,7 +3422,7 @@
 
 	phba->fc_stat.elsXmitLSRJT++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
@@ -3379,8 +3455,6 @@
 		       struct lpfc_nodelist *ndlp)
 {
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli  *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	ADISC *ap;
 	IOCB_t *icmd, *oldcmd;
 	struct lpfc_iocbq *elsiocb;
@@ -3422,7 +3496,7 @@
 
 	phba->fc_stat.elsXmitACC++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3459,14 +3533,12 @@
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 
 	cmdsize = sizeof(uint32_t) + sizeof(PRLI);
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
@@ -3520,7 +3592,7 @@
 	phba->fc_stat.elsXmitACC++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3562,15 +3634,12 @@
 	RNID *rn;
 	IOCB_t *icmd, *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];
-
 	cmdsize = sizeof(uint32_t) + sizeof(uint32_t)
 					+ (2 * sizeof(struct lpfc_name));
 	if (format)
@@ -3626,7 +3695,7 @@
 	elsiocb->context1 = NULL;  /* Don't need ndlp for cmpl,
 				    * it could be freed */
 
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3839,7 +3908,9 @@
 			payload_len -= sizeof(uint32_t);
 			switch (rscn_did.un.b.resv & RSCN_ADDRESS_FORMAT_MASK) {
 			case RSCN_ADDRESS_FORMAT_PORT:
-				if (ns_did.un.word == rscn_did.un.word)
+				if ((ns_did.un.b.domain == rscn_did.un.b.domain)
+				    && (ns_did.un.b.area == rscn_did.un.b.area)
+				    && (ns_did.un.b.id == rscn_did.un.b.id))
 					goto return_did_out;
 				break;
 			case RSCN_ADDRESS_FORMAT_AREA:
@@ -4300,7 +4371,7 @@
 			lpfc_init_link(phba, mbox,
 				       phba->cfg_topology,
 				       phba->cfg_link_speed);
-			mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+			mbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0;
 			mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 			mbox->vport = vport;
 			rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
@@ -4440,8 +4511,6 @@
 static void
 lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	MAILBOX_t *mb;
 	IOCB_t *icmd;
 	RPS_RSP *rps_rsp;
@@ -4451,7 +4520,7 @@
 	uint16_t xri, status;
 	uint32_t cmdsize;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	ndlp = (struct lpfc_nodelist *) pmb->context2;
 	xri = (uint16_t) ((unsigned long)(pmb->context1));
@@ -4507,7 +4576,7 @@
 			 ndlp->nlp_rpi);
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 	phba->fc_stat.elsXmitACC++;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR)
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) == IOCB_ERROR)
 		lpfc_els_free_iocb(phba, elsiocb);
 	return;
 }
@@ -4616,8 +4685,6 @@
 	IOCB_t *icmd, *oldcmd;
 	RPL_RSP rpl_rsp;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	uint8_t *pcmd;
 
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
@@ -4654,7 +4721,8 @@
 			 ndlp->nlp_rpi);
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 	phba->fc_stat.elsXmitACC++;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
 	}
@@ -4883,7 +4951,10 @@
 		} else {
 			/* FAN verified - skip FLOGI */
 			vport->fc_myDID = vport->fc_prevDID;
-			lpfc_issue_fabric_reglogin(vport);
+			if (phba->sli_rev < LPFC_SLI_REV4)
+				lpfc_issue_fabric_reglogin(vport);
+			else
+				lpfc_issue_reg_vfi(vport);
 		}
 	}
 	return 0;
@@ -5566,11 +5637,10 @@
 
 dropit:
 	if (vport && !(vport->load_flag & FC_UNLOADING))
-		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
-			"(%d):0111 Dropping received ELS cmd "
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+			"0111 Dropping received ELS cmd "
 			"Data: x%x x%x x%x\n",
-			vport->vpi, icmd->ulpStatus,
-			icmd->un.ulpWord[4], icmd->ulpTimeout);
+			icmd->ulpStatus, icmd->un.ulpWord[4], icmd->ulpTimeout);
 	phba->fc_stat.elsRcvDrop++;
 }
 
@@ -5646,10 +5716,9 @@
 	     icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
 		if (icmd->unsli3.rcvsli3.vpi == 0xffff)
 			vport = phba->pport;
-		else {
-			uint16_t vpi = icmd->unsli3.rcvsli3.vpi;
-			vport = lpfc_find_vport_by_vpid(phba, vpi);
-		}
+		else
+			vport = lpfc_find_vport_by_vpid(phba,
+				icmd->unsli3.rcvsli3.vpi - phba->vpi_base);
 	}
 	/* If there are no BDEs associated
 	 * with this IOCB, there is nothing to do.
@@ -5781,7 +5850,7 @@
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	spin_lock_irq(shost->host_lock);
 	vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
@@ -5818,7 +5887,10 @@
 
 	} else {
 		if (vport == phba->pport)
-			lpfc_issue_fabric_reglogin(vport);
+			if (phba->sli_rev < LPFC_SLI_REV4)
+				lpfc_issue_fabric_reglogin(vport);
+			else
+				lpfc_issue_reg_vfi(vport);
 		else
 			lpfc_do_scr_ns_plogi(phba, vport);
 	}
@@ -5850,7 +5922,7 @@
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (mbox) {
-		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox);
+		lpfc_reg_vpi(vport, mbox);
 		mbox->vport = vport;
 		mbox->context2 = lpfc_nlp_get(ndlp);
 		mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport;
@@ -6139,7 +6211,6 @@
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
 	uint8_t *pcmd;
@@ -6169,7 +6240,8 @@
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_LOGO_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_LOGO_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -6224,7 +6296,6 @@
 	struct lpfc_iocbq *iocb;
 	unsigned long iflags;
 	int ret;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	IOCB_t *cmd;
 
 repeat:
@@ -6248,7 +6319,7 @@
 			"Fabric sched1:   ste:x%x",
 			iocb->vport->port_state, 0, 0);
 
-		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
+		ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0);
 
 		if (ret == IOCB_ERROR) {
 			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
@@ -6394,7 +6465,6 @@
 lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb)
 {
 	unsigned long iflags;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	int ready;
 	int ret;
 
@@ -6418,7 +6488,7 @@
 			"Fabric sched2:   ste:x%x",
 			iocb->vport->port_state, 0, 0);
 
-		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
+		ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0);
 
 		if (ret == IOCB_ERROR) {
 			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
@@ -6524,3 +6594,38 @@
 	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
 			      IOERR_SLI_ABORTED);
 }
+
+/**
+ * lpfc_sli4_els_xri_aborted - Slow-path process of els xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the els xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 slow-path
+ * ELS aborted xri.
+ **/
+void
+lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba,
+			  struct sli4_wcqe_xri_aborted *axri)
+{
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL;
+	unsigned long iflag = 0;
+
+	spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock, iflag);
+	list_for_each_entry_safe(sglq_entry, sglq_next,
+			&phba->sli4_hba.lpfc_abts_els_sgl_list, list) {
+		if (sglq_entry->sli4_xritag == xri) {
+			list_del(&sglq_entry->list);
+			spin_unlock_irqrestore(
+					&phba->sli4_hba.abts_sgl_list_lock,
+					 iflag);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+
+			list_add_tail(&sglq_entry->list,
+				&phba->sli4_hba.lpfc_sgl_list);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.abts_sgl_list_lock, iflag);
+}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index e764ce0..35c41ae 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -29,10 +29,12 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_scsi.h"
 #include "lpfc.h"
 #include "lpfc_logmsg.h"
@@ -273,6 +275,8 @@
 	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
 	    (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE))
 		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+
+	lpfc_unregister_unused_fcf(phba);
 }
 
 /**
@@ -295,10 +299,11 @@
 
 	ret = kzalloc(sizeof(struct lpfc_fast_path_event),
 			GFP_ATOMIC);
-	if (ret)
+	if (ret) {
 		atomic_inc(&phba->fast_event_count);
-	INIT_LIST_HEAD(&ret->work_evt.evt_listp);
-	ret->work_evt.evt = LPFC_EVT_FASTPATH_MGMT_EVT;
+		INIT_LIST_HEAD(&ret->work_evt.evt_listp);
+		ret->work_evt.evt = LPFC_EVT_FASTPATH_MGMT_EVT;
+	}
 	return ret;
 }
 
@@ -491,6 +496,10 @@
 	phba->work_ha = 0;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* First, try to post the next mailbox command to SLI4 device */
+	if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+		lpfc_sli4_post_async_mbox(phba);
+
 	if (ha_copy & HA_ERATT)
 		/* Handle the error attention event */
 		lpfc_handle_eratt(phba);
@@ -501,9 +510,27 @@
 	if (ha_copy & HA_LATT)
 		lpfc_handle_latt(phba);
 
+	/* Process SLI4 events */
+	if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) {
+		if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
+			lpfc_sli4_fcp_xri_abort_event_proc(phba);
+		if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
+			lpfc_sli4_els_xri_abort_event_proc(phba);
+		if (phba->hba_flag & ASYNC_EVENT)
+			lpfc_sli4_async_event_proc(phba);
+		if (phba->hba_flag & HBA_POST_RECEIVE_BUFFER) {
+			spin_lock_irq(&phba->hbalock);
+			phba->hba_flag &= ~HBA_POST_RECEIVE_BUFFER;
+			spin_unlock_irq(&phba->hbalock);
+			lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
+		}
+		if (phba->hba_flag & HBA_RECEIVE_BUFFER)
+			lpfc_sli4_handle_received_buffer(phba);
+	}
+
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi; i++) {
+		for (i = 0; i <= phba->max_vports; i++) {
 			/*
 			 * We could have no vports in array if unloading, so if
 			 * this happens then just use the pport
@@ -555,23 +582,24 @@
 		/*
 		 * Turn on Ring interrupts
 		 */
-		spin_lock_irq(&phba->hbalock);
-		control = readl(phba->HCregaddr);
-		if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
-			lpfc_debugfs_slow_ring_trc(phba,
-				"WRK Enable ring: cntl:x%x hacopy:x%x",
-				control, ha_copy, 0);
+		if (phba->sli_rev <= LPFC_SLI_REV3) {
+			spin_lock_irq(&phba->hbalock);
+			control = readl(phba->HCregaddr);
+			if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
+				lpfc_debugfs_slow_ring_trc(phba,
+					"WRK Enable ring: cntl:x%x hacopy:x%x",
+					control, ha_copy, 0);
 
-			control |= (HC_R0INT_ENA << LPFC_ELS_RING);
-			writel(control, phba->HCregaddr);
-			readl(phba->HCregaddr); /* flush */
+				control |= (HC_R0INT_ENA << LPFC_ELS_RING);
+				writel(control, phba->HCregaddr);
+				readl(phba->HCregaddr); /* flush */
+			} else {
+				lpfc_debugfs_slow_ring_trc(phba,
+					"WRK Ring ok:     cntl:x%x hacopy:x%x",
+					control, ha_copy, 0);
+			}
+			spin_unlock_irq(&phba->hbalock);
 		}
-		else {
-			lpfc_debugfs_slow_ring_trc(phba,
-				"WRK Ring ok:     cntl:x%x hacopy:x%x",
-				control, ha_copy, 0);
-		}
-		spin_unlock_irq(&phba->hbalock);
 	}
 	lpfc_work_list_done(phba);
 }
@@ -689,7 +717,7 @@
 	lpfc_can_disctmo(vport);
 }
 
-static void
+void
 lpfc_linkdown_port(struct lpfc_vport *vport)
 {
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
@@ -716,6 +744,7 @@
 	if (phba->link_state == LPFC_LINK_DOWN)
 		return 0;
 	spin_lock_irq(&phba->hbalock);
+	phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_DISCOVERED);
 	if (phba->link_state > LPFC_LINK_DOWN) {
 		phba->link_state = LPFC_LINK_DOWN;
 		phba->pport->fc_flag &= ~FC_LBIT;
@@ -723,7 +752,7 @@
 	spin_unlock_irq(&phba->hbalock);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			/* Issue a LINK DOWN event to all nodes */
 			lpfc_linkdown_port(vports[i]);
 		}
@@ -833,10 +862,11 @@
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++)
 			lpfc_linkup_port(vports[i]);
 	lpfc_destroy_vport_work_array(phba, vports);
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
+	    (phba->sli_rev < LPFC_SLI_REV4))
 		lpfc_issue_clear_la(phba, phba->pport);
 
 	return 0;
@@ -854,7 +884,7 @@
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_sli   *psli = &phba->sli;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t control;
 
 	/* Since we don't do discovery right now, turn these off here */
@@ -917,7 +947,7 @@
 {
 	struct lpfc_vport *vport = pmb->vport;
 
-	if (pmb->mb.mbxStatus)
+	if (pmb->u.mb.mbxStatus)
 		goto out;
 
 	mempool_free(pmb, phba->mbox_mem_pool);
@@ -945,7 +975,7 @@
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
 			 "0306 CONFIG_LINK mbxStatus error x%x "
 			 "HBA state x%x\n",
-			 pmb->mb.mbxStatus, vport->port_state);
+			 pmb->u.mb.mbxStatus, vport->port_state);
 	mempool_free(pmb, phba->mbox_mem_pool);
 
 	lpfc_linkdown(phba);
@@ -959,9 +989,592 @@
 }
 
 static void
+lpfc_mbx_cmpl_reg_fcfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+	unsigned long flags;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+			 "2017 REG_FCFI mbxStatus error x%x "
+			 "HBA state x%x\n",
+			 mboxq->u.mb.mbxStatus, vport->port_state);
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* Start FCoE discovery by sending a FLOGI. */
+	phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, &mboxq->u.mqe.un.reg_fcfi);
+	/* Set the FCFI registered flag */
+	spin_lock_irqsave(&phba->hbalock, flags);
+	phba->fcf.fcf_flag |= FCF_REGISTERED;
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+	if (vport->port_state != LPFC_FLOGI) {
+		spin_lock_irqsave(&phba->hbalock, flags);
+		phba->fcf.fcf_flag |= (FCF_DISCOVERED | FCF_IN_USE);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		lpfc_initial_flogi(vport);
+	}
+
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_fab_name_match - Check if the fcf fabric name match.
+ * @fab_name: pointer to fabric name.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine compare the fcf record's fabric name with provided
+ * fabric name. If the fabric name are identical this function
+ * returns 1 else return 0.
+ **/
+static uint32_t
+lpfc_fab_name_match(uint8_t *fab_name, struct fcf_record *new_fcf_record)
+{
+	if ((fab_name[0] ==
+		bf_get(lpfc_fcf_record_fab_name_0, new_fcf_record)) &&
+	    (fab_name[1] ==
+		bf_get(lpfc_fcf_record_fab_name_1, new_fcf_record)) &&
+	    (fab_name[2] ==
+		bf_get(lpfc_fcf_record_fab_name_2, new_fcf_record)) &&
+	    (fab_name[3] ==
+		bf_get(lpfc_fcf_record_fab_name_3, new_fcf_record)) &&
+	    (fab_name[4] ==
+		bf_get(lpfc_fcf_record_fab_name_4, new_fcf_record)) &&
+	    (fab_name[5] ==
+		bf_get(lpfc_fcf_record_fab_name_5, new_fcf_record)) &&
+	    (fab_name[6] ==
+		bf_get(lpfc_fcf_record_fab_name_6, new_fcf_record)) &&
+	    (fab_name[7] ==
+		bf_get(lpfc_fcf_record_fab_name_7, new_fcf_record)))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * lpfc_mac_addr_match - Check if the fcf mac address match.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine compare the fcf record's mac address with HBA's
+ * FCF mac address. If the mac addresses are identical this function
+ * returns 1 else return 0.
+ **/
+static uint32_t
+lpfc_mac_addr_match(struct lpfc_hba *phba, struct fcf_record *new_fcf_record)
+{
+	if ((phba->fcf.mac_addr[0] ==
+		bf_get(lpfc_fcf_record_mac_0, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[1] ==
+		bf_get(lpfc_fcf_record_mac_1, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[2] ==
+		bf_get(lpfc_fcf_record_mac_2, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[3] ==
+		bf_get(lpfc_fcf_record_mac_3, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[4] ==
+		bf_get(lpfc_fcf_record_mac_4, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[5] ==
+		bf_get(lpfc_fcf_record_mac_5, new_fcf_record)))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * lpfc_copy_fcf_record - Copy fcf information to lpfc_hba.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine copies the FCF information from the FCF
+ * record to lpfc_hba data structure.
+ **/
+static void
+lpfc_copy_fcf_record(struct lpfc_hba *phba, struct fcf_record *new_fcf_record)
+{
+	phba->fcf.fabric_name[0] =
+		bf_get(lpfc_fcf_record_fab_name_0, new_fcf_record);
+	phba->fcf.fabric_name[1] =
+		bf_get(lpfc_fcf_record_fab_name_1, new_fcf_record);
+	phba->fcf.fabric_name[2] =
+		bf_get(lpfc_fcf_record_fab_name_2, new_fcf_record);
+	phba->fcf.fabric_name[3] =
+		bf_get(lpfc_fcf_record_fab_name_3, new_fcf_record);
+	phba->fcf.fabric_name[4] =
+		bf_get(lpfc_fcf_record_fab_name_4, new_fcf_record);
+	phba->fcf.fabric_name[5] =
+		bf_get(lpfc_fcf_record_fab_name_5, new_fcf_record);
+	phba->fcf.fabric_name[6] =
+		bf_get(lpfc_fcf_record_fab_name_6, new_fcf_record);
+	phba->fcf.fabric_name[7] =
+		bf_get(lpfc_fcf_record_fab_name_7, new_fcf_record);
+	phba->fcf.mac_addr[0] =
+		bf_get(lpfc_fcf_record_mac_0, new_fcf_record);
+	phba->fcf.mac_addr[1] =
+		bf_get(lpfc_fcf_record_mac_1, new_fcf_record);
+	phba->fcf.mac_addr[2] =
+		bf_get(lpfc_fcf_record_mac_2, new_fcf_record);
+	phba->fcf.mac_addr[3] =
+		bf_get(lpfc_fcf_record_mac_3, new_fcf_record);
+	phba->fcf.mac_addr[4] =
+		bf_get(lpfc_fcf_record_mac_4, new_fcf_record);
+	phba->fcf.mac_addr[5] =
+		bf_get(lpfc_fcf_record_mac_5, new_fcf_record);
+	phba->fcf.fcf_indx = bf_get(lpfc_fcf_record_fcf_index, new_fcf_record);
+	phba->fcf.priority = new_fcf_record->fip_priority;
+}
+
+/**
+ * lpfc_register_fcf - Register the FCF with hba.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine issues a register fcfi mailbox command to register
+ * the fcf with HBA.
+ **/
+static void
+lpfc_register_fcf(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *fcf_mbxq;
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&phba->hbalock, flags);
+
+	/* If the FCF is not availabe do nothing. */
+	if (!(phba->fcf.fcf_flag & FCF_AVAILABLE)) {
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		return;
+	}
+
+	/* The FCF is already registered, start discovery */
+	if (phba->fcf.fcf_flag & FCF_REGISTERED) {
+		phba->fcf.fcf_flag |= (FCF_DISCOVERED | FCF_IN_USE);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		if (phba->pport->port_state != LPFC_FLOGI)
+			lpfc_initial_flogi(phba->pport);
+		return;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+
+	fcf_mbxq = mempool_alloc(phba->mbox_mem_pool,
+		GFP_KERNEL);
+	if (!fcf_mbxq)
+		return;
+
+	lpfc_reg_fcfi(phba, fcf_mbxq);
+	fcf_mbxq->vport = phba->pport;
+	fcf_mbxq->mbox_cmpl = lpfc_mbx_cmpl_reg_fcfi;
+	rc = lpfc_sli_issue_mbox(phba, fcf_mbxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED)
+		mempool_free(fcf_mbxq, phba->mbox_mem_pool);
+
+	return;
+}
+
+/**
+ * lpfc_match_fcf_conn_list - Check if the FCF record can be used for discovery.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ * @boot_flag: Indicates if this record used by boot bios.
+ * @addr_mode: The address mode to be used by this FCF
+ *
+ * This routine compare the fcf record with connect list obtained from the
+ * config region to decide if this FCF can be used for SAN discovery. It returns
+ * 1 if this record can be used for SAN discovery else return zero. If this FCF
+ * record can be used for SAN discovery, the boot_flag will indicate if this FCF
+ * is used by boot bios and addr_mode will indicate the addressing mode to be
+ * used for this FCF when the function returns.
+ * If the FCF record need to be used with a particular vlan id, the vlan is
+ * set in the vlan_id on return of the function. If not VLAN tagging need to
+ * be used with the FCF vlan_id will be set to 0xFFFF;
+ **/
+static int
+lpfc_match_fcf_conn_list(struct lpfc_hba *phba,
+			struct fcf_record *new_fcf_record,
+			uint32_t *boot_flag, uint32_t *addr_mode,
+			uint16_t *vlan_id)
+{
+	struct lpfc_fcf_conn_entry *conn_entry;
+
+	if (!phba->cfg_enable_fip) {
+		*boot_flag = 0;
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record);
+		if (phba->valid_vlan)
+			*vlan_id = phba->vlan_id;
+		else
+			*vlan_id = 0xFFFF;
+		return 1;
+	}
+
+	/*
+	 * If there are no FCF connection table entry, driver connect to all
+	 * FCFs.
+	 */
+	if (list_empty(&phba->fcf_conn_rec_list)) {
+		*boot_flag = 0;
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+			new_fcf_record);
+		*vlan_id = 0xFFFF;
+		return 1;
+	}
+
+	list_for_each_entry(conn_entry, &phba->fcf_conn_rec_list, list) {
+		if (!(conn_entry->conn_rec.flags & FCFCNCT_VALID))
+			continue;
+
+		if ((conn_entry->conn_rec.flags & FCFCNCT_FBNM_VALID) &&
+			!lpfc_fab_name_match(conn_entry->conn_rec.fabric_name,
+				new_fcf_record))
+			continue;
+
+		if (conn_entry->conn_rec.flags & FCFCNCT_VLAN_VALID) {
+			/*
+			 * If the vlan bit map does not have the bit set for the
+			 * vlan id to be used, then it is not a match.
+			 */
+			if (!(new_fcf_record->vlan_bitmap
+				[conn_entry->conn_rec.vlan_tag / 8] &
+				(1 << (conn_entry->conn_rec.vlan_tag % 8))))
+				continue;
+		}
+
+		/*
+		 * Check if the connection record specifies a required
+		 * addressing mode.
+		 */
+		if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			!(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED)) {
+
+			/*
+			 * If SPMA required but FCF not support this continue.
+			 */
+			if ((conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+				!(bf_get(lpfc_fcf_record_mac_addr_prov,
+					new_fcf_record) & LPFC_FCF_SPMA))
+				continue;
+
+			/*
+			 * If FPMA required but FCF not support this continue.
+			 */
+			if (!(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+				!(bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record) & LPFC_FCF_FPMA))
+				continue;
+		}
+
+		/*
+		 * This fcf record matches filtering criteria.
+		 */
+		if (conn_entry->conn_rec.flags & FCFCNCT_BOOT)
+			*boot_flag = 1;
+		else
+			*boot_flag = 0;
+
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record);
+		/*
+		 * If the user specified a required address mode, assign that
+		 * address mode
+		 */
+		if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(!(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED)))
+			*addr_mode = (conn_entry->conn_rec.flags &
+				FCFCNCT_AM_SPMA) ?
+				LPFC_FCF_SPMA : LPFC_FCF_FPMA;
+		/*
+		 * If the user specified a prefered address mode, use the
+		 * addr mode only if FCF support the addr_mode.
+		 */
+		else if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+			(*addr_mode & LPFC_FCF_SPMA))
+				*addr_mode = LPFC_FCF_SPMA;
+		else if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED) &&
+			!(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+			(*addr_mode & LPFC_FCF_FPMA))
+				*addr_mode = LPFC_FCF_FPMA;
+		/*
+		 * If user did not specify any addressing mode, use FPMA if
+		 * possible else use SPMA.
+		 */
+		else if (*addr_mode & LPFC_FCF_FPMA)
+			*addr_mode = LPFC_FCF_FPMA;
+
+		if (conn_entry->conn_rec.flags & FCFCNCT_VLAN_VALID)
+			*vlan_id = conn_entry->conn_rec.vlan_tag;
+		else
+			*vlan_id = 0xFFFF;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * lpfc_mbx_cmpl_read_fcf_record - Completion handler for read_fcf mbox.
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to mailbox object.
+ *
+ * This function iterate through all the fcf records available in
+ * HBA and choose the optimal FCF record for discovery. After finding
+ * the FCF for discovery it register the FCF record and kick start
+ * discovery.
+ * If FCF_IN_USE flag is set in currently used FCF, the routine try to
+ * use a FCF record which match fabric name and mac address of the
+ * currently used FCF record.
+ * If the driver support only one FCF, it will try to use the FCF record
+ * used by BOOT_BIOS.
+ */
+void
+lpfc_mbx_cmpl_read_fcf_record(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	uint8_t *bytep;
+	struct lpfc_mbx_sge sge;
+	struct lpfc_mbx_read_fcf_tbl *read_fcf;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	struct fcf_record *new_fcf_record;
+	int rc;
+	uint32_t boot_flag, addr_mode;
+	uint32_t next_fcf_index;
+	unsigned long flags;
+	uint16_t vlan_id;
+
+	/* Get the first SGE entry from the non-embedded DMA memory. This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2524 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		goto out;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+
+	shdr = (union lpfc_sli4_cfg_shdr *)virt_addr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+				 &shdr->response);
+	/*
+	 * The FCF Record was read and there is no reason for the driver
+	 * to maintain the FCF record data or memory. Instead, just need
+	 * to book keeping the FCFIs can be used.
+	 */
+	if (shdr_status || shdr_add_status) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2521 READ_FCF_RECORD mailbox failed "
+				"with status x%x add_status x%x, mbx\n",
+				shdr_status, shdr_add_status);
+		goto out;
+	}
+	/* Interpreting the returned information of FCF records */
+	read_fcf = (struct lpfc_mbx_read_fcf_tbl *)virt_addr;
+	lpfc_sli_pcimem_bcopy(read_fcf, read_fcf,
+			      sizeof(struct lpfc_mbx_read_fcf_tbl));
+	next_fcf_index = bf_get(lpfc_mbx_read_fcf_tbl_nxt_vindx, read_fcf);
+
+	new_fcf_record = (struct fcf_record *)(virt_addr +
+			  sizeof(struct lpfc_mbx_read_fcf_tbl));
+	lpfc_sli_pcimem_bcopy(new_fcf_record, new_fcf_record,
+			      sizeof(struct fcf_record));
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+
+	rc = lpfc_match_fcf_conn_list(phba, new_fcf_record,
+				      &boot_flag, &addr_mode,
+					&vlan_id);
+	/*
+	 * If the fcf record does not match with connect list entries
+	 * read the next entry.
+	 */
+	if (!rc)
+		goto read_next_fcf;
+	/*
+	 * If this is not the first FCF discovery of the HBA, use last
+	 * FCF record for the discovery.
+	 */
+	spin_lock_irqsave(&phba->hbalock, flags);
+	if (phba->fcf.fcf_flag & FCF_IN_USE) {
+		if (lpfc_fab_name_match(phba->fcf.fabric_name,
+			new_fcf_record) &&
+		    lpfc_mac_addr_match(phba, new_fcf_record)) {
+			phba->fcf.fcf_flag |= FCF_AVAILABLE;
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto out;
+		}
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto read_next_fcf;
+	}
+	if (phba->fcf.fcf_flag & FCF_AVAILABLE) {
+		/*
+		 * If the current FCF record does not have boot flag
+		 * set and new fcf record has boot flag set, use the
+		 * new fcf record.
+		 */
+		if (boot_flag && !(phba->fcf.fcf_flag & FCF_BOOT_ENABLE)) {
+			/* Use this FCF record */
+			lpfc_copy_fcf_record(phba, new_fcf_record);
+			phba->fcf.addr_mode = addr_mode;
+			phba->fcf.fcf_flag |= FCF_BOOT_ENABLE;
+			if (vlan_id != 0xFFFF) {
+				phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+				phba->fcf.vlan_id = vlan_id;
+			}
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		/*
+		 * If the current FCF record has boot flag set and the
+		 * new FCF record does not have boot flag, read the next
+		 * FCF record.
+		 */
+		if (!boot_flag && (phba->fcf.fcf_flag & FCF_BOOT_ENABLE)) {
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		/*
+		 * If there is a record with lower priority value for
+		 * the current FCF, use that record.
+		 */
+		if (lpfc_fab_name_match(phba->fcf.fabric_name, new_fcf_record)
+			&& (new_fcf_record->fip_priority <
+				phba->fcf.priority)) {
+			/* Use this FCF record */
+			lpfc_copy_fcf_record(phba, new_fcf_record);
+			phba->fcf.addr_mode = addr_mode;
+			if (vlan_id != 0xFFFF) {
+				phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+				phba->fcf.vlan_id = vlan_id;
+			}
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto read_next_fcf;
+	}
+	/*
+	 * This is the first available FCF record, use this
+	 * record.
+	 */
+	lpfc_copy_fcf_record(phba, new_fcf_record);
+	phba->fcf.addr_mode = addr_mode;
+	if (boot_flag)
+		phba->fcf.fcf_flag |= FCF_BOOT_ENABLE;
+	phba->fcf.fcf_flag |= FCF_AVAILABLE;
+	if (vlan_id != 0xFFFF) {
+		phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+		phba->fcf.vlan_id = vlan_id;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+	goto read_next_fcf;
+
+read_next_fcf:
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+	if (next_fcf_index == LPFC_FCOE_FCF_NEXT_NONE || next_fcf_index == 0)
+		lpfc_register_fcf(phba);
+	else
+		lpfc_sli4_read_fcf_record(phba, next_fcf_index);
+	return;
+
+out:
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+	lpfc_register_fcf(phba);
+
+	return;
+}
+
+/**
+ * lpfc_start_fdiscs - send fdiscs for each vports on this port.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This function loops through the list of vports on the @phba and issues an
+ * FDISC if possible.
+ */
+void
+lpfc_start_fdiscs(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	int i;
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			if (vports[i]->port_type == LPFC_PHYSICAL_PORT)
+				continue;
+			/* There are no vpi for this vport */
+			if (vports[i]->vpi > phba->max_vpi) {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_FAILED);
+				continue;
+			}
+			if (phba->fc_topology == TOPOLOGY_LOOP) {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_LINKDOWN);
+				continue;
+			}
+			if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
+				lpfc_initial_fdisc(vports[i]);
+			else {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_NO_FABRIC_SUPP);
+				lpfc_printf_vlog(vports[i], KERN_ERR,
+						 LOG_ELS,
+						 "0259 No NPIV "
+						 "Fabric support\n");
+			}
+		}
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
+}
+
+void
+lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_dmabuf *dmabuf = mboxq->context1;
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+			 "2018 REG_VFI mbxStatus error x%x "
+			 "HBA state x%x\n",
+			 mboxq->u.mb.mbxStatus, vport->port_state);
+		if (phba->fc_topology == TOPOLOGY_LOOP) {
+			/* FLOGI failed, use loop map to make discovery list */
+			lpfc_disc_list_loopmap(vport);
+			/* Start discovery */
+			lpfc_disc_start(vport);
+			goto fail_free_mem;
+		}
+		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+		goto fail_free_mem;
+	}
+	/* Mark the vport has registered with its VFI */
+	vport->vfi_state |= LPFC_VFI_REGISTERED;
+
+	if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
+		lpfc_start_fdiscs(phba);
+		lpfc_do_scr_ns_plogi(phba, vport);
+	}
+
+fail_free_mem:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	lpfc_mbuf_free(phba, dmabuf->virt, dmabuf->phys);
+	kfree(dmabuf);
+	return;
+}
+
+static void
 lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
 	struct lpfc_vport  *vport = pmb->vport;
 
@@ -1012,13 +1625,13 @@
 lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
 {
 	struct lpfc_vport *vport = phba->pport;
-	LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox;
+	LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox = NULL;
 	int i;
 	struct lpfc_dmabuf *mp;
 	int rc;
+	struct fcf_record *fcf_record;
 
 	sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 
 	spin_lock_irq(&phba->hbalock);
 	switch (la->UlnkSpeed) {
@@ -1034,6 +1647,9 @@
 	case LA_8GHZ_LINK:
 		phba->fc_linkspeed = LA_8GHZ_LINK;
 		break;
+	case LA_10GHZ_LINK:
+		phba->fc_linkspeed = LA_10GHZ_LINK;
+		break;
 	default:
 		phba->fc_linkspeed = LA_UNKNW_LINK;
 		break;
@@ -1115,22 +1731,66 @@
 			lpfc_mbuf_free(phba, mp->virt, mp->phys);
 			kfree(mp);
 			mempool_free(sparam_mbox, phba->mbox_mem_pool);
-			if (cfglink_mbox)
-				mempool_free(cfglink_mbox, phba->mbox_mem_pool);
 			goto out;
 		}
 	}
 
-	if (cfglink_mbox) {
+	if (!(phba->hba_flag & HBA_FCOE_SUPPORT)) {
+		cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+		if (!cfglink_mbox)
+			goto out;
 		vport->port_state = LPFC_LOCAL_CFG_LINK;
 		lpfc_config_link(phba, cfglink_mbox);
 		cfglink_mbox->vport = vport;
 		cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
 		rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
-		if (rc != MBX_NOT_FINISHED)
-			return;
-		mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+		if (rc == MBX_NOT_FINISHED) {
+			mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+			goto out;
+		}
+	} else {
+		/*
+		 * Add the driver's default FCF record at FCF index 0 now. This
+		 * is phase 1 implementation that support FCF index 0 and driver
+		 * defaults.
+		 */
+		if (phba->cfg_enable_fip == 0) {
+			fcf_record = kzalloc(sizeof(struct fcf_record),
+					GFP_KERNEL);
+			if (unlikely(!fcf_record)) {
+				lpfc_printf_log(phba, KERN_ERR,
+					LOG_MBOX | LOG_SLI,
+					"2554 Could not allocate memmory for "
+					"fcf record\n");
+				rc = -ENODEV;
+				goto out;
+			}
+
+			lpfc_sli4_build_dflt_fcf_record(phba, fcf_record,
+						LPFC_FCOE_FCF_DEF_INDEX);
+			rc = lpfc_sli4_add_fcf_record(phba, fcf_record);
+			if (unlikely(rc)) {
+				lpfc_printf_log(phba, KERN_ERR,
+					LOG_MBOX | LOG_SLI,
+					"2013 Could not manually add FCF "
+					"record 0, status %d\n", rc);
+				rc = -ENODEV;
+				kfree(fcf_record);
+				goto out;
+			}
+			kfree(fcf_record);
+		}
+		/*
+		 * The driver is expected to do FIP/FCF. Call the port
+		 * and get the FCF Table.
+		 */
+		rc = lpfc_sli4_read_fcf_record(phba,
+					LPFC_FCOE_FCF_GET_FIRST);
+		if (rc)
+			goto out;
 	}
+
+	return;
 out:
 	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
@@ -1147,10 +1807,12 @@
 	struct lpfc_sli *psli = &phba->sli;
 	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag |= LPFC_PROCESS_LA;
-	control = readl(phba->HCregaddr);
-	control |= HC_LAINT_ENA;
-	writel(control, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		control = readl(phba->HCregaddr);
+		control |= HC_LAINT_ENA;
+		writel(control, phba->HCregaddr);
+		readl(phba->HCregaddr); /* flush */
+	}
 	spin_unlock_irq(&phba->hbalock);
 }
 
@@ -1159,6 +1821,7 @@
 {
 	lpfc_linkdown(phba);
 	lpfc_enable_la(phba);
+	lpfc_unregister_unused_fcf(phba);
 	/* turn on Link Attention interrupts - no CLEAR_LA needed */
 }
 
@@ -1175,7 +1838,7 @@
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	READ_LA_VAR *la;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 
 	/* Unblock ELS traffic */
@@ -1190,7 +1853,7 @@
 		goto lpfc_mbx_cmpl_read_la_free_mbuf;
 	}
 
-	la = (READ_LA_VAR *) & pmb->mb.un.varReadLA;
+	la = (READ_LA_VAR *) &pmb->u.mb.un.varReadLA;
 
 	memcpy(&phba->alpa_map[0], mp->virt, 128);
 
@@ -1328,7 +1991,7 @@
 static void
 lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 
@@ -1381,7 +2044,7 @@
 {
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	switch (mb->mbxStatus) {
 	case 0x0011:
@@ -1416,6 +2079,128 @@
 	return;
 }
 
+/**
+ * lpfc_create_static_vport - Read HBA config region to create static vports.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine issue a DUMP mailbox command for config region 22 to get
+ * the list of static vports to be created. The function create vports
+ * based on the information returned from the HBA.
+ **/
+void
+lpfc_create_static_vport(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *pmb = NULL;
+	MAILBOX_t *mb;
+	struct static_vport_info *vport_info;
+	int rc, i;
+	struct fc_vport_identifiers vport_id;
+	struct fc_vport *new_fc_vport;
+	struct Scsi_Host *shost;
+	struct lpfc_vport *vport;
+	uint16_t offset = 0;
+	uint8_t *vport_buff;
+
+	pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0542 lpfc_create_static_vport failed to"
+				" allocate mailbox memory\n");
+		return;
+	}
+
+	mb = &pmb->u.mb;
+
+	vport_info = kzalloc(sizeof(struct static_vport_info), GFP_KERNEL);
+	if (!vport_info) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0543 lpfc_create_static_vport failed to"
+				" allocate vport_info\n");
+		mempool_free(pmb, phba->mbox_mem_pool);
+		return;
+	}
+
+	vport_buff = (uint8_t *) vport_info;
+	do {
+		lpfc_dump_static_vport(phba, pmb, offset);
+		pmb->vport = phba->pport;
+		rc = lpfc_sli_issue_mbox_wait(phba, pmb, LPFC_MBOX_TMO);
+
+		if ((rc != MBX_SUCCESS) || mb->mbxStatus) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0544 lpfc_create_static_vport failed to"
+				" issue dump mailbox command ret 0x%x "
+				"status 0x%x\n",
+				rc, mb->mbxStatus);
+			goto out;
+		}
+
+		if (mb->un.varDmp.word_cnt >
+			sizeof(struct static_vport_info) - offset)
+			mb->un.varDmp.word_cnt =
+			sizeof(struct static_vport_info) - offset;
+
+		lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
+			vport_buff + offset,
+			mb->un.varDmp.word_cnt);
+		offset += mb->un.varDmp.word_cnt;
+
+	} while (mb->un.varDmp.word_cnt &&
+		offset < sizeof(struct static_vport_info));
+
+
+	if ((le32_to_cpu(vport_info->signature) != VPORT_INFO_SIG) ||
+		((le32_to_cpu(vport_info->rev) & VPORT_INFO_REV_MASK)
+			!= VPORT_INFO_REV)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0545 lpfc_create_static_vport bad"
+			" information header 0x%x 0x%x\n",
+			le32_to_cpu(vport_info->signature),
+			le32_to_cpu(vport_info->rev) & VPORT_INFO_REV_MASK);
+
+		goto out;
+	}
+
+	shost = lpfc_shost_from_vport(phba->pport);
+
+	for (i = 0; i < MAX_STATIC_VPORT_COUNT; i++) {
+		memset(&vport_id, 0, sizeof(vport_id));
+		vport_id.port_name = wwn_to_u64(vport_info->vport_list[i].wwpn);
+		vport_id.node_name = wwn_to_u64(vport_info->vport_list[i].wwnn);
+		if (!vport_id.port_name || !vport_id.node_name)
+			continue;
+
+		vport_id.roles = FC_PORT_ROLE_FCP_INITIATOR;
+		vport_id.vport_type = FC_PORTTYPE_NPIV;
+		vport_id.disable = false;
+		new_fc_vport = fc_vport_create(shost, 0, &vport_id);
+
+		if (!new_fc_vport) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0546 lpfc_create_static_vport failed to"
+				" create vport \n");
+			continue;
+		}
+
+		vport = *(struct lpfc_vport **)new_fc_vport->dd_data;
+		vport->vport_flag |= STATIC_VPORT;
+	}
+
+out:
+	/*
+	 * If this is timed out command, setting NULL to context2 tell SLI
+	 * layer not to use this buffer.
+	 */
+	spin_lock_irq(&phba->hbalock);
+	pmb->context2 = NULL;
+	spin_unlock_irq(&phba->hbalock);
+	kfree(vport_info);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(pmb, phba->mbox_mem_pool);
+
+	return;
+}
+
 /*
  * This routine handles processing a Fabric REG_LOGIN mailbox
  * command upon completion. It is setup in the LPFC_MBOXQ
@@ -1426,16 +2211,17 @@
 lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_vport *vport = pmb->vport;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp;
-	struct lpfc_vport **vports;
-	int i;
 
 	ndlp = (struct lpfc_nodelist *) pmb->context2;
 	pmb->context1 = NULL;
 	pmb->context2 = NULL;
 	if (mb->mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+				 "0258 Register Fabric login error: 0x%x\n",
+				 mb->mbxStatus);
 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
 		kfree(mp);
 		mempool_free(pmb, phba->mbox_mem_pool);
@@ -1454,9 +2240,6 @@
 		}
 
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
-				 "0258 Register Fabric login error: 0x%x\n",
-				 mb->mbxStatus);
 		/* Decrement the reference count to ndlp after the reference
 		 * to the ndlp are done.
 		 */
@@ -1465,34 +2248,12 @@
 	}
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
 	if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
-		vports = lpfc_create_vport_work_array(phba);
-		if (vports != NULL)
-			for(i = 0;
-			    i <= phba->max_vpi && vports[i] != NULL;
-			    i++) {
-				if (vports[i]->port_type == LPFC_PHYSICAL_PORT)
-					continue;
-				if (phba->fc_topology == TOPOLOGY_LOOP) {
-					lpfc_vport_set_state(vports[i],
-							FC_VPORT_LINKDOWN);
-					continue;
-				}
-				if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
-					lpfc_initial_fdisc(vports[i]);
-				else {
-					lpfc_vport_set_state(vports[i],
-						FC_VPORT_NO_FABRIC_SUPP);
-					lpfc_printf_vlog(vport, KERN_ERR,
-							 LOG_ELS,
-							"0259 No NPIV "
-							"Fabric support\n");
-				}
-			}
-		lpfc_destroy_vport_work_array(phba, vports);
+		lpfc_start_fdiscs(phba);
 		lpfc_do_scr_ns_plogi(phba, vport);
 	}
 
@@ -1516,13 +2277,16 @@
 void
 lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 	struct lpfc_vport *vport = pmb->vport;
 
 	if (mb->mbxStatus) {
 out:
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+				 "0260 Register NameServer error: 0x%x\n",
+				 mb->mbxStatus);
 		/* decrement the node reference count held for this
 		 * callback function.
 		 */
@@ -1546,15 +2310,13 @@
 			return;
 		}
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-				 "0260 Register NameServer error: 0x%x\n",
-				 mb->mbxStatus);
 		return;
 	}
 
 	pmb->context1 = NULL;
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
@@ -2055,7 +2817,7 @@
 	if (pring->ringno == LPFC_ELS_RING) {
 		switch (icmd->ulpCommand) {
 		case CMD_GEN_REQUEST64_CR:
-			if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi)
+			if (iocb->context_un.ndlp == ndlp)
 				return 1;
 		case CMD_ELS_REQUEST64_CR:
 			if (icmd->un.elsreq64.remoteID == ndlp->nlp_DID)
@@ -2102,7 +2864,7 @@
 	 */
 	psli = &phba->sli;
 	rpi = ndlp->nlp_rpi;
-	if (rpi) {
+	if (ndlp->nlp_flag & NLP_RPI_VALID) {
 		/* Now process each ring */
 		for (i = 0; i < psli->num_rings; i++) {
 			pring = &psli->ring[i];
@@ -2150,7 +2912,7 @@
 	LPFC_MBOXQ_t    *mbox;
 	int rc;
 
-	if (ndlp->nlp_rpi) {
+	if (ndlp->nlp_flag & NLP_RPI_VALID) {
 		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 		if (mbox) {
 			lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox);
@@ -2162,6 +2924,7 @@
 		}
 		lpfc_no_rpi(phba, ndlp);
 		ndlp->nlp_rpi = 0;
+		ndlp->nlp_flag &= ~NLP_RPI_VALID;
 		return 1;
 	}
 	return 0;
@@ -2252,7 +3015,7 @@
 
 	/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
 	if ((mb = phba->sli.mbox_active)) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mb->context2 = NULL;
 			mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
@@ -2261,7 +3024,7 @@
 
 	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		    (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mp = (struct lpfc_dmabuf *) (mb->context1);
 			if (mp) {
@@ -2309,13 +3072,14 @@
 	int rc;
 
 	lpfc_cancel_retry_delay_tmo(vport, ndlp);
-	if (ndlp->nlp_flag & NLP_DEFER_RM && !ndlp->nlp_rpi) {
+	if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
+	    !(ndlp->nlp_flag & NLP_RPI_VALID)) {
 		/* For this case we need to cleanup the default rpi
 		 * allocated by the firmware.
 		 */
 		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))
 			!= NULL) {
-			rc = lpfc_reg_login(phba, vport->vpi, ndlp->nlp_DID,
+			rc = lpfc_reg_rpi(phba, vport->vpi, ndlp->nlp_DID,
 			    (uint8_t *) &vport->fc_sparam, mbox, 0);
 			if (rc) {
 				mempool_free(mbox, phba->mbox_mem_pool);
@@ -2553,7 +3317,8 @@
 	 * clear_la then don't send it.
 	 */
 	if ((phba->link_state >= LPFC_CLEAR_LA) ||
-	    (vport->port_type != LPFC_PHYSICAL_PORT))
+	    (vport->port_type != LPFC_PHYSICAL_PORT) ||
+		(phba->sli_rev == LPFC_SLI_REV4))
 		return;
 
 			/* Link up discovery */
@@ -2582,7 +3347,7 @@
 
 	regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (regvpimbox) {
-		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox);
+		lpfc_reg_vpi(vport, regvpimbox);
 		regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi;
 		regvpimbox->vport = vport;
 		if (lpfc_sli_issue_mbox(phba, regvpimbox, MBX_NOWAIT)
@@ -2642,7 +3407,8 @@
 	 */
 	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
 	    !(vport->fc_flag & FC_PT2PT) &&
-	    !(vport->fc_flag & FC_RSCN_MODE)) {
+	    !(vport->fc_flag & FC_RSCN_MODE) &&
+	    (phba->sli_rev < LPFC_SLI_REV4)) {
 		lpfc_issue_reg_vpi(phba, vport);
 		return;
 	}
@@ -2919,11 +3685,13 @@
 		 * set port_state to PORT_READY if SLI2.
 		 * cmpl_reg_vpi will set port_state to READY for SLI3.
 		 */
-		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
-			lpfc_issue_reg_vpi(phba, vport);
-		else  {	/* NPIV Not enabled */
-			lpfc_issue_clear_la(phba, vport);
-			vport->port_state = LPFC_VPORT_READY;
+		if (phba->sli_rev < LPFC_SLI_REV4) {
+			if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+				lpfc_issue_reg_vpi(phba, vport);
+			else  {	/* NPIV Not enabled */
+				lpfc_issue_clear_la(phba, vport);
+				vport->port_state = LPFC_VPORT_READY;
+			}
 		}
 
 		/* Setup and issue mailbox INITIALIZE LINK command */
@@ -2939,7 +3707,7 @@
 		lpfc_linkdown(phba);
 		lpfc_init_link(phba, initlinkmbox, phba->cfg_topology,
 			       phba->cfg_link_speed);
-		initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+		initlinkmbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0;
 		initlinkmbox->vport = vport;
 		initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 		rc = lpfc_sli_issue_mbox(phba, initlinkmbox, MBX_NOWAIT);
@@ -2959,11 +3727,13 @@
 		 * set port_state to PORT_READY if SLI2.
 		 * cmpl_reg_vpi will set port_state to READY for SLI3.
 		 */
-		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
-			lpfc_issue_reg_vpi(phba, vport);
-		else {	/* NPIV Not enabled */
-			lpfc_issue_clear_la(phba, vport);
-			vport->port_state = LPFC_VPORT_READY;
+		if (phba->sli_rev < LPFC_SLI_REV4) {
+			if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+				lpfc_issue_reg_vpi(phba, vport);
+			else  {	/* NPIV Not enabled */
+				lpfc_issue_clear_la(phba, vport);
+				vport->port_state = LPFC_VPORT_READY;
+			}
 		}
 		break;
 
@@ -3036,7 +3806,7 @@
 void
 lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf   *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 	struct lpfc_vport    *vport = pmb->vport;
@@ -3044,6 +3814,7 @@
 	pmb->context1 = NULL;
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
@@ -3297,3 +4068,395 @@
 			return 1;
 	return 0;
 }
+
+/**
+ * lpfc_fcf_inuse - Check if FCF can be unregistered.
+ * @phba: Pointer to hba context object.
+ *
+ * This function iterate through all FC nodes associated
+ * will all vports to check if there is any node with
+ * fc_rports associated with it. If there is an fc_rport
+ * associated with the node, then the node is either in
+ * discovered state or its devloss_timer is pending.
+ */
+static int
+lpfc_fcf_inuse(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	int i, ret = 0;
+	struct lpfc_nodelist *ndlp;
+	struct Scsi_Host  *shost;
+
+	vports = lpfc_create_vport_work_array(phba);
+
+	for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+		shost = lpfc_shost_from_vport(vports[i]);
+		spin_lock_irq(shost->host_lock);
+		list_for_each_entry(ndlp, &vports[i]->fc_nodes, nlp_listp) {
+			if (NLP_CHK_NODE_ACT(ndlp) && ndlp->rport &&
+			  (ndlp->rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
+				ret = 1;
+				spin_unlock_irq(shost->host_lock);
+				goto out;
+			}
+		}
+		spin_unlock_irq(shost->host_lock);
+	}
+out:
+	lpfc_destroy_vport_work_array(phba, vports);
+	return ret;
+}
+
+/**
+ * lpfc_unregister_vfi_cmpl - Completion handler for unreg vfi.
+ * @phba: Pointer to hba context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This function frees memory associated with the mailbox command.
+ */
+static void
+lpfc_unregister_vfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2555 UNREG_VFI mbxStatus error x%x "
+			"HBA state x%x\n",
+			mboxq->u.mb.mbxStatus, vport->port_state);
+	}
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_unregister_fcfi_cmpl - Completion handler for unreg fcfi.
+ * @phba: Pointer to hba context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This function frees memory associated with the mailbox command.
+ */
+static void
+lpfc_unregister_fcfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2550 UNREG_FCFI mbxStatus error x%x "
+			"HBA state x%x\n",
+			mboxq->u.mb.mbxStatus, vport->port_state);
+	}
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_unregister_unused_fcf - Unregister FCF if all devices are disconnected.
+ * @phba: Pointer to hba context object.
+ *
+ * This function check if there are any connected remote port for the FCF and
+ * if all the devices are disconnected, this function unregister FCFI.
+ * This function also tries to use another FCF for discovery.
+ */
+void
+lpfc_unregister_unused_fcf(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	struct lpfc_vport **vports;
+	int i;
+
+	spin_lock_irq(&phba->hbalock);
+	/*
+	 * If HBA is not running in FIP mode or
+	 * If HBA does not support FCoE or
+	 * If FCF is not registered.
+	 * do nothing.
+	 */
+	if (!(phba->hba_flag & HBA_FCOE_SUPPORT) ||
+		!(phba->fcf.fcf_flag & FCF_REGISTERED) ||
+		(phba->cfg_enable_fip == 0)) {
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+	spin_unlock_irq(&phba->hbalock);
+
+	if (lpfc_fcf_inuse(phba))
+		return;
+
+
+	/* Unregister VPIs */
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports &&
+		(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED))
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			lpfc_mbx_unreg_vpi(vports[i]);
+			vports[i]->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+			vports[i]->vfi_state &= ~LPFC_VFI_REGISTERED;
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+
+	/* Unregister VFI */
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2556 UNREG_VFI mbox allocation failed"
+			"HBA state x%x\n",
+			phba->pport->port_state);
+		return;
+	}
+
+	lpfc_unreg_vfi(mbox, phba->pport->vfi);
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_unregister_vfi_cmpl;
+
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2557 UNREG_VFI issue mbox failed rc x%x "
+			"HBA state x%x\n",
+			rc, phba->pport->port_state);
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* Unregister FCF */
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2551 UNREG_FCFI mbox allocation failed"
+			"HBA state x%x\n",
+			phba->pport->port_state);
+		return;
+	}
+
+	lpfc_unreg_fcfi(mbox, phba->fcf.fcfi);
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_unregister_fcfi_cmpl;
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2552 UNREG_FCFI issue mbox failed rc x%x "
+			"HBA state x%x\n",
+			rc, phba->pport->port_state);
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	spin_lock_irq(&phba->hbalock);
+	phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_REGISTERED |
+		FCF_DISCOVERED | FCF_BOOT_ENABLE | FCF_IN_USE |
+		FCF_VALID_VLAN);
+	spin_unlock_irq(&phba->hbalock);
+
+	/*
+	 * If driver is not unloading, check if there is any other
+	 * FCF record that can be used for discovery.
+	 */
+	if ((phba->pport->load_flag & FC_UNLOADING) ||
+		(phba->link_state < LPFC_LINK_UP))
+		return;
+
+	rc = lpfc_sli4_read_fcf_record(phba, LPFC_FCOE_FCF_GET_FIRST);
+
+	if (rc)
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2553 lpfc_unregister_unused_fcf failed to read FCF"
+			" record HBA state x%x\n",
+			phba->pport->port_state);
+}
+
+/**
+ * lpfc_read_fcf_conn_tbl - Create driver FCF connection table.
+ * @phba: Pointer to hba context object.
+ * @buff: Buffer containing the FCF connection table as in the config
+ *         region.
+ * This function create driver data structure for the FCF connection
+ * record table read from config region 23.
+ */
+static void
+lpfc_read_fcf_conn_tbl(struct lpfc_hba *phba,
+	uint8_t *buff)
+{
+	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
+	struct lpfc_fcf_conn_hdr *conn_hdr;
+	struct lpfc_fcf_conn_rec *conn_rec;
+	uint32_t record_count;
+	int i;
+
+	/* Free the current connect table */
+	list_for_each_entry_safe(conn_entry, next_conn_entry,
+		&phba->fcf_conn_rec_list, list)
+		kfree(conn_entry);
+
+	conn_hdr = (struct lpfc_fcf_conn_hdr *) buff;
+	record_count = conn_hdr->length * sizeof(uint32_t)/
+		sizeof(struct lpfc_fcf_conn_rec);
+
+	conn_rec = (struct lpfc_fcf_conn_rec *)
+		(buff + sizeof(struct lpfc_fcf_conn_hdr));
+
+	for (i = 0; i < record_count; i++) {
+		if (!(conn_rec[i].flags & FCFCNCT_VALID))
+			continue;
+		conn_entry = kzalloc(sizeof(struct lpfc_fcf_conn_entry),
+			GFP_KERNEL);
+		if (!conn_entry) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2566 Failed to allocate connection"
+				" table entry\n");
+			return;
+		}
+
+		memcpy(&conn_entry->conn_rec, &conn_rec[i],
+			sizeof(struct lpfc_fcf_conn_rec));
+		conn_entry->conn_rec.vlan_tag =
+			le16_to_cpu(conn_entry->conn_rec.vlan_tag) & 0xFFF;
+		conn_entry->conn_rec.flags =
+			le16_to_cpu(conn_entry->conn_rec.flags);
+		list_add_tail(&conn_entry->list,
+			&phba->fcf_conn_rec_list);
+	}
+}
+
+/**
+ * lpfc_read_fcoe_param - Read FCoe parameters from conf region..
+ * @phba: Pointer to hba context object.
+ * @buff: Buffer containing the FCoE parameter data structure.
+ *
+ *  This function update driver data structure with config
+ *  parameters read from config region 23.
+ */
+static void
+lpfc_read_fcoe_param(struct lpfc_hba *phba,
+			uint8_t *buff)
+{
+	struct lpfc_fip_param_hdr *fcoe_param_hdr;
+	struct lpfc_fcoe_params *fcoe_param;
+
+	fcoe_param_hdr = (struct lpfc_fip_param_hdr *)
+		buff;
+	fcoe_param = (struct lpfc_fcoe_params *)
+		buff + sizeof(struct lpfc_fip_param_hdr);
+
+	if ((fcoe_param_hdr->parm_version != FIPP_VERSION) ||
+		(fcoe_param_hdr->length != FCOE_PARAM_LENGTH))
+		return;
+
+	if (bf_get(lpfc_fip_param_hdr_fipp_mode, fcoe_param_hdr) ==
+			FIPP_MODE_ON)
+		phba->cfg_enable_fip = 1;
+
+	if (bf_get(lpfc_fip_param_hdr_fipp_mode, fcoe_param_hdr) ==
+		FIPP_MODE_OFF)
+		phba->cfg_enable_fip = 0;
+
+	if (fcoe_param_hdr->parm_flags & FIPP_VLAN_VALID) {
+		phba->valid_vlan = 1;
+		phba->vlan_id = le16_to_cpu(fcoe_param->vlan_tag) &
+			0xFFF;
+	}
+
+	phba->fc_map[0] = fcoe_param->fc_map[0];
+	phba->fc_map[1] = fcoe_param->fc_map[1];
+	phba->fc_map[2] = fcoe_param->fc_map[2];
+	return;
+}
+
+/**
+ * lpfc_get_rec_conf23 - Get a record type in config region data.
+ * @buff: Buffer containing config region 23 data.
+ * @size: Size of the data buffer.
+ * @rec_type: Record type to be searched.
+ *
+ * This function searches config region data to find the begining
+ * of the record specified by record_type. If record found, this
+ * function return pointer to the record else return NULL.
+ */
+static uint8_t *
+lpfc_get_rec_conf23(uint8_t *buff, uint32_t size, uint8_t rec_type)
+{
+	uint32_t offset = 0, rec_length;
+
+	if ((buff[0] == LPFC_REGION23_LAST_REC) ||
+		(size < sizeof(uint32_t)))
+		return NULL;
+
+	rec_length = buff[offset + 1];
+
+	/*
+	 * One TLV record has one word header and number of data words
+	 * specified in the rec_length field of the record header.
+	 */
+	while ((offset + rec_length * sizeof(uint32_t) + sizeof(uint32_t))
+		<= size) {
+		if (buff[offset] == rec_type)
+			return &buff[offset];
+
+		if (buff[offset] == LPFC_REGION23_LAST_REC)
+			return NULL;
+
+		offset += rec_length * sizeof(uint32_t) + sizeof(uint32_t);
+		rec_length = buff[offset + 1];
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_parse_fcoe_conf - Parse FCoE config data read from config region 23.
+ * @phba: Pointer to lpfc_hba data structure.
+ * @buff: Buffer containing config region 23 data.
+ * @size: Size of the data buffer.
+ *
+ * This fuction parse the FCoE config parameters in config region 23 and
+ * populate driver data structure with the parameters.
+ */
+void
+lpfc_parse_fcoe_conf(struct lpfc_hba *phba,
+		uint8_t *buff,
+		uint32_t size)
+{
+	uint32_t offset = 0, rec_length;
+	uint8_t *rec_ptr;
+
+	/*
+	 * If data size is less than 2 words signature and version cannot be
+	 * verified.
+	 */
+	if (size < 2*sizeof(uint32_t))
+		return;
+
+	/* Check the region signature first */
+	if (memcmp(buff, LPFC_REGION23_SIGNATURE, 4)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2567 Config region 23 has bad signature\n");
+		return;
+	}
+
+	offset += 4;
+
+	/* Check the data structure version */
+	if (buff[offset] != LPFC_REGION23_VERSION) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2568 Config region 23 has bad version\n");
+		return;
+	}
+	offset += 4;
+
+	rec_length = buff[offset + 1];
+
+	/* Read FCoE param record */
+	rec_ptr = lpfc_get_rec_conf23(&buff[offset],
+			size - offset, FCOE_PARAM_TYPE);
+	if (rec_ptr)
+		lpfc_read_fcoe_param(phba, rec_ptr);
+
+	/* Read FCF connection table */
+	rec_ptr = lpfc_get_rec_conf23(&buff[offset],
+		size - offset, FCOE_CONN_TBL_TYPE);
+	if (rec_ptr)
+		lpfc_read_fcf_conn_tbl(phba, rec_ptr);
+
+}
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 4168c7b..02aa016 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -471,6 +471,35 @@
 };
 
 /*
+ * Virtual Fabric Tagging Header
+ */
+struct fc_vft_header {
+	 uint32_t word0;
+#define fc_vft_hdr_r_ctl_SHIFT		24
+#define fc_vft_hdr_r_ctl_MASK		0xFF
+#define fc_vft_hdr_r_ctl_WORD		word0
+#define fc_vft_hdr_ver_SHIFT		22
+#define fc_vft_hdr_ver_MASK		0x3
+#define fc_vft_hdr_ver_WORD		word0
+#define fc_vft_hdr_type_SHIFT		18
+#define fc_vft_hdr_type_MASK		0xF
+#define fc_vft_hdr_type_WORD		word0
+#define fc_vft_hdr_e_SHIFT		16
+#define fc_vft_hdr_e_MASK		0x1
+#define fc_vft_hdr_e_WORD		word0
+#define fc_vft_hdr_priority_SHIFT	13
+#define fc_vft_hdr_priority_MASK	0x7
+#define fc_vft_hdr_priority_WORD	word0
+#define fc_vft_hdr_vf_id_SHIFT		1
+#define fc_vft_hdr_vf_id_MASK		0xFFF
+#define fc_vft_hdr_vf_id_WORD		word0
+	uint32_t word1;
+#define fc_vft_hdr_hopct_SHIFT		24
+#define fc_vft_hdr_hopct_MASK		0xFF
+#define fc_vft_hdr_hopct_WORD		word1
+};
+
+/*
  *  Extended Link Service LS_COMMAND codes (Payload Word 0)
  */
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -1152,6 +1181,9 @@
 #define PCI_DEVICE_ID_HORNET        0xfe05
 #define PCI_DEVICE_ID_ZEPHYR_SCSP   0xfe11
 #define PCI_DEVICE_ID_ZEPHYR_DCSP   0xfe12
+#define PCI_VENDOR_ID_SERVERENGINE  0x19a2
+#define PCI_DEVICE_ID_TIGERSHARK    0x0704
+#define PCI_DEVICE_ID_TIGERSHARK_S  0x0705
 
 #define JEDEC_ID_ADDRESS            0x0080001c
 #define FIREFLY_JEDEC_ID            0x1ACC
@@ -1342,15 +1374,21 @@
 #define MBX_READ_LA64       0x95
 #define MBX_REG_VPI	    0x96
 #define MBX_UNREG_VPI	    0x97
-#define MBX_REG_VNPID	    0x96
-#define MBX_UNREG_VNPID	    0x97
 
 #define MBX_WRITE_WWN       0x98
 #define MBX_SET_DEBUG       0x99
 #define MBX_LOAD_EXP_ROM    0x9C
-
-#define MBX_MAX_CMDS        0x9D
+#define MBX_SLI4_CONFIG	    0x9B
+#define MBX_SLI4_REQ_FTRS   0x9D
+#define MBX_MAX_CMDS        0x9E
+#define MBX_RESUME_RPI      0x9E
 #define MBX_SLI2_CMD_MASK   0x80
+#define MBX_REG_VFI         0x9F
+#define MBX_REG_FCFI        0xA0
+#define MBX_UNREG_VFI       0xA1
+#define MBX_UNREG_FCFI	    0xA2
+#define MBX_INIT_VFI        0xA3
+#define MBX_INIT_VPI        0xA4
 
 /* IOCB Commands */
 
@@ -1440,6 +1478,16 @@
 #define CMD_IOCB_LOGENTRY_CN		0x94
 #define CMD_IOCB_LOGENTRY_ASYNC_CN	0x96
 
+/* Unhandled Data Security SLI Commands */
+#define DSSCMD_IWRITE64_CR 		0xD8
+#define DSSCMD_IWRITE64_CX		0xD9
+#define DSSCMD_IREAD64_CR		0xDA
+#define DSSCMD_IREAD64_CX		0xDB
+#define DSSCMD_INVALIDATE_DEK		0xDC
+#define DSSCMD_SET_KEK			0xDD
+#define DSSCMD_GET_KEK_ID		0xDE
+#define DSSCMD_GEN_XFER			0xDF
+
 #define CMD_MAX_IOCB_CMD        0xE6
 #define CMD_IOCB_MASK           0xff
 
@@ -1466,6 +1514,7 @@
 #define MBXERR_BAD_RCV_LENGTH       14
 #define MBXERR_DMA_ERROR            15
 #define MBXERR_ERROR                16
+#define MBXERR_LINK_DOWN            0x33
 #define MBX_NOT_FINISHED           255
 
 #define MBX_BUSY                   0xffffff /* Attempted cmd to busy Mailbox */
@@ -1504,32 +1553,6 @@
 #endif
 };
 
-struct ulp_bde64 {	/* SLI-2 */
-	union ULP_BDE_TUS {
-		uint32_t w;
-		struct {
-#ifdef __BIG_ENDIAN_BITFIELD
-			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
-						   VALUE !! */
-			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
-#else	/*  __LITTLE_ENDIAN_BITFIELD */
-			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
-			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
-						   VALUE !! */
-#endif
-#define BUFF_TYPE_BDE_64    0x00	/* BDE (Host_resident) */
-#define BUFF_TYPE_BDE_IMMED 0x01	/* Immediate Data BDE */
-#define BUFF_TYPE_BDE_64P   0x02	/* BDE (Port-resident) */
-#define BUFF_TYPE_BDE_64I   0x08	/* Input BDE (Host-resident) */
-#define BUFF_TYPE_BDE_64IP  0x0A	/* Input BDE (Port-resident) */
-#define BUFF_TYPE_BLP_64    0x40	/* BLP (Host-resident) */
-#define BUFF_TYPE_BLP_64P   0x42	/* BLP (Port-resident) */
-		} f;
-	} tus;
-	uint32_t addrLow;
-	uint32_t addrHigh;
-};
-
 typedef struct ULP_BDL {	/* SLI-2 */
 #ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t bdeFlags:8;	/* BDL Flags */
@@ -2287,7 +2310,7 @@
 	uint32_t rsvd3;
 	uint32_t rsvd4;
 	uint32_t rsvd5;
-	uint16_t rsvd6;
+	uint16_t vfi;
 	uint16_t vpi;
 #else	/*  __LITTLE_ENDIAN */
 	uint32_t rsvd1;
@@ -2297,7 +2320,7 @@
 	uint32_t rsvd4;
 	uint32_t rsvd5;
 	uint16_t vpi;
-	uint16_t rsvd6;
+	uint16_t vfi;
 #endif
 } REG_VPI_VAR;
 
@@ -2457,7 +2480,7 @@
 	uint32_t entry_index:16;
 #endif
 
-	uint32_t rsvd1;
+	uint32_t sli4_length;
 	uint32_t word_cnt;
 	uint32_t resp_offset;
 } DUMP_VAR;
@@ -2470,9 +2493,32 @@
 #define  DMP_RSP_OFFSET          0x14   /* word 5 contains first word of rsp */
 #define  DMP_RSP_SIZE            0x6C   /* maximum of 27 words of rsp data */
 
+#define  DMP_REGION_VPORT	 0x16   /* VPort info region */
+#define  DMP_VPORT_REGION_SIZE	 0x200
+#define  DMP_MBOX_OFFSET_WORD	 0x5
+
+#define  DMP_REGION_FCOEPARAM	 0x17   /* fcoe param region */
+#define  DMP_FCOEPARAM_RGN_SIZE	 0x400
+
 #define  WAKE_UP_PARMS_REGION_ID    4
 #define  WAKE_UP_PARMS_WORD_SIZE   15
 
+struct vport_rec {
+	uint8_t wwpn[8];
+	uint8_t wwnn[8];
+};
+
+#define VPORT_INFO_SIG 0x32324752
+#define VPORT_INFO_REV_MASK 0xff
+#define VPORT_INFO_REV 0x1
+#define MAX_STATIC_VPORT_COUNT 16
+struct static_vport_info {
+	uint32_t 		signature;
+	uint32_t		rev;
+	struct vport_rec 	vport_list[MAX_STATIC_VPORT_COUNT];
+	uint32_t		resvd[66];
+};
+
 /* Option rom version structure */
 struct prog_id {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -2697,7 +2743,9 @@
 #endif
 
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd1     : 23;  /* Reserved                             */
+	uint32_t rsvd1     : 19;  /* Reserved                             */
+	uint32_t cdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd2     :  3;  /* Reserved                             */
 	uint32_t cbg       :  1;  /* Configure BlockGuard                 */
 	uint32_t cmv       :  1;  /* Configure Max VPIs                   */
 	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
@@ -2717,10 +2765,14 @@
 	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
 	uint32_t cmv	   :  1;  /* Configure Max VPIs                   */
 	uint32_t cbg       :  1;  /* Configure BlockGuard                 */
-	uint32_t rsvd1     : 23;  /* Reserved                             */
+	uint32_t rsvd2     :  3;  /* Reserved                             */
+	uint32_t cdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd1     : 19;  /* Reserved                             */
 #endif
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd2     : 23;  /* Reserved                             */
+	uint32_t rsvd3     : 19;  /* Reserved                             */
+	uint32_t gdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd4     :  3;  /* Reserved                             */
 	uint32_t gbg       :  1;  /* Grant BlockGuard                     */
 	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
 	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
@@ -2740,7 +2792,9 @@
 	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
 	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
 	uint32_t gbg       :  1;  /* Grant BlockGuard                     */
-	uint32_t rsvd2     : 23;  /* Reserved                             */
+	uint32_t rsvd4     :  3;  /* Reserved                             */
+	uint32_t gdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd3     : 19;  /* Reserved                             */
 #endif
 
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -2753,20 +2807,20 @@
 
 #ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
-	uint32_t rsvd3     : 16;  /* Max HBQs Host expect to configure    */
+	uint32_t rsvd5     : 16;  /* Max HBQs Host expect to configure    */
 #else	/*  __LITTLE_ENDIAN */
-	uint32_t rsvd3     : 16;  /* Max HBQs Host expect to configure    */
+	uint32_t rsvd5     : 16;  /* Max HBQs Host expect to configure    */
 	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
 #endif
 
-	uint32_t rsvd4;           /* Reserved                             */
+	uint32_t rsvd6;           /* Reserved                             */
 
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd5      : 16;  /* Reserved                             */
+	uint32_t rsvd7      : 16;  /* Reserved                             */
 	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
 #else	/*  __LITTLE_ENDIAN */
 	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
-	uint32_t rsvd5      : 16;  /* Reserved                             */
+	uint32_t rsvd7      : 16;  /* Reserved                             */
 #endif
 
 } CONFIG_PORT_VAR;
@@ -3666,3 +3720,5 @@
 #define MENLO_TIMEOUT 30
 #define SETVAR_MLOMNT 0x103107
 #define SETVAR_MLORST 0x103007
+
+#define BPL_ALIGN_SZ 8 /* 8 byte alignment for bpl and mbufs */
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
new file mode 100644
index 0000000..39c34b3
--- /dev/null
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -0,0 +1,2141 @@
+/*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for         *
+ * Fibre Channel Host Bus Adapters.                                *
+ * Copyright (C) 2009 Emulex.  All rights reserved.                *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *******************************************************************/
+
+/* Macros to deal with bit fields. Each bit field must have 3 #defines
+ * associated with it (_SHIFT, _MASK, and _WORD).
+ * EG. For a bit field that is in the 7th bit of the "field4" field of a
+ * structure and is 2 bits in size the following #defines must exist:
+ *	struct temp {
+ *		uint32_t	field1;
+ *		uint32_t	field2;
+ *		uint32_t	field3;
+ *		uint32_t	field4;
+ *	#define example_bit_field_SHIFT		7
+ *	#define example_bit_field_MASK		0x03
+ *	#define example_bit_field_WORD		field4
+ *		uint32_t	field5;
+ *	};
+ * Then the macros below may be used to get or set the value of that field.
+ * EG. To get the value of the bit field from the above example:
+ *	struct temp t1;
+ *	value = bf_get(example_bit_field, &t1);
+ * And then to set that bit field:
+ *	bf_set(example_bit_field, &t1, 2);
+ * Or clear that bit field:
+ *	bf_set(example_bit_field, &t1, 0);
+ */
+#define bf_get(name, ptr) \
+	(((ptr)->name##_WORD >> name##_SHIFT) & name##_MASK)
+#define bf_set(name, ptr, value) \
+	((ptr)->name##_WORD = ((((value) & name##_MASK) << name##_SHIFT) | \
+		 ((ptr)->name##_WORD & ~(name##_MASK << name##_SHIFT))))
+
+struct dma_address {
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+};
+
+#define LPFC_SLI4_BAR0		1
+#define LPFC_SLI4_BAR1		2
+#define LPFC_SLI4_BAR2		4
+
+#define LPFC_SLI4_MBX_EMBED	true
+#define LPFC_SLI4_MBX_NEMBED	false
+
+#define LPFC_SLI4_MB_WORD_COUNT		64
+#define LPFC_MAX_MQ_PAGE		8
+#define LPFC_MAX_WQ_PAGE		8
+#define LPFC_MAX_CQ_PAGE		4
+#define LPFC_MAX_EQ_PAGE		8
+
+#define LPFC_VIR_FUNC_MAX       32 /* Maximum number of virtual functions */
+#define LPFC_PCI_FUNC_MAX        5 /* Maximum number of PCI functions */
+#define LPFC_VFR_PAGE_SIZE	0x1000 /* 4KB BAR2 per-VF register page size */
+
+/* Define SLI4 Alignment requirements. */
+#define LPFC_ALIGN_16_BYTE	16
+#define LPFC_ALIGN_64_BYTE	64
+
+/* Define SLI4 specific definitions. */
+#define LPFC_MQ_CQE_BYTE_OFFSET	256
+#define LPFC_MBX_CMD_HDR_LENGTH 16
+#define LPFC_MBX_ERROR_RANGE	0x4000
+#define LPFC_BMBX_BIT1_ADDR_HI	0x2
+#define LPFC_BMBX_BIT1_ADDR_LO	0
+#define LPFC_RPI_HDR_COUNT	64
+#define LPFC_HDR_TEMPLATE_SIZE	4096
+#define LPFC_RPI_ALLOC_ERROR 	0xFFFF
+#define LPFC_FCF_RECORD_WD_CNT	132
+#define LPFC_ENTIRE_FCF_DATABASE 0
+#define LPFC_DFLT_FCF_INDEX	 0
+
+/* Virtual function numbers */
+#define LPFC_VF0		0
+#define LPFC_VF1		1
+#define LPFC_VF2		2
+#define LPFC_VF3		3
+#define LPFC_VF4		4
+#define LPFC_VF5		5
+#define LPFC_VF6		6
+#define LPFC_VF7		7
+#define LPFC_VF8		8
+#define LPFC_VF9		9
+#define LPFC_VF10		10
+#define LPFC_VF11		11
+#define LPFC_VF12		12
+#define LPFC_VF13		13
+#define LPFC_VF14		14
+#define LPFC_VF15		15
+#define LPFC_VF16		16
+#define LPFC_VF17		17
+#define LPFC_VF18		18
+#define LPFC_VF19		19
+#define LPFC_VF20		20
+#define LPFC_VF21		21
+#define LPFC_VF22		22
+#define LPFC_VF23		23
+#define LPFC_VF24		24
+#define LPFC_VF25		25
+#define LPFC_VF26		26
+#define LPFC_VF27		27
+#define LPFC_VF28		28
+#define LPFC_VF29		29
+#define LPFC_VF30		30
+#define LPFC_VF31		31
+
+/* PCI function numbers */
+#define LPFC_PCI_FUNC0		0
+#define LPFC_PCI_FUNC1		1
+#define LPFC_PCI_FUNC2		2
+#define LPFC_PCI_FUNC3		3
+#define LPFC_PCI_FUNC4		4
+
+/* Active interrupt test count */
+#define LPFC_ACT_INTR_CNT	4
+
+/* Delay Multiplier constant */
+#define LPFC_DMULT_CONST       651042
+#define LPFC_MIM_IMAX          636
+#define LPFC_FP_DEF_IMAX       10000
+#define LPFC_SP_DEF_IMAX       10000
+
+struct ulp_bde64 {
+	union ULP_BDE_TUS {
+		uint32_t w;
+		struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
+						   VALUE !! */
+			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
+#else	/*  __LITTLE_ENDIAN_BITFIELD */
+			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
+			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
+						   VALUE !! */
+#endif
+#define BUFF_TYPE_BDE_64    0x00	/* BDE (Host_resident) */
+#define BUFF_TYPE_BDE_IMMED 0x01	/* Immediate Data BDE */
+#define BUFF_TYPE_BDE_64P   0x02	/* BDE (Port-resident) */
+#define BUFF_TYPE_BDE_64I   0x08	/* Input BDE (Host-resident) */
+#define BUFF_TYPE_BDE_64IP  0x0A	/* Input BDE (Port-resident) */
+#define BUFF_TYPE_BLP_64    0x40	/* BLP (Host-resident) */
+#define BUFF_TYPE_BLP_64P   0x42	/* BLP (Port-resident) */
+		} f;
+	} tus;
+	uint32_t addrLow;
+	uint32_t addrHigh;
+};
+
+struct lpfc_sli4_flags {
+	uint32_t word0;
+#define lpfc_fip_flag_SHIFT 0
+#define lpfc_fip_flag_MASK 0x00000001
+#define lpfc_fip_flag_WORD word0
+};
+
+/* event queue entry structure */
+struct lpfc_eqe {
+	uint32_t word0;
+#define lpfc_eqe_resource_id_SHIFT	16
+#define lpfc_eqe_resource_id_MASK	0x000000FF
+#define lpfc_eqe_resource_id_WORD	word0
+#define lpfc_eqe_minor_code_SHIFT	4
+#define lpfc_eqe_minor_code_MASK	0x00000FFF
+#define lpfc_eqe_minor_code_WORD	word0
+#define lpfc_eqe_major_code_SHIFT	1
+#define lpfc_eqe_major_code_MASK	0x00000007
+#define lpfc_eqe_major_code_WORD	word0
+#define lpfc_eqe_valid_SHIFT		0
+#define lpfc_eqe_valid_MASK		0x00000001
+#define lpfc_eqe_valid_WORD		word0
+};
+
+/* completion queue entry structure (common fields for all cqe types) */
+struct lpfc_cqe {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t reserved2;
+	uint32_t word3;
+#define lpfc_cqe_valid_SHIFT		31
+#define lpfc_cqe_valid_MASK		0x00000001
+#define lpfc_cqe_valid_WORD		word3
+#define lpfc_cqe_code_SHIFT		16
+#define lpfc_cqe_code_MASK		0x000000FF
+#define lpfc_cqe_code_WORD		word3
+};
+
+/* Completion Queue Entry Status Codes */
+#define CQE_STATUS_SUCCESS		0x0
+#define CQE_STATUS_FCP_RSP_FAILURE	0x1
+#define CQE_STATUS_REMOTE_STOP		0x2
+#define CQE_STATUS_LOCAL_REJECT		0x3
+#define CQE_STATUS_NPORT_RJT		0x4
+#define CQE_STATUS_FABRIC_RJT		0x5
+#define CQE_STATUS_NPORT_BSY		0x6
+#define CQE_STATUS_FABRIC_BSY		0x7
+#define CQE_STATUS_INTERMED_RSP		0x8
+#define CQE_STATUS_LS_RJT		0x9
+#define CQE_STATUS_CMD_REJECT		0xb
+#define CQE_STATUS_FCP_TGT_LENCHECK	0xc
+#define CQE_STATUS_NEED_BUFF_ENTRY	0xf
+
+/* Status returned by hardware (valid only if status = CQE_STATUS_SUCCESS). */
+#define CQE_HW_STATUS_NO_ERR		0x0
+#define CQE_HW_STATUS_UNDERRUN		0x1
+#define CQE_HW_STATUS_OVERRUN		0x2
+
+/* Completion Queue Entry Codes */
+#define CQE_CODE_COMPL_WQE		0x1
+#define CQE_CODE_RELEASE_WQE		0x2
+#define CQE_CODE_RECEIVE		0x4
+#define CQE_CODE_XRI_ABORTED		0x5
+
+/* completion queue entry for wqe completions */
+struct lpfc_wcqe_complete {
+	uint32_t word0;
+#define lpfc_wcqe_c_request_tag_SHIFT	16
+#define lpfc_wcqe_c_request_tag_MASK	0x0000FFFF
+#define lpfc_wcqe_c_request_tag_WORD	word0
+#define lpfc_wcqe_c_status_SHIFT	8
+#define lpfc_wcqe_c_status_MASK		0x000000FF
+#define lpfc_wcqe_c_status_WORD		word0
+#define lpfc_wcqe_c_hw_status_SHIFT	0
+#define lpfc_wcqe_c_hw_status_MASK	0x000000FF
+#define lpfc_wcqe_c_hw_status_WORD	word0
+	uint32_t total_data_placed;
+	uint32_t parameter;
+	uint32_t word3;
+#define lpfc_wcqe_c_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_c_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_c_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_c_xb_SHIFT		28
+#define lpfc_wcqe_c_xb_MASK		0x00000001
+#define lpfc_wcqe_c_xb_WORD		word3
+#define lpfc_wcqe_c_pv_SHIFT		27
+#define lpfc_wcqe_c_pv_MASK		0x00000001
+#define lpfc_wcqe_c_pv_WORD		word3
+#define lpfc_wcqe_c_priority_SHIFT	24
+#define lpfc_wcqe_c_priority_MASK		0x00000007
+#define lpfc_wcqe_c_priority_WORD		word3
+#define lpfc_wcqe_c_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_c_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_c_code_WORD		lpfc_cqe_code_WORD
+};
+
+/* completion queue entry for wqe release */
+struct lpfc_wcqe_release {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_wcqe_r_wq_id_SHIFT		16
+#define lpfc_wcqe_r_wq_id_MASK		0x0000FFFF
+#define lpfc_wcqe_r_wq_id_WORD		word2
+#define lpfc_wcqe_r_wqe_index_SHIFT	0
+#define lpfc_wcqe_r_wqe_index_MASK	0x0000FFFF
+#define lpfc_wcqe_r_wqe_index_WORD	word2
+	uint32_t word3;
+#define lpfc_wcqe_r_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_r_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_r_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_r_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_r_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_r_code_WORD		lpfc_cqe_code_WORD
+};
+
+struct sli4_wcqe_xri_aborted {
+	uint32_t word0;
+#define lpfc_wcqe_xa_status_SHIFT		8
+#define lpfc_wcqe_xa_status_MASK		0x000000FF
+#define lpfc_wcqe_xa_status_WORD		word0
+	uint32_t parameter;
+	uint32_t word2;
+#define lpfc_wcqe_xa_remote_xid_SHIFT	16
+#define lpfc_wcqe_xa_remote_xid_MASK	0x0000FFFF
+#define lpfc_wcqe_xa_remote_xid_WORD	word2
+#define lpfc_wcqe_xa_xri_SHIFT		0
+#define lpfc_wcqe_xa_xri_MASK		0x0000FFFF
+#define lpfc_wcqe_xa_xri_WORD		word2
+	uint32_t word3;
+#define lpfc_wcqe_xa_valid_SHIFT	lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_xa_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_xa_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_xa_ia_SHIFT		30
+#define lpfc_wcqe_xa_ia_MASK		0x00000001
+#define lpfc_wcqe_xa_ia_WORD		word3
+#define CQE_XRI_ABORTED_IA_REMOTE	0
+#define CQE_XRI_ABORTED_IA_LOCAL	1
+#define lpfc_wcqe_xa_br_SHIFT		29
+#define lpfc_wcqe_xa_br_MASK		0x00000001
+#define lpfc_wcqe_xa_br_WORD		word3
+#define CQE_XRI_ABORTED_BR_BA_ACC	0
+#define CQE_XRI_ABORTED_BR_BA_RJT	1
+#define lpfc_wcqe_xa_eo_SHIFT		28
+#define lpfc_wcqe_xa_eo_MASK		0x00000001
+#define lpfc_wcqe_xa_eo_WORD		word3
+#define CQE_XRI_ABORTED_EO_REMOTE	0
+#define CQE_XRI_ABORTED_EO_LOCAL	1
+#define lpfc_wcqe_xa_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_xa_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_xa_code_WORD		lpfc_cqe_code_WORD
+};
+
+/* completion queue entry structure for rqe completion */
+struct lpfc_rcqe {
+	uint32_t word0;
+#define lpfc_rcqe_bindex_SHIFT		16
+#define lpfc_rcqe_bindex_MASK		0x0000FFF
+#define lpfc_rcqe_bindex_WORD		word0
+#define lpfc_rcqe_status_SHIFT		8
+#define lpfc_rcqe_status_MASK		0x000000FF
+#define lpfc_rcqe_status_WORD		word0
+#define FC_STATUS_RQ_SUCCESS		0x10 /* Async receive successful */
+#define FC_STATUS_RQ_BUF_LEN_EXCEEDED 	0x11 /* payload truncated */
+#define FC_STATUS_INSUFF_BUF_NEED_BUF 	0x12 /* Insufficient buffers */
+#define FC_STATUS_INSUFF_BUF_FRM_DISC 	0x13 /* Frame Discard */
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_rcqe_length_SHIFT		16
+#define lpfc_rcqe_length_MASK		0x0000FFFF
+#define lpfc_rcqe_length_WORD		word2
+#define lpfc_rcqe_rq_id_SHIFT		6
+#define lpfc_rcqe_rq_id_MASK		0x000003FF
+#define lpfc_rcqe_rq_id_WORD		word2
+#define lpfc_rcqe_fcf_id_SHIFT		0
+#define lpfc_rcqe_fcf_id_MASK		0x0000003F
+#define lpfc_rcqe_fcf_id_WORD		word2
+	uint32_t word3;
+#define lpfc_rcqe_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_rcqe_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_rcqe_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_rcqe_port_SHIFT		30
+#define lpfc_rcqe_port_MASK		0x00000001
+#define lpfc_rcqe_port_WORD		word3
+#define lpfc_rcqe_hdr_length_SHIFT	24
+#define lpfc_rcqe_hdr_length_MASK	0x0000001F
+#define lpfc_rcqe_hdr_length_WORD	word3
+#define lpfc_rcqe_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_rcqe_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_rcqe_code_WORD		lpfc_cqe_code_WORD
+#define lpfc_rcqe_eof_SHIFT		8
+#define lpfc_rcqe_eof_MASK		0x000000FF
+#define lpfc_rcqe_eof_WORD		word3
+#define FCOE_EOFn	0x41
+#define FCOE_EOFt	0x42
+#define FCOE_EOFni	0x49
+#define FCOE_EOFa	0x50
+#define lpfc_rcqe_sof_SHIFT		0
+#define lpfc_rcqe_sof_MASK		0x000000FF
+#define lpfc_rcqe_sof_WORD		word3
+#define FCOE_SOFi2	0x2d
+#define FCOE_SOFi3	0x2e
+#define FCOE_SOFn2	0x35
+#define FCOE_SOFn3	0x36
+};
+
+struct lpfc_wqe_generic{
+	struct ulp_bde64 bde;
+	uint32_t word3;
+	uint32_t word4;
+	uint32_t word5;
+	uint32_t word6;
+#define lpfc_wqe_gen_context_SHIFT	16
+#define lpfc_wqe_gen_context_MASK	0x0000FFFF
+#define lpfc_wqe_gen_context_WORD	word6
+#define lpfc_wqe_gen_xri_SHIFT		0
+#define lpfc_wqe_gen_xri_MASK		0x0000FFFF
+#define lpfc_wqe_gen_xri_WORD		word6
+	uint32_t word7;
+#define lpfc_wqe_gen_lnk_SHIFT		23
+#define lpfc_wqe_gen_lnk_MASK		0x00000001
+#define lpfc_wqe_gen_lnk_WORD		word7
+#define lpfc_wqe_gen_erp_SHIFT		22
+#define lpfc_wqe_gen_erp_MASK		0x00000001
+#define lpfc_wqe_gen_erp_WORD		word7
+#define lpfc_wqe_gen_pu_SHIFT		20
+#define lpfc_wqe_gen_pu_MASK		0x00000003
+#define lpfc_wqe_gen_pu_WORD		word7
+#define lpfc_wqe_gen_class_SHIFT	16
+#define lpfc_wqe_gen_class_MASK		0x00000007
+#define lpfc_wqe_gen_class_WORD		word7
+#define lpfc_wqe_gen_command_SHIFT	8
+#define lpfc_wqe_gen_command_MASK	0x000000FF
+#define lpfc_wqe_gen_command_WORD	word7
+#define lpfc_wqe_gen_status_SHIFT	4
+#define lpfc_wqe_gen_status_MASK	0x0000000F
+#define lpfc_wqe_gen_status_WORD	word7
+#define lpfc_wqe_gen_ct_SHIFT		2
+#define lpfc_wqe_gen_ct_MASK		0x00000007
+#define lpfc_wqe_gen_ct_WORD		word7
+	uint32_t abort_tag;
+	uint32_t word9;
+#define lpfc_wqe_gen_request_tag_SHIFT	0
+#define lpfc_wqe_gen_request_tag_MASK	0x0000FFFF
+#define lpfc_wqe_gen_request_tag_WORD	word9
+	uint32_t word10;
+#define lpfc_wqe_gen_ccp_SHIFT		24
+#define lpfc_wqe_gen_ccp_MASK		0x000000FF
+#define lpfc_wqe_gen_ccp_WORD		word10
+#define lpfc_wqe_gen_ccpe_SHIFT		23
+#define lpfc_wqe_gen_ccpe_MASK		0x00000001
+#define lpfc_wqe_gen_ccpe_WORD		word10
+#define lpfc_wqe_gen_pv_SHIFT		19
+#define lpfc_wqe_gen_pv_MASK		0x00000001
+#define lpfc_wqe_gen_pv_WORD		word10
+#define lpfc_wqe_gen_pri_SHIFT		16
+#define lpfc_wqe_gen_pri_MASK		0x00000007
+#define lpfc_wqe_gen_pri_WORD		word10
+	uint32_t word11;
+#define lpfc_wqe_gen_cq_id_SHIFT	16
+#define lpfc_wqe_gen_cq_id_MASK		0x000003FF
+#define lpfc_wqe_gen_cq_id_WORD		word11
+#define LPFC_WQE_CQ_ID_DEFAULT	0x3ff
+#define lpfc_wqe_gen_wqec_SHIFT		7
+#define lpfc_wqe_gen_wqec_MASK		0x00000001
+#define lpfc_wqe_gen_wqec_WORD		word11
+#define lpfc_wqe_gen_cmd_type_SHIFT	0
+#define lpfc_wqe_gen_cmd_type_MASK	0x0000000F
+#define lpfc_wqe_gen_cmd_type_WORD	word11
+	uint32_t payload[4];
+};
+
+struct lpfc_rqe {
+	uint32_t address_hi;
+	uint32_t address_lo;
+};
+
+/* buffer descriptors */
+struct lpfc_bde4 {
+	uint32_t addr_hi;
+	uint32_t addr_lo;
+	uint32_t word2;
+#define lpfc_bde4_last_SHIFT		31
+#define lpfc_bde4_last_MASK		0x00000001
+#define lpfc_bde4_last_WORD		word2
+#define lpfc_bde4_sge_offset_SHIFT	0
+#define lpfc_bde4_sge_offset_MASK	0x000003FF
+#define lpfc_bde4_sge_offset_WORD	word2
+	uint32_t word3;
+#define lpfc_bde4_length_SHIFT		0
+#define lpfc_bde4_length_MASK		0x000000FF
+#define lpfc_bde4_length_WORD		word3
+};
+
+struct lpfc_register {
+	uint32_t word0;
+};
+
+#define LPFC_UERR_STATUS_HI		0x00A4
+#define LPFC_UERR_STATUS_LO		0x00A0
+#define LPFC_ONLINE0			0x00B0
+#define LPFC_ONLINE1			0x00B4
+#define LPFC_SCRATCHPAD			0x0058
+
+/* BAR0 Registers */
+#define LPFC_HST_STATE			0x00AC
+#define lpfc_hst_state_perr_SHIFT	31
+#define lpfc_hst_state_perr_MASK	0x1
+#define lpfc_hst_state_perr_WORD	word0
+#define lpfc_hst_state_sfi_SHIFT	30
+#define lpfc_hst_state_sfi_MASK		0x1
+#define lpfc_hst_state_sfi_WORD		word0
+#define lpfc_hst_state_nip_SHIFT	29
+#define lpfc_hst_state_nip_MASK		0x1
+#define lpfc_hst_state_nip_WORD		word0
+#define lpfc_hst_state_ipc_SHIFT	28
+#define lpfc_hst_state_ipc_MASK		0x1
+#define lpfc_hst_state_ipc_WORD		word0
+#define lpfc_hst_state_xrom_SHIFT	27
+#define lpfc_hst_state_xrom_MASK	0x1
+#define lpfc_hst_state_xrom_WORD	word0
+#define lpfc_hst_state_dl_SHIFT		26
+#define lpfc_hst_state_dl_MASK		0x1
+#define lpfc_hst_state_dl_WORD		word0
+#define lpfc_hst_state_port_status_SHIFT	0
+#define lpfc_hst_state_port_status_MASK		0xFFFF
+#define lpfc_hst_state_port_status_WORD		word0
+
+#define LPFC_POST_STAGE_POWER_ON_RESET			0x0000
+#define LPFC_POST_STAGE_AWAITING_HOST_RDY		0x0001
+#define LPFC_POST_STAGE_HOST_RDY			0x0002
+#define LPFC_POST_STAGE_BE_RESET			0x0003
+#define LPFC_POST_STAGE_SEEPROM_CS_START		0x0100
+#define LPFC_POST_STAGE_SEEPROM_CS_DONE			0x0101
+#define LPFC_POST_STAGE_DDR_CONFIG_START		0x0200
+#define LPFC_POST_STAGE_DDR_CONFIG_DONE			0x0201
+#define LPFC_POST_STAGE_DDR_CALIBRATE_START		0x0300
+#define LPFC_POST_STAGE_DDR_CALIBRATE_DONE		0x0301
+#define LPFC_POST_STAGE_DDR_TEST_START			0x0400
+#define LPFC_POST_STAGE_DDR_TEST_DONE			0x0401
+#define LPFC_POST_STAGE_REDBOOT_INIT_START		0x0600
+#define LPFC_POST_STAGE_REDBOOT_INIT_DONE		0x0601
+#define LPFC_POST_STAGE_FW_IMAGE_LOAD_START		0x0700
+#define LPFC_POST_STAGE_FW_IMAGE_LOAD_DONE		0x0701
+#define LPFC_POST_STAGE_ARMFW_START			0x0800
+#define LPFC_POST_STAGE_DHCP_QUERY_START		0x0900
+#define LPFC_POST_STAGE_DHCP_QUERY_DONE			0x0901
+#define LPFC_POST_STAGE_BOOT_TARGET_DISCOVERY_START	0x0A00
+#define LPFC_POST_STAGE_BOOT_TARGET_DISCOVERY_DONE	0x0A01
+#define LPFC_POST_STAGE_RC_OPTION_SET			0x0B00
+#define LPFC_POST_STAGE_SWITCH_LINK			0x0B01
+#define LPFC_POST_STAGE_SEND_ICDS_MESSAGE		0x0B02
+#define LPFC_POST_STAGE_PERFROM_TFTP			0x0B03
+#define LPFC_POST_STAGE_PARSE_XML			0x0B04
+#define LPFC_POST_STAGE_DOWNLOAD_IMAGE			0x0B05
+#define LPFC_POST_STAGE_FLASH_IMAGE			0x0B06
+#define LPFC_POST_STAGE_RC_DONE				0x0B07
+#define LPFC_POST_STAGE_REBOOT_SYSTEM			0x0B08
+#define LPFC_POST_STAGE_MAC_ADDRESS			0x0C00
+#define LPFC_POST_STAGE_ARMFW_READY			0xC000
+#define LPFC_POST_STAGE_ARMFW_UE 			0xF000
+
+#define lpfc_scratchpad_slirev_SHIFT			4
+#define lpfc_scratchpad_slirev_MASK			0xF
+#define lpfc_scratchpad_slirev_WORD			word0
+#define lpfc_scratchpad_chiptype_SHIFT			8
+#define lpfc_scratchpad_chiptype_MASK			0xFF
+#define lpfc_scratchpad_chiptype_WORD			word0
+#define lpfc_scratchpad_featurelevel1_SHIFT		16
+#define lpfc_scratchpad_featurelevel1_MASK		0xFF
+#define lpfc_scratchpad_featurelevel1_WORD		word0
+#define lpfc_scratchpad_featurelevel2_SHIFT		24
+#define lpfc_scratchpad_featurelevel2_MASK		0xFF
+#define lpfc_scratchpad_featurelevel2_WORD		word0
+
+/* BAR1 Registers */
+#define LPFC_IMR_MASK_ALL	0xFFFFFFFF
+#define LPFC_ISCR_CLEAR_ALL	0xFFFFFFFF
+
+#define LPFC_HST_ISR0		0x0C18
+#define LPFC_HST_ISR1		0x0C1C
+#define LPFC_HST_ISR2		0x0C20
+#define LPFC_HST_ISR3		0x0C24
+#define LPFC_HST_ISR4		0x0C28
+
+#define LPFC_HST_IMR0		0x0C48
+#define LPFC_HST_IMR1		0x0C4C
+#define LPFC_HST_IMR2		0x0C50
+#define LPFC_HST_IMR3		0x0C54
+#define LPFC_HST_IMR4		0x0C58
+
+#define LPFC_HST_ISCR0		0x0C78
+#define LPFC_HST_ISCR1		0x0C7C
+#define LPFC_HST_ISCR2		0x0C80
+#define LPFC_HST_ISCR3		0x0C84
+#define LPFC_HST_ISCR4		0x0C88
+
+#define LPFC_SLI4_INTR0			BIT0
+#define LPFC_SLI4_INTR1			BIT1
+#define LPFC_SLI4_INTR2			BIT2
+#define LPFC_SLI4_INTR3			BIT3
+#define LPFC_SLI4_INTR4			BIT4
+#define LPFC_SLI4_INTR5			BIT5
+#define LPFC_SLI4_INTR6			BIT6
+#define LPFC_SLI4_INTR7			BIT7
+#define LPFC_SLI4_INTR8			BIT8
+#define LPFC_SLI4_INTR9			BIT9
+#define LPFC_SLI4_INTR10		BIT10
+#define LPFC_SLI4_INTR11		BIT11
+#define LPFC_SLI4_INTR12		BIT12
+#define LPFC_SLI4_INTR13		BIT13
+#define LPFC_SLI4_INTR14		BIT14
+#define LPFC_SLI4_INTR15		BIT15
+#define LPFC_SLI4_INTR16		BIT16
+#define LPFC_SLI4_INTR17		BIT17
+#define LPFC_SLI4_INTR18		BIT18
+#define LPFC_SLI4_INTR19		BIT19
+#define LPFC_SLI4_INTR20		BIT20
+#define LPFC_SLI4_INTR21		BIT21
+#define LPFC_SLI4_INTR22		BIT22
+#define LPFC_SLI4_INTR23		BIT23
+#define LPFC_SLI4_INTR24		BIT24
+#define LPFC_SLI4_INTR25		BIT25
+#define LPFC_SLI4_INTR26		BIT26
+#define LPFC_SLI4_INTR27		BIT27
+#define LPFC_SLI4_INTR28		BIT28
+#define LPFC_SLI4_INTR29		BIT29
+#define LPFC_SLI4_INTR30		BIT30
+#define LPFC_SLI4_INTR31		BIT31
+
+/* BAR2 Registers */
+#define LPFC_RQ_DOORBELL		0x00A0
+#define lpfc_rq_doorbell_num_posted_SHIFT	16
+#define lpfc_rq_doorbell_num_posted_MASK	0x3FFF
+#define lpfc_rq_doorbell_num_posted_WORD	word0
+#define LPFC_RQ_POST_BATCH		8	/* RQEs to post at one time */
+#define lpfc_rq_doorbell_id_SHIFT		0
+#define lpfc_rq_doorbell_id_MASK		0x03FF
+#define lpfc_rq_doorbell_id_WORD		word0
+
+#define LPFC_WQ_DOORBELL		0x0040
+#define lpfc_wq_doorbell_num_posted_SHIFT	24
+#define lpfc_wq_doorbell_num_posted_MASK	0x00FF
+#define lpfc_wq_doorbell_num_posted_WORD	word0
+#define lpfc_wq_doorbell_index_SHIFT		16
+#define lpfc_wq_doorbell_index_MASK		0x00FF
+#define lpfc_wq_doorbell_index_WORD		word0
+#define lpfc_wq_doorbell_id_SHIFT		0
+#define lpfc_wq_doorbell_id_MASK		0xFFFF
+#define lpfc_wq_doorbell_id_WORD		word0
+
+#define LPFC_EQCQ_DOORBELL		0x0120
+#define lpfc_eqcq_doorbell_arm_SHIFT		29
+#define lpfc_eqcq_doorbell_arm_MASK		0x0001
+#define lpfc_eqcq_doorbell_arm_WORD		word0
+#define lpfc_eqcq_doorbell_num_released_SHIFT	16
+#define lpfc_eqcq_doorbell_num_released_MASK	0x1FFF
+#define lpfc_eqcq_doorbell_num_released_WORD	word0
+#define lpfc_eqcq_doorbell_qt_SHIFT		10
+#define lpfc_eqcq_doorbell_qt_MASK		0x0001
+#define lpfc_eqcq_doorbell_qt_WORD		word0
+#define LPFC_QUEUE_TYPE_COMPLETION	0
+#define LPFC_QUEUE_TYPE_EVENT		1
+#define lpfc_eqcq_doorbell_eqci_SHIFT		9
+#define lpfc_eqcq_doorbell_eqci_MASK		0x0001
+#define lpfc_eqcq_doorbell_eqci_WORD		word0
+#define lpfc_eqcq_doorbell_cqid_SHIFT		0
+#define lpfc_eqcq_doorbell_cqid_MASK		0x03FF
+#define lpfc_eqcq_doorbell_cqid_WORD		word0
+#define lpfc_eqcq_doorbell_eqid_SHIFT		0
+#define lpfc_eqcq_doorbell_eqid_MASK		0x01FF
+#define lpfc_eqcq_doorbell_eqid_WORD		word0
+
+#define LPFC_BMBX			0x0160
+#define lpfc_bmbx_addr_SHIFT		2
+#define lpfc_bmbx_addr_MASK		0x3FFFFFFF
+#define lpfc_bmbx_addr_WORD		word0
+#define lpfc_bmbx_hi_SHIFT		1
+#define lpfc_bmbx_hi_MASK		0x0001
+#define lpfc_bmbx_hi_WORD		word0
+#define lpfc_bmbx_rdy_SHIFT		0
+#define lpfc_bmbx_rdy_MASK		0x0001
+#define lpfc_bmbx_rdy_WORD		word0
+
+#define LPFC_MQ_DOORBELL			0x0140
+#define lpfc_mq_doorbell_num_posted_SHIFT	16
+#define lpfc_mq_doorbell_num_posted_MASK	0x3FFF
+#define lpfc_mq_doorbell_num_posted_WORD	word0
+#define lpfc_mq_doorbell_id_SHIFT		0
+#define lpfc_mq_doorbell_id_MASK		0x03FF
+#define lpfc_mq_doorbell_id_WORD		word0
+
+struct lpfc_sli4_cfg_mhdr {
+	uint32_t word1;
+#define lpfc_mbox_hdr_emb_SHIFT		0
+#define lpfc_mbox_hdr_emb_MASK		0x00000001
+#define lpfc_mbox_hdr_emb_WORD		word1
+#define lpfc_mbox_hdr_sge_cnt_SHIFT	3
+#define lpfc_mbox_hdr_sge_cnt_MASK	0x0000001F
+#define lpfc_mbox_hdr_sge_cnt_WORD	word1
+	uint32_t payload_length;
+	uint32_t tag_lo;
+	uint32_t tag_hi;
+	uint32_t reserved5;
+};
+
+union lpfc_sli4_cfg_shdr {
+	struct {
+		uint32_t word6;
+#define lpfc_mbox_hdr_opcode_SHIFT		0
+#define lpfc_mbox_hdr_opcode_MASK		0x000000FF
+#define lpfc_mbox_hdr_opcode_WORD		word6
+#define lpfc_mbox_hdr_subsystem_SHIFT		8
+#define lpfc_mbox_hdr_subsystem_MASK		0x000000FF
+#define lpfc_mbox_hdr_subsystem_WORD		word6
+#define lpfc_mbox_hdr_port_number_SHIFT		16
+#define lpfc_mbox_hdr_port_number_MASK		0x000000FF
+#define lpfc_mbox_hdr_port_number_WORD		word6
+#define lpfc_mbox_hdr_domain_SHIFT		24
+#define lpfc_mbox_hdr_domain_MASK		0x000000FF
+#define lpfc_mbox_hdr_domain_WORD		word6
+		uint32_t timeout;
+		uint32_t request_length;
+		uint32_t reserved9;
+	} request;
+	struct {
+		uint32_t word6;
+#define lpfc_mbox_hdr_opcode_SHIFT		0
+#define lpfc_mbox_hdr_opcode_MASK		0x000000FF
+#define lpfc_mbox_hdr_opcode_WORD		word6
+#define lpfc_mbox_hdr_subsystem_SHIFT		8
+#define lpfc_mbox_hdr_subsystem_MASK		0x000000FF
+#define lpfc_mbox_hdr_subsystem_WORD		word6
+#define lpfc_mbox_hdr_domain_SHIFT		24
+#define lpfc_mbox_hdr_domain_MASK		0x000000FF
+#define lpfc_mbox_hdr_domain_WORD		word6
+		uint32_t word7;
+#define lpfc_mbox_hdr_status_SHIFT		0
+#define lpfc_mbox_hdr_status_MASK		0x000000FF
+#define lpfc_mbox_hdr_status_WORD		word7
+#define lpfc_mbox_hdr_add_status_SHIFT		8
+#define lpfc_mbox_hdr_add_status_MASK		0x000000FF
+#define lpfc_mbox_hdr_add_status_WORD		word7
+		uint32_t response_length;
+		uint32_t actual_response_length;
+	} response;
+};
+
+/* Mailbox structures */
+struct mbox_header {
+	struct lpfc_sli4_cfg_mhdr cfg_mhdr;
+	union  lpfc_sli4_cfg_shdr cfg_shdr;
+};
+
+/* Subsystem Definitions */
+#define LPFC_MBOX_SUBSYSTEM_COMMON	0x1
+#define LPFC_MBOX_SUBSYSTEM_FCOE	0xC
+
+/* Device Specific Definitions */
+
+/* The HOST ENDIAN defines are in Big Endian format. */
+#define HOST_ENDIAN_LOW_WORD0   0xFF3412FF
+#define HOST_ENDIAN_HIGH_WORD1	0xFF7856FF
+
+/* Common Opcodes */
+#define LPFC_MBOX_OPCODE_CQ_CREATE		0x0C
+#define LPFC_MBOX_OPCODE_EQ_CREATE		0x0D
+#define LPFC_MBOX_OPCODE_MQ_CREATE		0x15
+#define LPFC_MBOX_OPCODE_GET_CNTL_ATTRIBUTES	0x20
+#define LPFC_MBOX_OPCODE_NOP			0x21
+#define LPFC_MBOX_OPCODE_MQ_DESTROY		0x35
+#define LPFC_MBOX_OPCODE_CQ_DESTROY		0x36
+#define LPFC_MBOX_OPCODE_EQ_DESTROY		0x37
+#define LPFC_MBOX_OPCODE_FUNCTION_RESET		0x3D
+
+/* FCoE Opcodes */
+#define LPFC_MBOX_OPCODE_FCOE_WQ_CREATE			0x01
+#define LPFC_MBOX_OPCODE_FCOE_WQ_DESTROY		0x02
+#define LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES		0x03
+#define LPFC_MBOX_OPCODE_FCOE_REMOVE_SGL_PAGES		0x04
+#define LPFC_MBOX_OPCODE_FCOE_RQ_CREATE			0x05
+#define LPFC_MBOX_OPCODE_FCOE_RQ_DESTROY		0x06
+#define LPFC_MBOX_OPCODE_FCOE_READ_FCF_TABLE		0x08
+#define LPFC_MBOX_OPCODE_FCOE_ADD_FCF			0x09
+#define LPFC_MBOX_OPCODE_FCOE_DELETE_FCF		0x0A
+#define LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE		0x0B
+
+/* Mailbox command structures */
+struct eq_context {
+	uint32_t word0;
+#define lpfc_eq_context_size_SHIFT	31
+#define lpfc_eq_context_size_MASK	0x00000001
+#define lpfc_eq_context_size_WORD	word0
+#define LPFC_EQE_SIZE_4			0x0
+#define LPFC_EQE_SIZE_16		0x1
+#define lpfc_eq_context_valid_SHIFT	29
+#define lpfc_eq_context_valid_MASK	0x00000001
+#define lpfc_eq_context_valid_WORD	word0
+	uint32_t word1;
+#define lpfc_eq_context_count_SHIFT	26
+#define lpfc_eq_context_count_MASK	0x00000003
+#define lpfc_eq_context_count_WORD	word1
+#define LPFC_EQ_CNT_256		0x0
+#define LPFC_EQ_CNT_512		0x1
+#define LPFC_EQ_CNT_1024	0x2
+#define LPFC_EQ_CNT_2048	0x3
+#define LPFC_EQ_CNT_4096	0x4
+	uint32_t word2;
+#define lpfc_eq_context_delay_multi_SHIFT	13
+#define lpfc_eq_context_delay_multi_MASK	0x000003FF
+#define lpfc_eq_context_delay_multi_WORD	word2
+	uint32_t reserved3;
+};
+
+struct sgl_page_pairs {
+	uint32_t sgl_pg0_addr_lo;
+	uint32_t sgl_pg0_addr_hi;
+	uint32_t sgl_pg1_addr_lo;
+	uint32_t sgl_pg1_addr_hi;
+};
+
+struct lpfc_mbx_post_sgl_pages {
+	struct mbox_header header;
+	uint32_t word0;
+#define lpfc_post_sgl_pages_xri_SHIFT	0
+#define lpfc_post_sgl_pages_xri_MASK	0x0000FFFF
+#define lpfc_post_sgl_pages_xri_WORD	word0
+#define lpfc_post_sgl_pages_xricnt_SHIFT	16
+#define lpfc_post_sgl_pages_xricnt_MASK	0x0000FFFF
+#define lpfc_post_sgl_pages_xricnt_WORD	word0
+	struct sgl_page_pairs  sgl_pg_pairs[1];
+};
+
+/* word0 of page-1 struct shares the same SHIFT/MASK/WORD defines as above */
+struct lpfc_mbx_post_uembed_sgl_page1 {
+	union  lpfc_sli4_cfg_shdr cfg_shdr;
+	uint32_t word0;
+	struct sgl_page_pairs sgl_pg_pairs;
+};
+
+struct lpfc_mbx_sge {
+	uint32_t pa_lo;
+	uint32_t pa_hi;
+	uint32_t length;
+};
+
+struct lpfc_mbx_nembed_cmd {
+	struct lpfc_sli4_cfg_mhdr cfg_mhdr;
+#define LPFC_SLI4_MBX_SGE_MAX_PAGES	19
+	struct lpfc_mbx_sge sge[LPFC_SLI4_MBX_SGE_MAX_PAGES];
+};
+
+struct lpfc_mbx_nembed_sge_virt {
+	void *addr[LPFC_SLI4_MBX_SGE_MAX_PAGES];
+};
+
+struct lpfc_mbx_eq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_create_num_pages_SHIFT	0
+#define lpfc_mbx_eq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_eq_create_num_pages_WORD	word0
+			struct eq_context context;
+			struct dma_address page[LPFC_MAX_EQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_create_q_id_SHIFT	0
+#define lpfc_mbx_eq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_eq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_eq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_eq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_eq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_nop {
+	struct mbox_header header;
+	uint32_t context[2];
+};
+
+struct cq_context {
+	uint32_t word0;
+#define lpfc_cq_context_event_SHIFT	31
+#define lpfc_cq_context_event_MASK	0x00000001
+#define lpfc_cq_context_event_WORD	word0
+#define lpfc_cq_context_valid_SHIFT	29
+#define lpfc_cq_context_valid_MASK	0x00000001
+#define lpfc_cq_context_valid_WORD	word0
+#define lpfc_cq_context_count_SHIFT	27
+#define lpfc_cq_context_count_MASK	0x00000003
+#define lpfc_cq_context_count_WORD	word0
+#define LPFC_CQ_CNT_256		0x0
+#define LPFC_CQ_CNT_512		0x1
+#define LPFC_CQ_CNT_1024	0x2
+	uint32_t word1;
+#define lpfc_cq_eq_id_SHIFT		22
+#define lpfc_cq_eq_id_MASK		0x000000FF
+#define lpfc_cq_eq_id_WORD		word1
+	uint32_t reserved0;
+	uint32_t reserved1;
+};
+
+struct lpfc_mbx_cq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_create_num_pages_SHIFT	0
+#define lpfc_mbx_cq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_cq_create_num_pages_WORD	word0
+			struct cq_context context;
+			struct dma_address page[LPFC_MAX_CQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_create_q_id_SHIFT	0
+#define lpfc_mbx_cq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_cq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_cq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_cq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_cq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct wq_context {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t reserved2;
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_wq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_create_num_pages_SHIFT	0
+#define lpfc_mbx_wq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_num_pages_WORD	word0
+#define lpfc_mbx_wq_create_cq_id_SHIFT		16
+#define lpfc_mbx_wq_create_cq_id_MASK		0x0000FFFF
+#define lpfc_mbx_wq_create_cq_id_WORD		word0
+			struct dma_address page[LPFC_MAX_WQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_create_q_id_SHIFT	0
+#define lpfc_mbx_wq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_wq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_wq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_wq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+#define LPFC_HDR_BUF_SIZE 128
+#define LPFC_DATA_BUF_SIZE 4096
+struct rq_context {
+	uint32_t word0;
+#define lpfc_rq_context_rq_size_SHIFT	16
+#define lpfc_rq_context_rq_size_MASK	0x0000000F
+#define lpfc_rq_context_rq_size_WORD	word0
+#define LPFC_RQ_RING_SIZE_512		9	/* 512 entries */
+#define LPFC_RQ_RING_SIZE_1024		10	/* 1024 entries */
+#define LPFC_RQ_RING_SIZE_2048		11	/* 2048 entries */
+#define LPFC_RQ_RING_SIZE_4096		12	/* 4096 entries */
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_rq_context_cq_id_SHIFT	16
+#define lpfc_rq_context_cq_id_MASK	0x000003FF
+#define lpfc_rq_context_cq_id_WORD	word2
+#define lpfc_rq_context_buf_size_SHIFT	0
+#define lpfc_rq_context_buf_size_MASK	0x0000FFFF
+#define lpfc_rq_context_buf_size_WORD	word2
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_rq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_create_num_pages_SHIFT	0
+#define lpfc_mbx_rq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_rq_create_num_pages_WORD	word0
+			struct rq_context context;
+			struct dma_address page[LPFC_MAX_WQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_create_q_id_SHIFT	0
+#define lpfc_mbx_rq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_rq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_rq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_rq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_rq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct mq_context {
+	uint32_t word0;
+#define lpfc_mq_context_cq_id_SHIFT	22
+#define lpfc_mq_context_cq_id_MASK	0x000003FF
+#define lpfc_mq_context_cq_id_WORD	word0
+#define lpfc_mq_context_count_SHIFT	16
+#define lpfc_mq_context_count_MASK	0x0000000F
+#define lpfc_mq_context_count_WORD	word0
+#define LPFC_MQ_CNT_16		0x5
+#define LPFC_MQ_CNT_32		0x6
+#define LPFC_MQ_CNT_64		0x7
+#define LPFC_MQ_CNT_128		0x8
+	uint32_t word1;
+#define lpfc_mq_context_valid_SHIFT	31
+#define lpfc_mq_context_valid_MASK	0x00000001
+#define lpfc_mq_context_valid_WORD	word1
+	uint32_t reserved2;
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_mq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_create_num_pages_SHIFT	0
+#define lpfc_mbx_mq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_num_pages_WORD	word0
+			struct mq_context context;
+			struct dma_address page[LPFC_MAX_MQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_create_q_id_SHIFT	0
+#define lpfc_mbx_mq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_mq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_mq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_mq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_post_hdr_tmpl {
+	struct mbox_header header;
+	uint32_t word10;
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_SHIFT  0
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_MASK   0x0000FFFF
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_WORD   word10
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_SHIFT   16
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_MASK    0x0000FFFF
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_WORD    word10
+	uint32_t rpi_paddr_lo;
+	uint32_t rpi_paddr_hi;
+};
+
+struct sli4_sge {	/* SLI-4 */
+	uint32_t addr_hi;
+	uint32_t addr_lo;
+
+	uint32_t word2;
+#define lpfc_sli4_sge_offset_SHIFT	0 /* Offset of buffer - Not used*/
+#define lpfc_sli4_sge_offset_MASK	0x00FFFFFF
+#define lpfc_sli4_sge_offset_WORD	word2
+#define lpfc_sli4_sge_last_SHIFT	31 /* Last SEG in the SGL sets
+						this  flag !! */
+#define lpfc_sli4_sge_last_MASK		0x00000001
+#define lpfc_sli4_sge_last_WORD		word2
+	uint32_t word3;
+#define lpfc_sli4_sge_len_SHIFT		0
+#define lpfc_sli4_sge_len_MASK		0x0001FFFF
+#define lpfc_sli4_sge_len_WORD		word3
+};
+
+struct fcf_record {
+	uint32_t max_rcv_size;
+	uint32_t fka_adv_period;
+	uint32_t fip_priority;
+	uint32_t word3;
+#define lpfc_fcf_record_mac_0_SHIFT		0
+#define lpfc_fcf_record_mac_0_MASK		0x000000FF
+#define lpfc_fcf_record_mac_0_WORD		word3
+#define lpfc_fcf_record_mac_1_SHIFT		8
+#define lpfc_fcf_record_mac_1_MASK		0x000000FF
+#define lpfc_fcf_record_mac_1_WORD		word3
+#define lpfc_fcf_record_mac_2_SHIFT		16
+#define lpfc_fcf_record_mac_2_MASK		0x000000FF
+#define lpfc_fcf_record_mac_2_WORD		word3
+#define lpfc_fcf_record_mac_3_SHIFT		24
+#define lpfc_fcf_record_mac_3_MASK		0x000000FF
+#define lpfc_fcf_record_mac_3_WORD		word3
+	uint32_t word4;
+#define lpfc_fcf_record_mac_4_SHIFT		0
+#define lpfc_fcf_record_mac_4_MASK		0x000000FF
+#define lpfc_fcf_record_mac_4_WORD		word4
+#define lpfc_fcf_record_mac_5_SHIFT		8
+#define lpfc_fcf_record_mac_5_MASK		0x000000FF
+#define lpfc_fcf_record_mac_5_WORD		word4
+#define lpfc_fcf_record_fcf_avail_SHIFT		16
+#define lpfc_fcf_record_fcf_avail_MASK		0x000000FF
+#define lpfc_fcf_record_fc_avail_WORD		word4
+#define lpfc_fcf_record_mac_addr_prov_SHIFT	24
+#define lpfc_fcf_record_mac_addr_prov_MASK	0x000000FF
+#define lpfc_fcf_record_mac_addr_prov_WORD	word4
+#define LPFC_FCF_FPMA           1 	/* Fabric Provided MAC Address */
+#define LPFC_FCF_SPMA           2       /* Server Provided MAC Address */
+	uint32_t word5;
+#define lpfc_fcf_record_fab_name_0_SHIFT	0
+#define lpfc_fcf_record_fab_name_0_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_0_WORD		word5
+#define lpfc_fcf_record_fab_name_1_SHIFT	8
+#define lpfc_fcf_record_fab_name_1_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_1_WORD		word5
+#define lpfc_fcf_record_fab_name_2_SHIFT	16
+#define lpfc_fcf_record_fab_name_2_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_2_WORD		word5
+#define lpfc_fcf_record_fab_name_3_SHIFT	24
+#define lpfc_fcf_record_fab_name_3_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_3_WORD		word5
+	uint32_t word6;
+#define lpfc_fcf_record_fab_name_4_SHIFT	0
+#define lpfc_fcf_record_fab_name_4_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_4_WORD		word6
+#define lpfc_fcf_record_fab_name_5_SHIFT	8
+#define lpfc_fcf_record_fab_name_5_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_5_WORD		word6
+#define lpfc_fcf_record_fab_name_6_SHIFT	16
+#define lpfc_fcf_record_fab_name_6_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_6_WORD		word6
+#define lpfc_fcf_record_fab_name_7_SHIFT	24
+#define lpfc_fcf_record_fab_name_7_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_7_WORD		word6
+	uint32_t word7;
+#define lpfc_fcf_record_fc_map_0_SHIFT		0
+#define lpfc_fcf_record_fc_map_0_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_0_WORD		word7
+#define lpfc_fcf_record_fc_map_1_SHIFT		8
+#define lpfc_fcf_record_fc_map_1_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_1_WORD		word7
+#define lpfc_fcf_record_fc_map_2_SHIFT		16
+#define lpfc_fcf_record_fc_map_2_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_2_WORD		word7
+#define lpfc_fcf_record_fcf_valid_SHIFT		24
+#define lpfc_fcf_record_fcf_valid_MASK		0x000000FF
+#define lpfc_fcf_record_fcf_valid_WORD		word7
+	uint32_t word8;
+#define lpfc_fcf_record_fcf_index_SHIFT		0
+#define lpfc_fcf_record_fcf_index_MASK		0x0000FFFF
+#define lpfc_fcf_record_fcf_index_WORD		word8
+#define lpfc_fcf_record_fcf_state_SHIFT		16
+#define lpfc_fcf_record_fcf_state_MASK		0x0000FFFF
+#define lpfc_fcf_record_fcf_state_WORD		word8
+	uint8_t vlan_bitmap[512];
+};
+
+struct lpfc_mbx_read_fcf_tbl {
+	union lpfc_sli4_cfg_shdr cfg_shdr;
+	union {
+		struct {
+			uint32_t word10;
+#define lpfc_mbx_read_fcf_tbl_indx_SHIFT	0
+#define lpfc_mbx_read_fcf_tbl_indx_MASK		0x0000FFFF
+#define lpfc_mbx_read_fcf_tbl_indx_WORD		word10
+		} request;
+		struct {
+			uint32_t eventag;
+		} response;
+	} u;
+	uint32_t word11;
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_SHIFT	0
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_MASK	0x0000FFFF
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_WORD	word11
+};
+
+struct lpfc_mbx_add_fcf_tbl_entry {
+	union lpfc_sli4_cfg_shdr cfg_shdr;
+	uint32_t word10;
+#define lpfc_mbx_add_fcf_tbl_fcfi_SHIFT        0
+#define lpfc_mbx_add_fcf_tbl_fcfi_MASK         0x0000FFFF
+#define lpfc_mbx_add_fcf_tbl_fcfi_WORD         word10
+	struct lpfc_mbx_sge fcf_sge;
+};
+
+struct lpfc_mbx_del_fcf_tbl_entry {
+	struct mbox_header header;
+	uint32_t word10;
+#define lpfc_mbx_del_fcf_tbl_count_SHIFT	0
+#define lpfc_mbx_del_fcf_tbl_count_MASK		0x0000FFFF
+#define lpfc_mbx_del_fcf_tbl_count_WORD		word10
+#define lpfc_mbx_del_fcf_tbl_index_SHIFT	16
+#define lpfc_mbx_del_fcf_tbl_index_MASK		0x0000FFFF
+#define lpfc_mbx_del_fcf_tbl_index_WORD		word10
+};
+
+/* Status field for embedded SLI_CONFIG mailbox command */
+#define STATUS_SUCCESS					0x0
+#define STATUS_FAILED 					0x1
+#define STATUS_ILLEGAL_REQUEST				0x2
+#define STATUS_ILLEGAL_FIELD				0x3
+#define STATUS_INSUFFICIENT_BUFFER 			0x4
+#define STATUS_UNAUTHORIZED_REQUEST			0x5
+#define STATUS_FLASHROM_SAVE_FAILED			0x17
+#define STATUS_FLASHROM_RESTORE_FAILED			0x18
+#define STATUS_ICCBINDEX_ALLOC_FAILED			0x1a
+#define STATUS_IOCTLHANDLE_ALLOC_FAILED 		0x1b
+#define STATUS_INVALID_PHY_ADDR_FROM_OSM		0x1c
+#define STATUS_INVALID_PHY_ADDR_LEN_FROM_OSM		0x1d
+#define STATUS_ASSERT_FAILED				0x1e
+#define STATUS_INVALID_SESSION				0x1f
+#define STATUS_INVALID_CONNECTION			0x20
+#define STATUS_BTL_PATH_EXCEEDS_OSM_LIMIT		0x21
+#define STATUS_BTL_NO_FREE_SLOT_PATH			0x24
+#define STATUS_BTL_NO_FREE_SLOT_TGTID			0x25
+#define STATUS_OSM_DEVSLOT_NOT_FOUND			0x26
+#define STATUS_FLASHROM_READ_FAILED			0x27
+#define STATUS_POLL_IOCTL_TIMEOUT			0x28
+#define STATUS_ERROR_ACITMAIN				0x2a
+#define STATUS_REBOOT_REQUIRED				0x2c
+#define STATUS_FCF_IN_USE				0x3a
+
+struct lpfc_mbx_sli4_config {
+	struct mbox_header header;
+};
+
+struct lpfc_mbx_init_vfi {
+	uint32_t word1;
+#define lpfc_init_vfi_vr_SHIFT		31
+#define lpfc_init_vfi_vr_MASK		0x00000001
+#define lpfc_init_vfi_vr_WORD		word1
+#define lpfc_init_vfi_vt_SHIFT		30
+#define lpfc_init_vfi_vt_MASK		0x00000001
+#define lpfc_init_vfi_vt_WORD		word1
+#define lpfc_init_vfi_vf_SHIFT		29
+#define lpfc_init_vfi_vf_MASK		0x00000001
+#define lpfc_init_vfi_vf_WORD		word1
+#define lpfc_init_vfi_vfi_SHIFT		0
+#define lpfc_init_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_init_vfi_vfi_WORD		word1
+	uint32_t word2;
+#define lpfc_init_vfi_fcfi_SHIFT	0
+#define lpfc_init_vfi_fcfi_MASK		0x0000FFFF
+#define lpfc_init_vfi_fcfi_WORD		word2
+	uint32_t word3;
+#define lpfc_init_vfi_pri_SHIFT		13
+#define lpfc_init_vfi_pri_MASK		0x00000007
+#define lpfc_init_vfi_pri_WORD		word3
+#define lpfc_init_vfi_vf_id_SHIFT	1
+#define lpfc_init_vfi_vf_id_MASK	0x00000FFF
+#define lpfc_init_vfi_vf_id_WORD	word3
+	uint32_t word4;
+#define lpfc_init_vfi_hop_count_SHIFT	24
+#define lpfc_init_vfi_hop_count_MASK	0x000000FF
+#define lpfc_init_vfi_hop_count_WORD	word4
+};
+
+struct lpfc_mbx_reg_vfi {
+	uint32_t word1;
+#define lpfc_reg_vfi_vp_SHIFT		28
+#define lpfc_reg_vfi_vp_MASK		0x00000001
+#define lpfc_reg_vfi_vp_WORD		word1
+#define lpfc_reg_vfi_vfi_SHIFT		0
+#define lpfc_reg_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_vfi_WORD		word1
+	uint32_t word2;
+#define lpfc_reg_vfi_vpi_SHIFT		16
+#define lpfc_reg_vfi_vpi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_vpi_WORD		word2
+#define lpfc_reg_vfi_fcfi_SHIFT		0
+#define lpfc_reg_vfi_fcfi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_fcfi_WORD		word2
+	uint32_t word3_rsvd;
+	uint32_t word4_rsvd;
+	struct ulp_bde64 bde;
+	uint32_t word8_rsvd;
+	uint32_t word9_rsvd;
+	uint32_t word10;
+#define lpfc_reg_vfi_nport_id_SHIFT		0
+#define lpfc_reg_vfi_nport_id_MASK		0x00FFFFFF
+#define lpfc_reg_vfi_nport_id_WORD		word10
+};
+
+struct lpfc_mbx_init_vpi {
+	uint32_t word1;
+#define lpfc_init_vpi_vfi_SHIFT		16
+#define lpfc_init_vpi_vfi_MASK		0x0000FFFF
+#define lpfc_init_vpi_vfi_WORD		word1
+#define lpfc_init_vpi_vpi_SHIFT		0
+#define lpfc_init_vpi_vpi_MASK		0x0000FFFF
+#define lpfc_init_vpi_vpi_WORD		word1
+};
+
+struct lpfc_mbx_read_vpi {
+	uint32_t word1_rsvd;
+	uint32_t word2;
+#define lpfc_mbx_read_vpi_vnportid_SHIFT	0
+#define lpfc_mbx_read_vpi_vnportid_MASK		0x00FFFFFF
+#define lpfc_mbx_read_vpi_vnportid_WORD		word2
+	uint32_t word3_rsvd;
+	uint32_t word4;
+#define lpfc_mbx_read_vpi_acq_alpa_SHIFT	0
+#define lpfc_mbx_read_vpi_acq_alpa_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_acq_alpa_WORD		word4
+#define lpfc_mbx_read_vpi_pb_SHIFT		15
+#define lpfc_mbx_read_vpi_pb_MASK		0x00000001
+#define lpfc_mbx_read_vpi_pb_WORD		word4
+#define lpfc_mbx_read_vpi_spec_alpa_SHIFT	16
+#define lpfc_mbx_read_vpi_spec_alpa_MASK	0x000000FF
+#define lpfc_mbx_read_vpi_spec_alpa_WORD	word4
+#define lpfc_mbx_read_vpi_ns_SHIFT		30
+#define lpfc_mbx_read_vpi_ns_MASK		0x00000001
+#define lpfc_mbx_read_vpi_ns_WORD		word4
+#define lpfc_mbx_read_vpi_hl_SHIFT		31
+#define lpfc_mbx_read_vpi_hl_MASK		0x00000001
+#define lpfc_mbx_read_vpi_hl_WORD		word4
+	uint32_t word5_rsvd;
+	uint32_t word6;
+#define lpfc_mbx_read_vpi_vpi_SHIFT		0
+#define lpfc_mbx_read_vpi_vpi_MASK		0x0000FFFF
+#define lpfc_mbx_read_vpi_vpi_WORD		word6
+	uint32_t word7;
+#define lpfc_mbx_read_vpi_mac_0_SHIFT		0
+#define lpfc_mbx_read_vpi_mac_0_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_0_WORD		word7
+#define lpfc_mbx_read_vpi_mac_1_SHIFT		8
+#define lpfc_mbx_read_vpi_mac_1_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_1_WORD		word7
+#define lpfc_mbx_read_vpi_mac_2_SHIFT		16
+#define lpfc_mbx_read_vpi_mac_2_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_2_WORD		word7
+#define lpfc_mbx_read_vpi_mac_3_SHIFT		24
+#define lpfc_mbx_read_vpi_mac_3_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_3_WORD		word7
+	uint32_t word8;
+#define lpfc_mbx_read_vpi_mac_4_SHIFT		0
+#define lpfc_mbx_read_vpi_mac_4_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_4_WORD		word8
+#define lpfc_mbx_read_vpi_mac_5_SHIFT		8
+#define lpfc_mbx_read_vpi_mac_5_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_5_WORD		word8
+#define lpfc_mbx_read_vpi_vlan_tag_SHIFT	16
+#define lpfc_mbx_read_vpi_vlan_tag_MASK		0x00000FFF
+#define lpfc_mbx_read_vpi_vlan_tag_WORD		word8
+#define lpfc_mbx_read_vpi_vv_SHIFT		28
+#define lpfc_mbx_read_vpi_vv_MASK		0x0000001
+#define lpfc_mbx_read_vpi_vv_WORD		word8
+};
+
+struct lpfc_mbx_unreg_vfi {
+	uint32_t word1_rsvd;
+	uint32_t word2;
+#define lpfc_unreg_vfi_vfi_SHIFT	0
+#define lpfc_unreg_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_unreg_vfi_vfi_WORD		word2
+};
+
+struct lpfc_mbx_resume_rpi {
+	uint32_t word1;
+#define lpfc_resume_rpi_rpi_SHIFT	0
+#define lpfc_resume_rpi_rpi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_rpi_WORD	word1
+	uint32_t event_tag;
+	uint32_t word3_rsvd;
+	uint32_t word4_rsvd;
+	uint32_t word5_rsvd;
+	uint32_t word6;
+#define lpfc_resume_rpi_vpi_SHIFT	0
+#define lpfc_resume_rpi_vpi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_vpi_WORD	word6
+#define lpfc_resume_rpi_vfi_SHIFT	16
+#define lpfc_resume_rpi_vfi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_vfi_WORD	word6
+};
+
+#define REG_FCF_INVALID_QID	0xFFFF
+struct lpfc_mbx_reg_fcfi {
+	uint32_t word1;
+#define lpfc_reg_fcfi_info_index_SHIFT	0
+#define lpfc_reg_fcfi_info_index_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_info_index_WORD	word1
+#define lpfc_reg_fcfi_fcfi_SHIFT	16
+#define lpfc_reg_fcfi_fcfi_MASK		0x0000FFFF
+#define lpfc_reg_fcfi_fcfi_WORD		word1
+	uint32_t word2;
+#define lpfc_reg_fcfi_rq_id1_SHIFT	0
+#define lpfc_reg_fcfi_rq_id1_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id1_WORD	word2
+#define lpfc_reg_fcfi_rq_id0_SHIFT	16
+#define lpfc_reg_fcfi_rq_id0_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id0_WORD	word2
+	uint32_t word3;
+#define lpfc_reg_fcfi_rq_id3_SHIFT	0
+#define lpfc_reg_fcfi_rq_id3_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id3_WORD	word3
+#define lpfc_reg_fcfi_rq_id2_SHIFT	16
+#define lpfc_reg_fcfi_rq_id2_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id2_WORD	word3
+	uint32_t word4;
+#define lpfc_reg_fcfi_type_match0_SHIFT	24
+#define lpfc_reg_fcfi_type_match0_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match0_WORD	word4
+#define lpfc_reg_fcfi_type_mask0_SHIFT	16
+#define lpfc_reg_fcfi_type_mask0_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask0_WORD	word4
+#define lpfc_reg_fcfi_rctl_match0_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match0_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match0_WORD	word4
+#define lpfc_reg_fcfi_rctl_mask0_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask0_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask0_WORD	word4
+	uint32_t word5;
+#define lpfc_reg_fcfi_type_match1_SHIFT	24
+#define lpfc_reg_fcfi_type_match1_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match1_WORD	word5
+#define lpfc_reg_fcfi_type_mask1_SHIFT	16
+#define lpfc_reg_fcfi_type_mask1_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask1_WORD	word5
+#define lpfc_reg_fcfi_rctl_match1_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match1_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match1_WORD	word5
+#define lpfc_reg_fcfi_rctl_mask1_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask1_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask1_WORD	word5
+	uint32_t word6;
+#define lpfc_reg_fcfi_type_match2_SHIFT	24
+#define lpfc_reg_fcfi_type_match2_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match2_WORD	word6
+#define lpfc_reg_fcfi_type_mask2_SHIFT	16
+#define lpfc_reg_fcfi_type_mask2_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask2_WORD	word6
+#define lpfc_reg_fcfi_rctl_match2_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match2_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match2_WORD	word6
+#define lpfc_reg_fcfi_rctl_mask2_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask2_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask2_WORD	word6
+	uint32_t word7;
+#define lpfc_reg_fcfi_type_match3_SHIFT	24
+#define lpfc_reg_fcfi_type_match3_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match3_WORD	word7
+#define lpfc_reg_fcfi_type_mask3_SHIFT	16
+#define lpfc_reg_fcfi_type_mask3_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask3_WORD	word7
+#define lpfc_reg_fcfi_rctl_match3_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match3_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match3_WORD	word7
+#define lpfc_reg_fcfi_rctl_mask3_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask3_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask3_WORD	word7
+	uint32_t word8;
+#define lpfc_reg_fcfi_mam_SHIFT		13
+#define lpfc_reg_fcfi_mam_MASK		0x00000003
+#define lpfc_reg_fcfi_mam_WORD		word8
+#define LPFC_MAM_BOTH		0	/* Both SPMA and FPMA */
+#define LPFC_MAM_SPMA		1	/* Server Provided MAC Address */
+#define LPFC_MAM_FPMA		2	/* Fabric Provided MAC Address */
+#define lpfc_reg_fcfi_vv_SHIFT		12
+#define lpfc_reg_fcfi_vv_MASK		0x00000001
+#define lpfc_reg_fcfi_vv_WORD		word8
+#define lpfc_reg_fcfi_vlan_tag_SHIFT	0
+#define lpfc_reg_fcfi_vlan_tag_MASK	0x00000FFF
+#define lpfc_reg_fcfi_vlan_tag_WORD	word8
+};
+
+struct lpfc_mbx_unreg_fcfi {
+	uint32_t word1_rsv;
+	uint32_t word2;
+#define lpfc_unreg_fcfi_SHIFT		0
+#define lpfc_unreg_fcfi_MASK		0x0000FFFF
+#define lpfc_unreg_fcfi_WORD		word2
+};
+
+struct lpfc_mbx_read_rev {
+	uint32_t word1;
+#define lpfc_mbx_rd_rev_sli_lvl_SHIFT  		16
+#define lpfc_mbx_rd_rev_sli_lvl_MASK   		0x0000000F
+#define lpfc_mbx_rd_rev_sli_lvl_WORD   		word1
+#define lpfc_mbx_rd_rev_fcoe_SHIFT		20
+#define lpfc_mbx_rd_rev_fcoe_MASK		0x00000001
+#define lpfc_mbx_rd_rev_fcoe_WORD		word1
+#define lpfc_mbx_rd_rev_vpd_SHIFT		29
+#define lpfc_mbx_rd_rev_vpd_MASK		0x00000001
+#define lpfc_mbx_rd_rev_vpd_WORD		word1
+	uint32_t first_hw_rev;
+	uint32_t second_hw_rev;
+	uint32_t word4_rsvd;
+	uint32_t third_hw_rev;
+	uint32_t word6;
+#define lpfc_mbx_rd_rev_fcph_low_SHIFT		0
+#define lpfc_mbx_rd_rev_fcph_low_MASK		0x000000FF
+#define lpfc_mbx_rd_rev_fcph_low_WORD		word6
+#define lpfc_mbx_rd_rev_fcph_high_SHIFT		8
+#define lpfc_mbx_rd_rev_fcph_high_MASK		0x000000FF
+#define lpfc_mbx_rd_rev_fcph_high_WORD		word6
+#define lpfc_mbx_rd_rev_ftr_lvl_low_SHIFT	16
+#define lpfc_mbx_rd_rev_ftr_lvl_low_MASK	0x000000FF
+#define lpfc_mbx_rd_rev_ftr_lvl_low_WORD	word6
+#define lpfc_mbx_rd_rev_ftr_lvl_high_SHIFT	24
+#define lpfc_mbx_rd_rev_ftr_lvl_high_MASK	0x000000FF
+#define lpfc_mbx_rd_rev_ftr_lvl_high_WORD	word6
+	uint32_t word7_rsvd;
+	uint32_t fw_id_rev;
+	uint8_t  fw_name[16];
+	uint32_t ulp_fw_id_rev;
+	uint8_t  ulp_fw_name[16];
+	uint32_t word18_47_rsvd[30];
+	uint32_t word48;
+#define lpfc_mbx_rd_rev_avail_len_SHIFT		0
+#define lpfc_mbx_rd_rev_avail_len_MASK		0x00FFFFFF
+#define lpfc_mbx_rd_rev_avail_len_WORD		word48
+	uint32_t vpd_paddr_low;
+	uint32_t vpd_paddr_high;
+	uint32_t avail_vpd_len;
+	uint32_t rsvd_52_63[12];
+};
+
+struct lpfc_mbx_read_config {
+	uint32_t word1;
+#define lpfc_mbx_rd_conf_max_bbc_SHIFT		0
+#define lpfc_mbx_rd_conf_max_bbc_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_max_bbc_WORD		word1
+#define lpfc_mbx_rd_conf_init_bbc_SHIFT		8
+#define lpfc_mbx_rd_conf_init_bbc_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_init_bbc_WORD		word1
+	uint32_t word2;
+#define lpfc_mbx_rd_conf_nport_did_SHIFT	0
+#define lpfc_mbx_rd_conf_nport_did_MASK		0x00FFFFFF
+#define lpfc_mbx_rd_conf_nport_did_WORD		word2
+#define lpfc_mbx_rd_conf_topology_SHIFT		24
+#define lpfc_mbx_rd_conf_topology_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_topology_WORD		word2
+	uint32_t word3;
+#define lpfc_mbx_rd_conf_ao_SHIFT		0
+#define lpfc_mbx_rd_conf_ao_MASK		0x00000001
+#define lpfc_mbx_rd_conf_ao_WORD		word3
+#define lpfc_mbx_rd_conf_bb_scn_SHIFT		8
+#define lpfc_mbx_rd_conf_bb_scn_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_bb_scn_WORD		word3
+#define lpfc_mbx_rd_conf_cbb_scn_SHIFT		12
+#define lpfc_mbx_rd_conf_cbb_scn_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_cbb_scn_WORD		word3
+#define lpfc_mbx_rd_conf_mc_SHIFT		29
+#define lpfc_mbx_rd_conf_mc_MASK		0x00000001
+#define lpfc_mbx_rd_conf_mc_WORD		word3
+	uint32_t word4;
+#define lpfc_mbx_rd_conf_e_d_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_e_d_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_e_d_tov_WORD		word4
+	uint32_t word5;
+#define lpfc_mbx_rd_conf_lp_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_lp_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_lp_tov_WORD		word5
+	uint32_t word6;
+#define lpfc_mbx_rd_conf_r_a_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_r_a_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_r_a_tov_WORD		word6
+	uint32_t word7;
+#define lpfc_mbx_rd_conf_r_t_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_r_t_tov_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_r_t_tov_WORD		word7
+	uint32_t word8;
+#define lpfc_mbx_rd_conf_al_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_al_tov_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_al_tov_WORD		word8
+	uint32_t word9;
+#define lpfc_mbx_rd_conf_lmt_SHIFT		0
+#define lpfc_mbx_rd_conf_lmt_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_lmt_WORD		word9
+	uint32_t word10;
+#define lpfc_mbx_rd_conf_max_alpa_SHIFT		0
+#define lpfc_mbx_rd_conf_max_alpa_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_max_alpa_WORD		word10
+	uint32_t word11_rsvd;
+	uint32_t word12;
+#define lpfc_mbx_rd_conf_xri_base_SHIFT		0
+#define lpfc_mbx_rd_conf_xri_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_xri_base_WORD		word12
+#define lpfc_mbx_rd_conf_xri_count_SHIFT	16
+#define lpfc_mbx_rd_conf_xri_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_xri_count_WORD		word12
+	uint32_t word13;
+#define lpfc_mbx_rd_conf_rpi_base_SHIFT		0
+#define lpfc_mbx_rd_conf_rpi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rpi_base_WORD		word13
+#define lpfc_mbx_rd_conf_rpi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_rpi_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rpi_count_WORD		word13
+	uint32_t word14;
+#define lpfc_mbx_rd_conf_vpi_base_SHIFT		0
+#define lpfc_mbx_rd_conf_vpi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_vpi_base_WORD		word14
+#define lpfc_mbx_rd_conf_vpi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_vpi_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_vpi_count_WORD		word14
+	uint32_t word15;
+#define lpfc_mbx_rd_conf_vfi_base_SHIFT         0
+#define lpfc_mbx_rd_conf_vfi_base_MASK          0x0000FFFF
+#define lpfc_mbx_rd_conf_vfi_base_WORD          word15
+#define lpfc_mbx_rd_conf_vfi_count_SHIFT        16
+#define lpfc_mbx_rd_conf_vfi_count_MASK         0x0000FFFF
+#define lpfc_mbx_rd_conf_vfi_count_WORD         word15
+	uint32_t word16;
+#define lpfc_mbx_rd_conf_fcfi_base_SHIFT	0
+#define lpfc_mbx_rd_conf_fcfi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_fcfi_base_WORD		word16
+#define lpfc_mbx_rd_conf_fcfi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_fcfi_count_MASK	0x0000FFFF
+#define lpfc_mbx_rd_conf_fcfi_count_WORD	word16
+	uint32_t word17;
+#define lpfc_mbx_rd_conf_rq_count_SHIFT		0
+#define lpfc_mbx_rd_conf_rq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rq_count_WORD		word17
+#define lpfc_mbx_rd_conf_eq_count_SHIFT		16
+#define lpfc_mbx_rd_conf_eq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_eq_count_WORD		word17
+	uint32_t word18;
+#define lpfc_mbx_rd_conf_wq_count_SHIFT		0
+#define lpfc_mbx_rd_conf_wq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_wq_count_WORD		word18
+#define lpfc_mbx_rd_conf_cq_count_SHIFT		16
+#define lpfc_mbx_rd_conf_cq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_cq_count_WORD		word18
+};
+
+struct lpfc_mbx_request_features {
+	uint32_t word1;
+#define lpfc_mbx_rq_ftr_qry_SHIFT		0
+#define lpfc_mbx_rq_ftr_qry_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_qry_WORD		word1
+	uint32_t word2;
+#define lpfc_mbx_rq_ftr_rq_iaab_SHIFT		0
+#define lpfc_mbx_rq_ftr_rq_iaab_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_iaab_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_npiv_SHIFT		1
+#define lpfc_mbx_rq_ftr_rq_npiv_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_npiv_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_dif_SHIFT		2
+#define lpfc_mbx_rq_ftr_rq_dif_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_dif_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_vf_SHIFT		3
+#define lpfc_mbx_rq_ftr_rq_vf_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_vf_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpi_SHIFT		4
+#define lpfc_mbx_rq_ftr_rq_fcpi_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpi_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpt_SHIFT		5
+#define lpfc_mbx_rq_ftr_rq_fcpt_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpt_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpc_SHIFT		6
+#define lpfc_mbx_rq_ftr_rq_fcpc_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpc_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_ifip_SHIFT		7
+#define lpfc_mbx_rq_ftr_rq_ifip_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_ifip_WORD		word2
+	uint32_t word3;
+#define lpfc_mbx_rq_ftr_rsp_iaab_SHIFT		0
+#define lpfc_mbx_rq_ftr_rsp_iaab_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_iaab_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_npiv_SHIFT		1
+#define lpfc_mbx_rq_ftr_rsp_npiv_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_npiv_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_dif_SHIFT		2
+#define lpfc_mbx_rq_ftr_rsp_dif_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_dif_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_vf_SHIFT		3
+#define lpfc_mbx_rq_ftr_rsp_vf__MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_vf_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpi_SHIFT		4
+#define lpfc_mbx_rq_ftr_rsp_fcpi_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpi_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpt_SHIFT		5
+#define lpfc_mbx_rq_ftr_rsp_fcpt_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpt_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpc_SHIFT		6
+#define lpfc_mbx_rq_ftr_rsp_fcpc_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpc_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_ifip_SHIFT		7
+#define lpfc_mbx_rq_ftr_rsp_ifip_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_ifip_WORD		word3
+};
+
+/* Mailbox Completion Queue Error Messages */
+#define MB_CQE_STATUS_SUCCESS 			0x0
+#define MB_CQE_STATUS_INSUFFICIENT_PRIVILEGES	0x1
+#define MB_CQE_STATUS_INVALID_PARAMETER		0x2
+#define MB_CQE_STATUS_INSUFFICIENT_RESOURCES	0x3
+#define MB_CEQ_STATUS_QUEUE_FLUSHING		0x4
+#define MB_CQE_STATUS_DMA_FAILED		0x5
+
+/* mailbox queue entry structure */
+struct lpfc_mqe {
+	uint32_t word0;
+#define lpfc_mqe_status_SHIFT		16
+#define lpfc_mqe_status_MASK		0x0000FFFF
+#define lpfc_mqe_status_WORD		word0
+#define lpfc_mqe_command_SHIFT		8
+#define lpfc_mqe_command_MASK		0x000000FF
+#define lpfc_mqe_command_WORD		word0
+	union {
+		uint32_t mb_words[LPFC_SLI4_MB_WORD_COUNT - 1];
+		/* sli4 mailbox commands */
+		struct lpfc_mbx_sli4_config sli4_config;
+		struct lpfc_mbx_init_vfi init_vfi;
+		struct lpfc_mbx_reg_vfi reg_vfi;
+		struct lpfc_mbx_reg_vfi unreg_vfi;
+		struct lpfc_mbx_init_vpi init_vpi;
+		struct lpfc_mbx_resume_rpi resume_rpi;
+		struct lpfc_mbx_read_fcf_tbl read_fcf_tbl;
+		struct lpfc_mbx_add_fcf_tbl_entry add_fcf_entry;
+		struct lpfc_mbx_del_fcf_tbl_entry del_fcf_entry;
+		struct lpfc_mbx_reg_fcfi reg_fcfi;
+		struct lpfc_mbx_unreg_fcfi unreg_fcfi;
+		struct lpfc_mbx_mq_create mq_create;
+		struct lpfc_mbx_eq_create eq_create;
+		struct lpfc_mbx_cq_create cq_create;
+		struct lpfc_mbx_wq_create wq_create;
+		struct lpfc_mbx_rq_create rq_create;
+		struct lpfc_mbx_mq_destroy mq_destroy;
+		struct lpfc_mbx_eq_destroy eq_destroy;
+		struct lpfc_mbx_cq_destroy cq_destroy;
+		struct lpfc_mbx_wq_destroy wq_destroy;
+		struct lpfc_mbx_rq_destroy rq_destroy;
+		struct lpfc_mbx_post_sgl_pages post_sgl_pages;
+		struct lpfc_mbx_nembed_cmd nembed_cmd;
+		struct lpfc_mbx_read_rev read_rev;
+		struct lpfc_mbx_read_vpi read_vpi;
+		struct lpfc_mbx_read_config rd_config;
+		struct lpfc_mbx_request_features req_ftrs;
+		struct lpfc_mbx_post_hdr_tmpl hdr_tmpl;
+		struct lpfc_mbx_nop nop;
+	} un;
+};
+
+struct lpfc_mcqe {
+	uint32_t word0;
+#define lpfc_mcqe_status_SHIFT		0
+#define lpfc_mcqe_status_MASK		0x0000FFFF
+#define lpfc_mcqe_status_WORD		word0
+#define lpfc_mcqe_ext_status_SHIFT	16
+#define lpfc_mcqe_ext_status_MASK  	0x0000FFFF
+#define lpfc_mcqe_ext_status_WORD 	word0
+	uint32_t mcqe_tag0;
+	uint32_t mcqe_tag1;
+	uint32_t trailer;
+#define lpfc_trailer_valid_SHIFT	31
+#define lpfc_trailer_valid_MASK		0x00000001
+#define lpfc_trailer_valid_WORD		trailer
+#define lpfc_trailer_async_SHIFT	30
+#define lpfc_trailer_async_MASK		0x00000001
+#define lpfc_trailer_async_WORD		trailer
+#define lpfc_trailer_hpi_SHIFT		29
+#define lpfc_trailer_hpi_MASK		0x00000001
+#define lpfc_trailer_hpi_WORD		trailer
+#define lpfc_trailer_completed_SHIFT	28
+#define lpfc_trailer_completed_MASK	0x00000001
+#define lpfc_trailer_completed_WORD	trailer
+#define lpfc_trailer_consumed_SHIFT	27
+#define lpfc_trailer_consumed_MASK	0x00000001
+#define lpfc_trailer_consumed_WORD	trailer
+#define lpfc_trailer_type_SHIFT		16
+#define lpfc_trailer_type_MASK		0x000000FF
+#define lpfc_trailer_type_WORD		trailer
+#define lpfc_trailer_code_SHIFT		8
+#define lpfc_trailer_code_MASK		0x000000FF
+#define lpfc_trailer_code_WORD		trailer
+#define LPFC_TRAILER_CODE_LINK	0x1
+#define LPFC_TRAILER_CODE_FCOE	0x2
+#define LPFC_TRAILER_CODE_DCBX	0x3
+};
+
+struct lpfc_acqe_link {
+	uint32_t word0;
+#define lpfc_acqe_link_speed_SHIFT		24
+#define lpfc_acqe_link_speed_MASK		0x000000FF
+#define lpfc_acqe_link_speed_WORD		word0
+#define LPFC_ASYNC_LINK_SPEED_ZERO		0x0
+#define LPFC_ASYNC_LINK_SPEED_10MBPS		0x1
+#define LPFC_ASYNC_LINK_SPEED_100MBPS		0x2
+#define LPFC_ASYNC_LINK_SPEED_1GBPS		0x3
+#define LPFC_ASYNC_LINK_SPEED_10GBPS		0x4
+#define lpfc_acqe_link_duplex_SHIFT		16
+#define lpfc_acqe_link_duplex_MASK		0x000000FF
+#define lpfc_acqe_link_duplex_WORD		word0
+#define LPFC_ASYNC_LINK_DUPLEX_NONE		0x0
+#define LPFC_ASYNC_LINK_DUPLEX_HALF		0x1
+#define LPFC_ASYNC_LINK_DUPLEX_FULL		0x2
+#define lpfc_acqe_link_status_SHIFT		8
+#define lpfc_acqe_link_status_MASK		0x000000FF
+#define lpfc_acqe_link_status_WORD		word0
+#define LPFC_ASYNC_LINK_STATUS_DOWN		0x0
+#define LPFC_ASYNC_LINK_STATUS_UP		0x1
+#define LPFC_ASYNC_LINK_STATUS_LOGICAL_DOWN	0x2
+#define LPFC_ASYNC_LINK_STATUS_LOGICAL_UP	0x3
+#define lpfc_acqe_link_physical_SHIFT		0
+#define lpfc_acqe_link_physical_MASK		0x000000FF
+#define lpfc_acqe_link_physical_WORD		word0
+#define LPFC_ASYNC_LINK_PORT_A			0x0
+#define LPFC_ASYNC_LINK_PORT_B			0x1
+	uint32_t word1;
+#define lpfc_acqe_link_fault_SHIFT	0
+#define lpfc_acqe_link_fault_MASK	0x000000FF
+#define lpfc_acqe_link_fault_WORD	word1
+#define LPFC_ASYNC_LINK_FAULT_NONE	0x0
+#define LPFC_ASYNC_LINK_FAULT_LOCAL	0x1
+#define LPFC_ASYNC_LINK_FAULT_REMOTE	0x2
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+struct lpfc_acqe_fcoe {
+	uint32_t fcf_index;
+	uint32_t word1;
+#define lpfc_acqe_fcoe_fcf_count_SHIFT		0
+#define lpfc_acqe_fcoe_fcf_count_MASK		0x0000FFFF
+#define lpfc_acqe_fcoe_fcf_count_WORD		word1
+#define lpfc_acqe_fcoe_event_type_SHIFT		16
+#define lpfc_acqe_fcoe_event_type_MASK		0x0000FFFF
+#define lpfc_acqe_fcoe_event_type_WORD		word1
+#define LPFC_FCOE_EVENT_TYPE_NEW_FCF		0x1
+#define LPFC_FCOE_EVENT_TYPE_FCF_TABLE_FULL	0x2
+#define LPFC_FCOE_EVENT_TYPE_FCF_DEAD		0x3
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+struct lpfc_acqe_dcbx {
+	uint32_t tlv_ttl;
+	uint32_t reserved;
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+/*
+ * Define the bootstrap mailbox (bmbx) region used to communicate
+ * mailbox command between the host and port. The mailbox consists
+ * of a payload area of 256 bytes and a completion queue of length
+ * 16 bytes.
+ */
+struct lpfc_bmbx_create {
+	struct lpfc_mqe mqe;
+	struct lpfc_mcqe mcqe;
+};
+
+#define SGL_ALIGN_SZ 64
+#define SGL_PAGE_SIZE 4096
+/* align SGL addr on a size boundary - adjust address up */
+#define NO_XRI ((uint16_t)-1)
+struct wqe_common {
+	uint32_t word6;
+#define wqe_xri_SHIFT         0
+#define wqe_xri_MASK          0x0000FFFF
+#define wqe_xri_WORD          word6
+#define wqe_ctxt_tag_SHIFT    16
+#define wqe_ctxt_tag_MASK     0x0000FFFF
+#define wqe_ctxt_tag_WORD     word6
+	uint32_t word7;
+#define wqe_ct_SHIFT          2
+#define wqe_ct_MASK           0x00000003
+#define wqe_ct_WORD           word7
+#define wqe_status_SHIFT      4
+#define wqe_status_MASK       0x0000000f
+#define wqe_status_WORD       word7
+#define wqe_cmnd_SHIFT        8
+#define wqe_cmnd_MASK         0x000000ff
+#define wqe_cmnd_WORD         word7
+#define wqe_class_SHIFT       16
+#define wqe_class_MASK        0x00000007
+#define wqe_class_WORD        word7
+#define wqe_pu_SHIFT          20
+#define wqe_pu_MASK           0x00000003
+#define wqe_pu_WORD           word7
+#define wqe_erp_SHIFT         22
+#define wqe_erp_MASK          0x00000001
+#define wqe_erp_WORD          word7
+#define wqe_lnk_SHIFT         23
+#define wqe_lnk_MASK          0x00000001
+#define wqe_lnk_WORD          word7
+#define wqe_tmo_SHIFT         24
+#define wqe_tmo_MASK          0x000000ff
+#define wqe_tmo_WORD          word7
+	uint32_t abort_tag; /* word 8 in WQE */
+	uint32_t word9;
+#define wqe_reqtag_SHIFT      0
+#define wqe_reqtag_MASK       0x0000FFFF
+#define wqe_reqtag_WORD       word9
+#define wqe_rcvoxid_SHIFT     16
+#define wqe_rcvoxid_MASK       0x0000FFFF
+#define wqe_rcvoxid_WORD       word9
+	uint32_t word10;
+#define wqe_pri_SHIFT         16
+#define wqe_pri_MASK          0x00000007
+#define wqe_pri_WORD          word10
+#define wqe_pv_SHIFT          19
+#define wqe_pv_MASK           0x00000001
+#define wqe_pv_WORD           word10
+#define wqe_xc_SHIFT          21
+#define wqe_xc_MASK           0x00000001
+#define wqe_xc_WORD           word10
+#define wqe_ccpe_SHIFT        23
+#define wqe_ccpe_MASK         0x00000001
+#define wqe_ccpe_WORD         word10
+#define wqe_ccp_SHIFT         24
+#define wqe_ccp_MASK         0x000000ff
+#define wqe_ccp_WORD         word10
+	uint32_t word11;
+#define wqe_cmd_type_SHIFT  0
+#define wqe_cmd_type_MASK   0x0000000f
+#define wqe_cmd_type_WORD   word11
+#define wqe_wqec_SHIFT      7
+#define wqe_wqec_MASK       0x00000001
+#define wqe_wqec_WORD       word11
+#define wqe_cqid_SHIFT      16
+#define wqe_cqid_MASK       0x000003ff
+#define wqe_cqid_WORD       word11
+};
+
+struct wqe_did {
+	uint32_t word5;
+#define wqe_els_did_SHIFT         0
+#define wqe_els_did_MASK          0x00FFFFFF
+#define wqe_els_did_WORD          word5
+#define wqe_xmit_bls_ar_SHIFT         30
+#define wqe_xmit_bls_ar_MASK          0x00000001
+#define wqe_xmit_bls_ar_WORD          word5
+#define wqe_xmit_bls_xo_SHIFT         31
+#define wqe_xmit_bls_xo_MASK          0x00000001
+#define wqe_xmit_bls_xo_WORD          word5
+};
+
+struct els_request64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;
+	uint32_t word4;
+#define els_req64_sid_SHIFT         0
+#define els_req64_sid_MASK          0x00FFFFFF
+#define els_req64_sid_WORD          word4
+#define els_req64_sp_SHIFT          24
+#define els_req64_sp_MASK           0x00000001
+#define els_req64_sp_WORD           word4
+#define els_req64_vf_SHIFT          25
+#define els_req64_vf_MASK           0x00000001
+#define els_req64_vf_WORD           word4
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t word12;
+#define els_req64_vfid_SHIFT        1
+#define els_req64_vfid_MASK         0x00000FFF
+#define els_req64_vfid_WORD         word12
+#define els_req64_pri_SHIFT         13
+#define els_req64_pri_MASK          0x00000007
+#define els_req64_pri_WORD          word12
+	uint32_t word13;
+#define els_req64_hopcnt_SHIFT      24
+#define els_req64_hopcnt_MASK       0x000000ff
+#define els_req64_hopcnt_WORD       word13
+	uint32_t reserved[2];
+};
+
+struct xmit_els_rsp64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t rsvd3;
+	uint32_t rsvd4;
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct xmit_bls_rsp64_wqe {
+	uint32_t payload0;
+	uint32_t word1;
+#define xmit_bls_rsp64_rxid_SHIFT  0
+#define xmit_bls_rsp64_rxid_MASK   0x0000ffff
+#define xmit_bls_rsp64_rxid_WORD   word1
+#define xmit_bls_rsp64_oxid_SHIFT  16
+#define xmit_bls_rsp64_oxid_MASK   0x0000ffff
+#define xmit_bls_rsp64_oxid_WORD   word1
+	uint32_t word2;
+#define xmit_bls_rsp64_seqcntlo_SHIFT  0
+#define xmit_bls_rsp64_seqcntlo_MASK   0x0000ffff
+#define xmit_bls_rsp64_seqcntlo_WORD   word2
+#define xmit_bls_rsp64_seqcnthi_SHIFT  16
+#define xmit_bls_rsp64_seqcnthi_MASK   0x0000ffff
+#define xmit_bls_rsp64_seqcnthi_WORD   word2
+	uint32_t rsrvd3;
+	uint32_t rsrvd4;
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+struct wqe_rctl_dfctl {
+	uint32_t word5;
+#define wqe_si_SHIFT 2
+#define wqe_si_MASK  0x000000001
+#define wqe_si_WORD  word5
+#define wqe_la_SHIFT 3
+#define wqe_la_MASK  0x000000001
+#define wqe_la_WORD  word5
+#define wqe_ls_SHIFT 7
+#define wqe_ls_MASK  0x000000001
+#define wqe_ls_WORD  word5
+#define wqe_dfctl_SHIFT 8
+#define wqe_dfctl_MASK  0x0000000ff
+#define wqe_dfctl_WORD  word5
+#define wqe_type_SHIFT 16
+#define wqe_type_MASK  0x0000000ff
+#define wqe_type_WORD  word5
+#define wqe_rctl_SHIFT 24
+#define wqe_rctl_MASK  0x0000000ff
+#define wqe_rctl_WORD  word5
+};
+
+struct xmit_seq64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t paylaod_offset;
+	uint32_t relative_offset;
+	struct wqe_rctl_dfctl wge_ctl;
+	struct wqe_common wqe_com; /* words 6-11 */
+	/* Note: word10 different REVISIT */
+	uint32_t xmit_len;
+	uint32_t rsvd_12_15[3];
+};
+struct xmit_bcast64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t paylaod_len;
+	uint32_t rsvd4;
+	struct wqe_rctl_dfctl wge_ctl; /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct gen_req64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t command_len;
+	uint32_t payload_len;
+	struct wqe_rctl_dfctl wge_ctl; /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct create_xri_wqe {
+	uint32_t rsrvd[5];           /* words 0-4 */
+	struct wqe_did	wqe_dest;  /* word 5 */
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+#define T_REQUEST_TAG 3
+#define T_XRI_TAG 1
+
+struct abort_cmd_wqe {
+	uint32_t rsrvd[3];
+	uint32_t word3;
+#define	abort_cmd_ia_SHIFT  0
+#define	abort_cmd_ia_MASK  0x000000001
+#define	abort_cmd_ia_WORD  word3
+#define	abort_cmd_criteria_SHIFT  8
+#define	abort_cmd_criteria_MASK  0x0000000ff
+#define	abort_cmd_criteria_WORD  word3
+	uint32_t rsrvd4;
+	uint32_t rsrvd5;
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_iwrite64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;
+	uint32_t total_xfer_len;
+	uint32_t initial_xfer_len;
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_iread64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;          /* word 3 */
+	uint32_t total_xfer_len;       /* word 4 */
+	uint32_t rsrvd5;               /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_icmnd64_wqe {
+	struct ulp_bde64 bde;	 /* words 0-2 */
+	uint32_t rsrvd[3];             /* words 3-5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+
+union lpfc_wqe {
+	uint32_t words[16];
+	struct lpfc_wqe_generic generic;
+	struct fcp_icmnd64_wqe fcp_icmd;
+	struct fcp_iread64_wqe fcp_iread;
+	struct fcp_iwrite64_wqe fcp_iwrite;
+	struct abort_cmd_wqe abort_cmd;
+	struct create_xri_wqe create_xri;
+	struct xmit_bcast64_wqe xmit_bcast64;
+	struct xmit_seq64_wqe xmit_sequence;
+	struct xmit_bls_rsp64_wqe xmit_bls_rsp;
+	struct xmit_els_rsp64_wqe xmit_els_rsp;
+	struct els_request64_wqe els_req;
+	struct gen_req64_wqe gen_req;
+};
+
+#define FCP_COMMAND 0x0
+#define FCP_COMMAND_DATA_OUT 0x1
+#define ELS_COMMAND_NON_FIP 0xC
+#define ELS_COMMAND_FIP 0xD
+#define OTHER_COMMAND 0x8
+
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 86d1bdc..2f5907f 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -34,8 +34,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -51,9 +53,23 @@
 unsigned long _dump_buf_dif_order;
 spinlock_t _dump_buf_lock;
 
-static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
 static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
 static int lpfc_post_rcv_buf(struct lpfc_hba *);
+static int lpfc_sli4_queue_create(struct lpfc_hba *);
+static void lpfc_sli4_queue_destroy(struct lpfc_hba *);
+static int lpfc_create_bootstrap_mbox(struct lpfc_hba *);
+static int lpfc_setup_endian_order(struct lpfc_hba *);
+static int lpfc_sli4_read_config(struct lpfc_hba *);
+static void lpfc_destroy_bootstrap_mbox(struct lpfc_hba *);
+static void lpfc_free_sgl_list(struct lpfc_hba *);
+static int lpfc_init_sgl_list(struct lpfc_hba *);
+static int lpfc_init_active_sgl_array(struct lpfc_hba *);
+static void lpfc_free_active_sgl(struct lpfc_hba *);
+static int lpfc_hba_down_post_s3(struct lpfc_hba *phba);
+static int lpfc_hba_down_post_s4(struct lpfc_hba *phba);
+static int lpfc_sli4_cq_event_pool_create(struct lpfc_hba *);
+static void lpfc_sli4_cq_event_pool_destroy(struct lpfc_hba *);
+static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -92,7 +108,7 @@
 		return -ENOMEM;
 	}
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	phba->link_state = LPFC_INIT_MBX_CMDS;
 
 	if (lpfc_is_LC_HBA(phba->pcidev->device)) {
@@ -205,6 +221,11 @@
 					mb->mbxCommand, mb->mbxStatus);
 			mb->un.varDmp.word_cnt = 0;
 		}
+		/* dump mem may return a zero when finished or we got a
+		 * mailbox error, either way we are done.
+		 */
+		if (mb->un.varDmp.word_cnt == 0)
+			break;
 		if (mb->un.varDmp.word_cnt > DMP_VPD_SIZE - offset)
 			mb->un.varDmp.word_cnt = DMP_VPD_SIZE - offset;
 		lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
@@ -233,7 +254,7 @@
 static void
 lpfc_config_async_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
 {
-	if (pmboxq->mb.mbxStatus == MBX_SUCCESS)
+	if (pmboxq->u.mb.mbxStatus == MBX_SUCCESS)
 		phba->temp_sensor_support = 1;
 	else
 		phba->temp_sensor_support = 0;
@@ -260,7 +281,7 @@
 	/* character array used for decoding dist type. */
 	char dist_char[] = "nabx";
 
-	if (pmboxq->mb.mbxStatus != MBX_SUCCESS) {
+	if (pmboxq->u.mb.mbxStatus != MBX_SUCCESS) {
 		mempool_free(pmboxq, phba->mbox_mem_pool);
 		return;
 	}
@@ -268,7 +289,7 @@
 	prg = (struct prog_id *) &prog_id_word;
 
 	/* word 7 contain option rom version */
-	prog_id_word = pmboxq->mb.un.varWords[7];
+	prog_id_word = pmboxq->u.mb.un.varWords[7];
 
 	/* Decode the Option rom version word to a readable string */
 	if (prg->dist < 4)
@@ -325,7 +346,7 @@
 		phba->link_state = LPFC_HBA_ERROR;
 		return -ENOMEM;
 	}
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	/* Get login parameters for NID.  */
 	lpfc_read_sparam(phba, pmb, 0);
@@ -364,6 +385,7 @@
 	/* Update the fc_host data structures with new wwn. */
 	fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
 	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+	fc_host_max_npiv_vports(shost) = phba->max_vpi;
 
 	/* If no serial number in VPD data, use low 6 bytes of WWNN */
 	/* This should be consolidated into parse_vpd ? - mr */
@@ -460,17 +482,18 @@
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
 					"0352 Config MSI mailbox command "
 					"failed, mbxCmd x%x, mbxStatus x%x\n",
-					pmb->mb.mbxCommand, pmb->mb.mbxStatus);
+					pmb->u.mb.mbxCommand,
+					pmb->u.mb.mbxStatus);
 			mempool_free(pmb, phba->mbox_mem_pool);
 			return -EIO;
 		}
 	}
 
+	spin_lock_irq(&phba->hbalock);
 	/* Initialize ERATT handling flag */
 	phba->hba_flag &= ~HBA_ERATT_HANDLED;
 
 	/* Enable appropriate host interrupts */
-	spin_lock_irq(&phba->hbalock);
 	status = readl(phba->HCregaddr);
 	status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA;
 	if (psli->num_rings > 0)
@@ -571,16 +594,20 @@
 {
 	struct lpfc_vport **vports;
 	int i;
-	/* Disable interrupts */
-	writel(0, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
+
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		/* Disable interrupts */
+		writel(0, phba->HCregaddr);
+		readl(phba->HCregaddr); /* flush */
+	}
 
 	if (phba->pport->load_flag & FC_UNLOADING)
 		lpfc_cleanup_discovery_resources(phba->pport);
 	else {
 		vports = lpfc_create_vport_work_array(phba);
 		if (vports != NULL)
-			for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+			for (i = 0; i <= phba->max_vports &&
+				vports[i] != NULL; i++)
 				lpfc_cleanup_discovery_resources(vports[i]);
 		lpfc_destroy_vport_work_array(phba, vports);
 	}
@@ -588,7 +615,7 @@
 }
 
 /**
- * lpfc_hba_down_post - Perform lpfc uninitialization after HBA reset
+ * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset
  * @phba: pointer to lpfc HBA data structure.
  *
  * This routine will do uninitialization after the HBA is reset when bring
@@ -598,8 +625,8 @@
  *   0 - sucess.
  *   Any other value - error.
  **/
-int
-lpfc_hba_down_post(struct lpfc_hba *phba)
+static int
+lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 {
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
@@ -642,6 +669,77 @@
 
 	return 0;
 }
+/**
+ * lpfc_hba_down_post_s4 - Perform lpfc uninitialization after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine will do uninitialization after the HBA is reset when bring
+ * down the SLI Layer.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+static int
+lpfc_hba_down_post_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_scsi_buf *psb, *psb_next;
+	LIST_HEAD(aborts);
+	int ret;
+	unsigned long iflag = 0;
+	ret = lpfc_hba_down_post_s3(phba);
+	if (ret)
+		return ret;
+	/* At this point in time the HBA is either reset or DOA. Either
+	 * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be
+	 * on the lpfc_sgl_list so that it can either be freed if the
+	 * driver is unloading or reposted if the driver is restarting
+	 * the port.
+	 */
+	spin_lock_irq(&phba->hbalock);  /* required for lpfc_sgl_list and */
+					/* scsl_buf_list */
+	/* abts_sgl_list_lock required because worker thread uses this
+	 * list.
+	 */
+	spin_lock(&phba->sli4_hba.abts_sgl_list_lock);
+	list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list,
+			&phba->sli4_hba.lpfc_sgl_list);
+	spin_unlock(&phba->sli4_hba.abts_sgl_list_lock);
+	/* abts_scsi_buf_list_lock required because worker thread uses this
+	 * list.
+	 */
+	spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list,
+			&aborts);
+	spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	spin_unlock_irq(&phba->hbalock);
+
+	list_for_each_entry_safe(psb, psb_next, &aborts, list) {
+		psb->pCmd = NULL;
+		psb->status = IOSTAT_SUCCESS;
+	}
+	spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
+	list_splice(&aborts, &phba->lpfc_scsi_buf_list);
+	spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
+	return 0;
+}
+
+/**
+ * lpfc_hba_down_post - Wrapper func for hba down post routine
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine wraps the actual SLI3 or SLI4 routine for performing
+ * uninitialization after the HBA is reset when bring down the SLI Layer.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+int
+lpfc_hba_down_post(struct lpfc_hba *phba)
+{
+	return (*phba->lpfc_hba_down_post)(phba);
+}
 
 /**
  * lpfc_hb_timeout - The HBA-timer timeout handler
@@ -809,7 +907,7 @@
 					"taking this port offline.\n");
 
 			spin_lock_irq(&phba->hbalock);
-			psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+			psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 			spin_unlock_irq(&phba->hbalock);
 
 			lpfc_offline_prep(phba);
@@ -834,13 +932,15 @@
 	struct lpfc_sli   *psli = &phba->sli;
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 	lpfc_offline_prep(phba);
 
 	lpfc_offline(phba);
 	lpfc_reset_barrier(phba);
+	spin_lock_irq(&phba->hbalock);
 	lpfc_sli_brdreset(phba);
+	spin_unlock_irq(&phba->hbalock);
 	lpfc_hba_down_post(phba);
 	lpfc_sli_brdready(phba, HS_MBRDY);
 	lpfc_unblock_mgmt_io(phba);
@@ -849,6 +949,25 @@
 }
 
 /**
+ * lpfc_sli4_offline_eratt - Bring lpfc offline on SLI4 hardware error attention
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is called to bring a SLI4 HBA offline when HBA hardware error
+ * other than Port Error 6 has been detected.
+ **/
+static void
+lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
+{
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	lpfc_sli4_brdreset(phba);
+	lpfc_hba_down_post(phba);
+	lpfc_sli4_post_status_check(phba);
+	lpfc_unblock_mgmt_io(phba);
+	phba->link_state = LPFC_HBA_ERROR;
+}
+
+/**
  * lpfc_handle_deferred_eratt - The HBA hardware deferred error handler
  * @phba: pointer to lpfc hba data structure.
  *
@@ -864,6 +983,16 @@
 	struct lpfc_sli_ring  *pring;
 	struct lpfc_sli *psli = &phba->sli;
 
+	/* If the pci channel is offline, ignore possible errors,
+	 * since we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev)) {
+		spin_lock_irq(&phba->hbalock);
+		phba->hba_flag &= ~DEFER_ERATT;
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 		"0479 Deferred Adapter Hardware Error "
 		"Data: x%x x%x x%x\n",
@@ -871,7 +1000,7 @@
 		phba->work_status[0], phba->work_status[1]);
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 
@@ -909,13 +1038,30 @@
 	if ((!phba->work_hs) && (!(phba->pport->load_flag & FC_UNLOADING)))
 		phba->work_hs = old_host_status & ~HS_FFER1;
 
+	spin_lock_irq(&phba->hbalock);
 	phba->hba_flag &= ~DEFER_ERATT;
+	spin_unlock_irq(&phba->hbalock);
 	phba->work_status[0] = readl(phba->MBslimaddr + 0xa8);
 	phba->work_status[1] = readl(phba->MBslimaddr + 0xac);
 }
 
+static void
+lpfc_board_errevt_to_mgmt(struct lpfc_hba *phba)
+{
+	struct lpfc_board_event_header board_event;
+	struct Scsi_Host *shost;
+
+	board_event.event_type = FC_REG_BOARD_EVENT;
+	board_event.subcategory = LPFC_EVENT_PORTINTERR;
+	shost = lpfc_shost_from_vport(phba->pport);
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(board_event),
+				  (char *) &board_event,
+				  LPFC_NL_VENDOR_ID);
+}
+
 /**
- * lpfc_handle_eratt - The HBA hardware error handler
+ * lpfc_handle_eratt_s3 - The SLI3 HBA hardware error handler
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to handle the following HBA hardware error
@@ -924,8 +1070,8 @@
  * 2 - DMA ring index out of range
  * 3 - Mailbox command came back as unknown
  **/
-void
-lpfc_handle_eratt(struct lpfc_hba *phba)
+static void
+lpfc_handle_eratt_s3(struct lpfc_hba *phba)
 {
 	struct lpfc_vport *vport = phba->pport;
 	struct lpfc_sli   *psli = &phba->sli;
@@ -934,24 +1080,23 @@
 	unsigned long temperature;
 	struct temp_event temp_event_data;
 	struct Scsi_Host  *shost;
-	struct lpfc_board_event_header board_event;
 
 	/* If the pci channel is offline, ignore possible errors,
-	 * since we cannot communicate with the pci card anyway. */
-	if (pci_channel_offline(phba->pcidev))
+	 * since we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev)) {
+		spin_lock_irq(&phba->hbalock);
+		phba->hba_flag &= ~DEFER_ERATT;
+		spin_unlock_irq(&phba->hbalock);
 		return;
+	}
+
 	/* If resets are disabled then leave the HBA alone and return */
 	if (!phba->cfg_enable_hba_reset)
 		return;
 
 	/* Send an internal error event to mgmt application */
-	board_event.event_type = FC_REG_BOARD_EVENT;
-	board_event.subcategory = LPFC_EVENT_PORTINTERR;
-	shost = lpfc_shost_from_vport(phba->pport);
-	fc_host_post_vendor_event(shost, fc_get_event_number(),
-				  sizeof(board_event),
-				  (char *) &board_event,
-				  LPFC_NL_VENDOR_ID);
+	lpfc_board_errevt_to_mgmt(phba);
 
 	if (phba->hba_flag & DEFER_ERATT)
 		lpfc_handle_deferred_eratt(phba);
@@ -965,7 +1110,7 @@
 				phba->work_status[0], phba->work_status[1]);
 
 		spin_lock_irq(&phba->hbalock);
-		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+		psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 		spin_unlock_irq(&phba->hbalock);
 
 		/*
@@ -1037,6 +1182,65 @@
 }
 
 /**
+ * lpfc_handle_eratt_s4 - The SLI4 HBA hardware error handler
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to handle the SLI4 HBA hardware error attention
+ * conditions.
+ **/
+static void
+lpfc_handle_eratt_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	uint32_t event_data;
+	struct Scsi_Host *shost;
+
+	/* If the pci channel is offline, ignore possible errors, since
+	 * we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev))
+		return;
+	/* If resets are disabled then leave the HBA alone and return */
+	if (!phba->cfg_enable_hba_reset)
+		return;
+
+	/* Send an internal error event to mgmt application */
+	lpfc_board_errevt_to_mgmt(phba);
+
+	/* For now, the actual action for SLI4 device handling is not
+	 * specified yet, just treated it as adaptor hardware failure
+	 */
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0143 SLI4 Adapter Hardware Error Data: x%x x%x\n",
+			phba->work_status[0], phba->work_status[1]);
+
+	event_data = FC_REG_DUMP_EVENT;
+	shost = lpfc_shost_from_vport(vport);
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(event_data), (char *) &event_data,
+				  SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+
+	lpfc_sli4_offline_eratt(phba);
+}
+
+/**
+ * lpfc_handle_eratt - Wrapper func for handling hba error attention
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba error attention handling
+ * routine from the API jump table function pointer from the lpfc_hba struct.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+void
+lpfc_handle_eratt(struct lpfc_hba *phba)
+{
+	(*phba->lpfc_handle_eratt)(phba);
+}
+
+/**
  * lpfc_handle_latt - The HBA link event handler
  * @phba: pointer to lpfc hba data structure.
  *
@@ -1137,7 +1341,7 @@
  *   0 - pointer to the VPD passed in is NULL
  *   1 - success
  **/
-static int
+int
 lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len)
 {
 	uint8_t lenlo, lenhi;
@@ -1292,6 +1496,7 @@
 	uint16_t dev_id = phba->pcidev->device;
 	int max_speed;
 	int GE = 0;
+	int oneConnect = 0; /* default is not a oneConnect */
 	struct {
 		char * name;
 		int    max_speed;
@@ -1437,6 +1642,14 @@
 	case PCI_DEVICE_ID_PROTEUS_S:
 		m = (typeof(m)) {"LPemv12002-S", max_speed, "PCIe IOV"};
 		break;
+	case PCI_DEVICE_ID_TIGERSHARK:
+		oneConnect = 1;
+		m = (typeof(m)) {"OCe10100-F", max_speed, "PCIe"};
+		break;
+	case PCI_DEVICE_ID_TIGERSHARK_S:
+		oneConnect = 1;
+		m = (typeof(m)) {"OCe10100-F-S", max_speed, "PCIe"};
+		break;
 	default:
 		m = (typeof(m)){ NULL };
 		break;
@@ -1444,13 +1657,24 @@
 
 	if (mdp && mdp[0] == '\0')
 		snprintf(mdp, 79,"%s", m.name);
-	if (descp && descp[0] == '\0')
-		snprintf(descp, 255,
-			"Emulex %s %d%s %s %s",
-			m.name, m.max_speed,
-			(GE) ? "GE" : "Gb",
-			m.bus,
-			(GE) ? "FCoE Adapter" : "Fibre Channel Adapter");
+	/* oneConnect hba requires special processing, they are all initiators
+	 * and we put the port number on the end
+	 */
+	if (descp && descp[0] == '\0') {
+		if (oneConnect)
+			snprintf(descp, 255,
+				"Emulex OneConnect %s, FCoE Initiator, Port %s",
+				m.name,
+				phba->Port);
+		else
+			snprintf(descp, 255,
+				"Emulex %s %d%s %s %s",
+				m.name, m.max_speed,
+				(GE) ? "GE" : "Gb",
+				m.bus,
+				(GE) ? "FCoE Adapter" :
+					"Fibre Channel Adapter");
+	}
 }
 
 /**
@@ -1533,7 +1757,8 @@
 		icmd->ulpCommand = CMD_QUE_RING_BUF64_CN;
 		icmd->ulpLe = 1;
 
-		if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) {
+		if (lpfc_sli_issue_iocb(phba, pring->ringno, iocb, 0) ==
+		    IOCB_ERROR) {
 			lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
 			kfree(mp1);
 			cnt++;
@@ -1761,7 +1986,6 @@
 	 * Lets wait for this to happen, if needed.
 	 */
 	while (!list_empty(&vport->fc_nodes)) {
-
 		if (i++ > 3000) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
 				"0233 Nodelist not empty\n");
@@ -1782,7 +2006,6 @@
 		/* Wait for any activity on ndlps to settle */
 		msleep(10);
 	}
-	return;
 }
 
 /**
@@ -1803,22 +2026,36 @@
 }
 
 /**
- * lpfc_stop_phba_timers - Stop all the timers associated with an HBA
+ * lpfc_stop_hba_timers - Stop all the timers associated with an HBA
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine stops all the timers associated with a HBA. This function is
  * invoked before either putting a HBA offline or unloading the driver.
  **/
-static void
-lpfc_stop_phba_timers(struct lpfc_hba *phba)
+void
+lpfc_stop_hba_timers(struct lpfc_hba *phba)
 {
-	del_timer_sync(&phba->fcp_poll_timer);
 	lpfc_stop_vport_timers(phba->pport);
 	del_timer_sync(&phba->sli.mbox_tmo);
 	del_timer_sync(&phba->fabric_block_timer);
-	phba->hb_outstanding = 0;
-	del_timer_sync(&phba->hb_tmofunc);
 	del_timer_sync(&phba->eratt_poll);
+	del_timer_sync(&phba->hb_tmofunc);
+	phba->hb_outstanding = 0;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		/* Stop any LightPulse device specific driver timers */
+		del_timer_sync(&phba->fcp_poll_timer);
+		break;
+	case LPFC_PCI_DEV_OC:
+		/* Stop any OneConnect device sepcific driver timers */
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0297 Invalid device group (x%x)\n",
+				phba->pci_dev_grp);
+		break;
+	}
 	return;
 }
 
@@ -1878,14 +2115,21 @@
 		return 1;
 	}
 
-	if (lpfc_sli_hba_setup(phba)) {	/* Initialize the HBA */
-		lpfc_unblock_mgmt_io(phba);
-		return 1;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (lpfc_sli4_hba_setup(phba)) { /* Initialize SLI4 HBA */
+			lpfc_unblock_mgmt_io(phba);
+			return 1;
+		}
+	} else {
+		if (lpfc_sli_hba_setup(phba)) {	/* Initialize SLI2/SLI3 HBA */
+			lpfc_unblock_mgmt_io(phba);
+			return 1;
+		}
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
@@ -1947,11 +2191,12 @@
 	/* Issue an unreg_login to all nodes on all vports */
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL) {
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 
 			if (vports[i]->load_flag & FC_UNLOADING)
 				continue;
+			vports[i]->vfi_state &= ~LPFC_VFI_REGISTERED;
 			shost =	lpfc_shost_from_vport(vports[i]);
 			list_for_each_entry_safe(ndlp, next_ndlp,
 						 &vports[i]->fc_nodes,
@@ -1975,7 +2220,7 @@
 	}
 	lpfc_destroy_vport_work_array(phba, vports);
 
-	lpfc_sli_flush_mbox_queue(phba);
+	lpfc_sli_mbox_sys_shutdown(phba);
 }
 
 /**
@@ -1996,11 +2241,11 @@
 	if (phba->pport->fc_flag & FC_OFFLINE_MODE)
 		return;
 
-	/* stop all timers associated with this hba */
-	lpfc_stop_phba_timers(phba);
+	/* stop port and all timers associated with this hba */
+	lpfc_stop_port(phba);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++)
 			lpfc_stop_vport_timers(vports[i]);
 	lpfc_destroy_vport_work_array(phba, vports);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
@@ -2013,7 +2258,7 @@
 	spin_unlock_irq(&phba->hbalock);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
 			vports[i]->work_port_events = 0;
@@ -2106,6 +2351,10 @@
 	shost->max_lun = vport->cfg_max_luns;
 	shost->this_id = -1;
 	shost->max_cmd_len = 16;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		shost->dma_boundary = LPFC_SLI4_MAX_SEGMENT_SIZE;
+		shost->sg_tablesize = phba->cfg_sg_seg_cnt;
+	}
 
 	/*
 	 * Set initial can_queue value since 0 is no longer supported and
@@ -2123,6 +2372,7 @@
 
 	/* Initialize all internally managed lists. */
 	INIT_LIST_HEAD(&vport->fc_nodes);
+	INIT_LIST_HEAD(&vport->rcv_buffer_list);
 	spin_lock_init(&vport->work_port_lock);
 
 	init_timer(&vport->fc_disctmo);
@@ -2314,15 +2564,3461 @@
 }
 
 /**
- * lpfc_enable_msix - Enable MSI-X interrupt mode
+ * lpfc_stop_port_s3 - Stop SLI3 device port
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable the MSI-X interrupt vectors. The kernel
- * function pci_enable_msix() is called to enable the MSI-X vectors. Note that
- * pci_enable_msix(), once invoked, enables either all or nothing, depending
- * on the current availability of PCI vector resources. The device driver is
- * responsible for calling the individual request_irq() to register each MSI-X
- * vector with a interrupt handler, which is done in this function. Note that
+ * This routine is invoked to stop an SLI3 device port, it stops the device
+ * from generating interrupts and stops the device driver's timers for the
+ * device.
+ **/
+static void
+lpfc_stop_port_s3(struct lpfc_hba *phba)
+{
+	/* Clear all interrupt enable conditions */
+	writel(0, phba->HCregaddr);
+	readl(phba->HCregaddr); /* flush */
+	/* Clear all pending interrupts */
+	writel(0xffffffff, phba->HAregaddr);
+	readl(phba->HAregaddr); /* flush */
+
+	/* Reset some HBA SLI setup states */
+	lpfc_stop_hba_timers(phba);
+	phba->pport->work_port_events = 0;
+}
+
+/**
+ * lpfc_stop_port_s4 - Stop SLI4 device port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to stop an SLI4 device port, it stops the device
+ * from generating interrupts and stops the device driver's timers for the
+ * device.
+ **/
+static void
+lpfc_stop_port_s4(struct lpfc_hba *phba)
+{
+	/* Reset some HBA SLI4 setup states */
+	lpfc_stop_hba_timers(phba);
+	phba->pport->work_port_events = 0;
+	phba->sli4_hba.intr_enable = 0;
+	/* Hard clear it for now, shall have more graceful way to wait later */
+	phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+}
+
+/**
+ * lpfc_stop_port - Wrapper function for stopping hba port
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba stop port routine from
+ * the API jump table function pointer from the lpfc_hba struct.
+ **/
+void
+lpfc_stop_port(struct lpfc_hba *phba)
+{
+	phba->lpfc_stop_port(phba);
+}
+
+/**
+ * lpfc_sli4_remove_dflt_fcf - Remove the driver default fcf record from the port.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the driver default fcf record from
+ * the port.  This routine currently acts on FCF Index 0.
+ *
+ **/
+void
+lpfc_sli_remove_dflt_fcf(struct lpfc_hba *phba)
+{
+	int rc = 0;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mbx_del_fcf_tbl_entry *del_fcf_record;
+	uint32_t mbox_tmo, req_len;
+	uint32_t shdr_status, shdr_add_status;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2020 Failed to allocate mbox for ADD_FCF cmd\n");
+		return;
+	}
+
+	req_len = sizeof(struct lpfc_mbx_del_fcf_tbl_entry) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr);
+	rc = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			      LPFC_MBOX_OPCODE_FCOE_DELETE_FCF,
+			      req_len, LPFC_SLI4_MBX_EMBED);
+	/*
+	 * In phase 1, there is a single FCF index, 0.  In phase2, the driver
+	 * supports multiple FCF indices.
+	 */
+	del_fcf_record = &mboxq->u.mqe.un.del_fcf_entry;
+	bf_set(lpfc_mbx_del_fcf_tbl_count, del_fcf_record, 1);
+	bf_set(lpfc_mbx_del_fcf_tbl_index, del_fcf_record,
+	       phba->fcf.fcf_indx);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+	}
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr_status = bf_get(lpfc_mbox_hdr_status,
+			     &del_fcf_record->header.cfg_shdr.response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+				 &del_fcf_record->header.cfg_shdr.response);
+	if (shdr_status || shdr_add_status || rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2516 DEL FCF of default FCF Index failed "
+				"mbx status x%x, status x%x add_status x%x\n",
+				rc, shdr_status, shdr_add_status);
+	}
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+}
+
+/**
+ * lpfc_sli4_parse_latt_fault - Parse sli4 link-attention link fault code
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
+ *
+ * This routine is to parse the SLI4 link-attention link fault code and
+ * translate it into the base driver's read link attention mailbox command
+ * status.
+ *
+ * Return: Link-attention status in terms of base driver's coding.
+ **/
+static uint16_t
+lpfc_sli4_parse_latt_fault(struct lpfc_hba *phba,
+			   struct lpfc_acqe_link *acqe_link)
+{
+	uint16_t latt_fault;
+
+	switch (bf_get(lpfc_acqe_link_fault, acqe_link)) {
+	case LPFC_ASYNC_LINK_FAULT_NONE:
+	case LPFC_ASYNC_LINK_FAULT_LOCAL:
+	case LPFC_ASYNC_LINK_FAULT_REMOTE:
+		latt_fault = 0;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0398 Invalid link fault code: x%x\n",
+				bf_get(lpfc_acqe_link_fault, acqe_link));
+		latt_fault = MBXERR_ERROR;
+		break;
+	}
+	return latt_fault;
+}
+
+/**
+ * lpfc_sli4_parse_latt_type - Parse sli4 link attention type
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
+ *
+ * This routine is to parse the SLI4 link attention type and translate it
+ * into the base driver's link attention type coding.
+ *
+ * Return: Link attention type in terms of base driver's coding.
+ **/
+static uint8_t
+lpfc_sli4_parse_latt_type(struct lpfc_hba *phba,
+			  struct lpfc_acqe_link *acqe_link)
+{
+	uint8_t att_type;
+
+	switch (bf_get(lpfc_acqe_link_status, acqe_link)) {
+	case LPFC_ASYNC_LINK_STATUS_DOWN:
+	case LPFC_ASYNC_LINK_STATUS_LOGICAL_DOWN:
+		att_type = AT_LINK_DOWN;
+		break;
+	case LPFC_ASYNC_LINK_STATUS_UP:
+		/* Ignore physical link up events - wait for logical link up */
+		att_type = AT_RESERVED;
+		break;
+	case LPFC_ASYNC_LINK_STATUS_LOGICAL_UP:
+		att_type = AT_LINK_UP;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0399 Invalid link attention type: x%x\n",
+				bf_get(lpfc_acqe_link_status, acqe_link));
+		att_type = AT_RESERVED;
+		break;
+	}
+	return att_type;
+}
+
+/**
+ * lpfc_sli4_parse_latt_link_speed - Parse sli4 link-attention link speed
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
+ *
+ * This routine is to parse the SLI4 link-attention link speed and translate
+ * it into the base driver's link-attention link speed coding.
+ *
+ * Return: Link-attention link speed in terms of base driver's coding.
+ **/
+static uint8_t
+lpfc_sli4_parse_latt_link_speed(struct lpfc_hba *phba,
+				struct lpfc_acqe_link *acqe_link)
+{
+	uint8_t link_speed;
+
+	switch (bf_get(lpfc_acqe_link_speed, acqe_link)) {
+	case LPFC_ASYNC_LINK_SPEED_ZERO:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_10MBPS:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_100MBPS:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_1GBPS:
+		link_speed = LA_1GHZ_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_10GBPS:
+		link_speed = LA_10GHZ_LINK;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0483 Invalid link-attention link speed: x%x\n",
+				bf_get(lpfc_acqe_link_speed, acqe_link));
+		link_speed = LA_UNKNW_LINK;
+		break;
+	}
+	return link_speed;
+}
+
+/**
+ * lpfc_sli4_async_link_evt - Process the asynchronous link event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous link event.
+ **/
+static void
+lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_link *acqe_link)
+{
+	struct lpfc_dmabuf *mp;
+	LPFC_MBOXQ_t *pmb;
+	MAILBOX_t *mb;
+	READ_LA_VAR *la;
+	uint8_t att_type;
+
+	att_type = lpfc_sli4_parse_latt_type(phba, acqe_link);
+	if (att_type != AT_LINK_DOWN && att_type != AT_LINK_UP)
+		return;
+	pmb = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0395 The mboxq allocation failed\n");
+		return;
+	}
+	mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!mp) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0396 The lpfc_dmabuf allocation failed\n");
+		goto out_free_pmb;
+	}
+	mp->virt = lpfc_mbuf_alloc(phba, 0, &mp->phys);
+	if (!mp->virt) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0397 The mbuf allocation failed\n");
+		goto out_free_dmabuf;
+	}
+
+	/* Cleanup any outstanding ELS commands */
+	lpfc_els_flush_all_cmd(phba);
+
+	/* Block ELS IOCBs until we have done process link event */
+	phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT;
+
+	/* Update link event statistics */
+	phba->sli.slistat.link_event++;
+
+	/* Create pseudo lpfc_handle_latt mailbox command from link ACQE */
+	lpfc_read_la(phba, pmb, mp);
+	pmb->vport = phba->pport;
+
+	/* Parse and translate status field */
+	mb = &pmb->u.mb;
+	mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba, acqe_link);
+
+	/* Parse and translate link attention fields */
+	la = (READ_LA_VAR *) &pmb->u.mb.un.varReadLA;
+	la->eventTag = acqe_link->event_tag;
+	la->attType = att_type;
+	la->UlnkSpeed = lpfc_sli4_parse_latt_link_speed(phba, acqe_link);
+
+	/* Fake the the following irrelvant fields */
+	la->topology = TOPOLOGY_PT_PT;
+	la->granted_AL_PA = 0;
+	la->il = 0;
+	la->pb = 0;
+	la->fa = 0;
+	la->mm = 0;
+
+	/* Keep the link status for extra SLI4 state machine reference */
+	phba->sli4_hba.link_state.speed =
+				bf_get(lpfc_acqe_link_speed, acqe_link);
+	phba->sli4_hba.link_state.duplex =
+				bf_get(lpfc_acqe_link_duplex, acqe_link);
+	phba->sli4_hba.link_state.status =
+				bf_get(lpfc_acqe_link_status, acqe_link);
+	phba->sli4_hba.link_state.physical =
+				bf_get(lpfc_acqe_link_physical, acqe_link);
+	phba->sli4_hba.link_state.fault =
+				bf_get(lpfc_acqe_link_fault, acqe_link);
+
+	/* Invoke the lpfc_handle_latt mailbox command callback function */
+	lpfc_mbx_cmpl_read_la(phba, pmb);
+
+	return;
+
+out_free_dmabuf:
+	kfree(mp);
+out_free_pmb:
+	mempool_free(pmb, phba->mbox_mem_pool);
+}
+
+/**
+ * lpfc_sli4_async_fcoe_evt - Process the asynchronous fcoe event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async fcoe completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous fcoe event.
+ **/
+static void
+lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_fcoe *acqe_fcoe)
+{
+	uint8_t event_type = bf_get(lpfc_acqe_fcoe_event_type, acqe_fcoe);
+	int rc;
+
+	switch (event_type) {
+	case LPFC_FCOE_EVENT_TYPE_NEW_FCF:
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+			"2546 New FCF found index 0x%x tag 0x%x \n",
+			acqe_fcoe->fcf_index,
+			acqe_fcoe->event_tag);
+		/*
+		 * If the current FCF is in discovered state,
+		 * do nothing.
+		 */
+		spin_lock_irq(&phba->hbalock);
+		if (phba->fcf.fcf_flag & FCF_DISCOVERED) {
+			spin_unlock_irq(&phba->hbalock);
+			break;
+		}
+		spin_unlock_irq(&phba->hbalock);
+
+		/* Read the FCF table and re-discover SAN. */
+		rc = lpfc_sli4_read_fcf_record(phba,
+			LPFC_FCOE_FCF_GET_FIRST);
+		if (rc)
+			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+				"2547 Read FCF record failed 0x%x\n",
+				rc);
+		break;
+
+	case LPFC_FCOE_EVENT_TYPE_FCF_TABLE_FULL:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2548 FCF Table full count 0x%x tag 0x%x \n",
+			bf_get(lpfc_acqe_fcoe_fcf_count, acqe_fcoe),
+			acqe_fcoe->event_tag);
+		break;
+
+	case LPFC_FCOE_EVENT_TYPE_FCF_DEAD:
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+			"2549 FCF disconnected fron network index 0x%x"
+			" tag 0x%x \n", acqe_fcoe->fcf_index,
+			acqe_fcoe->event_tag);
+		/* If the event is not for currently used fcf do nothing */
+		if (phba->fcf.fcf_indx != acqe_fcoe->fcf_index)
+			break;
+		/*
+		 * Currently, driver support only one FCF - so treat this as
+		 * a link down.
+		 */
+		lpfc_linkdown(phba);
+		/* Unregister FCF if no devices connected to it */
+		lpfc_unregister_unused_fcf(phba);
+		break;
+
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0288 Unknown FCoE event type 0x%x event tag "
+			"0x%x\n", event_type, acqe_fcoe->event_tag);
+		break;
+	}
+}
+
+/**
+ * lpfc_sli4_async_dcbx_evt - Process the asynchronous dcbx event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async dcbx completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous dcbx event.
+ **/
+static void
+lpfc_sli4_async_dcbx_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_dcbx *acqe_dcbx)
+{
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0290 The SLI4 DCBX asynchronous event is not "
+			"handled yet\n");
+}
+
+/**
+ * lpfc_sli4_async_event_proc - Process all the pending asynchronous event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 asynchronous events.
+ **/
+void lpfc_sli4_async_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the async event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~ASYNC_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the async events */
+	while (!list_empty(&phba->sli4_hba.sp_asynce_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_asynce_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Process the asynchronous event */
+		switch (bf_get(lpfc_trailer_code, &cq_event->cqe.mcqe_cmpl)) {
+		case LPFC_TRAILER_CODE_LINK:
+			lpfc_sli4_async_link_evt(phba,
+						 &cq_event->cqe.acqe_link);
+			break;
+		case LPFC_TRAILER_CODE_FCOE:
+			lpfc_sli4_async_fcoe_evt(phba,
+						 &cq_event->cqe.acqe_fcoe);
+			break;
+		case LPFC_TRAILER_CODE_DCBX:
+			lpfc_sli4_async_dcbx_evt(phba,
+						 &cq_event->cqe.acqe_dcbx);
+			break;
+		default:
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"1804 Invalid asynchrous event code: "
+					"x%x\n", bf_get(lpfc_trailer_code,
+					&cq_event->cqe.mcqe_cmpl));
+			break;
+		}
+		/* Free the completion event processed to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+/**
+ * lpfc_api_table_setup - Set up per hba pci-device group func api jump table
+ * @phba: pointer to lpfc hba data structure.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine is invoked to set up the per HBA PCI-Device group function
+ * API jump table entries.
+ *
+ * Return: 0 if success, otherwise -ENODEV
+ **/
+int
+lpfc_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+	int rc;
+
+	/* Set up lpfc PCI-device group */
+	phba->pci_dev_grp = dev_grp;
+
+	/* The LPFC_PCI_DEV_OC uses SLI4 */
+	if (dev_grp == LPFC_PCI_DEV_OC)
+		phba->sli_rev = LPFC_SLI_REV4;
+
+	/* Set up device INIT API function jump table */
+	rc = lpfc_init_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up SCSI API function jump table */
+	rc = lpfc_scsi_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up SLI API function jump table */
+	rc = lpfc_sli_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up MBOX API function jump table */
+	rc = lpfc_mbox_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+
+	return 0;
+}
+
+/**
+ * lpfc_log_intr_mode - Log the active interrupt mode
+ * @phba: pointer to lpfc hba data structure.
+ * @intr_mode: active interrupt mode adopted.
+ *
+ * This routine it invoked to log the currently used active interrupt mode
+ * to the device.
+ **/
+static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
+{
+	switch (intr_mode) {
+	case 0:
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0470 Enable INTx interrupt mode.\n");
+		break;
+	case 1:
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0481 Enabled MSI interrupt mode.\n");
+		break;
+	case 2:
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0480 Enabled MSI-X interrupt mode.\n");
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0482 Illegal interrupt mode.\n");
+		break;
+	}
+	return;
+}
+
+/**
+ * lpfc_enable_pci_dev - Enable a generic PCI device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the PCI device that is common to all
+ * PCI devices.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_enable_pci_dev(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	int bars;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		goto out_error;
+	else
+		pdev = phba->pcidev;
+	/* Select PCI BARs */
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	/* Enable PCI device */
+	if (pci_enable_device_mem(pdev))
+		goto out_error;
+	/* Request PCI resource for the device */
+	if (pci_request_selected_regions(pdev, bars, LPFC_DRIVER_NAME))
+		goto out_disable_device;
+	/* Set up device as PCI master and save state for EEH */
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+	pci_save_state(pdev);
+
+	return 0;
+
+out_disable_device:
+	pci_disable_device(pdev);
+out_error:
+	return -ENODEV;
+}
+
+/**
+ * lpfc_disable_pci_dev - Disable a generic PCI device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable the PCI device that is common to all
+ * PCI devices.
+ **/
+static void
+lpfc_disable_pci_dev(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	int bars;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+	/* Select PCI BARs */
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	/* Release PCI resource and disable PCI device */
+	pci_release_selected_regions(pdev, bars);
+	pci_disable_device(pdev);
+	/* Null out PCI private reference to driver */
+	pci_set_drvdata(pdev, NULL);
+
+	return;
+}
+
+/**
+ * lpfc_reset_hba - Reset a hba
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to reset a hba device. It brings the HBA
+ * offline, performs a board restart, and then brings the board back
+ * online. The lpfc_offline calls lpfc_sli_hba_down which will clean up
+ * on outstanding mailbox commands.
+ **/
+void
+lpfc_reset_hba(struct lpfc_hba *phba)
+{
+	/* If resets are disabled then set error state and return. */
+	if (!phba->cfg_enable_hba_reset) {
+		phba->link_state = LPFC_HBA_ERROR;
+		return;
+	}
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	lpfc_sli_brdrestart(phba);
+	lpfc_online(phba);
+	lpfc_unblock_mgmt_io(phba);
+}
+
+/**
+ * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources specific to
+ * support the SLI-3 HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli;
+
+	/*
+	 * Initialize timers used by driver
+	 */
+
+	/* Heartbeat timer */
+	init_timer(&phba->hb_tmofunc);
+	phba->hb_tmofunc.function = lpfc_hb_timeout;
+	phba->hb_tmofunc.data = (unsigned long)phba;
+
+	psli = &phba->sli;
+	/* MBOX heartbeat timer */
+	init_timer(&psli->mbox_tmo);
+	psli->mbox_tmo.function = lpfc_mbox_timeout;
+	psli->mbox_tmo.data = (unsigned long) phba;
+	/* FCP polling mode timer */
+	init_timer(&phba->fcp_poll_timer);
+	phba->fcp_poll_timer.function = lpfc_poll_timeout;
+	phba->fcp_poll_timer.data = (unsigned long) phba;
+	/* Fabric block timer */
+	init_timer(&phba->fabric_block_timer);
+	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
+	phba->fabric_block_timer.data = (unsigned long) phba;
+	/* EA polling mode timer */
+	init_timer(&phba->eratt_poll);
+	phba->eratt_poll.function = lpfc_poll_eratt;
+	phba->eratt_poll.data = (unsigned long) phba;
+
+	/* Host attention work mask setup */
+	phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
+	phba->work_ha_mask |= (HA_RXMASK << (LPFC_ELS_RING * 4));
+
+	/* Get all the module params for configuring this host */
+	lpfc_get_cfgparam(phba);
+	/*
+	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * used to create the sg_dma_buf_pool must be dynamically calculated.
+	 * 2 segments are added since the IOCB needs a command and response bde.
+	 */
+	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+		sizeof(struct fcp_rsp) +
+			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+
+	if (phba->cfg_enable_bg) {
+		phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT;
+		phba->cfg_sg_dma_buf_size +=
+			phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
+	}
+
+	/* Also reinitialize the host templates with new values. */
+	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+
+	phba->max_vpi = LPFC_MAX_VPI;
+	/* This will be set to correct value after config_port mbox */
+	phba->max_vports = 0;
+
+	/*
+	 * Initialize the SLI Layer to run with lpfc HBAs.
+	 */
+	lpfc_sli_setup(phba);
+	lpfc_sli_queue_setup(phba);
+
+	/* Allocate device driver memory */
+	if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * lpfc_sli_driver_resource_unset - Unset drvr internal resources for SLI3 dev
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up
+ * specific for supporting the SLI-3 HBA device it attached to.
+ **/
+static void
+lpfc_sli_driver_resource_unset(struct lpfc_hba *phba)
+{
+	/* Free device driver memory allocated */
+	lpfc_mem_free_all(phba);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_driver_resource_setup - Setup drvr internal resources for SLI4 dev
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources specific to
+ * support the SLI-4 HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli;
+	int rc;
+	int i, hbq_count;
+
+	/* Before proceed, wait for POST done and device ready */
+	rc = lpfc_sli4_post_status_check(phba);
+	if (rc)
+		return -ENODEV;
+
+	/*
+	 * Initialize timers used by driver
+	 */
+
+	/* Heartbeat timer */
+	init_timer(&phba->hb_tmofunc);
+	phba->hb_tmofunc.function = lpfc_hb_timeout;
+	phba->hb_tmofunc.data = (unsigned long)phba;
+
+	psli = &phba->sli;
+	/* MBOX heartbeat timer */
+	init_timer(&psli->mbox_tmo);
+	psli->mbox_tmo.function = lpfc_mbox_timeout;
+	psli->mbox_tmo.data = (unsigned long) phba;
+	/* Fabric block timer */
+	init_timer(&phba->fabric_block_timer);
+	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
+	phba->fabric_block_timer.data = (unsigned long) phba;
+	/* EA polling mode timer */
+	init_timer(&phba->eratt_poll);
+	phba->eratt_poll.function = lpfc_poll_eratt;
+	phba->eratt_poll.data = (unsigned long) phba;
+	/*
+	 * We need to do a READ_CONFIG mailbox command here before
+	 * calling lpfc_get_cfgparam. For VFs this will report the
+	 * MAX_XRI, MAX_VPI, MAX_RPI, MAX_IOCB, and MAX_VFI settings.
+	 * All of the resources allocated
+	 * for this Port are tied to these values.
+	 */
+	/* Get all the module params for configuring this host */
+	lpfc_get_cfgparam(phba);
+	phba->max_vpi = LPFC_MAX_VPI;
+	/* This will be set to correct value after the read_config mbox */
+	phba->max_vports = 0;
+
+	/* Program the default value of vlan_id and fc_map */
+	phba->valid_vlan = 0;
+	phba->fc_map[0] = LPFC_FCOE_FCF_MAP0;
+	phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
+	phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
+
+	/*
+	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * used to create the sg_dma_buf_pool must be dynamically calculated.
+	 * 2 segments are added since the IOCB needs a command and response bde.
+	 * To insure that the scsi sgl does not cross a 4k page boundary only
+	 * sgl sizes of 1k, 2k, 4k, and 8k are supported.
+	 * Table of sgl sizes and seg_cnt:
+	 * sgl size, 	sg_seg_cnt	total seg
+	 * 1k		50		52
+	 * 2k		114		116
+	 * 4k		242		244
+	 * 8k		498		500
+	 * cmd(32) + rsp(160) + (52 * sizeof(sli4_sge)) = 1024
+	 * cmd(32) + rsp(160) + (116 * sizeof(sli4_sge)) = 2048
+	 * cmd(32) + rsp(160) + (244 * sizeof(sli4_sge)) = 4096
+	 * cmd(32) + rsp(160) + (500 * sizeof(sli4_sge)) = 8192
+	 */
+	if (phba->cfg_sg_seg_cnt <= LPFC_DEFAULT_SG_SEG_CNT)
+		phba->cfg_sg_seg_cnt = 50;
+	else if (phba->cfg_sg_seg_cnt <= 114)
+		phba->cfg_sg_seg_cnt = 114;
+	else if (phba->cfg_sg_seg_cnt <= 242)
+		phba->cfg_sg_seg_cnt = 242;
+	else
+		phba->cfg_sg_seg_cnt = 498;
+
+	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd)
+					+ sizeof(struct fcp_rsp);
+	phba->cfg_sg_dma_buf_size +=
+		((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge));
+
+	/* Initialize buffer queue management fields */
+	hbq_count = lpfc_sli_hbq_count();
+	for (i = 0; i < hbq_count; ++i)
+		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
+	INIT_LIST_HEAD(&phba->rb_pend_list);
+	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc;
+	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_sli4_rb_free;
+
+	/*
+	 * Initialize the SLI Layer to run with lpfc SLI4 HBAs.
+	 */
+	/* Initialize the Abort scsi buffer list used by driver */
+	spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
+	/* This abort list used by worker thread */
+	spin_lock_init(&phba->sli4_hba.abts_sgl_list_lock);
+
+	/*
+	 * Initialize dirver internal slow-path work queues
+	 */
+
+	/* Driver internel slow-path CQ Event pool */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_cqe_event_pool);
+	/* Response IOCB work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_rspiocb_work_queue);
+	/* Asynchronous event CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_asynce_work_queue);
+	/* Fast-path XRI aborted CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
+	/* Slow-path XRI aborted CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_els_xri_aborted_work_queue);
+	/* Receive queue CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_unsol_work_queue);
+
+	/* Initialize the driver internal SLI layer lists. */
+	lpfc_sli_setup(phba);
+	lpfc_sli_queue_setup(phba);
+
+	/* Allocate device driver memory */
+	rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ);
+	if (rc)
+		return -ENOMEM;
+
+	/* Create the bootstrap mailbox command */
+	rc = lpfc_create_bootstrap_mbox(phba);
+	if (unlikely(rc))
+		goto out_free_mem;
+
+	/* Set up the host's endian order with the device. */
+	rc = lpfc_setup_endian_order(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Set up the hba's configuration parameters. */
+	rc = lpfc_sli4_read_config(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Perform a function reset */
+	rc = lpfc_pci_function_reset(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Create all the SLI4 queues */
+	rc = lpfc_sli4_queue_create(phba);
+	if (rc)
+		goto out_free_bsmbx;
+
+	/* Create driver internal CQE event pool */
+	rc = lpfc_sli4_cq_event_pool_create(phba);
+	if (rc)
+		goto out_destroy_queue;
+
+	/* Initialize and populate the iocb list per host */
+	rc = lpfc_init_sgl_list(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1400 Failed to initialize sgl list.\n");
+		goto out_destroy_cq_event_pool;
+	}
+	rc = lpfc_init_active_sgl_array(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1430 Failed to initialize sgl list.\n");
+		goto out_free_sgl_list;
+	}
+
+	rc = lpfc_sli4_init_rpi_hdrs(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1432 Failed to initialize rpi headers.\n");
+		goto out_free_active_sgl;
+	}
+
+	phba->sli4_hba.fcp_eq_hdl = kzalloc((sizeof(struct lpfc_fcp_eq_hdl) *
+				    phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_eq_hdl) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2572 Failed allocate memory for fast-path "
+				"per-EQ handle array\n");
+		goto out_remove_rpi_hdrs;
+	}
+
+	phba->sli4_hba.msix_entries = kzalloc((sizeof(struct msix_entry) *
+				      phba->sli4_hba.cfg_eqn), GFP_KERNEL);
+	if (!phba->sli4_hba.msix_entries) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2573 Failed allocate memory for msi-x "
+				"interrupt vector entries\n");
+		goto out_free_fcp_eq_hdl;
+	}
+
+	return rc;
+
+out_free_fcp_eq_hdl:
+	kfree(phba->sli4_hba.fcp_eq_hdl);
+out_remove_rpi_hdrs:
+	lpfc_sli4_remove_rpi_hdrs(phba);
+out_free_active_sgl:
+	lpfc_free_active_sgl(phba);
+out_free_sgl_list:
+	lpfc_free_sgl_list(phba);
+out_destroy_cq_event_pool:
+	lpfc_sli4_cq_event_pool_destroy(phba);
+out_destroy_queue:
+	lpfc_sli4_queue_destroy(phba);
+out_free_bsmbx:
+	lpfc_destroy_bootstrap_mbox(phba);
+out_free_mem:
+	lpfc_mem_free(phba);
+	return rc;
+}
+
+/**
+ * lpfc_sli4_driver_resource_unset - Unset drvr internal resources for SLI4 dev
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up
+ * specific for supporting the SLI-4 HBA device it attached to.
+ **/
+static void
+lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
+{
+	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
+
+	/* unregister default FCFI from the HBA */
+	lpfc_sli4_fcfi_unreg(phba, phba->fcf.fcfi);
+
+	/* Free the default FCR table */
+	lpfc_sli_remove_dflt_fcf(phba);
+
+	/* Free memory allocated for msi-x interrupt vector entries */
+	kfree(phba->sli4_hba.msix_entries);
+
+	/* Free memory allocated for fast-path work queue handles */
+	kfree(phba->sli4_hba.fcp_eq_hdl);
+
+	/* Free the allocated rpi headers. */
+	lpfc_sli4_remove_rpi_hdrs(phba);
+
+	/* Free the ELS sgl list */
+	lpfc_free_active_sgl(phba);
+	lpfc_free_sgl_list(phba);
+
+	/* Free the SCSI sgl management array */
+	kfree(phba->sli4_hba.lpfc_scsi_psb_array);
+
+	/* Free the SLI4 queues */
+	lpfc_sli4_queue_destroy(phba);
+
+	/* Free the completion queue EQ event pool */
+	lpfc_sli4_cq_event_release_all(phba);
+	lpfc_sli4_cq_event_pool_destroy(phba);
+
+	/* Reset SLI4 HBA FCoE function */
+	lpfc_pci_function_reset(phba);
+
+	/* Free the bsmbx region. */
+	lpfc_destroy_bootstrap_mbox(phba);
+
+	/* Free the SLI Layer memory with SLI4 HBAs */
+	lpfc_mem_free_all(phba);
+
+	/* Free the current connect table */
+	list_for_each_entry_safe(conn_entry, next_conn_entry,
+		&phba->fcf_conn_rec_list, list)
+		kfree(conn_entry);
+
+	return;
+}
+
+/**
+ * lpfc_init_api_table_setup - Set up init api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the device INIT interface API function jump table
+ * in @phba struct.
+ *
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_hba_down_post = lpfc_hba_down_post_s3;
+		phba->lpfc_handle_eratt = lpfc_handle_eratt_s3;
+		phba->lpfc_stop_port = lpfc_stop_port_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_hba_down_post = lpfc_hba_down_post_s4;
+		phba->lpfc_handle_eratt = lpfc_handle_eratt_s4;
+		phba->lpfc_stop_port = lpfc_stop_port_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1431 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources before the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
+{
+	/*
+	 * Driver resources common to all SLI revisions
+	 */
+	atomic_set(&phba->fast_event_count, 0);
+	spin_lock_init(&phba->hbalock);
+
+	/* Initialize ndlp management spinlock */
+	spin_lock_init(&phba->ndlp_lock);
+
+	INIT_LIST_HEAD(&phba->port_list);
+	INIT_LIST_HEAD(&phba->work_list);
+	init_waitqueue_head(&phba->wait_4_mlo_m_q);
+
+	/* Initialize the wait queue head for the kernel thread */
+	init_waitqueue_head(&phba->work_waitq);
+
+	/* Initialize the scsi buffer list used by driver for scsi IO */
+	spin_lock_init(&phba->scsi_buf_list_lock);
+	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+
+	/* Initialize the fabric iocb list */
+	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+
+	/* Initialize list to save ELS buffers */
+	INIT_LIST_HEAD(&phba->elsbuf);
+
+	/* Initialize FCF connection rec list */
+	INIT_LIST_HEAD(&phba->fcf_conn_rec_list);
+
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources after the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
+{
+	int error;
+
+	/* Startup the kernel thread for this host adapter. */
+	phba->worker_thread = kthread_run(lpfc_do_work, phba,
+					  "lpfc_worker_%d", phba->brd_no);
+	if (IS_ERR(phba->worker_thread)) {
+		error = PTR_ERR(phba->worker_thread);
+		return error;
+	}
+
+	return 0;
+}
+
+/**
+ * lpfc_unset_driver_resource_phase2 - Phase2 unset driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up after
+ * the device specific resource setup for supporting the HBA device it
+ * attached to.
+ **/
+static void
+lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
+{
+	/* Stop kernel worker thread */
+	kthread_stop(phba->worker_thread);
+}
+
+/**
+ * lpfc_free_iocb_list - Free iocb list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver's IOCB list and memory.
+ **/
+static void
+lpfc_free_iocb_list(struct lpfc_hba *phba)
+{
+	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
+
+	spin_lock_irq(&phba->hbalock);
+	list_for_each_entry_safe(iocbq_entry, iocbq_next,
+				 &phba->lpfc_iocb_list, list) {
+		list_del(&iocbq_entry->list);
+		kfree(iocbq_entry);
+		phba->total_iocbq_bufs--;
+	}
+	spin_unlock_irq(&phba->hbalock);
+
+	return;
+}
+
+/**
+ * lpfc_init_iocb_list - Allocate and initialize iocb list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate and initizlize the driver's IOCB
+ * list and set up the IOCB tag array accordingly.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count)
+{
+	struct lpfc_iocbq *iocbq_entry = NULL;
+	uint16_t iotag;
+	int i;
+
+	/* Initialize and populate the iocb list per host.  */
+	INIT_LIST_HEAD(&phba->lpfc_iocb_list);
+	for (i = 0; i < iocb_count; i++) {
+		iocbq_entry = kzalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL);
+		if (iocbq_entry == NULL) {
+			printk(KERN_ERR "%s: only allocated %d iocbs of "
+				"expected %d count. Unloading driver.\n",
+				__func__, i, LPFC_IOCB_LIST_CNT);
+			goto out_free_iocbq;
+		}
+
+		iotag = lpfc_sli_next_iotag(phba, iocbq_entry);
+		if (iotag == 0) {
+			kfree(iocbq_entry);
+			printk(KERN_ERR "%s: failed to allocate IOTAG. "
+				"Unloading driver.\n", __func__);
+			goto out_free_iocbq;
+		}
+		iocbq_entry->sli4_xritag = NO_XRI;
+
+		spin_lock_irq(&phba->hbalock);
+		list_add(&iocbq_entry->list, &phba->lpfc_iocb_list);
+		phba->total_iocbq_bufs++;
+		spin_unlock_irq(&phba->hbalock);
+	}
+
+	return 0;
+
+out_free_iocbq:
+	lpfc_free_iocb_list(phba);
+
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_free_sgl_list - Free sgl list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver's sgl list and memory.
+ **/
+static void
+lpfc_free_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL;
+	LIST_HEAD(sglq_list);
+	int rc = 0;
+
+	spin_lock_irq(&phba->hbalock);
+	list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &sglq_list);
+	spin_unlock_irq(&phba->hbalock);
+
+	list_for_each_entry_safe(sglq_entry, sglq_next,
+				 &sglq_list, list) {
+		list_del(&sglq_entry->list);
+		lpfc_mbuf_free(phba, sglq_entry->virt, sglq_entry->phys);
+		kfree(sglq_entry);
+		phba->sli4_hba.total_sglq_bufs--;
+	}
+	rc = lpfc_sli4_remove_all_sgl_pages(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2005 Unable to deregister pages from HBA: %x", rc);
+	}
+	kfree(phba->sli4_hba.lpfc_els_sgl_array);
+}
+
+/**
+ * lpfc_init_active_sgl_array - Allocate the buf to track active ELS XRIs.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate the driver's active sgl memory.
+ * This array will hold the sglq_entry's for active IOs.
+ **/
+static int
+lpfc_init_active_sgl_array(struct lpfc_hba *phba)
+{
+	int size;
+	size = sizeof(struct lpfc_sglq *);
+	size *= phba->sli4_hba.max_cfg_param.max_xri;
+
+	phba->sli4_hba.lpfc_sglq_active_list =
+		kzalloc(size, GFP_KERNEL);
+	if (!phba->sli4_hba.lpfc_sglq_active_list)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * lpfc_free_active_sgl - Free the buf that tracks active ELS XRIs.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to walk through the array of active sglq entries
+ * and free all of the resources.
+ * This is just a place holder for now.
+ **/
+static void
+lpfc_free_active_sgl(struct lpfc_hba *phba)
+{
+	kfree(phba->sli4_hba.lpfc_sglq_active_list);
+}
+
+/**
+ * lpfc_init_sgl_list - Allocate and initialize sgl list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate and initizlize the driver's sgl
+ * list and set up the sgl xritag tag array accordingly.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_init_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_sglq *sglq_entry = NULL;
+	int i;
+	int els_xri_cnt;
+
+	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"2400 lpfc_init_sgl_list els %d.\n",
+				els_xri_cnt);
+	/* Initialize and populate the sglq list per host/VF. */
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_sgl_list);
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list);
+
+	/* Sanity check on XRI management */
+	if (phba->sli4_hba.max_cfg_param.max_xri <= els_xri_cnt) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2562 No room left for SCSI XRI allocation: "
+				"max_xri=%d, els_xri=%d\n",
+				phba->sli4_hba.max_cfg_param.max_xri,
+				els_xri_cnt);
+		return -ENOMEM;
+	}
+
+	/* Allocate memory for the ELS XRI management array */
+	phba->sli4_hba.lpfc_els_sgl_array =
+			kzalloc((sizeof(struct lpfc_sglq *) * els_xri_cnt),
+			GFP_KERNEL);
+
+	if (!phba->sli4_hba.lpfc_els_sgl_array) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2401 Failed to allocate memory for ELS "
+				"XRI management array of size %d.\n",
+				els_xri_cnt);
+		return -ENOMEM;
+	}
+
+	/* Keep the SCSI XRI into the XRI management array */
+	phba->sli4_hba.scsi_xri_max =
+			phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
+	phba->sli4_hba.scsi_xri_cnt = 0;
+
+	phba->sli4_hba.lpfc_scsi_psb_array =
+			kzalloc((sizeof(struct lpfc_scsi_buf *) *
+			phba->sli4_hba.scsi_xri_max), GFP_KERNEL);
+
+	if (!phba->sli4_hba.lpfc_scsi_psb_array) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2563 Failed to allocate memory for SCSI "
+				"XRI management array of size %d.\n",
+				phba->sli4_hba.scsi_xri_max);
+		kfree(phba->sli4_hba.lpfc_els_sgl_array);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < els_xri_cnt; i++) {
+		sglq_entry = kzalloc(sizeof(struct lpfc_sglq), GFP_KERNEL);
+		if (sglq_entry == NULL) {
+			printk(KERN_ERR "%s: only allocated %d sgls of "
+				"expected %d count. Unloading driver.\n",
+				__func__, i, els_xri_cnt);
+			goto out_free_mem;
+		}
+
+		sglq_entry->sli4_xritag = lpfc_sli4_next_xritag(phba);
+		if (sglq_entry->sli4_xritag == NO_XRI) {
+			kfree(sglq_entry);
+			printk(KERN_ERR "%s: failed to allocate XRI.\n"
+				"Unloading driver.\n", __func__);
+			goto out_free_mem;
+		}
+		sglq_entry->buff_type = GEN_BUFF_TYPE;
+		sglq_entry->virt = lpfc_mbuf_alloc(phba, 0, &sglq_entry->phys);
+		if (sglq_entry->virt == NULL) {
+			kfree(sglq_entry);
+			printk(KERN_ERR "%s: failed to allocate mbuf.\n"
+				"Unloading driver.\n", __func__);
+			goto out_free_mem;
+		}
+		sglq_entry->sgl = sglq_entry->virt;
+		memset(sglq_entry->sgl, 0, LPFC_BPL_SIZE);
+
+		/* The list order is used by later block SGL registraton */
+		spin_lock_irq(&phba->hbalock);
+		list_add_tail(&sglq_entry->list, &phba->sli4_hba.lpfc_sgl_list);
+		phba->sli4_hba.lpfc_els_sgl_array[i] = sglq_entry;
+		phba->sli4_hba.total_sglq_bufs++;
+		spin_unlock_irq(&phba->hbalock);
+	}
+	return 0;
+
+out_free_mem:
+	kfree(phba->sli4_hba.lpfc_scsi_psb_array);
+	lpfc_free_sgl_list(phba);
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_init_rpi_hdrs - Post the rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ * No locks are held here because this is an initialization routine
+ * called only from probe or lpfc_online when interrupts are not
+ * enabled and the driver is reinitializing the device.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_init_rpi_hdrs(struct lpfc_hba *phba)
+{
+	int rc = 0;
+	int longs;
+	uint16_t rpi_count;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_rpi_hdr_list);
+
+	/*
+	 * Provision an rpi bitmask range for discovery. The total count
+	 * is the difference between max and base + 1.
+	 */
+	rpi_count = phba->sli4_hba.max_cfg_param.rpi_base +
+		    phba->sli4_hba.max_cfg_param.max_rpi - 1;
+
+	longs = ((rpi_count) + BITS_PER_LONG - 1) / BITS_PER_LONG;
+	phba->sli4_hba.rpi_bmask = kzalloc(longs * sizeof(unsigned long),
+					   GFP_KERNEL);
+	if (!phba->sli4_hba.rpi_bmask)
+		return -ENOMEM;
+
+	rpi_hdr = lpfc_sli4_create_rpi_hdr(phba);
+	if (!rpi_hdr) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0391 Error during rpi post operation\n");
+		lpfc_sli4_remove_rpis(phba);
+		rc = -ENODEV;
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_create_rpi_hdr - Allocate an rpi header memory region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate a single 4KB memory region to
+ * support rpis and stores them in the phba.  This single region
+ * provides support for up to 64 rpis.  The region is used globally
+ * by the device.
+ *
+ * Returns:
+ *   A valid rpi hdr on success.
+ *   A NULL pointer on any failure.
+ **/
+struct lpfc_rpi_hdr *
+lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
+{
+	uint16_t rpi_limit, curr_rpi_range;
+	struct lpfc_dmabuf *dmabuf;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
+		    phba->sli4_hba.max_cfg_param.max_rpi - 1;
+
+	spin_lock_irq(&phba->hbalock);
+	curr_rpi_range = phba->sli4_hba.next_rpi;
+	spin_unlock_irq(&phba->hbalock);
+
+	/*
+	 * The port has a limited number of rpis. The increment here
+	 * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value
+	 * and to allow the full max_rpi range per port.
+	 */
+	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
+		return NULL;
+
+	/*
+	 * First allocate the protocol header region for the port.  The
+	 * port expects a 4KB DMA-mapped memory region that is 4K aligned.
+	 */
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return NULL;
+
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  LPFC_HDR_TEMPLATE_SIZE,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		rpi_hdr = NULL;
+		goto err_free_dmabuf;
+	}
+
+	memset(dmabuf->virt, 0, LPFC_HDR_TEMPLATE_SIZE);
+	if (!IS_ALIGNED(dmabuf->phys, LPFC_HDR_TEMPLATE_SIZE)) {
+		rpi_hdr = NULL;
+		goto err_free_coherent;
+	}
+
+	/* Save the rpi header data for cleanup later. */
+	rpi_hdr = kzalloc(sizeof(struct lpfc_rpi_hdr), GFP_KERNEL);
+	if (!rpi_hdr)
+		goto err_free_coherent;
+
+	rpi_hdr->dmabuf = dmabuf;
+	rpi_hdr->len = LPFC_HDR_TEMPLATE_SIZE;
+	rpi_hdr->page_count = 1;
+	spin_lock_irq(&phba->hbalock);
+	rpi_hdr->start_rpi = phba->sli4_hba.next_rpi;
+	list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list);
+
+	/*
+	 * The next_rpi stores the next module-64 rpi value to post
+	 * in any subsequent rpi memory region postings.
+	 */
+	phba->sli4_hba.next_rpi += LPFC_RPI_HDR_COUNT;
+	spin_unlock_irq(&phba->hbalock);
+	return rpi_hdr;
+
+ err_free_coherent:
+	dma_free_coherent(&phba->pcidev->dev, LPFC_HDR_TEMPLATE_SIZE,
+			  dmabuf->virt, dmabuf->phys);
+ err_free_dmabuf:
+	kfree(dmabuf);
+	return NULL;
+}
+
+/**
+ * lpfc_sli4_remove_rpi_hdrs - Remove all rpi header memory regions
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove all memory resources allocated
+ * to support rpis. This routine presumes the caller has released all
+ * rpis consumed by fabric or port logins and is prepared to have
+ * the header pages removed.
+ **/
+void
+lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *phba)
+{
+	struct lpfc_rpi_hdr *rpi_hdr, *next_rpi_hdr;
+
+	list_for_each_entry_safe(rpi_hdr, next_rpi_hdr,
+				 &phba->sli4_hba.lpfc_rpi_hdr_list, list) {
+		list_del(&rpi_hdr->list);
+		dma_free_coherent(&phba->pcidev->dev, rpi_hdr->len,
+				  rpi_hdr->dmabuf->virt, rpi_hdr->dmabuf->phys);
+		kfree(rpi_hdr->dmabuf);
+		kfree(rpi_hdr);
+	}
+
+	phba->sli4_hba.next_rpi = phba->sli4_hba.max_cfg_param.rpi_base;
+	memset(phba->sli4_hba.rpi_bmask, 0, sizeof(*phba->sli4_hba.rpi_bmask));
+}
+
+/**
+ * lpfc_hba_alloc - Allocate driver hba data structure for a device.
+ * @pdev: pointer to pci device data structure.
+ *
+ * This routine is invoked to allocate the driver hba data structure for an
+ * HBA device. If the allocation is successful, the phba reference to the
+ * PCI device data structure is set.
+ *
+ * Return codes
+ *      pointer to @phba - sucessful
+ *      NULL - error
+ **/
+static struct lpfc_hba *
+lpfc_hba_alloc(struct pci_dev *pdev)
+{
+	struct lpfc_hba *phba;
+
+	/* Allocate memory for HBA structure */
+	phba = kzalloc(sizeof(struct lpfc_hba), GFP_KERNEL);
+	if (!phba) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1417 Failed to allocate hba struct.\n");
+		return NULL;
+	}
+
+	/* Set reference to PCI device in HBA structure */
+	phba->pcidev = pdev;
+
+	/* Assign an unused board number */
+	phba->brd_no = lpfc_get_instance();
+	if (phba->brd_no < 0) {
+		kfree(phba);
+		return NULL;
+	}
+
+	return phba;
+}
+
+/**
+ * lpfc_hba_free - Free driver hba data structure with a device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver hba data structure with an
+ * HBA device.
+ **/
+static void
+lpfc_hba_free(struct lpfc_hba *phba)
+{
+	/* Release the driver assigned board number */
+	idr_remove(&lpfc_hba_index, phba->brd_no);
+
+	kfree(phba);
+	return;
+}
+
+/**
+ * lpfc_create_shost - Create hba physical port with associated scsi host.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to create HBA physical port and associate a SCSI
+ * host with it.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      other values - error
+ **/
+static int
+lpfc_create_shost(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport;
+	struct Scsi_Host  *shost;
+
+	/* Initialize HBA FC structure */
+	phba->fc_edtov = FF_DEF_EDTOV;
+	phba->fc_ratov = FF_DEF_RATOV;
+	phba->fc_altov = FF_DEF_ALTOV;
+	phba->fc_arbtov = FF_DEF_ARBTOV;
+
+	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
+	if (!vport)
+		return -ENODEV;
+
+	shost = lpfc_shost_from_vport(vport);
+	phba->pport = vport;
+	lpfc_debugfs_initialize(vport);
+	/* Put reference to SCSI host to driver's device private data */
+	pci_set_drvdata(phba->pcidev, shost);
+
+	return 0;
+}
+
+/**
+ * lpfc_destroy_shost - Destroy hba physical port with associated scsi host.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to destroy HBA physical port and the associated
+ * SCSI host.
+ **/
+static void
+lpfc_destroy_shost(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+
+	/* Destroy physical port that associated with the SCSI host */
+	destroy_port(vport);
+
+	return;
+}
+
+/**
+ * lpfc_setup_bg - Setup Block guard structures and debug areas.
+ * @phba: pointer to lpfc hba data structure.
+ * @shost: the shost to be used to detect Block guard settings.
+ *
+ * This routine sets up the local Block guard protocol settings for @shost.
+ * This routine also allocates memory for debugging bg buffers.
+ **/
+static void
+lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
+{
+	int pagecnt = 10;
+	if (lpfc_prot_mask && lpfc_prot_guard) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"1478 Registering BlockGuard with the "
+				"SCSI layer\n");
+		scsi_host_set_prot(shost, lpfc_prot_mask);
+		scsi_host_set_guard(shost, lpfc_prot_guard);
+	}
+	if (!_dump_buf_data) {
+		while (pagecnt) {
+			spin_lock_init(&_dump_buf_lock);
+			_dump_buf_data =
+				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
+			if (_dump_buf_data) {
+				printk(KERN_ERR "BLKGRD allocated %d pages for "
+				       "_dump_buf_data at 0x%p\n",
+				       (1 << pagecnt), _dump_buf_data);
+				_dump_buf_data_order = pagecnt;
+				memset(_dump_buf_data, 0,
+				       ((1 << PAGE_SHIFT) << pagecnt));
+				break;
+			} else
+				--pagecnt;
+		}
+		if (!_dump_buf_data_order)
+			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
+			       "memory for hexdump\n");
+	} else
+		printk(KERN_ERR "BLKGRD already allocated _dump_buf_data=0x%p"
+		       "\n", _dump_buf_data);
+	if (!_dump_buf_dif) {
+		while (pagecnt) {
+			_dump_buf_dif =
+				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
+			if (_dump_buf_dif) {
+				printk(KERN_ERR "BLKGRD allocated %d pages for "
+				       "_dump_buf_dif at 0x%p\n",
+				       (1 << pagecnt), _dump_buf_dif);
+				_dump_buf_dif_order = pagecnt;
+				memset(_dump_buf_dif, 0,
+				       ((1 << PAGE_SHIFT) << pagecnt));
+				break;
+			} else
+				--pagecnt;
+		}
+		if (!_dump_buf_dif_order)
+			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
+			       "memory for hexdump\n");
+	} else
+		printk(KERN_ERR "BLKGRD already allocated _dump_buf_dif=0x%p\n",
+		       _dump_buf_dif);
+}
+
+/**
+ * lpfc_post_init_setup - Perform necessary device post initialization setup.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to perform all the necessary post initialization
+ * setup for the device.
+ **/
+static void
+lpfc_post_init_setup(struct lpfc_hba *phba)
+{
+	struct Scsi_Host  *shost;
+	struct lpfc_adapter_event_header adapter_event;
+
+	/* Get the default values for Model Name and Description */
+	lpfc_get_hba_model_desc(phba, phba->ModelName, phba->ModelDesc);
+
+	/*
+	 * hba setup may have changed the hba_queue_depth so we need to
+	 * adjust the value of can_queue.
+	 */
+	shost = pci_get_drvdata(phba->pcidev);
+	shost->can_queue = phba->cfg_hba_queue_depth - 10;
+	if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
+		lpfc_setup_bg(phba, shost);
+
+	lpfc_host_attrib_init(shost);
+
+	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
+		spin_lock_irq(shost->host_lock);
+		lpfc_poll_start_timer(phba);
+		spin_unlock_irq(shost->host_lock);
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0428 Perform SCSI scan\n");
+	/* Send board arrival event to upper layer */
+	adapter_event.event_type = FC_REG_ADAPTER_EVENT;
+	adapter_event.subcategory = LPFC_EVENT_ARRIVAL;
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(adapter_event),
+				  (char *) &adapter_event,
+				  LPFC_NL_VENDOR_ID);
+	return;
+}
+
+/**
+ * lpfc_sli_pci_mem_setup - Setup SLI3 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the PCI device memory space for device
+ * with SLI-3 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli_pci_mem_setup(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	unsigned long bar0map_len, bar2map_len;
+	int i, hbq_count;
+	void *ptr;
+	int error = -ENODEV;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return error;
+	else
+		pdev = phba->pcidev;
+
+	/* Set the device DMA mask size */
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
+			return error;
+
+	/* Get the bus address of Bar0 and Bar2 and the number of bytes
+	 * required by each mapping.
+	 */
+	phba->pci_bar0_map = pci_resource_start(pdev, 0);
+	bar0map_len = pci_resource_len(pdev, 0);
+
+	phba->pci_bar2_map = pci_resource_start(pdev, 2);
+	bar2map_len = pci_resource_len(pdev, 2);
+
+	/* Map HBA SLIM to a kernel virtual address. */
+	phba->slim_memmap_p = ioremap(phba->pci_bar0_map, bar0map_len);
+	if (!phba->slim_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLIM memory.\n");
+		goto out;
+	}
+
+	/* Map HBA Control Registers to a kernel virtual address. */
+	phba->ctrl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len);
+	if (!phba->ctrl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for HBA control registers.\n");
+		goto out_iounmap_slim;
+	}
+
+	/* Allocate memory for SLI-2 structures */
+	phba->slim2p.virt = dma_alloc_coherent(&pdev->dev,
+					       SLI2_SLIM_SIZE,
+					       &phba->slim2p.phys,
+					       GFP_KERNEL);
+	if (!phba->slim2p.virt)
+		goto out_iounmap;
+
+	memset(phba->slim2p.virt, 0, SLI2_SLIM_SIZE);
+	phba->mbox = phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, mbx);
+	phba->pcb = (phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, pcb));
+	phba->IOCBs = (phba->slim2p.virt +
+		       offsetof(struct lpfc_sli2_slim, IOCBs));
+
+	phba->hbqslimp.virt = dma_alloc_coherent(&pdev->dev,
+						 lpfc_sli_hbq_size(),
+						 &phba->hbqslimp.phys,
+						 GFP_KERNEL);
+	if (!phba->hbqslimp.virt)
+		goto out_free_slim;
+
+	hbq_count = lpfc_sli_hbq_count();
+	ptr = phba->hbqslimp.virt;
+	for (i = 0; i < hbq_count; ++i) {
+		phba->hbqs[i].hbq_virt = ptr;
+		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
+		ptr += (lpfc_hbq_defs[i]->entry_count *
+			sizeof(struct lpfc_hbq_entry));
+	}
+	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_els_hbq_alloc;
+	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_els_hbq_free;
+
+	memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
+
+	INIT_LIST_HEAD(&phba->rb_pend_list);
+
+	phba->MBslimaddr = phba->slim_memmap_p;
+	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
+	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
+	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
+	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
+
+	return 0;
+
+out_free_slim:
+	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
+			  phba->slim2p.virt, phba->slim2p.phys);
+out_iounmap:
+	iounmap(phba->ctrl_regs_memmap_p);
+out_iounmap_slim:
+	iounmap(phba->slim_memmap_p);
+out:
+	return error;
+}
+
+/**
+ * lpfc_sli_pci_mem_unset - Unset SLI3 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the PCI device memory space for device
+ * with SLI-3 interface spec.
+ **/
+static void
+lpfc_sli_pci_mem_unset(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+
+	/* Free coherent DMA memory allocated */
+	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
+			  phba->hbqslimp.virt, phba->hbqslimp.phys);
+	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
+			  phba->slim2p.virt, phba->slim2p.phys);
+
+	/* I/O memory unmap */
+	iounmap(phba->ctrl_regs_memmap_p);
+	iounmap(phba->slim_memmap_p);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_post_status_check - Wait for SLI4 POST done and check status
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to wait for SLI4 device Power On Self Test (POST)
+ * done and check status.
+ *
+ * Return 0 if successful, otherwise -ENODEV.
+ **/
+int
+lpfc_sli4_post_status_check(struct lpfc_hba *phba)
+{
+	struct lpfc_register sta_reg, uerrlo_reg, uerrhi_reg, scratchpad;
+	uint32_t onlnreg0, onlnreg1;
+	int i, port_error = -ENODEV;
+
+	if (!phba->sli4_hba.STAregaddr)
+		return -ENODEV;
+
+	/* With uncoverable error, log the error message and return error */
+	onlnreg0 = readl(phba->sli4_hba.ONLINE0regaddr);
+	onlnreg1 = readl(phba->sli4_hba.ONLINE1regaddr);
+	if ((onlnreg0 != LPFC_ONLINE_NERR) || (onlnreg1 != LPFC_ONLINE_NERR)) {
+		uerrlo_reg.word0 = readl(phba->sli4_hba.UERRLOregaddr);
+		uerrhi_reg.word0 = readl(phba->sli4_hba.UERRHIregaddr);
+		if (uerrlo_reg.word0 || uerrhi_reg.word0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1422 HBA Unrecoverable error: "
+					"uerr_lo_reg=0x%x, uerr_hi_reg=0x%x, "
+					"online0_reg=0x%x, online1_reg=0x%x\n",
+					uerrlo_reg.word0, uerrhi_reg.word0,
+					onlnreg0, onlnreg1);
+		}
+		return -ENODEV;
+	}
+
+	/* Wait up to 30 seconds for the SLI Port POST done and ready */
+	for (i = 0; i < 3000; i++) {
+		sta_reg.word0 = readl(phba->sli4_hba.STAregaddr);
+		/* Encounter fatal POST error, break out */
+		if (bf_get(lpfc_hst_state_perr, &sta_reg)) {
+			port_error = -ENODEV;
+			break;
+		}
+		if (LPFC_POST_STAGE_ARMFW_READY ==
+		    bf_get(lpfc_hst_state_port_status, &sta_reg)) {
+			port_error = 0;
+			break;
+		}
+		msleep(10);
+	}
+
+	if (port_error)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"1408 Failure HBA POST Status: sta_reg=0x%x, "
+			"perr=x%x, sfi=x%x, nip=x%x, ipc=x%x, xrom=x%x, "
+			"dl=x%x, pstatus=x%x\n", sta_reg.word0,
+			bf_get(lpfc_hst_state_perr, &sta_reg),
+			bf_get(lpfc_hst_state_sfi, &sta_reg),
+			bf_get(lpfc_hst_state_nip, &sta_reg),
+			bf_get(lpfc_hst_state_ipc, &sta_reg),
+			bf_get(lpfc_hst_state_xrom, &sta_reg),
+			bf_get(lpfc_hst_state_dl, &sta_reg),
+			bf_get(lpfc_hst_state_port_status, &sta_reg));
+
+	/* Log device information */
+	scratchpad.word0 =  readl(phba->sli4_hba.SCRATCHPADregaddr);
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2534 Device Info: ChipType=0x%x, SliRev=0x%x, "
+			"FeatureL1=0x%x, FeatureL2=0x%x\n",
+			bf_get(lpfc_scratchpad_chiptype, &scratchpad),
+			bf_get(lpfc_scratchpad_slirev, &scratchpad),
+			bf_get(lpfc_scratchpad_featurelevel1, &scratchpad),
+			bf_get(lpfc_scratchpad_featurelevel2, &scratchpad));
+
+	return port_error;
+}
+
+/**
+ * lpfc_sli4_bar0_register_memmap - Set up SLI4 BAR0 register memory map.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up SLI4 BAR0 PCI config space register
+ * memory map.
+ **/
+static void
+lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba)
+{
+	phba->sli4_hba.UERRLOregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_UERR_STATUS_LO;
+	phba->sli4_hba.UERRHIregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_UERR_STATUS_HI;
+	phba->sli4_hba.ONLINE0regaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_ONLINE0;
+	phba->sli4_hba.ONLINE1regaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_ONLINE1;
+	phba->sli4_hba.SCRATCHPADregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_SCRATCHPAD;
+}
+
+/**
+ * lpfc_sli4_bar1_register_memmap - Set up SLI4 BAR1 register memory map.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up SLI4 BAR1 control status register (CSR)
+ * memory map.
+ **/
+static void
+lpfc_sli4_bar1_register_memmap(struct lpfc_hba *phba)
+{
+
+	phba->sli4_hba.STAregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_STATE;
+	phba->sli4_hba.ISRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_ISR0;
+	phba->sli4_hba.IMRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_IMR0;
+	phba->sli4_hba.ISCRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				     LPFC_HST_ISCR0;
+	return;
+}
+
+/**
+ * lpfc_sli4_bar2_register_memmap - Set up SLI4 BAR2 register memory map.
+ * @phba: pointer to lpfc hba data structure.
+ * @vf: virtual function number
+ *
+ * This routine is invoked to set up SLI4 BAR2 doorbell register memory map
+ * based on the given viftual function number, @vf.
+ *
+ * Return 0 if successful, otherwise -ENODEV.
+ **/
+static int
+lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf)
+{
+	if (vf > LPFC_VIR_FUNC_MAX)
+		return -ENODEV;
+
+	phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL);
+	phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL);
+	phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL);
+	phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_MQ_DOORBELL);
+	phba->sli4_hba.BMBXregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_BMBX);
+	return 0;
+}
+
+/**
+ * lpfc_create_bootstrap_mbox - Create the bootstrap mailbox
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to create the bootstrap mailbox
+ * region consistent with the SLI-4 interface spec.  This
+ * routine allocates all memory necessary to communicate
+ * mailbox commands to the port and sets up all alignment
+ * needs.  No locks are expected to be held when calling
+ * this routine.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - could not allocated memory.
+ **/
+static int
+lpfc_create_bootstrap_mbox(struct lpfc_hba *phba)
+{
+	uint32_t bmbx_size;
+	struct lpfc_dmabuf *dmabuf;
+	struct dma_address *dma_address;
+	uint32_t pa_addr;
+	uint64_t phys_addr;
+
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return -ENOMEM;
+
+	/*
+	 * The bootstrap mailbox region is comprised of 2 parts
+	 * plus an alignment restriction of 16 bytes.
+	 */
+	bmbx_size = sizeof(struct lpfc_bmbx_create) + (LPFC_ALIGN_16_BYTE - 1);
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  bmbx_size,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		kfree(dmabuf);
+		return -ENOMEM;
+	}
+	memset(dmabuf->virt, 0, bmbx_size);
+
+	/*
+	 * Initialize the bootstrap mailbox pointers now so that the register
+	 * operations are simple later.  The mailbox dma address is required
+	 * to be 16-byte aligned.  Also align the virtual memory as each
+	 * maibox is copied into the bmbx mailbox region before issuing the
+	 * command to the port.
+	 */
+	phba->sli4_hba.bmbx.dmabuf = dmabuf;
+	phba->sli4_hba.bmbx.bmbx_size = bmbx_size;
+
+	phba->sli4_hba.bmbx.avirt = PTR_ALIGN(dmabuf->virt,
+					      LPFC_ALIGN_16_BYTE);
+	phba->sli4_hba.bmbx.aphys = ALIGN(dmabuf->phys,
+					      LPFC_ALIGN_16_BYTE);
+
+	/*
+	 * Set the high and low physical addresses now.  The SLI4 alignment
+	 * requirement is 16 bytes and the mailbox is posted to the port
+	 * as two 30-bit addresses.  The other data is a bit marking whether
+	 * the 30-bit address is the high or low address.
+	 * Upcast bmbx aphys to 64bits so shift instruction compiles
+	 * clean on 32 bit machines.
+	 */
+	dma_address = &phba->sli4_hba.bmbx.dma_address;
+	phys_addr = (uint64_t)phba->sli4_hba.bmbx.aphys;
+	pa_addr = (uint32_t) ((phys_addr >> 34) & 0x3fffffff);
+	dma_address->addr_hi = (uint32_t) ((pa_addr << 2) |
+					   LPFC_BMBX_BIT1_ADDR_HI);
+
+	pa_addr = (uint32_t) ((phba->sli4_hba.bmbx.aphys >> 4) & 0x3fffffff);
+	dma_address->addr_lo = (uint32_t) ((pa_addr << 2) |
+					   LPFC_BMBX_BIT1_ADDR_LO);
+	return 0;
+}
+
+/**
+ * lpfc_destroy_bootstrap_mbox - Destroy all bootstrap mailbox resources
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to teardown the bootstrap mailbox
+ * region and release all host resources. This routine requires
+ * the caller to ensure all mailbox commands recovered, no
+ * additional mailbox comands are sent, and interrupts are disabled
+ * before calling this routine.
+ *
+ **/
+static void
+lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba)
+{
+	dma_free_coherent(&phba->pcidev->dev,
+			  phba->sli4_hba.bmbx.bmbx_size,
+			  phba->sli4_hba.bmbx.dmabuf->virt,
+			  phba->sli4_hba.bmbx.dmabuf->phys);
+
+	kfree(phba->sli4_hba.bmbx.dmabuf);
+	memset(&phba->sli4_hba.bmbx, 0, sizeof(struct lpfc_bmbx));
+}
+
+/**
+ * lpfc_sli4_read_config - Get the config parameters.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to read the configuration parameters from the HBA.
+ * The configuration parameters are used to set the base and maximum values
+ * for RPI's XRI's VPI's VFI's and FCFIs. These values also affect the resource
+ * allocation for the port.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+static int
+lpfc_sli4_read_config(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *pmb;
+	struct lpfc_mbx_read_config *rd_config;
+	uint32_t rc = 0;
+
+	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2011 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
+	}
+
+	lpfc_read_config(phba, pmb);
+
+	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2012 Mailbox failed , mbxCmd x%x "
+			"READ_CONFIG, mbxStatus x%x\n",
+			bf_get(lpfc_mqe_command, &pmb->u.mqe),
+			bf_get(lpfc_mqe_status, &pmb->u.mqe));
+		rc = -EIO;
+	} else {
+		rd_config = &pmb->u.mqe.un.rd_config;
+		phba->sli4_hba.max_cfg_param.max_xri =
+			bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+		phba->sli4_hba.max_cfg_param.xri_base =
+			bf_get(lpfc_mbx_rd_conf_xri_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_vpi =
+			bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.vpi_base =
+			bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_rpi =
+			bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.rpi_base =
+			bf_get(lpfc_mbx_rd_conf_rpi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_vfi =
+			bf_get(lpfc_mbx_rd_conf_vfi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.vfi_base =
+			bf_get(lpfc_mbx_rd_conf_vfi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_fcfi =
+			bf_get(lpfc_mbx_rd_conf_fcfi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.fcfi_base =
+			bf_get(lpfc_mbx_rd_conf_fcfi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_eq =
+			bf_get(lpfc_mbx_rd_conf_eq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_rq =
+			bf_get(lpfc_mbx_rd_conf_rq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_wq =
+			bf_get(lpfc_mbx_rd_conf_wq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_cq =
+			bf_get(lpfc_mbx_rd_conf_cq_count, rd_config);
+		phba->lmt = bf_get(lpfc_mbx_rd_conf_lmt, rd_config);
+		phba->sli4_hba.next_xri = phba->sli4_hba.max_cfg_param.xri_base;
+		phba->vpi_base = phba->sli4_hba.max_cfg_param.vpi_base;
+		phba->vfi_base = phba->sli4_hba.max_cfg_param.vfi_base;
+		phba->sli4_hba.next_rpi = phba->sli4_hba.max_cfg_param.rpi_base;
+		phba->max_vpi = phba->sli4_hba.max_cfg_param.max_vpi;
+		phba->max_vports = phba->max_vpi;
+		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"2003 cfg params XRI(B:%d M:%d), "
+				"VPI(B:%d M:%d) "
+				"VFI(B:%d M:%d) "
+				"RPI(B:%d M:%d) "
+				"FCFI(B:%d M:%d)\n",
+				phba->sli4_hba.max_cfg_param.xri_base,
+				phba->sli4_hba.max_cfg_param.max_xri,
+				phba->sli4_hba.max_cfg_param.vpi_base,
+				phba->sli4_hba.max_cfg_param.max_vpi,
+				phba->sli4_hba.max_cfg_param.vfi_base,
+				phba->sli4_hba.max_cfg_param.max_vfi,
+				phba->sli4_hba.max_cfg_param.rpi_base,
+				phba->sli4_hba.max_cfg_param.max_rpi,
+				phba->sli4_hba.max_cfg_param.fcfi_base,
+				phba->sli4_hba.max_cfg_param.max_fcfi);
+	}
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+	/* Reset the DFT_HBA_Q_DEPTH to the max xri  */
+	if (phba->cfg_hba_queue_depth > (phba->sli4_hba.max_cfg_param.max_xri))
+		phba->cfg_hba_queue_depth =
+				phba->sli4_hba.max_cfg_param.max_xri;
+	return rc;
+}
+
+/**
+ * lpfc_dev_endian_order_setup - Notify the port of the host's endian order.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to setup the host-side endian order to the
+ * HBA consistent with the SLI-4 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+static int
+lpfc_setup_endian_order(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t rc = 0;
+	uint32_t endian_mb_data[2] = {HOST_ENDIAN_LOW_WORD0,
+				      HOST_ENDIAN_HIGH_WORD1};
+
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0492 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * The SLI4_CONFIG_SPECIAL mailbox command requires the first two
+	 * words to contain special data values and no other data.
+	 */
+	memset(mboxq, 0, sizeof(LPFC_MBOXQ_t));
+	memcpy(&mboxq->u.mqe, &endian_mb_data, sizeof(endian_mb_data));
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0493 SLI_CONFIG_SPECIAL mailbox failed with "
+				"status x%x\n",
+				rc);
+		rc = -EIO;
+	}
+
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return rc;
+}
+
+/**
+ * lpfc_sli4_queue_create - Create all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate all the SLI4 queues for the FCoE HBA
+ * operation. For each SLI4 queue type, the parameters such as queue entry
+ * count (queue depth) shall be taken from the module parameter. For now,
+ * we just use some constant number as place holder.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+static int
+lpfc_sli4_queue_create(struct lpfc_hba *phba)
+{
+	struct lpfc_queue *qdesc;
+	int fcp_eqidx, fcp_cqidx, fcp_wqidx;
+	int cfg_fcp_wq_count;
+	int cfg_fcp_eq_count;
+
+	/*
+	 * Sanity check for confiugred queue parameters against the run-time
+	 * device parameters
+	 */
+
+	/* Sanity check on FCP fast-path WQ parameters */
+	cfg_fcp_wq_count = phba->cfg_fcp_wq_count;
+	if (cfg_fcp_wq_count >
+	    (phba->sli4_hba.max_cfg_param.max_wq - LPFC_SP_WQN_DEF)) {
+		cfg_fcp_wq_count = phba->sli4_hba.max_cfg_param.max_wq -
+				   LPFC_SP_WQN_DEF;
+		if (cfg_fcp_wq_count < LPFC_FP_WQN_MIN) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"2581 Not enough WQs (%d) from "
+					"the pci function for supporting "
+					"FCP WQs (%d)\n",
+					phba->sli4_hba.max_cfg_param.max_wq,
+					phba->cfg_fcp_wq_count);
+			goto out_error;
+		}
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2582 Not enough WQs (%d) from the pci "
+				"function for supporting the requested "
+				"FCP WQs (%d), the actual FCP WQs can "
+				"be supported: %d\n",
+				phba->sli4_hba.max_cfg_param.max_wq,
+				phba->cfg_fcp_wq_count, cfg_fcp_wq_count);
+	}
+	/* The actual number of FCP work queues adopted */
+	phba->cfg_fcp_wq_count = cfg_fcp_wq_count;
+
+	/* Sanity check on FCP fast-path EQ parameters */
+	cfg_fcp_eq_count = phba->cfg_fcp_eq_count;
+	if (cfg_fcp_eq_count >
+	    (phba->sli4_hba.max_cfg_param.max_eq - LPFC_SP_EQN_DEF)) {
+		cfg_fcp_eq_count = phba->sli4_hba.max_cfg_param.max_eq -
+				   LPFC_SP_EQN_DEF;
+		if (cfg_fcp_eq_count < LPFC_FP_EQN_MIN) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"2574 Not enough EQs (%d) from the "
+					"pci function for supporting FCP "
+					"EQs (%d)\n",
+					phba->sli4_hba.max_cfg_param.max_eq,
+					phba->cfg_fcp_eq_count);
+			goto out_error;
+		}
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2575 Not enough EQs (%d) from the pci "
+				"function for supporting the requested "
+				"FCP EQs (%d), the actual FCP EQs can "
+				"be supported: %d\n",
+				phba->sli4_hba.max_cfg_param.max_eq,
+				phba->cfg_fcp_eq_count, cfg_fcp_eq_count);
+	}
+	/* It does not make sense to have more EQs than WQs */
+	if (cfg_fcp_eq_count > phba->cfg_fcp_wq_count) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2593 The number of FCP EQs (%d) is more "
+				"than the number of FCP WQs (%d), take "
+				"the number of FCP EQs same as than of "
+				"WQs (%d)\n", cfg_fcp_eq_count,
+				phba->cfg_fcp_wq_count,
+				phba->cfg_fcp_wq_count);
+		cfg_fcp_eq_count = phba->cfg_fcp_wq_count;
+	}
+	/* The actual number of FCP event queues adopted */
+	phba->cfg_fcp_eq_count = cfg_fcp_eq_count;
+	/* The overall number of event queues used */
+	phba->sli4_hba.cfg_eqn = phba->cfg_fcp_eq_count + LPFC_SP_EQN_DEF;
+
+	/*
+	 * Create Event Queues (EQs)
+	 */
+
+	/* Get EQ depth from module parameter, fake the default for now */
+	phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
+	phba->sli4_hba.eq_ecount = LPFC_EQE_DEF_COUNT;
+
+	/* Create slow path event queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
+				      phba->sli4_hba.eq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0496 Failed allocate slow-path EQ\n");
+		goto out_error;
+	}
+	phba->sli4_hba.sp_eq = qdesc;
+
+	/* Create fast-path FCP Event Queue(s) */
+	phba->sli4_hba.fp_eq = kzalloc((sizeof(struct lpfc_queue *) *
+			       phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fp_eq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2576 Failed allocate memory for fast-path "
+				"EQ record array\n");
+		goto out_free_sp_eq;
+	}
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
+					      phba->sli4_hba.eq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0497 Failed allocate fast-path EQ\n");
+			goto out_free_fp_eq;
+		}
+		phba->sli4_hba.fp_eq[fcp_eqidx] = qdesc;
+	}
+
+	/*
+	 * Create Complete Queues (CQs)
+	 */
+
+	/* Get CQ depth from module parameter, fake the default for now */
+	phba->sli4_hba.cq_esize = LPFC_CQE_SIZE;
+	phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT;
+
+	/* Create slow-path Mailbox Command Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0500 Failed allocate slow-path mailbox CQ\n");
+		goto out_free_fp_eq;
+	}
+	phba->sli4_hba.mbx_cq = qdesc;
+
+	/* Create slow-path ELS Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0501 Failed allocate slow-path ELS CQ\n");
+		goto out_free_mbx_cq;
+	}
+	phba->sli4_hba.els_cq = qdesc;
+
+	/* Create slow-path Unsolicited Receive Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0502 Failed allocate slow-path USOL RX CQ\n");
+		goto out_free_els_cq;
+	}
+	phba->sli4_hba.rxq_cq = qdesc;
+
+	/* Create fast-path FCP Completion Queue(s), one-to-one with EQs */
+	phba->sli4_hba.fcp_cq = kzalloc((sizeof(struct lpfc_queue *) *
+				phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2577 Failed allocate memory for fast-path "
+				"CQ record array\n");
+		goto out_free_rxq_cq;
+	}
+	for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_eq_count; fcp_cqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+					      phba->sli4_hba.cq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0499 Failed allocate fast-path FCP "
+					"CQ (%d)\n", fcp_cqidx);
+			goto out_free_fcp_cq;
+		}
+		phba->sli4_hba.fcp_cq[fcp_cqidx] = qdesc;
+	}
+
+	/* Create Mailbox Command Queue */
+	phba->sli4_hba.mq_esize = LPFC_MQE_SIZE;
+	phba->sli4_hba.mq_ecount = LPFC_MQE_DEF_COUNT;
+
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.mq_esize,
+				      phba->sli4_hba.mq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0505 Failed allocate slow-path MQ\n");
+		goto out_free_fcp_cq;
+	}
+	phba->sli4_hba.mbx_wq = qdesc;
+
+	/*
+	 * Create all the Work Queues (WQs)
+	 */
+	phba->sli4_hba.wq_esize = LPFC_WQE_SIZE;
+	phba->sli4_hba.wq_ecount = LPFC_WQE_DEF_COUNT;
+
+	/* Create slow-path ELS Work Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+				      phba->sli4_hba.wq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0504 Failed allocate slow-path ELS WQ\n");
+		goto out_free_mbx_wq;
+	}
+	phba->sli4_hba.els_wq = qdesc;
+
+	/* Create fast-path FCP Work Queue(s) */
+	phba->sli4_hba.fcp_wq = kzalloc((sizeof(struct lpfc_queue *) *
+				phba->cfg_fcp_wq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2578 Failed allocate memory for fast-path "
+				"WQ record array\n");
+		goto out_free_els_wq;
+	}
+	for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_wq_count; fcp_wqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+					      phba->sli4_hba.wq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0503 Failed allocate fast-path FCP "
+					"WQ (%d)\n", fcp_wqidx);
+			goto out_free_fcp_wq;
+		}
+		phba->sli4_hba.fcp_wq[fcp_wqidx] = qdesc;
+	}
+
+	/*
+	 * Create Receive Queue (RQ)
+	 */
+	phba->sli4_hba.rq_esize = LPFC_RQE_SIZE;
+	phba->sli4_hba.rq_ecount = LPFC_RQE_DEF_COUNT;
+
+	/* Create Receive Queue for header */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize,
+				      phba->sli4_hba.rq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0506 Failed allocate receive HRQ\n");
+		goto out_free_fcp_wq;
+	}
+	phba->sli4_hba.hdr_rq = qdesc;
+
+	/* Create Receive Queue for data */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize,
+				      phba->sli4_hba.rq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0507 Failed allocate receive DRQ\n");
+		goto out_free_hdr_rq;
+	}
+	phba->sli4_hba.dat_rq = qdesc;
+
+	return 0;
+
+out_free_hdr_rq:
+	lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq);
+	phba->sli4_hba.hdr_rq = NULL;
+out_free_fcp_wq:
+	for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_wq[fcp_wqidx]);
+		phba->sli4_hba.fcp_wq[fcp_wqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fcp_wq);
+out_free_els_wq:
+	lpfc_sli4_queue_free(phba->sli4_hba.els_wq);
+	phba->sli4_hba.els_wq = NULL;
+out_free_mbx_wq:
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq);
+	phba->sli4_hba.mbx_wq = NULL;
+out_free_fcp_cq:
+	for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_cq[fcp_cqidx]);
+		phba->sli4_hba.fcp_cq[fcp_cqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fcp_cq);
+out_free_rxq_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.rxq_cq);
+	phba->sli4_hba.rxq_cq = NULL;
+out_free_els_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.els_cq);
+	phba->sli4_hba.els_cq = NULL;
+out_free_mbx_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq);
+	phba->sli4_hba.mbx_cq = NULL;
+out_free_fp_eq:
+	for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fp_eq[fcp_eqidx]);
+		phba->sli4_hba.fp_eq[fcp_eqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fp_eq);
+out_free_sp_eq:
+	lpfc_sli4_queue_free(phba->sli4_hba.sp_eq);
+	phba->sli4_hba.sp_eq = NULL;
+out_error:
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_queue_destroy - Destroy all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release all the SLI4 queues with the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+static void
+lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
+{
+	int fcp_qidx;
+
+	/* Release mailbox command work queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq);
+	phba->sli4_hba.mbx_wq = NULL;
+
+	/* Release ELS work queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.els_wq);
+	phba->sli4_hba.els_wq = NULL;
+
+	/* Release FCP work queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_wq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_wq[fcp_qidx]);
+	kfree(phba->sli4_hba.fcp_wq);
+	phba->sli4_hba.fcp_wq = NULL;
+
+	/* Release unsolicited receive queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq);
+	phba->sli4_hba.hdr_rq = NULL;
+	lpfc_sli4_queue_free(phba->sli4_hba.dat_rq);
+	phba->sli4_hba.dat_rq = NULL;
+
+	/* Release unsolicited receive complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.rxq_cq);
+	phba->sli4_hba.rxq_cq = NULL;
+
+	/* Release ELS complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.els_cq);
+	phba->sli4_hba.els_cq = NULL;
+
+	/* Release mailbox command complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq);
+	phba->sli4_hba.mbx_cq = NULL;
+
+	/* Release FCP response complete queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_cq[fcp_qidx]);
+	kfree(phba->sli4_hba.fcp_cq);
+	phba->sli4_hba.fcp_cq = NULL;
+
+	/* Release fast-path event queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fp_eq[fcp_qidx]);
+	kfree(phba->sli4_hba.fp_eq);
+	phba->sli4_hba.fp_eq = NULL;
+
+	/* Release slow-path event queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.sp_eq);
+	phba->sli4_hba.sp_eq = NULL;
+
+	return;
+}
+
+/**
+ * lpfc_sli4_queue_setup - Set up all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up all the SLI4 queues for the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_queue_setup(struct lpfc_hba *phba)
+{
+	int rc = -ENOMEM;
+	int fcp_eqidx, fcp_cqidx, fcp_wqidx;
+	int fcp_cq_index = 0;
+
+	/*
+	 * Set up Event Queues (EQs)
+	 */
+
+	/* Set up slow-path event queue */
+	if (!phba->sli4_hba.sp_eq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0520 Slow-path EQ not allocated\n");
+		goto out_error;
+	}
+	rc = lpfc_eq_create(phba, phba->sli4_hba.sp_eq,
+			    LPFC_SP_DEF_IMAX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0521 Failed setup of slow-path EQ: "
+				"rc = 0x%x\n", rc);
+		goto out_error;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2583 Slow-path EQ setup: queue-id=%d\n",
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up fast-path event queue */
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		if (!phba->sli4_hba.fp_eq[fcp_eqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0522 Fast-path EQ (%d) not "
+					"allocated\n", fcp_eqidx);
+			goto out_destroy_fp_eq;
+		}
+		rc = lpfc_eq_create(phba, phba->sli4_hba.fp_eq[fcp_eqidx],
+				    phba->cfg_fcp_imax);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0523 Failed setup of fast-path EQ "
+					"(%d), rc = 0x%x\n", fcp_eqidx, rc);
+			goto out_destroy_fp_eq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2584 Fast-path EQ setup: "
+				"queue[%d]-id=%d\n", fcp_eqidx,
+				phba->sli4_hba.fp_eq[fcp_eqidx]->queue_id);
+	}
+
+	/*
+	 * Set up Complete Queues (CQs)
+	 */
+
+	/* Set up slow-path MBOX Complete Queue as the first CQ */
+	if (!phba->sli4_hba.mbx_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0528 Mailbox CQ not allocated\n");
+		goto out_destroy_fp_eq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.mbx_cq, phba->sli4_hba.sp_eq,
+			    LPFC_MCQ, LPFC_MBOX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0529 Failed setup of slow-path mailbox CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fp_eq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2585 MBX CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.mbx_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up slow-path ELS Complete Queue */
+	if (!phba->sli4_hba.els_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0530 ELS CQ not allocated\n");
+		goto out_destroy_mbx_cq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.els_cq, phba->sli4_hba.sp_eq,
+			    LPFC_WCQ, LPFC_ELS);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0531 Failed setup of slow-path ELS CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_mbx_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2586 ELS CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.els_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up slow-path Unsolicited Receive Complete Queue */
+	if (!phba->sli4_hba.rxq_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0532 USOL RX CQ not allocated\n");
+		goto out_destroy_els_cq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.rxq_cq, phba->sli4_hba.sp_eq,
+			    LPFC_RCQ, LPFC_USOL);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0533 Failed setup of slow-path USOL RX CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_els_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2587 USL CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.rxq_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up fast-path FCP Response Complete Queue */
+	for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_eq_count; fcp_cqidx++) {
+		if (!phba->sli4_hba.fcp_cq[fcp_cqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0526 Fast-path FCP CQ (%d) not "
+					"allocated\n", fcp_cqidx);
+			goto out_destroy_fcp_cq;
+		}
+		rc = lpfc_cq_create(phba, phba->sli4_hba.fcp_cq[fcp_cqidx],
+				    phba->sli4_hba.fp_eq[fcp_cqidx],
+				    LPFC_WCQ, LPFC_FCP);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0527 Failed setup of fast-path FCP "
+					"CQ (%d), rc = 0x%x\n", fcp_cqidx, rc);
+			goto out_destroy_fcp_cq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2588 FCP CQ setup: cq[%d]-id=%d, "
+				"parent eq[%d]-id=%d\n",
+				fcp_cqidx,
+				phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id,
+				fcp_cqidx,
+				phba->sli4_hba.fp_eq[fcp_cqidx]->queue_id);
+	}
+
+	/*
+	 * Set up all the Work Queues (WQs)
+	 */
+
+	/* Set up Mailbox Command Queue */
+	if (!phba->sli4_hba.mbx_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0538 Slow-path MQ not allocated\n");
+		goto out_destroy_fcp_cq;
+	}
+	rc = lpfc_mq_create(phba, phba->sli4_hba.mbx_wq,
+			    phba->sli4_hba.mbx_cq, LPFC_MBOX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0539 Failed setup of slow-path MQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fcp_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n",
+			phba->sli4_hba.mbx_wq->queue_id,
+			phba->sli4_hba.mbx_cq->queue_id);
+
+	/* Set up slow-path ELS Work Queue */
+	if (!phba->sli4_hba.els_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0536 Slow-path ELS WQ not allocated\n");
+		goto out_destroy_mbx_wq;
+	}
+	rc = lpfc_wq_create(phba, phba->sli4_hba.els_wq,
+			    phba->sli4_hba.els_cq, LPFC_ELS);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0537 Failed setup of slow-path ELS WQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_mbx_wq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n",
+			phba->sli4_hba.els_wq->queue_id,
+			phba->sli4_hba.els_cq->queue_id);
+
+	/* Set up fast-path FCP Work Queue */
+	for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_wq_count; fcp_wqidx++) {
+		if (!phba->sli4_hba.fcp_wq[fcp_wqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0534 Fast-path FCP WQ (%d) not "
+					"allocated\n", fcp_wqidx);
+			goto out_destroy_fcp_wq;
+		}
+		rc = lpfc_wq_create(phba, phba->sli4_hba.fcp_wq[fcp_wqidx],
+				    phba->sli4_hba.fcp_cq[fcp_cq_index],
+				    LPFC_FCP);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0535 Failed setup of fast-path FCP "
+					"WQ (%d), rc = 0x%x\n", fcp_wqidx, rc);
+			goto out_destroy_fcp_wq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2591 FCP WQ setup: wq[%d]-id=%d, "
+				"parent cq[%d]-id=%d\n",
+				fcp_wqidx,
+				phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id,
+				fcp_cq_index,
+				phba->sli4_hba.fcp_cq[fcp_cq_index]->queue_id);
+		/* Round robin FCP Work Queue's Completion Queue assignment */
+		fcp_cq_index = ((fcp_cq_index + 1) % phba->cfg_fcp_eq_count);
+	}
+
+	/*
+	 * Create Receive Queue (RQ)
+	 */
+	if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0540 Receive Queue not allocated\n");
+		goto out_destroy_fcp_wq;
+	}
+	rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
+			    phba->sli4_hba.rxq_cq, LPFC_USOL);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0541 Failed setup of Receive Queue: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fcp_wq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2592 USL RQ setup: hdr-rq-id=%d, dat-rq-id=%d "
+			"parent cq-id=%d\n",
+			phba->sli4_hba.hdr_rq->queue_id,
+			phba->sli4_hba.dat_rq->queue_id,
+			phba->sli4_hba.rxq_cq->queue_id);
+	return 0;
+
+out_destroy_fcp_wq:
+	for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--)
+		lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_wqidx]);
+	lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
+out_destroy_mbx_wq:
+	lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq);
+out_destroy_fcp_cq:
+	for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--)
+		lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_cqidx]);
+	lpfc_cq_destroy(phba, phba->sli4_hba.rxq_cq);
+out_destroy_els_cq:
+	lpfc_cq_destroy(phba, phba->sli4_hba.els_cq);
+out_destroy_mbx_cq:
+	lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq);
+out_destroy_fp_eq:
+	for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--)
+		lpfc_eq_destroy(phba, phba->sli4_hba.fp_eq[fcp_eqidx]);
+	lpfc_eq_destroy(phba, phba->sli4_hba.sp_eq);
+out_error:
+	return rc;
+}
+
+/**
+ * lpfc_sli4_queue_unset - Unset all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset all the SLI4 queues with the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+void
+lpfc_sli4_queue_unset(struct lpfc_hba *phba)
+{
+	int fcp_qidx;
+
+	/* Unset mailbox command work queue */
+	lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq);
+	/* Unset ELS work queue */
+	lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
+	/* Unset unsolicited receive queue */
+	lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq);
+	/* Unset FCP work queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_wq_count; fcp_qidx++)
+		lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_qidx]);
+	/* Unset mailbox command complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq);
+	/* Unset ELS complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.els_cq);
+	/* Unset unsolicited receive complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.rxq_cq);
+	/* Unset FCP response complete queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_qidx]);
+	/* Unset fast-path event queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_eq_destroy(phba, phba->sli4_hba.fp_eq[fcp_qidx]);
+	/* Unset slow-path event queue */
+	lpfc_eq_destroy(phba, phba->sli4_hba.sp_eq);
+}
+
+/**
+ * lpfc_sli4_cq_event_pool_create - Create completion-queue event free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate and set up a pool of completion queue
+ * events. The body of the completion queue event is a completion queue entry
+ * CQE. For now, this pool is used for the interrupt service routine to queue
+ * the following HBA completion queue events for the worker thread to process:
+ *   - Mailbox asynchronous events
+ *   - Receive queue completion unsolicited events
+ * Later, this can be used for all the slow-path events.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      -ENOMEM - No availble memory
+ **/
+static int
+lpfc_sli4_cq_event_pool_create(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+	int i;
+
+	for (i = 0; i < (4 * phba->sli4_hba.cq_ecount); i++) {
+		cq_event = kmalloc(sizeof(struct lpfc_cq_event), GFP_KERNEL);
+		if (!cq_event)
+			goto out_pool_create_fail;
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_cqe_event_pool);
+	}
+	return 0;
+
+out_pool_create_fail:
+	lpfc_sli4_cq_event_pool_destroy(phba);
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_cq_event_pool_destroy - Free completion-queue event free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the pool of completion queue events at
+ * driver unload time. Note that, it is the responsibility of the driver
+ * cleanup routine to free all the outstanding completion-queue events
+ * allocated from this pool back into the pool before invoking this routine
+ * to destroy the pool.
+ **/
+static void
+lpfc_sli4_cq_event_pool_destroy(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event, *next_cq_event;
+
+	list_for_each_entry_safe(cq_event, next_cq_event,
+				 &phba->sli4_hba.sp_cqe_event_pool, list) {
+		list_del(&cq_event->list);
+		kfree(cq_event);
+	}
+}
+
+/**
+ * __lpfc_sli4_cq_event_alloc - Allocate a completion-queue event from free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is the lock free version of the API invoked to allocate a
+ * completion-queue event from the free pool.
+ *
+ * Return: Pointer to the newly allocated completion-queue event if successful
+ *         NULL otherwise.
+ **/
+struct lpfc_cq_event *
+__lpfc_sli4_cq_event_alloc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event = NULL;
+
+	list_remove_head(&phba->sli4_hba.sp_cqe_event_pool, cq_event,
+			 struct lpfc_cq_event, list);
+	return cq_event;
+}
+
+/**
+ * lpfc_sli4_cq_event_alloc - Allocate a completion-queue event from free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is the lock version of the API invoked to allocate a
+ * completion-queue event from the free pool.
+ *
+ * Return: Pointer to the newly allocated completion-queue event if successful
+ *         NULL otherwise.
+ **/
+struct lpfc_cq_event *
+lpfc_sli4_cq_event_alloc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	cq_event = __lpfc_sli4_cq_event_alloc(phba);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	return cq_event;
+}
+
+/**
+ * __lpfc_sli4_cq_event_release - Release a completion-queue event to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @cq_event: pointer to the completion queue event to be freed.
+ *
+ * This routine is the lock free version of the API invoked to release a
+ * completion-queue event back into the free pool.
+ **/
+void
+__lpfc_sli4_cq_event_release(struct lpfc_hba *phba,
+			     struct lpfc_cq_event *cq_event)
+{
+	list_add_tail(&cq_event->list, &phba->sli4_hba.sp_cqe_event_pool);
+}
+
+/**
+ * lpfc_sli4_cq_event_release - Release a completion-queue event to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @cq_event: pointer to the completion queue event to be freed.
+ *
+ * This routine is the lock version of the API invoked to release a
+ * completion-queue event back into the free pool.
+ **/
+void
+lpfc_sli4_cq_event_release(struct lpfc_hba *phba,
+			   struct lpfc_cq_event *cq_event)
+{
+	unsigned long iflags;
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	__lpfc_sli4_cq_event_release(phba, cq_event);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+}
+
+/**
+ * lpfc_sli4_cq_event_release_all - Release all cq events to the free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is to free all the pending completion-queue events to the
+ * back into the free pool for device reset.
+ **/
+static void
+lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
+{
+	LIST_HEAD(cqelist);
+	struct lpfc_cq_event *cqe;
+	unsigned long iflags;
+
+	/* Retrieve all the pending WCQEs from pending WCQE lists */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	/* Pending FCP XRI abort events */
+	list_splice_init(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
+			 &cqelist);
+	/* Pending ELS XRI abort events */
+	list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
+			 &cqelist);
+	/* Pending asynnc events */
+	list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
+			 &cqelist);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	while (!list_empty(&cqelist)) {
+		list_remove_head(&cqelist, cqe, struct lpfc_cq_event, list);
+		lpfc_sli4_cq_event_release(phba, cqe);
+	}
+}
+
+/**
+ * lpfc_pci_function_reset - Reset pci function.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to request a PCI function reset. It will destroys
+ * all resources assigned to the PCI function which originates this request.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_pci_function_reset(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0494 Unable to allocate memory for issuing "
+				"SLI_FUNCTION_RESET mailbox command\n");
+		return -ENOMEM;
+	}
+
+	/* Set up PCI function reset SLI4_CONFIG mailbox-ioctl command */
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_FUNCTION_RESET, 0,
+			 LPFC_SLI4_MBX_EMBED);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0495 SLI_FUNCTION_RESET mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands
+ * @phba: pointer to lpfc hba data structure.
+ * @cnt: number of nop mailbox commands to send.
+ *
+ * This routine is invoked to send a number @cnt of NOP mailbox command and
+ * wait for each command to complete.
+ *
+ * Return: the number of NOP mailbox command completed.
+ **/
+static int
+lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt)
+{
+	LPFC_MBOXQ_t *mboxq;
+	int length, cmdsent;
+	uint32_t mbox_tmo;
+	uint32_t rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (cnt == 0) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2518 Requested to send 0 NOP mailbox cmd\n");
+		return cnt;
+	}
+
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2519 Unable to allocate memory for issuing "
+				"NOP mailbox command\n");
+		return 0;
+	}
+
+	/* Set up NOP SLI4_CONFIG mailbox-ioctl command */
+	length = (sizeof(struct lpfc_mbx_nop) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_NOP, length, LPFC_SLI4_MBX_EMBED);
+
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+	for (cmdsent = 0; cmdsent < cnt; cmdsent++) {
+		if (!phba->sli4_hba.intr_enable)
+			rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+		else
+			rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+		if (rc == MBX_TIMEOUT)
+			break;
+		/* Check return status */
+		shdr = (union lpfc_sli4_cfg_shdr *)
+			&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+		shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+		shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+					 &shdr->response);
+		if (shdr_status || shdr_add_status || rc) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+					"2520 NOP mailbox command failed "
+					"status x%x add_status x%x mbx "
+					"status x%x\n", shdr_status,
+					shdr_add_status, rc);
+			break;
+		}
+	}
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+
+	return cmdsent;
+}
+
+/**
+ * lpfc_sli4_fcfi_unreg - Unregister fcfi to device
+ * @phba: pointer to lpfc hba data structure.
+ * @fcfi: fcf index.
+ *
+ * This routine is invoked to unregister a FCFI from device.
+ **/
+void
+lpfc_sli4_fcfi_unreg(struct lpfc_hba *phba, uint16_t fcfi)
+{
+	LPFC_MBOXQ_t *mbox;
+	uint32_t mbox_tmo;
+	int rc;
+	unsigned long flags;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+
+	if (!mbox)
+		return;
+
+	lpfc_unreg_fcfi(mbox, fcfi);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (rc != MBX_SUCCESS)
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2517 Unregister FCFI command failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mbox->u.mqe));
+	else {
+		spin_lock_irqsave(&phba->hbalock, flags);
+		/* Mark the FCFI is no longer registered */
+		phba->fcf.fcf_flag &=
+			~(FCF_AVAILABLE | FCF_REGISTERED | FCF_DISCOVERED);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+	}
+}
+
+/**
+ * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the PCI device memory space for device
+ * with SLI-4 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	unsigned long bar0map_len, bar1map_len, bar2map_len;
+	int error = -ENODEV;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return error;
+	else
+		pdev = phba->pcidev;
+
+	/* Set the device DMA mask size */
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
+			return error;
+
+	/* Get the bus address of SLI4 device Bar0, Bar1, and Bar2 and the
+	 * number of bytes required by each mapping. They are actually
+	 * mapping to the PCI BAR regions 1, 2, and 4 by the SLI4 device.
+	 */
+	phba->pci_bar0_map = pci_resource_start(pdev, LPFC_SLI4_BAR0);
+	bar0map_len = pci_resource_len(pdev, LPFC_SLI4_BAR0);
+
+	phba->pci_bar1_map = pci_resource_start(pdev, LPFC_SLI4_BAR1);
+	bar1map_len = pci_resource_len(pdev, LPFC_SLI4_BAR1);
+
+	phba->pci_bar2_map = pci_resource_start(pdev, LPFC_SLI4_BAR2);
+	bar2map_len = pci_resource_len(pdev, LPFC_SLI4_BAR2);
+
+	/* Map SLI4 PCI Config Space Register base to a kernel virtual addr */
+	phba->sli4_hba.conf_regs_memmap_p =
+				ioremap(phba->pci_bar0_map, bar0map_len);
+	if (!phba->sli4_hba.conf_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 PCI config registers.\n");
+		goto out;
+	}
+
+	/* Map SLI4 HBA Control Register base to a kernel virtual address. */
+	phba->sli4_hba.ctrl_regs_memmap_p =
+				ioremap(phba->pci_bar1_map, bar1map_len);
+	if (!phba->sli4_hba.ctrl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 HBA control registers.\n");
+		goto out_iounmap_conf;
+	}
+
+	/* Map SLI4 HBA Doorbell Register base to a kernel virtual address. */
+	phba->sli4_hba.drbl_regs_memmap_p =
+				ioremap(phba->pci_bar2_map, bar2map_len);
+	if (!phba->sli4_hba.drbl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 HBA doorbell registers.\n");
+		goto out_iounmap_ctrl;
+	}
+
+	/* Set up BAR0 PCI config space register memory map */
+	lpfc_sli4_bar0_register_memmap(phba);
+
+	/* Set up BAR1 register memory map */
+	lpfc_sli4_bar1_register_memmap(phba);
+
+	/* Set up BAR2 register memory map */
+	error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0);
+	if (error)
+		goto out_iounmap_all;
+
+	return 0;
+
+out_iounmap_all:
+	iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+out_iounmap_ctrl:
+	iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+out_iounmap_conf:
+	iounmap(phba->sli4_hba.conf_regs_memmap_p);
+out:
+	return error;
+}
+
+/**
+ * lpfc_sli4_pci_mem_unset - Unset SLI4 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the PCI device memory space for device
+ * with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+
+	/* Free coherent DMA memory allocated */
+
+	/* Unmap I/O memory space */
+	iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+	iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+	iounmap(phba->sli4_hba.conf_regs_memmap_p);
+
+	return;
+}
+
+/**
+ * lpfc_sli_enable_msix - Enable MSI-X interrupt mode on SLI-3 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI-X interrupt vectors to device
+ * with SLI-3 interface specs. The kernel function pci_enable_msix() is
+ * called to enable the MSI-X vectors. Note that pci_enable_msix(), once
+ * invoked, enables either all or nothing, depending on the current
+ * availability of PCI vector resources. The device driver is responsible
+ * for calling the individual request_irq() to register each MSI-X vector
+ * with a interrupt handler, which is done in this function. Note that
  * later when device is unloading, the driver should always call free_irq()
  * on all MSI-X vectors it has done request_irq() on before calling
  * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device
@@ -2333,7 +6029,7 @@
  *   other values - error
  **/
 static int
-lpfc_enable_msix(struct lpfc_hba *phba)
+lpfc_sli_enable_msix(struct lpfc_hba *phba)
 {
 	int rc, i;
 	LPFC_MBOXQ_t *pmb;
@@ -2349,20 +6045,21 @@
 		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 				"0420 PCI enable MSI-X failed (%d)\n", rc);
 		goto msi_fail_out;
-	} else
-		for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"0477 MSI-X entry[%d]: vector=x%x "
-					"message=%d\n", i,
-					phba->msix_entries[i].vector,
-					phba->msix_entries[i].entry);
+	}
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0477 MSI-X entry[%d]: vector=x%x "
+				"message=%d\n", i,
+				phba->msix_entries[i].vector,
+				phba->msix_entries[i].entry);
 	/*
 	 * Assign MSI-X vectors to interrupt handlers
 	 */
 
 	/* vector-0 is associated to slow-path handler */
-	rc = request_irq(phba->msix_entries[0].vector, &lpfc_sp_intr_handler,
-			 IRQF_SHARED, LPFC_SP_DRIVER_HANDLER_NAME, phba);
+	rc = request_irq(phba->msix_entries[0].vector,
+			 &lpfc_sli_sp_intr_handler, IRQF_SHARED,
+			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
 	if (rc) {
 		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
 				"0421 MSI-X slow-path request_irq failed "
@@ -2371,8 +6068,9 @@
 	}
 
 	/* vector-1 is associated to fast-path handler */
-	rc = request_irq(phba->msix_entries[1].vector, &lpfc_fp_intr_handler,
-			 IRQF_SHARED, LPFC_FP_DRIVER_HANDLER_NAME, phba);
+	rc = request_irq(phba->msix_entries[1].vector,
+			 &lpfc_sli_fp_intr_handler, IRQF_SHARED,
+			 LPFC_FP_DRIVER_HANDLER_NAME, phba);
 
 	if (rc) {
 		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
@@ -2401,7 +6099,7 @@
 		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
 				"0351 Config MSI mailbox command failed, "
 				"mbxCmd x%x, mbxStatus x%x\n",
-				pmb->mb.mbxCommand, pmb->mb.mbxStatus);
+				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus);
 		goto mbx_fail_out;
 	}
 
@@ -2428,14 +6126,14 @@
 }
 
 /**
- * lpfc_disable_msix - Disable MSI-X interrupt mode
+ * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device.
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to release the MSI-X vectors and then disable the
- * MSI-X interrupt mode.
+ * MSI-X interrupt mode to device with SLI-3 interface spec.
  **/
 static void
-lpfc_disable_msix(struct lpfc_hba *phba)
+lpfc_sli_disable_msix(struct lpfc_hba *phba)
 {
 	int i;
 
@@ -2444,23 +6142,26 @@
 		free_irq(phba->msix_entries[i].vector, phba);
 	/* Disable MSI-X */
 	pci_disable_msix(phba->pcidev);
+
+	return;
 }
 
 /**
- * lpfc_enable_msi - Enable MSI interrupt mode
+ * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable the MSI interrupt mode. The kernel
- * function pci_enable_msi() is called to enable the MSI vector. The
- * device driver is responsible for calling the request_irq() to register
- * MSI vector with a interrupt the handler, which is done in this function.
+ * This routine is invoked to enable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The kernel function pci_enable_msi() is called to
+ * enable the MSI vector. The device driver is responsible for calling the
+ * request_irq() to register MSI vector with a interrupt the handler, which
+ * is done in this function.
  *
  * Return codes
  * 	0 - sucessful
  * 	other values - error
  */
 static int
-lpfc_enable_msi(struct lpfc_hba *phba)
+lpfc_sli_enable_msi(struct lpfc_hba *phba)
 {
 	int rc;
 
@@ -2474,7 +6175,7 @@
 		return rc;
 	}
 
-	rc = request_irq(phba->pcidev->irq, lpfc_intr_handler,
+	rc = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
 			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
 	if (rc) {
 		pci_disable_msi(phba->pcidev);
@@ -2485,17 +6186,17 @@
 }
 
 /**
- * lpfc_disable_msi - Disable MSI interrupt mode
+ * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to disable the MSI interrupt mode. The driver
- * calls free_irq() on MSI vector it has done request_irq() on before
- * calling pci_disable_msi(). Failure to do so results in a BUG_ON() and
- * a device will be left with MSI enabled and leaks its vector.
+ * This routine is invoked to disable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has
+ * done request_irq() on before calling pci_disable_msi(). Failure to do so
+ * results in a BUG_ON() and a device will be left with MSI enabled and leaks
+ * its vector.
  */
-
 static void
-lpfc_disable_msi(struct lpfc_hba *phba)
+lpfc_sli_disable_msi(struct lpfc_hba *phba)
 {
 	free_irq(phba->pcidev->irq, phba);
 	pci_disable_msi(phba->pcidev);
@@ -2503,80 +6204,33 @@
 }
 
 /**
- * lpfc_log_intr_mode - Log the active interrupt mode
- * @phba: pointer to lpfc hba data structure.
- * @intr_mode: active interrupt mode adopted.
- *
- * This routine it invoked to log the currently used active interrupt mode
- * to the device.
- */
-static void
-lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
-{
-	switch (intr_mode) {
-	case 0:
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0470 Enable INTx interrupt mode.\n");
-		break;
-	case 1:
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0481 Enabled MSI interrupt mode.\n");
-		break;
-	case 2:
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0480 Enabled MSI-X interrupt mode.\n");
-		break;
-	default:
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0482 Illegal interrupt mode.\n");
-		break;
-	}
-	return;
-}
-
-static void
-lpfc_stop_port(struct lpfc_hba *phba)
-{
-	/* Clear all interrupt enable conditions */
-	writel(0, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
-	/* Clear all pending interrupts */
-	writel(0xffffffff, phba->HAregaddr);
-	readl(phba->HAregaddr); /* flush */
-
-	/* Reset some HBA SLI setup states */
-	lpfc_stop_phba_timers(phba);
-	phba->pport->work_port_events = 0;
-
-	return;
-}
-
-/**
- * lpfc_enable_intr - Enable device interrupt
+ * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device.
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to enable device interrupt and associate driver's
- * interrupt handler(s) to interrupt vector(s). Depends on the interrupt
- * mode configured to the driver, the driver will try to fallback from the
- * configured interrupt mode to an interrupt mode which is supported by the
- * platform, kernel, and device in the order of: MSI-X -> MSI -> IRQ.
+ * interrupt handler(s) to interrupt vector(s) to device with SLI-3 interface
+ * spec. Depends on the interrupt mode configured to the driver, the driver
+ * will try to fallback from the configured interrupt mode to an interrupt
+ * mode which is supported by the platform, kernel, and device in the order
+ * of:
+ * MSI-X -> MSI -> IRQ.
  *
  * Return codes
  *   0 - sucessful
  *   other values - error
  **/
 static uint32_t
-lpfc_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
 {
 	uint32_t intr_mode = LPFC_INTR_ERROR;
 	int retval;
 
 	if (cfg_mode == 2) {
 		/* Need to issue conf_port mbox cmd before conf_msi mbox cmd */
-		retval = lpfc_sli_config_port(phba, 3);
+		retval = lpfc_sli_config_port(phba, LPFC_SLI_REV3);
 		if (!retval) {
 			/* Now, try to enable MSI-X interrupt mode */
-			retval = lpfc_enable_msix(phba);
+			retval = lpfc_sli_enable_msix(phba);
 			if (!retval) {
 				/* Indicate initialization to MSI-X mode */
 				phba->intr_type = MSIX;
@@ -2587,7 +6241,7 @@
 
 	/* Fallback to MSI if MSI-X initialization failed */
 	if (cfg_mode >= 1 && phba->intr_type == NONE) {
-		retval = lpfc_enable_msi(phba);
+		retval = lpfc_sli_enable_msi(phba);
 		if (!retval) {
 			/* Indicate initialization to MSI mode */
 			phba->intr_type = MSI;
@@ -2597,7 +6251,7 @@
 
 	/* Fallback to INTx if both MSI-X/MSI initalization failed */
 	if (phba->intr_type == NONE) {
-		retval = request_irq(phba->pcidev->irq, lpfc_intr_handler,
+		retval = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
 				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
 		if (!retval) {
 			/* Indicate initialization to INTx mode */
@@ -2609,22 +6263,22 @@
 }
 
 /**
- * lpfc_disable_intr - Disable device interrupt
+ * lpfc_sli_disable_intr - Disable device interrupt to SLI-3 device.
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to disable device interrupt and disassociate the
- * driver's interrupt handler(s) from interrupt vector(s). Depending on the
- * interrupt mode, the driver will release the interrupt vector(s) for the
- * message signaled interrupt.
+ * driver's interrupt handler(s) from interrupt vector(s) to device with
+ * SLI-3 interface spec. Depending on the interrupt mode, the driver will
+ * release the interrupt vector(s) for the message signaled interrupt.
  **/
 static void
-lpfc_disable_intr(struct lpfc_hba *phba)
+lpfc_sli_disable_intr(struct lpfc_hba *phba)
 {
 	/* Disable the currently initialized interrupt mode */
 	if (phba->intr_type == MSIX)
-		lpfc_disable_msix(phba);
+		lpfc_sli_disable_msix(phba);
 	else if (phba->intr_type == MSI)
-		lpfc_disable_msi(phba);
+		lpfc_sli_disable_msi(phba);
 	else if (phba->intr_type == INTx)
 		free_irq(phba->pcidev->irq, phba);
 
@@ -2636,263 +6290,503 @@
 }
 
 /**
- * lpfc_pci_probe_one - lpfc PCI probe func to register device to PCI subsystem
- * @pdev: pointer to PCI device
- * @pid: pointer to PCI device identifier
+ * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
  *
- * This routine is to be registered to the kernel's PCI subsystem. When an
- * Emulex HBA is presented in PCI bus, the kernel PCI subsystem looks at
- * PCI device-specific information of the device and driver to see if the
- * driver state that it can support this kind of device. If the match is
- * successful, the driver core invokes this routine. If this routine
- * determines it can claim the HBA, it does all the initialization that it
- * needs to do to handle the HBA properly.
+ * This routine is invoked to enable the MSI-X interrupt vectors to device
+ * with SLI-4 interface spec. The kernel function pci_enable_msix() is called
+ * to enable the MSI-X vectors. Note that pci_enable_msix(), once invoked,
+ * enables either all or nothing, depending on the current availability of
+ * PCI vector resources. The device driver is responsible for calling the
+ * individual request_irq() to register each MSI-X vector with a interrupt
+ * handler, which is done in this function. Note that later when device is
+ * unloading, the driver should always call free_irq() on all MSI-X vectors
+ * it has done request_irq() on before calling pci_disable_msix(). Failure
+ * to do so results in a BUG_ON() and a device will be left with MSI-X
+ * enabled and leaks its vectors.
  *
- * Return code
- *   0 - driver can claim the device
- *   negative value - driver can not claim the device
+ * Return codes
+ * 0 - sucessful
+ * other values - error
  **/
-static int __devinit
-lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+static int
+lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport = NULL;
-	struct lpfc_hba   *phba;
-	struct lpfc_sli   *psli;
-	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
-	struct Scsi_Host  *shost = NULL;
-	void *ptr;
-	unsigned long bar0map_len, bar2map_len;
-	int error = -ENODEV, retval;
-	int  i, hbq_count;
-	uint16_t iotag;
-	uint32_t cfg_mode, intr_mode;
-	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	struct lpfc_adapter_event_header adapter_event;
+	int rc, index;
 
-	if (pci_enable_device_mem(pdev))
-		goto out;
-	if (pci_request_selected_regions(pdev, bars, LPFC_DRIVER_NAME))
-		goto out_disable_device;
+	/* Set up MSI-X multi-message vectors */
+	for (index = 0; index < phba->sli4_hba.cfg_eqn; index++)
+		phba->sli4_hba.msix_entries[index].entry = index;
 
-	phba = kzalloc(sizeof (struct lpfc_hba), GFP_KERNEL);
-	if (!phba)
-		goto out_release_regions;
-
-	atomic_set(&phba->fast_event_count, 0);
-	spin_lock_init(&phba->hbalock);
-
-	/* Initialize ndlp management spinlock */
-	spin_lock_init(&phba->ndlp_lock);
-
-	phba->pcidev = pdev;
-
-	/* Assign an unused board number */
-	if ((phba->brd_no = lpfc_get_instance()) < 0)
-		goto out_free_phba;
-
-	INIT_LIST_HEAD(&phba->port_list);
-	init_waitqueue_head(&phba->wait_4_mlo_m_q);
+	/* Configure MSI-X capability structure */
+	rc = pci_enable_msix(phba->pcidev, phba->sli4_hba.msix_entries,
+			     phba->sli4_hba.cfg_eqn);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0484 PCI enable MSI-X failed (%d)\n", rc);
+		goto msi_fail_out;
+	}
+	/* Log MSI-X vector assignment */
+	for (index = 0; index < phba->sli4_hba.cfg_eqn; index++)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0489 MSI-X entry[%d]: vector=x%x "
+				"message=%d\n", index,
+				phba->sli4_hba.msix_entries[index].vector,
+				phba->sli4_hba.msix_entries[index].entry);
 	/*
-	 * Get all the module params for configuring this host and then
-	 * establish the host.
+	 * Assign MSI-X vectors to interrupt handlers
 	 */
-	lpfc_get_cfgparam(phba);
-	phba->max_vpi = LPFC_MAX_VPI;
 
-	/* Initialize timers used by driver */
-	init_timer(&phba->hb_tmofunc);
-	phba->hb_tmofunc.function = lpfc_hb_timeout;
-	phba->hb_tmofunc.data = (unsigned long)phba;
+	/* The first vector must associated to slow-path handler for MQ */
+	rc = request_irq(phba->sli4_hba.msix_entries[0].vector,
+			 &lpfc_sli4_sp_intr_handler, IRQF_SHARED,
+			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0485 MSI-X slow-path request_irq failed "
+				"(%d)\n", rc);
+		goto msi_fail_out;
+	}
 
-	psli = &phba->sli;
-	init_timer(&psli->mbox_tmo);
-	psli->mbox_tmo.function = lpfc_mbox_timeout;
-	psli->mbox_tmo.data = (unsigned long) phba;
-	init_timer(&phba->fcp_poll_timer);
-	phba->fcp_poll_timer.function = lpfc_poll_timeout;
-	phba->fcp_poll_timer.data = (unsigned long) phba;
-	init_timer(&phba->fabric_block_timer);
-	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
-	phba->fabric_block_timer.data = (unsigned long) phba;
-	init_timer(&phba->eratt_poll);
-	phba->eratt_poll.function = lpfc_poll_eratt;
-	phba->eratt_poll.data = (unsigned long) phba;
+	/* The rest of the vector(s) are associated to fast-path handler(s) */
+	for (index = 1; index < phba->sli4_hba.cfg_eqn; index++) {
+		phba->sli4_hba.fcp_eq_hdl[index - 1].idx = index - 1;
+		phba->sli4_hba.fcp_eq_hdl[index - 1].phba = phba;
+		rc = request_irq(phba->sli4_hba.msix_entries[index].vector,
+				 &lpfc_sli4_fp_intr_handler, IRQF_SHARED,
+				 LPFC_FP_DRIVER_HANDLER_NAME,
+				 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+					"0486 MSI-X fast-path (%d) "
+					"request_irq failed (%d)\n", index, rc);
+			goto cfg_fail_out;
+		}
+	}
 
-	pci_set_master(pdev);
-	pci_save_state(pdev);
-	pci_try_set_mwi(pdev);
+	return rc;
 
-	if (pci_set_dma_mask(phba->pcidev, DMA_BIT_MASK(64)) != 0)
-		if (pci_set_dma_mask(phba->pcidev, DMA_BIT_MASK(32)) != 0)
-			goto out_idr_remove;
+cfg_fail_out:
+	/* free the irq already requested */
+	for (--index; index >= 1; index--)
+		free_irq(phba->sli4_hba.msix_entries[index - 1].vector,
+			 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+
+	/* free the irq already requested */
+	free_irq(phba->sli4_hba.msix_entries[0].vector, phba);
+
+msi_fail_out:
+	/* Unconfigure MSI-X capability structure */
+	pci_disable_msix(phba->pcidev);
+	return rc;
+}
+
+/**
+ * lpfc_sli4_disable_msix - Disable MSI-X interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release the MSI-X vectors and then disable the
+ * MSI-X interrupt mode to device with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_disable_msix(struct lpfc_hba *phba)
+{
+	int index;
+
+	/* Free up MSI-X multi-message vectors */
+	free_irq(phba->sli4_hba.msix_entries[0].vector, phba);
+
+	for (index = 1; index < phba->sli4_hba.cfg_eqn; index++)
+		free_irq(phba->sli4_hba.msix_entries[index].vector,
+			 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+	/* Disable MSI-X */
+	pci_disable_msix(phba->pcidev);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_enable_msi - Enable MSI interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI interrupt mode to device with
+ * SLI-4 interface spec. The kernel function pci_enable_msi() is called
+ * to enable the MSI vector. The device driver is responsible for calling
+ * the request_irq() to register MSI vector with a interrupt the handler,
+ * which is done in this function.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli4_enable_msi(struct lpfc_hba *phba)
+{
+	int rc, index;
+
+	rc = pci_enable_msi(phba->pcidev);
+	if (!rc)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0487 PCI enable MSI mode success.\n");
+	else {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0488 PCI enable MSI mode failed (%d)\n", rc);
+		return rc;
+	}
+
+	rc = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
+			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+	if (rc) {
+		pci_disable_msi(phba->pcidev);
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0490 MSI request_irq failed (%d)\n", rc);
+	}
+
+	for (index = 0; index < phba->cfg_fcp_eq_count; index++) {
+		phba->sli4_hba.fcp_eq_hdl[index].idx = index;
+		phba->sli4_hba.fcp_eq_hdl[index].phba = phba;
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_disable_msi - Disable MSI interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable the MSI interrupt mode to device with
+ * SLI-4 interface spec. The driver calls free_irq() on MSI vector it has
+ * done request_irq() on before calling pci_disable_msi(). Failure to do so
+ * results in a BUG_ON() and a device will be left with MSI enabled and leaks
+ * its vector.
+ **/
+static void
+lpfc_sli4_disable_msi(struct lpfc_hba *phba)
+{
+	free_irq(phba->pcidev->irq, phba);
+	pci_disable_msi(phba->pcidev);
+	return;
+}
+
+/**
+ * lpfc_sli4_enable_intr - Enable device interrupt to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable device interrupt and associate driver's
+ * interrupt handler(s) to interrupt vector(s) to device with SLI-4
+ * interface spec. Depends on the interrupt mode configured to the driver,
+ * the driver will try to fallback from the configured interrupt mode to an
+ * interrupt mode which is supported by the platform, kernel, and device in
+ * the order of:
+ * MSI-X -> MSI -> IRQ.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static uint32_t
+lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+{
+	uint32_t intr_mode = LPFC_INTR_ERROR;
+	int retval, index;
+
+	if (cfg_mode == 2) {
+		/* Preparation before conf_msi mbox cmd */
+		retval = 0;
+		if (!retval) {
+			/* Now, try to enable MSI-X interrupt mode */
+			retval = lpfc_sli4_enable_msix(phba);
+			if (!retval) {
+				/* Indicate initialization to MSI-X mode */
+				phba->intr_type = MSIX;
+				intr_mode = 2;
+			}
+		}
+	}
+
+	/* Fallback to MSI if MSI-X initialization failed */
+	if (cfg_mode >= 1 && phba->intr_type == NONE) {
+		retval = lpfc_sli4_enable_msi(phba);
+		if (!retval) {
+			/* Indicate initialization to MSI mode */
+			phba->intr_type = MSI;
+			intr_mode = 1;
+		}
+	}
+
+	/* Fallback to INTx if both MSI-X/MSI initalization failed */
+	if (phba->intr_type == NONE) {
+		retval = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
+				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+		if (!retval) {
+			/* Indicate initialization to INTx mode */
+			phba->intr_type = INTx;
+			intr_mode = 0;
+			for (index = 0; index < phba->cfg_fcp_eq_count;
+			     index++) {
+				phba->sli4_hba.fcp_eq_hdl[index].idx = index;
+				phba->sli4_hba.fcp_eq_hdl[index].phba = phba;
+			}
+		}
+	}
+	return intr_mode;
+}
+
+/**
+ * lpfc_sli4_disable_intr - Disable device interrupt to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable device interrupt and disassociate
+ * the driver's interrupt handler(s) from interrupt vector(s) to device
+ * with SLI-4 interface spec. Depending on the interrupt mode, the driver
+ * will release the interrupt vector(s) for the message signaled interrupt.
+ **/
+static void
+lpfc_sli4_disable_intr(struct lpfc_hba *phba)
+{
+	/* Disable the currently initialized interrupt mode */
+	if (phba->intr_type == MSIX)
+		lpfc_sli4_disable_msix(phba);
+	else if (phba->intr_type == MSI)
+		lpfc_sli4_disable_msi(phba);
+	else if (phba->intr_type == INTx)
+		free_irq(phba->pcidev->irq, phba);
+
+	/* Reset interrupt management states */
+	phba->intr_type = NONE;
+	phba->sli.slistat.sli_intr = 0;
+
+	return;
+}
+
+/**
+ * lpfc_unset_hba - Unset SLI3 hba device initialization
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the HBA device initialization steps to
+ * a device with SLI-3 interface spec.
+ **/
+static void
+lpfc_unset_hba(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+
+	spin_lock_irq(shost->host_lock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
+
+	lpfc_stop_hba_timers(phba);
+
+	phba->pport->work_port_events = 0;
+
+	lpfc_sli_hba_down(phba);
+
+	lpfc_sli_brdrestart(phba);
+
+	lpfc_sli_disable_intr(phba);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the HBA device initialization steps to
+ * a device with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_unset_hba(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+
+	spin_lock_irq(shost->host_lock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
+
+	phba->pport->work_port_events = 0;
+
+	lpfc_sli4_hba_down(phba);
+
+	lpfc_sli4_disable_intr(phba);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_hba_unset - Unset the fcoe hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called in the SLI4 code path to reset the HBA's FCoE
+ * function. The caller is not required to hold any lock. This routine
+ * issues PCI function reset mailbox command to reset the FCoE function.
+ * At the end of the function, it calls lpfc_hba_down_post function to
+ * free any pending commands.
+ **/
+static void
+lpfc_sli4_hba_unset(struct lpfc_hba *phba)
+{
+	int wait_cnt = 0;
+	LPFC_MBOXQ_t *mboxq;
+
+	lpfc_stop_hba_timers(phba);
+	phba->sli4_hba.intr_enable = 0;
 
 	/*
-	 * Get the bus address of Bar0 and Bar2 and the number of bytes
-	 * required by each mapping.
+	 * Gracefully wait out the potential current outstanding asynchronous
+	 * mailbox command.
 	 */
-	phba->pci_bar0_map = pci_resource_start(phba->pcidev, 0);
-	bar0map_len        = pci_resource_len(phba->pcidev, 0);
 
-	phba->pci_bar2_map = pci_resource_start(phba->pcidev, 2);
-	bar2map_len        = pci_resource_len(phba->pcidev, 2);
-
-	/* Map HBA SLIM to a kernel virtual address. */
-	phba->slim_memmap_p = ioremap(phba->pci_bar0_map, bar0map_len);
-	if (!phba->slim_memmap_p) {
-		error = -ENODEV;
-		dev_printk(KERN_ERR, &pdev->dev,
-			   "ioremap failed for SLIM memory.\n");
-		goto out_idr_remove;
+	/* First, block any pending async mailbox command from posted */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli.sli_flag |= LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, trying to wait it out if we can */
+	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		msleep(10);
+		if (++wait_cnt > LPFC_ACTIVE_MBOX_WAIT_CNT)
+			break;
 	}
-
-	/* Map HBA Control Registers to a kernel virtual address. */
-	phba->ctrl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len);
-	if (!phba->ctrl_regs_memmap_p) {
-		error = -ENODEV;
-		dev_printk(KERN_ERR, &pdev->dev,
-			   "ioremap failed for HBA control registers.\n");
-		goto out_iounmap_slim;
-	}
-
-	/* Allocate memory for SLI-2 structures */
-	phba->slim2p.virt = dma_alloc_coherent(&phba->pcidev->dev,
-					       SLI2_SLIM_SIZE,
-					       &phba->slim2p.phys,
-					       GFP_KERNEL);
-	if (!phba->slim2p.virt)
-		goto out_iounmap;
-
-	memset(phba->slim2p.virt, 0, SLI2_SLIM_SIZE);
-	phba->mbox = phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, mbx);
-	phba->pcb = (phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, pcb));
-	phba->IOCBs = (phba->slim2p.virt +
-		       offsetof(struct lpfc_sli2_slim, IOCBs));
-
-	phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev,
-						 lpfc_sli_hbq_size(),
-						 &phba->hbqslimp.phys,
-						 GFP_KERNEL);
-	if (!phba->hbqslimp.virt)
-		goto out_free_slim;
-
-	hbq_count = lpfc_sli_hbq_count();
-	ptr = phba->hbqslimp.virt;
-	for (i = 0; i < hbq_count; ++i) {
-		phba->hbqs[i].hbq_virt = ptr;
-		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
-		ptr += (lpfc_hbq_defs[i]->entry_count *
-			sizeof(struct lpfc_hbq_entry));
-	}
-	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_els_hbq_alloc;
-	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer  = lpfc_els_hbq_free;
-
-	memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
-
-	INIT_LIST_HEAD(&phba->hbqbuf_in_list);
-
-	/* Initialize the SLI Layer to run with lpfc HBAs. */
-	lpfc_sli_setup(phba);
-	lpfc_sli_queue_setup(phba);
-
-	retval = lpfc_mem_alloc(phba);
-	if (retval) {
-		error = retval;
-		goto out_free_hbqslimp;
-	}
-
-	/* Initialize and populate the iocb list per host.  */
-	INIT_LIST_HEAD(&phba->lpfc_iocb_list);
-	for (i = 0; i < LPFC_IOCB_LIST_CNT; i++) {
-		iocbq_entry = kzalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL);
-		if (iocbq_entry == NULL) {
-			printk(KERN_ERR "%s: only allocated %d iocbs of "
-				"expected %d count. Unloading driver.\n",
-				__func__, i, LPFC_IOCB_LIST_CNT);
-			error = -ENOMEM;
-			goto out_free_iocbq;
-		}
-
-		iotag = lpfc_sli_next_iotag(phba, iocbq_entry);
-		if (iotag == 0) {
-			kfree (iocbq_entry);
-			printk(KERN_ERR "%s: failed to allocate IOTAG. "
-			       "Unloading driver.\n",
-				__func__);
-			error = -ENOMEM;
-			goto out_free_iocbq;
-		}
-
+	/* Forcefully release the outstanding mailbox command if timed out */
+	if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) {
 		spin_lock_irq(&phba->hbalock);
-		list_add(&iocbq_entry->list, &phba->lpfc_iocb_list);
-		phba->total_iocbq_bufs++;
+		mboxq = phba->sli.mbox_active;
+		mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED;
+		__lpfc_mbox_cmpl_put(phba, mboxq);
+		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+		phba->sli.mbox_active = NULL;
 		spin_unlock_irq(&phba->hbalock);
 	}
 
-	/* Initialize HBA structure */
-	phba->fc_edtov = FF_DEF_EDTOV;
-	phba->fc_ratov = FF_DEF_RATOV;
-	phba->fc_altov = FF_DEF_ALTOV;
-	phba->fc_arbtov = FF_DEF_ARBTOV;
+	/* Tear down the queues in the HBA */
+	lpfc_sli4_queue_unset(phba);
 
-	INIT_LIST_HEAD(&phba->work_list);
-	phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
-	phba->work_ha_mask |= (HA_RXMASK << (LPFC_ELS_RING * 4));
+	/* Disable PCI subsystem interrupt */
+	lpfc_sli4_disable_intr(phba);
 
-	/* Initialize the wait queue head for the kernel thread */
-	init_waitqueue_head(&phba->work_waitq);
+	/* Stop kthread signal shall trigger work_done one more time */
+	kthread_stop(phba->worker_thread);
 
-	/* Startup the kernel thread for this host adapter. */
-	phba->worker_thread = kthread_run(lpfc_do_work, phba,
-				       "lpfc_worker_%d", phba->brd_no);
-	if (IS_ERR(phba->worker_thread)) {
-		error = PTR_ERR(phba->worker_thread);
-		goto out_free_iocbq;
+	/* Stop the SLI4 device port */
+	phba->pport->work_port_events = 0;
+}
+
+/**
+ * lpfc_pci_probe_one_s3 - PCI probe func to reg SLI-3 device to PCI subsystem.
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is to be called to attach a device with SLI-3 interface spec
+ * to the PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * presented on PCI bus, the kernel PCI subsystem looks at PCI device-specific
+ * information of the device and driver to see if the driver state that it can
+ * support this kind of device. If the match is successful, the driver core
+ * invokes this routine. If this routine determines it can claim the HBA, it
+ * does all the initialization that it needs to do to handle the HBA properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	struct lpfc_hba   *phba;
+	struct lpfc_vport *vport = NULL;
+	int error;
+	uint32_t cfg_mode, intr_mode;
+
+	/* Allocate memory for HBA structure */
+	phba = lpfc_hba_alloc(pdev);
+	if (!phba)
+		return -ENOMEM;
+
+	/* Perform generic PCI device enabling operation */
+	error = lpfc_enable_pci_dev(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1401 Failed to enable pci device.\n");
+		goto out_free_phba;
 	}
 
-	/* Initialize the list of scsi buffers used by driver for scsi IO. */
-	spin_lock_init(&phba->scsi_buf_list_lock);
-	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+	/* Set up SLI API function jump table for PCI-device group-0 HBAs */
+	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_LP);
+	if (error)
+		goto out_disable_pci_dev;
 
-	/* Initialize list of fabric iocbs */
-	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+	/* Set up SLI-3 specific device PCI memory space */
+	error = lpfc_sli_pci_mem_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1402 Failed to set up pci memory space.\n");
+		goto out_disable_pci_dev;
+	}
 
-	/* Initialize list to save ELS buffers */
-	INIT_LIST_HEAD(&phba->elsbuf);
+	/* Set up phase-1 common device driver resources */
+	error = lpfc_setup_driver_resource_phase1(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1403 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
 
-	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
-	if (!vport)
-		goto out_kthread_stop;
+	/* Set up SLI-3 specific device driver resources */
+	error = lpfc_sli_driver_resource_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1404 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
 
-	shost = lpfc_shost_from_vport(vport);
-	phba->pport = vport;
-	lpfc_debugfs_initialize(vport);
+	/* Initialize and populate the iocb list per host */
+	error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1405 Failed to initialize iocb list.\n");
+		goto out_unset_driver_resource_s3;
+	}
 
-	pci_set_drvdata(pdev, shost);
+	/* Set up common device driver resources */
+	error = lpfc_setup_driver_resource_phase2(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1406 Failed to set up driver resource.\n");
+		goto out_free_iocb_list;
+	}
 
-	phba->MBslimaddr = phba->slim_memmap_p;
-	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
-	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
-	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
-	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
+	/* Create SCSI host to the physical port */
+	error = lpfc_create_shost(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1407 Failed to create scsi host.\n");
+		goto out_unset_driver_resource;
+	}
 
 	/* Configure sysfs attributes */
-	if (lpfc_alloc_sysfs_attr(vport)) {
+	vport = phba->pport;
+	error = lpfc_alloc_sysfs_attr(vport);
+	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1476 Failed to allocate sysfs attr\n");
-		error = -ENOMEM;
-		goto out_destroy_port;
+		goto out_destroy_shost;
 	}
 
+	/* Now, trying to enable interrupt and bring up the device */
 	cfg_mode = phba->cfg_use_msi;
 	while (true) {
+		/* Put device to a known state before enabling interrupt */
+		lpfc_stop_port(phba);
 		/* Configure and enable interrupt */
-		intr_mode = lpfc_enable_intr(phba, cfg_mode);
+		intr_mode = lpfc_sli_enable_intr(phba, cfg_mode);
 		if (intr_mode == LPFC_INTR_ERROR) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0426 Failed to enable interrupt.\n");
+					"0431 Failed to enable interrupt.\n");
+			error = -ENODEV;
 			goto out_free_sysfs_attr;
 		}
-		/* HBA SLI setup */
+		/* SLI-3 HBA setup */
 		if (lpfc_sli_hba_setup(phba)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"1477 Failed to set up hba\n");
@@ -2902,185 +6796,65 @@
 
 		/* Wait 50ms for the interrupts of previous mailbox commands */
 		msleep(50);
-		/* Check active interrupts received */
-		if (phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
+		/* Check active interrupts on message signaled interrupts */
+		if (intr_mode == 0 ||
+		    phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
 			/* Log the current active interrupt mode */
 			phba->intr_mode = intr_mode;
 			lpfc_log_intr_mode(phba, intr_mode);
 			break;
 		} else {
 			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"0451 Configure interrupt mode (%d) "
+					"0447 Configure interrupt mode (%d) "
 					"failed active interrupt test.\n",
 					intr_mode);
-			if (intr_mode == 0) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-						"0479 Failed to enable "
-						"interrupt.\n");
-				error = -ENODEV;
-				goto out_remove_device;
-			}
-			/* Stop HBA SLI setups */
-			lpfc_stop_port(phba);
 			/* Disable the current interrupt mode */
-			lpfc_disable_intr(phba);
+			lpfc_sli_disable_intr(phba);
 			/* Try next level of interrupt mode */
 			cfg_mode = --intr_mode;
 		}
 	}
 
-	/*
-	 * hba setup may have changed the hba_queue_depth so we need to adjust
-	 * the value of can_queue.
-	 */
-	shost->can_queue = phba->cfg_hba_queue_depth - 10;
-	if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
+	/* Perform post initialization setup */
+	lpfc_post_init_setup(phba);
 
-		if (lpfc_prot_mask && lpfc_prot_guard) {
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"1478 Registering BlockGuard with the "
-					"SCSI layer\n");
-
-			scsi_host_set_prot(shost, lpfc_prot_mask);
-			scsi_host_set_guard(shost, lpfc_prot_guard);
-		}
-	}
-
-	if (!_dump_buf_data) {
-		int pagecnt = 10;
-		while (pagecnt) {
-			spin_lock_init(&_dump_buf_lock);
-			_dump_buf_data =
-				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
-			if (_dump_buf_data) {
-				printk(KERN_ERR "BLKGRD allocated %d pages for "
-						"_dump_buf_data at 0x%p\n",
-						(1 << pagecnt), _dump_buf_data);
-				_dump_buf_data_order = pagecnt;
-				memset(_dump_buf_data, 0, ((1 << PAGE_SHIFT)
-							   << pagecnt));
-				break;
-			} else {
-				--pagecnt;
-			}
-
-		}
-
-		if (!_dump_buf_data_order)
-			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
-					"memory for hexdump\n");
-
-	} else {
-		printk(KERN_ERR "BLKGRD already allocated _dump_buf_data=0x%p"
-		       "\n", _dump_buf_data);
-	}
-
-
-	if (!_dump_buf_dif) {
-		int pagecnt = 10;
-		while (pagecnt) {
-			_dump_buf_dif =
-				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
-			if (_dump_buf_dif) {
-				printk(KERN_ERR "BLKGRD allocated %d pages for "
-						"_dump_buf_dif at 0x%p\n",
-						(1 << pagecnt), _dump_buf_dif);
-				_dump_buf_dif_order = pagecnt;
-				memset(_dump_buf_dif, 0, ((1 << PAGE_SHIFT)
-							  << pagecnt));
-				break;
-			} else {
-				--pagecnt;
-			}
-
-		}
-
-		if (!_dump_buf_dif_order)
-			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
-					"memory for hexdump\n");
-
-	} else {
-		printk(KERN_ERR "BLKGRD already allocated _dump_buf_dif=0x%p\n",
-				_dump_buf_dif);
-	}
-
-	lpfc_host_attrib_init(shost);
-
-	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		spin_lock_irq(shost->host_lock);
-		lpfc_poll_start_timer(phba);
-		spin_unlock_irq(shost->host_lock);
-	}
-
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"0428 Perform SCSI scan\n");
-	/* Send board arrival event to upper layer */
-	adapter_event.event_type = FC_REG_ADAPTER_EVENT;
-	adapter_event.subcategory = LPFC_EVENT_ARRIVAL;
-	fc_host_post_vendor_event(shost, fc_get_event_number(),
-		sizeof(adapter_event),
-		(char *) &adapter_event,
-		LPFC_NL_VENDOR_ID);
+	/* Check if there are static vports to be created. */
+	lpfc_create_static_vport(phba);
 
 	return 0;
 
 out_remove_device:
-	spin_lock_irq(shost->host_lock);
-	vport->load_flag |= FC_UNLOADING;
-	spin_unlock_irq(shost->host_lock);
-	lpfc_stop_phba_timers(phba);
-	phba->pport->work_port_events = 0;
-	lpfc_disable_intr(phba);
-	lpfc_sli_hba_down(phba);
-	lpfc_sli_brdrestart(phba);
+	lpfc_unset_hba(phba);
 out_free_sysfs_attr:
 	lpfc_free_sysfs_attr(vport);
-out_destroy_port:
-	destroy_port(vport);
-out_kthread_stop:
-	kthread_stop(phba->worker_thread);
-out_free_iocbq:
-	list_for_each_entry_safe(iocbq_entry, iocbq_next,
-						&phba->lpfc_iocb_list, list) {
-		kfree(iocbq_entry);
-		phba->total_iocbq_bufs--;
-	}
-	lpfc_mem_free(phba);
-out_free_hbqslimp:
-	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
-			  phba->hbqslimp.virt, phba->hbqslimp.phys);
-out_free_slim:
-	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
-			  phba->slim2p.virt, phba->slim2p.phys);
-out_iounmap:
-	iounmap(phba->ctrl_regs_memmap_p);
-out_iounmap_slim:
-	iounmap(phba->slim_memmap_p);
-out_idr_remove:
-	idr_remove(&lpfc_hba_index, phba->brd_no);
+out_destroy_shost:
+	lpfc_destroy_shost(phba);
+out_unset_driver_resource:
+	lpfc_unset_driver_resource_phase2(phba);
+out_free_iocb_list:
+	lpfc_free_iocb_list(phba);
+out_unset_driver_resource_s3:
+	lpfc_sli_driver_resource_unset(phba);
+out_unset_pci_mem_s3:
+	lpfc_sli_pci_mem_unset(phba);
+out_disable_pci_dev:
+	lpfc_disable_pci_dev(phba);
 out_free_phba:
-	kfree(phba);
-out_release_regions:
-	pci_release_selected_regions(pdev, bars);
-out_disable_device:
-	pci_disable_device(pdev);
-out:
-	pci_set_drvdata(pdev, NULL);
-	if (shost)
-		scsi_host_put(shost);
+	lpfc_hba_free(phba);
 	return error;
 }
 
 /**
- * lpfc_pci_remove_one - lpfc PCI func to unregister device from PCI subsystem
+ * lpfc_pci_remove_one_s3 - PCI func to unreg SLI-3 device from PCI subsystem.
  * @pdev: pointer to PCI device
  *
- * This routine is to be registered to the kernel's PCI subsystem. When an
- * Emulex HBA is removed from PCI bus, it performs all the necessary cleanup
- * for the HBA device to be removed from the PCI subsystem properly.
+ * This routine is to be called to disattach a device with SLI-3 interface
+ * spec from PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * removed from PCI bus, it performs all the necessary cleanup for the HBA
+ * device to be removed from the PCI subsystem properly.
  **/
 static void __devexit
-lpfc_pci_remove_one(struct pci_dev *pdev)
+lpfc_pci_remove_one_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host  *shost = pci_get_drvdata(pdev);
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
@@ -3098,7 +6872,7 @@
 	/* Release all the vports against this physical port */
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for (i = 1; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 1; i <= phba->max_vports && vports[i] != NULL; i++)
 			fc_vport_terminate(vports[i]->fc_vport);
 	lpfc_destroy_vport_work_array(phba, vports);
 
@@ -3120,7 +6894,7 @@
 	/* Final cleanup of txcmplq and reset the HBA */
 	lpfc_sli_brdrestart(phba);
 
-	lpfc_stop_phba_timers(phba);
+	lpfc_stop_hba_timers(phba);
 	spin_lock_irq(&phba->hbalock);
 	list_del_init(&vport->listentry);
 	spin_unlock_irq(&phba->hbalock);
@@ -3128,7 +6902,7 @@
 	lpfc_debugfs_terminate(vport);
 
 	/* Disable interrupt */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	pci_set_drvdata(pdev, NULL);
 	scsi_host_put(shost);
@@ -3138,7 +6912,7 @@
 	 * corresponding pools here.
 	 */
 	lpfc_scsi_free(phba);
-	lpfc_mem_free(phba);
+	lpfc_mem_free_all(phba);
 
 	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
 			  phba->hbqslimp.virt, phba->hbqslimp.phys);
@@ -3151,36 +6925,35 @@
 	iounmap(phba->ctrl_regs_memmap_p);
 	iounmap(phba->slim_memmap_p);
 
-	idr_remove(&lpfc_hba_index, phba->brd_no);
-
-	kfree(phba);
+	lpfc_hba_free(phba);
 
 	pci_release_selected_regions(pdev, bars);
 	pci_disable_device(pdev);
 }
 
 /**
- * lpfc_pci_suspend_one - lpfc PCI func to suspend device for power management
+ * lpfc_pci_suspend_one_s3 - PCI func to suspend SLI-3 device for power mgmnt
  * @pdev: pointer to PCI device
  * @msg: power management message
  *
- * This routine is to be registered to the kernel's PCI subsystem to support
- * system Power Management (PM). When PM invokes this method, it quiesces the
- * device by stopping the driver's worker thread for the device, turning off
- * device's interrupt and DMA, and bring the device offline. Note that as the
- * driver implements the minimum PM requirements to a power-aware driver's PM
- * support for suspend/resume -- all the possible PM messages (SUSPEND,
- * HIBERNATE, FREEZE) to the suspend() method call will be treated as SUSPEND
- * and the driver will fully reinitialize its device during resume() method
- * call, the driver will set device to PCI_D3hot state in PCI config space
- * instead of setting it according to the @msg provided by the PM.
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When
+ * PM invokes this method, it quiesces the device by stopping the driver's
+ * worker thread for the device, turning off device's interrupt and DMA,
+ * and bring the device offline. Note that as the driver implements the
+ * minimum PM requirements to a power-aware driver's PM support for the
+ * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
+ * to the suspend() method call will be treated as SUSPEND and the driver will
+ * fully reinitialize its device during resume() method call, the driver will
+ * set device to PCI_D3hot state in PCI config space instead of setting it
+ * according to the @msg provided by the PM.
  *
  * Return code
- *   0 - driver suspended the device
- *   Error otherwise
+ * 	0 - driver suspended the device
+ * 	Error otherwise
  **/
 static int
-lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
+lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3194,7 +6967,7 @@
 	kthread_stop(phba->worker_thread);
 
 	/* Disable interrupt from device */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	/* Save device state to PCI config space */
 	pci_save_state(pdev);
@@ -3204,25 +6977,26 @@
 }
 
 /**
- * lpfc_pci_resume_one - lpfc PCI func to resume device for power management
+ * lpfc_pci_resume_one_s3 - PCI func to resume SLI-3 device for power mgmnt
  * @pdev: pointer to PCI device
  *
- * This routine is to be registered to the kernel's PCI subsystem to support
- * system Power Management (PM). When PM invokes this method, it restores
- * the device's PCI config space state and fully reinitializes the device
- * and brings it online. Note that as the driver implements the minimum PM
- * requirements to a power-aware driver's PM for suspend/resume -- all
- * the possible PM messages (SUSPEND, HIBERNATE, FREEZE) to the suspend()
- * method call will be treated as SUSPEND and the driver will fully
- * reinitialize its device during resume() method call, the device will be
- * set to PCI_D0 directly in PCI config space before restoring the state.
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When PM
+ * invokes this method, it restores the device's PCI config space state and
+ * fully reinitializes the device and brings it online. Note that as the
+ * driver implements the minimum PM requirements to a power-aware driver's
+ * PM for suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE,
+ * FREEZE) to the suspend() method call will be treated as SUSPEND and the
+ * driver will fully reinitialize its device during resume() method call,
+ * the device will be set to PCI_D0 directly in PCI config space before
+ * restoring the state.
  *
  * Return code
- *   0 - driver suspended the device
- *   Error otherwise
+ * 	0 - driver suspended the device
+ * 	Error otherwise
  **/
 static int
-lpfc_pci_resume_one(struct pci_dev *pdev)
+lpfc_pci_resume_one_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3250,7 +7024,7 @@
 	}
 
 	/* Configure and enable interrupt */
-	intr_mode = lpfc_enable_intr(phba, phba->intr_mode);
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0430 PM resume Failed to enable interrupt\n");
@@ -3269,23 +7043,24 @@
 }
 
 /**
- * lpfc_io_error_detected - Driver method for handling PCI I/O error detected
+ * lpfc_io_error_detected_s3 - Method for handling SLI-3 device PCI I/O error
  * @pdev: pointer to PCI device.
  * @state: the current PCI connection state.
  *
- * This routine is registered to the PCI subsystem for error handling. This
- * function is called by the PCI subsystem after a PCI bus error affecting
- * this device has been detected. When this function is invoked, it will
- * need to stop all the I/Os and interrupt(s) to the device. Once that is
- * done, it will return PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to
- * perform proper recovery as desired.
+ * This routine is called from the PCI subsystem for I/O error handling to
+ * device with SLI-3 interface spec. This function is called by the PCI
+ * subsystem after a PCI bus error affecting this device has been detected.
+ * When this function is invoked, it will need to stop all the I/Os and
+ * interrupt(s) to the device. Once that is done, it will return
+ * PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to perform proper recovery
+ * as desired.
  *
  * Return codes
- *   PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
- *   PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  **/
-static pci_ers_result_t lpfc_io_error_detected(struct pci_dev *pdev,
-				pci_channel_state_t state)
+static pci_ers_result_t
+lpfc_io_error_detected_s3(struct pci_dev *pdev, pci_channel_state_t state)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3312,30 +7087,32 @@
 	lpfc_sli_abort_iocb_ring(phba, pring);
 
 	/* Disable interrupt */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 
 /**
- * lpfc_io_slot_reset - Restart a PCI device from scratch
+ * lpfc_io_slot_reset_s3 - Method for restarting PCI SLI-3 device from scratch.
  * @pdev: pointer to PCI device.
  *
- * This routine is registered to the PCI subsystem for error handling. This is
- * called after PCI bus has been reset to restart the PCI card from scratch,
- * as if from a cold-boot. During the PCI subsystem error recovery, after the
- * driver returns PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform
- * proper error recovery and then call this routine before calling the .resume
- * method to recover the device. This function will initialize the HBA device,
- * enable the interrupt, but it will just put the HBA to offline state without
- * passing any I/O traffic.
+ * This routine is called from the PCI subsystem for error handling to
+ * device with SLI-3 interface spec. This is called after PCI bus has been
+ * reset to restart the PCI card from scratch, as if from a cold-boot.
+ * During the PCI subsystem error recovery, after driver returns
+ * PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform proper error
+ * recovery and then call this routine before calling the .resume method
+ * to recover the device. This function will initialize the HBA device,
+ * enable the interrupt, but it will just put the HBA to offline state
+ * without passing any I/O traffic.
  *
  * Return codes
- *   PCI_ERS_RESULT_RECOVERED - the device has been recovered
- *   PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  */
-static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev)
+static pci_ers_result_t
+lpfc_io_slot_reset_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3354,11 +7131,11 @@
 		pci_set_master(pdev);
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 	/* Configure and enable interrupt */
-	intr_mode = lpfc_enable_intr(phba, phba->intr_mode);
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0427 Cannot re-enable interrupt after "
@@ -3378,15 +7155,17 @@
 }
 
 /**
- * lpfc_io_resume - Resume PCI I/O operation
+ * lpfc_io_resume_s3 - Method for resuming PCI I/O operation on SLI-3 device.
  * @pdev: pointer to PCI device
  *
- * This routine is registered to the PCI subsystem for error handling. It is
- * called when kernel error recovery tells the lpfc driver that it is ok to
- * resume normal PCI operation after PCI bus error recovery. After this call,
- * traffic can start to flow from this device again.
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-3 interface spec. It is called when kernel error recovery tells
+ * the lpfc driver that it is ok to resume normal PCI operation after PCI bus
+ * error recovery. After this call, traffic can start to flow from this device
+ * again.
  */
-static void lpfc_io_resume(struct pci_dev *pdev)
+static void
+lpfc_io_resume_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3394,6 +7173,697 @@
 	lpfc_online(phba);
 }
 
+/**
+ * lpfc_sli4_get_els_iocb_cnt - Calculate the # of ELS IOCBs to reserve
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * returns the number of ELS/CT IOCBs to reserve
+ **/
+int
+lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba)
+{
+	int max_xri = phba->sli4_hba.max_cfg_param.max_xri;
+
+	if (max_xri <= 100)
+		return 4;
+	else if (max_xri <= 256)
+		return 8;
+	else if (max_xri <= 512)
+		return 16;
+	else if (max_xri <= 1024)
+		return 32;
+	else
+		return 48;
+}
+
+/**
+ * lpfc_pci_probe_one_s4 - PCI probe func to reg SLI-4 device to PCI subsys
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is called from the kernel's PCI subsystem to device with
+ * SLI-4 interface spec. When an Emulex HBA with SLI-4 interface spec is
+ * presented on PCI bus, the kernel PCI subsystem looks at PCI device-specific
+ * information of the device and driver to see if the driver state that it
+ * can support this kind of device. If the match is successful, the driver
+ * core invokes this routine. If this routine determines it can claim the HBA,
+ * it does all the initialization that it needs to do to handle the HBA
+ * properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	struct lpfc_hba   *phba;
+	struct lpfc_vport *vport = NULL;
+	int error;
+	uint32_t cfg_mode, intr_mode;
+	int mcnt;
+
+	/* Allocate memory for HBA structure */
+	phba = lpfc_hba_alloc(pdev);
+	if (!phba)
+		return -ENOMEM;
+
+	/* Perform generic PCI device enabling operation */
+	error = lpfc_enable_pci_dev(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1409 Failed to enable pci device.\n");
+		goto out_free_phba;
+	}
+
+	/* Set up SLI API function jump table for PCI-device group-1 HBAs */
+	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_OC);
+	if (error)
+		goto out_disable_pci_dev;
+
+	/* Set up SLI-4 specific device PCI memory space */
+	error = lpfc_sli4_pci_mem_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1410 Failed to set up pci memory space.\n");
+		goto out_disable_pci_dev;
+	}
+
+	/* Set up phase-1 common device driver resources */
+	error = lpfc_setup_driver_resource_phase1(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1411 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s4;
+	}
+
+	/* Set up SLI-4 Specific device driver resources */
+	error = lpfc_sli4_driver_resource_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1412 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s4;
+	}
+
+	/* Initialize and populate the iocb list per host */
+	error = lpfc_init_iocb_list(phba,
+			phba->sli4_hba.max_cfg_param.max_xri);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1413 Failed to initialize iocb list.\n");
+		goto out_unset_driver_resource_s4;
+	}
+
+	/* Set up common device driver resources */
+	error = lpfc_setup_driver_resource_phase2(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1414 Failed to set up driver resource.\n");
+		goto out_free_iocb_list;
+	}
+
+	/* Create SCSI host to the physical port */
+	error = lpfc_create_shost(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1415 Failed to create scsi host.\n");
+		goto out_unset_driver_resource;
+	}
+
+	/* Configure sysfs attributes */
+	vport = phba->pport;
+	error = lpfc_alloc_sysfs_attr(vport);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1416 Failed to allocate sysfs attr\n");
+		goto out_destroy_shost;
+	}
+
+	/* Now, trying to enable interrupt and bring up the device */
+	cfg_mode = phba->cfg_use_msi;
+	while (true) {
+		/* Put device to a known state before enabling interrupt */
+		lpfc_stop_port(phba);
+		/* Configure and enable interrupt */
+		intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
+		if (intr_mode == LPFC_INTR_ERROR) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0426 Failed to enable interrupt.\n");
+			error = -ENODEV;
+			goto out_free_sysfs_attr;
+		}
+		/* Set up SLI-4 HBA */
+		if (lpfc_sli4_hba_setup(phba)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1421 Failed to set up hba\n");
+			error = -ENODEV;
+			goto out_disable_intr;
+		}
+
+		/* Send NOP mbx cmds for non-INTx mode active interrupt test */
+		if (intr_mode != 0)
+			mcnt = lpfc_sli4_send_nop_mbox_cmds(phba,
+							    LPFC_ACT_INTR_CNT);
+
+		/* Check active interrupts received only for MSI/MSI-X */
+		if (intr_mode == 0 ||
+		    phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) {
+			/* Log the current active interrupt mode */
+			phba->intr_mode = intr_mode;
+			lpfc_log_intr_mode(phba, intr_mode);
+			break;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0451 Configure interrupt mode (%d) "
+				"failed active interrupt test.\n",
+				intr_mode);
+		/* Unset the preivous SLI-4 HBA setup */
+		lpfc_sli4_unset_hba(phba);
+		/* Try next level of interrupt mode */
+		cfg_mode = --intr_mode;
+	}
+
+	/* Perform post initialization setup */
+	lpfc_post_init_setup(phba);
+
+	return 0;
+
+out_disable_intr:
+	lpfc_sli4_disable_intr(phba);
+out_free_sysfs_attr:
+	lpfc_free_sysfs_attr(vport);
+out_destroy_shost:
+	lpfc_destroy_shost(phba);
+out_unset_driver_resource:
+	lpfc_unset_driver_resource_phase2(phba);
+out_free_iocb_list:
+	lpfc_free_iocb_list(phba);
+out_unset_driver_resource_s4:
+	lpfc_sli4_driver_resource_unset(phba);
+out_unset_pci_mem_s4:
+	lpfc_sli4_pci_mem_unset(phba);
+out_disable_pci_dev:
+	lpfc_disable_pci_dev(phba);
+out_free_phba:
+	lpfc_hba_free(phba);
+	return error;
+}
+
+/**
+ * lpfc_pci_remove_one_s4 - PCI func to unreg SLI-4 device from PCI subsystem
+ * @pdev: pointer to PCI device
+ *
+ * This routine is called from the kernel's PCI subsystem to device with
+ * SLI-4 interface spec. When an Emulex HBA with SLI-4 interface spec is
+ * removed from PCI bus, it performs all the necessary cleanup for the HBA
+ * device to be removed from the PCI subsystem properly.
+ **/
+static void __devexit
+lpfc_pci_remove_one_s4(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+	struct lpfc_vport **vports;
+	struct lpfc_hba *phba = vport->phba;
+	int i;
+
+	/* Mark the device unloading flag */
+	spin_lock_irq(&phba->hbalock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Free the HBA sysfs attributes */
+	lpfc_free_sysfs_attr(vport);
+
+	/* Release all the vports against this physical port */
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 1; i <= phba->max_vports && vports[i] != NULL; i++)
+			fc_vport_terminate(vports[i]->fc_vport);
+	lpfc_destroy_vport_work_array(phba, vports);
+
+	/* Remove FC host and then SCSI host with the physical port */
+	fc_remove_host(shost);
+	scsi_remove_host(shost);
+
+	/* Perform cleanup on the physical port */
+	lpfc_cleanup(vport);
+
+	/*
+	 * Bring down the SLI Layer. This step disables all interrupts,
+	 * clears the rings, discards all mailbox commands, and resets
+	 * the HBA FCoE function.
+	 */
+	lpfc_debugfs_terminate(vport);
+	lpfc_sli4_hba_unset(phba);
+
+	spin_lock_irq(&phba->hbalock);
+	list_del_init(&vport->listentry);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Call scsi_free before lpfc_sli4_driver_resource_unset since scsi
+	 * buffers are released to their corresponding pools here.
+	 */
+	lpfc_scsi_free(phba);
+	lpfc_sli4_driver_resource_unset(phba);
+
+	/* Unmap adapter Control and Doorbell registers */
+	lpfc_sli4_pci_mem_unset(phba);
+
+	/* Release PCI resources and disable device's PCI function */
+	scsi_host_put(shost);
+	lpfc_disable_pci_dev(phba);
+
+	/* Finally, free the driver's device data structure */
+	lpfc_hba_free(phba);
+
+	return;
+}
+
+/**
+ * lpfc_pci_suspend_one_s4 - PCI func to suspend SLI-4 device for power mgmnt
+ * @pdev: pointer to PCI device
+ * @msg: power management message
+ *
+ * This routine is called from the kernel's PCI subsystem to support system
+ * Power Management (PM) to device with SLI-4 interface spec. When PM invokes
+ * this method, it quiesces the device by stopping the driver's worker
+ * thread for the device, turning off device's interrupt and DMA, and bring
+ * the device offline. Note that as the driver implements the minimum PM
+ * requirements to a power-aware driver's PM support for suspend/resume -- all
+ * the possible PM messages (SUSPEND, HIBERNATE, FREEZE) to the suspend()
+ * method call will be treated as SUSPEND and the driver will fully
+ * reinitialize its device during resume() method call, the driver will set
+ * device to PCI_D3hot state in PCI config space instead of setting it
+ * according to the @msg provided by the PM.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_suspend_one_s4(struct pci_dev *pdev, pm_message_t msg)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0298 PCI device Power Management suspend.\n");
+
+	/* Bring down the device */
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	kthread_stop(phba->worker_thread);
+
+	/* Disable interrupt from device */
+	lpfc_sli4_disable_intr(phba);
+
+	/* Save device state to PCI config space */
+	pci_save_state(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+/**
+ * lpfc_pci_resume_one_s4 - PCI func to resume SLI-4 device for power mgmnt
+ * @pdev: pointer to PCI device
+ *
+ * This routine is called from the kernel's PCI subsystem to support system
+ * Power Management (PM) to device with SLI-4 interface spac. When PM invokes
+ * this method, it restores the device's PCI config space state and fully
+ * reinitializes the device and brings it online. Note that as the driver
+ * implements the minimum PM requirements to a power-aware driver's PM for
+ * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
+ * to the suspend() method call will be treated as SUSPEND and the driver
+ * will fully reinitialize its device during resume() method call, the device
+ * will be set to PCI_D0 directly in PCI config space before restoring the
+ * state.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_resume_one_s4(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	uint32_t intr_mode;
+	int error;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0292 PCI device Power Management resume.\n");
+
+	/* Restore device state from PCI config space */
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	if (pdev->is_busmaster)
+		pci_set_master(pdev);
+
+	 /* Startup the kernel thread for this host adapter. */
+	phba->worker_thread = kthread_run(lpfc_do_work, phba,
+					"lpfc_worker_%d", phba->brd_no);
+	if (IS_ERR(phba->worker_thread)) {
+		error = PTR_ERR(phba->worker_thread);
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0293 PM resume failed to start worker "
+				"thread: error=x%x.\n", error);
+		return error;
+	}
+
+	/* Configure and enable interrupt */
+	intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
+	if (intr_mode == LPFC_INTR_ERROR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0294 PM resume Failed to enable interrupt\n");
+		return -EIO;
+	} else
+		phba->intr_mode = intr_mode;
+
+	/* Restart HBA and bring it online */
+	lpfc_sli_brdrestart(phba);
+	lpfc_online(phba);
+
+	/* Log the current active interrupt mode */
+	lpfc_log_intr_mode(phba, phba->intr_mode);
+
+	return 0;
+}
+
+/**
+ * lpfc_io_error_detected_s4 - Method for handling PCI I/O error to SLI-4 device
+ * @pdev: pointer to PCI device.
+ * @state: the current PCI connection state.
+ *
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-4 interface spec. This function is called by the PCI subsystem
+ * after a PCI bus error affecting this device has been detected. When this
+ * function is invoked, it will need to stop all the I/Os and interrupt(s)
+ * to the device. Once that is done, it will return PCI_ERS_RESULT_NEED_RESET
+ * for the PCI subsystem to perform proper recovery as desired.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * lpfc_io_slot_reset_s4 - Method for restart PCI SLI-4 device from scratch
+ * @pdev: pointer to PCI device.
+ *
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-4 interface spec. It is called after PCI bus has been reset to
+ * restart the PCI card from scratch, as if from a cold-boot. During the
+ * PCI subsystem error recovery, after the driver returns
+ * PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform proper error
+ * recovery and then call this routine before calling the .resume method to
+ * recover the device. This function will initialize the HBA device, enable
+ * the interrupt, but it will just put the HBA to offline state without
+ * passing any I/O traffic.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ */
+static pci_ers_result_t
+lpfc_io_slot_reset_s4(struct pci_dev *pdev)
+{
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * lpfc_io_resume_s4 - Method for resuming PCI I/O operation to SLI-4 device
+ * @pdev: pointer to PCI device
+ *
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-4 interface spec. It is called when kernel error recovery tells
+ * the lpfc driver that it is ok to resume normal PCI operation after PCI bus
+ * error recovery. After this call, traffic can start to flow from this device
+ * again.
+ **/
+static void
+lpfc_io_resume_s4(struct pci_dev *pdev)
+{
+	return;
+}
+
+/**
+ * lpfc_pci_probe_one - lpfc PCI probe func to reg dev to PCI subsystem
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is to be registered to the kernel's PCI subsystem. When an
+ * Emulex HBA device is presented on PCI bus, the kernel PCI subsystem looks
+ * at PCI device-specific information of the device and driver to see if the
+ * driver state that it can support this kind of device. If the match is
+ * successful, the driver core invokes this routine. This routine dispatches
+ * the action to the proper SLI-3 or SLI-4 device probing routine, which will
+ * do all the initialization that it needs to do to handle the HBA device
+ * properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	int rc;
+	uint16_t dev_id;
+
+	if (pci_read_config_word(pdev, PCI_DEVICE_ID, &dev_id))
+		return -ENODEV;
+
+	switch (dev_id) {
+	case PCI_DEVICE_ID_TIGERSHARK:
+	case PCI_DEVICE_ID_TIGERSHARK_S:
+		rc = lpfc_pci_probe_one_s4(pdev, pid);
+		break;
+	default:
+		rc = lpfc_pci_probe_one_s3(pdev, pid);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_pci_remove_one - lpfc PCI func to unreg dev from PCI subsystem
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be registered to the kernel's PCI subsystem. When an
+ * Emulex HBA is removed from PCI bus, the driver core invokes this routine.
+ * This routine dispatches the action to the proper SLI-3 or SLI-4 device
+ * remove routine, which will perform all the necessary cleanup for the
+ * device to be removed from the PCI subsystem properly.
+ **/
+static void __devexit
+lpfc_pci_remove_one(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		lpfc_pci_remove_one_s3(pdev);
+		break;
+	case LPFC_PCI_DEV_OC:
+		lpfc_pci_remove_one_s4(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1424 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return;
+}
+
+/**
+ * lpfc_pci_suspend_one - lpfc PCI func to suspend dev for power management
+ * @pdev: pointer to PCI device
+ * @msg: power management message
+ *
+ * This routine is to be registered to the kernel's PCI subsystem to support
+ * system Power Management (PM). When PM invokes this method, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device suspend routine, which will
+ * suspend the device.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	int rc = -ENODEV;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_pci_suspend_one_s3(pdev, msg);
+		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_pci_suspend_one_s4(pdev, msg);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1425 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_pci_resume_one - lpfc PCI func to resume dev for power management
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be registered to the kernel's PCI subsystem to support
+ * system Power Management (PM). When PM invokes this method, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device resume routine, which will
+ * resume the device.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_resume_one(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	int rc = -ENODEV;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_pci_resume_one_s3(pdev);
+		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_pci_resume_one_s4(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1426 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_error_detected - lpfc method for handling PCI I/O error
+ * @pdev: pointer to PCI device.
+ * @state: the current PCI connection state.
+ *
+ * This routine is registered to the PCI subsystem for error handling. This
+ * function is called by the PCI subsystem after a PCI bus error affecting
+ * this device has been detected. When this routine is invoked, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device error detected handling
+ * routine, which will perform the proper error detected operation.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_io_error_detected_s3(pdev, state);
+		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_io_error_detected_s4(pdev, state);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1427 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_slot_reset - lpfc method for restart PCI dev from scratch
+ * @pdev: pointer to PCI device.
+ *
+ * This routine is registered to the PCI subsystem for error handling. This
+ * function is called after PCI bus has been reset to restart the PCI card
+ * from scratch, as if from a cold-boot. When this routine is invoked, it
+ * dispatches the action to the proper SLI-3 or SLI-4 device reset handling
+ * routine, which will perform the proper device reset.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_slot_reset(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_io_slot_reset_s3(pdev);
+		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_io_slot_reset_s4(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1428 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_resume - lpfc method for resuming PCI I/O operation
+ * @pdev: pointer to PCI device
+ *
+ * This routine is registered to the PCI subsystem for error handling. It
+ * is called when kernel error recovery tells the lpfc driver that it is
+ * OK to resume normal PCI operation after PCI bus error recovery. When
+ * this routine is invoked, it dispatches the action to the proper SLI-3
+ * or SLI-4 device io_resume routine, which will resume the device operation.
+ **/
+static void
+lpfc_io_resume(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		lpfc_io_resume_s3(pdev);
+		break;
+	case LPFC_PCI_DEV_OC:
+		lpfc_io_resume_s4(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1429 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return;
+}
+
 static struct pci_device_id lpfc_id_table[] = {
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER,
 		PCI_ANY_ID, PCI_ANY_ID, },
@@ -3469,6 +7939,10 @@
 		PCI_ANY_ID, PCI_ANY_ID, },
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PROTEUS_S,
 		PCI_ANY_ID, PCI_ANY_ID, },
+	{PCI_VENDOR_ID_SERVERENGINE, PCI_DEVICE_ID_TIGERSHARK,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{PCI_VENDOR_ID_SERVERENGINE, PCI_DEVICE_ID_TIGERSHARK_S,
+		PCI_ANY_ID, PCI_ANY_ID, },
 	{ 0 }
 };
 
@@ -3486,7 +7960,7 @@
 	.probe		= lpfc_pci_probe_one,
 	.remove		= __devexit_p(lpfc_pci_remove_one),
 	.suspend        = lpfc_pci_suspend_one,
-	.resume         = lpfc_pci_resume_one,
+	.resume		= lpfc_pci_resume_one,
 	.err_handler    = &lpfc_err_handler,
 };
 
diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h
index 1aa8570..954ba57 100644
--- a/drivers/scsi/lpfc/lpfc_logmsg.h
+++ b/drivers/scsi/lpfc/lpfc_logmsg.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -18,33 +18,39 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LOG_ELS                       0x1	/* ELS events */
-#define LOG_DISCOVERY                 0x2	/* Link discovery events */
-#define LOG_MBOX                      0x4	/* Mailbox events */
-#define LOG_INIT                      0x8	/* Initialization events */
-#define LOG_LINK_EVENT                0x10	/* Link events */
-#define LOG_IP                        0x20	/* IP traffic history */
-#define LOG_FCP                       0x40	/* FCP traffic history */
-#define LOG_NODE                      0x80	/* Node table events */
-#define LOG_TEMP                      0x100	/* Temperature sensor events */
-#define LOG_BG			      0x200	/* BlockGuard events */
-#define LOG_MISC                      0x400	/* Miscellaneous events */
-#define LOG_SLI                       0x800	/* SLI events */
-#define LOG_FCP_ERROR                 0x1000	/* log errors, not underruns */
-#define LOG_LIBDFC                    0x2000	/* Libdfc events */
-#define LOG_VPORT                     0x4000	/* NPIV events */
-#define LOG_ALL_MSG                   0xffff	/* LOG all messages */
+#define LOG_ELS		0x00000001	/* ELS events */
+#define LOG_DISCOVERY	0x00000002	/* Link discovery events */
+#define LOG_MBOX	0x00000004	/* Mailbox events */
+#define LOG_INIT	0x00000008	/* Initialization events */
+#define LOG_LINK_EVENT	0x00000010	/* Link events */
+#define LOG_IP		0x00000020	/* IP traffic history */
+#define LOG_FCP		0x00000040	/* FCP traffic history */
+#define LOG_NODE	0x00000080	/* Node table events */
+#define LOG_TEMP	0x00000100	/* Temperature sensor events */
+#define LOG_BG		0x00000200	/* BlockGuard events */
+#define LOG_MISC	0x00000400	/* Miscellaneous events */
+#define LOG_SLI		0x00000800	/* SLI events */
+#define LOG_FCP_ERROR	0x00001000	/* log errors, not underruns */
+#define LOG_LIBDFC	0x00002000	/* Libdfc events */
+#define LOG_VPORT	0x00004000	/* NPIV events */
+#define LOF_SECURITY	0x00008000	/* Security events */
+#define LOG_EVENT	0x00010000	/* CT,TEMP,DUMP, logging */
+#define LOG_ALL_MSG	0xffffffff	/* LOG all messages */
 
 #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \
-	do { \
-	{ if (((mask) &(vport)->cfg_log_verbose) || (level[1] <= '3')) \
+do { \
+	{ if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '3')) \
 		dev_printk(level, &((vport)->phba->pcidev)->dev, "%d:(%d):" \
 			   fmt, (vport)->phba->brd_no, vport->vpi, ##arg); } \
-	} while (0)
+} while (0)
 
 #define lpfc_printf_log(phba, level, mask, fmt, arg...) \
-	do { \
-	{ if (((mask) &(phba)->pport->cfg_log_verbose) || (level[1] <= '3')) \
+do { \
+	{ uint32_t log_verbose = (phba)->pport ? \
+				 (phba)->pport->cfg_log_verbose : \
+				 (phba)->cfg_log_verbose; \
+	  if (((mask) & log_verbose) || (level[1] <= '3')) \
 		dev_printk(level, &((phba)->pcidev)->dev, "%d:" \
-			   fmt, phba->brd_no, ##arg); } \
-	} while (0)
+			   fmt, phba->brd_no, ##arg); \
+	} \
+} while (0)
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 134fc7f..b9b451c 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -28,8 +28,10 @@
 
 #include <scsi/scsi.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -39,6 +41,44 @@
 #include "lpfc_compat.h"
 
 /**
+ * lpfc_dump_static_vport - Dump HBA's static vport information.
+ * @phba: pointer to lpfc hba data structure.
+ * @pmb: pointer to the driver internal queue element for mailbox command.
+ * @offset: offset for dumping vport info.
+ *
+ * The dump mailbox command provides a method for the device driver to obtain
+ * various types of information from the HBA device.
+ *
+ * This routine prepares the mailbox command for dumping list of static
+ * vports to be created.
+ **/
+void
+lpfc_dump_static_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
+		uint16_t offset)
+{
+	MAILBOX_t *mb;
+	void *ctx;
+
+	mb = &pmb->u.mb;
+	ctx = pmb->context2;
+
+	/* Setup to dump vport info region */
+	memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
+	mb->mbxCommand = MBX_DUMP_MEMORY;
+	mb->un.varDmp.cv = 1;
+	mb->un.varDmp.type = DMP_NV_PARAMS;
+	mb->un.varDmp.entry_index = offset;
+	mb->un.varDmp.region_id = DMP_REGION_VPORT;
+	mb->un.varDmp.word_cnt = DMP_RSP_SIZE/sizeof(uint32_t);
+	mb->un.varDmp.co = 0;
+	mb->un.varDmp.resp_offset = 0;
+	pmb->context2 = ctx;
+	mb->mbxOwner = OWN_HOST;
+
+	return;
+}
+
+/**
  * lpfc_dump_mem - Prepare a mailbox command for retrieving HBA's VPD memory
  * @phba: pointer to lpfc hba data structure.
  * @pmb: pointer to the driver internal queue element for mailbox command.
@@ -58,7 +98,7 @@
 	MAILBOX_t *mb;
 	void *ctx;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	ctx = pmb->context2;
 
 	/* Setup to dump VPD region */
@@ -90,7 +130,7 @@
 	MAILBOX_t *mb;
 	void *ctx;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	/* Save context so that we can restore after memset */
 	ctx = pmb->context2;
 
@@ -125,7 +165,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_READ_NV;
 	mb->mbxOwner = OWN_HOST;
@@ -151,7 +191,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_ASYNCEVT_ENABLE;
 	mb->un.varCfgAsyncEvent.ring = ring;
@@ -177,7 +217,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_HEARTBEAT;
 	mb->mbxOwner = OWN_HOST;
@@ -211,7 +251,7 @@
 	struct lpfc_sli *psli;
 
 	psli = &phba->sli;
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	INIT_LIST_HEAD(&mp->list);
@@ -248,7 +288,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varClearLA.eventTag = phba->fc_eventTag;
@@ -275,7 +315,7 @@
 lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	struct lpfc_vport  *vport = phba->pport;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	/* NEW_FEATURE
@@ -321,7 +361,7 @@
 int
 lpfc_config_msi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t attentionConditions[2];
 
 	/* Sanity check */
@@ -405,7 +445,7 @@
 	struct lpfc_sli *psli;
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	psli = &phba->sli;
@@ -492,7 +532,7 @@
 	struct lpfc_sli *psli;
 
 	psli = &phba->sli;
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxOwner = OWN_HOST;
@@ -515,7 +555,7 @@
 	mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
 	mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys);
 	mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys);
-	mb->un.varRdSparm.vpi = vpi;
+	mb->un.varRdSparm.vpi = vpi + phba->vpi_base;
 
 	/* save address for completion */
 	pmb->context1 = mp;
@@ -544,10 +584,12 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varUnregDID.did = did;
+	if (vpi != 0xffff)
+		vpi += phba->vpi_base;
 	mb->un.varUnregDID.vpi = vpi;
 
 	mb->mbxCommand = MBX_UNREG_D_ID;
@@ -573,7 +615,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxCommand = MBX_READ_CONFIG;
@@ -598,7 +640,7 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxCommand = MBX_READ_LNK_STAT;
@@ -607,7 +649,7 @@
 }
 
 /**
- * lpfc_reg_login - Prepare a mailbox command for registering remote login
+ * lpfc_reg_rpi - Prepare a mailbox command for registering remote login
  * @phba: pointer to lpfc hba data structure.
  * @vpi: virtual N_Port identifier.
  * @did: remote port identifier.
@@ -631,17 +673,23 @@
  *    1 - DMA memory allocation failed
  **/
 int
-lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
+lpfc_reg_rpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
 	       uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint8_t *sparam;
 	struct lpfc_dmabuf *mp;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varRegLogin.rpi = 0;
-	mb->un.varRegLogin.vpi = vpi;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		mb->un.varRegLogin.rpi = lpfc_sli4_alloc_rpi(phba);
+		if (mb->un.varRegLogin.rpi == LPFC_RPI_ALLOC_ERROR)
+			return 1;
+	}
+
+	mb->un.varRegLogin.vpi = vpi + phba->vpi_base;
 	mb->un.varRegLogin.did = did;
 	mb->un.varWords[30] = flag;	/* Set flag to issue action on cmpl */
 
@@ -697,15 +745,16 @@
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varUnregLogin.rpi = (uint16_t) rpi;
 	mb->un.varUnregLogin.rsvd1 = 0;
-	mb->un.varUnregLogin.vpi = vpi;
+	mb->un.varUnregLogin.vpi = vpi + phba->vpi_base;
 
 	mb->mbxCommand = MBX_UNREG_LOGIN;
 	mb->mbxOwner = OWN_HOST;
+
 	return;
 }
 
@@ -725,15 +774,15 @@
  * This routine prepares the mailbox command for registering a virtual N_Port.
  **/
 void
-lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
-	     LPFC_MBOXQ_t *pmb)
+lpfc_reg_vpi(struct lpfc_vport *vport, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
-	mb->un.varRegVpi.vpi = vpi;
-	mb->un.varRegVpi.sid = sid;
+	mb->un.varRegVpi.vpi = vport->vpi + vport->phba->vpi_base;
+	mb->un.varRegVpi.sid = vport->fc_myDID;
+	mb->un.varRegVpi.vfi = vport->vfi + vport->phba->vfi_base;
 
 	mb->mbxCommand = MBX_REG_VPI;
 	mb->mbxOwner = OWN_HOST;
@@ -760,10 +809,10 @@
 void
 lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
-	mb->un.varUnregVpi.vpi = vpi;
+	mb->un.varUnregVpi.vpi = vpi + phba->vpi_base;
 
 	mb->mbxCommand = MBX_UNREG_VPI;
 	mb->mbxOwner = OWN_HOST;
@@ -852,7 +901,7 @@
 void
 lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->un.varRdRev.cv = 1;
 	mb->un.varRdRev.v3req = 1; /* Request SLI3 info */
@@ -945,7 +994,7 @@
 		uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb)
 {
 	int i;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct config_hbq_var *hbqmb = &mb->un.varCfgHbq;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
@@ -1020,7 +1069,7 @@
 lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb)
 {
 	int i;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_sli *psli;
 	struct lpfc_sli_ring *pring;
 
@@ -1075,7 +1124,7 @@
 lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	dma_addr_t pdma_addr;
 	uint32_t bar_low, bar_high;
 	size_t offset;
@@ -1099,21 +1148,22 @@
 
 	/* If HBA supports SLI=3 ask for it */
 
-	if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) {
+	if (phba->sli_rev == LPFC_SLI_REV3 && phba->vpd.sli3Feat.cerbm) {
 		if (phba->cfg_enable_bg)
 			mb->un.varCfgPort.cbg = 1; /* configure BlockGuard */
+		mb->un.varCfgPort.cdss = 1; /* Configure Security */
 		mb->un.varCfgPort.cerbm = 1; /* Request HBQs */
 		mb->un.varCfgPort.ccrp = 1; /* Command Ring Polling */
 		mb->un.varCfgPort.cinb = 1; /* Interrupt Notification Block */
 		mb->un.varCfgPort.max_hbq = lpfc_sli_hbq_count();
 		if (phba->max_vpi && phba->cfg_enable_npiv &&
 		    phba->vpd.sli3Feat.cmv) {
-			mb->un.varCfgPort.max_vpi = phba->max_vpi;
+			mb->un.varCfgPort.max_vpi = LPFC_MAX_VPI;
 			mb->un.varCfgPort.cmv = 1;
 		} else
 			mb->un.varCfgPort.max_vpi = phba->max_vpi = 0;
 	} else
-		phba->sli_rev = 2;
+		phba->sli_rev = LPFC_SLI_REV2;
 	mb->un.varCfgPort.sli_mode = phba->sli_rev;
 
 	/* Now setup pcb */
@@ -1245,7 +1295,7 @@
 void
 lpfc_kill_board(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_KILL_BOARD;
@@ -1305,29 +1355,98 @@
 }
 
 /**
+ * __lpfc_mbox_cmpl_put - Put mailbox cmd into mailbox cmd complete list
+ * @phba: pointer to lpfc hba data structure.
+ * @mbq: pointer to the driver internal queue element for mailbox command.
+ *
+ * This routine put the completed mailbox command into the mailbox command
+ * complete list. This is the unlocked version of the routine. The mailbox
+ * complete list is used by the driver worker thread to process mailbox
+ * complete callback functions outside the driver interrupt handler.
+ **/
+void
+__lpfc_mbox_cmpl_put(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbq)
+{
+	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
+}
+
+/**
  * lpfc_mbox_cmpl_put - Put mailbox command into mailbox command complete list
  * @phba: pointer to lpfc hba data structure.
  * @mbq: pointer to the driver internal queue element for mailbox command.
  *
  * This routine put the completed mailbox command into the mailbox command
- * complete list. This routine is called from driver interrupt handler
- * context.The mailbox complete list is used by the driver worker thread
- * to process mailbox complete callback functions outside the driver interrupt
- * handler.
+ * complete list. This is the locked version of the routine. The mailbox
+ * complete list is used by the driver worker thread to process mailbox
+ * complete callback functions outside the driver interrupt handler.
  **/
 void
-lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
+lpfc_mbox_cmpl_put(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbq)
 {
 	unsigned long iflag;
 
 	/* This function expects to be called from interrupt context */
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
+	__lpfc_mbox_cmpl_put(phba, mbq);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 	return;
 }
 
 /**
+ * lpfc_mbox_cmd_check - Check the validality of a mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to the driver internal queue element for mailbox command.
+ *
+ * This routine is to check whether a mailbox command is valid to be issued.
+ * This check will be performed by both the mailbox issue API when a client
+ * is to issue a mailbox command to the mailbox transport.
+ *
+ * Return 0 - pass the check, -ENODEV - fail the check
+ **/
+int
+lpfc_mbox_cmd_check(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	/* Mailbox command that have a completion handler must also have a
+	 * vport specified.
+	 */
+	if (mboxq->mbox_cmpl && mboxq->mbox_cmpl != lpfc_sli_def_mbox_cmpl &&
+	    mboxq->mbox_cmpl != lpfc_sli_wake_mbox_wait) {
+		if (!mboxq->vport) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
+					"1814 Mbox x%x failed, no vport\n",
+					mboxq->u.mb.mbxCommand);
+			dump_stack();
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+
+/**
+ * lpfc_mbox_dev_check - Check the device state for issuing a mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is to check whether the HBA device is ready for posting a
+ * mailbox command. It is used by the mailbox transport API at the time the
+ * to post a mailbox command to the device.
+ *
+ * Return 0 - pass the check, -ENODEV - fail the check
+ **/
+int
+lpfc_mbox_dev_check(struct lpfc_hba *phba)
+{
+	/* If the PCI channel is in offline state, do not issue mbox */
+	if (unlikely(pci_channel_offline(phba->pcidev)))
+		return -ENODEV;
+
+	/* If the HBA is in error state, do not issue mbox */
+	if (phba->link_state == LPFC_HBA_ERROR)
+		return -ENODEV;
+
+	return 0;
+}
+
+/**
  * lpfc_mbox_tmo_val - Retrieve mailbox command timeout value
  * @phba: pointer to lpfc hba data structure.
  * @cmd: mailbox command code.
@@ -1350,6 +1469,475 @@
 	case MBX_WRITE_WWN:     /* 0x98 */
 	case MBX_LOAD_EXP_ROM:	/* 0x9C */
 		return LPFC_MBOX_TMO_FLASH_CMD;
+	case MBX_SLI4_CONFIG:	/* 0x9b */
+		return LPFC_MBOX_SLI4_CONFIG_TMO;
 	}
 	return LPFC_MBOX_TMO;
 }
+
+/**
+ * lpfc_sli4_mbx_sge_set - Set a sge entry in non-embedded mailbox command
+ * @mbox: pointer to lpfc mbox command.
+ * @sgentry: sge entry index.
+ * @phyaddr: physical address for the sge
+ * @length: Length of the sge.
+ *
+ * This routine sets up an entry in the non-embedded mailbox command at the sge
+ * index location.
+ **/
+void
+lpfc_sli4_mbx_sge_set(struct lpfcMboxq *mbox, uint32_t sgentry,
+		      dma_addr_t phyaddr, uint32_t length)
+{
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
+
+	nembed_sge = (struct lpfc_mbx_nembed_cmd *)
+				&mbox->u.mqe.un.nembed_cmd;
+	nembed_sge->sge[sgentry].pa_lo = putPaddrLow(phyaddr);
+	nembed_sge->sge[sgentry].pa_hi = putPaddrHigh(phyaddr);
+	nembed_sge->sge[sgentry].length = length;
+}
+
+/**
+ * lpfc_sli4_mbx_sge_get - Get a sge entry from non-embedded mailbox command
+ * @mbox: pointer to lpfc mbox command.
+ * @sgentry: sge entry index.
+ *
+ * This routine gets an entry from the non-embedded mailbox command at the sge
+ * index location.
+ **/
+void
+lpfc_sli4_mbx_sge_get(struct lpfcMboxq *mbox, uint32_t sgentry,
+		      struct lpfc_mbx_sge *sge)
+{
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
+
+	nembed_sge = (struct lpfc_mbx_nembed_cmd *)
+				&mbox->u.mqe.un.nembed_cmd;
+	sge->pa_lo = nembed_sge->sge[sgentry].pa_lo;
+	sge->pa_hi = nembed_sge->sge[sgentry].pa_hi;
+	sge->length = nembed_sge->sge[sgentry].length;
+}
+
+/**
+ * lpfc_sli4_mbox_cmd_free - Free a sli4 mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ *
+ * This routine frees SLI4 specific mailbox command for sending IOCTL command.
+ **/
+void
+lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_sli4_config *sli4_cfg;
+	struct lpfc_mbx_sge sge;
+	dma_addr_t phyaddr;
+	uint32_t sgecount, sgentry;
+
+	sli4_cfg = &mbox->u.mqe.un.sli4_config;
+
+	/* For embedded mbox command, just free the mbox command */
+	if (bf_get(lpfc_mbox_hdr_emb, &sli4_cfg->header.cfg_mhdr)) {
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* For non-embedded mbox command, we need to free the pages first */
+	sgecount = bf_get(lpfc_mbox_hdr_sge_cnt, &sli4_cfg->header.cfg_mhdr);
+	/* There is nothing we can do if there is no sge address array */
+	if (unlikely(!mbox->sge_array)) {
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+	/* Each non-embedded DMA memory was allocated in the length of a page */
+	for (sgentry = 0; sgentry < sgecount; sgentry++) {
+		lpfc_sli4_mbx_sge_get(mbox, sgentry, &sge);
+		phyaddr = getPaddr(sge.pa_hi, sge.pa_lo);
+		dma_free_coherent(&phba->pcidev->dev, PAGE_SIZE,
+				  mbox->sge_array->addr[sgentry], phyaddr);
+	}
+	/* Free the sge address array memory */
+	kfree(mbox->sge_array);
+	/* Finally, free the mailbox command itself */
+	mempool_free(mbox, phba->mbox_mem_pool);
+}
+
+/**
+ * lpfc_sli4_config - Initialize the  SLI4 Config Mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ * @subsystem: The sli4 config sub mailbox subsystem.
+ * @opcode: The sli4 config sub mailbox command opcode.
+ * @length: Length of the sli4 config mailbox command.
+ *
+ * This routine sets up the header fields of SLI4 specific mailbox command
+ * for sending IOCTL command.
+ *
+ * Return: the actual length of the mbox command allocated (mostly useful
+ *         for none embedded mailbox command).
+ **/
+int
+lpfc_sli4_config(struct lpfc_hba *phba, struct lpfcMboxq *mbox,
+		 uint8_t subsystem, uint8_t opcode, uint32_t length, bool emb)
+{
+	struct lpfc_mbx_sli4_config *sli4_config;
+	union lpfc_sli4_cfg_shdr *cfg_shdr = NULL;
+	uint32_t alloc_len;
+	uint32_t resid_len;
+	uint32_t pagen, pcount;
+	void *viraddr;
+	dma_addr_t phyaddr;
+
+	/* Set up SLI4 mailbox command header fields */
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_SLI4_CONFIG);
+
+	/* Set up SLI4 ioctl command header fields */
+	sli4_config = &mbox->u.mqe.un.sli4_config;
+
+	/* Setup for the embedded mbox command */
+	if (emb) {
+		/* Set up main header fields */
+		bf_set(lpfc_mbox_hdr_emb, &sli4_config->header.cfg_mhdr, 1);
+		sli4_config->header.cfg_mhdr.payload_length =
+					LPFC_MBX_CMD_HDR_LENGTH + length;
+		/* Set up sub-header fields following main header */
+		bf_set(lpfc_mbox_hdr_opcode,
+			&sli4_config->header.cfg_shdr.request, opcode);
+		bf_set(lpfc_mbox_hdr_subsystem,
+			&sli4_config->header.cfg_shdr.request, subsystem);
+		sli4_config->header.cfg_shdr.request.request_length = length;
+		return length;
+	}
+
+	/* Setup for the none-embedded mbox command */
+	pcount = (PAGE_ALIGN(length))/PAGE_SIZE;
+	pcount = (pcount > LPFC_SLI4_MBX_SGE_MAX_PAGES) ?
+				LPFC_SLI4_MBX_SGE_MAX_PAGES : pcount;
+	/* Allocate record for keeping SGE virtual addresses */
+	mbox->sge_array = kmalloc(sizeof(struct lpfc_mbx_nembed_sge_virt),
+				  GFP_KERNEL);
+	if (!mbox->sge_array)
+		return 0;
+
+	for (pagen = 0, alloc_len = 0; pagen < pcount; pagen++) {
+		/* The DMA memory is always allocated in the length of a
+		 * page even though the last SGE might not fill up to a
+		 * page, this is used as a priori size of PAGE_SIZE for
+		 * the later DMA memory free.
+		 */
+		viraddr = dma_alloc_coherent(&phba->pcidev->dev, PAGE_SIZE,
+					     &phyaddr, GFP_KERNEL);
+		/* In case of malloc fails, proceed with whatever we have */
+		if (!viraddr)
+			break;
+		mbox->sge_array->addr[pagen] = viraddr;
+		/* Keep the first page for later sub-header construction */
+		if (pagen == 0)
+			cfg_shdr = (union lpfc_sli4_cfg_shdr *)viraddr;
+		resid_len = length - alloc_len;
+		if (resid_len > PAGE_SIZE) {
+			lpfc_sli4_mbx_sge_set(mbox, pagen, phyaddr,
+					      PAGE_SIZE);
+			alloc_len += PAGE_SIZE;
+		} else {
+			lpfc_sli4_mbx_sge_set(mbox, pagen, phyaddr,
+					      resid_len);
+			alloc_len = length;
+		}
+	}
+
+	/* Set up main header fields in mailbox command */
+	sli4_config->header.cfg_mhdr.payload_length = alloc_len;
+	bf_set(lpfc_mbox_hdr_sge_cnt, &sli4_config->header.cfg_mhdr, pagen);
+
+	/* Set up sub-header fields into the first page */
+	if (pagen > 0) {
+		bf_set(lpfc_mbox_hdr_opcode, &cfg_shdr->request, opcode);
+		bf_set(lpfc_mbox_hdr_subsystem, &cfg_shdr->request, subsystem);
+		cfg_shdr->request.request_length =
+				alloc_len - sizeof(union  lpfc_sli4_cfg_shdr);
+	}
+	/* The sub-header is in DMA memory, which needs endian converstion */
+	lpfc_sli_pcimem_bcopy(cfg_shdr, cfg_shdr,
+			      sizeof(union  lpfc_sli4_cfg_shdr));
+
+	return alloc_len;
+}
+
+/**
+ * lpfc_sli4_mbox_opcode_get - Get the opcode from a sli4 mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ *
+ * This routine gets the opcode from a SLI4 specific mailbox command for
+ * sending IOCTL command. If the mailbox command is not MBX_SLI4_CONFIG
+ * (0x9B) or if the IOCTL sub-header is not present, opcode 0x0 shall be
+ * returned.
+ **/
+uint8_t
+lpfc_sli4_mbox_opcode_get(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_sli4_config *sli4_cfg;
+	union lpfc_sli4_cfg_shdr *cfg_shdr;
+
+	if (mbox->u.mb.mbxCommand != MBX_SLI4_CONFIG)
+		return 0;
+	sli4_cfg = &mbox->u.mqe.un.sli4_config;
+
+	/* For embedded mbox command, get opcode from embedded sub-header*/
+	if (bf_get(lpfc_mbox_hdr_emb, &sli4_cfg->header.cfg_mhdr)) {
+		cfg_shdr = &mbox->u.mqe.un.sli4_config.header.cfg_shdr;
+		return bf_get(lpfc_mbox_hdr_opcode, &cfg_shdr->request);
+	}
+
+	/* For non-embedded mbox command, get opcode from first dma page */
+	if (unlikely(!mbox->sge_array))
+		return 0;
+	cfg_shdr = (union lpfc_sli4_cfg_shdr *)mbox->sge_array->addr[0];
+	return bf_get(lpfc_mbox_hdr_opcode, &cfg_shdr->request);
+}
+
+/**
+ * lpfc_request_features: Configure SLI4 REQUEST_FEATURES mailbox
+ * @mboxq: pointer to lpfc mbox command.
+ *
+ * This routine sets up the mailbox for an SLI4 REQUEST_FEATURES
+ * mailbox command.
+ **/
+void
+lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq)
+{
+	/* Set up SLI4 mailbox command header fields */
+	memset(mboxq, 0, sizeof(LPFC_MBOXQ_t));
+	bf_set(lpfc_mqe_command, &mboxq->u.mqe, MBX_SLI4_REQ_FTRS);
+
+	/* Set up host requested features. */
+	bf_set(lpfc_mbx_rq_ftr_rq_fcpi, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	/* Virtual fabrics and FIPs are not supported yet. */
+	bf_set(lpfc_mbx_rq_ftr_rq_ifip, &mboxq->u.mqe.un.req_ftrs, 0);
+
+	/* Enable DIF (block guard) only if configured to do so. */
+	if (phba->cfg_enable_bg)
+		bf_set(lpfc_mbx_rq_ftr_rq_dif, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	/* Enable NPIV only if configured to do so. */
+	if (phba->max_vpi && phba->cfg_enable_npiv)
+		bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	return;
+}
+
+/**
+ * lpfc_init_vfi - Initialize the INIT_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vport: Vport associated with the VF.
+ *
+ * This routine initializes @mbox to all zeros and then fills in the mailbox
+ * fields from @vport. INIT_VFI configures virtual fabrics identified by VFI
+ * in the context of an FCF. The driver issues this command to setup a VFI
+ * before issuing a FLOGI to login to the VSAN. The driver should also issue a
+ * REG_VFI after a successful VSAN login.
+ **/
+void
+lpfc_init_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport)
+{
+	struct lpfc_mbx_init_vfi *init_vfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	init_vfi = &mbox->u.mqe.un.init_vfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_INIT_VFI);
+	bf_set(lpfc_init_vfi_vr, init_vfi, 1);
+	bf_set(lpfc_init_vfi_vt, init_vfi, 1);
+	bf_set(lpfc_init_vfi_vfi, init_vfi, vport->vfi + vport->phba->vfi_base);
+	bf_set(lpfc_init_vfi_fcfi, init_vfi, vport->phba->fcf.fcfi);
+}
+
+/**
+ * lpfc_reg_vfi - Initialize the REG_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vport: vport associated with the VF.
+ * @phys: BDE DMA bus address used to send the service parameters to the HBA.
+ *
+ * This routine initializes @mbox to all zeros and then fills in the mailbox
+ * fields from @vport, and uses @buf as a DMAable buffer to send the vport's
+ * fc service parameters to the HBA for this VFI. REG_VFI configures virtual
+ * fabrics identified by VFI in the context of an FCF.
+ **/
+void
+lpfc_reg_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport, dma_addr_t phys)
+{
+	struct lpfc_mbx_reg_vfi *reg_vfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	reg_vfi = &mbox->u.mqe.un.reg_vfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_VFI);
+	bf_set(lpfc_reg_vfi_vp, reg_vfi, 1);
+	bf_set(lpfc_reg_vfi_vfi, reg_vfi, vport->vfi + vport->phba->vfi_base);
+	bf_set(lpfc_reg_vfi_fcfi, reg_vfi, vport->phba->fcf.fcfi);
+	bf_set(lpfc_reg_vfi_vpi, reg_vfi, vport->vpi + vport->phba->vpi_base);
+	reg_vfi->bde.addrHigh = putPaddrHigh(phys);
+	reg_vfi->bde.addrLow = putPaddrLow(phys);
+	reg_vfi->bde.tus.f.bdeSize = sizeof(vport->fc_sparam);
+	reg_vfi->bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+	bf_set(lpfc_reg_vfi_nport_id, reg_vfi, vport->fc_myDID);
+}
+
+/**
+ * lpfc_init_vpi - Initialize the INIT_VPI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vpi: VPI to be initialized.
+ *
+ * The INIT_VPI mailbox command supports virtual N_Ports. The driver uses the
+ * command to activate a virtual N_Port. The HBA assigns a MAC address to use
+ * with the virtual N Port.  The SLI Host issues this command before issuing a
+ * FDISC to connect to the Fabric. The SLI Host should issue a REG_VPI after a
+ * successful virtual NPort login.
+ **/
+void
+lpfc_init_vpi(struct lpfcMboxq *mbox, uint16_t vpi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_INIT_VPI);
+	bf_set(lpfc_init_vpi_vpi, &mbox->u.mqe.un.init_vpi, vpi);
+}
+
+/**
+ * lpfc_unreg_vfi - Initialize the UNREG_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vfi: VFI to be unregistered.
+ *
+ * The UNREG_VFI mailbox command causes the SLI Host to put a virtual fabric
+ * (logical NPort) into the inactive state. The SLI Host must have logged out
+ * and unregistered all remote N_Ports to abort any activity on the virtual
+ * fabric. The SLI Port posts the mailbox response after marking the virtual
+ * fabric inactive.
+ **/
+void
+lpfc_unreg_vfi(struct lpfcMboxq *mbox, uint16_t vfi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_UNREG_VFI);
+	bf_set(lpfc_unreg_vfi_vfi, &mbox->u.mqe.un.unreg_vfi, vfi);
+}
+
+/**
+ * lpfc_dump_fcoe_param - Dump config region 23 to get FCoe parameters.
+ * @phba: pointer to the hba structure containing.
+ * @mbox: pointer to lpfc mbox command to initialize.
+ *
+ * This function create a SLI4 dump mailbox command to dump FCoE
+ * parameters stored in region 23.
+ **/
+int
+lpfc_dump_fcoe_param(struct lpfc_hba *phba,
+		struct lpfcMboxq *mbox)
+{
+	struct lpfc_dmabuf *mp = NULL;
+	MAILBOX_t *mb;
+
+	memset(mbox, 0, sizeof(*mbox));
+	mb = &mbox->u.mb;
+
+	mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (mp)
+		mp->virt = lpfc_mbuf_alloc(phba, 0, &mp->phys);
+
+	if (!mp || !mp->virt) {
+		kfree(mp);
+		/* dump_fcoe_param failed to allocate memory */
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+			"2569 lpfc_dump_fcoe_param: memory"
+			" allocation failed \n");
+		return 1;
+	}
+
+	memset(mp->virt, 0, LPFC_BPL_SIZE);
+	INIT_LIST_HEAD(&mp->list);
+
+	/* save address for completion */
+	mbox->context1 = (uint8_t *) mp;
+
+	mb->mbxCommand = MBX_DUMP_MEMORY;
+	mb->un.varDmp.type = DMP_NV_PARAMS;
+	mb->un.varDmp.region_id = DMP_REGION_FCOEPARAM;
+	mb->un.varDmp.sli4_length = DMP_FCOEPARAM_RGN_SIZE;
+	mb->un.varWords[3] = putPaddrLow(mp->phys);
+	mb->un.varWords[4] = putPaddrHigh(mp->phys);
+	return 0;
+}
+
+/**
+ * lpfc_reg_fcfi - Initialize the REG_FCFI mailbox command
+ * @phba: pointer to the hba structure containing the FCF index and RQ ID.
+ * @mbox: pointer to lpfc mbox command to initialize.
+ *
+ * The REG_FCFI mailbox command supports Fibre Channel Forwarders (FCFs). The
+ * SLI Host uses the command to activate an FCF after it has acquired FCF
+ * information via a READ_FCF mailbox command. This mailbox command also is used
+ * to indicate where received unsolicited frames from this FCF will be sent. By
+ * default this routine will set up the FCF to forward all unsolicited frames
+ * the the RQ ID passed in the @phba. This can be overridden by the caller for
+ * more complicated setups.
+ **/
+void
+lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_reg_fcfi *reg_fcfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	reg_fcfi = &mbox->u.mqe.un.reg_fcfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI);
+	bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, phba->sli4_hba.hdr_rq->queue_id);
+	bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_info_index, reg_fcfi, phba->fcf.fcf_indx);
+	/* reg_fcf addr mode is bit wise inverted value of fcf addr_mode */
+	bf_set(lpfc_reg_fcfi_mam, reg_fcfi,
+		(~phba->fcf.addr_mode) & 0x3);
+	if (phba->fcf.fcf_flag & FCF_VALID_VLAN) {
+		bf_set(lpfc_reg_fcfi_vv, reg_fcfi, 1);
+		bf_set(lpfc_reg_fcfi_vlan_tag, reg_fcfi, phba->fcf.vlan_id);
+	}
+}
+
+/**
+ * lpfc_unreg_fcfi - Initialize the UNREG_FCFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @fcfi: FCFI to be unregistered.
+ *
+ * The UNREG_FCFI mailbox command supports Fibre Channel Forwarders (FCFs).
+ * The SLI Host uses the command to inactivate an FCFI.
+ **/
+void
+lpfc_unreg_fcfi(struct lpfcMboxq *mbox, uint16_t fcfi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_UNREG_FCFI);
+	bf_set(lpfc_unreg_fcfi, &mbox->u.mqe.un.unreg_fcfi, fcfi);
+}
+
+/**
+ * lpfc_resume_rpi - Initialize the RESUME_RPI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @ndlp: The nodelist structure that describes the RPI to resume.
+ *
+ * The RESUME_RPI mailbox command is used to restart I/O to an RPI after a
+ * link event.
+ **/
+void
+lpfc_resume_rpi(struct lpfcMboxq *mbox, struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_mbx_resume_rpi *resume_rpi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	resume_rpi = &mbox->u.mqe.un.resume_rpi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_RESUME_RPI);
+	bf_set(lpfc_resume_rpi_rpi, resume_rpi, ndlp->nlp_rpi);
+	bf_set(lpfc_resume_rpi_vpi, resume_rpi,
+	       ndlp->vport->vpi + ndlp->vport->phba->vpi_base);
+	bf_set(lpfc_resume_rpi_vfi, resume_rpi,
+	       ndlp->vport->vfi + ndlp->vport->phba->vfi_base);
+}
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 35a9767..e198c91 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -28,8 +28,10 @@
 
 #include <scsi/scsi.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -45,7 +47,7 @@
  * @phba: HBA to allocate pools for
  *
  * Description: Creates and allocates PCI pools lpfc_scsi_dma_buf_pool,
- * lpfc_mbuf_pool, lpfc_hbq_pool.  Creates and allocates kmalloc-backed mempools
+ * lpfc_mbuf_pool, lpfc_hrb_pool.  Creates and allocates kmalloc-backed mempools
  * for LPFC_MBOXQ_t and lpfc_nodelist.  Also allocates the VPI bitmask.
  *
  * Notes: Not interrupt-safe.  Must be called with no locks held.  If any
@@ -56,19 +58,30 @@
  *   -ENOMEM on failure (if any memory allocations fail)
  **/
 int
-lpfc_mem_alloc(struct lpfc_hba * phba)
+lpfc_mem_alloc(struct lpfc_hba *phba, int align)
 {
 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
 	int longs;
 	int i;
 
-	phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool",
-				phba->pcidev, phba->cfg_sg_dma_buf_size, 8, 0);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->lpfc_scsi_dma_buf_pool =
+			pci_pool_create("lpfc_scsi_dma_buf_pool",
+				phba->pcidev,
+				phba->cfg_sg_dma_buf_size,
+				phba->cfg_sg_dma_buf_size,
+				0);
+	else
+		phba->lpfc_scsi_dma_buf_pool =
+			pci_pool_create("lpfc_scsi_dma_buf_pool",
+				phba->pcidev, phba->cfg_sg_dma_buf_size,
+				align, 0);
 	if (!phba->lpfc_scsi_dma_buf_pool)
 		goto fail;
 
 	phba->lpfc_mbuf_pool = pci_pool_create("lpfc_mbuf_pool", phba->pcidev,
-							LPFC_BPL_SIZE, 8,0);
+							LPFC_BPL_SIZE,
+							align, 0);
 	if (!phba->lpfc_mbuf_pool)
 		goto fail_free_dma_buf_pool;
 
@@ -97,23 +110,31 @@
 						sizeof(struct lpfc_nodelist));
 	if (!phba->nlp_mem_pool)
 		goto fail_free_mbox_pool;
-
-	phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev,
-					      LPFC_BPL_SIZE, 8, 0);
-	if (!phba->lpfc_hbq_pool)
+	phba->lpfc_hrb_pool = pci_pool_create("lpfc_hrb_pool",
+					      phba->pcidev,
+					      LPFC_HDR_BUF_SIZE, align, 0);
+	if (!phba->lpfc_hrb_pool)
 		goto fail_free_nlp_mem_pool;
+	phba->lpfc_drb_pool = pci_pool_create("lpfc_drb_pool",
+					      phba->pcidev,
+					      LPFC_DATA_BUF_SIZE, align, 0);
+	if (!phba->lpfc_drb_pool)
+		goto fail_free_hbq_pool;
 
 	/* vpi zero is reserved for the physical port so add 1 to max */
 	longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG;
 	phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL);
 	if (!phba->vpi_bmask)
-		goto fail_free_hbq_pool;
+		goto fail_free_dbq_pool;
 
 	return 0;
 
+ fail_free_dbq_pool:
+	pci_pool_destroy(phba->lpfc_drb_pool);
+	phba->lpfc_drb_pool = NULL;
  fail_free_hbq_pool:
-	lpfc_sli_hbqbuf_free_all(phba);
-	pci_pool_destroy(phba->lpfc_hbq_pool);
+	pci_pool_destroy(phba->lpfc_hrb_pool);
+	phba->lpfc_hrb_pool = NULL;
  fail_free_nlp_mem_pool:
 	mempool_destroy(phba->nlp_mem_pool);
 	phba->nlp_mem_pool = NULL;
@@ -136,27 +157,73 @@
 }
 
 /**
- * lpfc_mem_free - Frees all PCI and memory allocated by lpfc_mem_alloc
+ * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
  * @phba: HBA to free memory for
  *
- * Description: Frees PCI pools lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool,
- * lpfc_hbq_pool.  Frees kmalloc-backed mempools for LPFC_MBOXQ_t and
- * lpfc_nodelist.  Also frees the VPI bitmask
+ * Description: Free the memory allocated by lpfc_mem_alloc routine. This
+ * routine is a the counterpart of lpfc_mem_alloc.
  *
  * Returns: None
  **/
 void
-lpfc_mem_free(struct lpfc_hba * phba)
+lpfc_mem_free(struct lpfc_hba *phba)
+{
+	int i;
+	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
+
+	/* Free VPI bitmask memory */
+	kfree(phba->vpi_bmask);
+
+	/* Free HBQ pools */
+	lpfc_sli_hbqbuf_free_all(phba);
+	pci_pool_destroy(phba->lpfc_drb_pool);
+	phba->lpfc_drb_pool = NULL;
+	pci_pool_destroy(phba->lpfc_hrb_pool);
+	phba->lpfc_hrb_pool = NULL;
+
+	/* Free NLP memory pool */
+	mempool_destroy(phba->nlp_mem_pool);
+	phba->nlp_mem_pool = NULL;
+
+	/* Free mbox memory pool */
+	mempool_destroy(phba->mbox_mem_pool);
+	phba->mbox_mem_pool = NULL;
+
+	/* Free MBUF memory pool */
+	for (i = 0; i < pool->current_count; i++)
+		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+			      pool->elements[i].phys);
+	kfree(pool->elements);
+
+	pci_pool_destroy(phba->lpfc_mbuf_pool);
+	phba->lpfc_mbuf_pool = NULL;
+
+	/* Free DMA buffer memory pool */
+	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
+	phba->lpfc_scsi_dma_buf_pool = NULL;
+
+	return;
+}
+
+/**
+ * lpfc_mem_free_all - Frees all PCI and driver memory
+ * @phba: HBA to free memory for
+ *
+ * Description: Free memory from PCI and driver memory pools and also those
+ * used : lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees
+ * kmalloc-backed mempools for LPFC_MBOXQ_t and lpfc_nodelist. Also frees
+ * the VPI bitmask.
+ *
+ * Returns: None
+ **/
+void
+lpfc_mem_free_all(struct lpfc_hba *phba)
 {
 	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
 	LPFC_MBOXQ_t *mbox, *next_mbox;
 	struct lpfc_dmabuf   *mp;
-	int i;
 
-	kfree(phba->vpi_bmask);
-	lpfc_sli_hbqbuf_free_all(phba);
-
+	/* Free memory used in mailbox queue back to mailbox memory pool */
 	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
 		if (mp) {
@@ -166,6 +233,7 @@
 		list_del(&mbox->list);
 		mempool_free(mbox, phba->mbox_mem_pool);
 	}
+	/* Free memory used in mailbox cmpl list back to mailbox memory pool */
 	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
 		if (mp) {
@@ -175,8 +243,10 @@
 		list_del(&mbox->list);
 		mempool_free(mbox, phba->mbox_mem_pool);
 	}
-
+	/* Free the active mailbox command back to the mailbox memory pool */
+	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
 	if (psli->mbox_active) {
 		mbox = psli->mbox_active;
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
@@ -188,27 +258,14 @@
 		psli->mbox_active = NULL;
 	}
 
-	for (i = 0; i < pool->current_count; i++)
-		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
-						 pool->elements[i].phys);
-	kfree(pool->elements);
-
-	pci_pool_destroy(phba->lpfc_hbq_pool);
-	mempool_destroy(phba->nlp_mem_pool);
-	mempool_destroy(phba->mbox_mem_pool);
-
-	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
-	pci_pool_destroy(phba->lpfc_mbuf_pool);
-
-	phba->lpfc_hbq_pool = NULL;
-	phba->nlp_mem_pool = NULL;
-	phba->mbox_mem_pool = NULL;
-	phba->lpfc_scsi_dma_buf_pool = NULL;
-	phba->lpfc_mbuf_pool = NULL;
+	/* Free and destroy all the allocated memory pools */
+	lpfc_mem_free(phba);
 
 	/* Free the iocb lookup array */
 	kfree(psli->iocbq_lookup);
 	psli->iocbq_lookup = NULL;
+
+	return;
 }
 
 /**
@@ -305,7 +362,7 @@
  * lpfc_els_hbq_alloc - Allocate an HBQ buffer
  * @phba: HBA to allocate HBQ buffer for
  *
- * Description: Allocates a DMA-mapped HBQ buffer from the lpfc_hbq_pool PCI
+ * Description: Allocates a DMA-mapped HBQ buffer from the lpfc_hrb_pool PCI
  * pool along a non-DMA-mapped container for it.
  *
  * Notes: Not interrupt-safe.  Must be called with no locks held.
@@ -323,7 +380,7 @@
 	if (!hbqbp)
 		return NULL;
 
-	hbqbp->dbuf.virt = pci_pool_alloc(phba->lpfc_hbq_pool, GFP_KERNEL,
+	hbqbp->dbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
 					  &hbqbp->dbuf.phys);
 	if (!hbqbp->dbuf.virt) {
 		kfree(hbqbp);
@@ -334,7 +391,7 @@
 }
 
 /**
- * lpfc_mem_hbq_free - Frees an HBQ buffer allocated with lpfc_els_hbq_alloc
+ * lpfc_els_hbq_free - Frees an HBQ buffer allocated with lpfc_els_hbq_alloc
  * @phba: HBA buffer was allocated for
  * @hbqbp: HBQ container returned by lpfc_els_hbq_alloc
  *
@@ -348,12 +405,73 @@
 void
 lpfc_els_hbq_free(struct lpfc_hba *phba, struct hbq_dmabuf *hbqbp)
 {
-	pci_pool_free(phba->lpfc_hbq_pool, hbqbp->dbuf.virt, hbqbp->dbuf.phys);
+	pci_pool_free(phba->lpfc_hrb_pool, hbqbp->dbuf.virt, hbqbp->dbuf.phys);
 	kfree(hbqbp);
 	return;
 }
 
 /**
+ * lpfc_sli4_rb_alloc - Allocate an SLI4 Receive buffer
+ * @phba: HBA to allocate a receive buffer for
+ *
+ * Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI
+ * pool along a non-DMA-mapped container for it.
+ *
+ * Notes: Not interrupt-safe.  Must be called with no locks held.
+ *
+ * Returns:
+ *   pointer to HBQ on success
+ *   NULL on failure
+ **/
+struct hbq_dmabuf *
+lpfc_sli4_rb_alloc(struct lpfc_hba *phba)
+{
+	struct hbq_dmabuf *dma_buf;
+
+	dma_buf = kmalloc(sizeof(struct hbq_dmabuf), GFP_KERNEL);
+	if (!dma_buf)
+		return NULL;
+
+	dma_buf->hbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
+					    &dma_buf->hbuf.phys);
+	if (!dma_buf->hbuf.virt) {
+		kfree(dma_buf);
+		return NULL;
+	}
+	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
+					    &dma_buf->dbuf.phys);
+	if (!dma_buf->dbuf.virt) {
+		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
+			      dma_buf->hbuf.phys);
+		kfree(dma_buf);
+		return NULL;
+	}
+	dma_buf->size = LPFC_BPL_SIZE;
+	return dma_buf;
+}
+
+/**
+ * lpfc_sli4_rb_free - Frees a receive buffer
+ * @phba: HBA buffer was allocated for
+ * @dmab: DMA Buffer container returned by lpfc_sli4_hbq_alloc
+ *
+ * Description: Frees both the container and the DMA-mapped buffers returned by
+ * lpfc_sli4_rb_alloc.
+ *
+ * Notes: Can be called with or without locks held.
+ *
+ * Returns: None
+ **/
+void
+lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab)
+{
+	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
+	pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
+	kfree(dmab);
+	return;
+}
+
+/**
  * lpfc_in_buf_free - Free a DMA buffer
  * @phba: HBA buffer is associated with
  * @mp: Buffer to free
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 08cdc77..09f659f 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1,7 +1,7 @@
  /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -28,8 +28,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -361,7 +363,7 @@
 	if (!mbox)
 		goto out;
 
-	rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID,
+	rc = lpfc_reg_rpi(phba, vport->vpi, icmd->un.rcvels.remoteID,
 			    (uint8_t *) sp, mbox, 0);
 	if (rc) {
 		mempool_free(mbox, phba->mbox_mem_pool);
@@ -495,11 +497,19 @@
 		lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL);
 	else
 		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
+	if ((ndlp->nlp_type & NLP_FABRIC) &&
+		vport->port_type == LPFC_NPIV_PORT) {
+		lpfc_linkdown_port(vport);
+		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+		spin_lock_irq(shost->host_lock);
+		ndlp->nlp_flag |= NLP_DELAY_TMO;
+		spin_unlock_irq(shost->host_lock);
 
-	if ((!(ndlp->nlp_type & NLP_FABRIC) &&
-	     ((ndlp->nlp_type & NLP_FCP_TARGET) ||
-	      !(ndlp->nlp_type & NLP_FCP_INITIATOR))) ||
-	    (ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
+		ndlp->nlp_last_elscmd = ELS_CMD_FDISC;
+	} else if ((!(ndlp->nlp_type & NLP_FABRIC) &&
+		((ndlp->nlp_type & NLP_FCP_TARGET) ||
+		!(ndlp->nlp_type & NLP_FCP_INITIATOR))) ||
+		(ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
 		/* Only try to re-login if this is NOT a Fabric Node */
 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
 		spin_lock_irq(shost->host_lock);
@@ -567,7 +577,7 @@
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
-	if (!ndlp->nlp_rpi) {
+	if (!(ndlp->nlp_flag & NLP_RPI_VALID)) {
 		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
 		return 0;
 	}
@@ -857,7 +867,7 @@
 
 	lpfc_unreg_rpi(vport, ndlp);
 
-	if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID,
+	if (lpfc_reg_rpi(phba, vport->vpi, irsp->un.elsreq64.remoteID,
 			   (uint8_t *) sp, mbox, 0) == 0) {
 		switch (ndlp->nlp_DID) {
 		case NameServer_DID:
@@ -1068,6 +1078,7 @@
 	struct lpfc_iocbq *cmdiocb, *rspiocb;
 	IOCB_t *irsp;
 	ADISC *ap;
+	int rc;
 
 	cmdiocb = (struct lpfc_iocbq *) arg;
 	rspiocb = cmdiocb->context_un.rsp_iocb;
@@ -1093,6 +1104,15 @@
 		return ndlp->nlp_state;
 	}
 
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rc = lpfc_sli4_resume_rpi(ndlp);
+		if (rc) {
+			/* Stay in state and retry. */
+			ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+			return ndlp->nlp_state;
+		}
+	}
+
 	if (ndlp->nlp_type & NLP_FCP_TARGET) {
 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
@@ -1100,6 +1120,7 @@
 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 	}
+
 	return ndlp->nlp_state;
 }
 
@@ -1190,7 +1211,7 @@
 
 	/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
 	if ((mb = phba->sli.mbox_active)) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			lpfc_nlp_put(ndlp);
 			mb->context2 = NULL;
@@ -1200,7 +1221,7 @@
 
 	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mp = (struct lpfc_dmabuf *) (mb->context1);
 			if (mp) {
@@ -1251,7 +1272,7 @@
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t did  = mb->un.varWords[1];
 
 	if (mb->mbxStatus) {
@@ -1283,6 +1304,7 @@
 	}
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 
 	/* Only if we are not a fabric nport do we issue PRLI */
 	if (!(ndlp->nlp_type & NLP_FABRIC)) {
@@ -1878,11 +1900,12 @@
 			    void *arg, uint32_t evt)
 {
 	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
-	MAILBOX_t    *mb = &pmb->mb;
+	MAILBOX_t    *mb = &pmb->u.mb;
 
-	if (!mb->mbxStatus)
+	if (!mb->mbxStatus) {
 		ndlp->nlp_rpi = mb->un.varWords[0];
-	else {
+		ndlp->nlp_flag |= NLP_RPI_VALID;
+	} else {
 		if (ndlp->nlp_flag & NLP_NODEV_REMOVE) {
 			lpfc_drop_node(vport, ndlp);
 			return NLP_STE_FREED_NODE;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 167b66d..e9fa676 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -31,8 +31,10 @@
 #include <scsi/scsi_transport_fc.h>
 
 #include "lpfc_version.h"
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -57,6 +59,8 @@
 	"SCSI_PROT_READ_CONVERT",
 	"SCSI_PROT_WRITE_CONVERT"
 };
+static void
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb);
 
 static void
 lpfc_debug_save_data(struct scsi_cmnd *cmnd)
@@ -325,7 +329,7 @@
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				new_queue_depth =
@@ -379,7 +383,7 @@
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				if (vports[i]->cfg_lun_queue_depth <=
@@ -427,7 +431,7 @@
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				rport = starget_to_rport(scsi_target(sdev));
@@ -438,22 +442,23 @@
 }
 
 /**
- * lpfc_new_scsi_buf - Scsi buffer allocator
+ * lpfc_new_scsi_buf_s3 - Scsi buffer allocator for HBA with SLI3 IF spec
  * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
  *
- * This routine allocates a scsi buffer, which contains all the necessary
- * information needed to initiate a SCSI I/O.  The non-DMAable buffer region
- * contains information to build the IOCB.  The DMAable region contains
- * memory for the FCP CMND, FCP RSP, and the initial BPL.  In addition to
- * allocating memory, the FCP CMND and FCP RSP BDEs are setup in the BPL
- * and the BPL BDE is setup in the IOCB.
+ * This routine allocates a scsi buffer for device with SLI-3 interface spec,
+ * the scsi buffer contains all the necessary information needed to initiate
+ * a SCSI I/O. The non-DMAable buffer region contains information to build
+ * the IOCB. The DMAable region contains memory for the FCP CMND, FCP RSP,
+ * and the initial BPL. In addition to allocating memory, the FCP CMND and
+ * FCP RSP BDEs are setup in the BPL and the BPL BDE is setup in the IOCB.
  *
  * Return codes:
- *   NULL - Error
- *   Pointer to lpfc_scsi_buf data structure - Success
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
  **/
-static struct lpfc_scsi_buf *
-lpfc_new_scsi_buf(struct lpfc_vport *vport)
+static int
+lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_scsi_buf *psb;
@@ -463,107 +468,401 @@
 	dma_addr_t pdma_phys_fcp_rsp;
 	dma_addr_t pdma_phys_bpl;
 	uint16_t iotag;
+	int bcnt;
 
-	psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
-	if (!psb)
-		return NULL;
+	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
+		if (!psb)
+			break;
 
-	/*
-	 * Get memory from the pci pool to map the virt space to pci bus space
-	 * for an I/O.  The DMA buffer includes space for the struct fcp_cmnd,
-	 * struct fcp_rsp and the number of bde's necessary to support the
-	 * sg_tablesize.
-	 */
-	psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool, GFP_KERNEL,
-							&psb->dma_handle);
-	if (!psb->data) {
-		kfree(psb);
-		return NULL;
-	}
+		/*
+		 * Get memory from the pci pool to map the virt space to pci
+		 * bus space for an I/O.  The DMA buffer includes space for the
+		 * struct fcp_cmnd, struct fcp_rsp and the number of bde's
+		 * necessary to support the sg_tablesize.
+		 */
+		psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool,
+					GFP_KERNEL, &psb->dma_handle);
+		if (!psb->data) {
+			kfree(psb);
+			break;
+		}
 
-	/* Initialize virtual ptrs to dma_buf region. */
-	memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
+		/* Initialize virtual ptrs to dma_buf region. */
+		memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
 
-	/* Allocate iotag for psb->cur_iocbq. */
-	iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
-	if (iotag == 0) {
-		pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
-			      psb->data, psb->dma_handle);
-		kfree (psb);
-		return NULL;
-	}
-	psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
+		/* Allocate iotag for psb->cur_iocbq. */
+		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
+		if (iotag == 0) {
+			pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
+					psb->data, psb->dma_handle);
+			kfree(psb);
+			break;
+		}
+		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
 
-	psb->fcp_cmnd = psb->data;
-	psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd);
-	psb->fcp_bpl = psb->data + sizeof(struct fcp_cmnd) +
-							sizeof(struct fcp_rsp);
-
-	/* Initialize local short-hand pointers. */
-	bpl = psb->fcp_bpl;
-	pdma_phys_fcp_cmd = psb->dma_handle;
-	pdma_phys_fcp_rsp = psb->dma_handle + sizeof(struct fcp_cmnd);
-	pdma_phys_bpl = psb->dma_handle + sizeof(struct fcp_cmnd) +
+		psb->fcp_cmnd = psb->data;
+		psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd);
+		psb->fcp_bpl = psb->data + sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp);
 
-	/*
-	 * The first two bdes are the FCP_CMD and FCP_RSP.  The balance are sg
-	 * list bdes.  Initialize the first two and leave the rest for
-	 * queuecommand.
-	 */
-	bpl[0].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_cmd));
-	bpl[0].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_cmd));
-	bpl[0].tus.f.bdeSize = sizeof(struct fcp_cmnd);
-	bpl[0].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-	bpl[0].tus.w = le32_to_cpu(bpl[0].tus.w);
+		/* Initialize local short-hand pointers. */
+		bpl = psb->fcp_bpl;
+		pdma_phys_fcp_cmd = psb->dma_handle;
+		pdma_phys_fcp_rsp = psb->dma_handle + sizeof(struct fcp_cmnd);
+		pdma_phys_bpl = psb->dma_handle + sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp);
 
-	/* Setup the physical region for the FCP RSP */
-	bpl[1].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_rsp));
-	bpl[1].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_rsp));
-	bpl[1].tus.f.bdeSize = sizeof(struct fcp_rsp);
-	bpl[1].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-	bpl[1].tus.w = le32_to_cpu(bpl[1].tus.w);
+		/*
+		 * The first two bdes are the FCP_CMD and FCP_RSP. The balance
+		 * are sg list bdes.  Initialize the first two and leave the
+		 * rest for queuecommand.
+		 */
+		bpl[0].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_cmd));
+		bpl[0].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_cmd));
+		bpl[0].tus.f.bdeSize = sizeof(struct fcp_cmnd);
+		bpl[0].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+		bpl[0].tus.w = le32_to_cpu(bpl[0].tus.w);
 
-	/*
-	 * Since the IOCB for the FCP I/O is built into this lpfc_scsi_buf,
-	 * initialize it with all known data now.
-	 */
-	iocb = &psb->cur_iocbq.iocb;
-	iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
-	if ((phba->sli_rev == 3) &&
-	    !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) {
-		/* fill in immediate fcp command BDE */
-		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_IMMED;
-		iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
-		iocb->un.fcpi64.bdl.addrLow = offsetof(IOCB_t,
-						       unsli3.fcp_ext.icd);
-		iocb->un.fcpi64.bdl.addrHigh = 0;
-		iocb->ulpBdeCount = 0;
-		iocb->ulpLe = 0;
-		/* fill in responce BDE */
-		iocb->unsli3.fcp_ext.rbde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-		iocb->unsli3.fcp_ext.rbde.tus.f.bdeSize =
-						sizeof(struct fcp_rsp);
-		iocb->unsli3.fcp_ext.rbde.addrLow =
-						putPaddrLow(pdma_phys_fcp_rsp);
-		iocb->unsli3.fcp_ext.rbde.addrHigh =
-						putPaddrHigh(pdma_phys_fcp_rsp);
-	} else {
-		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BLP_64;
-		iocb->un.fcpi64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64));
-		iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_bpl);
-		iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_bpl);
-		iocb->ulpBdeCount = 1;
-		iocb->ulpLe = 1;
+		/* Setup the physical region for the FCP RSP */
+		bpl[1].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_rsp));
+		bpl[1].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_rsp));
+		bpl[1].tus.f.bdeSize = sizeof(struct fcp_rsp);
+		bpl[1].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+		bpl[1].tus.w = le32_to_cpu(bpl[1].tus.w);
+
+		/*
+		 * Since the IOCB for the FCP I/O is built into this
+		 * lpfc_scsi_buf, initialize it with all known data now.
+		 */
+		iocb = &psb->cur_iocbq.iocb;
+		iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
+		if ((phba->sli_rev == 3) &&
+				!(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) {
+			/* fill in immediate fcp command BDE */
+			iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_IMMED;
+			iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
+			iocb->un.fcpi64.bdl.addrLow = offsetof(IOCB_t,
+					unsli3.fcp_ext.icd);
+			iocb->un.fcpi64.bdl.addrHigh = 0;
+			iocb->ulpBdeCount = 0;
+			iocb->ulpLe = 0;
+			/* fill in responce BDE */
+			iocb->unsli3.fcp_ext.rbde.tus.f.bdeFlags =
+							BUFF_TYPE_BDE_64;
+			iocb->unsli3.fcp_ext.rbde.tus.f.bdeSize =
+				sizeof(struct fcp_rsp);
+			iocb->unsli3.fcp_ext.rbde.addrLow =
+				putPaddrLow(pdma_phys_fcp_rsp);
+			iocb->unsli3.fcp_ext.rbde.addrHigh =
+				putPaddrHigh(pdma_phys_fcp_rsp);
+		} else {
+			iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BLP_64;
+			iocb->un.fcpi64.bdl.bdeSize =
+					(2 * sizeof(struct ulp_bde64));
+			iocb->un.fcpi64.bdl.addrLow =
+					putPaddrLow(pdma_phys_bpl);
+			iocb->un.fcpi64.bdl.addrHigh =
+					putPaddrHigh(pdma_phys_bpl);
+			iocb->ulpBdeCount = 1;
+			iocb->ulpLe = 1;
+		}
+		iocb->ulpClass = CLASS3;
+		psb->status = IOSTAT_SUCCESS;
+		/* Put it back into the SCSI buffer list */
+		lpfc_release_scsi_buf_s4(phba, psb);
+
 	}
-	iocb->ulpClass = CLASS3;
 
-	return psb;
+	return bcnt;
 }
 
 /**
- * lpfc_get_scsi_buf - Get a scsi buffer from lpfc_scsi_buf_list list of Hba
- * @phba: The Hba for which this call is being executed.
+ * lpfc_sli4_fcp_xri_aborted - Fast-path process of fcp xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the fcp xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * FCP aborted xri.
+ **/
+void
+lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
+			  struct sli4_wcqe_xri_aborted *axri)
+{
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	struct lpfc_scsi_buf *psb, *next_psb;
+	unsigned long iflag = 0;
+
+	spin_lock_irqsave(&phba->sli4_hba.abts_scsi_buf_list_lock, iflag);
+	list_for_each_entry_safe(psb, next_psb,
+		&phba->sli4_hba.lpfc_abts_scsi_buf_list, list) {
+		if (psb->cur_iocbq.sli4_xritag == xri) {
+			list_del(&psb->list);
+			psb->status = IOSTAT_SUCCESS;
+			spin_unlock_irqrestore(
+				&phba->sli4_hba.abts_scsi_buf_list_lock,
+				iflag);
+			lpfc_release_scsi_buf_s4(phba, psb);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.abts_scsi_buf_list_lock,
+				iflag);
+}
+
+/**
+ * lpfc_sli4_repost_scsi_sgl_list - Repsot the Scsi buffers sgl pages as block
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine walks the list of scsi buffers that have been allocated and
+ * repost them to the HBA by using SGL block post. This is needed after a
+ * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
+ * is responsible for moving all scsi buffers on the lpfc_abts_scsi_sgl_list
+ * to the lpfc_scsi_buf_list. If the repost fails, reject all scsi buffers.
+ *
+ * Returns: 0 = success, non-zero failure.
+ **/
+int
+lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_scsi_buf *psb;
+	int index, status, bcnt = 0, rcnt = 0, rc = 0;
+	LIST_HEAD(sblist);
+
+	for (index = 0; index < phba->sli4_hba.scsi_xri_cnt; index++) {
+		psb = phba->sli4_hba.lpfc_scsi_psb_array[index];
+		if (psb) {
+			/* Remove from SCSI buffer list */
+			list_del(&psb->list);
+			/* Add it to a local SCSI buffer list */
+			list_add_tail(&psb->list, &sblist);
+			if (++rcnt == LPFC_NEMBED_MBOX_SGL_CNT) {
+				bcnt = rcnt;
+				rcnt = 0;
+			}
+		} else
+			/* A hole present in the XRI array, need to skip */
+			bcnt = rcnt;
+
+		if (index == phba->sli4_hba.scsi_xri_cnt - 1)
+			/* End of XRI array for SCSI buffer, complete */
+			bcnt = rcnt;
+
+		/* Continue until collect up to a nembed page worth of sgls */
+		if (bcnt == 0)
+			continue;
+		/* Now, post the SCSI buffer list sgls as a block */
+		status = lpfc_sli4_post_scsi_sgl_block(phba, &sblist, bcnt);
+		/* Reset SCSI buffer count for next round of posting */
+		bcnt = 0;
+		while (!list_empty(&sblist)) {
+			list_remove_head(&sblist, psb, struct lpfc_scsi_buf,
+					 list);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+		}
+	}
+	return rc;
+}
+
+/**
+ * lpfc_new_scsi_buf_s4 - Scsi buffer allocator for HBA with SLI4 IF spec
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine allocates a scsi buffer for device with SLI-4 interface spec,
+ * the scsi buffer contains all the necessary information needed to initiate
+ * a SCSI I/O.
+ *
+ * Return codes:
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
+ **/
+static int
+lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_scsi_buf *psb;
+	struct sli4_sge *sgl;
+	IOCB_t *iocb;
+	dma_addr_t pdma_phys_fcp_cmd;
+	dma_addr_t pdma_phys_fcp_rsp;
+	dma_addr_t pdma_phys_bpl, pdma_phys_bpl1;
+	uint16_t iotag, last_xritag = NO_XRI;
+	int status = 0, index;
+	int bcnt;
+	int non_sequential_xri = 0;
+	int rc = 0;
+	LIST_HEAD(sblist);
+
+	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
+		if (!psb)
+			break;
+
+		/*
+		 * Get memory from the pci pool to map the virt space to pci bus
+		 * space for an I/O.  The DMA buffer includes space for the
+		 * struct fcp_cmnd, struct fcp_rsp and the number of bde's
+		 * necessary to support the sg_tablesize.
+		 */
+		psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool,
+						GFP_KERNEL, &psb->dma_handle);
+		if (!psb->data) {
+			kfree(psb);
+			break;
+		}
+
+		/* Initialize virtual ptrs to dma_buf region. */
+		memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
+
+		/* Allocate iotag for psb->cur_iocbq. */
+		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
+		if (iotag == 0) {
+			kfree(psb);
+			break;
+		}
+
+		psb->cur_iocbq.sli4_xritag = lpfc_sli4_next_xritag(phba);
+		if (psb->cur_iocbq.sli4_xritag == NO_XRI) {
+			pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
+			      psb->data, psb->dma_handle);
+			kfree(psb);
+			break;
+		}
+		if (last_xritag != NO_XRI
+			&& psb->cur_iocbq.sli4_xritag != (last_xritag+1)) {
+			non_sequential_xri = 1;
+		} else
+			list_add_tail(&psb->list, &sblist);
+		last_xritag = psb->cur_iocbq.sli4_xritag;
+
+		index = phba->sli4_hba.scsi_xri_cnt++;
+		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
+
+		psb->fcp_bpl = psb->data;
+		psb->fcp_cmnd = (psb->data + phba->cfg_sg_dma_buf_size)
+			- (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		psb->fcp_rsp = (struct fcp_rsp *)((uint8_t *)psb->fcp_cmnd +
+					sizeof(struct fcp_cmnd));
+
+		/* Initialize local short-hand pointers. */
+		sgl = (struct sli4_sge *)psb->fcp_bpl;
+		pdma_phys_bpl = psb->dma_handle;
+		pdma_phys_fcp_cmd =
+			(psb->dma_handle + phba->cfg_sg_dma_buf_size)
+			 - (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		pdma_phys_fcp_rsp = pdma_phys_fcp_cmd + sizeof(struct fcp_cmnd);
+
+		/*
+		 * The first two bdes are the FCP_CMD and FCP_RSP.  The balance
+		 * are sg list bdes.  Initialize the first two and leave the
+		 * rest for queuecommand.
+		 */
+		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
+		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
+		bf_set(lpfc_sli4_sge_len, sgl, sizeof(struct fcp_cmnd));
+		bf_set(lpfc_sli4_sge_last, sgl, 0);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl->word3 = cpu_to_le32(sgl->word3);
+		sgl++;
+
+		/* Setup the physical region for the FCP RSP */
+		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_rsp));
+		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_rsp));
+		bf_set(lpfc_sli4_sge_len, sgl, sizeof(struct fcp_rsp));
+		bf_set(lpfc_sli4_sge_last, sgl, 1);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl->word3 = cpu_to_le32(sgl->word3);
+
+		/*
+		 * Since the IOCB for the FCP I/O is built into this
+		 * lpfc_scsi_buf, initialize it with all known data now.
+		 */
+		iocb = &psb->cur_iocbq.iocb;
+		iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
+		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+		/* setting the BLP size to 2 * sizeof BDE may not be correct.
+		 * We are setting the bpl to point to out sgl. An sgl's
+		 * entries are 16 bytes, a bpl entries are 12 bytes.
+		 */
+		iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
+		iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_fcp_cmd);
+		iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_fcp_cmd);
+		iocb->ulpBdeCount = 1;
+		iocb->ulpLe = 1;
+		iocb->ulpClass = CLASS3;
+		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
+			pdma_phys_bpl1 = pdma_phys_bpl + SGL_PAGE_SIZE;
+		else
+			pdma_phys_bpl1 = 0;
+		psb->dma_phys_bpl = pdma_phys_bpl;
+		phba->sli4_hba.lpfc_scsi_psb_array[index] = psb;
+		if (non_sequential_xri) {
+			status = lpfc_sli4_post_sgl(phba, pdma_phys_bpl,
+						pdma_phys_bpl1,
+						psb->cur_iocbq.sli4_xritag);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+			break;
+		}
+	}
+	if (bcnt) {
+		status = lpfc_sli4_post_scsi_sgl_block(phba, &sblist, bcnt);
+		/* Reset SCSI buffer count for next round of posting */
+		while (!list_empty(&sblist)) {
+			list_remove_head(&sblist, psb, struct lpfc_scsi_buf,
+				 list);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+		}
+	}
+
+	return bcnt + non_sequential_xri - rc;
+}
+
+/**
+ * lpfc_new_scsi_buf - Wrapper funciton for scsi buffer allocator
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine wraps the actual SCSI buffer allocator function pointer from
+ * the lpfc_hba struct.
+ *
+ * Return codes:
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
+ **/
+static inline int
+lpfc_new_scsi_buf(struct lpfc_vport *vport, int num_to_alloc)
+{
+	return vport->phba->lpfc_new_scsi_buf(vport, num_to_alloc);
+}
+
+/**
+ * lpfc_get_scsi_buf - Get a scsi buffer from lpfc_scsi_buf_list of the HBA
+ * @phba: The HBA for which this call is being executed.
  *
  * This routine removes a scsi buffer from head of @phba lpfc_scsi_buf_list list
  * and returns to caller.
@@ -591,7 +890,7 @@
 }
 
 /**
- * lpfc_release_scsi_buf - Return a scsi buffer back to hba's lpfc_scsi_buf_list
+ * lpfc_release_scsi_buf - Return a scsi buffer back to hba scsi buf list
  * @phba: The Hba for which this call is being executed.
  * @psb: The scsi buffer which is being released.
  *
@@ -599,7 +898,7 @@
  * lpfc_scsi_buf_list list.
  **/
 static void
-lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 {
 	unsigned long iflag = 0;
 
@@ -610,21 +909,69 @@
 }
 
 /**
- * lpfc_scsi_prep_dma_buf - Routine to do DMA mapping for scsi buffer
+ * lpfc_release_scsi_buf_s4: Return a scsi buffer back to hba scsi buf list.
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is being released.
+ *
+ * This routine releases @psb scsi buffer by adding it to tail of @phba
+ * lpfc_scsi_buf_list list. For SLI4 XRI's are tied to the scsi buffer
+ * and cannot be reused for at least RA_TOV amount of time if it was
+ * aborted.
+ **/
+static void
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	unsigned long iflag = 0;
+
+	if (psb->status == IOSTAT_LOCAL_REJECT
+		&& psb->result == IOERR_ABORT_REQUESTED) {
+		spin_lock_irqsave(&phba->sli4_hba.abts_scsi_buf_list_lock,
+					iflag);
+		psb->pCmd = NULL;
+		list_add_tail(&psb->list,
+			&phba->sli4_hba.lpfc_abts_scsi_buf_list);
+		spin_unlock_irqrestore(&phba->sli4_hba.abts_scsi_buf_list_lock,
+					iflag);
+	} else {
+
+		spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
+		psb->pCmd = NULL;
+		list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list);
+		spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
+	}
+}
+
+/**
+ * lpfc_release_scsi_buf: Return a scsi buffer back to hba scsi buf list.
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is being released.
+ *
+ * This routine releases @psb scsi buffer by adding it to tail of @phba
+ * lpfc_scsi_buf_list list.
+ **/
+static void
+lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+
+	phba->lpfc_release_scsi_buf(phba, psb);
+}
+
+/**
+ * lpfc_scsi_prep_dma_buf_s3 - DMA mapping for scsi buffer to SLI3 IF spec
  * @phba: The Hba for which this call is being executed.
  * @lpfc_cmd: The scsi buffer which is going to be mapped.
  *
  * This routine does the pci dma mapping for scatter-gather list of scsi cmnd
- * field of @lpfc_cmd. This routine scans through sg elements and format the
- * bdea. This routine also initializes all IOCB fields which are dependent on
- * scsi command request buffer.
+ * field of @lpfc_cmd for device with SLI-3 interface spec. This routine scans
+ * through sg elements and format the bdea. This routine also initializes all
+ * IOCB fields which are dependent on scsi command request buffer.
  *
  * Return codes:
  *   1 - Error
  *   0 - Success
  **/
 static int
-lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct scatterlist *sgel = NULL;
@@ -1312,10 +1659,10 @@
 	uint32_t bgstat = bgf->bgstat;
 	uint64_t failing_sector = 0;
 
-	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%lx "
+	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%x "
 			"bgstat=0x%x bghm=0x%x\n",
 			cmd->cmnd[0], (unsigned long long)scsi_get_lba(cmd),
-			cmd->request->nr_sectors, bgstat, bghm);
+			blk_rq_sectors(cmd->request), bgstat, bghm);
 
 	spin_lock(&_dump_buf_lock);
 	if (!_dump_buf_done) {
@@ -1412,6 +1759,133 @@
 }
 
 /**
+ * lpfc_scsi_prep_dma_buf_s4 - DMA mapping for scsi buffer to SLI4 IF spec
+ * @phba: The Hba for which this call is being executed.
+ * @lpfc_cmd: The scsi buffer which is going to be mapped.
+ *
+ * This routine does the pci dma mapping for scatter-gather list of scsi cmnd
+ * field of @lpfc_cmd for device with SLI-4 interface spec.
+ *
+ * Return codes:
+ * 	1 - Error
+ * 	0 - Success
+ **/
+static int
+lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+{
+	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+	struct scatterlist *sgel = NULL;
+	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
+	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+	dma_addr_t physaddr;
+	uint32_t num_bde = 0;
+	uint32_t dma_len;
+	uint32_t dma_offset = 0;
+	int nseg;
+
+	/*
+	 * There are three possibilities here - use scatter-gather segment, use
+	 * the single mapping, or neither.  Start the lpfc command prep by
+	 * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first
+	 * data bde entry.
+	 */
+	if (scsi_sg_count(scsi_cmnd)) {
+		/*
+		 * The driver stores the segment count returned from pci_map_sg
+		 * because this a count of dma-mappings used to map the use_sg
+		 * pages.  They are not guaranteed to be the same for those
+		 * architectures that implement an IOMMU.
+		 */
+
+		nseg = scsi_dma_map(scsi_cmnd);
+		if (unlikely(!nseg))
+			return 1;
+		sgl += 1;
+		/* clear the last flag in the fcp_rsp map entry */
+		sgl->word2 = le32_to_cpu(sgl->word2);
+		bf_set(lpfc_sli4_sge_last, sgl, 0);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl += 1;
+
+		lpfc_cmd->seg_cnt = nseg;
+		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+			printk(KERN_ERR "%s: Too many sg segments from "
+			       "dma_map_sg.  Config %d, seg_cnt %d\n",
+			       __func__, phba->cfg_sg_seg_cnt,
+			       lpfc_cmd->seg_cnt);
+			scsi_dma_unmap(scsi_cmnd);
+			return 1;
+		}
+
+		/*
+		 * The driver established a maximum scatter-gather segment count
+		 * during probe that limits the number of sg elements in any
+		 * single scsi command.  Just run through the seg_cnt and format
+		 * the sge's.
+		 * When using SLI-3 the driver will try to fit all the BDEs into
+		 * the IOCB. If it can't then the BDEs get added to a BPL as it
+		 * does for SLI-2 mode.
+		 */
+		scsi_for_each_sg(scsi_cmnd, sgel, nseg, num_bde) {
+			physaddr = sg_dma_address(sgel);
+			dma_len = sg_dma_len(sgel);
+			bf_set(lpfc_sli4_sge_len, sgl, sg_dma_len(sgel));
+			sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
+			sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
+			if ((num_bde + 1) == nseg)
+				bf_set(lpfc_sli4_sge_last, sgl, 1);
+			else
+				bf_set(lpfc_sli4_sge_last, sgl, 0);
+			bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+			dma_offset += dma_len;
+			sgl++;
+		}
+	} else {
+		sgl += 1;
+		/* clear the last flag in the fcp_rsp map entry */
+		sgl->word2 = le32_to_cpu(sgl->word2);
+		bf_set(lpfc_sli4_sge_last, sgl, 1);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+	}
+
+	/*
+	 * Finish initializing those IOCB fields that are dependent on the
+	 * scsi_cmnd request_buffer.  Note that for SLI-2 the bdeSize is
+	 * explicitly reinitialized.
+	 * all iocb memory resources are reused.
+	 */
+	fcp_cmnd->fcpDl = cpu_to_be32(scsi_bufflen(scsi_cmnd));
+
+	/*
+	 * Due to difference in data length between DIF/non-DIF paths,
+	 * we need to set word 4 of IOCB here
+	 */
+	iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd);
+	return 0;
+}
+
+/**
+ * lpfc_scsi_prep_dma_buf - Wrapper function for DMA mapping of scsi buffer
+ * @phba: The Hba for which this call is being executed.
+ * @lpfc_cmd: The scsi buffer which is going to be mapped.
+ *
+ * This routine wraps the actual DMA mapping function pointer from the
+ * lpfc_hba struct.
+ *
+ * Return codes:
+ * 	1 - Error
+ * 	0 - Success
+ **/
+static inline int
+lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+{
+	return phba->lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
+}
+
+/**
  * lpfc_send_scsi_error_event - Posts an event when there is SCSI error
  * @phba: Pointer to hba context object.
  * @vport: Pointer to vport object.
@@ -1504,15 +1978,15 @@
 }
 
 /**
- * lpfc_scsi_unprep_dma_buf - Routine to un-map DMA mapping of scatter gather
- * @phba: The Hba for which this call is being executed.
+ * lpfc_scsi_unprep_dma_buf_s3 - Un-map DMA mapping of SG-list for SLI3 dev
+ * @phba: The HBA for which this call is being executed.
  * @psb: The scsi buffer which is going to be un-mapped.
  *
  * This routine does DMA un-mapping of scatter gather list of scsi command
- * field of @lpfc_cmd.
+ * field of @lpfc_cmd for device with SLI-3 interface spec.
  **/
 static void
-lpfc_scsi_unprep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
+lpfc_scsi_unprep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 {
 	/*
 	 * There are only two special cases to consider.  (1) the scsi command
@@ -1529,6 +2003,36 @@
 }
 
 /**
+ * lpfc_scsi_unprep_dma_buf_s4 - Un-map DMA mapping of SG-list for SLI4 dev
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is going to be un-mapped.
+ *
+ * This routine does DMA un-mapping of scatter gather list of scsi command
+ * field of @lpfc_cmd for device with SLI-4 interface spec. If we have to
+ * remove the sgl for this scsi buffer then we will do it here. For now
+ * we should be able to just call the sli3 unprep routine.
+ **/
+static void
+lpfc_scsi_unprep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	lpfc_scsi_unprep_dma_buf_s3(phba, psb);
+}
+
+/**
+ * lpfc_scsi_unprep_dma_buf - Wrapper function for unmap DMA mapping of SG-list
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is going to be un-mapped.
+ *
+ * This routine does DMA un-mapping of scatter gather list of scsi command
+ * field of @lpfc_cmd for device with SLI-4 interface spec.
+ **/
+static void
+lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	phba->lpfc_scsi_unprep_dma_buf(phba, psb);
+}
+
+/**
  * lpfc_handler_fcp_err - FCP response handler
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
@@ -1676,7 +2180,7 @@
  * lpfc_scsi_cmd_iocb_cmpl - Scsi cmnd IOCB completion routine
  * @phba: The Hba for which this call is being executed.
  * @pIocbIn: The command IOCBQ for the scsi cmnd.
- * @pIocbOut: The response IOCBQ for the scsi cmnd .
+ * @pIocbOut: The response IOCBQ for the scsi cmnd.
  *
  * This routine assigns scsi command result by looking into response IOCB
  * status field appropriately. This routine handles QUEUE FULL condition as
@@ -1957,16 +2461,16 @@
 }
 
 /**
- * lpfc_scsi_prep_cmnd -  Routine to convert scsi cmnd to FCP information unit
+ * lpfc_scsi_prep_cmnd_s3 - Convert scsi cmnd to FCP infor unit for SLI3 dev
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: The scsi command which needs to send.
  * @pnode: Pointer to lpfc_nodelist.
  *
  * This routine initializes fcp_cmnd and iocb data structure from scsi command
- * to transfer.
+ * to transfer for device with SLI3 interface spec.
  **/
 static void
-lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+lpfc_scsi_prep_cmnd_s3(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 		    struct lpfc_nodelist *pnode)
 {
 	struct lpfc_hba *phba = vport->phba;
@@ -2013,8 +2517,11 @@
 	if (scsi_sg_count(scsi_cmnd)) {
 		if (datadir == DMA_TO_DEVICE) {
 			iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
-			iocb_cmd->un.fcpi.fcpi_parm = 0;
-			iocb_cmd->ulpPU = 0;
+			if (phba->sli_rev < LPFC_SLI_REV4) {
+				iocb_cmd->un.fcpi.fcpi_parm = 0;
+				iocb_cmd->ulpPU = 0;
+			} else
+				iocb_cmd->ulpPU = PARM_READ_CHECK;
 			fcp_cmnd->fcpCntl3 = WRITE_DATA;
 			phba->fc4OutputRequests++;
 		} else {
@@ -2051,20 +2558,60 @@
 }
 
 /**
- * lpfc_scsi_prep_task_mgmt_cmnd - Convert scsi TM cmnd to FCP information unit
+ * lpfc_scsi_prep_cmnd_s4 - Convert scsi cmnd to FCP infor unit for SLI4 dev
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: The scsi command which needs to send.
+ * @pnode: Pointer to lpfc_nodelist.
+ *
+ * This routine initializes fcp_cmnd and iocb data structure from scsi command
+ * to transfer for device with SLI4 interface spec.
+ **/
+static void
+lpfc_scsi_prep_cmnd_s4(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+		       struct lpfc_nodelist *pnode)
+{
+	/*
+	 * The prep cmnd routines do not touch the sgl or its
+	 * entries. We may not have to do anything different.
+	 * I will leave this function in place until we can
+	 * run some IO through the driver and determine if changes
+	 * are needed.
+	 */
+	return lpfc_scsi_prep_cmnd_s3(vport, lpfc_cmd, pnode);
+}
+
+/**
+ * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: The scsi command which needs to send.
+ * @pnode: Pointer to lpfc_nodelist.
+ *
+ * This routine wraps the actual convert SCSI cmnd function pointer from
+ * the lpfc_hba struct.
+ **/
+static inline void
+lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+		    struct lpfc_nodelist *pnode)
+{
+	vport->phba->lpfc_scsi_prep_cmnd(vport, lpfc_cmd, pnode);
+}
+
+/**
+ * lpfc_scsi_prep_task_mgmt_cmnd_s3 - Convert SLI3 scsi TM cmd to FCP info unit
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
  * @lun: Logical unit number.
  * @task_mgmt_cmd: SCSI task management command.
  *
- * This routine creates FCP information unit corresponding to @task_mgmt_cmd.
+ * This routine creates FCP information unit corresponding to @task_mgmt_cmd
+ * for device with SLI-3 interface spec.
  *
  * Return codes:
  *   0 - Error
  *   1 - Success
  **/
 static int
-lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+lpfc_scsi_prep_task_mgmt_cmd_s3(struct lpfc_vport *vport,
 			     struct lpfc_scsi_buf *lpfc_cmd,
 			     unsigned int lun,
 			     uint8_t task_mgmt_cmd)
@@ -2114,6 +2661,107 @@
 }
 
 /**
+ * lpfc_scsi_prep_task_mgmt_cmnd_s4 - Convert SLI4 scsi TM cmd to FCP info unit
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lun: Logical unit number.
+ * @task_mgmt_cmd: SCSI task management command.
+ *
+ * This routine creates FCP information unit corresponding to @task_mgmt_cmd
+ * for device with SLI-4 interface spec.
+ *
+ * Return codes:
+ * 	0 - Error
+ * 	1 - Success
+ **/
+static int
+lpfc_scsi_prep_task_mgmt_cmd_s4(struct lpfc_vport *vport,
+				struct lpfc_scsi_buf *lpfc_cmd,
+				unsigned int lun,
+				uint8_t task_mgmt_cmd)
+{
+	/*
+	 * The prep cmnd routines do not touch the sgl or its
+	 * entries. We may not have to do anything different.
+	 * I will leave this function in place until we can
+	 * run some IO through the driver and determine if changes
+	 * are needed.
+	 */
+	return lpfc_scsi_prep_task_mgmt_cmd_s3(vport, lpfc_cmd, lun,
+						task_mgmt_cmd);
+}
+
+/**
+ * lpfc_scsi_prep_task_mgmt_cmnd - Wrapper func convert scsi TM cmd to FCP info
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lun: Logical unit number.
+ * @task_mgmt_cmd: SCSI task management command.
+ *
+ * This routine wraps the actual convert SCSI TM to FCP information unit
+ * function pointer from the lpfc_hba struct.
+ *
+ * Return codes:
+ * 	0 - Error
+ * 	1 - Success
+ **/
+static inline int
+lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+			     struct lpfc_scsi_buf *lpfc_cmd,
+			     unsigned int lun,
+			     uint8_t task_mgmt_cmd)
+{
+	struct lpfc_hba *phba = vport->phba;
+
+	return phba->lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun,
+						  task_mgmt_cmd);
+}
+
+/**
+ * lpfc_scsi_api_table_setup - Set up scsi api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the SCSI interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s3;
+		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s3;
+		phba->lpfc_scsi_prep_cmnd = lpfc_scsi_prep_cmnd_s3;
+		phba->lpfc_scsi_unprep_dma_buf = lpfc_scsi_unprep_dma_buf_s3;
+		phba->lpfc_scsi_prep_task_mgmt_cmd =
+					lpfc_scsi_prep_task_mgmt_cmd_s3;
+		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s4;
+		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s4;
+		phba->lpfc_scsi_prep_cmnd = lpfc_scsi_prep_cmnd_s4;
+		phba->lpfc_scsi_unprep_dma_buf = lpfc_scsi_unprep_dma_buf_s4;
+		phba->lpfc_scsi_prep_task_mgmt_cmd =
+					lpfc_scsi_prep_task_mgmt_cmd_s4;
+		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1418 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	phba->lpfc_get_scsi_buf = lpfc_get_scsi_buf;
+	phba->lpfc_rampdown_queue_depth = lpfc_rampdown_queue_depth;
+	return 0;
+}
+
+/**
  * lpfc_taskmgmt_def_cmpl - IOCB completion routine for task management command
  * @phba: The Hba for which this call is being executed.
  * @cmdiocbq: Pointer to lpfc_iocbq data structure.
@@ -2178,9 +2826,8 @@
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
 			 "0702 Issue Target Reset to TGT %d Data: x%x x%x\n",
 			 tgt_id, rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag);
-	status = lpfc_sli_issue_iocb_wait(phba,
-				       &phba->sli.ring[phba->sli.fcp_ring],
-				       iocbq, iocbqrsp, lpfc_cmd->timeout);
+	status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING,
+					  iocbq, iocbqrsp, lpfc_cmd->timeout);
 	if (status != IOCB_SUCCESS) {
 		if (status == IOCB_TIMEDOUT) {
 			iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl;
@@ -2305,7 +2952,6 @@
 	struct Scsi_Host  *shost = cmnd->device->host;
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_sli   *psli = &phba->sli;
 	struct lpfc_rport_data *rdata = cmnd->device->hostdata;
 	struct lpfc_nodelist *ndlp = rdata->pnode;
 	struct lpfc_scsi_buf *lpfc_cmd;
@@ -2378,15 +3024,15 @@
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9035 BLKGRD: READ @ sector %llu, "
-					 "count %lu\n",
-					 (unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors);
+					"count %u\n",
+					(unsigned long long)scsi_get_lba(cmnd),
+					blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9036 BLKGRD: WRITE @ sector %llu, "
-					"count %lu cmd=%p\n",
+					"count %u cmd=%p\n",
 					(unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors,
+					blk_rq_sectors(cmnd->request),
 					cmnd);
 
 		err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
@@ -2406,15 +3052,15 @@
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9040 dbg: READ @ sector %llu, "
-					 "count %lu\n",
+					 "count %u\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors);
+					 blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9041 dbg: WRITE @ sector %llu, "
-					 "count %lu cmd=%p\n",
+					 "count %u cmd=%p\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors, cmnd);
+					 blk_rq_sectors(cmnd->request), cmnd);
 		else
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9042 dbg: parser not implemented\n");
@@ -2427,7 +3073,7 @@
 	lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
 
 	atomic_inc(&ndlp->cmd_pending);
-	err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring],
+	err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
 				  &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
 	if (err) {
 		atomic_dec(&ndlp->cmd_pending);
@@ -2490,7 +3136,6 @@
 	struct Scsi_Host  *shost = cmnd->device->host;
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring];
 	struct lpfc_iocbq *iocb;
 	struct lpfc_iocbq *abtsiocb;
 	struct lpfc_scsi_buf *lpfc_cmd;
@@ -2531,7 +3176,10 @@
 	icmd = &abtsiocb->iocb;
 	icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
 	icmd->un.acxri.abortContextTag = cmd->ulpContext;
-	icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		icmd->un.acxri.abortIoTag = iocb->sli4_xritag;
+	else
+		icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
 
 	icmd->ulpLe = 1;
 	icmd->ulpClass = cmd->ulpClass;
@@ -2542,7 +3190,8 @@
 
 	abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
 	abtsiocb->vport = vport;
-	if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) ==
+	    IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, abtsiocb);
 		ret = FAILED;
 		goto out;
@@ -2668,8 +3317,7 @@
 			 "0703 Issue target reset to TGT %d LUN %d "
 			 "rpi x%x nlp_flag x%x\n", cmnd->device->id,
 			 cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag);
-	status = lpfc_sli_issue_iocb_wait(phba,
-					  &phba->sli.ring[phba->sli.fcp_ring],
+	status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING,
 					  iocbq, iocbqrsp, lpfc_cmd->timeout);
 	if (status == IOCB_TIMEDOUT) {
 		iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl;
@@ -2825,11 +3473,10 @@
 {
 	struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_scsi_buf *scsi_buf = NULL;
 	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
-	uint32_t total = 0, i;
+	uint32_t total = 0;
 	uint32_t num_to_alloc = 0;
-	unsigned long flags;
+	int num_allocated = 0;
 
 	if (!rport || fc_remote_port_chkready(rport))
 		return -ENXIO;
@@ -2863,20 +3510,13 @@
 				 (phba->cfg_hba_queue_depth - total));
 		num_to_alloc = phba->cfg_hba_queue_depth - total;
 	}
-
-	for (i = 0; i < num_to_alloc; i++) {
-		scsi_buf = lpfc_new_scsi_buf(vport);
-		if (!scsi_buf) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-					 "0706 Failed to allocate "
-					 "command buffer\n");
-			break;
-		}
-
-		spin_lock_irqsave(&phba->scsi_buf_list_lock, flags);
-		phba->total_scsi_bufs++;
-		list_add_tail(&scsi_buf->list, &phba->lpfc_scsi_buf_list);
-		spin_unlock_irqrestore(&phba->scsi_buf_list_lock, flags);
+	num_allocated = lpfc_new_scsi_buf(vport, num_to_alloc);
+	if (num_to_alloc != num_allocated) {
+			lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+				 "0708 Allocation request of %d "
+				 "command buffers did not succeed.  "
+				 "Allocated %d buffers.\n",
+				 num_to_alloc, num_allocated);
 	}
 	return 0;
 }
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index c7c440d..65dfc8b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -140,6 +140,8 @@
 	struct fcp_rsp *fcp_rsp;
 	struct ulp_bde64 *fcp_bpl;
 
+	dma_addr_t dma_phys_bpl;
+
 	/* cur_iocbq has phys of the dma-able buffer.
 	 * Iotag is in here
 	 */
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index eb5c75c..ff04daf 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -29,9 +29,12 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
+#include <scsi/fc/fc_fs.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -40,24 +43,7 @@
 #include "lpfc_logmsg.h"
 #include "lpfc_compat.h"
 #include "lpfc_debugfs.h"
-
-/*
- * Define macro to log: Mailbox command x%x cannot issue Data
- * This allows multiple uses of lpfc_msgBlk0311
- * w/o perturbing log msg utility.
- */
-#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \
-			lpfc_printf_log(phba, \
-				KERN_INFO, \
-				LOG_MBOX | LOG_SLI, \
-				"(%d):0311 Mailbox command x%x cannot " \
-				"issue Data: x%x x%x x%x\n", \
-				pmbox->vport ? pmbox->vport->vpi : 0, \
-				pmbox->mb.mbxCommand,		\
-				phba->pport->port_state,	\
-				psli->sli_flag,	\
-				flag)
-
+#include "lpfc_vport.h"
 
 /* There are only four IOCB completion types. */
 typedef enum _lpfc_iocb_type {
@@ -67,6 +53,350 @@
 	LPFC_ABORT_IOCB
 } lpfc_iocb_type;
 
+
+/* Provide function prototypes local to this module. */
+static int lpfc_sli_issue_mbox_s4(struct lpfc_hba *, LPFC_MBOXQ_t *,
+				  uint32_t);
+static int lpfc_sli4_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *,
+			    uint8_t *, uint32_t *);
+
+static IOCB_t *
+lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq)
+{
+	return &iocbq->iocb;
+}
+
+/**
+ * lpfc_sli4_wq_put - Put a Work Queue Entry on an Work Queue
+ * @q: The Work Queue to operate on.
+ * @wqe: The work Queue Entry to put on the Work queue.
+ *
+ * This routine will copy the contents of @wqe to the next available entry on
+ * the @q. This function will then ring the Work Queue Doorbell to signal the
+ * HBA to start processing the Work Queue Entry. This function returns 0 if
+ * successful. If no entries are available on @q then this function will return
+ * -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static uint32_t
+lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
+{
+	union lpfc_wqe *temp_wqe = q->qe[q->host_index].wqe;
+	struct lpfc_register doorbell;
+	uint32_t host_index;
+
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
+		return -ENOMEM;
+	/* set consumption flag every once in a while */
+	if (!((q->host_index + 1) % LPFC_RELEASE_NOTIFICATION_INTERVAL))
+		bf_set(lpfc_wqe_gen_wqec, &wqe->generic, 1);
+
+	lpfc_sli_pcimem_bcopy(wqe, temp_wqe, q->entry_size);
+
+	/* Update the host index before invoking device */
+	host_index = q->host_index;
+	q->host_index = ((q->host_index + 1) % q->entry_count);
+
+	/* Ring Doorbell */
+	doorbell.word0 = 0;
+	bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1);
+	bf_set(lpfc_wq_doorbell_index, &doorbell, host_index);
+	bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr);
+	readl(q->phba->sli4_hba.WQDBregaddr); /* Flush */
+
+	return 0;
+}
+
+/**
+ * lpfc_sli4_wq_release - Updates internal hba index for WQ
+ * @q: The Work Queue to operate on.
+ * @index: The index to advance the hba index to.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * Work Queue Entries by the HBA. When the HBA indicates that it has consumed
+ * an entry the host calls this function to update the queue's internal
+ * pointers. This routine returns the number of entries that were consumed by
+ * the HBA.
+ **/
+static uint32_t
+lpfc_sli4_wq_release(struct lpfc_queue *q, uint32_t index)
+{
+	uint32_t released = 0;
+
+	if (q->hba_index == index)
+		return 0;
+	do {
+		q->hba_index = ((q->hba_index + 1) % q->entry_count);
+		released++;
+	} while (q->hba_index != index);
+	return released;
+}
+
+/**
+ * lpfc_sli4_mq_put - Put a Mailbox Queue Entry on an Mailbox Queue
+ * @q: The Mailbox Queue to operate on.
+ * @wqe: The Mailbox Queue Entry to put on the Work queue.
+ *
+ * This routine will copy the contents of @mqe to the next available entry on
+ * the @q. This function will then ring the Work Queue Doorbell to signal the
+ * HBA to start processing the Work Queue Entry. This function returns 0 if
+ * successful. If no entries are available on @q then this function will return
+ * -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static uint32_t
+lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
+{
+	struct lpfc_mqe *temp_mqe = q->qe[q->host_index].mqe;
+	struct lpfc_register doorbell;
+	uint32_t host_index;
+
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
+		return -ENOMEM;
+	lpfc_sli_pcimem_bcopy(mqe, temp_mqe, q->entry_size);
+	/* Save off the mailbox pointer for completion */
+	q->phba->mbox = (MAILBOX_t *)temp_mqe;
+
+	/* Update the host index before invoking device */
+	host_index = q->host_index;
+	q->host_index = ((q->host_index + 1) % q->entry_count);
+
+	/* Ring Doorbell */
+	doorbell.word0 = 0;
+	bf_set(lpfc_mq_doorbell_num_posted, &doorbell, 1);
+	bf_set(lpfc_mq_doorbell_id, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.MQDBregaddr);
+	readl(q->phba->sli4_hba.MQDBregaddr); /* Flush */
+	return 0;
+}
+
+/**
+ * lpfc_sli4_mq_release - Updates internal hba index for MQ
+ * @q: The Mailbox Queue to operate on.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * a Mailbox Queue Entry by the HBA. When the HBA indicates that it has consumed
+ * an entry the host calls this function to update the queue's internal
+ * pointers. This routine returns the number of entries that were consumed by
+ * the HBA.
+ **/
+static uint32_t
+lpfc_sli4_mq_release(struct lpfc_queue *q)
+{
+	/* Clear the mailbox pointer for completion */
+	q->phba->mbox = NULL;
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return 1;
+}
+
+/**
+ * lpfc_sli4_eq_get - Gets the next valid EQE from a EQ
+ * @q: The Event Queue to get the first valid EQE from
+ *
+ * This routine will get the first valid Event Queue Entry from @q, update
+ * the queue's internal hba index, and return the EQE. If no valid EQEs are in
+ * the Queue (no more work to do), or the Queue is full of EQEs that have been
+ * processed, but not popped back to the HBA then this routine will return NULL.
+ **/
+static struct lpfc_eqe *
+lpfc_sli4_eq_get(struct lpfc_queue *q)
+{
+	struct lpfc_eqe *eqe = q->qe[q->hba_index].eqe;
+
+	/* If the next EQE is not valid then we are done */
+	if (!bf_get(lpfc_eqe_valid, eqe))
+		return NULL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->hba_index + 1) % q->entry_count) == q->host_index)
+		return NULL;
+
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return eqe;
+}
+
+/**
+ * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * @q: The Event Queue that the host has completed processing for.
+ * @arm: Indicates whether the host wants to arms this CQ.
+ *
+ * This routine will mark all Event Queue Entries on @q, from the last
+ * known completed entry to the last entry that was processed, as completed
+ * by clearing the valid bit for each completion queue entry. Then it will
+ * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
+ * The internal host index in the @q will be updated by this routine to indicate
+ * that the host has finished processing the entries. The @arm parameter
+ * indicates that the queue should be rearmed when ringing the doorbell.
+ *
+ * This function will return the number of EQEs that were popped.
+ **/
+uint32_t
+lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+{
+	uint32_t released = 0;
+	struct lpfc_eqe *temp_eqe;
+	struct lpfc_register doorbell;
+
+	/* while there are valid entries */
+	while (q->hba_index != q->host_index) {
+		temp_eqe = q->qe[q->host_index].eqe;
+		bf_set(lpfc_eqe_valid, temp_eqe, 0);
+		released++;
+		q->host_index = ((q->host_index + 1) % q->entry_count);
+	}
+	if (unlikely(released == 0 && !arm))
+		return 0;
+
+	/* ring doorbell for number popped */
+	doorbell.word0 = 0;
+	if (arm) {
+		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
+		bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
+	}
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
+	bf_set(lpfc_eqcq_doorbell_eqid, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.EQCQDBregaddr);
+	return released;
+}
+
+/**
+ * lpfc_sli4_cq_get - Gets the next valid CQE from a CQ
+ * @q: The Completion Queue to get the first valid CQE from
+ *
+ * This routine will get the first valid Completion Queue Entry from @q, update
+ * the queue's internal hba index, and return the CQE. If no valid CQEs are in
+ * the Queue (no more work to do), or the Queue is full of CQEs that have been
+ * processed, but not popped back to the HBA then this routine will return NULL.
+ **/
+static struct lpfc_cqe *
+lpfc_sli4_cq_get(struct lpfc_queue *q)
+{
+	struct lpfc_cqe *cqe;
+
+	/* If the next CQE is not valid then we are done */
+	if (!bf_get(lpfc_cqe_valid, q->qe[q->hba_index].cqe))
+		return NULL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->hba_index + 1) % q->entry_count) == q->host_index)
+		return NULL;
+
+	cqe = q->qe[q->hba_index].cqe;
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return cqe;
+}
+
+/**
+ * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * @q: The Completion Queue that the host has completed processing for.
+ * @arm: Indicates whether the host wants to arms this CQ.
+ *
+ * This routine will mark all Completion queue entries on @q, from the last
+ * known completed entry to the last entry that was processed, as completed
+ * by clearing the valid bit for each completion queue entry. Then it will
+ * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
+ * The internal host index in the @q will be updated by this routine to indicate
+ * that the host has finished processing the entries. The @arm parameter
+ * indicates that the queue should be rearmed when ringing the doorbell.
+ *
+ * This function will return the number of CQEs that were released.
+ **/
+uint32_t
+lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+{
+	uint32_t released = 0;
+	struct lpfc_cqe *temp_qe;
+	struct lpfc_register doorbell;
+
+	/* while there are valid entries */
+	while (q->hba_index != q->host_index) {
+		temp_qe = q->qe[q->host_index].cqe;
+		bf_set(lpfc_cqe_valid, temp_qe, 0);
+		released++;
+		q->host_index = ((q->host_index + 1) % q->entry_count);
+	}
+	if (unlikely(released == 0 && !arm))
+		return 0;
+
+	/* ring doorbell for number popped */
+	doorbell.word0 = 0;
+	if (arm)
+		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
+	bf_set(lpfc_eqcq_doorbell_cqid, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.EQCQDBregaddr);
+	return released;
+}
+
+/**
+ * lpfc_sli4_rq_put - Put a Receive Buffer Queue Entry on a Receive Queue
+ * @q: The Header Receive Queue to operate on.
+ * @wqe: The Receive Queue Entry to put on the Receive queue.
+ *
+ * This routine will copy the contents of @wqe to the next available entry on
+ * the @q. This function will then ring the Receive Queue Doorbell to signal the
+ * HBA to start processing the Receive Queue Entry. This function returns the
+ * index that the rqe was copied to if successful. If no entries are available
+ * on @q then this function will return -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static int
+lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
+		 struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe)
+{
+	struct lpfc_rqe *temp_hrqe = hq->qe[hq->host_index].rqe;
+	struct lpfc_rqe *temp_drqe = dq->qe[dq->host_index].rqe;
+	struct lpfc_register doorbell;
+	int put_index = hq->host_index;
+
+	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
+		return -EINVAL;
+	if (hq->host_index != dq->host_index)
+		return -EINVAL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index)
+		return -EBUSY;
+	lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size);
+	lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size);
+
+	/* Update the host index to point to the next slot */
+	hq->host_index = ((hq->host_index + 1) % hq->entry_count);
+	dq->host_index = ((dq->host_index + 1) % dq->entry_count);
+
+	/* Ring The Header Receive Queue Doorbell */
+	if (!(hq->host_index % LPFC_RQ_POST_BATCH)) {
+		doorbell.word0 = 0;
+		bf_set(lpfc_rq_doorbell_num_posted, &doorbell,
+		       LPFC_RQ_POST_BATCH);
+		bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id);
+		writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr);
+	}
+	return put_index;
+}
+
+/**
+ * lpfc_sli4_rq_release - Updates internal hba index for RQ
+ * @q: The Header Receive Queue to operate on.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * one Receive Queue Entry by the HBA. When the HBA indicates that it has
+ * consumed an entry the host calls this function to update the queue's
+ * internal pointers. This routine returns the number of entries that were
+ * consumed by the HBA.
+ **/
+static uint32_t
+lpfc_sli4_rq_release(struct lpfc_queue *hq, struct lpfc_queue *dq)
+{
+	if ((hq->type != LPFC_HRQ) || (dq->type != LPFC_DRQ))
+		return 0;
+	hq->hba_index = ((hq->hba_index + 1) % hq->entry_count);
+	dq->hba_index = ((dq->hba_index + 1) % dq->entry_count);
+	return 1;
+}
+
 /**
  * lpfc_cmd_iocb - Get next command iocb entry in the ring
  * @phba: Pointer to HBA context object.
@@ -121,6 +451,76 @@
 }
 
 /**
+ * __lpfc_clear_active_sglq - Remove the active sglq for this XRI.
+ * @phba: Pointer to HBA context object.
+ * @xritag: XRI value.
+ *
+ * This function clears the sglq pointer from the array of acive
+ * sglq's. The xritag that is passed in is used to index into the
+ * array. Before the xritag can be used it needs to be adjusted
+ * by subtracting the xribase.
+ *
+ * Returns sglq ponter = success, NULL = Failure.
+ **/
+static struct lpfc_sglq *
+__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xritag)
+{
+	uint16_t adj_xri;
+	struct lpfc_sglq *sglq;
+	adj_xri = xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	if (adj_xri > phba->sli4_hba.max_cfg_param.max_xri)
+		return NULL;
+	sglq = phba->sli4_hba.lpfc_sglq_active_list[adj_xri];
+	phba->sli4_hba.lpfc_sglq_active_list[adj_xri] = NULL;
+	return sglq;
+}
+
+/**
+ * __lpfc_get_active_sglq - Get the active sglq for this XRI.
+ * @phba: Pointer to HBA context object.
+ * @xritag: XRI value.
+ *
+ * This function returns the sglq pointer from the array of acive
+ * sglq's. The xritag that is passed in is used to index into the
+ * array. Before the xritag can be used it needs to be adjusted
+ * by subtracting the xribase.
+ *
+ * Returns sglq ponter = success, NULL = Failure.
+ **/
+static struct lpfc_sglq *
+__lpfc_get_active_sglq(struct lpfc_hba *phba, uint16_t xritag)
+{
+	uint16_t adj_xri;
+	struct lpfc_sglq *sglq;
+	adj_xri = xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	if (adj_xri > phba->sli4_hba.max_cfg_param.max_xri)
+		return NULL;
+	sglq =  phba->sli4_hba.lpfc_sglq_active_list[adj_xri];
+	return sglq;
+}
+
+/**
+ * __lpfc_sli_get_sglq - Allocates an iocb object from sgl pool
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called with hbalock held. This function
+ * Gets a new driver sglq object from the sglq list. If the
+ * list is not empty then it is successful, it returns pointer to the newly
+ * allocated sglq object else it returns NULL.
+ **/
+static struct lpfc_sglq *
+__lpfc_sli_get_sglq(struct lpfc_hba *phba)
+{
+	struct list_head *lpfc_sgl_list = &phba->sli4_hba.lpfc_sgl_list;
+	struct lpfc_sglq *sglq = NULL;
+	uint16_t adj_xri;
+	list_remove_head(lpfc_sgl_list, sglq, struct lpfc_sglq, list);
+	adj_xri = sglq->sli4_xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	phba->sli4_hba.lpfc_sglq_active_list[adj_xri] = sglq;
+	return sglq;
+}
+
+/**
  * lpfc_sli_get_iocbq - Allocates an iocb object from iocb pool
  * @phba: Pointer to HBA context object.
  *
@@ -142,6 +542,82 @@
 }
 
 /**
+ * __lpfc_sli_release_iocbq_s4 - Release iocb to the iocb pool
+ * @phba: Pointer to HBA context object.
+ * @iocbq: Pointer to driver iocb object.
+ *
+ * This function is called with hbalock held to release driver
+ * iocb object to the iocb pool. The iotag in the iocb object
+ * does not change for each use of the iocb object. This function
+ * clears all other fields of the iocb object when it is freed.
+ * The sqlq structure that holds the xritag and phys and virtual
+ * mappings for the scatter gather list is retrieved from the
+ * active array of sglq. The get of the sglq pointer also clears
+ * the entry in the array. If the status of the IO indiactes that
+ * this IO was aborted then the sglq entry it put on the
+ * lpfc_abts_els_sgl_list until the CQ_ABORTED_XRI is received. If the
+ * IO has good status or fails for any other reason then the sglq
+ * entry is added to the free list (lpfc_sgl_list).
+ **/
+static void
+__lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+{
+	struct lpfc_sglq *sglq;
+	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
+	unsigned long iflag;
+
+	if (iocbq->sli4_xritag == NO_XRI)
+		sglq = NULL;
+	else
+		sglq = __lpfc_clear_active_sglq(phba, iocbq->sli4_xritag);
+	if (sglq)  {
+		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED
+			|| ((iocbq->iocb.ulpStatus == IOSTAT_LOCAL_REJECT)
+			&& (iocbq->iocb.un.ulpWord[4]
+				== IOERR_SLI_ABORTED))) {
+			spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock,
+					iflag);
+			list_add(&sglq->list,
+				&phba->sli4_hba.lpfc_abts_els_sgl_list);
+			spin_unlock_irqrestore(
+				&phba->sli4_hba.abts_sgl_list_lock, iflag);
+		} else
+			list_add(&sglq->list, &phba->sli4_hba.lpfc_sgl_list);
+	}
+
+
+	/*
+	 * Clean all volatile data fields, preserve iotag and node struct.
+	 */
+	memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	iocbq->sli4_xritag = NO_XRI;
+	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+}
+
+/**
+ * __lpfc_sli_release_iocbq_s3 - Release iocb to the iocb pool
+ * @phba: Pointer to HBA context object.
+ * @iocbq: Pointer to driver iocb object.
+ *
+ * This function is called with hbalock held to release driver
+ * iocb object to the iocb pool. The iotag in the iocb object
+ * does not change for each use of the iocb object. This function
+ * clears all other fields of the iocb object when it is freed.
+ **/
+static void
+__lpfc_sli_release_iocbq_s3(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+{
+	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
+
+	/*
+	 * Clean all volatile data fields, preserve iotag and node struct.
+	 */
+	memset((char*)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	iocbq->sli4_xritag = NO_XRI;
+	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+}
+
+/**
  * __lpfc_sli_release_iocbq - Release iocb to the iocb pool
  * @phba: Pointer to HBA context object.
  * @iocbq: Pointer to driver iocb object.
@@ -154,13 +630,7 @@
 static void
 __lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
 {
-	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
-
-	/*
-	 * Clean all volatile data fields, preserve iotag and node struct.
-	 */
-	memset((char*)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
-	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+	phba->__lpfc_sli_release_iocbq(phba, iocbq);
 }
 
 /**
@@ -281,6 +751,14 @@
 	case CMD_GEN_REQUEST64_CR:
 	case CMD_GEN_REQUEST64_CX:
 	case CMD_XMIT_ELS_RSP64_CX:
+	case DSSCMD_IWRITE64_CR:
+	case DSSCMD_IWRITE64_CX:
+	case DSSCMD_IREAD64_CR:
+	case DSSCMD_IREAD64_CX:
+	case DSSCMD_INVALIDATE_DEK:
+	case DSSCMD_SET_KEK:
+	case DSSCMD_GET_KEK_ID:
+	case DSSCMD_GEN_XFER:
 		type = LPFC_SOL_IOCB;
 		break;
 	case CMD_ABORT_XRI_CN:
@@ -348,7 +826,7 @@
 	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!pmb)
 		return -ENOMEM;
-	pmbox = &pmb->mb;
+	pmbox = &pmb->u.mb;
 	phba->link_state = LPFC_INIT_MBX_CMDS;
 	for (i = 0; i < psli->num_rings; i++) {
 		lpfc_config_ring(phba, i, pmb);
@@ -779,8 +1257,8 @@
 		phba->hbqs[i].buffer_count = 0;
 	}
 	/* Return all HBQ buffer that are in-fly */
-	list_for_each_entry_safe(dmabuf, next_dmabuf,
-			&phba->hbqbuf_in_list, list) {
+	list_for_each_entry_safe(dmabuf, next_dmabuf, &phba->rb_pend_list,
+				 list) {
 		hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf);
 		list_del(&hbq_buf->dbuf.list);
 		if (hbq_buf->tag == -1) {
@@ -814,10 +1292,28 @@
  * pointer to the hbq entry if it successfully post the buffer
  * else it will return NULL.
  **/
-static struct lpfc_hbq_entry *
+static int
 lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno,
 			 struct hbq_dmabuf *hbq_buf)
 {
+	return phba->lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buf);
+}
+
+/**
+ * lpfc_sli_hbq_to_firmware_s3 - Post the hbq buffer to SLI3 firmware
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ * @hbq_buf: Pointer to HBQ buffer.
+ *
+ * This function is called with the hbalock held to post a hbq buffer to the
+ * firmware. If the function finds an empty slot in the HBQ, it will post the
+ * buffer and place it on the hbq_buffer_list. The function will return zero if
+ * it successfully post the buffer else it will return an error.
+ **/
+static int
+lpfc_sli_hbq_to_firmware_s3(struct lpfc_hba *phba, uint32_t hbqno,
+			    struct hbq_dmabuf *hbq_buf)
+{
 	struct lpfc_hbq_entry *hbqe;
 	dma_addr_t physaddr = hbq_buf->dbuf.phys;
 
@@ -838,8 +1334,40 @@
 				/* flush */
 		readl(phba->hbq_put + hbqno);
 		list_add_tail(&hbq_buf->dbuf.list, &hbqp->hbq_buffer_list);
-	}
-	return hbqe;
+		return 0;
+	} else
+		return -ENOMEM;
+}
+
+/**
+ * lpfc_sli_hbq_to_firmware_s4 - Post the hbq buffer to SLI4 firmware
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ * @hbq_buf: Pointer to HBQ buffer.
+ *
+ * This function is called with the hbalock held to post an RQE to the SLI4
+ * firmware. If able to post the RQE to the RQ it will queue the hbq entry to
+ * the hbq_buffer_list and return zero, otherwise it will return an error.
+ **/
+static int
+lpfc_sli_hbq_to_firmware_s4(struct lpfc_hba *phba, uint32_t hbqno,
+			    struct hbq_dmabuf *hbq_buf)
+{
+	int rc;
+	struct lpfc_rqe hrqe;
+	struct lpfc_rqe drqe;
+
+	hrqe.address_lo = putPaddrLow(hbq_buf->hbuf.phys);
+	hrqe.address_hi = putPaddrHigh(hbq_buf->hbuf.phys);
+	drqe.address_lo = putPaddrLow(hbq_buf->dbuf.phys);
+	drqe.address_hi = putPaddrHigh(hbq_buf->dbuf.phys);
+	rc = lpfc_sli4_rq_put(phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
+			      &hrqe, &drqe);
+	if (rc < 0)
+		return rc;
+	hbq_buf->tag = rc;
+	list_add_tail(&hbq_buf->dbuf.list, &phba->hbqs[hbqno].hbq_buffer_list);
+	return 0;
 }
 
 /* HBQ for ELS and CT traffic. */
@@ -914,7 +1442,7 @@
 				 dbuf.list);
 		hbq_buffer->tag = (phba->hbqs[hbqno].buffer_count |
 				      (hbqno << 16));
-		if (lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
+		if (!lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
 			phba->hbqs[hbqno].buffer_count++;
 			posted++;
 		} else
@@ -965,6 +1493,25 @@
 }
 
 /**
+ * lpfc_sli_hbqbuf_get - Remove the first hbq off of an hbq list
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ *
+ * This function removes the first hbq buffer on an hbq list and returns a
+ * pointer to that buffer. If it finds no buffers on the list it returns NULL.
+ **/
+static struct hbq_dmabuf *
+lpfc_sli_hbqbuf_get(struct list_head *rb_list)
+{
+	struct lpfc_dmabuf *d_buf;
+
+	list_remove_head(rb_list, d_buf, struct lpfc_dmabuf, list);
+	if (!d_buf)
+		return NULL;
+	return container_of(d_buf, struct hbq_dmabuf, dbuf);
+}
+
+/**
  * lpfc_sli_hbqbuf_find - Find the hbq buffer associated with a tag
  * @phba: Pointer to HBA context object.
  * @tag: Tag of the hbq buffer.
@@ -985,12 +1532,15 @@
 	if (hbqno >= LPFC_MAX_HBQS)
 		return NULL;
 
+	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry(d_buf, &phba->hbqs[hbqno].hbq_buffer_list, list) {
 		hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf);
 		if (hbq_buf->tag == tag) {
+			spin_unlock_irq(&phba->hbalock);
 			return hbq_buf;
 		}
 	}
+	spin_unlock_irq(&phba->hbalock);
 	lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT,
 			"1803 Bad hbq tag. Data: x%x x%x\n",
 			tag, phba->hbqs[tag >> 16].buffer_count);
@@ -1013,9 +1563,8 @@
 
 	if (hbq_buffer) {
 		hbqno = hbq_buffer->tag >> 16;
-		if (!lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
+		if (lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer))
 			(phba->hbqs[hbqno].hbq_free_buffer)(phba, hbq_buffer);
-		}
 	}
 }
 
@@ -1086,6 +1635,15 @@
 	case MBX_HEARTBEAT:
 	case MBX_PORT_CAPABILITIES:
 	case MBX_PORT_IOV_CONTROL:
+	case MBX_SLI4_CONFIG:
+	case MBX_SLI4_REQ_FTRS:
+	case MBX_REG_FCFI:
+	case MBX_UNREG_FCFI:
+	case MBX_REG_VFI:
+	case MBX_UNREG_VFI:
+	case MBX_INIT_VPI:
+	case MBX_INIT_VFI:
+	case MBX_RESUME_RPI:
 		ret = mbxCommand;
 		break;
 	default:
@@ -1106,7 +1664,7 @@
  * will wake up thread waiting on the wait queue pointed by context1
  * of the mailbox.
  **/
-static void
+void
 lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 {
 	wait_queue_head_t *pdone_q;
@@ -1140,7 +1698,7 @@
 lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_dmabuf *mp;
-	uint16_t rpi;
+	uint16_t rpi, vpi;
 	int rc;
 
 	mp = (struct lpfc_dmabuf *) (pmb->context1);
@@ -1150,24 +1708,30 @@
 		kfree(mp);
 	}
 
+	if ((pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) &&
+	    (phba->sli_rev == LPFC_SLI_REV4))
+		lpfc_sli4_free_rpi(phba, pmb->u.mb.un.varUnregLogin.rpi);
+
 	/*
 	 * If a REG_LOGIN succeeded  after node is destroyed or node
 	 * is in re-discovery driver need to cleanup the RPI.
 	 */
 	if (!(phba->pport->load_flag & FC_UNLOADING) &&
-	    pmb->mb.mbxCommand == MBX_REG_LOGIN64 &&
-	    !pmb->mb.mbxStatus) {
-
-		rpi = pmb->mb.un.varWords[0];
-		lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb);
+	    pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
+	    !pmb->u.mb.mbxStatus) {
+		rpi = pmb->u.mb.un.varWords[0];
+		vpi = pmb->u.mb.un.varRegLogin.vpi - phba->vpi_base;
+		lpfc_unreg_login(phba, vpi, rpi, pmb);
 		pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
 		if (rc != MBX_NOT_FINISHED)
 			return;
 	}
 
-	mempool_free(pmb, phba->mbox_mem_pool);
-	return;
+	if (bf_get(lpfc_mqe_command, &pmb->u.mqe) == MBX_SLI4_CONFIG)
+		lpfc_sli4_mbox_cmd_free(phba, pmb);
+	else
+		mempool_free(pmb, phba->mbox_mem_pool);
 }
 
 /**
@@ -1204,7 +1768,7 @@
 		if (pmb == NULL)
 			break;
 
-		pmbox = &pmb->mb;
+		pmbox = &pmb->u.mb;
 
 		if (pmbox->mbxCommand != MBX_HEARTBEAT) {
 			if (pmb->vport) {
@@ -1233,9 +1797,10 @@
 			/* Unknow mailbox command compl */
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 					"(%d):0323 Unknown Mailbox command "
-					"%x Cmpl\n",
+					"x%x (x%x) Cmpl\n",
 					pmb->vport ? pmb->vport->vpi : 0,
-					pmbox->mbxCommand);
+					pmbox->mbxCommand,
+					lpfc_sli4_mbox_opcode_get(phba, pmb));
 			phba->link_state = LPFC_HBA_ERROR;
 			phba->work_hs = HS_FFER3;
 			lpfc_handle_eratt(phba);
@@ -1250,29 +1815,29 @@
 						LOG_MBOX | LOG_SLI,
 						"(%d):0305 Mbox cmd cmpl "
 						"error - RETRYing Data: x%x "
-						"x%x x%x x%x\n",
+						"(x%x) x%x x%x x%x\n",
 						pmb->vport ? pmb->vport->vpi :0,
 						pmbox->mbxCommand,
+						lpfc_sli4_mbox_opcode_get(phba,
+									  pmb),
 						pmbox->mbxStatus,
 						pmbox->un.varWords[0],
 						pmb->vport->port_state);
 				pmbox->mbxStatus = 0;
 				pmbox->mbxOwner = OWN_HOST;
-				spin_lock_irq(&phba->hbalock);
-				phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
-				spin_unlock_irq(&phba->hbalock);
 				rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
-				if (rc == MBX_SUCCESS)
+				if (rc != MBX_NOT_FINISHED)
 					continue;
 			}
 		}
 
 		/* Mailbox cmd <cmd> Cmpl <cmpl> */
 		lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
-				"(%d):0307 Mailbox cmd x%x Cmpl x%p "
+				"(%d):0307 Mailbox cmd x%x (x%x) Cmpl x%p "
 				"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n",
 				pmb->vport ? pmb->vport->vpi : 0,
 				pmbox->mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, pmb),
 				pmb->mbox_cmpl,
 				*((uint32_t *) pmbox),
 				pmbox->un.varWords[0],
@@ -1317,6 +1882,45 @@
 	return &hbq_entry->dbuf;
 }
 
+/**
+ * lpfc_complete_unsol_iocb - Complete an unsolicited sequence
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @saveq: Pointer to the iocbq struct representing the sequence starting frame.
+ * @fch_r_ctl: the r_ctl for the first frame of the sequence.
+ * @fch_type: the type for the first frame of the sequence.
+ *
+ * This function is called with no lock held. This function uses the r_ctl and
+ * type of the received sequence to find the correct callback function to call
+ * to process the sequence.
+ **/
+static int
+lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+			 struct lpfc_iocbq *saveq, uint32_t fch_r_ctl,
+			 uint32_t fch_type)
+{
+	int i;
+
+	/* unSolicited Responses */
+	if (pring->prt[0].profile) {
+		if (pring->prt[0].lpfc_sli_rcv_unsol_event)
+			(pring->prt[0].lpfc_sli_rcv_unsol_event) (phba, pring,
+									saveq);
+		return 1;
+	}
+	/* We must search, based on rctl / type
+	   for the right routine */
+	for (i = 0; i < pring->num_mask; i++) {
+		if ((pring->prt[i].rctl == fch_r_ctl) &&
+		    (pring->prt[i].type == fch_type)) {
+			if (pring->prt[i].lpfc_sli_rcv_unsol_event)
+				(pring->prt[i].lpfc_sli_rcv_unsol_event)
+						(phba, pring, saveq);
+			return 1;
+		}
+	}
+	return 0;
+}
 
 /**
  * lpfc_sli_process_unsol_iocb - Unsolicited iocb handler
@@ -1339,7 +1943,7 @@
 	IOCB_t           * irsp;
 	WORD5            * w5p;
 	uint32_t           Rctl, Type;
-	uint32_t           match, i;
+	uint32_t           match;
 	struct lpfc_iocbq *iocbq;
 	struct lpfc_dmabuf *dmzbuf;
 
@@ -1482,35 +2086,12 @@
 		}
 	}
 
-	/* unSolicited Responses */
-	if (pring->prt[0].profile) {
-		if (pring->prt[0].lpfc_sli_rcv_unsol_event)
-			(pring->prt[0].lpfc_sli_rcv_unsol_event) (phba, pring,
-									saveq);
-		match = 1;
-	} else {
-		/* We must search, based on rctl / type
-		   for the right routine */
-		for (i = 0; i < pring->num_mask; i++) {
-			if ((pring->prt[i].rctl == Rctl)
-			    && (pring->prt[i].type == Type)) {
-				if (pring->prt[i].lpfc_sli_rcv_unsol_event)
-					(pring->prt[i].lpfc_sli_rcv_unsol_event)
-							(phba, pring, saveq);
-				match = 1;
-				break;
-			}
-		}
-	}
-	if (match == 0) {
-		/* Unexpected Rctl / Type received */
-		/* Ring <ringno> handler: unexpected
-		   Rctl <Rctl> Type <Type> received */
+	if (!lpfc_complete_unsol_iocb(phba, pring, saveq, Rctl, Type))
 		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
 				"0313 Ring %d handler: unexpected Rctl x%x "
 				"Type x%x received\n",
 				pring->ringno, Rctl, Type);
-	}
+
 	return 1;
 }
 
@@ -1552,6 +2133,37 @@
 }
 
 /**
+ * lpfc_sli_iocbq_lookup_by_tag - Find command iocb for the iotag
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @iotag: IOCB tag.
+ *
+ * This function looks up the iocb_lookup table to get the command iocb
+ * corresponding to the given iotag. This function is called with the
+ * hbalock held.
+ * This function returns the command iocb object if it finds the command
+ * iocb else returns NULL.
+ **/
+static struct lpfc_iocbq *
+lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
+			     struct lpfc_sli_ring *pring, uint16_t iotag)
+{
+	struct lpfc_iocbq *cmd_iocb;
+
+	if (iotag != 0 && iotag <= phba->sli.last_iotag) {
+		cmd_iocb = phba->sli.iocbq_lookup[iotag];
+		list_del_init(&cmd_iocb->list);
+		pring->txcmplq_cnt--;
+		return cmd_iocb;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0372 iotag x%x is out off range: max iotag (x%x)\n",
+			iotag, phba->sli.last_iotag);
+	return NULL;
+}
+
+/**
  * lpfc_sli_process_sol_iocb - process solicited iocb completion
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -1954,7 +2566,7 @@
 			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
 				(irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
 				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				lpfc_rampdown_queue_depth(phba);
+				phba->lpfc_rampdown_queue_depth(phba);
 				spin_lock_irqsave(&phba->hbalock, iflag);
 			}
 
@@ -2068,39 +2680,215 @@
 }
 
 /**
- * lpfc_sli_handle_slow_ring_event - Handle ring events for non-FCP rings
+ * lpfc_sli_sp_handle_rspiocb - Handle slow-path response iocb
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @rspiocbp: Pointer to driver response IOCB object.
+ *
+ * This function is called from the worker thread when there is a slow-path
+ * response IOCB to process. This function chains all the response iocbs until
+ * seeing the iocb with the LE bit set. The function will call
+ * lpfc_sli_process_sol_iocb function if the response iocb indicates a
+ * completion of a command iocb. The function will call the
+ * lpfc_sli_process_unsol_iocb function if this is an unsolicited iocb.
+ * The function frees the resources or calls the completion handler if this
+ * iocb is an abort completion. The function returns NULL when the response
+ * iocb has the LE bit set and all the chained iocbs are processed, otherwise
+ * this function shall chain the iocb on to the iocb_continueq and return the
+ * response iocb passed in.
+ **/
+static struct lpfc_iocbq *
+lpfc_sli_sp_handle_rspiocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+			struct lpfc_iocbq *rspiocbp)
+{
+	struct lpfc_iocbq *saveq;
+	struct lpfc_iocbq *cmdiocbp;
+	struct lpfc_iocbq *next_iocb;
+	IOCB_t *irsp = NULL;
+	uint32_t free_saveq;
+	uint8_t iocb_cmd_type;
+	lpfc_iocb_type type;
+	unsigned long iflag;
+	int rc;
+
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	/* First add the response iocb to the countinueq list */
+	list_add_tail(&rspiocbp->list, &(pring->iocb_continueq));
+	pring->iocb_continueq_cnt++;
+
+	/* Now, determine whetehr the list is completed for processing */
+	irsp = &rspiocbp->iocb;
+	if (irsp->ulpLe) {
+		/*
+		 * By default, the driver expects to free all resources
+		 * associated with this iocb completion.
+		 */
+		free_saveq = 1;
+		saveq = list_get_first(&pring->iocb_continueq,
+				       struct lpfc_iocbq, list);
+		irsp = &(saveq->iocb);
+		list_del_init(&pring->iocb_continueq);
+		pring->iocb_continueq_cnt = 0;
+
+		pring->stats.iocb_rsp++;
+
+		/*
+		 * If resource errors reported from HBA, reduce
+		 * queuedepths of the SCSI device.
+		 */
+		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+		    (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			phba->lpfc_rampdown_queue_depth(phba);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+		}
+
+		if (irsp->ulpStatus) {
+			/* Rsp ring <ringno> error: IOCB */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0328 Rsp Ring %d error: "
+					"IOCB Data: "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x\n",
+					pring->ringno,
+					irsp->un.ulpWord[0],
+					irsp->un.ulpWord[1],
+					irsp->un.ulpWord[2],
+					irsp->un.ulpWord[3],
+					irsp->un.ulpWord[4],
+					irsp->un.ulpWord[5],
+					*(((uint32_t *) irsp) + 6),
+					*(((uint32_t *) irsp) + 7),
+					*(((uint32_t *) irsp) + 8),
+					*(((uint32_t *) irsp) + 9),
+					*(((uint32_t *) irsp) + 10),
+					*(((uint32_t *) irsp) + 11),
+					*(((uint32_t *) irsp) + 12),
+					*(((uint32_t *) irsp) + 13),
+					*(((uint32_t *) irsp) + 14),
+					*(((uint32_t *) irsp) + 15));
+		}
+
+		/*
+		 * Fetch the IOCB command type and call the correct completion
+		 * routine. Solicited and Unsolicited IOCBs on the ELS ring
+		 * get freed back to the lpfc_iocb_list by the discovery
+		 * kernel thread.
+		 */
+		iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
+		type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
+		switch (type) {
+		case LPFC_SOL_IOCB:
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			rc = lpfc_sli_process_sol_iocb(phba, pring, saveq);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+			break;
+
+		case LPFC_UNSOL_IOCB:
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			rc = lpfc_sli_process_unsol_iocb(phba, pring, saveq);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+			if (!rc)
+				free_saveq = 0;
+			break;
+
+		case LPFC_ABORT_IOCB:
+			cmdiocbp = NULL;
+			if (irsp->ulpCommand != CMD_XRI_ABORTED_CX)
+				cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring,
+								 saveq);
+			if (cmdiocbp) {
+				/* Call the specified completion routine */
+				if (cmdiocbp->iocb_cmpl) {
+					spin_unlock_irqrestore(&phba->hbalock,
+							       iflag);
+					(cmdiocbp->iocb_cmpl)(phba, cmdiocbp,
+							      saveq);
+					spin_lock_irqsave(&phba->hbalock,
+							  iflag);
+				} else
+					__lpfc_sli_release_iocbq(phba,
+								 cmdiocbp);
+			}
+			break;
+
+		case LPFC_UNKNOWN_IOCB:
+			if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
+				char adaptermsg[LPFC_MAX_ADPTMSG];
+				memset(adaptermsg, 0, LPFC_MAX_ADPTMSG);
+				memcpy(&adaptermsg[0], (uint8_t *)irsp,
+				       MAX_MSG_DATA);
+				dev_warn(&((phba->pcidev)->dev),
+					 "lpfc%d: %s\n",
+					 phba->brd_no, adaptermsg);
+			} else {
+				/* Unknown IOCB command */
+				lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+						"0335 Unknown IOCB "
+						"command Data: x%x "
+						"x%x x%x x%x\n",
+						irsp->ulpCommand,
+						irsp->ulpStatus,
+						irsp->ulpIoTag,
+						irsp->ulpContext);
+			}
+			break;
+		}
+
+		if (free_saveq) {
+			list_for_each_entry_safe(rspiocbp, next_iocb,
+						 &saveq->list, list) {
+				list_del(&rspiocbp->list);
+				__lpfc_sli_release_iocbq(phba, rspiocbp);
+			}
+			__lpfc_sli_release_iocbq(phba, saveq);
+		}
+		rspiocbp = NULL;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+	return rspiocbp;
+}
+
+/**
+ * lpfc_sli_handle_slow_ring_event - Wrapper func for handling slow-path iocbs
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
  * @mask: Host attention register mask for this ring.
  *
- * This function is called from the worker thread when there is a ring
- * event for non-fcp rings. The caller does not hold any lock .
- * The function processes each response iocb in the response ring until it
- * finds an iocb with LE bit set and chains all the iocbs upto the iocb with
- * LE bit set. The function will call lpfc_sli_process_sol_iocb function if the
- * response iocb indicates a completion of a command iocb. The function
- * will call lpfc_sli_process_unsol_iocb function if this is an unsolicited
- * iocb. The function frees the resources or calls the completion handler if
- * this iocb is an abort completion. The function returns 0 when the allocated
- * iocbs are not freed, otherwise returns 1.
+ * This routine wraps the actual slow_ring event process routine from the
+ * API jump table function pointer from the lpfc_hba struct.
  **/
-int
+void
 lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 				struct lpfc_sli_ring *pring, uint32_t mask)
 {
+	phba->lpfc_sli_handle_slow_ring_event(phba, pring, mask);
+}
+
+/**
+ * lpfc_sli_handle_slow_ring_event_s3 - Handle SLI3 ring event for non-FCP rings
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @mask: Host attention register mask for this ring.
+ *
+ * This function is called from the worker thread when there is a ring event
+ * for non-fcp rings. The caller does not hold any lock. The function will
+ * remove each response iocb in the response ring and calls the handle
+ * response iocb routine (lpfc_sli_sp_handle_rspiocb) to process it.
+ **/
+static void
+lpfc_sli_handle_slow_ring_event_s3(struct lpfc_hba *phba,
+				   struct lpfc_sli_ring *pring, uint32_t mask)
+{
 	struct lpfc_pgp *pgp;
 	IOCB_t *entry;
 	IOCB_t *irsp = NULL;
 	struct lpfc_iocbq *rspiocbp = NULL;
-	struct lpfc_iocbq *next_iocb;
-	struct lpfc_iocbq *cmdiocbp;
-	struct lpfc_iocbq *saveq;
-	uint8_t iocb_cmd_type;
-	lpfc_iocb_type type;
-	uint32_t status, free_saveq;
 	uint32_t portRspPut, portRspMax;
-	int rc = 1;
 	unsigned long iflag;
+	uint32_t status;
 
 	pgp = &phba->port_gp[pring->ringno];
 	spin_lock_irqsave(&phba->hbalock, iflag);
@@ -2128,7 +2916,7 @@
 		phba->work_hs = HS_FFER3;
 		lpfc_handle_eratt(phba);
 
-		return 1;
+		return;
 	}
 
 	rmb();
@@ -2173,138 +2961,10 @@
 
 		writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
 
-		list_add_tail(&rspiocbp->list, &(pring->iocb_continueq));
-
-		pring->iocb_continueq_cnt++;
-		if (irsp->ulpLe) {
-			/*
-			 * By default, the driver expects to free all resources
-			 * associated with this iocb completion.
-			 */
-			free_saveq = 1;
-			saveq = list_get_first(&pring->iocb_continueq,
-					       struct lpfc_iocbq, list);
-			irsp = &(saveq->iocb);
-			list_del_init(&pring->iocb_continueq);
-			pring->iocb_continueq_cnt = 0;
-
-			pring->stats.iocb_rsp++;
-
-			/*
-			 * If resource errors reported from HBA, reduce
-			 * queuedepths of the SCSI device.
-			 */
-			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
-			     (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				lpfc_rampdown_queue_depth(phba);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-			}
-
-			if (irsp->ulpStatus) {
-				/* Rsp ring <ringno> error: IOCB */
-				lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
-						"0328 Rsp Ring %d error: "
-						"IOCB Data: "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x\n",
-						pring->ringno,
-						irsp->un.ulpWord[0],
-						irsp->un.ulpWord[1],
-						irsp->un.ulpWord[2],
-						irsp->un.ulpWord[3],
-						irsp->un.ulpWord[4],
-						irsp->un.ulpWord[5],
-						*(((uint32_t *) irsp) + 6),
-						*(((uint32_t *) irsp) + 7),
-						*(((uint32_t *) irsp) + 8),
-						*(((uint32_t *) irsp) + 9),
-						*(((uint32_t *) irsp) + 10),
-						*(((uint32_t *) irsp) + 11),
-						*(((uint32_t *) irsp) + 12),
-						*(((uint32_t *) irsp) + 13),
-						*(((uint32_t *) irsp) + 14),
-						*(((uint32_t *) irsp) + 15));
-			}
-
-			/*
-			 * Fetch the IOCB command type and call the correct
-			 * completion routine.  Solicited and Unsolicited
-			 * IOCBs on the ELS ring get freed back to the
-			 * lpfc_iocb_list by the discovery kernel thread.
-			 */
-			iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
-			type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
-			if (type == LPFC_SOL_IOCB) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				rc = lpfc_sli_process_sol_iocb(phba, pring,
-							       saveq);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-			} else if (type == LPFC_UNSOL_IOCB) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				rc = lpfc_sli_process_unsol_iocb(phba, pring,
-								 saveq);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-				if (!rc)
-					free_saveq = 0;
-			} else if (type == LPFC_ABORT_IOCB) {
-				if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) &&
-				    ((cmdiocbp =
-				      lpfc_sli_iocbq_lookup(phba, pring,
-							    saveq)))) {
-					/* Call the specified completion
-					   routine */
-					if (cmdiocbp->iocb_cmpl) {
-						spin_unlock_irqrestore(
-						       &phba->hbalock,
-						       iflag);
-						(cmdiocbp->iocb_cmpl) (phba,
-							     cmdiocbp, saveq);
-						spin_lock_irqsave(
-							  &phba->hbalock,
-							  iflag);
-					} else
-						__lpfc_sli_release_iocbq(phba,
-								      cmdiocbp);
-				}
-			} else if (type == LPFC_UNKNOWN_IOCB) {
-				if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
-
-					char adaptermsg[LPFC_MAX_ADPTMSG];
-
-					memset(adaptermsg, 0,
-					       LPFC_MAX_ADPTMSG);
-					memcpy(&adaptermsg[0], (uint8_t *) irsp,
-					       MAX_MSG_DATA);
-					dev_warn(&((phba->pcidev)->dev),
-						 "lpfc%d: %s\n",
-						 phba->brd_no, adaptermsg);
-				} else {
-					/* Unknown IOCB command */
-					lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-							"0335 Unknown IOCB "
-							"command Data: x%x "
-							"x%x x%x x%x\n",
-							irsp->ulpCommand,
-							irsp->ulpStatus,
-							irsp->ulpIoTag,
-							irsp->ulpContext);
-				}
-			}
-
-			if (free_saveq) {
-				list_for_each_entry_safe(rspiocbp, next_iocb,
-							 &saveq->list, list) {
-					list_del(&rspiocbp->list);
-					__lpfc_sli_release_iocbq(phba,
-								 rspiocbp);
-				}
-				__lpfc_sli_release_iocbq(phba, saveq);
-			}
-			rspiocbp = NULL;
-		}
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		/* Handle the response IOCB */
+		rspiocbp = lpfc_sli_sp_handle_rspiocb(phba, pring, rspiocbp);
+		spin_lock_irqsave(&phba->hbalock, iflag);
 
 		/*
 		 * If the port response put pointer has not been updated, sync
@@ -2338,7 +2998,37 @@
 	}
 
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
-	return rc;
+	return;
+}
+
+/**
+ * lpfc_sli_handle_slow_ring_event_s4 - Handle SLI4 slow-path els events
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @mask: Host attention register mask for this ring.
+ *
+ * This function is called from the worker thread when there is a pending
+ * ELS response iocb on the driver internal slow-path response iocb worker
+ * queue. The caller does not hold any lock. The function will remove each
+ * response iocb from the response worker queue and calls the handle
+ * response iocb routine (lpfc_sli_sp_handle_rspiocb) to process it.
+ **/
+static void
+lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba,
+				   struct lpfc_sli_ring *pring, uint32_t mask)
+{
+	struct lpfc_iocbq *irspiocbq;
+	unsigned long iflag;
+
+	while (!list_empty(&phba->sli4_hba.sp_rspiocb_work_queue)) {
+		/* Get the response iocb from the head of work queue */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		list_remove_head(&phba->sli4_hba.sp_rspiocb_work_queue,
+				 irspiocbq, struct lpfc_iocbq, list);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		/* Process the response iocb */
+		lpfc_sli_sp_handle_rspiocb(phba, pring, irspiocbq);
+	}
 }
 
 /**
@@ -2420,7 +3110,7 @@
 }
 
 /**
- * lpfc_sli_brdready - Check for host status bits
+ * lpfc_sli_brdready_s3 - Check for sli3 host ready status
  * @phba: Pointer to HBA context object.
  * @mask: Bit mask to be checked.
  *
@@ -2432,8 +3122,8 @@
  * function returns 1 when HBA fail to restart otherwise returns
  * zero.
  **/
-int
-lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+static int
+lpfc_sli_brdready_s3(struct lpfc_hba *phba, uint32_t mask)
 {
 	uint32_t status;
 	int i = 0;
@@ -2477,6 +3167,56 @@
 	return retval;
 }
 
+/**
+ * lpfc_sli_brdready_s4 - Check for sli4 host ready status
+ * @phba: Pointer to HBA context object.
+ * @mask: Bit mask to be checked.
+ *
+ * This function checks the host status register to check if HBA is
+ * ready. This function will wait in a loop for the HBA to be ready
+ * If the HBA is not ready , the function will will reset the HBA PCI
+ * function again. The function returns 1 when HBA fail to be ready
+ * otherwise returns zero.
+ **/
+static int
+lpfc_sli_brdready_s4(struct lpfc_hba *phba, uint32_t mask)
+{
+	uint32_t status;
+	int retval = 0;
+
+	/* Read the HBA Host Status Register */
+	status = lpfc_sli4_post_status_check(phba);
+
+	if (status) {
+		phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+		lpfc_sli_brdrestart(phba);
+		status = lpfc_sli4_post_status_check(phba);
+	}
+
+	/* Check to see if any errors occurred during init */
+	if (status) {
+		phba->link_state = LPFC_HBA_ERROR;
+		retval = 1;
+	} else
+		phba->sli4_hba.intr_enable = 0;
+
+	return retval;
+}
+
+/**
+ * lpfc_sli_brdready - Wrapper func for checking the hba readyness
+ * @phba: Pointer to HBA context object.
+ * @mask: Bit mask to be checked.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba readyness check routine
+ * from the API jump table function pointer from the lpfc_hba struct.
+ **/
+int
+lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+{
+	return phba->lpfc_sli_brdready(phba, mask);
+}
+
 #define BARRIER_TEST_PATTERN (0xdeadbeef)
 
 /**
@@ -2532,7 +3272,7 @@
 		mdelay(1);
 
 	if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) {
-		if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE ||
+		if (phba->sli.sli_flag & LPFC_SLI_ACTIVE ||
 		    phba->pport->stopped)
 			goto restore_hc;
 		else
@@ -2613,7 +3353,9 @@
 		return 1;
 	}
 
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
 
 	mempool_free(pmb, phba->mbox_mem_pool);
 
@@ -2636,10 +3378,10 @@
 	}
 	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	psli->mbox_active = NULL;
 	phba->link_flag &= ~LS_IGNORE_ERATT;
 	spin_unlock_irq(&phba->hbalock);
 
-	psli->mbox_active = NULL;
 	lpfc_hba_down_post(phba);
 	phba->link_state = LPFC_HBA_ERROR;
 
@@ -2647,7 +3389,7 @@
 }
 
 /**
- * lpfc_sli_brdreset - Reset the HBA
+ * lpfc_sli_brdreset - Reset a sli-2 or sli-3 HBA
  * @phba: Pointer to HBA context object.
  *
  * This function resets the HBA by writing HC_INITFF to the control
@@ -2683,7 +3425,8 @@
 			      (cfg_value &
 			       ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
 
-	psli->sli_flag &= ~(LPFC_SLI2_ACTIVE | LPFC_PROCESS_LA);
+	psli->sli_flag &= ~(LPFC_SLI_ACTIVE | LPFC_PROCESS_LA);
+
 	/* Now toggle INITFF bit in the Host Control Register */
 	writel(HC_INITFF, phba->HCregaddr);
 	mdelay(1);
@@ -2710,7 +3453,66 @@
 }
 
 /**
- * lpfc_sli_brdrestart - Restart the HBA
+ * lpfc_sli4_brdreset - Reset a sli-4 HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function resets a SLI4 HBA. This function disables PCI layer parity
+ * checking during resets the device. The caller is not required to hold
+ * any locks.
+ *
+ * This function returns 0 always.
+ **/
+int
+lpfc_sli4_brdreset(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	uint16_t cfg_value;
+	uint8_t qindx;
+
+	/* Reset HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0295 Reset HBA Data: x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag);
+
+	/* perform board reset */
+	phba->fc_eventTag = 0;
+	phba->pport->fc_myDID = 0;
+	phba->pport->fc_prevDID = 0;
+
+	/* Turn off parity checking and serr during the physical reset */
+	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
+	pci_write_config_word(phba->pcidev, PCI_COMMAND,
+			      (cfg_value &
+			      ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
+
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag &= ~(LPFC_PROCESS_LA);
+	phba->fcf.fcf_flag = 0;
+	/* Clean up the child queue list for the CQs */
+	list_del_init(&phba->sli4_hba.mbx_wq->list);
+	list_del_init(&phba->sli4_hba.els_wq->list);
+	list_del_init(&phba->sli4_hba.hdr_rq->list);
+	list_del_init(&phba->sli4_hba.dat_rq->list);
+	list_del_init(&phba->sli4_hba.mbx_cq->list);
+	list_del_init(&phba->sli4_hba.els_cq->list);
+	list_del_init(&phba->sli4_hba.rxq_cq->list);
+	for (qindx = 0; qindx < phba->cfg_fcp_wq_count; qindx++)
+		list_del_init(&phba->sli4_hba.fcp_wq[qindx]->list);
+	for (qindx = 0; qindx < phba->cfg_fcp_eq_count; qindx++)
+		list_del_init(&phba->sli4_hba.fcp_cq[qindx]->list);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Now physically reset the device */
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0389 Performing PCI function reset!\n");
+	/* Perform FCoE PCI function reset */
+	lpfc_pci_function_reset(phba);
+
+	return 0;
+}
+
+/**
+ * lpfc_sli_brdrestart_s3 - Restart a sli-3 hba
  * @phba: Pointer to HBA context object.
  *
  * This function is called in the SLI initialization code path to
@@ -2722,8 +3524,8 @@
  * The function does not guarantee completion of MBX_RESTART mailbox
  * command before the return of this function.
  **/
-int
-lpfc_sli_brdrestart(struct lpfc_hba *phba)
+static int
+lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 {
 	MAILBOX_t *mb;
 	struct lpfc_sli *psli;
@@ -2762,7 +3564,7 @@
 	lpfc_sli_brdreset(phba);
 	phba->pport->stopped = 0;
 	phba->link_state = LPFC_INIT_START;
-
+	phba->hba_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
 
 	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
@@ -2777,6 +3579,55 @@
 }
 
 /**
+ * lpfc_sli_brdrestart_s4 - Restart the sli-4 hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called in the SLI initialization code path to restart
+ * a SLI4 HBA. The caller is not required to hold any lock.
+ * At the end of the function, it calls lpfc_hba_down_post function to
+ * free any pending commands.
+ **/
+static int
+lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+
+
+	/* Restart HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0296 Restart HBA Data: x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag);
+
+	lpfc_sli4_brdreset(phba);
+
+	spin_lock_irq(&phba->hbalock);
+	phba->pport->stopped = 0;
+	phba->link_state = LPFC_INIT_START;
+	phba->hba_flag = 0;
+	spin_unlock_irq(&phba->hbalock);
+
+	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
+	psli->stats_start = get_seconds();
+
+	lpfc_hba_down_post(phba);
+
+	return 0;
+}
+
+/**
+ * lpfc_sli_brdrestart - Wrapper func for restarting hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba restart routine from the
+ * API jump table function pointer from the lpfc_hba struct.
+**/
+int
+lpfc_sli_brdrestart(struct lpfc_hba *phba)
+{
+	return phba->lpfc_sli_brdrestart(phba);
+}
+
+/**
  * lpfc_sli_chipset_init - Wait for the restart of the HBA after a restart
  * @phba: Pointer to HBA context object.
  *
@@ -2940,7 +3791,7 @@
 	if (!pmb)
 		return -ENOMEM;
 
-	pmbox = &pmb->mb;
+	pmbox = &pmb->u.mb;
 
 	/* Initialize the struct lpfc_sli_hbq structure for each hbq */
 	phba->link_state = LPFC_INIT_MBX_CMDS;
@@ -2984,6 +3835,26 @@
 }
 
 /**
+ * lpfc_sli4_rb_setup - Initialize and post RBs to HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called during the SLI initialization to configure
+ * all the HBQs and post buffers to the HBQ. The caller is not
+ * required to hold any locks. This function will return zero if successful
+ * else it will return negative error code.
+ **/
+static int
+lpfc_sli4_rb_setup(struct lpfc_hba *phba)
+{
+	phba->hbq_in_use = 1;
+	phba->hbqs[0].entry_count = lpfc_hbq_defs[0]->entry_count;
+	phba->hbq_count = 1;
+	/* Initially populate or replenish the HBQs */
+	lpfc_sli_hbqbuf_init_hbqs(phba, 0);
+	return 0;
+}
+
+/**
  * lpfc_sli_config_port - Issue config port mailbox command
  * @phba: Pointer to HBA context object.
  * @sli_mode: sli mode - 2/3
@@ -3047,33 +3918,43 @@
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0442 Adapter failed to init, mbxCmd x%x "
 				"CONFIG_PORT, mbxStatus x%x Data: x%x\n",
-				pmb->mb.mbxCommand, pmb->mb.mbxStatus, 0);
+				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus, 0);
 			spin_lock_irq(&phba->hbalock);
-			phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE;
+			phba->sli.sli_flag &= ~LPFC_SLI_ACTIVE;
 			spin_unlock_irq(&phba->hbalock);
 			rc = -ENXIO;
-		} else
+		} else {
+			/* Allow asynchronous mailbox command to go through */
+			spin_lock_irq(&phba->hbalock);
+			phba->sli.sli_flag &= ~LPFC_SLI_ASYNC_MBX_BLK;
+			spin_unlock_irq(&phba->hbalock);
 			done = 1;
+		}
 	}
 	if (!done) {
 		rc = -EINVAL;
 		goto do_prep_failed;
 	}
-	if (pmb->mb.un.varCfgPort.sli_mode == 3) {
-		if (!pmb->mb.un.varCfgPort.cMA) {
+	if (pmb->u.mb.un.varCfgPort.sli_mode == 3) {
+		if (!pmb->u.mb.un.varCfgPort.cMA) {
 			rc = -ENXIO;
 			goto do_prep_failed;
 		}
-		if (phba->max_vpi && pmb->mb.un.varCfgPort.gmv) {
+		if (phba->max_vpi && pmb->u.mb.un.varCfgPort.gmv) {
 			phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
-			phba->max_vpi = pmb->mb.un.varCfgPort.max_vpi;
+			phba->max_vpi = pmb->u.mb.un.varCfgPort.max_vpi;
+			phba->max_vports = (phba->max_vpi > phba->max_vports) ?
+				phba->max_vpi : phba->max_vports;
+
 		} else
 			phba->max_vpi = 0;
-		if (pmb->mb.un.varCfgPort.gerbm)
+		if (pmb->u.mb.un.varCfgPort.gdss)
+			phba->sli3_options |= LPFC_SLI3_DSS_ENABLED;
+		if (pmb->u.mb.un.varCfgPort.gerbm)
 			phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED;
-		if (pmb->mb.un.varCfgPort.gcrp)
+		if (pmb->u.mb.un.varCfgPort.gcrp)
 			phba->sli3_options |= LPFC_SLI3_CRP_ENABLED;
-		if (pmb->mb.un.varCfgPort.ginb) {
+		if (pmb->u.mb.un.varCfgPort.ginb) {
 			phba->sli3_options |= LPFC_SLI3_INB_ENABLED;
 			phba->hbq_get = phba->mbox->us.s3_inb_pgp.hbq_get;
 			phba->port_gp = phba->mbox->us.s3_inb_pgp.port;
@@ -3089,7 +3970,7 @@
 		}
 
 		if (phba->cfg_enable_bg) {
-			if (pmb->mb.un.varCfgPort.gbg)
+			if (pmb->u.mb.un.varCfgPort.gbg)
 				phba->sli3_options |= LPFC_SLI3_BG_ENABLED;
 			else
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -3184,8 +4065,9 @@
 		if (rc)
 			goto lpfc_sli_hba_setup_error;
 	}
-
+	spin_lock_irq(&phba->hbalock);
 	phba->sli.sli_flag |= LPFC_PROCESS_LA;
+	spin_unlock_irq(&phba->hbalock);
 
 	rc = lpfc_config_port_post(phba);
 	if (rc)
@@ -3200,6 +4082,488 @@
 	return rc;
 }
 
+/**
+ * lpfc_sli4_read_fcoe_params - Read fcoe params from conf region
+ * @phba: Pointer to HBA context object.
+ * @mboxq: mailbox pointer.
+ * This function issue a dump mailbox command to read config region
+ * 23 and parse the records in the region and populate driver
+ * data structure.
+ **/
+static int
+lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba,
+		LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_dmabuf *mp;
+	struct lpfc_mqe *mqe;
+	uint32_t data_length;
+	int rc;
+
+	/* Program the default value of vlan_id and fc_map */
+	phba->valid_vlan = 0;
+	phba->fc_map[0] = LPFC_FCOE_FCF_MAP0;
+	phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
+	phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
+
+	mqe = &mboxq->u.mqe;
+	if (lpfc_dump_fcoe_param(phba, mboxq))
+		return -ENOMEM;
+
+	mp = (struct lpfc_dmabuf *) mboxq->context1;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):2571 Mailbox cmd x%x Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			bf_get(lpfc_mqe_command, mqe),
+			bf_get(lpfc_mqe_status, mqe),
+			mqe->un.mb_words[0], mqe->un.mb_words[1],
+			mqe->un.mb_words[2], mqe->un.mb_words[3],
+			mqe->un.mb_words[4], mqe->un.mb_words[5],
+			mqe->un.mb_words[6], mqe->un.mb_words[7],
+			mqe->un.mb_words[8], mqe->un.mb_words[9],
+			mqe->un.mb_words[10], mqe->un.mb_words[11],
+			mqe->un.mb_words[12], mqe->un.mb_words[13],
+			mqe->un.mb_words[14], mqe->un.mb_words[15],
+			mqe->un.mb_words[16], mqe->un.mb_words[50],
+			mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
+
+	if (rc) {
+		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+		kfree(mp);
+		return -EIO;
+	}
+	data_length = mqe->un.mb_words[5];
+	if (data_length > DMP_FCOEPARAM_RGN_SIZE)
+		return -EIO;
+
+	lpfc_parse_fcoe_conf(phba, mp->virt, data_length);
+	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+	kfree(mp);
+	return 0;
+}
+
+/**
+ * lpfc_sli4_read_rev - Issue READ_REV and collect vpd data
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to the LPFC_MBOXQ_t structure.
+ * @vpd: pointer to the memory to hold resulting port vpd data.
+ * @vpd_size: On input, the number of bytes allocated to @vpd.
+ *	      On output, the number of data bytes in @vpd.
+ *
+ * This routine executes a READ_REV SLI4 mailbox command.  In
+ * addition, this routine gets the port vpd data.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - could not allocated memory.
+ **/
+static int
+lpfc_sli4_read_rev(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+		    uint8_t *vpd, uint32_t *vpd_size)
+{
+	int rc = 0;
+	uint32_t dma_size;
+	struct lpfc_dmabuf *dmabuf;
+	struct lpfc_mqe *mqe;
+
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return -ENOMEM;
+
+	/*
+	 * Get a DMA buffer for the vpd data resulting from the READ_REV
+	 * mailbox command.
+	 */
+	dma_size = *vpd_size;
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  dma_size,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		kfree(dmabuf);
+		return -ENOMEM;
+	}
+	memset(dmabuf->virt, 0, dma_size);
+
+	/*
+	 * The SLI4 implementation of READ_REV conflicts at word1,
+	 * bits 31:16 and SLI4 adds vpd functionality not present
+	 * in SLI3.  This code corrects the conflicts.
+	 */
+	lpfc_read_rev(phba, mboxq);
+	mqe = &mboxq->u.mqe;
+	mqe->un.read_rev.vpd_paddr_high = putPaddrHigh(dmabuf->phys);
+	mqe->un.read_rev.vpd_paddr_low = putPaddrLow(dmabuf->phys);
+	mqe->un.read_rev.word1 &= 0x0000FFFF;
+	bf_set(lpfc_mbx_rd_rev_vpd, &mqe->un.read_rev, 1);
+	bf_set(lpfc_mbx_rd_rev_avail_len, &mqe->un.read_rev, dma_size);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (rc) {
+		dma_free_coherent(&phba->pcidev->dev, dma_size,
+				  dmabuf->virt, dmabuf->phys);
+		return -EIO;
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0380 Mailbox cmd x%x Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			bf_get(lpfc_mqe_command, mqe),
+			bf_get(lpfc_mqe_status, mqe),
+			mqe->un.mb_words[0], mqe->un.mb_words[1],
+			mqe->un.mb_words[2], mqe->un.mb_words[3],
+			mqe->un.mb_words[4], mqe->un.mb_words[5],
+			mqe->un.mb_words[6], mqe->un.mb_words[7],
+			mqe->un.mb_words[8], mqe->un.mb_words[9],
+			mqe->un.mb_words[10], mqe->un.mb_words[11],
+			mqe->un.mb_words[12], mqe->un.mb_words[13],
+			mqe->un.mb_words[14], mqe->un.mb_words[15],
+			mqe->un.mb_words[16], mqe->un.mb_words[50],
+			mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
+
+	/*
+	 * The available vpd length cannot be bigger than the
+	 * DMA buffer passed to the port.  Catch the less than
+	 * case and update the caller's size.
+	 */
+	if (mqe->un.read_rev.avail_vpd_len < *vpd_size)
+		*vpd_size = mqe->un.read_rev.avail_vpd_len;
+
+	lpfc_sli_pcimem_bcopy(dmabuf->virt, vpd, *vpd_size);
+	dma_free_coherent(&phba->pcidev->dev, dma_size,
+			  dmabuf->virt, dmabuf->phys);
+	kfree(dmabuf);
+	return 0;
+}
+
+/**
+ * lpfc_sli4_arm_cqeq_intr - Arm sli-4 device completion and event queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is called to explicitly arm the SLI4 device's completion and
+ * event queues
+ **/
+static void
+lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
+{
+	uint8_t fcp_eqidx;
+
+	lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM);
+	lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM);
+	lpfc_sli4_cq_release(phba->sli4_hba.rxq_cq, LPFC_QUEUE_REARM);
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++)
+		lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[fcp_eqidx],
+				     LPFC_QUEUE_REARM);
+	lpfc_sli4_eq_release(phba->sli4_hba.sp_eq, LPFC_QUEUE_REARM);
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++)
+		lpfc_sli4_eq_release(phba->sli4_hba.fp_eq[fcp_eqidx],
+				     LPFC_QUEUE_REARM);
+}
+
+/**
+ * lpfc_sli4_hba_setup - SLI4 device intialization PCI function
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is the main SLI4 device intialization PCI function. This
+ * function is called by the HBA intialization code, HBA reset code and
+ * HBA error attention handler code. Caller is not required to hold any
+ * locks.
+ **/
+int
+lpfc_sli4_hba_setup(struct lpfc_hba *phba)
+{
+	int rc;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mqe *mqe;
+	uint8_t *vpd;
+	uint32_t vpd_size;
+	uint32_t ftr_rsp = 0;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport);
+	struct lpfc_vport *vport = phba->pport;
+	struct lpfc_dmabuf *mp;
+
+	/* Perform a PCI function reset to start from clean */
+	rc = lpfc_pci_function_reset(phba);
+	if (unlikely(rc))
+		return -ENODEV;
+
+	/* Check the HBA Host Status Register for readyness */
+	rc = lpfc_sli4_post_status_check(phba);
+	if (unlikely(rc))
+		return -ENODEV;
+	else {
+		spin_lock_irq(&phba->hbalock);
+		phba->sli.sli_flag |= LPFC_SLI_ACTIVE;
+		spin_unlock_irq(&phba->hbalock);
+	}
+
+	/*
+	 * Allocate a single mailbox container for initializing the
+	 * port.
+	 */
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+
+	/*
+	 * Continue initialization with default values even if driver failed
+	 * to read FCoE param config regions
+	 */
+	if (lpfc_sli4_read_fcoe_params(phba, mboxq))
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_INIT,
+			"2570 Failed to read FCoE parameters \n");
+
+	/* Issue READ_REV to collect vpd and FW information. */
+	vpd_size = PAGE_SIZE;
+	vpd = kzalloc(vpd_size, GFP_KERNEL);
+	if (!vpd) {
+		rc = -ENOMEM;
+		goto out_free_mbox;
+	}
+
+	rc = lpfc_sli4_read_rev(phba, mboxq, vpd, &vpd_size);
+	if (unlikely(rc))
+		goto out_free_vpd;
+
+	mqe = &mboxq->u.mqe;
+	if ((bf_get(lpfc_mbx_rd_rev_sli_lvl,
+		    &mqe->un.read_rev) != LPFC_SLI_REV4) ||
+	    (bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev) == 0)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+			"0376 READ_REV Error. SLI Level %d "
+			"FCoE enabled %d\n",
+			bf_get(lpfc_mbx_rd_rev_sli_lvl, &mqe->un.read_rev),
+			bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev));
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+	/* Single threaded at this point, no need for lock */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag |= HBA_FCOE_SUPPORT;
+	spin_unlock_irq(&phba->hbalock);
+	/*
+	 * Evaluate the read rev and vpd data. Populate the driver
+	 * state with the results. If this routine fails, the failure
+	 * is not fatal as the driver will use generic values.
+	 */
+	rc = lpfc_parse_vpd(phba, vpd, vpd_size);
+	if (unlikely(!rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0377 Error %d parsing vpd. "
+				"Using defaults.\n", rc);
+		rc = 0;
+	}
+
+	/* By now, we should determine the SLI revision, hard code for now */
+	phba->sli_rev = LPFC_SLI_REV4;
+
+	/*
+	 * Discover the port's supported feature set and match it against the
+	 * hosts requests.
+	 */
+	lpfc_request_features(phba, mboxq);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (unlikely(rc)) {
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+
+	/*
+	 * The port must support FCP initiator mode as this is the
+	 * only mode running in the host.
+	 */
+	if (!(bf_get(lpfc_mbx_rq_ftr_rsp_fcpi, &mqe->un.req_ftrs))) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0378 No support for fcpi mode.\n");
+		ftr_rsp++;
+	}
+
+	/*
+	 * If the port cannot support the host's requested features
+	 * then turn off the global config parameters to disable the
+	 * feature in the driver.  This is not a fatal error.
+	 */
+	if ((phba->cfg_enable_bg) &&
+	    !(bf_get(lpfc_mbx_rq_ftr_rsp_dif, &mqe->un.req_ftrs)))
+		ftr_rsp++;
+
+	if (phba->max_vpi && phba->cfg_enable_npiv &&
+	    !(bf_get(lpfc_mbx_rq_ftr_rsp_npiv, &mqe->un.req_ftrs)))
+		ftr_rsp++;
+
+	if (ftr_rsp) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0379 Feature Mismatch Data: x%08x %08x "
+				"x%x x%x x%x\n", mqe->un.req_ftrs.word2,
+				mqe->un.req_ftrs.word3, phba->cfg_enable_bg,
+				phba->cfg_enable_npiv, phba->max_vpi);
+		if (!(bf_get(lpfc_mbx_rq_ftr_rsp_dif, &mqe->un.req_ftrs)))
+			phba->cfg_enable_bg = 0;
+		if (!(bf_get(lpfc_mbx_rq_ftr_rsp_npiv, &mqe->un.req_ftrs)))
+			phba->cfg_enable_npiv = 0;
+	}
+
+	/* These SLI3 features are assumed in SLI4 */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli3_options |= (LPFC_SLI3_NPIV_ENABLED | LPFC_SLI3_HBQ_ENABLED);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Read the port's service parameters. */
+	lpfc_read_sparam(phba, mboxq, vport->vpi);
+	mboxq->vport = vport;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	mp = (struct lpfc_dmabuf *) mboxq->context1;
+	if (rc == MBX_SUCCESS) {
+		memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
+		rc = 0;
+	}
+
+	/*
+	 * This memory was allocated by the lpfc_read_sparam routine. Release
+	 * it to the mbuf pool.
+	 */
+	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+	kfree(mp);
+	mboxq->context1 = NULL;
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0382 READ_SPARAM command failed "
+				"status %d, mbxStatus x%x\n",
+				rc, bf_get(lpfc_mqe_status, mqe));
+		phba->link_state = LPFC_HBA_ERROR;
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+
+	if (phba->cfg_soft_wwnn)
+		u64_to_wwn(phba->cfg_soft_wwnn,
+			   vport->fc_sparam.nodeName.u.wwn);
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn,
+			   vport->fc_sparam.portName.u.wwn);
+	memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
+	       sizeof(struct lpfc_name));
+	memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
+	       sizeof(struct lpfc_name));
+
+	/* Update the fc_host data structures with new wwn. */
+	fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
+	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+
+	/* Register SGL pool to the device using non-embedded mailbox command */
+	rc = lpfc_sli4_post_sgl_list(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0582 Error %d during sgl post operation", rc);
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+
+	/* Register SCSI SGL pool to the device */
+	rc = lpfc_sli4_repost_scsi_sgl_list(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0383 Error %d during scsi sgl post opeation",
+				rc);
+		/* Some Scsi buffers were moved to the abort scsi list */
+		/* A pci function reset will repost them */
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+
+	/* Post the rpi header region to the device. */
+	rc = lpfc_sli4_post_all_rpi_hdrs(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0393 Error %d during rpi post operation\n",
+				rc);
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+	/* Temporary initialization of lpfc_fip_flag to non-fip */
+	bf_set(lpfc_fip_flag, &phba->sli4_hba.sli4_flags, 0);
+
+	/* Set up all the queues to the device */
+	rc = lpfc_sli4_queue_setup(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0381 Error %d during queue setup.\n ", rc);
+		goto out_stop_timers;
+	}
+
+	/* Arm the CQs and then EQs on device */
+	lpfc_sli4_arm_cqeq_intr(phba);
+
+	/* Indicate device interrupt mode */
+	phba->sli4_hba.intr_enable = 1;
+
+	/* Allow asynchronous mailbox command to go through */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli.sli_flag &= ~LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Post receive buffers to the device */
+	lpfc_sli4_rb_setup(phba);
+
+	/* Start the ELS watchdog timer */
+	/*
+	 * The driver for SLI4 is not yet ready to process timeouts
+	 * or interrupts.  Once it is, the comment bars can be removed.
+	 */
+	/* mod_timer(&vport->els_tmofunc,
+	 *           jiffies + HZ * (phba->fc_ratov*2)); */
+
+	/* Start heart beat timer */
+	mod_timer(&phba->hb_tmofunc,
+		  jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+	phba->hb_outstanding = 0;
+	phba->last_completion_time = jiffies;
+
+	/* Start error attention (ERATT) polling timer */
+	mod_timer(&phba->eratt_poll, jiffies + HZ * LPFC_ERATT_POLL_INTERVAL);
+
+	/*
+	 * The port is ready, set the host's link state to LINK_DOWN
+	 * in preparation for link interrupts.
+	 */
+	lpfc_init_link(phba, mboxq, phba->cfg_topology, phba->cfg_link_speed);
+	mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	lpfc_set_loopback_flag(phba);
+	/* Change driver state to LPFC_LINK_DOWN right before init link */
+	spin_lock_irq(&phba->hbalock);
+	phba->link_state = LPFC_LINK_DOWN;
+	spin_unlock_irq(&phba->hbalock);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (unlikely(rc != MBX_NOT_FINISHED)) {
+		kfree(vpd);
+		return 0;
+	} else
+		rc = -EIO;
+
+	/* Unset all the queues set up in this routine when error out */
+	if (rc)
+		lpfc_sli4_queue_unset(phba);
+
+out_stop_timers:
+	if (rc)
+		lpfc_stop_hba_timers(phba);
+out_free_vpd:
+	kfree(vpd);
+out_free_mbox:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return rc;
+}
 
 /**
  * lpfc_mbox_timeout - Timeout call back function for mbox timer
@@ -3244,7 +4608,7 @@
 lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 {
 	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
-	MAILBOX_t *mb = &pmbox->mb;
+	MAILBOX_t *mb = &pmbox->u.mb;
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
 
@@ -3281,7 +4645,7 @@
 	spin_unlock_irq(&phba->pport->work_port_lock);
 	spin_lock_irq(&phba->hbalock);
 	phba->link_state = LPFC_LINK_UNKNOWN;
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 	pring = &psli->ring[psli->fcp_ring];
@@ -3289,32 +4653,20 @@
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 			"0345 Resetting board due to mailbox timeout\n");
-	/*
-	 * lpfc_offline calls lpfc_sli_hba_down which will clean up
-	 * on oustanding mailbox commands.
-	 */
-	/* If resets are disabled then set error state and return. */
-	if (!phba->cfg_enable_hba_reset) {
-		phba->link_state = LPFC_HBA_ERROR;
-		return;
-	}
-	lpfc_offline_prep(phba);
-	lpfc_offline(phba);
-	lpfc_sli_brdrestart(phba);
-	lpfc_online(phba);
-	lpfc_unblock_mgmt_io(phba);
-	return;
+
+	/* Reset the HBA device */
+	lpfc_reset_hba(phba);
 }
 
 /**
- * lpfc_sli_issue_mbox - Issue a mailbox command to firmware
+ * lpfc_sli_issue_mbox_s3 - Issue an SLI3 mailbox command to firmware
  * @phba: Pointer to HBA context object.
  * @pmbox: Pointer to mailbox object.
  * @flag: Flag indicating how the mailbox need to be processed.
  *
  * This function is called by discovery code and HBA management code
- * to submit a mailbox command to firmware. This function gets the
- * hbalock to protect the data structures.
+ * to submit a mailbox command to firmware with SLI-3 interface spec. This
+ * function gets the hbalock to protect the data structures.
  * The mailbox command can be submitted in polling mode, in which case
  * this function will wait in a polling loop for the completion of the
  * mailbox.
@@ -3332,8 +4684,9 @@
  * return codes the caller owns the mailbox command after the return of
  * the function.
  **/
-int
-lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+static int
+lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
+		       uint32_t flag)
 {
 	MAILBOX_t *mb;
 	struct lpfc_sli *psli = &phba->sli;
@@ -3349,6 +4702,10 @@
 	spin_lock_irqsave(&phba->hbalock, drvr_flag);
 	if (!pmbox) {
 		/* processing mbox queue from intr_handler */
+		if (unlikely(psli->sli_flag & LPFC_SLI_ASYNC_MBX_BLK)) {
+			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+			return MBX_SUCCESS;
+		}
 		processing_queue = 1;
 		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 		pmbox = lpfc_mbox_get(phba);
@@ -3365,7 +4722,7 @@
 			lpfc_printf_log(phba, KERN_ERR,
 					LOG_MBOX | LOG_VPORT,
 					"1806 Mbox x%x failed. No vport\n",
-					pmbox->mb.mbxCommand);
+					pmbox->u.mb.mbxCommand);
 			dump_stack();
 			goto out_not_finished;
 		}
@@ -3385,21 +4742,29 @@
 
 	psli = &phba->sli;
 
-	mb = &pmbox->mb;
+	mb = &pmbox->u.mb;
 	status = MBX_SUCCESS;
 
 	if (phba->link_state == LPFC_HBA_ERROR) {
 		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 
 		/* Mbox command <mbxCommand> cannot issue */
-		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):0311 Mailbox command x%x cannot "
+				"issue Data: x%x x%x\n",
+				pmbox->vport ? pmbox->vport->vpi : 0,
+				pmbox->u.mb.mbxCommand, psli->sli_flag, flag);
 		goto out_not_finished;
 	}
 
 	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT &&
 	    !(readl(phba->HCregaddr) & HC_MBINT_ENA)) {
 		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
-		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2528 Mailbox command x%x cannot "
+				"issue Data: x%x x%x\n",
+				pmbox->vport ? pmbox->vport->vpi : 0,
+				pmbox->u.mb.mbxCommand, psli->sli_flag, flag);
 		goto out_not_finished;
 	}
 
@@ -3413,14 +4778,24 @@
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2529 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 
-		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) {
+		if (!(psli->sli_flag & LPFC_SLI_ACTIVE)) {
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2530 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 
@@ -3462,12 +4837,17 @@
 
 	/* If we are not polling, we MUST be in SLI2 mode */
 	if (flag != MBX_POLL) {
-		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) &&
+		if (!(psli->sli_flag & LPFC_SLI_ACTIVE) &&
 		    (mb->mbxCommand != MBX_KILL_BOARD)) {
 			psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2531 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 		/* timeout active mbox command */
@@ -3506,7 +4886,7 @@
 	/* next set own bit for the adapter and copy over command word */
 	mb->mbxOwner = OWN_CHIP;
 
-	if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+	if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 		/* First copy command data to host SLIM area */
 		lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE);
 	} else {
@@ -3529,7 +4909,7 @@
 
 		if (mb->mbxCommand == MBX_CONFIG_PORT) {
 			/* switch over to host mailbox */
-			psli->sli_flag |= LPFC_SLI2_ACTIVE;
+			psli->sli_flag |= LPFC_SLI_ACTIVE;
 		}
 	}
 
@@ -3552,7 +4932,7 @@
 		writel(CA_MBATT, phba->CAregaddr);
 		readl(phba->CAregaddr); /* flush */
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 			/* First read mbox status word */
 			word0 = *((uint32_t *)phba->mbox);
 			word0 = le32_to_cpu(word0);
@@ -3591,7 +4971,7 @@
 				spin_lock_irqsave(&phba->hbalock, drvr_flag);
 			}
 
-			if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+			if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 				/* First copy command data */
 				word0 = *((uint32_t *)phba->mbox);
 				word0 = le32_to_cpu(word0);
@@ -3604,7 +4984,7 @@
 					if (((slimword0 & OWN_CHIP) != OWN_CHIP)
 					    && slimmb->mbxStatus) {
 						psli->sli_flag &=
-						    ~LPFC_SLI2_ACTIVE;
+						    ~LPFC_SLI_ACTIVE;
 						word0 = slimword0;
 					}
 				}
@@ -3616,7 +4996,7 @@
 			ha_copy = readl(phba->HAregaddr);
 		}
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 			/* copy results back to user */
 			lpfc_sli_pcimem_bcopy(phba->mbox, mb, MAILBOX_CMD_SIZE);
 		} else {
@@ -3643,13 +5023,420 @@
 
 out_not_finished:
 	if (processing_queue) {
-		pmbox->mb.mbxStatus = MBX_NOT_FINISHED;
+		pmbox->u.mb.mbxStatus = MBX_NOT_FINISHED;
 		lpfc_mbox_cmpl_put(phba, pmbox);
 	}
 	return MBX_NOT_FINISHED;
 }
 
 /**
+ * lpfc_sli4_post_sync_mbox - Post an SLI4 mailbox to the bootstrap mailbox
+ * @phba: Pointer to HBA context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * The function posts a mailbox to the port.  The mailbox is expected
+ * to be comletely filled in and ready for the port to operate on it.
+ * This routine executes a synchronous completion operation on the
+ * mailbox by polling for its completion.
+ *
+ * The caller must not be holding any locks when calling this routine.
+ *
+ * Returns:
+ *	MBX_SUCCESS - mailbox posted successfully
+ *	Any of the MBX error values.
+ **/
+static int
+lpfc_sli4_post_sync_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	int rc = MBX_SUCCESS;
+	unsigned long iflag;
+	uint32_t db_ready;
+	uint32_t mcqe_status;
+	uint32_t mbx_cmnd;
+	unsigned long timeout;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_mqe *mb = &mboxq->u.mqe;
+	struct lpfc_bmbx_create *mbox_rgn;
+	struct dma_address *dma_address;
+	struct lpfc_register bmbx_reg;
+
+	/*
+	 * Only one mailbox can be active to the bootstrap mailbox region
+	 * at a time and there is no queueing provided.
+	 */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2532 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, MBX_POLL);
+		return MBXERR_ERROR;
+	}
+	/* The server grabs the token and owns it until release */
+	psli->sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = mboxq;
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	/*
+	 * Initialize the bootstrap memory region to avoid stale data areas
+	 * in the mailbox post.  Then copy the caller's mailbox contents to
+	 * the bmbx mailbox region.
+	 */
+	mbx_cmnd = bf_get(lpfc_mqe_command, mb);
+	memset(phba->sli4_hba.bmbx.avirt, 0, sizeof(struct lpfc_bmbx_create));
+	lpfc_sli_pcimem_bcopy(mb, phba->sli4_hba.bmbx.avirt,
+			      sizeof(struct lpfc_mqe));
+
+	/* Post the high mailbox dma address to the port and wait for ready. */
+	dma_address = &phba->sli4_hba.bmbx.dma_address;
+	writel(dma_address->addr_hi, phba->sli4_hba.BMBXregaddr);
+
+	timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mbx_cmnd)
+				   * 1000) + jiffies;
+	do {
+		bmbx_reg.word0 = readl(phba->sli4_hba.BMBXregaddr);
+		db_ready = bf_get(lpfc_bmbx_rdy, &bmbx_reg);
+		if (!db_ready)
+			msleep(2);
+
+		if (time_after(jiffies, timeout)) {
+			rc = MBXERR_ERROR;
+			goto exit;
+		}
+	} while (!db_ready);
+
+	/* Post the low mailbox dma address to the port. */
+	writel(dma_address->addr_lo, phba->sli4_hba.BMBXregaddr);
+	timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mbx_cmnd)
+				   * 1000) + jiffies;
+	do {
+		bmbx_reg.word0 = readl(phba->sli4_hba.BMBXregaddr);
+		db_ready = bf_get(lpfc_bmbx_rdy, &bmbx_reg);
+		if (!db_ready)
+			msleep(2);
+
+		if (time_after(jiffies, timeout)) {
+			rc = MBXERR_ERROR;
+			goto exit;
+		}
+	} while (!db_ready);
+
+	/*
+	 * Read the CQ to ensure the mailbox has completed.
+	 * If so, update the mailbox status so that the upper layers
+	 * can complete the request normally.
+	 */
+	lpfc_sli_pcimem_bcopy(phba->sli4_hba.bmbx.avirt, mb,
+			      sizeof(struct lpfc_mqe));
+	mbox_rgn = (struct lpfc_bmbx_create *) phba->sli4_hba.bmbx.avirt;
+	lpfc_sli_pcimem_bcopy(&mbox_rgn->mcqe, &mboxq->mcqe,
+			      sizeof(struct lpfc_mcqe));
+	mcqe_status = bf_get(lpfc_mcqe_status, &mbox_rgn->mcqe);
+
+	/* Prefix the mailbox status with range x4000 to note SLI4 status. */
+	if (mcqe_status != MB_CQE_STATUS_SUCCESS) {
+		bf_set(lpfc_mqe_status, mb, LPFC_MBX_ERROR_RANGE | mcqe_status);
+		rc = MBXERR_ERROR;
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0356 Mailbox cmd x%x (x%x) Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x x%x x%x"
+			" x%x x%x CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			mbx_cmnd, lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			bf_get(lpfc_mqe_status, mb),
+			mb->un.mb_words[0], mb->un.mb_words[1],
+			mb->un.mb_words[2], mb->un.mb_words[3],
+			mb->un.mb_words[4], mb->un.mb_words[5],
+			mb->un.mb_words[6], mb->un.mb_words[7],
+			mb->un.mb_words[8], mb->un.mb_words[9],
+			mb->un.mb_words[10], mb->un.mb_words[11],
+			mb->un.mb_words[12], mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
+exit:
+	/* We are holding the token, no needed for lock when release */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+	return rc;
+}
+
+/**
+ * lpfc_sli_issue_mbox_s4 - Issue an SLI4 mailbox command to firmware
+ * @phba: Pointer to HBA context object.
+ * @pmbox: Pointer to mailbox object.
+ * @flag: Flag indicating how the mailbox need to be processed.
+ *
+ * This function is called by discovery code and HBA management code to submit
+ * a mailbox command to firmware with SLI-4 interface spec.
+ *
+ * Return codes the caller owns the mailbox command after the return of the
+ * function.
+ **/
+static int
+lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+		       uint32_t flag)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	unsigned long iflags;
+	int rc;
+
+	/* Detect polling mode and jump to a handler */
+	if (!phba->sli4_hba.intr_enable) {
+		if (flag == MBX_POLL)
+			rc = lpfc_sli4_post_sync_mbox(phba, mboxq);
+		else
+			rc = -EIO;
+		if (rc != MBX_SUCCESS)
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2541 Mailbox command x%x "
+					"(x%x) cannot issue Data: x%x x%x\n",
+					mboxq->vport ? mboxq->vport->vpi : 0,
+					mboxq->u.mb.mbxCommand,
+					lpfc_sli4_mbox_opcode_get(phba, mboxq),
+					psli->sli_flag, flag);
+		return rc;
+	} else if (flag == MBX_POLL) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2542 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		return -EIO;
+	}
+
+	/* Now, interrupt mode asynchrous mailbox command */
+	rc = lpfc_mbox_cmd_check(phba, mboxq);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2543 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		goto out_not_finished;
+	}
+	rc = lpfc_mbox_dev_check(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2544 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		goto out_not_finished;
+	}
+
+	/* Put the mailbox command to the driver internal FIFO */
+	psli->slistat.mbox_busy++;
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	lpfc_mbox_put(phba, mboxq);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0354 Mbox cmd issue - Enqueue Data: "
+			"x%x (x%x) x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0xffffff,
+			bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+			lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			phba->pport->port_state,
+			psli->sli_flag, MBX_NOWAIT);
+	/* Wake up worker thread to transport mailbox command from head */
+	lpfc_worker_wake_up(phba);
+
+	return MBX_BUSY;
+
+out_not_finished:
+	return MBX_NOT_FINISHED;
+}
+
+/**
+ * lpfc_sli4_post_async_mbox - Post an SLI4 mailbox command to device
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called by worker thread to send a mailbox command to
+ * SLI4 HBA firmware.
+ *
+ **/
+int
+lpfc_sli4_post_async_mbox(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *mboxq;
+	int rc = MBX_SUCCESS;
+	unsigned long iflags;
+	struct lpfc_mqe *mqe;
+	uint32_t mbx_cmnd;
+
+	/* Check interrupt mode before post async mailbox command */
+	if (unlikely(!phba->sli4_hba.intr_enable))
+		return MBX_NOT_FINISHED;
+
+	/* Check for mailbox command service token */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	if (unlikely(psli->sli_flag & LPFC_SLI_ASYNC_MBX_BLK)) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_NOT_FINISHED;
+	}
+	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_NOT_FINISHED;
+	}
+	if (unlikely(phba->sli.mbox_active)) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0384 There is pending active mailbox cmd\n");
+		return MBX_NOT_FINISHED;
+	}
+	/* Take the mailbox command service token */
+	psli->sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+
+	/* Get the next mailbox command from head of queue */
+	mboxq = lpfc_mbox_get(phba);
+
+	/* If no more mailbox command waiting for post, we're done */
+	if (!mboxq) {
+		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_SUCCESS;
+	}
+	phba->sli.mbox_active = mboxq;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	/* Check device readiness for posting mailbox command */
+	rc = lpfc_mbox_dev_check(phba);
+	if (unlikely(rc))
+		/* Driver clean routine will clean up pending mailbox */
+		goto out_not_finished;
+
+	/* Prepare the mbox command to be posted */
+	mqe = &mboxq->u.mqe;
+	mbx_cmnd = bf_get(lpfc_mqe_command, mqe);
+
+	/* Start timer for the mbox_tmo and log some mailbox post messages */
+	mod_timer(&psli->mbox_tmo, (jiffies +
+		  (HZ * lpfc_mbox_tmo_val(phba, mbx_cmnd))));
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0355 Mailbox cmd x%x (x%x) issue Data: "
+			"x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0, mbx_cmnd,
+			lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			phba->pport->port_state, psli->sli_flag);
+
+	if (mbx_cmnd != MBX_HEARTBEAT) {
+		if (mboxq->vport) {
+			lpfc_debugfs_disc_trc(mboxq->vport,
+				LPFC_DISC_TRC_MBOX_VPORT,
+				"MBOX Send vport: cmd:x%x mb:x%x x%x",
+				mbx_cmnd, mqe->un.mb_words[0],
+				mqe->un.mb_words[1]);
+		} else {
+			lpfc_debugfs_disc_trc(phba->pport,
+				LPFC_DISC_TRC_MBOX,
+				"MBOX Send: cmd:x%x mb:x%x x%x",
+				mbx_cmnd, mqe->un.mb_words[0],
+				mqe->un.mb_words[1]);
+		}
+	}
+	psli->slistat.mbox_cmd++;
+
+	/* Post the mailbox command to the port */
+	rc = lpfc_sli4_mq_put(phba->sli4_hba.mbx_wq, mqe);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2533 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, MBX_NOWAIT);
+		goto out_not_finished;
+	}
+
+	return rc;
+
+out_not_finished:
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED;
+	__lpfc_mbox_cmpl_put(phba, mboxq);
+	/* Release the token */
+	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	return MBX_NOT_FINISHED;
+}
+
+/**
+ * lpfc_sli_issue_mbox - Wrapper func for issuing mailbox command
+ * @phba: Pointer to HBA context object.
+ * @pmbox: Pointer to mailbox object.
+ * @flag: Flag indicating how the mailbox need to be processed.
+ *
+ * This routine wraps the actual SLI3 or SLI4 mailbox issuing routine from
+ * the API jump table function pointer from the lpfc_hba struct.
+ *
+ * Return codes the caller owns the mailbox command after the return of the
+ * function.
+ **/
+int
+lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+{
+	return phba->lpfc_sli_issue_mbox(phba, pmbox, flag);
+}
+
+/**
+ * lpfc_mbox_api_table_setup - Set up mbox api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the mbox interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_mbox_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_sli_issue_mbox = lpfc_sli_issue_mbox_s3;
+		phba->lpfc_sli_handle_slow_ring_event =
+				lpfc_sli_handle_slow_ring_event_s3;
+		phba->lpfc_sli_hbq_to_firmware = lpfc_sli_hbq_to_firmware_s3;
+		phba->lpfc_sli_brdrestart = lpfc_sli_brdrestart_s3;
+		phba->lpfc_sli_brdready = lpfc_sli_brdready_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_sli_issue_mbox = lpfc_sli_issue_mbox_s4;
+		phba->lpfc_sli_handle_slow_ring_event =
+				lpfc_sli_handle_slow_ring_event_s4;
+		phba->lpfc_sli_hbq_to_firmware = lpfc_sli_hbq_to_firmware_s4;
+		phba->lpfc_sli_brdrestart = lpfc_sli_brdrestart_s4;
+		phba->lpfc_sli_brdready = lpfc_sli_brdready_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1420 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	return 0;
+}
+
+/**
  * __lpfc_sli_ringtx_put - Add an iocb to the txq
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -3701,35 +5488,34 @@
 }
 
 /**
- * __lpfc_sli_issue_iocb - Lockless version of lpfc_sli_issue_iocb
+ * __lpfc_sli_issue_iocb_s3 - SLI3 device lockless ver of lpfc_sli_issue_iocb
  * @phba: Pointer to HBA context object.
- * @pring: Pointer to driver SLI ring object.
+ * @ring_number: SLI ring number to issue iocb on.
  * @piocb: Pointer to command iocb.
  * @flag: Flag indicating if this command can be put into txq.
  *
- * __lpfc_sli_issue_iocb is used by other functions in the driver
- * to issue an iocb command to the HBA. If the PCI slot is recovering
- * from error state or if HBA is resetting or if LPFC_STOP_IOCB_EVENT
- * flag is turned on, the function returns IOCB_ERROR.
- * When the link is down, this function allows only iocbs for
- * posting buffers.
- * This function finds next available slot in the command ring and
- * posts the command to the available slot and writes the port
- * attention register to request HBA start processing new iocb.
- * If there is no slot available in the ring and
- * flag & SLI_IOCB_RET_IOCB is set, the new iocb is added to the
- * txq, otherwise the function returns IOCB_BUSY.
+ * __lpfc_sli_issue_iocb_s3 is used by other functions in the driver to issue
+ * an iocb command to an HBA with SLI-3 interface spec. If the PCI slot is
+ * recovering from error state, if HBA is resetting or if LPFC_STOP_IOCB_EVENT
+ * flag is turned on, the function returns IOCB_ERROR. When the link is down,
+ * this function allows only iocbs for posting buffers. This function finds
+ * next available slot in the command ring and posts the command to the
+ * available slot and writes the port attention register to request HBA start
+ * processing new iocb. If there is no slot available in the ring and
+ * flag & SLI_IOCB_RET_IOCB is set, the new iocb is added to the txq, otherwise
+ * the function returns IOCB_BUSY.
  *
- * This function is called with hbalock held.
- * The function will return success after it successfully submit the
- * iocb to firmware or after adding to the txq.
+ * This function is called with hbalock held. The function will return success
+ * after it successfully submit the iocb to firmware or after adding to the
+ * txq.
  **/
 static int
-__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+__lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
 	struct lpfc_iocbq *nextiocb;
 	IOCB_t *iocb;
+	struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number];
 
 	if (piocb->iocb_cmpl && (!piocb->vport) &&
 	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
@@ -3833,6 +5619,498 @@
 	return IOCB_BUSY;
 }
 
+/**
+ * lpfc_sli4_bpl2sgl - Convert the bpl/bde to a sgl.
+ * @phba: Pointer to HBA context object.
+ * @piocb: Pointer to command iocb.
+ * @sglq: Pointer to the scatter gather queue object.
+ *
+ * This routine converts the bpl or bde that is in the IOCB
+ * to a sgl list for the sli4 hardware. The physical address
+ * of the bpl/bde is converted back to a virtual address.
+ * If the IOCB contains a BPL then the list of BDE's is
+ * converted to sli4_sge's. If the IOCB contains a single
+ * BDE then it is converted to a single sli_sge.
+ * The IOCB is still in cpu endianess so the contents of
+ * the bpl can be used without byte swapping.
+ *
+ * Returns valid XRI = Success, NO_XRI = Failure.
+**/
+static uint16_t
+lpfc_sli4_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
+		struct lpfc_sglq *sglq)
+{
+	uint16_t xritag = NO_XRI;
+	struct ulp_bde64 *bpl = NULL;
+	struct ulp_bde64 bde;
+	struct sli4_sge *sgl  = NULL;
+	IOCB_t *icmd;
+	int numBdes = 0;
+	int i = 0;
+
+	if (!piocbq || !sglq)
+		return xritag;
+
+	sgl  = (struct sli4_sge *)sglq->sgl;
+	icmd = &piocbq->iocb;
+	if (icmd->un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
+		numBdes = icmd->un.genreq64.bdl.bdeSize /
+				sizeof(struct ulp_bde64);
+		/* The addrHigh and addrLow fields within the IOCB
+		 * have not been byteswapped yet so there is no
+		 * need to swap them back.
+		 */
+		bpl  = (struct ulp_bde64 *)
+			((struct lpfc_dmabuf *)piocbq->context3)->virt;
+
+		if (!bpl)
+			return xritag;
+
+		for (i = 0; i < numBdes; i++) {
+			/* Should already be byte swapped. */
+			sgl->addr_hi =  bpl->addrHigh;
+			sgl->addr_lo =  bpl->addrLow;
+			/* swap the size field back to the cpu so we
+			 * can assign it to the sgl.
+			 */
+			bde.tus.w  = le32_to_cpu(bpl->tus.w);
+			bf_set(lpfc_sli4_sge_len, sgl, bde.tus.f.bdeSize);
+			if ((i+1) == numBdes)
+				bf_set(lpfc_sli4_sge_last, sgl, 1);
+			else
+				bf_set(lpfc_sli4_sge_last, sgl, 0);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+			bpl++;
+			sgl++;
+		}
+	} else if (icmd->un.genreq64.bdl.bdeFlags == BUFF_TYPE_BDE_64) {
+			/* The addrHigh and addrLow fields of the BDE have not
+			 * been byteswapped yet so they need to be swapped
+			 * before putting them in the sgl.
+			 */
+			sgl->addr_hi =
+				cpu_to_le32(icmd->un.genreq64.bdl.addrHigh);
+			sgl->addr_lo =
+				cpu_to_le32(icmd->un.genreq64.bdl.addrLow);
+			bf_set(lpfc_sli4_sge_len, sgl,
+				icmd->un.genreq64.bdl.bdeSize);
+			bf_set(lpfc_sli4_sge_last, sgl, 1);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+	}
+	return sglq->sli4_xritag;
+}
+
+/**
+ * lpfc_sli4_scmd_to_wqidx_distr - scsi command to SLI4 WQ index distribution
+ * @phba: Pointer to HBA context object.
+ * @piocb: Pointer to command iocb.
+ *
+ * This routine performs a round robin SCSI command to SLI4 FCP WQ index
+ * distribution.
+ *
+ * Return: index into SLI4 fast-path FCP queue index.
+ **/
+static uint32_t
+lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
+{
+	static uint32_t fcp_qidx;
+
+	return fcp_qidx++ % phba->cfg_fcp_wq_count;
+}
+
+/**
+ * lpfc_sli_iocb2wqe - Convert the IOCB to a work queue entry.
+ * @phba: Pointer to HBA context object.
+ * @piocb: Pointer to command iocb.
+ * @wqe: Pointer to the work queue entry.
+ *
+ * This routine converts the iocb command to its Work Queue Entry
+ * equivalent. The wqe pointer should not have any fields set when
+ * this routine is called because it will memcpy over them.
+ * This routine does not set the CQ_ID or the WQEC bits in the
+ * wqe.
+ *
+ * Returns: 0 = Success, IOCB_ERROR = Failure.
+ **/
+static int
+lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
+		union lpfc_wqe *wqe)
+{
+	uint32_t payload_len = 0;
+	uint8_t ct = 0;
+	uint32_t fip;
+	uint32_t abort_tag;
+	uint8_t command_type = ELS_COMMAND_NON_FIP;
+	uint8_t cmnd;
+	uint16_t xritag;
+	struct ulp_bde64 *bpl = NULL;
+
+	fip = bf_get(lpfc_fip_flag, &phba->sli4_hba.sli4_flags);
+	/* The fcp commands will set command type */
+	if ((!(iocbq->iocb_flag &  LPFC_IO_FCP)) && (!fip))
+		command_type = ELS_COMMAND_NON_FIP;
+	else if (!(iocbq->iocb_flag &  LPFC_IO_FCP))
+		command_type = ELS_COMMAND_FIP;
+	else if (iocbq->iocb_flag &  LPFC_IO_FCP)
+		command_type = FCP_COMMAND;
+	else {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2019 Invalid cmd 0x%x\n",
+			iocbq->iocb.ulpCommand);
+		return IOCB_ERROR;
+	}
+	/* Some of the fields are in the right position already */
+	memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
+	abort_tag = (uint32_t) iocbq->iotag;
+	xritag = iocbq->sli4_xritag;
+	wqe->words[7] = 0; /* The ct field has moved so reset */
+	/* words0-2 bpl convert bde */
+	if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
+		bpl  = (struct ulp_bde64 *)
+			((struct lpfc_dmabuf *)iocbq->context3)->virt;
+		if (!bpl)
+			return IOCB_ERROR;
+
+		/* Should already be byte swapped. */
+		wqe->generic.bde.addrHigh =  le32_to_cpu(bpl->addrHigh);
+		wqe->generic.bde.addrLow =  le32_to_cpu(bpl->addrLow);
+		/* swap the size field back to the cpu so we
+		 * can assign it to the sgl.
+		 */
+		wqe->generic.bde.tus.w  = le32_to_cpu(bpl->tus.w);
+		payload_len = wqe->generic.bde.tus.f.bdeSize;
+	} else
+		payload_len = iocbq->iocb.un.fcpi64.bdl.bdeSize;
+
+	iocbq->iocb.ulpIoTag = iocbq->iotag;
+	cmnd = iocbq->iocb.ulpCommand;
+
+	switch (iocbq->iocb.ulpCommand) {
+	case CMD_ELS_REQUEST64_CR:
+		if (!iocbq->iocb.ulpLe) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2007 Only Limited Edition cmd Format"
+				" supported 0x%x\n",
+				iocbq->iocb.ulpCommand);
+			return IOCB_ERROR;
+		}
+		wqe->els_req.payload_len = payload_len;
+		/* Els_reguest64 has a TMO */
+		bf_set(wqe_tmo, &wqe->els_req.wqe_com,
+			iocbq->iocb.ulpTimeout);
+		/* Need a VF for word 4 set the vf bit*/
+		bf_set(els_req64_vf, &wqe->els_req, 0);
+		/* And a VFID for word 12 */
+		bf_set(els_req64_vfid, &wqe->els_req, 0);
+		/*
+		 * Set ct field to 3, indicates that the context_tag field
+		 * contains the FCFI and remote N_Port_ID is
+		 * in word 5.
+		 */
+
+		ct = ((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l);
+		bf_set(lpfc_wqe_gen_context, &wqe->generic,
+				iocbq->iocb.ulpContext);
+
+		if (iocbq->vport->fc_myDID != 0) {
+			bf_set(els_req64_sid, &wqe->els_req,
+				 iocbq->vport->fc_myDID);
+			bf_set(els_req64_sp, &wqe->els_req, 1);
+		}
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic, ct);
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, 0);
+		/* CCP CCPE PV PRI in word10 were set in the memcpy */
+	break;
+	case CMD_XMIT_SEQUENCE64_CR:
+		/* word3 iocb=io_tag32 wqe=payload_offset */
+		/* payload offset used for multilpe outstanding
+		 * sequences on the same exchange
+		 */
+		wqe->words[3] = 0;
+		/* word4 relative_offset memcpy */
+		/* word5 r_ctl/df_ctl memcpy */
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, 0);
+		wqe->xmit_sequence.xmit_len = payload_len;
+	break;
+	case CMD_XMIT_BCAST64_CN:
+		/* word3 iocb=iotag32 wqe=payload_len */
+		wqe->words[3] = 0; /* no definition for this in wqe */
+		/* word4 iocb=rsvd wqe=rsvd */
+		/* word5 iocb=rctl/type/df_ctl wqe=rctl/type/df_ctl memcpy */
+		/* word6 iocb=ctxt_tag/io_tag wqe=ctxt_tag/xri */
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+	break;
+	case CMD_FCP_IWRITE64_CR:
+		command_type = FCP_COMMAND_DATA_OUT;
+		/* The struct for wqe fcp_iwrite has 3 fields that are somewhat
+		 * confusing.
+		 * word3 is payload_len: byte offset to the sgl entry for the
+		 * fcp_command.
+		 * word4 is total xfer len, same as the IOCB->ulpParameter.
+		 * word5 is initial xfer len 0 = wait for xfer-ready
+		 */
+
+		/* Always wait for xfer-ready before sending data */
+		wqe->fcp_iwrite.initial_xfer_len = 0;
+		/* word 4 (xfer length) should have been set on the memcpy */
+
+	/* allow write to fall through to read */
+	case CMD_FCP_IREAD64_CR:
+		/* FCP_CMD is always the 1st sgl entry */
+		wqe->fcp_iread.payload_len =
+			payload_len + sizeof(struct fcp_rsp);
+
+		/* word 4 (xfer length) should have been set on the memcpy */
+
+		bf_set(lpfc_wqe_gen_erp, &wqe->generic,
+			iocbq->iocb.ulpFCP2Rcvy);
+		bf_set(lpfc_wqe_gen_lnk, &wqe->generic, iocbq->iocb.ulpXS);
+		/* The XC bit and the XS bit are similar. The driver never
+		 * tracked whether or not the exchange was previouslly open.
+		 * XC = Exchange create, 0 is create. 1 is already open.
+		 * XS = link cmd: 1 do not close the exchange after command.
+		 * XS = 0 close exchange when command completes.
+		 * The only time we would not set the XC bit is when the XS bit
+		 * is set and we are sending our 2nd or greater command on
+		 * this exchange.
+		 */
+
+	/* ALLOW read & write to fall through to ICMD64 */
+	case CMD_FCP_ICMND64_CR:
+		/* Always open the exchange */
+		bf_set(wqe_xc, &wqe->fcp_iread.wqe_com, 0);
+
+		wqe->words[10] &= 0xffff0000; /* zero out ebde count */
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+	break;
+	case CMD_GEN_REQUEST64_CR:
+		/* word3 command length is described as byte offset to the
+		 * rsp_data. Would always be 16, sizeof(struct sli4_sge)
+		 * sgl[0] = cmnd
+		 * sgl[1] = rsp.
+		 *
+		 */
+		wqe->gen_req.command_len = payload_len;
+		/* Word4 parameter  copied in the memcpy */
+		/* Word5 [rctl, type, df_ctl, la] copied in memcpy */
+		/* word6 context tag copied in memcpy */
+		if (iocbq->iocb.ulpCt_h  || iocbq->iocb.ulpCt_l) {
+			ct = ((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l);
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2015 Invalid CT %x command 0x%x\n",
+				ct, iocbq->iocb.ulpCommand);
+			return IOCB_ERROR;
+		}
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic, 0);
+		bf_set(wqe_tmo, &wqe->gen_req.wqe_com,
+			iocbq->iocb.ulpTimeout);
+
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+		command_type = OTHER_COMMAND;
+	break;
+	case CMD_XMIT_ELS_RSP64_CX:
+		/* words0-2 BDE memcpy */
+		/* word3 iocb=iotag32 wqe=rsvd */
+		wqe->words[3] = 0;
+		/* word4 iocb=did wge=rsvd. */
+		wqe->words[4] = 0;
+		/* word5 iocb=rsvd wge=did */
+		bf_set(wqe_els_did, &wqe->xmit_els_rsp.wqe_dest,
+			 iocbq->iocb.un.elsreq64.remoteID);
+
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+		bf_set(wqe_rcvoxid, &wqe->generic, iocbq->iocb.ulpContext);
+		if (!iocbq->iocb.ulpCt_h && iocbq->iocb.ulpCt_l)
+			bf_set(lpfc_wqe_gen_context, &wqe->generic,
+			       iocbq->vport->vpi + phba->vpi_base);
+		command_type = OTHER_COMMAND;
+	break;
+	case CMD_CLOSE_XRI_CN:
+	case CMD_ABORT_XRI_CN:
+	case CMD_ABORT_XRI_CX:
+		/* words 0-2 memcpy should be 0 rserved */
+		/* port will send abts */
+		if (iocbq->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+			/*
+			 * The link is down so the fw does not need to send abts
+			 * on the wire.
+			 */
+			bf_set(abort_cmd_ia, &wqe->abort_cmd, 1);
+		else
+			bf_set(abort_cmd_ia, &wqe->abort_cmd, 0);
+		bf_set(abort_cmd_criteria, &wqe->abort_cmd, T_XRI_TAG);
+		abort_tag = iocbq->iocb.un.acxri.abortIoTag;
+		wqe->words[5] = 0;
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+		abort_tag = iocbq->iocb.un.acxri.abortIoTag;
+		wqe->generic.abort_tag = abort_tag;
+		/*
+		 * The abort handler will send us CMD_ABORT_XRI_CN or
+		 * CMD_CLOSE_XRI_CN and the fw only accepts CMD_ABORT_XRI_CX
+		 */
+		bf_set(lpfc_wqe_gen_command, &wqe->generic, CMD_ABORT_XRI_CX);
+		cmnd = CMD_ABORT_XRI_CX;
+		command_type = OTHER_COMMAND;
+		xritag = 0;
+	break;
+	case CMD_XRI_ABORTED_CX:
+	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
+		/* words0-2 are all 0's no bde */
+		/* word3 and word4 are rsvrd */
+		wqe->words[3] = 0;
+		wqe->words[4] = 0;
+		/* word5 iocb=rsvd wge=did */
+		/* There is no remote port id in the IOCB? */
+		/* Let this fall through and fail */
+	case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */
+	case CMD_FCP_TSEND64_CX: /* Target mode send xfer-ready */
+	case CMD_FCP_TRSP64_CX: /* Target mode rcv */
+	case CMD_FCP_AUTO_TRSP_CX: /* Auto target rsp */
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2014 Invalid command 0x%x\n",
+				iocbq->iocb.ulpCommand);
+		return IOCB_ERROR;
+	break;
+
+	}
+	bf_set(lpfc_wqe_gen_xri, &wqe->generic, xritag);
+	bf_set(lpfc_wqe_gen_request_tag, &wqe->generic, iocbq->iotag);
+	wqe->generic.abort_tag = abort_tag;
+	bf_set(lpfc_wqe_gen_cmd_type, &wqe->generic, command_type);
+	bf_set(lpfc_wqe_gen_command, &wqe->generic, cmnd);
+	bf_set(lpfc_wqe_gen_class, &wqe->generic, iocbq->iocb.ulpClass);
+	bf_set(lpfc_wqe_gen_cq_id, &wqe->generic, LPFC_WQE_CQ_ID_DEFAULT);
+
+	return 0;
+}
+
+/**
+ * __lpfc_sli_issue_iocb_s4 - SLI4 device lockless ver of lpfc_sli_issue_iocb
+ * @phba: Pointer to HBA context object.
+ * @ring_number: SLI ring number to issue iocb on.
+ * @piocb: Pointer to command iocb.
+ * @flag: Flag indicating if this command can be put into txq.
+ *
+ * __lpfc_sli_issue_iocb_s4 is used by other functions in the driver to issue
+ * an iocb command to an HBA with SLI-4 interface spec.
+ *
+ * This function is called with hbalock held. The function will return success
+ * after it successfully submit the iocb to firmware or after adding to the
+ * txq.
+ **/
+static int
+__lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
+			 struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	struct lpfc_sglq *sglq;
+	uint16_t xritag;
+	union lpfc_wqe wqe;
+	struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number];
+	uint32_t fcp_wqidx;
+
+	if (piocb->sli4_xritag == NO_XRI) {
+		if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
+			piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+			sglq = NULL;
+		else {
+			sglq = __lpfc_sli_get_sglq(phba);
+			if (!sglq)
+				return IOCB_ERROR;
+			piocb->sli4_xritag = sglq->sli4_xritag;
+		}
+	} else if (piocb->iocb_flag &  LPFC_IO_FCP) {
+		sglq = NULL; /* These IO's already have an XRI and
+			      * a mapped sgl.
+			      */
+	} else {
+		/* This is a continuation of a commandi,(CX) so this
+		 * sglq is on the active list
+		 */
+		sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag);
+		if (!sglq)
+			return IOCB_ERROR;
+	}
+
+	if (sglq) {
+		xritag = lpfc_sli4_bpl2sgl(phba, piocb, sglq);
+		if (xritag != sglq->sli4_xritag)
+			return IOCB_ERROR;
+	}
+
+	if (lpfc_sli4_iocb2wqe(phba, piocb, &wqe))
+		return IOCB_ERROR;
+
+	if (piocb->iocb_flag &  LPFC_IO_FCP) {
+		fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba, piocb);
+		if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[fcp_wqidx], &wqe))
+			return IOCB_ERROR;
+	} else {
+		if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, &wqe))
+			return IOCB_ERROR;
+	}
+	lpfc_sli_ringtxcmpl_put(phba, pring, piocb);
+
+	return 0;
+}
+
+/**
+ * __lpfc_sli_issue_iocb - Wrapper func of lockless version for issuing iocb
+ *
+ * This routine wraps the actual lockless version for issusing IOCB function
+ * pointer from the lpfc_hba struct.
+ *
+ * Return codes:
+ * 	IOCB_ERROR - Error
+ * 	IOCB_SUCCESS - Success
+ * 	IOCB_BUSY - Busy
+ **/
+static inline int
+__lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
+		struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	return phba->__lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
+}
+
+/**
+ * lpfc_sli_api_table_setup - Set up sli api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the SLI interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s3;
+		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s4;
+		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1419 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	phba->lpfc_get_iocb_from_iocbq = lpfc_get_iocb_from_iocbq;
+	return 0;
+}
 
 /**
  * lpfc_sli_issue_iocb - Wrapper function for __lpfc_sli_issue_iocb
@@ -3848,14 +6126,14 @@
  * functions which do not hold hbalock.
  **/
 int
-lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
 	unsigned long iflags;
 	int rc;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
-	rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag);
+	rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
 	spin_unlock_irqrestore(&phba->hbalock, iflags);
 
 	return rc;
@@ -4148,6 +6426,52 @@
 }
 
 /**
+ * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine flushes the mailbox command subsystem. It will unconditionally
+ * flush all the mailbox commands in the three possible stages in the mailbox
+ * command sub-system: pending mailbox command queue; the outstanding mailbox
+ * command; and completed mailbox command queue. It is caller's responsibility
+ * to make sure that the driver is in the proper state to flush the mailbox
+ * command sub-system. Namely, the posting of mailbox commands into the
+ * pending mailbox command queue from the various clients must be stopped;
+ * either the HBA is in a state that it will never works on the outstanding
+ * mailbox command (such as in EEH or ERATT conditions) or the outstanding
+ * mailbox command has been completed.
+ **/
+static void
+lpfc_sli_mbox_sys_flush(struct lpfc_hba *phba)
+{
+	LIST_HEAD(completions);
+	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *pmb;
+	unsigned long iflag;
+
+	/* Flush all the mailbox commands in the mbox system */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	/* The pending mailbox command queue */
+	list_splice_init(&phba->sli.mboxq, &completions);
+	/* The outstanding active mailbox command */
+	if (psli->mbox_active) {
+		list_add_tail(&psli->mbox_active->list, &completions);
+		psli->mbox_active = NULL;
+		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	}
+	/* The completed mailbox command queue */
+	list_splice_init(&phba->sli.mboxq_cmpl, &completions);
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	/* Return all flushed mailbox commands with MBX_NOT_FINISHED status */
+	while (!list_empty(&completions)) {
+		list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
+		pmb->u.mb.mbxStatus = MBX_NOT_FINISHED;
+		if (pmb->mbox_cmpl)
+			pmb->mbox_cmpl(phba, pmb);
+	}
+}
+
+/**
  * lpfc_sli_host_down - Vport cleanup function
  * @vport: Pointer to virtual port object.
  *
@@ -4240,9 +6564,11 @@
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
 	struct lpfc_dmabuf *buf_ptr;
-	LPFC_MBOXQ_t *pmb;
-	int i;
 	unsigned long flags = 0;
+	int i;
+
+	/* Shutdown the mailbox command sub-system */
+	lpfc_sli_mbox_sys_shutdown(phba);
 
 	lpfc_hba_down_prep(phba);
 
@@ -4287,28 +6613,42 @@
 
 	/* Return any active mbox cmds */
 	del_timer_sync(&psli->mbox_tmo);
-	spin_lock_irqsave(&phba->hbalock, flags);
 
-	spin_lock(&phba->pport->work_port_lock);
+	spin_lock_irqsave(&phba->pport->work_port_lock, flags);
 	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
-	spin_unlock(&phba->pport->work_port_lock);
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
 
-	/* Return any pending or completed mbox cmds */
-	list_splice_init(&phba->sli.mboxq, &completions);
-	if (psli->mbox_active) {
-		list_add_tail(&psli->mbox_active->list, &completions);
-		psli->mbox_active = NULL;
-		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
-	}
-	list_splice_init(&phba->sli.mboxq_cmpl, &completions);
-	spin_unlock_irqrestore(&phba->hbalock, flags);
+	return 1;
+}
 
-	while (!list_empty(&completions)) {
-		list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
-		pmb->mb.mbxStatus = MBX_NOT_FINISHED;
-		if (pmb->mbox_cmpl)
-			pmb->mbox_cmpl(phba,pmb);
-	}
+/**
+ * lpfc_sli4_hba_down - PCI function resource cleanup for the SLI4 HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function cleans up all queues, iocb, buffers, mailbox commands while
+ * shutting down the SLI4 HBA FCoE function. This function is called with no
+ * lock held and always returns 1.
+ *
+ * This function does the following to cleanup driver FCoE function resources:
+ * - Free discovery resources for each virtual port
+ * - Cleanup any pending fabric iocbs
+ * - Iterate through the iocb txq and free each entry in the list.
+ * - Free up any buffer posted to the HBA.
+ * - Clean up all the queue entries: WQ, RQ, MQ, EQ, CQ, etc.
+ * - Free mailbox commands in the mailbox queue.
+ **/
+int
+lpfc_sli4_hba_down(struct lpfc_hba *phba)
+{
+	/* Stop the SLI4 device port */
+	lpfc_stop_port(phba);
+
+	/* Tear down the queues in the HBA */
+	lpfc_sli4_queue_unset(phba);
+
+	/* unregister default FCFI from the HBA */
+	lpfc_sli4_fcfi_unreg(phba, phba->fcf.fcfi);
+
 	return 1;
 }
 
@@ -4639,7 +6979,10 @@
 	iabt = &abtsiocbp->iocb;
 	iabt->un.acxri.abortType = ABORT_TYPE_ABTS;
 	iabt->un.acxri.abortContextTag = icmd->ulpContext;
-	iabt->un.acxri.abortIoTag = icmd->ulpIoTag;
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		iabt->un.acxri.abortIoTag = cmdiocb->sli4_xritag;
+	else
+		iabt->un.acxri.abortIoTag = icmd->ulpIoTag;
 	iabt->ulpLe = 1;
 	iabt->ulpClass = icmd->ulpClass;
 
@@ -4655,7 +6998,7 @@
 			 "abort cmd iotag x%x\n",
 			 iabt->un.acxri.abortContextTag,
 			 iabt->un.acxri.abortIoTag, abtsiocbp->iotag);
-	retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
+	retval = __lpfc_sli_issue_iocb(phba, pring->ringno, abtsiocbp, 0);
 
 	if (retval)
 		__lpfc_sli_release_iocbq(phba, abtsiocbp);
@@ -4838,7 +7181,10 @@
 		cmd = &iocbq->iocb;
 		abtsiocb->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
 		abtsiocb->iocb.un.acxri.abortContextTag = cmd->ulpContext;
-		abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
+		if (phba->sli_rev == LPFC_SLI_REV4)
+			abtsiocb->iocb.un.acxri.abortIoTag = iocbq->sli4_xritag;
+		else
+			abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
 		abtsiocb->iocb.ulpLe = 1;
 		abtsiocb->iocb.ulpClass = cmd->ulpClass;
 		abtsiocb->vport = phba->pport;
@@ -4850,7 +7196,8 @@
 
 		/* Setup callback routine and issue the command. */
 		abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
-		ret_val = lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0);
+		ret_val = lpfc_sli_issue_iocb(phba, pring->ringno,
+					      abtsiocb, 0);
 		if (ret_val == IOCB_ERROR) {
 			lpfc_sli_release_iocbq(phba, abtsiocb);
 			errcnt++;
@@ -4931,7 +7278,7 @@
  **/
 int
 lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba,
-			 struct lpfc_sli_ring *pring,
+			 uint32_t ring_number,
 			 struct lpfc_iocbq *piocb,
 			 struct lpfc_iocbq *prspiocbq,
 			 uint32_t timeout)
@@ -4962,7 +7309,7 @@
 		readl(phba->HCregaddr); /* flush */
 	}
 
-	retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0);
+	retval = lpfc_sli_issue_iocb(phba, ring_number, piocb, 0);
 	if (retval == IOCB_SUCCESS) {
 		timeout_req = timeout * HZ;
 		timeleft = wait_event_timeout(done_q,
@@ -5077,53 +7424,156 @@
 }
 
 /**
- * lpfc_sli_flush_mbox_queue - mailbox queue cleanup function
+ * lpfc_sli_mbox_sys_shutdown - shutdown mailbox command sub-system
  * @phba: Pointer to HBA context.
  *
- * This function is called to cleanup any pending mailbox
- * objects in the driver queue before bringing the HBA offline.
- * This function is called while resetting the HBA.
- * The function is called without any lock held. The function
- * takes hbalock to update SLI data structure.
- * This function returns 1 when there is an active mailbox
- * command pending else returns 0.
+ * This function is called to shutdown the driver's mailbox sub-system.
+ * It first marks the mailbox sub-system is in a block state to prevent
+ * the asynchronous mailbox command from issued off the pending mailbox
+ * command queue. If the mailbox command sub-system shutdown is due to
+ * HBA error conditions such as EEH or ERATT, this routine shall invoke
+ * the mailbox sub-system flush routine to forcefully bring down the
+ * mailbox sub-system. Otherwise, if it is due to normal condition (such
+ * as with offline or HBA function reset), this routine will wait for the
+ * outstanding mailbox command to complete before invoking the mailbox
+ * sub-system flush routine to gracefully bring down mailbox sub-system.
  **/
-int
-lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba)
+void
+lpfc_sli_mbox_sys_shutdown(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport = phba->pport;
-	int i = 0;
+	struct lpfc_sli *psli = &phba->sli;
+	uint8_t actcmd = MBX_HEARTBEAT;
+	unsigned long timeout;
+
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag |= LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
+
+	if (psli->sli_flag & LPFC_SLI_ACTIVE) {
+		spin_lock_irq(&phba->hbalock);
+		if (phba->sli.mbox_active)
+			actcmd = phba->sli.mbox_active->u.mb.mbxCommand;
+		spin_unlock_irq(&phba->hbalock);
+		/* Determine how long we might wait for the active mailbox
+		 * command to be gracefully completed by firmware.
+		 */
+		timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, actcmd) *
+					   1000) + jiffies;
+		while (phba->sli.mbox_active) {
+			/* Check active mailbox complete status every 2ms */
+			msleep(2);
+			if (time_after(jiffies, timeout))
+				/* Timeout, let the mailbox flush routine to
+				 * forcefully release active mailbox command
+				 */
+				break;
+		}
+	}
+	lpfc_sli_mbox_sys_flush(phba);
+}
+
+/**
+ * lpfc_sli_eratt_read - read sli-3 error attention events
+ * @phba: Pointer to HBA context.
+ *
+ * This function is called to read the SLI3 device error attention registers
+ * for possible error attention events. The caller must hold the hostlock
+ * with spin_lock_irq().
+ *
+ * This fucntion returns 1 when there is Error Attention in the Host Attention
+ * Register and returns 0 otherwise.
+ **/
+static int
+lpfc_sli_eratt_read(struct lpfc_hba *phba)
+{
 	uint32_t ha_copy;
 
-	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) {
-		if (i++ > LPFC_MBOX_TMO * 1000)
-			return 1;
+	/* Read chip Host Attention (HA) register */
+	ha_copy = readl(phba->HAregaddr);
+	if (ha_copy & HA_ERATT) {
+		/* Read host status register to retrieve error event */
+		lpfc_sli_read_hs(phba);
 
-		/*
-		 * Call lpfc_sli_handle_mb_event only if a mailbox cmd
-		 * did finish. This way we won't get the misleading
-		 * "Stray Mailbox Interrupt" message.
-		 */
+		/* Check if there is a deferred error condition is active */
+		if ((HS_FFER1 & phba->work_hs) &&
+		    ((HS_FFER2 | HS_FFER3 | HS_FFER4 | HS_FFER5 |
+		     HS_FFER6 | HS_FFER7) & phba->work_hs)) {
+			spin_lock_irq(&phba->hbalock);
+			phba->hba_flag |= DEFER_ERATT;
+			spin_unlock_irq(&phba->hbalock);
+			/* Clear all interrupt enable conditions */
+			writel(0, phba->HCregaddr);
+			readl(phba->HCregaddr);
+		}
+
+		/* Set the driver HA work bitmap */
 		spin_lock_irq(&phba->hbalock);
-		ha_copy = phba->work_ha;
-		phba->work_ha &= ~HA_MBATT;
+		phba->work_ha |= HA_ERATT;
+		/* Indicate polling handles this ERATT */
+		phba->hba_flag |= HBA_ERATT_HANDLED;
 		spin_unlock_irq(&phba->hbalock);
-
-		if (ha_copy & HA_MBATT)
-			if (lpfc_sli_handle_mb_event(phba) == 0)
-				i = 0;
-
-		msleep(1);
+		return 1;
 	}
+	return 0;
+}
 
-	return (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) ? 1 : 0;
+/**
+ * lpfc_sli4_eratt_read - read sli-4 error attention events
+ * @phba: Pointer to HBA context.
+ *
+ * This function is called to read the SLI4 device error attention registers
+ * for possible error attention events. The caller must hold the hostlock
+ * with spin_lock_irq().
+ *
+ * This fucntion returns 1 when there is Error Attention in the Host Attention
+ * Register and returns 0 otherwise.
+ **/
+static int
+lpfc_sli4_eratt_read(struct lpfc_hba *phba)
+{
+	uint32_t uerr_sta_hi, uerr_sta_lo;
+	uint32_t onlnreg0, onlnreg1;
+
+	/* For now, use the SLI4 device internal unrecoverable error
+	 * registers for error attention. This can be changed later.
+	 */
+	onlnreg0 = readl(phba->sli4_hba.ONLINE0regaddr);
+	onlnreg1 = readl(phba->sli4_hba.ONLINE1regaddr);
+	if ((onlnreg0 != LPFC_ONLINE_NERR) || (onlnreg1 != LPFC_ONLINE_NERR)) {
+		uerr_sta_lo = readl(phba->sli4_hba.UERRLOregaddr);
+		uerr_sta_hi = readl(phba->sli4_hba.UERRHIregaddr);
+		if (uerr_sta_lo || uerr_sta_hi) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1423 HBA Unrecoverable error: "
+					"uerr_lo_reg=0x%x, uerr_hi_reg=0x%x, "
+					"online0_reg=0x%x, online1_reg=0x%x\n",
+					uerr_sta_lo, uerr_sta_hi,
+					onlnreg0, onlnreg1);
+			/* TEMP: as the driver error recover logic is not
+			 * fully developed, we just log the error message
+			 * and the device error attention action is now
+			 * temporarily disabled.
+			 */
+			return 0;
+			phba->work_status[0] = uerr_sta_lo;
+			phba->work_status[1] = uerr_sta_hi;
+			spin_lock_irq(&phba->hbalock);
+			/* Set the driver HA work bitmap */
+			phba->work_ha |= HA_ERATT;
+			/* Indicate polling handles this ERATT */
+			phba->hba_flag |= HBA_ERATT_HANDLED;
+			spin_unlock_irq(&phba->hbalock);
+			return 1;
+		}
+	}
+	return 0;
 }
 
 /**
  * lpfc_sli_check_eratt - check error attention events
  * @phba: Pointer to HBA context.
  *
- * This function is called form timer soft interrupt context to check HBA's
+ * This function is called from timer soft interrupt context to check HBA's
  * error attention register bit for error attention events.
  *
  * This fucntion returns 1 when there is Error Attention in the Host Attention
@@ -5134,10 +7584,6 @@
 {
 	uint32_t ha_copy;
 
-	/* If PCI channel is offline, don't process it */
-	if (unlikely(pci_channel_offline(phba->pcidev)))
-		return 0;
-
 	/* If somebody is waiting to handle an eratt, don't process it
 	 * here. The brdkill function will do this.
 	 */
@@ -5161,56 +7607,84 @@
 		return 0;
 	}
 
-	/* Read chip Host Attention (HA) register */
-	ha_copy = readl(phba->HAregaddr);
-	if (ha_copy & HA_ERATT) {
-		/* Read host status register to retrieve error event */
-		lpfc_sli_read_hs(phba);
-
-		/* Check if there is a deferred error condition is active */
-		if ((HS_FFER1 & phba->work_hs) &&
-			((HS_FFER2 | HS_FFER3 | HS_FFER4 | HS_FFER5 |
-			HS_FFER6 | HS_FFER7) & phba->work_hs)) {
-			phba->hba_flag |= DEFER_ERATT;
-			/* Clear all interrupt enable conditions */
-			writel(0, phba->HCregaddr);
-			readl(phba->HCregaddr);
-		}
-
-		/* Set the driver HA work bitmap */
-		phba->work_ha |= HA_ERATT;
-		/* Indicate polling handles this ERATT */
-		phba->hba_flag |= HBA_ERATT_HANDLED;
+	/* If PCI channel is offline, don't process it */
+	if (unlikely(pci_channel_offline(phba->pcidev))) {
 		spin_unlock_irq(&phba->hbalock);
-		return 1;
+		return 0;
+	}
+
+	switch (phba->sli_rev) {
+	case LPFC_SLI_REV2:
+	case LPFC_SLI_REV3:
+		/* Read chip Host Attention (HA) register */
+		ha_copy = lpfc_sli_eratt_read(phba);
+		break;
+	case LPFC_SLI_REV4:
+		/* Read devcie Uncoverable Error (UERR) registers */
+		ha_copy = lpfc_sli4_eratt_read(phba);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0299 Invalid SLI revision (%d)\n",
+				phba->sli_rev);
+		ha_copy = 0;
+		break;
 	}
 	spin_unlock_irq(&phba->hbalock);
+
+	return ha_copy;
+}
+
+/**
+ * lpfc_intr_state_check - Check device state for interrupt handling
+ * @phba: Pointer to HBA context.
+ *
+ * This inline routine checks whether a device or its PCI slot is in a state
+ * that the interrupt should be handled.
+ *
+ * This function returns 0 if the device or the PCI slot is in a state that
+ * interrupt should be handled, otherwise -EIO.
+ */
+static inline int
+lpfc_intr_state_check(struct lpfc_hba *phba)
+{
+	/* If the pci channel is offline, ignore all the interrupts */
+	if (unlikely(pci_channel_offline(phba->pcidev)))
+		return -EIO;
+
+	/* Update device level interrupt statistics */
+	phba->sli.slistat.sli_intr++;
+
+	/* Ignore all interrupts during initialization. */
+	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		return -EIO;
+
 	return 0;
 }
 
 /**
- * lpfc_sp_intr_handler - The slow-path interrupt handler of lpfc driver
+ * lpfc_sli_sp_intr_handler - Slow-path interrupt handler to SLI-3 device
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
  * This function is directly called from the PCI layer as an interrupt
- * service routine when the device is enabled with MSI-X multi-message
- * interrupt mode and there are slow-path events in the HBA. However,
- * when the device is enabled with either MSI or Pin-IRQ interrupt mode,
- * this function is called as part of the device-level interrupt handler.
- * When the PCI slot is in error recovery or the HBA is undergoing
- * initialization, the interrupt handler will not process the interrupt.
- * The link attention and ELS ring attention events are handled by the
- * worker thread. The interrupt handler signals the worker thread and
- * and returns for these events. This function is called without any
- * lock held. It gets the hbalock to access and update SLI data
+ * service routine when device with SLI-3 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there are slow-path events in
+ * the HBA. However, when the device is enabled with either MSI or Pin-IRQ
+ * interrupt mode, this function is called as part of the device-level
+ * interrupt handler. When the PCI slot is in error recovery or the HBA
+ * is undergoing initialization, the interrupt handler will not process
+ * the interrupt. The link attention and ELS ring attention events are
+ * handled by the worker thread. The interrupt handler signals the worker
+ * thread and returns for these events. This function is called without
+ * any lock held. It gets the hbalock to access and update SLI data
  * structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_sp_intr_handler(int irq, void *dev_id)
+lpfc_sli_sp_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	uint32_t ha_copy;
@@ -5240,13 +7714,8 @@
 	 * individual interrupt handler in MSI-X multi-message interrupt mode
 	 */
 	if (phba->intr_type == MSIX) {
-		/* If the pci channel is offline, ignore all the interrupts */
-		if (unlikely(pci_channel_offline(phba->pcidev)))
-			return IRQ_NONE;
-		/* Update device-level interrupt statistics */
-		phba->sli.slistat.sli_intr++;
-		/* Ignore all interrupts during initialization. */
-		if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		/* Check device state for handling interrupt */
+		if (lpfc_intr_state_check(phba))
 			return IRQ_NONE;
 		/* Need to read HA REG for slow-path events */
 		spin_lock_irqsave(&phba->hbalock, iflag);
@@ -5271,7 +7740,7 @@
 		 * interrupt.
 		 */
 		if (unlikely(phba->hba_flag & DEFER_ERATT)) {
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
 			return IRQ_NONE;
 		}
 
@@ -5364,7 +7833,7 @@
 
 		if ((work_ha_copy & HA_MBATT) && (phba->sli.mbox_active)) {
 			pmb = phba->sli.mbox_active;
-			pmbox = &pmb->mb;
+			pmbox = &pmb->u.mb;
 			mbox = phba->mbox;
 			vport = pmb->vport;
 
@@ -5434,7 +7903,8 @@
 							LOG_MBOX | LOG_SLI,
 							"0350 rc should have"
 							"been MBX_BUSY");
-						goto send_current_mbox;
+						if (rc != MBX_NOT_FINISHED)
+							goto send_current_mbox;
 					}
 				}
 				spin_lock_irqsave(
@@ -5471,29 +7941,29 @@
 	}
 	return IRQ_HANDLED;
 
-} /* lpfc_sp_intr_handler */
+} /* lpfc_sli_sp_intr_handler */
 
 /**
- * lpfc_fp_intr_handler - The fast-path interrupt handler of lpfc driver
+ * lpfc_sli_fp_intr_handler - Fast-path interrupt handler to SLI-3 device.
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
  * This function is directly called from the PCI layer as an interrupt
- * service routine when the device is enabled with MSI-X multi-message
- * interrupt mode and there is a fast-path FCP IOCB ring event in the
- * HBA. However, when the device is enabled with either MSI or Pin-IRQ
- * interrupt mode, this function is called as part of the device-level
- * interrupt handler. When the PCI slot is in error recovery or the HBA
- * is undergoing initialization, the interrupt handler will not process
- * the interrupt. The SCSI FCP fast-path ring event are handled in the
- * intrrupt context. This function is called without any lock held. It
- * gets the hbalock to access and update SLI data structures.
+ * service routine when device with SLI-3 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there is a fast-path FCP IOCB
+ * ring event in the HBA. However, when the device is enabled with either
+ * MSI or Pin-IRQ interrupt mode, this function is called as part of the
+ * device-level interrupt handler. When the PCI slot is in error recovery
+ * or the HBA is undergoing initialization, the interrupt handler will not
+ * process the interrupt. The SCSI FCP fast-path ring event are handled in
+ * the intrrupt context. This function is called without any lock held.
+ * It gets the hbalock to access and update SLI data structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_fp_intr_handler(int irq, void *dev_id)
+lpfc_sli_fp_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	uint32_t ha_copy;
@@ -5513,13 +7983,8 @@
 	 * individual interrupt handler in MSI-X multi-message interrupt mode
 	 */
 	if (phba->intr_type == MSIX) {
-		/* If pci channel is offline, ignore all the interrupts */
-		if (unlikely(pci_channel_offline(phba->pcidev)))
-			return IRQ_NONE;
-		/* Update device-level interrupt statistics */
-		phba->sli.slistat.sli_intr++;
-		/* Ignore all interrupts during initialization. */
-		if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		/* Check device state for handling interrupt */
+		if (lpfc_intr_state_check(phba))
 			return IRQ_NONE;
 		/* Need to read HA REG for FCP ring and other ring events */
 		ha_copy = readl(phba->HAregaddr);
@@ -5530,7 +7995,7 @@
 		 * any interrupt.
 		 */
 		if (unlikely(phba->hba_flag & DEFER_ERATT)) {
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
 			return IRQ_NONE;
 		}
 		writel((ha_copy & (HA_R0_CLR_MSK | HA_R1_CLR_MSK)),
@@ -5566,26 +8031,27 @@
 		}
 	}
 	return IRQ_HANDLED;
-}  /* lpfc_fp_intr_handler */
+}  /* lpfc_sli_fp_intr_handler */
 
 /**
- * lpfc_intr_handler - The device-level interrupt handler of lpfc driver
+ * lpfc_sli_intr_handler - Device-level interrupt handler to SLI-3 device
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
- * This function is the device-level interrupt handler called from the PCI
- * layer when either MSI or Pin-IRQ interrupt mode is enabled and there is
- * an event in the HBA which requires driver attention. This function
- * invokes the slow-path interrupt attention handling function and fast-path
- * interrupt attention handling function in turn to process the relevant
- * HBA attention events. This function is called without any lock held. It
- * gets the hbalock to access and update SLI data structures.
+ * This function is the HBA device-level interrupt handler to device with
+ * SLI-3 interface spec, called from the PCI layer when either MSI or
+ * Pin-IRQ interrupt mode is enabled and there is an event in the HBA which
+ * requires driver attention. This function invokes the slow-path interrupt
+ * attention handling function and fast-path interrupt attention handling
+ * function in turn to process the relevant HBA attention events. This
+ * function is called without any lock held. It gets the hbalock to access
+ * and update SLI data structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled, else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_intr_handler(int irq, void *dev_id)
+lpfc_sli_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	irqreturn_t sp_irq_rc, fp_irq_rc;
@@ -5600,15 +8066,8 @@
 	if (unlikely(!phba))
 		return IRQ_NONE;
 
-	/* If the pci channel is offline, ignore all the interrupts. */
-	if (unlikely(pci_channel_offline(phba->pcidev)))
-		return IRQ_NONE;
-
-	/* Update device level interrupt statistics */
-	phba->sli.slistat.sli_intr++;
-
-	/* Ignore all interrupts during initialization. */
-	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+	/* Check device state for handling interrupt */
+	if (lpfc_intr_state_check(phba))
 		return IRQ_NONE;
 
 	spin_lock(&phba->hbalock);
@@ -5650,7 +8109,7 @@
 	status2 >>= (4*LPFC_ELS_RING);
 
 	if (status1 || (status2 & HA_RXMASK))
-		sp_irq_rc = lpfc_sp_intr_handler(irq, dev_id);
+		sp_irq_rc = lpfc_sli_sp_intr_handler(irq, dev_id);
 	else
 		sp_irq_rc = IRQ_NONE;
 
@@ -5670,10 +8129,3322 @@
 		status2 = 0;
 
 	if ((status1 & HA_RXMASK) || (status2 & HA_RXMASK))
-		fp_irq_rc = lpfc_fp_intr_handler(irq, dev_id);
+		fp_irq_rc = lpfc_sli_fp_intr_handler(irq, dev_id);
 	else
 		fp_irq_rc = IRQ_NONE;
 
 	/* Return device-level interrupt handling status */
 	return (sp_irq_rc == IRQ_HANDLED) ? sp_irq_rc : fp_irq_rc;
-}  /* lpfc_intr_handler */
+}  /* lpfc_sli_intr_handler */
+
+/**
+ * lpfc_sli4_fcp_xri_abort_event_proc - Process fcp xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 FCP abort XRI events.
+ **/
+void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the fcp xri abort event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~FCP_XRI_ABORT_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the fcp xri abort events */
+	while (!list_empty(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Notify aborted XRI for FCP work queue */
+		lpfc_sli4_fcp_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
+		/* Free the event processed back to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+/**
+ * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 els abort xri events.
+ **/
+void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the els xri abort event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~ELS_XRI_ABORT_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the els xri abort events */
+	while (!list_empty(&phba->sli4_hba.sp_els_xri_aborted_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Notify aborted XRI for ELS work queue */
+		lpfc_sli4_els_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
+		/* Free the event processed back to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+static void
+lpfc_sli4_iocb_param_transfer(struct lpfc_iocbq *pIocbIn,
+			      struct lpfc_iocbq *pIocbOut,
+			      struct lpfc_wcqe_complete *wcqe)
+{
+	size_t offset = offsetof(struct lpfc_iocbq, iocb);
+
+	memcpy((char *)pIocbIn + offset, (char *)pIocbOut + offset,
+	       sizeof(struct lpfc_iocbq) - offset);
+	memset(&pIocbIn->sli4_info, 0,
+	       sizeof(struct lpfc_sli4_rspiocb_info));
+	/* Map WCQE parameters into irspiocb parameters */
+	pIocbIn->iocb.ulpStatus = bf_get(lpfc_wcqe_c_status, wcqe);
+	if (pIocbOut->iocb_flag & LPFC_IO_FCP)
+		if (pIocbIn->iocb.ulpStatus == IOSTAT_FCP_RSP_ERROR)
+			pIocbIn->iocb.un.fcpi.fcpi_parm =
+					pIocbOut->iocb.un.fcpi.fcpi_parm -
+					wcqe->total_data_placed;
+		else
+			pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter;
+	else
+		pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter;
+	/* Load in additional WCQE parameters */
+	pIocbIn->sli4_info.hw_status = bf_get(lpfc_wcqe_c_hw_status, wcqe);
+	pIocbIn->sli4_info.bfield = 0;
+	if (bf_get(lpfc_wcqe_c_xb, wcqe))
+		pIocbIn->sli4_info.bfield |= LPFC_XB;
+	if (bf_get(lpfc_wcqe_c_pv, wcqe)) {
+		pIocbIn->sli4_info.bfield |= LPFC_PV;
+		pIocbIn->sli4_info.priority =
+					bf_get(lpfc_wcqe_c_priority, wcqe);
+	}
+}
+
+/**
+ * lpfc_sli4_sp_handle_async_event - Handle an asynchroous event
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry with asynchrous
+ * event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_async_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
+{
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0392 Async Event: word0:x%x, word1:x%x, "
+			"word2:x%x, word3:x%x\n", mcqe->word0,
+			mcqe->mcqe_tag0, mcqe->mcqe_tag1, mcqe->trailer);
+
+	/* Allocate a new internal CQ_EVENT entry */
+	cq_event = lpfc_sli4_cq_event_alloc(phba);
+	if (!cq_event) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0394 Failed to allocate CQ_EVENT entry\n");
+		return false;
+	}
+
+	/* Move the CQE into an asynchronous event entry */
+	memcpy(&cq_event->cqe, mcqe, sizeof(struct lpfc_mcqe));
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	list_add_tail(&cq_event->list, &phba->sli4_hba.sp_asynce_work_queue);
+	/* Set the async event flag */
+	phba->hba_flag |= ASYNC_EVENT;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	return true;
+}
+
+/**
+ * lpfc_sli4_sp_handle_mbox_event - Handle a mailbox completion event
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry with mailbox
+ * completion event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
+{
+	uint32_t mcqe_status;
+	MAILBOX_t *mbox, *pmbox;
+	struct lpfc_mqe *mqe;
+	struct lpfc_vport *vport;
+	struct lpfc_nodelist *ndlp;
+	struct lpfc_dmabuf *mp;
+	unsigned long iflags;
+	LPFC_MBOXQ_t *pmb;
+	bool workposted = false;
+	int rc;
+
+	/* If not a mailbox complete MCQE, out by checking mailbox consume */
+	if (!bf_get(lpfc_trailer_completed, mcqe))
+		goto out_no_mqe_complete;
+
+	/* Get the reference to the active mbox command */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pmb = phba->sli.mbox_active;
+	if (unlikely(!pmb)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"1832 No pending MBOX command to handle\n");
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		goto out_no_mqe_complete;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	mqe = &pmb->u.mqe;
+	pmbox = (MAILBOX_t *)&pmb->u.mqe;
+	mbox = phba->mbox;
+	vport = pmb->vport;
+
+	/* Reset heartbeat timer */
+	phba->last_completion_time = jiffies;
+	del_timer(&phba->sli.mbox_tmo);
+
+	/* Move mbox data to caller's mailbox region, do endian swapping */
+	if (pmb->mbox_cmpl && mbox)
+		lpfc_sli_pcimem_bcopy(mbox, mqe, sizeof(struct lpfc_mqe));
+	/* Set the mailbox status with SLI4 range 0x4000 */
+	mcqe_status = bf_get(lpfc_mcqe_status, mcqe);
+	if (mcqe_status != MB_CQE_STATUS_SUCCESS)
+		bf_set(lpfc_mqe_status, mqe,
+		       (LPFC_MBX_ERROR_RANGE | mcqe_status));
+
+	if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
+		pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG;
+		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_MBOX_VPORT,
+				      "MBOX dflt rpi: status:x%x rpi:x%x",
+				      mcqe_status,
+				      pmbox->un.varWords[0], 0);
+		if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
+			mp = (struct lpfc_dmabuf *)(pmb->context1);
+			ndlp = (struct lpfc_nodelist *)pmb->context2;
+			/* Reg_LOGIN of dflt RPI was successful. Now lets get
+			 * RID of the PPI using the same mbox buffer.
+			 */
+			lpfc_unreg_login(phba, vport->vpi,
+					 pmbox->un.varWords[0], pmb);
+			pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
+			pmb->context1 = mp;
+			pmb->context2 = ndlp;
+			pmb->vport = vport;
+			rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+			if (rc != MBX_BUSY)
+				lpfc_printf_log(phba, KERN_ERR, LOG_MBOX |
+						LOG_SLI, "0385 rc should "
+						"have been MBX_BUSY\n");
+			if (rc != MBX_NOT_FINISHED)
+				goto send_current_mbox;
+		}
+	}
+	spin_lock_irqsave(&phba->pport->work_port_lock, iflags);
+	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags);
+
+	/* There is mailbox completion work to do */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	__lpfc_mbox_cmpl_put(phba, pmb);
+	phba->work_ha |= HA_MBATT;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	workposted = true;
+
+send_current_mbox:
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	/* Release the mailbox command posting token */
+	phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	/* Setting active mailbox pointer need to be in sync to flag clear */
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	/* Wake up worker thread to post the next pending mailbox command */
+	lpfc_worker_wake_up(phba);
+out_no_mqe_complete:
+	if (bf_get(lpfc_trailer_consumed, mcqe))
+		lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq);
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_mcqe - Process a mailbox completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry, it invokes the
+ * proper mailbox complete handling or asynchrous event handling routine
+ * according to the MCQE's async bit.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+{
+	struct lpfc_mcqe mcqe;
+	bool workposted;
+
+	/* Copy the mailbox MCQE and convert endian order as needed */
+	lpfc_sli_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
+
+	/* Invoke the proper event handling routine */
+	if (!bf_get(lpfc_trailer_async, &mcqe))
+		workposted = lpfc_sli4_sp_handle_mbox_event(phba, &mcqe);
+	else
+		workposted = lpfc_sli4_sp_handle_async_event(phba, &mcqe);
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
+ * @phba: Pointer to HBA context object.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an ELS work-queue completion event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_complete *wcqe)
+{
+	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+	struct lpfc_iocbq *cmdiocbq;
+	struct lpfc_iocbq *irspiocbq;
+	unsigned long iflags;
+	bool workposted = false;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pring->stats.iocb_event++;
+	/* Look up the ELS command IOCB and create pseudo response IOCB */
+	cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring,
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	if (unlikely(!cmdiocbq)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0386 ELS complete with no corresponding "
+				"cmdiocb: iotag (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return workposted;
+	}
+
+	/* Fake the irspiocbq and copy necessary response information */
+	irspiocbq = lpfc_sli_get_iocbq(phba);
+	if (!irspiocbq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0387 Failed to allocate an iocbq\n");
+		return workposted;
+	}
+	lpfc_sli4_iocb_param_transfer(irspiocbq, cmdiocbq, wcqe);
+
+	/* Add the irspiocb to the response IOCB work list */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	list_add_tail(&irspiocbq->list, &phba->sli4_hba.sp_rspiocb_work_queue);
+	/* Indicate ELS ring attention */
+	phba->work_ha |= (HA_R0ATT << (4*LPFC_ELS_RING));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	workposted = true;
+
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_rel_wcqe - Handle slow-path WQ entry consumed event
+ * @phba: Pointer to HBA context object.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles slow-path WQ entry comsumed event by invoking the
+ * proper WQ release routine to the slow-path WQ.
+ **/
+static void
+lpfc_sli4_sp_handle_rel_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_release *wcqe)
+{
+	/* Check for the slow-path ELS work queue */
+	if (bf_get(lpfc_wcqe_r_wq_id, wcqe) == phba->sli4_hba.els_wq->queue_id)
+		lpfc_sli4_wq_release(phba->sli4_hba.els_wq,
+				     bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+	else
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"2579 Slow-path wqe consume event carries "
+				"miss-matched qid: wcqe-qid=x%x, sp-qid=x%x\n",
+				bf_get(lpfc_wcqe_r_wqe_index, wcqe),
+				phba->sli4_hba.els_wq->queue_id);
+}
+
+/**
+ * lpfc_sli4_sp_handle_abort_xri_wcqe - Handle a xri abort event
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to a WQ completion queue.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an XRI abort event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
+				   struct lpfc_queue *cq,
+				   struct sli4_wcqe_xri_aborted *wcqe)
+{
+	bool workposted = false;
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	/* Allocate a new internal CQ_EVENT entry */
+	cq_event = lpfc_sli4_cq_event_alloc(phba);
+	if (!cq_event) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0602 Failed to allocate CQ_EVENT entry\n");
+		return false;
+	}
+
+	/* Move the CQE into the proper xri abort event list */
+	memcpy(&cq_event->cqe, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
+	switch (cq->subtype) {
+	case LPFC_FCP:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
+		/* Set the fcp xri abort event flag */
+		phba->hba_flag |= FCP_XRI_ABORT_EVENT;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	case LPFC_ELS:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_els_xri_aborted_work_queue);
+		/* Set the els xri abort event flag */
+		phba->hba_flag |= ELS_XRI_ABORT_EVENT;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0603 Invalid work queue CQE subtype (x%x)\n",
+				cq->subtype);
+		workposted = false;
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_wcqe - Process a work-queue completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to the completion queue.
+ * @wcqe: Pointer to a completion queue entry.
+ *
+ * This routine process a slow-path work-queue completion queue entry.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
+{
+	struct lpfc_wcqe_complete wcqe;
+	bool workposted = false;
+
+	/* Copy the work queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &wcqe, sizeof(struct lpfc_cqe));
+
+	/* Check and process for different type of WCQE and dispatch */
+	switch (bf_get(lpfc_wcqe_c_code, &wcqe)) {
+	case CQE_CODE_COMPL_WQE:
+		/* Process the WQ complete event */
+		workposted = lpfc_sli4_sp_handle_els_wcqe(phba,
+					(struct lpfc_wcqe_complete *)&wcqe);
+		break;
+	case CQE_CODE_RELEASE_WQE:
+		/* Process the WQ release event */
+		lpfc_sli4_sp_handle_rel_wcqe(phba,
+					(struct lpfc_wcqe_release *)&wcqe);
+		break;
+	case CQE_CODE_XRI_ABORTED:
+		/* Process the WQ XRI abort event */
+		workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq,
+					(struct sli4_wcqe_xri_aborted *)&wcqe);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0388 Not a valid WCQE code: x%x\n",
+				bf_get(lpfc_wcqe_c_code, &wcqe));
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_rcqe - Process a receive-queue completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @rcqe: Pointer to receive-queue completion queue entry.
+ *
+ * This routine process a receive-queue completion queue entry.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+{
+	struct lpfc_rcqe rcqe;
+	bool workposted = false;
+	struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
+	struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
+	struct hbq_dmabuf *dma_buf;
+	uint32_t status;
+	unsigned long iflags;
+
+	/* Copy the receive queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &rcqe, sizeof(struct lpfc_rcqe));
+	lpfc_sli4_rq_release(hrq, drq);
+	if (bf_get(lpfc_rcqe_code, &rcqe) != CQE_CODE_RECEIVE)
+		goto out;
+	if (bf_get(lpfc_rcqe_rq_id, &rcqe) != hrq->queue_id)
+		goto out;
+
+	status = bf_get(lpfc_rcqe_status, &rcqe);
+	switch (status) {
+	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2537 Receive Frame Truncated!!\n");
+	case FC_STATUS_RQ_SUCCESS:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		dma_buf = lpfc_sli_hbqbuf_get(&phba->hbqs[0].hbq_buffer_list);
+		if (!dma_buf) {
+			spin_unlock_irqrestore(&phba->hbalock, iflags);
+			goto out;
+		}
+		memcpy(&dma_buf->rcqe, &rcqe, sizeof(rcqe));
+		/* save off the frame for the word thread to process */
+		list_add_tail(&dma_buf->dbuf.list, &phba->rb_pend_list);
+		/* Frame received */
+		phba->hba_flag |= HBA_RECEIVE_BUFFER;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
+	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		/* Post more buffers if possible */
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		phba->hba_flag |= HBA_POST_RECEIVE_BUFFER;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	}
+out:
+	return workposted;
+
+}
+
+/**
+ * lpfc_sli4_sp_handle_eqe - Process a slow-path event queue entry
+ * @phba: Pointer to HBA context object.
+ * @eqe: Pointer to fast-path event queue entry.
+ *
+ * This routine process a event queue entry from the slow-path event queue.
+ * It will check the MajorCode and MinorCode to determine this is for a
+ * completion event on a completion queue, if not, an error shall be logged
+ * and just return. Otherwise, it will get to the corresponding completion
+ * queue and process all the entries on that completion queue, rearm the
+ * completion queue, and then return.
+ *
+ **/
+static void
+lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
+{
+	struct lpfc_queue *cq = NULL, *childq, *speq;
+	struct lpfc_cqe *cqe;
+	bool workposted = false;
+	int ecount = 0;
+	uint16_t cqid;
+
+	if (bf_get(lpfc_eqe_major_code, eqe) != 0 ||
+	    bf_get(lpfc_eqe_minor_code, eqe) != 0) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0359 Not a valid slow-path completion "
+				"event: majorcode=x%x, minorcode=x%x\n",
+				bf_get(lpfc_eqe_major_code, eqe),
+				bf_get(lpfc_eqe_minor_code, eqe));
+		return;
+	}
+
+	/* Get the reference to the corresponding CQ */
+	cqid = bf_get(lpfc_eqe_resource_id, eqe);
+
+	/* Search for completion queue pointer matching this cqid */
+	speq = phba->sli4_hba.sp_eq;
+	list_for_each_entry(childq, &speq->child_list, list) {
+		if (childq->queue_id == cqid) {
+			cq = childq;
+			break;
+		}
+	}
+	if (unlikely(!cq)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0365 Slow-path CQ identifier (%d) does "
+				"not exist\n", cqid);
+		return;
+	}
+
+	/* Process all the entries to the CQ */
+	switch (cq->type) {
+	case LPFC_MCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	case LPFC_WCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_wcqe(phba, cq, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	case LPFC_RCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_rcqe(phba, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0370 Invalid completion queue type (%d)\n",
+				cq->type);
+		return;
+	}
+
+	/* Catch the no cq entry condition, log an error */
+	if (unlikely(ecount == 0))
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0371 No entry from the CQ: identifier "
+				"(x%x), type (%d)\n", cq->queue_id, cq->type);
+
+	/* In any case, flash and re-arm the RCQ */
+	lpfc_sli4_cq_release(cq, LPFC_QUEUE_REARM);
+
+	/* wake up worker thread if there are works to be done */
+	if (workposted)
+		lpfc_worker_wake_up(phba);
+}
+
+/**
+ * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
+ * @eqe: Pointer to fast-path completion queue entry.
+ *
+ * This routine process a fast-path work queue completion entry from fast-path
+ * event queue for FCP command response completion.
+ **/
+static void
+lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_complete *wcqe)
+{
+	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING];
+	struct lpfc_iocbq *cmdiocbq;
+	struct lpfc_iocbq irspiocbq;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pring->stats.iocb_event++;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	/* Check for response status */
+	if (unlikely(bf_get(lpfc_wcqe_c_status, wcqe))) {
+		/* If resource errors reported from HBA, reduce queue
+		 * depth of the SCSI device.
+		 */
+		if ((bf_get(lpfc_wcqe_c_status, wcqe) ==
+		     IOSTAT_LOCAL_REJECT) &&
+		    (wcqe->parameter == IOERR_NO_RESOURCES)) {
+			phba->lpfc_rampdown_queue_depth(phba);
+		}
+		/* Log the error status */
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0373 FCP complete error: status=x%x, "
+				"hw_status=x%x, total_data_specified=%d, "
+				"parameter=x%x, word3=x%x\n",
+				bf_get(lpfc_wcqe_c_status, wcqe),
+				bf_get(lpfc_wcqe_c_hw_status, wcqe),
+				wcqe->total_data_placed, wcqe->parameter,
+				wcqe->word3);
+	}
+
+	/* Look up the FCP command IOCB and create pseudo response IOCB */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring,
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	if (unlikely(!cmdiocbq)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0374 FCP complete with no corresponding "
+				"cmdiocb: iotag (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return;
+	}
+	if (unlikely(!cmdiocbq->iocb_cmpl)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0375 FCP cmdiocb not callback function "
+				"iotag: (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return;
+	}
+
+	/* Fake the irspiocb and copy necessary response information */
+	lpfc_sli4_iocb_param_transfer(&irspiocbq, cmdiocbq, wcqe);
+
+	/* Pass the cmd_iocb and the rsp state to the upper layer */
+	(cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &irspiocbq);
+}
+
+/**
+ * lpfc_sli4_fp_handle_rel_wcqe - Handle fast-path WQ entry consumed event
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to completion queue.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an fast-path WQ entry comsumed event by invoking the
+ * proper WQ release routine to the slow-path WQ.
+ **/
+static void
+lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			     struct lpfc_wcqe_release *wcqe)
+{
+	struct lpfc_queue *childwq;
+	bool wqid_matched = false;
+	uint16_t fcp_wqid;
+
+	/* Check for fast-path FCP work queue release */
+	fcp_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe);
+	list_for_each_entry(childwq, &cq->child_list, list) {
+		if (childwq->queue_id == fcp_wqid) {
+			lpfc_sli4_wq_release(childwq,
+					bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+			wqid_matched = true;
+			break;
+		}
+	}
+	/* Report warning log message if no match found */
+	if (wqid_matched != true)
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"2580 Fast-path wqe consume event carries "
+				"miss-matched qid: wcqe-qid=x%x\n", fcp_wqid);
+}
+
+/**
+ * lpfc_sli4_fp_handle_wcqe - Process fast-path work queue completion entry
+ * @cq: Pointer to the completion queue.
+ * @eqe: Pointer to fast-path completion queue entry.
+ *
+ * This routine process a fast-path work queue completion entry from fast-path
+ * event queue for FCP command response completion.
+ **/
+static int
+lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
+{
+	struct lpfc_wcqe_release wcqe;
+	bool workposted = false;
+
+	/* Copy the work queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &wcqe, sizeof(struct lpfc_cqe));
+
+	/* Check and process for different type of WCQE and dispatch */
+	switch (bf_get(lpfc_wcqe_c_code, &wcqe)) {
+	case CQE_CODE_COMPL_WQE:
+		/* Process the WQ complete event */
+		lpfc_sli4_fp_handle_fcp_wcqe(phba,
+				(struct lpfc_wcqe_complete *)&wcqe);
+		break;
+	case CQE_CODE_RELEASE_WQE:
+		/* Process the WQ release event */
+		lpfc_sli4_fp_handle_rel_wcqe(phba, cq,
+				(struct lpfc_wcqe_release *)&wcqe);
+		break;
+	case CQE_CODE_XRI_ABORTED:
+		/* Process the WQ XRI abort event */
+		workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq,
+				(struct sli4_wcqe_xri_aborted *)&wcqe);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0144 Not a valid WCQE code: x%x\n",
+				bf_get(lpfc_wcqe_c_code, &wcqe));
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_fp_handle_eqe - Process a fast-path event queue entry
+ * @phba: Pointer to HBA context object.
+ * @eqe: Pointer to fast-path event queue entry.
+ *
+ * This routine process a event queue entry from the fast-path event queue.
+ * It will check the MajorCode and MinorCode to determine this is for a
+ * completion event on a completion queue, if not, an error shall be logged
+ * and just return. Otherwise, it will get to the corresponding completion
+ * queue and process all the entries on the completion queue, rearm the
+ * completion queue, and then return.
+ **/
+static void
+lpfc_sli4_fp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
+			uint32_t fcp_cqidx)
+{
+	struct lpfc_queue *cq;
+	struct lpfc_cqe *cqe;
+	bool workposted = false;
+	uint16_t cqid;
+	int ecount = 0;
+
+	if (unlikely(bf_get(lpfc_eqe_major_code, eqe) != 0) ||
+	    unlikely(bf_get(lpfc_eqe_minor_code, eqe) != 0)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0366 Not a valid fast-path completion "
+				"event: majorcode=x%x, minorcode=x%x\n",
+				bf_get(lpfc_eqe_major_code, eqe),
+				bf_get(lpfc_eqe_minor_code, eqe));
+		return;
+	}
+
+	cq = phba->sli4_hba.fcp_cq[fcp_cqidx];
+	if (unlikely(!cq)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0367 Fast-path completion queue does not "
+				"exist\n");
+		return;
+	}
+
+	/* Get the reference to the corresponding CQ */
+	cqid = bf_get(lpfc_eqe_resource_id, eqe);
+	if (unlikely(cqid != cq->queue_id)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0368 Miss-matched fast-path completion "
+				"queue identifier: eqcqid=%d, fcpcqid=%d\n",
+				cqid, cq->queue_id);
+		return;
+	}
+
+	/* Process all the entries to the CQ */
+	while ((cqe = lpfc_sli4_cq_get(cq))) {
+		workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Catch the no cq entry condition */
+	if (unlikely(ecount == 0))
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0369 No entry from fast-path completion "
+				"queue fcpcqid=%d\n", cq->queue_id);
+
+	/* In any case, flash and re-arm the CQ */
+	lpfc_sli4_cq_release(cq, LPFC_QUEUE_REARM);
+
+	/* wake up worker thread if there are works to be done */
+	if (workposted)
+		lpfc_worker_wake_up(phba);
+}
+
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+
+	/* walk all the EQ entries and drop on the floor */
+	while ((eqe = lpfc_sli4_eq_get(eq)))
+		;
+
+	/* Clear and re-arm the EQ */
+	lpfc_sli4_eq_release(eq, LPFC_QUEUE_REARM);
+}
+
+/**
+ * lpfc_sli4_sp_intr_handler - Slow-path interrupt handler to SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is directly called from the PCI layer as an interrupt
+ * service routine when device with SLI-4 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there are slow-path events in
+ * the HBA. However, when the device is enabled with either MSI or Pin-IRQ
+ * interrupt mode, this function is called as part of the device-level
+ * interrupt handler. When the PCI slot is in error recovery or the HBA is
+ * undergoing initialization, the interrupt handler will not process the
+ * interrupt. The link attention and ELS ring attention events are handled
+ * by the worker thread. The interrupt handler signals the worker thread
+ * and returns for these events. This function is called without any lock
+ * held. It gets the hbalock to access and update SLI data structures.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_sp_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba *phba;
+	struct lpfc_queue *speq;
+	struct lpfc_eqe *eqe;
+	unsigned long iflag;
+	int ecount = 0;
+
+	/*
+	 * Get the driver's phba structure from the dev_id
+	 */
+	phba = (struct lpfc_hba *)dev_id;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/* Get to the EQ struct associated with this vector */
+	speq = phba->sli4_hba.sp_eq;
+
+	/* Check device state for handling interrupt */
+	if (unlikely(lpfc_intr_state_check(phba))) {
+		/* Check again for link_state with lock held */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		if (phba->link_state < LPFC_LINK_DOWN)
+			/* Flush, clear interrupt, and rearm the EQ */
+			lpfc_sli4_eq_flush(phba, speq);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Process all the event on FCP slow-path EQ
+	 */
+	while ((eqe = lpfc_sli4_eq_get(speq))) {
+		lpfc_sli4_sp_handle_eqe(phba, eqe);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_eq_release(speq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Always clear and re-arm the slow-path EQ */
+	lpfc_sli4_eq_release(speq, LPFC_QUEUE_REARM);
+
+	/* Catch the no cq entry condition */
+	if (unlikely(ecount == 0)) {
+		if (phba->intr_type == MSIX)
+			/* MSI-X treated interrupt served as no EQ share INT */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0357 MSI-X interrupt with no EQE\n");
+		else
+			/* Non MSI-X treated on interrupt as EQ share INT */
+			return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+} /* lpfc_sli4_sp_intr_handler */
+
+/**
+ * lpfc_sli4_fp_intr_handler - Fast-path interrupt handler to SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is directly called from the PCI layer as an interrupt
+ * service routine when device with SLI-4 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there is a fast-path FCP IOCB
+ * ring event in the HBA. However, when the device is enabled with either
+ * MSI or Pin-IRQ interrupt mode, this function is called as part of the
+ * device-level interrupt handler. When the PCI slot is in error recovery
+ * or the HBA is undergoing initialization, the interrupt handler will not
+ * process the interrupt. The SCSI FCP fast-path ring event are handled in
+ * the intrrupt context. This function is called without any lock held.
+ * It gets the hbalock to access and update SLI data structures. Note that,
+ * the FCP EQ to FCP CQ are one-to-one map such that the FCP EQ index is
+ * equal to that of FCP CQ index.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_fp_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba *phba;
+	struct lpfc_fcp_eq_hdl *fcp_eq_hdl;
+	struct lpfc_queue *fpeq;
+	struct lpfc_eqe *eqe;
+	unsigned long iflag;
+	int ecount = 0;
+	uint32_t fcp_eqidx;
+
+	/* Get the driver's phba structure from the dev_id */
+	fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id;
+	phba = fcp_eq_hdl->phba;
+	fcp_eqidx = fcp_eq_hdl->idx;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/* Get to the EQ struct associated with this vector */
+	fpeq = phba->sli4_hba.fp_eq[fcp_eqidx];
+
+	/* Check device state for handling interrupt */
+	if (unlikely(lpfc_intr_state_check(phba))) {
+		/* Check again for link_state with lock held */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		if (phba->link_state < LPFC_LINK_DOWN)
+			/* Flush, clear interrupt, and rearm the EQ */
+			lpfc_sli4_eq_flush(phba, fpeq);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Process all the event on FCP fast-path EQ
+	 */
+	while ((eqe = lpfc_sli4_eq_get(fpeq))) {
+		lpfc_sli4_fp_handle_eqe(phba, eqe, fcp_eqidx);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Always clear and re-arm the fast-path EQ */
+	lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+
+	if (unlikely(ecount == 0)) {
+		if (phba->intr_type == MSIX)
+			/* MSI-X treated interrupt served as no EQ share INT */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0358 MSI-X interrupt with no EQE\n");
+		else
+			/* Non MSI-X treated on interrupt as EQ share INT */
+			return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+} /* lpfc_sli4_fp_intr_handler */
+
+/**
+ * lpfc_sli4_intr_handler - Device-level interrupt handler for SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is the device-level interrupt handler to device with SLI-4
+ * interface spec, called from the PCI layer when either MSI or Pin-IRQ
+ * interrupt mode is enabled and there is an event in the HBA which requires
+ * driver attention. This function invokes the slow-path interrupt attention
+ * handling function and fast-path interrupt attention handling function in
+ * turn to process the relevant HBA attention events. This function is called
+ * without any lock held. It gets the hbalock to access and update SLI data
+ * structures.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled, else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba  *phba;
+	irqreturn_t sp_irq_rc, fp_irq_rc;
+	bool fp_handled = false;
+	uint32_t fcp_eqidx;
+
+	/* Get the driver's phba structure from the dev_id */
+	phba = (struct lpfc_hba *)dev_id;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/*
+	 * Invokes slow-path host attention interrupt handling as appropriate.
+	 */
+	sp_irq_rc = lpfc_sli4_sp_intr_handler(irq, dev_id);
+
+	/*
+	 * Invoke fast-path host attention interrupt handling as appropriate.
+	 */
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		fp_irq_rc = lpfc_sli4_fp_intr_handler(irq,
+					&phba->sli4_hba.fcp_eq_hdl[fcp_eqidx]);
+		if (fp_irq_rc == IRQ_HANDLED)
+			fp_handled |= true;
+	}
+
+	return (fp_handled == true) ? IRQ_HANDLED : sp_irq_rc;
+} /* lpfc_sli4_intr_handler */
+
+/**
+ * lpfc_sli4_queue_free - free a queue structure and associated memory
+ * @queue: The queue structure to free.
+ *
+ * This function frees a queue structure and the DMAable memeory used for
+ * the host resident queue. This function must be called after destroying the
+ * queue on the HBA.
+ **/
+void
+lpfc_sli4_queue_free(struct lpfc_queue *queue)
+{
+	struct lpfc_dmabuf *dmabuf;
+
+	if (!queue)
+		return;
+
+	while (!list_empty(&queue->page_list)) {
+		list_remove_head(&queue->page_list, dmabuf, struct lpfc_dmabuf,
+				 list);
+		dma_free_coherent(&queue->phba->pcidev->dev, PAGE_SIZE,
+				  dmabuf->virt, dmabuf->phys);
+		kfree(dmabuf);
+	}
+	kfree(queue);
+	return;
+}
+
+/**
+ * lpfc_sli4_queue_alloc - Allocate and initialize a queue structure
+ * @phba: The HBA that this queue is being created on.
+ * @entry_size: The size of each queue entry for this queue.
+ * @entry count: The number of entries that this queue will handle.
+ *
+ * This function allocates a queue structure and the DMAable memory used for
+ * the host resident queue. This function must be called before creating the
+ * queue on the HBA.
+ **/
+struct lpfc_queue *
+lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size,
+		      uint32_t entry_count)
+{
+	struct lpfc_queue *queue;
+	struct lpfc_dmabuf *dmabuf;
+	int x, total_qe_count;
+	void *dma_pointer;
+
+
+	queue = kzalloc(sizeof(struct lpfc_queue) +
+			(sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	queue->page_count = (PAGE_ALIGN(entry_size * entry_count))/PAGE_SIZE;
+	INIT_LIST_HEAD(&queue->list);
+	INIT_LIST_HEAD(&queue->page_list);
+	INIT_LIST_HEAD(&queue->child_list);
+	for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+		if (!dmabuf)
+			goto out_fail;
+		dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+						  PAGE_SIZE, &dmabuf->phys,
+						  GFP_KERNEL);
+		if (!dmabuf->virt) {
+			kfree(dmabuf);
+			goto out_fail;
+		}
+		dmabuf->buffer_tag = x;
+		list_add_tail(&dmabuf->list, &queue->page_list);
+		/* initialize queue's entry array */
+		dma_pointer = dmabuf->virt;
+		for (; total_qe_count < entry_count &&
+		     dma_pointer < (PAGE_SIZE + dmabuf->virt);
+		     total_qe_count++, dma_pointer += entry_size) {
+			queue->qe[total_qe_count].address = dma_pointer;
+		}
+	}
+	queue->entry_size = entry_size;
+	queue->entry_count = entry_count;
+	queue->phba = phba;
+
+	return queue;
+out_fail:
+	lpfc_sli4_queue_free(queue);
+	return NULL;
+}
+
+/**
+ * lpfc_eq_create - Create an Event Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @eq: The queue structure to use to create the event queue.
+ * @imax: The maximum interrupt per second limit.
+ *
+ * This function creates an event queue, as detailed in @eq, on a port,
+ * described by @phba by sending an EQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @eq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. This
+ * function will send the EQ_CREATE mailbox command to the HBA to setup the
+ * event queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint16_t imax)
+{
+	struct lpfc_mbx_eq_create *eq_create;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	struct lpfc_dmabuf *dmabuf;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	uint16_t dmult;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_eq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_EQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	eq_create = &mbox->u.mqe.un.eq_create;
+	bf_set(lpfc_mbx_eq_create_num_pages, &eq_create->u.request,
+	       eq->page_count);
+	bf_set(lpfc_eq_context_size, &eq_create->u.request.context,
+	       LPFC_EQE_SIZE);
+	bf_set(lpfc_eq_context_valid, &eq_create->u.request.context, 1);
+	/* Calculate delay multiper from maximum interrupt per second */
+	dmult = LPFC_DMULT_CONST/imax - 1;
+	bf_set(lpfc_eq_context_delay_multi, &eq_create->u.request.context,
+	       dmult);
+	switch (eq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0360 Unsupported EQ count. (%d)\n",
+				eq->entry_count);
+		if (eq->entry_count < 256)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 256:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_256);
+		break;
+	case 512:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_512);
+		break;
+	case 1024:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_4096);
+		break;
+	}
+	list_for_each_entry(dmabuf, &eq->page_list, list) {
+		eq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		eq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	mbox->context1 = NULL;
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *) &eq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2500 EQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	eq->type = LPFC_EQ;
+	eq->subtype = LPFC_NONE;
+	eq->queue_id = bf_get(lpfc_mbx_eq_create_q_id, &eq_create->u.response);
+	if (eq->queue_id == 0xFFFF)
+		status = -ENXIO;
+	eq->host_index = 0;
+	eq->hba_index = 0;
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_cq_create - Create a Completion Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @cq: The queue structure to use to create the completion queue.
+ * @eq: The event queue to bind this completion queue to.
+ *
+ * This function creates a completion queue, as detailed in @wq, on a port,
+ * described by @phba by sending a CQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @cq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. The @eq
+ * is used to indicate which event queue to bind this completion queue to. This
+ * function will send the CQ_CREATE mailbox command to the HBA to setup the
+ * completion queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
+	       struct lpfc_queue *eq, uint32_t type, uint32_t subtype)
+{
+	struct lpfc_mbx_cq_create *cq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_cq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_CQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	cq_create = &mbox->u.mqe.un.cq_create;
+	bf_set(lpfc_mbx_cq_create_num_pages, &cq_create->u.request,
+		    cq->page_count);
+	bf_set(lpfc_cq_context_event, &cq_create->u.request.context, 1);
+	bf_set(lpfc_cq_context_valid, &cq_create->u.request.context, 1);
+	bf_set(lpfc_cq_eq_id, &cq_create->u.request.context, eq->queue_id);
+	switch (cq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0361 Unsupported CQ count. (%d)\n",
+				cq->entry_count);
+		if (cq->entry_count < 256)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 256:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_256);
+		break;
+	case 512:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_512);
+		break;
+	case 1024:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_1024);
+		break;
+	}
+	list_for_each_entry(dmabuf, &cq->page_list, list) {
+		cq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		cq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &cq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2501 CQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
+	if (cq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	/* link the cq onto the parent eq child list */
+	list_add_tail(&cq->list, &eq->child_list);
+	/* Set up completion queue's type and subtype */
+	cq->type = type;
+	cq->subtype = subtype;
+	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
+	cq->host_index = 0;
+	cq->hba_index = 0;
+out:
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_mq_create - Create a mailbox Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @mq: The queue structure to use to create the mailbox queue.
+ *
+ * This function creates a mailbox queue, as detailed in @mq, on a port,
+ * described by @phba by sending a MQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @cq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. This
+ * function will send the MQ_CREATE mailbox command to the HBA to setup the
+ * mailbox queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
+	       struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_mq_create *mq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_mq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_MQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	mq_create = &mbox->u.mqe.un.mq_create;
+	bf_set(lpfc_mbx_mq_create_num_pages, &mq_create->u.request,
+		    mq->page_count);
+	bf_set(lpfc_mq_context_cq_id, &mq_create->u.request.context,
+		    cq->queue_id);
+	bf_set(lpfc_mq_context_valid, &mq_create->u.request.context, 1);
+	switch (mq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0362 Unsupported MQ count. (%d)\n",
+				mq->entry_count);
+		if (mq->entry_count < 16)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 16:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_16);
+		break;
+	case 32:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_32);
+		break;
+	case 64:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_64);
+		break;
+	case 128:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_128);
+		break;
+	}
+	list_for_each_entry(dmabuf, &mq->page_list, list) {
+		mq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		mq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &mq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2502 MQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	mq->queue_id = bf_get(lpfc_mbx_mq_create_q_id, &mq_create->u.response);
+	if (mq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	mq->type = LPFC_MQ;
+	mq->subtype = subtype;
+	mq->host_index = 0;
+	mq->hba_index = 0;
+
+	/* link the mq onto the parent cq child list */
+	list_add_tail(&mq->list, &cq->child_list);
+out:
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_wq_create - Create a Work Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @wq: The queue structure to use to create the work queue.
+ * @cq: The completion queue to bind this work queue to.
+ * @subtype: The subtype of the work queue indicating its functionality.
+ *
+ * This function creates a work queue, as detailed in @wq, on a port, described
+ * by @phba by sending a WQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @wq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. The @cq
+ * is used to indicate which completion queue to bind this work queue to. This
+ * function will send the WQ_CREATE mailbox command to the HBA to setup the
+ * work queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
+	       struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_wq_create *wq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_wq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_WQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	wq_create = &mbox->u.mqe.un.wq_create;
+	bf_set(lpfc_mbx_wq_create_num_pages, &wq_create->u.request,
+		    wq->page_count);
+	bf_set(lpfc_mbx_wq_create_cq_id, &wq_create->u.request,
+		    cq->queue_id);
+	list_for_each_entry(dmabuf, &wq->page_list, list) {
+		wq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		wq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &wq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2503 WQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id, &wq_create->u.response);
+	if (wq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	wq->type = LPFC_WQ;
+	wq->subtype = subtype;
+	wq->host_index = 0;
+	wq->hba_index = 0;
+
+	/* link the wq onto the parent cq child list */
+	list_add_tail(&wq->list, &cq->child_list);
+out:
+	if (rc == MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_rq_create - Create a Receive Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @hrq: The queue structure to use to create the header receive queue.
+ * @drq: The queue structure to use to create the data receive queue.
+ * @cq: The completion queue to bind this work queue to.
+ *
+ * This function creates a receive buffer queue pair , as detailed in @hrq and
+ * @drq, on a port, described by @phba by sending a RQ_CREATE mailbox command
+ * to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @drq and @hrq
+ * struct is used to get the entry count that is necessary to determine the
+ * number of pages to use for this queue. The @cq is used to indicate which
+ * completion queue to bind received buffers that are posted to these queues to.
+ * This function will send the RQ_CREATE mailbox command to the HBA to setup the
+ * receive queue pair. This function is asynchronous and will wait for the
+ * mailbox command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+	       struct lpfc_queue *drq, struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_rq_create *rq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (hrq->entry_count != drq->entry_count)
+		return -EINVAL;
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_rq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	rq_create = &mbox->u.mqe.un.rq_create;
+	switch (hrq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2535 Unsupported RQ count. (%d)\n",
+				hrq->entry_count);
+		if (hrq->entry_count < 512)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 512:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_512);
+		break;
+	case 1024:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_4096);
+		break;
+	}
+	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
+	       cq->queue_id);
+	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
+	       hrq->page_count);
+	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+	       LPFC_HDR_BUF_SIZE);
+	list_for_each_entry(dmabuf, &hrq->page_list, list) {
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2504 RQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	hrq->queue_id = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response);
+	if (hrq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	hrq->type = LPFC_HRQ;
+	hrq->subtype = subtype;
+	hrq->host_index = 0;
+	hrq->hba_index = 0;
+
+	/* now create the data queue */
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	switch (drq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2536 Unsupported RQ count. (%d)\n",
+				drq->entry_count);
+		if (drq->entry_count < 512)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 512:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_512);
+		break;
+	case 1024:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_4096);
+		break;
+	}
+	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
+	       cq->queue_id);
+	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
+	       drq->page_count);
+	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+	       LPFC_DATA_BUF_SIZE);
+	list_for_each_entry(dmabuf, &drq->page_list, list) {
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		status = -ENXIO;
+		goto out;
+	}
+	drq->queue_id = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response);
+	if (drq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	drq->type = LPFC_DRQ;
+	drq->subtype = subtype;
+	drq->host_index = 0;
+	drq->hba_index = 0;
+
+	/* link the header and data RQs onto the parent cq child list */
+	list_add_tail(&hrq->list, &cq->child_list);
+	list_add_tail(&drq->list, &cq->child_list);
+
+out:
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_eq_destroy - Destroy an event Queue on the HBA
+ * @eq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @eq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @eq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!eq)
+		return -ENODEV;
+	mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_eq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_EQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_eq_destroy_q_id, &mbox->u.mqe.un.eq_destroy.u.request,
+	       eq->queue_id);
+	mbox->vport = eq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+
+	rc = lpfc_sli_issue_mbox(eq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.eq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2505 EQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+
+	/* Remove eq from any list */
+	list_del_init(&eq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, eq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_cq_destroy - Destroy a Completion Queue on the HBA
+ * @cq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @cq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @cq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_cq_destroy(struct lpfc_hba *phba, struct lpfc_queue *cq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!cq)
+		return -ENODEV;
+	mbox = mempool_alloc(cq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_cq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_CQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_cq_destroy_q_id, &mbox->u.mqe.un.cq_destroy.u.request,
+	       cq->queue_id);
+	mbox->vport = cq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(cq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.wq_create.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2506 CQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove cq from any list */
+	list_del_init(&cq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, cq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_mq_destroy - Destroy a Mailbox Queue on the HBA
+ * @qm: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @mq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @mq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_mq_destroy(struct lpfc_hba *phba, struct lpfc_queue *mq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!mq)
+		return -ENODEV;
+	mbox = mempool_alloc(mq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_mq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_MQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_mq_destroy_q_id, &mbox->u.mqe.un.mq_destroy.u.request,
+	       mq->queue_id);
+	mbox->vport = mq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(mq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.mq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2507 MQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove mq from any list */
+	list_del_init(&mq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, mq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_wq_destroy - Destroy a Work Queue on the HBA
+ * @wq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @wq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @wq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!wq)
+		return -ENODEV;
+	mbox = mempool_alloc(wq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_wq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_WQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_wq_destroy_q_id, &mbox->u.mqe.un.wq_destroy.u.request,
+	       wq->queue_id);
+	mbox->vport = wq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(wq->phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.wq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2508 WQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove wq from any list */
+	list_del_init(&wq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, wq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_rq_destroy - Destroy a Receive Queue on the HBA
+ * @rq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @rq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @rq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_rq_destroy(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+		struct lpfc_queue *drq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!hrq || !drq)
+		return -ENODEV;
+	mbox = mempool_alloc(hrq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_rq_destroy) -
+		  sizeof(struct mbox_header));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_rq_destroy_q_id, &mbox->u.mqe.un.rq_destroy.u.request,
+	       hrq->queue_id);
+	mbox->vport = hrq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(hrq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.rq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2509 RQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		if (rc != MBX_TIMEOUT)
+			mempool_free(mbox, hrq->phba->mbox_mem_pool);
+		return -ENXIO;
+	}
+	bf_set(lpfc_mbx_rq_destroy_q_id, &mbox->u.mqe.un.rq_destroy.u.request,
+	       drq->queue_id);
+	rc = lpfc_sli_issue_mbox(drq->phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.rq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2510 RQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	list_del_init(&hrq->list);
+	list_del_init(&drq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, hrq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_sli4_post_sgl - Post scatter gather list for an XRI to HBA
+ * @phba: The virtual port for which this call being executed.
+ * @pdma_phys_addr0: Physical address of the 1st SGL page.
+ * @pdma_phys_addr1: Physical address of the 2nd SGL page.
+ * @xritag: the xritag that ties this io to the SGL pages.
+ *
+ * This routine will post the sgl pages for the IO that has the xritag
+ * that is in the iocbq structure. The xritag is assigned during iocbq
+ * creation and persists for as long as the driver is loaded.
+ * if the caller has fewer than 256 scatter gather segments to map then
+ * pdma_phys_addr1 should be 0.
+ * If the caller needs to map more than 256 scatter gather segment then
+ * pdma_phys_addr1 should be a valid physical address.
+ * physical address for SGLs must be 64 byte aligned.
+ * If you are going to map 2 SGL's then the first one must have 256 entries
+ * the second sgl can have between 1 and 256 entries.
+ *
+ * Return codes:
+ * 	0 - Success
+ * 	-ENXIO, -ENOMEM - Failure
+ **/
+int
+lpfc_sli4_post_sgl(struct lpfc_hba *phba,
+		dma_addr_t pdma_phys_addr0,
+		dma_addr_t pdma_phys_addr1,
+		uint16_t xritag)
+{
+	struct lpfc_mbx_post_sgl_pages *post_sgl_pages;
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (xritag == NO_XRI) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0364 Invalid param:\n");
+		return -EINVAL;
+	}
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES,
+			sizeof(struct lpfc_mbx_post_sgl_pages) -
+			sizeof(struct mbox_header), LPFC_SLI4_MBX_EMBED);
+
+	post_sgl_pages = (struct lpfc_mbx_post_sgl_pages *)
+				&mbox->u.mqe.un.post_sgl_pages;
+	bf_set(lpfc_post_sgl_pages_xri, post_sgl_pages, xritag);
+	bf_set(lpfc_post_sgl_pages_xricnt, post_sgl_pages, 1);
+
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg0_addr_lo	=
+				cpu_to_le32(putPaddrLow(pdma_phys_addr0));
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg0_addr_hi =
+				cpu_to_le32(putPaddrHigh(pdma_phys_addr0));
+
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg1_addr_lo	=
+				cpu_to_le32(putPaddrLow(pdma_phys_addr1));
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg1_addr_hi =
+				cpu_to_le32(putPaddrHigh(pdma_phys_addr1));
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &post_sgl_pages->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2511 POST_SGL mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return 0;
+}
+/**
+ * lpfc_sli4_remove_all_sgl_pages - Post scatter gather list for an XRI to HBA
+ * @phba: The virtual port for which this call being executed.
+ *
+ * This routine will remove all of the sgl pages registered with the hba.
+ *
+ * Return codes:
+ * 	0 - Success
+ * 	-ENXIO, -ENOMEM - Failure
+ **/
+int
+lpfc_sli4_remove_all_sgl_pages(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			LPFC_MBOX_OPCODE_FCOE_REMOVE_SGL_PAGES, 0,
+			LPFC_SLI4_MBX_EMBED);
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2512 REMOVE_ALL_SGL_PAGES mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_next_xritag - Get an xritag for the io
+ * @phba: Pointer to HBA context object.
+ *
+ * This function gets an xritag for the iocb. If there is no unused xritag
+ * it will return 0xffff.
+ * The function returns the allocated xritag if successful, else returns zero.
+ * Zero is not a valid xritag.
+ * The caller is not required to hold any lock.
+ **/
+uint16_t
+lpfc_sli4_next_xritag(struct lpfc_hba *phba)
+{
+	uint16_t xritag;
+
+	spin_lock_irq(&phba->hbalock);
+	xritag = phba->sli4_hba.next_xri;
+	if ((xritag != (uint16_t) -1) && xritag <
+		(phba->sli4_hba.max_cfg_param.max_xri
+			+ phba->sli4_hba.max_cfg_param.xri_base)) {
+		phba->sli4_hba.next_xri++;
+		phba->sli4_hba.max_cfg_param.xri_used++;
+		spin_unlock_irq(&phba->hbalock);
+		return xritag;
+	}
+	spin_unlock_irq(&phba->hbalock);
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2004 Failed to allocate XRI.last XRITAG is %d"
+			" Max XRI is %d, Used XRI is %d\n",
+			phba->sli4_hba.next_xri,
+			phba->sli4_hba.max_cfg_param.max_xri,
+			phba->sli4_hba.max_cfg_param.xri_used);
+	return -1;
+}
+
+/**
+ * lpfc_sli4_post_sgl_list - post a block of sgl list to the firmware.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post a block of driver's sgl pages to the
+ * HBA using non-embedded mailbox command. No Lock is held. This routine
+ * is only called when the driver is loading and after all IO has been
+ * stopped.
+ **/
+int
+lpfc_sli4_post_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_sglq *sglq_entry;
+	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
+	struct sgl_page_pairs *sgl_pg_pairs;
+	void *viraddr;
+	LPFC_MBOXQ_t *mbox;
+	uint32_t reqlen, alloclen, pg_pairs;
+	uint32_t mbox_tmo;
+	uint16_t xritag_start = 0;
+	int els_xri_cnt, rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* The number of sgls to be posted */
+	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+
+	reqlen = els_xri_cnt * sizeof(struct sgl_page_pairs) +
+		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
+	if (reqlen > PAGE_SIZE) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2559 Block sgl registration required DMA "
+				"size (%d) great than a page\n", reqlen);
+		return -ENOMEM;
+	}
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2560 Failed to allocate mbox cmd memory\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
+			 LPFC_SLI4_MBX_NEMBED);
+
+	if (alloclen < reqlen) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0285 Allocated DMA memory size (%d) is "
+				"less than the requested DMA memory "
+				"size (%d)\n", alloclen, reqlen);
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory */
+	if (unlikely(!mbox->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2525 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+	viraddr = mbox->sge_array->addr[0];
+
+	/* Set up the SGL pages in the non-embedded DMA pages */
+	sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr;
+	sgl_pg_pairs = &sgl->sgl_pg_pairs;
+
+	for (pg_pairs = 0; pg_pairs < els_xri_cnt; pg_pairs++) {
+		sglq_entry = phba->sli4_hba.lpfc_els_sgl_array[pg_pairs];
+		/* Set up the sge entry */
+		sgl_pg_pairs->sgl_pg0_addr_lo =
+				cpu_to_le32(putPaddrLow(sglq_entry->phys));
+		sgl_pg_pairs->sgl_pg0_addr_hi =
+				cpu_to_le32(putPaddrHigh(sglq_entry->phys));
+		sgl_pg_pairs->sgl_pg1_addr_lo =
+				cpu_to_le32(putPaddrLow(0));
+		sgl_pg_pairs->sgl_pg1_addr_hi =
+				cpu_to_le32(putPaddrHigh(0));
+		/* Keep the first xritag on the list */
+		if (pg_pairs == 0)
+			xritag_start = sglq_entry->sli4_xritag;
+		sgl_pg_pairs++;
+	}
+	bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+	pg_pairs = (pg_pairs > 0) ? (pg_pairs - 1) : pg_pairs;
+	bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+	/* Perform endian conversion if necessary */
+	sgl->word0 = cpu_to_le32(sgl->word0);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2513 POST_SGL_BLOCK mailbox command failed "
+				"status x%x add_status x%x mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_post_scsi_sgl_block - post a block of scsi sgl list to firmware
+ * @phba: pointer to lpfc hba data structure.
+ * @sblist: pointer to scsi buffer list.
+ * @count: number of scsi buffers on the list.
+ *
+ * This routine is invoked to post a block of @count scsi sgl pages from a
+ * SCSI buffer list @sblist to the HBA using non-embedded mailbox command.
+ * No Lock is held.
+ *
+ **/
+int
+lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba, struct list_head *sblist,
+			      int cnt)
+{
+	struct lpfc_scsi_buf *psb;
+	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
+	struct sgl_page_pairs *sgl_pg_pairs;
+	void *viraddr;
+	LPFC_MBOXQ_t *mbox;
+	uint32_t reqlen, alloclen, pg_pairs;
+	uint32_t mbox_tmo;
+	uint16_t xritag_start = 0;
+	int rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	dma_addr_t pdma_phys_bpl1;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* Calculate the requested length of the dma memory */
+	reqlen = cnt * sizeof(struct sgl_page_pairs) +
+		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
+	if (reqlen > PAGE_SIZE) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0217 Block sgl registration required DMA "
+				"size (%d) great than a page\n", reqlen);
+		return -ENOMEM;
+	}
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0283 Failed to allocate mbox cmd memory\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+				LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
+				LPFC_SLI4_MBX_NEMBED);
+
+	if (alloclen < reqlen) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2561 Allocated DMA memory size (%d) is "
+				"less than the requested DMA memory "
+				"size (%d)\n", alloclen, reqlen);
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory */
+	if (unlikely(!mbox->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2565 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+	viraddr = mbox->sge_array->addr[0];
+
+	/* Set up the SGL pages in the non-embedded DMA pages */
+	sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr;
+	sgl_pg_pairs = &sgl->sgl_pg_pairs;
+
+	pg_pairs = 0;
+	list_for_each_entry(psb, sblist, list) {
+		/* Set up the sge entry */
+		sgl_pg_pairs->sgl_pg0_addr_lo =
+			cpu_to_le32(putPaddrLow(psb->dma_phys_bpl));
+		sgl_pg_pairs->sgl_pg0_addr_hi =
+			cpu_to_le32(putPaddrHigh(psb->dma_phys_bpl));
+		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
+			pdma_phys_bpl1 = psb->dma_phys_bpl + SGL_PAGE_SIZE;
+		else
+			pdma_phys_bpl1 = 0;
+		sgl_pg_pairs->sgl_pg1_addr_lo =
+			cpu_to_le32(putPaddrLow(pdma_phys_bpl1));
+		sgl_pg_pairs->sgl_pg1_addr_hi =
+			cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
+		/* Keep the first xritag on the list */
+		if (pg_pairs == 0)
+			xritag_start = psb->cur_iocbq.sli4_xritag;
+		sgl_pg_pairs++;
+		pg_pairs++;
+	}
+	bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+	bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+	/* Perform endian conversion if necessary */
+	sgl->word0 = cpu_to_le32(sgl->word0);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2564 POST_SGL_BLOCK mailbox command failed "
+				"status x%x add_status x%x mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_fc_frame_check - Check that this frame is a valid frame to handle
+ * @phba: pointer to lpfc_hba struct that the frame was received on
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ *
+ * This function checks the fields in the @fc_hdr to see if the FC frame is a
+ * valid type of frame that the LPFC driver will handle. This function will
+ * return a zero if the frame is a valid frame or a non zero value when the
+ * frame does not pass the check.
+ **/
+static int
+lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
+{
+	char *rctl_names[] = FC_RCTL_NAMES_INIT;
+	char *type_names[] = FC_TYPE_NAMES_INIT;
+	struct fc_vft_header *fc_vft_hdr;
+
+	switch (fc_hdr->fh_r_ctl) {
+	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
+	case FC_RCTL_DD_SOL_DATA:	/* solicited data */
+	case FC_RCTL_DD_UNSOL_CTL:	/* unsolicited control */
+	case FC_RCTL_DD_SOL_CTL:	/* solicited control or reply */
+	case FC_RCTL_DD_UNSOL_DATA:	/* unsolicited data */
+	case FC_RCTL_DD_DATA_DESC:	/* data descriptor */
+	case FC_RCTL_DD_UNSOL_CMD:	/* unsolicited command */
+	case FC_RCTL_DD_CMD_STATUS:	/* command status */
+	case FC_RCTL_ELS_REQ:	/* extended link services request */
+	case FC_RCTL_ELS_REP:	/* extended link services reply */
+	case FC_RCTL_ELS4_REQ:	/* FC-4 ELS request */
+	case FC_RCTL_ELS4_REP:	/* FC-4 ELS reply */
+	case FC_RCTL_BA_NOP:  	/* basic link service NOP */
+	case FC_RCTL_BA_ABTS: 	/* basic link service abort */
+	case FC_RCTL_BA_RMC: 	/* remove connection */
+	case FC_RCTL_BA_ACC:	/* basic accept */
+	case FC_RCTL_BA_RJT:	/* basic reject */
+	case FC_RCTL_BA_PRMT:
+	case FC_RCTL_ACK_1:	/* acknowledge_1 */
+	case FC_RCTL_ACK_0:	/* acknowledge_0 */
+	case FC_RCTL_P_RJT:	/* port reject */
+	case FC_RCTL_F_RJT:	/* fabric reject */
+	case FC_RCTL_P_BSY:	/* port busy */
+	case FC_RCTL_F_BSY:	/* fabric busy to data frame */
+	case FC_RCTL_F_BSYL:	/* fabric busy to link control frame */
+	case FC_RCTL_LCR:	/* link credit reset */
+	case FC_RCTL_END:	/* end */
+		break;
+	case FC_RCTL_VFTH:	/* Virtual Fabric tagging Header */
+		fc_vft_hdr = (struct fc_vft_header *)fc_hdr;
+		fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1];
+		return lpfc_fc_frame_check(phba, fc_hdr);
+	default:
+		goto drop;
+	}
+	switch (fc_hdr->fh_type) {
+	case FC_TYPE_BLS:
+	case FC_TYPE_ELS:
+	case FC_TYPE_FCP:
+	case FC_TYPE_CT:
+		break;
+	case FC_TYPE_IP:
+	case FC_TYPE_ILS:
+	default:
+		goto drop;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+			"2538 Received frame rctl:%s type:%s\n",
+			rctl_names[fc_hdr->fh_r_ctl],
+			type_names[fc_hdr->fh_type]);
+	return 0;
+drop:
+	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
+			"2539 Dropped frame rctl:%s type:%s\n",
+			rctl_names[fc_hdr->fh_r_ctl],
+			type_names[fc_hdr->fh_type]);
+	return 1;
+}
+
+/**
+ * lpfc_fc_hdr_get_vfi - Get the VFI from an FC frame
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ *
+ * This function processes the FC header to retrieve the VFI from the VF
+ * header, if one exists. This function will return the VFI if one exists
+ * or 0 if no VSAN Header exists.
+ **/
+static uint32_t
+lpfc_fc_hdr_get_vfi(struct fc_frame_header *fc_hdr)
+{
+	struct fc_vft_header *fc_vft_hdr = (struct fc_vft_header *)fc_hdr;
+
+	if (fc_hdr->fh_r_ctl != FC_RCTL_VFTH)
+		return 0;
+	return bf_get(fc_vft_hdr_vf_id, fc_vft_hdr);
+}
+
+/**
+ * lpfc_fc_frame_to_vport - Finds the vport that a frame is destined to
+ * @phba: Pointer to the HBA structure to search for the vport on
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ * @fcfi: The FC Fabric ID that the frame came from
+ *
+ * This function searches the @phba for a vport that matches the content of the
+ * @fc_hdr passed in and the @fcfi. This function uses the @fc_hdr to fetch the
+ * VFI, if the Virtual Fabric Tagging Header exists, and the DID. This function
+ * returns the matching vport pointer or NULL if unable to match frame to a
+ * vport.
+ **/
+static struct lpfc_vport *
+lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr,
+		       uint16_t fcfi)
+{
+	struct lpfc_vport **vports;
+	struct lpfc_vport *vport = NULL;
+	int i;
+	uint32_t did = (fc_hdr->fh_d_id[0] << 16 |
+			fc_hdr->fh_d_id[1] << 8 |
+			fc_hdr->fh_d_id[2]);
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+			if (phba->fcf.fcfi == fcfi &&
+			    vports[i]->vfi == lpfc_fc_hdr_get_vfi(fc_hdr) &&
+			    vports[i]->fc_myDID == did) {
+				vport = vports[i];
+				break;
+			}
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+	return vport;
+}
+
+/**
+ * lpfc_fc_frame_add - Adds a frame to the vport's list of received sequences
+ * @dmabuf: pointer to a dmabuf that describes the hdr and data of the FC frame
+ *
+ * This function searches through the existing incomplete sequences that have
+ * been sent to this @vport. If the frame matches one of the incomplete
+ * sequences then the dbuf in the @dmabuf is added to the list of frames that
+ * make up that sequence. If no sequence is found that matches this frame then
+ * the function will add the hbuf in the @dmabuf to the @vport's rcv_buffer_list
+ * This function returns a pointer to the first dmabuf in the sequence list that
+ * the frame was linked to.
+ **/
+static struct hbq_dmabuf *
+lpfc_fc_frame_add(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *new_hdr;
+	struct fc_frame_header *temp_hdr;
+	struct lpfc_dmabuf *d_buf;
+	struct lpfc_dmabuf *h_buf;
+	struct hbq_dmabuf *seq_dmabuf = NULL;
+	struct hbq_dmabuf *temp_dmabuf = NULL;
+
+	new_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	/* Use the hdr_buf to find the sequence that this frame belongs to */
+	list_for_each_entry(h_buf, &vport->rcv_buffer_list, list) {
+		temp_hdr = (struct fc_frame_header *)h_buf->virt;
+		if ((temp_hdr->fh_seq_id != new_hdr->fh_seq_id) ||
+		    (temp_hdr->fh_ox_id != new_hdr->fh_ox_id) ||
+		    (memcmp(&temp_hdr->fh_s_id, &new_hdr->fh_s_id, 3)))
+			continue;
+		/* found a pending sequence that matches this frame */
+		seq_dmabuf = container_of(h_buf, struct hbq_dmabuf, hbuf);
+		break;
+	}
+	if (!seq_dmabuf) {
+		/*
+		 * This indicates first frame received for this sequence.
+		 * Queue the buffer on the vport's rcv_buffer_list.
+		 */
+		list_add_tail(&dmabuf->hbuf.list, &vport->rcv_buffer_list);
+		return dmabuf;
+	}
+	temp_hdr = seq_dmabuf->hbuf.virt;
+	if (new_hdr->fh_seq_cnt < temp_hdr->fh_seq_cnt) {
+		list_add(&seq_dmabuf->dbuf.list, &dmabuf->dbuf.list);
+		return dmabuf;
+	}
+	/* find the correct place in the sequence to insert this frame */
+	list_for_each_entry_reverse(d_buf, &seq_dmabuf->dbuf.list, list) {
+		temp_dmabuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+		temp_hdr = (struct fc_frame_header *)temp_dmabuf->hbuf.virt;
+		/*
+		 * If the frame's sequence count is greater than the frame on
+		 * the list then insert the frame right after this frame
+		 */
+		if (new_hdr->fh_seq_cnt > temp_hdr->fh_seq_cnt) {
+			list_add(&dmabuf->dbuf.list, &temp_dmabuf->dbuf.list);
+			return seq_dmabuf;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_seq_complete - Indicates if a sequence is complete
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function checks the sequence, starting with the frame described by
+ * @dmabuf, to see if all the frames associated with this sequence are present.
+ * the frames associated with this sequence are linked to the @dmabuf using the
+ * dbuf list. This function looks for two major things. 1) That the first frame
+ * has a sequence count of zero. 2) There is a frame with last frame of sequence
+ * set. 3) That there are no holes in the sequence count. The function will
+ * return 1 when the sequence is complete, otherwise it will return 0.
+ **/
+static int
+lpfc_seq_complete(struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *hdr;
+	struct lpfc_dmabuf *d_buf;
+	struct hbq_dmabuf *seq_dmabuf;
+	uint32_t fctl;
+	int seq_count = 0;
+
+	hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	/* make sure first fame of sequence has a sequence count of zero */
+	if (hdr->fh_seq_cnt != seq_count)
+		return 0;
+	fctl = (hdr->fh_f_ctl[0] << 16 |
+		hdr->fh_f_ctl[1] << 8 |
+		hdr->fh_f_ctl[2]);
+	/* If last frame of sequence we can return success. */
+	if (fctl & FC_FC_END_SEQ)
+		return 1;
+	list_for_each_entry(d_buf, &dmabuf->dbuf.list, list) {
+		seq_dmabuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+		hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+		/* If there is a hole in the sequence count then fail. */
+		if (++seq_count != hdr->fh_seq_cnt)
+			return 0;
+		fctl = (hdr->fh_f_ctl[0] << 16 |
+			hdr->fh_f_ctl[1] << 8 |
+			hdr->fh_f_ctl[2]);
+		/* If last frame of sequence we can return success. */
+		if (fctl & FC_FC_END_SEQ)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_prep_seq - Prep sequence for ULP processing
+ * @vport: Pointer to the vport on which this sequence was received
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function takes a sequence, described by a list of frames, and creates
+ * a list of iocbq structures to describe the sequence. This iocbq list will be
+ * used to issue to the generic unsolicited sequence handler. This routine
+ * returns a pointer to the first iocbq in the list. If the function is unable
+ * to allocate an iocbq then it throw out the received frames that were not
+ * able to be described and return a pointer to the first iocbq. If unable to
+ * allocate any iocbqs (including the first) this function will return NULL.
+ **/
+static struct lpfc_iocbq *
+lpfc_prep_seq(struct lpfc_vport *vport, struct hbq_dmabuf *seq_dmabuf)
+{
+	struct lpfc_dmabuf *d_buf, *n_buf;
+	struct lpfc_iocbq *first_iocbq, *iocbq;
+	struct fc_frame_header *fc_hdr;
+	uint32_t sid;
+
+	fc_hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+	/* remove from receive buffer list */
+	list_del_init(&seq_dmabuf->hbuf.list);
+	/* get the Remote Port's SID */
+	sid = (fc_hdr->fh_s_id[0] << 16 |
+	       fc_hdr->fh_s_id[1] << 8 |
+	       fc_hdr->fh_s_id[2]);
+	/* Get an iocbq struct to fill in. */
+	first_iocbq = lpfc_sli_get_iocbq(vport->phba);
+	if (first_iocbq) {
+		/* Initialize the first IOCB. */
+		first_iocbq->iocb.ulpStatus = IOSTAT_SUCCESS;
+		first_iocbq->iocb.ulpCommand = CMD_IOCB_RCV_SEQ64_CX;
+		first_iocbq->iocb.ulpContext = be16_to_cpu(fc_hdr->fh_ox_id);
+		first_iocbq->iocb.unsli3.rcvsli3.vpi =
+					vport->vpi + vport->phba->vpi_base;
+		/* put the first buffer into the first IOCBq */
+		first_iocbq->context2 = &seq_dmabuf->dbuf;
+		first_iocbq->context3 = NULL;
+		first_iocbq->iocb.ulpBdeCount = 1;
+		first_iocbq->iocb.un.cont64[0].tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+		first_iocbq->iocb.un.rcvels.remoteID = sid;
+	}
+	iocbq = first_iocbq;
+	/*
+	 * Each IOCBq can have two Buffers assigned, so go through the list
+	 * of buffers for this sequence and save two buffers in each IOCBq
+	 */
+	list_for_each_entry_safe(d_buf, n_buf, &seq_dmabuf->dbuf.list, list) {
+		if (!iocbq) {
+			lpfc_in_buf_free(vport->phba, d_buf);
+			continue;
+		}
+		if (!iocbq->context3) {
+			iocbq->context3 = d_buf;
+			iocbq->iocb.ulpBdeCount++;
+			iocbq->iocb.unsli3.rcvsli3.bde2.tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+		} else {
+			iocbq = lpfc_sli_get_iocbq(vport->phba);
+			if (!iocbq) {
+				if (first_iocbq) {
+					first_iocbq->iocb.ulpStatus =
+							IOSTAT_FCP_RSP_ERROR;
+					first_iocbq->iocb.un.ulpWord[4] =
+							IOERR_NO_RESOURCES;
+				}
+				lpfc_in_buf_free(vport->phba, d_buf);
+				continue;
+			}
+			iocbq->context2 = d_buf;
+			iocbq->context3 = NULL;
+			iocbq->iocb.ulpBdeCount = 1;
+			iocbq->iocb.un.cont64[0].tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+			iocbq->iocb.un.rcvels.remoteID = sid;
+			list_add_tail(&iocbq->list, &first_iocbq->list);
+		}
+	}
+	return first_iocbq;
+}
+
+/**
+ * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called with no lock held. This function processes all
+ * the received buffers and gives it to upper layers when a received buffer
+ * indicates that it is the final frame in the sequence. The interrupt
+ * service routine processes received buffers at interrupt contexts and adds
+ * received dma buffers to the rb_pend_list queue and signals the worker thread.
+ * Worker thread calls lpfc_sli4_handle_received_buffer, which will call the
+ * appropriate receive function when the final frame in a sequence is received.
+ **/
+int
+lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba)
+{
+	LIST_HEAD(cmplq);
+	struct hbq_dmabuf *dmabuf, *seq_dmabuf;
+	struct fc_frame_header *fc_hdr;
+	struct lpfc_vport *vport;
+	uint32_t fcfi;
+	struct lpfc_iocbq *iocbq;
+
+	/* Clear hba flag and get all received buffers into the cmplq */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~HBA_RECEIVE_BUFFER;
+	list_splice_init(&phba->rb_pend_list, &cmplq);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Process each received buffer */
+	while ((dmabuf = lpfc_sli_hbqbuf_get(&cmplq)) != NULL) {
+		fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+		/* check to see if this a valid type of frame */
+		if (lpfc_fc_frame_check(phba, fc_hdr)) {
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->rcqe);
+		vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi);
+		if (!vport) {
+			/* throw out the frame */
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		/* Link this frame */
+		seq_dmabuf = lpfc_fc_frame_add(vport, dmabuf);
+		if (!seq_dmabuf) {
+			/* unable to add frame to vport - throw it out */
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		/* If not last frame in sequence continue processing frames. */
+		if (!lpfc_seq_complete(seq_dmabuf)) {
+			/*
+			 * When saving off frames post a new one and mark this
+			 * frame to be freed when it is finished.
+			 **/
+			lpfc_sli_hbqbuf_fill_hbqs(phba, LPFC_ELS_HBQ, 1);
+			dmabuf->tag = -1;
+			continue;
+		}
+		fc_hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+		iocbq = lpfc_prep_seq(vport, seq_dmabuf);
+		if (!lpfc_complete_unsol_iocb(phba,
+					      &phba->sli.ring[LPFC_ELS_RING],
+					      iocbq, fc_hdr->fh_r_ctl,
+					      fc_hdr->fh_type))
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"2540 Ring %d handler: unexpected Rctl "
+					"x%x Type x%x received\n",
+					LPFC_ELS_RING,
+					fc_hdr->fh_r_ctl, fc_hdr->fh_type);
+	};
+	return 0;
+}
+
+/**
+ * lpfc_sli4_post_all_rpi_hdrs - Post the rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ *
+ * This routine does not require any locks.  It's usage is expected
+ * to be driver load or reset recovery when the driver is
+ * sequential.
+ *
+ * Return codes
+ * 	0 - sucessful
+ *      EIO - The mailbox failed to complete successfully.
+ * 	When this error occurs, the driver is not guaranteed
+ *	to have any rpi regions posted to the device and
+ *	must either attempt to repost the regions or take a
+ *	fatal error.
+ **/
+int
+lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *phba)
+{
+	struct lpfc_rpi_hdr *rpi_page;
+	uint32_t rc = 0;
+
+	/* Post all rpi memory regions to the port. */
+	list_for_each_entry(rpi_page, &phba->sli4_hba.lpfc_rpi_hdr_list, list) {
+		rc = lpfc_sli4_post_rpi_hdr(phba, rpi_page);
+		if (rc != MBX_SUCCESS) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2008 Error %d posting all rpi "
+					"headers\n", rc);
+			rc = -EIO;
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_post_rpi_hdr - Post an rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ * @rpi_page:  pointer to the rpi memory region.
+ *
+ * This routine is invoked to post a single rpi header to the
+ * HBA consistent with the SLI-4 interface spec.  This memory region
+ * maps up to 64 rpi context regions.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No available memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
+{
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mbx_post_hdr_tmpl *hdr_tmpl;
+	uint32_t rc = 0;
+	uint32_t mbox_tmo;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* The port is notified of the header region via a mailbox command. */
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2001 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
+	}
+
+	/* Post all rpi memory regions to the port. */
+	hdr_tmpl = &mboxq->u.mqe.un.hdr_tmpl;
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE,
+			 sizeof(struct lpfc_mbx_post_hdr_tmpl) -
+			 sizeof(struct mbox_header), LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_post_hdr_tmpl_page_cnt,
+	       hdr_tmpl, rpi_page->page_count);
+	bf_set(lpfc_mbx_post_hdr_tmpl_rpi_offset, hdr_tmpl,
+	       rpi_page->start_rpi);
+	hdr_tmpl->rpi_paddr_lo = putPaddrLow(rpi_page->dmabuf->phys);
+	hdr_tmpl->rpi_paddr_hi = putPaddrHigh(rpi_page->dmabuf->phys);
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+	shdr = (union lpfc_sli4_cfg_shdr *) &hdr_tmpl->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2514 POST_RPI_HDR mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_alloc_rpi - Get an available rpi in the device's range
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ *
+ * Returns
+ * 	A nonzero rpi defined as rpi_base <= rpi < max_rpi if sucessful
+ * 	LPFC_RPI_ALLOC_ERROR if no rpis are available.
+ **/
+int
+lpfc_sli4_alloc_rpi(struct lpfc_hba *phba)
+{
+	int rpi;
+	uint16_t max_rpi, rpi_base, rpi_limit;
+	uint16_t rpi_remaining;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	max_rpi = phba->sli4_hba.max_cfg_param.max_rpi;
+	rpi_base = phba->sli4_hba.max_cfg_param.rpi_base;
+	rpi_limit = phba->sli4_hba.next_rpi;
+
+	/*
+	 * The valid rpi range is not guaranteed to be zero-based.  Start
+	 * the search at the rpi_base as reported by the port.
+	 */
+	spin_lock_irq(&phba->hbalock);
+	rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, rpi_base);
+	if (rpi >= rpi_limit || rpi < rpi_base)
+		rpi = LPFC_RPI_ALLOC_ERROR;
+	else {
+		set_bit(rpi, phba->sli4_hba.rpi_bmask);
+		phba->sli4_hba.max_cfg_param.rpi_used++;
+		phba->sli4_hba.rpi_count++;
+	}
+
+	/*
+	 * Don't try to allocate more rpi header regions if the device limit
+	 * on available rpis max has been exhausted.
+	 */
+	if ((rpi == LPFC_RPI_ALLOC_ERROR) &&
+	    (phba->sli4_hba.rpi_count >= max_rpi)) {
+		spin_unlock_irq(&phba->hbalock);
+		return rpi;
+	}
+
+	/*
+	 * If the driver is running low on rpi resources, allocate another
+	 * page now.  Note that the next_rpi value is used because
+	 * it represents how many are actually in use whereas max_rpi notes
+	 * how many are supported max by the device.
+	 */
+	rpi_remaining = phba->sli4_hba.next_rpi - rpi_base -
+		phba->sli4_hba.rpi_count;
+	spin_unlock_irq(&phba->hbalock);
+	if (rpi_remaining < LPFC_RPI_LOW_WATER_MARK) {
+		rpi_hdr = lpfc_sli4_create_rpi_hdr(phba);
+		if (!rpi_hdr) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2002 Error Could not grow rpi "
+					"count\n");
+		} else {
+			lpfc_sli4_post_rpi_hdr(phba, rpi_hdr);
+		}
+	}
+
+	return rpi;
+}
+
+/**
+ * lpfc_sli4_free_rpi - Release an rpi for reuse.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release an rpi to the pool of
+ * available rpis maintained by the driver.
+ **/
+void
+lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi)
+{
+	spin_lock_irq(&phba->hbalock);
+	clear_bit(rpi, phba->sli4_hba.rpi_bmask);
+	phba->sli4_hba.rpi_count--;
+	phba->sli4_hba.max_cfg_param.rpi_used--;
+	spin_unlock_irq(&phba->hbalock);
+}
+
+/**
+ * lpfc_sli4_remove_rpis - Remove the rpi bitmask region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the memory region that
+ * provided rpi via a bitmask.
+ **/
+void
+lpfc_sli4_remove_rpis(struct lpfc_hba *phba)
+{
+	kfree(phba->sli4_hba.rpi_bmask);
+}
+
+/**
+ * lpfc_sli4_resume_rpi - Remove the rpi bitmask region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the memory region that
+ * provided rpi via a bitmask.
+ **/
+int
+lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp)
+{
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_hba *phba = ndlp->phba;
+	int rc;
+
+	/* The port is notified of the header region via a mailbox command. */
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+
+	/* Post all rpi memory regions to the port. */
+	lpfc_resume_rpi(mboxq, ndlp);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2010 Resume RPI Mailbox failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return -EIO;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_sli4_init_vpi - Initialize a vpi with the port
+ * @phba: pointer to lpfc hba data structure.
+ * @vpi: vpi value to activate with the port.
+ *
+ * This routine is invoked to activate a vpi with the
+ * port when the host intends to use vports with a
+ * nonzero vpi.
+ *
+ * Returns:
+ *    0 success
+ *    -Evalue otherwise
+ **/
+int
+lpfc_sli4_init_vpi(struct lpfc_hba *phba, uint16_t vpi)
+{
+	LPFC_MBOXQ_t *mboxq;
+	int rc = 0;
+	uint32_t mbox_tmo;
+
+	if (vpi == 0)
+		return -EINVAL;
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+	lpfc_init_vpi(mboxq, vpi);
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_INIT_VPI);
+	rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2022 INIT VPI Mailbox failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_mbx_cmpl_add_fcf_record - add fcf mbox completion handler.
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This routine is invoked to manually add a single FCF record. The caller
+ * must pass a completely initialized FCF_Record.  This routine takes
+ * care of the nonembedded mailbox operations.
+ **/
+static void
+lpfc_mbx_cmpl_add_fcf_record(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	void *virt_addr;
+	union lpfc_sli4_cfg_shdr *shdr;
+	uint32_t shdr_status, shdr_add_status;
+
+	virt_addr = mboxq->sge_array->addr[0];
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) virt_addr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+
+	if ((shdr_status || shdr_add_status) &&
+		(shdr_status != STATUS_FCF_IN_USE))
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2558 ADD_FCF_RECORD mailbox failed with "
+			"status x%x add_status x%x\n",
+			shdr_status, shdr_add_status);
+
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+}
+
+/**
+ * lpfc_sli4_add_fcf_record - Manually add an FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_record:  pointer to the initialized fcf record to add.
+ *
+ * This routine is invoked to manually add a single FCF record. The caller
+ * must pass a completely initialized FCF_Record.  This routine takes
+ * care of the nonembedded mailbox operations.
+ **/
+int
+lpfc_sli4_add_fcf_record(struct lpfc_hba *phba, struct fcf_record *fcf_record)
+{
+	int rc = 0;
+	LPFC_MBOXQ_t *mboxq;
+	uint8_t *bytep;
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	struct lpfc_mbx_sge sge;
+	uint32_t alloc_len, req_len;
+	uint32_t fcfindex;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2009 Failed to allocate mbox for ADD_FCF cmd\n");
+		return -ENOMEM;
+	}
+
+	req_len = sizeof(struct fcf_record) + sizeof(union lpfc_sli4_cfg_shdr) +
+		  sizeof(uint32_t);
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloc_len = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+				     LPFC_MBOX_OPCODE_FCOE_ADD_FCF,
+				     req_len, LPFC_SLI4_MBX_NEMBED);
+	if (alloc_len < req_len) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2523 Allocated DMA memory size (x%x) is "
+			"less than the requested DMA memory "
+			"size (x%x)\n", alloc_len, req_len);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the first SGE entry from the non-embedded DMA memory.  This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2526 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+	/*
+	 * Configure the FCF record for FCFI 0.  This is the driver's
+	 * hardcoded default and gets used in nonFIP mode.
+	 */
+	fcfindex = bf_get(lpfc_fcf_record_fcf_index, fcf_record);
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+	lpfc_sli_pcimem_bcopy(&fcfindex, bytep, sizeof(uint32_t));
+
+	/*
+	 * Copy the fcf_index and the FCF Record Data. The data starts after
+	 * the FCoE header plus word10. The data copy needs to be endian
+	 * correct.
+	 */
+	bytep += sizeof(uint32_t);
+	lpfc_sli_pcimem_bcopy(fcf_record, bytep, sizeof(struct fcf_record));
+	mboxq->vport = phba->pport;
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_add_fcf_record;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2515 ADD_FCF_RECORD mailbox failed with "
+			"status 0x%x\n", rc);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		rc = -EIO;
+	} else
+		rc = 0;
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_build_dflt_fcf_record - Build the driver's default FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_record:  pointer to the fcf record to write the default data.
+ * @fcf_index: FCF table entry index.
+ *
+ * This routine is invoked to build the driver's default FCF record.  The
+ * values used are hardcoded.  This routine handles memory initialization.
+ *
+ **/
+void
+lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *phba,
+				struct fcf_record *fcf_record,
+				uint16_t fcf_index)
+{
+	memset(fcf_record, 0, sizeof(struct fcf_record));
+	fcf_record->max_rcv_size = LPFC_FCOE_MAX_RCV_SIZE;
+	fcf_record->fka_adv_period = LPFC_FCOE_FKA_ADV_PER;
+	fcf_record->fip_priority = LPFC_FCOE_FIP_PRIORITY;
+	bf_set(lpfc_fcf_record_mac_0, fcf_record, phba->fc_map[0]);
+	bf_set(lpfc_fcf_record_mac_1, fcf_record, phba->fc_map[1]);
+	bf_set(lpfc_fcf_record_mac_2, fcf_record, phba->fc_map[2]);
+	bf_set(lpfc_fcf_record_mac_3, fcf_record, LPFC_FCOE_FCF_MAC3);
+	bf_set(lpfc_fcf_record_mac_4, fcf_record, LPFC_FCOE_FCF_MAC4);
+	bf_set(lpfc_fcf_record_mac_5, fcf_record, LPFC_FCOE_FCF_MAC5);
+	bf_set(lpfc_fcf_record_fc_map_0, fcf_record, phba->fc_map[0]);
+	bf_set(lpfc_fcf_record_fc_map_1, fcf_record, phba->fc_map[1]);
+	bf_set(lpfc_fcf_record_fc_map_2, fcf_record, phba->fc_map[2]);
+	bf_set(lpfc_fcf_record_fcf_valid, fcf_record, 1);
+	bf_set(lpfc_fcf_record_fcf_index, fcf_record, fcf_index);
+	bf_set(lpfc_fcf_record_mac_addr_prov, fcf_record,
+		LPFC_FCF_FPMA | LPFC_FCF_SPMA);
+	/* Set the VLAN bit map */
+	if (phba->valid_vlan) {
+		fcf_record->vlan_bitmap[phba->vlan_id / 8]
+			= 1 << (phba->vlan_id % 8);
+	}
+}
+
+/**
+ * lpfc_sli4_read_fcf_record - Read the driver's default FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_index: FCF table entry offset.
+ *
+ * This routine is invoked to read up to @fcf_num of FCF record from the
+ * device starting with the given @fcf_index.
+ **/
+int
+lpfc_sli4_read_fcf_record(struct lpfc_hba *phba, uint16_t fcf_index)
+{
+	int rc = 0, error;
+	LPFC_MBOXQ_t *mboxq;
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	uint8_t *bytep;
+	struct lpfc_mbx_sge sge;
+	uint32_t alloc_len, req_len;
+	struct lpfc_mbx_read_fcf_tbl *read_fcf;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2000 Failed to allocate mbox for "
+				"READ_FCF cmd\n");
+		return -ENOMEM;
+	}
+
+	req_len = sizeof(struct fcf_record) +
+		  sizeof(union lpfc_sli4_cfg_shdr) + 2 * sizeof(uint32_t);
+
+	/* Set up READ_FCF SLI4_CONFIG mailbox-ioctl command */
+	alloc_len = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_READ_FCF_TABLE, req_len,
+			 LPFC_SLI4_MBX_NEMBED);
+
+	if (alloc_len < req_len) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0291 Allocated DMA memory size (x%x) is "
+				"less than the requested DMA memory "
+				"size (x%x)\n", alloc_len, req_len);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory. This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2527 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+	read_fcf = (struct lpfc_mbx_read_fcf_tbl *)virt_addr;
+
+	/* Set up command fields */
+	bf_set(lpfc_mbx_read_fcf_tbl_indx, &read_fcf->u.request, fcf_index);
+	/* Perform necessary endian conversion */
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+	lpfc_sli_pcimem_bcopy(bytep, bytep, sizeof(uint32_t));
+	mboxq->vport = phba->pport;
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_read_fcf_record;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		error = -EIO;
+	} else
+		error = 0;
+	return error;
+}
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 8839386..7d37eb7 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -29,13 +29,23 @@
 	LPFC_CTX_HOST
 } lpfc_ctx_cmd;
 
+/* This structure is used to carry the needed response IOCB states */
+struct lpfc_sli4_rspiocb_info {
+	uint8_t hw_status;
+	uint8_t bfield;
+#define LPFC_XB	0x1
+#define LPFC_PV	0x2
+	uint8_t priority;
+	uint8_t reserved;
+};
+
 /* This structure is used to handle IOCB requests / responses */
 struct lpfc_iocbq {
 	/* lpfc_iocbqs are used in double linked lists */
 	struct list_head list;
 	struct list_head clist;
 	uint16_t iotag;         /* pre-assigned IO tag */
-	uint16_t rsvd1;
+	uint16_t sli4_xritag;   /* pre-assigned XRI, (OXID) tag. */
 
 	IOCB_t iocb;		/* IOCB cmd */
 	uint8_t retry;		/* retry counter for IOCB cmd - if needed */
@@ -65,7 +75,7 @@
 			   struct lpfc_iocbq *);
 	void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
 			   struct lpfc_iocbq *);
-
+	struct lpfc_sli4_rspiocb_info sli4_info;
 };
 
 #define SLI_IOCB_RET_IOCB      1	/* Return IOCB if cmd ring full */
@@ -81,14 +91,18 @@
 typedef struct lpfcMboxq {
 	/* MBOXQs are used in single linked lists */
 	struct list_head list;	/* ptr to next mailbox command */
-	MAILBOX_t mb;		/* Mailbox cmd */
-	struct lpfc_vport *vport;/* virutal port pointer */
+	union {
+		MAILBOX_t mb;		/* Mailbox cmd */
+		struct lpfc_mqe mqe;
+	} u;
+	struct lpfc_vport *vport;/* virtual port pointer */
 	void *context1;		/* caller context information */
 	void *context2;		/* caller context information */
 
 	void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
 	uint8_t mbox_flag;
-
+	struct lpfc_mcqe mcqe;
+	struct lpfc_mbx_nembed_sge_virt *sge_array;
 } LPFC_MBOXQ_t;
 
 #define MBX_POLL        1	/* poll mailbox till command done, then
@@ -230,10 +244,11 @@
 
 	/* Additional sli_flags */
 #define LPFC_SLI_MBOX_ACTIVE      0x100	/* HBA mailbox is currently active */
-#define LPFC_SLI2_ACTIVE          0x200	/* SLI2 overlay in firmware is active */
+#define LPFC_SLI_ACTIVE           0x200	/* SLI in firmware is active */
 #define LPFC_PROCESS_LA           0x400	/* Able to process link attention */
 #define LPFC_BLOCK_MGMT_IO        0x800	/* Don't allow mgmt mbx or iocb cmds */
 #define LPFC_MENLO_MAINT          0x1000 /* need for menl fw download */
+#define LPFC_SLI_ASYNC_MBX_BLK    0x2000 /* Async mailbox is blocked */
 
 	struct lpfc_sli_ring ring[LPFC_MAX_RING];
 	int fcp_ring;		/* ring used for FCP initiator commands */
@@ -261,6 +276,8 @@
 
 #define LPFC_MBOX_TMO           30	/* Sec tmo for outstanding mbox
 					   command */
+#define LPFC_MBOX_SLI4_CONFIG_TMO 60	/* Sec tmo for outstanding mbox
+					   command */
 #define LPFC_MBOX_TMO_FLASH_CMD 300     /* Sec tmo for outstanding FLASH write
 					 * or erase cmds. This is especially
 					 * long because of the potential of
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
new file mode 100644
index 0000000..5196b466
--- /dev/null
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -0,0 +1,467 @@
+/*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for         *
+ * Fibre Channel Host Bus Adapters.                                *
+ * Copyright (C) 2009 Emulex.  All rights reserved.                *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *******************************************************************/
+
+#define LPFC_ACTIVE_MBOX_WAIT_CNT               100
+#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32
+#define LPFC_GET_QE_REL_INT			32
+#define LPFC_RPI_LOW_WATER_MARK			10
+/* Number of SGL entries can be posted in a 4KB nonembedded mbox command */
+#define LPFC_NEMBED_MBOX_SGL_CNT		254
+
+/* Multi-queue arrangement for fast-path FCP work queues */
+#define LPFC_FN_EQN_MAX       8
+#define LPFC_SP_EQN_DEF       1
+#define LPFC_FP_EQN_DEF       1
+#define LPFC_FP_EQN_MIN       1
+#define LPFC_FP_EQN_MAX       (LPFC_FN_EQN_MAX - LPFC_SP_EQN_DEF)
+
+#define LPFC_FN_WQN_MAX       32
+#define LPFC_SP_WQN_DEF       1
+#define LPFC_FP_WQN_DEF       4
+#define LPFC_FP_WQN_MIN       1
+#define LPFC_FP_WQN_MAX       (LPFC_FN_WQN_MAX - LPFC_SP_WQN_DEF)
+
+/*
+ * Provide the default FCF Record attributes used by the driver
+ * when nonFIP mode is configured and there is no other default
+ * FCF Record attributes.
+ */
+#define LPFC_FCOE_FCF_DEF_INDEX	0
+#define LPFC_FCOE_FCF_GET_FIRST	0xFFFF
+#define LPFC_FCOE_FCF_NEXT_NONE	0xFFFF
+
+/* First 3 bytes of default FCF MAC is specified by FC_MAP */
+#define LPFC_FCOE_FCF_MAC3	0xFF
+#define LPFC_FCOE_FCF_MAC4	0xFF
+#define LPFC_FCOE_FCF_MAC5	0xFE
+#define LPFC_FCOE_FCF_MAP0	0x0E
+#define LPFC_FCOE_FCF_MAP1	0xFC
+#define LPFC_FCOE_FCF_MAP2	0x00
+#define LPFC_FCOE_MAX_RCV_SIZE	0x5AC
+#define LPFC_FCOE_FKA_ADV_PER	0
+#define LPFC_FCOE_FIP_PRIORITY	0x80
+
+enum lpfc_sli4_queue_type {
+	LPFC_EQ,
+	LPFC_GCQ,
+	LPFC_MCQ,
+	LPFC_WCQ,
+	LPFC_RCQ,
+	LPFC_MQ,
+	LPFC_WQ,
+	LPFC_HRQ,
+	LPFC_DRQ
+};
+
+/* The queue sub-type defines the functional purpose of the queue */
+enum lpfc_sli4_queue_subtype {
+	LPFC_NONE,
+	LPFC_MBOX,
+	LPFC_FCP,
+	LPFC_ELS,
+	LPFC_USOL
+};
+
+union sli4_qe {
+	void *address;
+	struct lpfc_eqe *eqe;
+	struct lpfc_cqe *cqe;
+	struct lpfc_mcqe *mcqe;
+	struct lpfc_wcqe_complete *wcqe_complete;
+	struct lpfc_wcqe_release *wcqe_release;
+	struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
+	struct lpfc_rcqe_complete *rcqe_complete;
+	struct lpfc_mqe *mqe;
+	union  lpfc_wqe *wqe;
+	struct lpfc_rqe *rqe;
+};
+
+struct lpfc_queue {
+	struct list_head list;
+	enum lpfc_sli4_queue_type type;
+	enum lpfc_sli4_queue_subtype subtype;
+	struct lpfc_hba *phba;
+	struct list_head child_list;
+	uint32_t entry_count;	/* Number of entries to support on the queue */
+	uint32_t entry_size;	/* Size of each queue entry. */
+	uint32_t queue_id;	/* Queue ID assigned by the hardware */
+	struct list_head page_list;
+	uint32_t page_count;	/* Number of pages allocated for this queue */
+
+	uint32_t host_index;	/* The host's index for putting or getting */
+	uint32_t hba_index;	/* The last known hba index for get or put */
+	union sli4_qe qe[1];	/* array to index entries (must be last) */
+};
+
+struct lpfc_cq_event {
+	struct list_head list;
+	union {
+		struct lpfc_mcqe		mcqe_cmpl;
+		struct lpfc_acqe_link		acqe_link;
+		struct lpfc_acqe_fcoe		acqe_fcoe;
+		struct lpfc_acqe_dcbx		acqe_dcbx;
+		struct lpfc_rcqe		rcqe_cmpl;
+		struct sli4_wcqe_xri_aborted	wcqe_axri;
+	} cqe;
+};
+
+struct lpfc_sli4_link {
+	uint8_t speed;
+	uint8_t duplex;
+	uint8_t status;
+	uint8_t physical;
+	uint8_t fault;
+};
+
+struct lpfc_fcf {
+	uint8_t	 fabric_name[8];
+	uint8_t  mac_addr[6];
+	uint16_t fcf_indx;
+	uint16_t fcfi;
+	uint32_t fcf_flag;
+#define FCF_AVAILABLE	0x01 /* FCF available for discovery */
+#define FCF_REGISTERED	0x02 /* FCF registered with FW */
+#define FCF_DISCOVERED	0x04 /* FCF discovery started  */
+#define FCF_BOOT_ENABLE 0x08 /* Boot bios use this FCF */
+#define FCF_IN_USE	0x10 /* Atleast one discovery completed */
+#define FCF_VALID_VLAN	0x20 /* Use the vlan id specified */
+	uint32_t priority;
+	uint32_t addr_mode;
+	uint16_t vlan_id;
+};
+
+#define LPFC_REGION23_SIGNATURE "RG23"
+#define LPFC_REGION23_VERSION	1
+#define LPFC_REGION23_LAST_REC  0xff
+struct lpfc_fip_param_hdr {
+	uint8_t type;
+#define FCOE_PARAM_TYPE		0xA0
+	uint8_t length;
+#define FCOE_PARAM_LENGTH	2
+	uint8_t parm_version;
+#define FIPP_VERSION		0x01
+	uint8_t parm_flags;
+#define	lpfc_fip_param_hdr_fipp_mode_SHIFT	6
+#define	lpfc_fip_param_hdr_fipp_mode_MASK	0x3
+#define lpfc_fip_param_hdr_fipp_mode_WORD	parm_flags
+#define	FIPP_MODE_ON				0x2
+#define	FIPP_MODE_OFF				0x0
+#define FIPP_VLAN_VALID				0x1
+};
+
+struct lpfc_fcoe_params {
+	uint8_t fc_map[3];
+	uint8_t reserved1;
+	uint16_t vlan_tag;
+	uint8_t reserved[2];
+};
+
+struct lpfc_fcf_conn_hdr {
+	uint8_t type;
+#define FCOE_CONN_TBL_TYPE		0xA1
+	uint8_t length;   /* words */
+	uint8_t reserved[2];
+};
+
+struct lpfc_fcf_conn_rec {
+	uint16_t flags;
+#define	FCFCNCT_VALID		0x0001
+#define	FCFCNCT_BOOT		0x0002
+#define	FCFCNCT_PRIMARY		0x0004   /* if not set, Secondary */
+#define	FCFCNCT_FBNM_VALID	0x0008
+#define	FCFCNCT_SWNM_VALID	0x0010
+#define	FCFCNCT_VLAN_VALID	0x0020
+#define	FCFCNCT_AM_VALID	0x0040
+#define	FCFCNCT_AM_PREFERRED	0x0080   /* if not set, AM Required */
+#define	FCFCNCT_AM_SPMA		0x0100	 /* if not set, FPMA */
+
+	uint16_t vlan_tag;
+	uint8_t fabric_name[8];
+	uint8_t switch_name[8];
+};
+
+struct lpfc_fcf_conn_entry {
+	struct list_head list;
+	struct lpfc_fcf_conn_rec conn_rec;
+};
+
+/*
+ * Define the host's bootstrap mailbox.  This structure contains
+ * the member attributes needed to create, use, and destroy the
+ * bootstrap mailbox region.
+ *
+ * The macro definitions for the bmbx data structure are defined
+ * in lpfc_hw4.h with the register definition.
+ */
+struct lpfc_bmbx {
+	struct lpfc_dmabuf *dmabuf;
+	struct dma_address dma_address;
+	void *avirt;
+	dma_addr_t aphys;
+	uint32_t bmbx_size;
+};
+
+#define LPFC_EQE_SIZE LPFC_EQE_SIZE_4
+
+#define LPFC_EQE_SIZE_4B 	4
+#define LPFC_EQE_SIZE_16B	16
+#define LPFC_CQE_SIZE		16
+#define LPFC_WQE_SIZE		64
+#define LPFC_MQE_SIZE		256
+#define LPFC_RQE_SIZE		8
+
+#define LPFC_EQE_DEF_COUNT	1024
+#define LPFC_CQE_DEF_COUNT      256
+#define LPFC_WQE_DEF_COUNT      64
+#define LPFC_MQE_DEF_COUNT      16
+#define LPFC_RQE_DEF_COUNT	512
+
+#define LPFC_QUEUE_NOARM	false
+#define LPFC_QUEUE_REARM	true
+
+
+/*
+ * SLI4 CT field defines
+ */
+#define SLI4_CT_RPI 0
+#define SLI4_CT_VPI 1
+#define SLI4_CT_VFI 2
+#define SLI4_CT_FCFI 3
+
+#define LPFC_SLI4_MAX_SEGMENT_SIZE 0x10000
+
+/*
+ * SLI4 specific data structures
+ */
+struct lpfc_max_cfg_param {
+	uint16_t max_xri;
+	uint16_t xri_base;
+	uint16_t xri_used;
+	uint16_t max_rpi;
+	uint16_t rpi_base;
+	uint16_t rpi_used;
+	uint16_t max_vpi;
+	uint16_t vpi_base;
+	uint16_t vpi_used;
+	uint16_t max_vfi;
+	uint16_t vfi_base;
+	uint16_t vfi_used;
+	uint16_t max_fcfi;
+	uint16_t fcfi_base;
+	uint16_t fcfi_used;
+	uint16_t max_eq;
+	uint16_t max_rq;
+	uint16_t max_cq;
+	uint16_t max_wq;
+};
+
+struct lpfc_hba;
+/* SLI4 HBA multi-fcp queue handler struct */
+struct lpfc_fcp_eq_hdl {
+	uint32_t idx;
+	struct lpfc_hba *phba;
+};
+
+/* SLI4 HBA data structure entries */
+struct lpfc_sli4_hba {
+	void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR0, config space registers */
+	void __iomem *ctrl_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR1, control registers */
+	void __iomem *drbl_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR2, doorbell registers */
+	/* BAR0 PCI config space register memory map */
+	void __iomem *UERRLOregaddr; /* Address to UERR_STATUS_LO register */
+	void __iomem *UERRHIregaddr; /* Address to UERR_STATUS_HI register */
+	void __iomem *ONLINE0regaddr; /* Address to components of internal UE */
+	void __iomem *ONLINE1regaddr; /* Address to components of internal UE */
+#define LPFC_ONLINE_NERR	0xFFFFFFFF
+	void __iomem *SCRATCHPADregaddr; /* Address to scratchpad register */
+	/* BAR1 FCoE function CSR register memory map */
+	void __iomem *STAregaddr;    /* Address to HST_STATE register */
+	void __iomem *ISRregaddr;    /* Address to HST_ISR register */
+	void __iomem *IMRregaddr;    /* Address to HST_IMR register */
+	void __iomem *ISCRregaddr;   /* Address to HST_ISCR register */
+	/* BAR2 VF-0 doorbell register memory map */
+	void __iomem *RQDBregaddr;   /* Address to RQ_DOORBELL register */
+	void __iomem *WQDBregaddr;   /* Address to WQ_DOORBELL register */
+	void __iomem *EQCQDBregaddr; /* Address to EQCQ_DOORBELL register */
+	void __iomem *MQDBregaddr;   /* Address to MQ_DOORBELL register */
+	void __iomem *BMBXregaddr;   /* Address to BootStrap MBX register */
+
+	struct msix_entry *msix_entries;
+	uint32_t cfg_eqn;
+	struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */
+	/* Pointers to the constructed SLI4 queues */
+	struct lpfc_queue **fp_eq; /* Fast-path event queue */
+	struct lpfc_queue *sp_eq;  /* Slow-path event queue */
+	struct lpfc_queue **fcp_wq;/* Fast-path FCP work queue */
+	struct lpfc_queue *mbx_wq; /* Slow-path MBOX work queue */
+	struct lpfc_queue *els_wq; /* Slow-path ELS work queue */
+	struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */
+	struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */
+	struct lpfc_queue **fcp_cq;/* Fast-path FCP compl queue */
+	struct lpfc_queue *mbx_cq; /* Slow-path mailbox complete queue */
+	struct lpfc_queue *els_cq; /* Slow-path ELS response complete queue */
+	struct lpfc_queue *rxq_cq; /* Slow-path unsolicited complete queue */
+
+	/* Setup information for various queue parameters */
+	int eq_esize;
+	int eq_ecount;
+	int cq_esize;
+	int cq_ecount;
+	int wq_esize;
+	int wq_ecount;
+	int mq_esize;
+	int mq_ecount;
+	int rq_esize;
+	int rq_ecount;
+#define LPFC_SP_EQ_MAX_INTR_SEC         10000
+#define LPFC_FP_EQ_MAX_INTR_SEC         10000
+
+	uint32_t intr_enable;
+	struct lpfc_bmbx bmbx;
+	struct lpfc_max_cfg_param max_cfg_param;
+	uint16_t next_xri; /* last_xri - max_cfg_param.xri_base = used */
+	uint16_t next_rpi;
+	uint16_t scsi_xri_max;
+	uint16_t scsi_xri_cnt;
+	struct list_head lpfc_free_sgl_list;
+	struct list_head lpfc_sgl_list;
+	struct lpfc_sglq **lpfc_els_sgl_array;
+	struct list_head lpfc_abts_els_sgl_list;
+	struct lpfc_scsi_buf **lpfc_scsi_psb_array;
+	struct list_head lpfc_abts_scsi_buf_list;
+	uint32_t total_sglq_bufs;
+	struct lpfc_sglq **lpfc_sglq_active_list;
+	struct list_head lpfc_rpi_hdr_list;
+	unsigned long *rpi_bmask;
+	uint16_t rpi_count;
+	struct lpfc_sli4_flags sli4_flags;
+	struct list_head sp_rspiocb_work_queue;
+	struct list_head sp_cqe_event_pool;
+	struct list_head sp_asynce_work_queue;
+	struct list_head sp_fcp_xri_aborted_work_queue;
+	struct list_head sp_els_xri_aborted_work_queue;
+	struct list_head sp_unsol_work_queue;
+	struct lpfc_sli4_link link_state;
+	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
+	spinlock_t abts_sgl_list_lock; /* list of aborted els IOs */
+};
+
+enum lpfc_sge_type {
+	GEN_BUFF_TYPE,
+	SCSI_BUFF_TYPE
+};
+
+struct lpfc_sglq {
+	/* lpfc_sglqs are used in double linked lists */
+	struct list_head list;
+	struct list_head clist;
+	enum lpfc_sge_type buff_type; /* is this a scsi sgl */
+	uint16_t iotag;         /* pre-assigned IO tag */
+	uint16_t sli4_xritag;   /* pre-assigned XRI, (OXID) tag. */
+	struct sli4_sge *sgl;	/* pre-assigned SGL */
+	void *virt;		/* virtual address. */
+	dma_addr_t phys;	/* physical address */
+};
+
+struct lpfc_rpi_hdr {
+	struct list_head list;
+	uint32_t len;
+	struct lpfc_dmabuf *dmabuf;
+	uint32_t page_count;
+	uint32_t start_rpi;
+};
+
+/*
+ * SLI4 specific function prototypes
+ */
+int lpfc_pci_function_reset(struct lpfc_hba *);
+int lpfc_sli4_hba_setup(struct lpfc_hba *);
+int lpfc_sli4_hba_down(struct lpfc_hba *);
+int lpfc_sli4_config(struct lpfc_hba *, struct lpfcMboxq *, uint8_t,
+		     uint8_t, uint32_t, bool);
+void lpfc_sli4_mbox_cmd_free(struct lpfc_hba *, struct lpfcMboxq *);
+void lpfc_sli4_mbx_sge_set(struct lpfcMboxq *, uint32_t, dma_addr_t, uint32_t);
+void lpfc_sli4_mbx_sge_get(struct lpfcMboxq *, uint32_t,
+			   struct lpfc_mbx_sge *);
+
+void lpfc_sli4_hba_reset(struct lpfc_hba *);
+struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
+			uint32_t);
+void lpfc_sli4_queue_free(struct lpfc_queue *);
+uint32_t lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint16_t);
+uint32_t lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t, uint32_t);
+uint32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t);
+uint32_t lpfc_wq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t);
+uint32_t lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, struct lpfc_queue *, uint32_t);
+uint32_t lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_wq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_rq_destroy(struct lpfc_hba *, struct lpfc_queue *,
+			 struct lpfc_queue *);
+int lpfc_sli4_queue_setup(struct lpfc_hba *);
+void lpfc_sli4_queue_unset(struct lpfc_hba *);
+int lpfc_sli4_post_sgl(struct lpfc_hba *, dma_addr_t, dma_addr_t, uint16_t);
+int lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *);
+int lpfc_sli4_remove_all_sgl_pages(struct lpfc_hba *);
+uint16_t lpfc_sli4_next_xritag(struct lpfc_hba *);
+int lpfc_sli4_post_async_mbox(struct lpfc_hba *);
+int lpfc_sli4_post_sgl_list(struct lpfc_hba *phba);
+int lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *, struct list_head *, int);
+struct lpfc_cq_event *__lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
+struct lpfc_cq_event *lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
+void __lpfc_sli4_cq_event_release(struct lpfc_hba *, struct lpfc_cq_event *);
+void lpfc_sli4_cq_event_release(struct lpfc_hba *, struct lpfc_cq_event *);
+int lpfc_sli4_init_rpi_hdrs(struct lpfc_hba *);
+int lpfc_sli4_post_rpi_hdr(struct lpfc_hba *, struct lpfc_rpi_hdr *);
+int lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *);
+struct lpfc_rpi_hdr *lpfc_sli4_create_rpi_hdr(struct lpfc_hba *);
+void lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *);
+int lpfc_sli4_alloc_rpi(struct lpfc_hba *);
+void lpfc_sli4_free_rpi(struct lpfc_hba *, int);
+void lpfc_sli4_remove_rpis(struct lpfc_hba *);
+void lpfc_sli4_async_event_proc(struct lpfc_hba *);
+int lpfc_sli4_resume_rpi(struct lpfc_nodelist *);
+void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
+			       struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
+			       struct sli4_wcqe_xri_aborted *);
+int lpfc_sli4_brdreset(struct lpfc_hba *);
+int lpfc_sli4_add_fcf_record(struct lpfc_hba *, struct fcf_record *);
+void lpfc_sli_remove_dflt_fcf(struct lpfc_hba *);
+int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *);
+int lpfc_sli4_init_vpi(struct lpfc_hba *, uint16_t);
+uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool);
+uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool);
+void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t);
+int lpfc_sli4_read_fcf_record(struct lpfc_hba *, uint16_t);
+void lpfc_mbx_cmpl_read_fcf_record(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_sli4_post_status_check(struct lpfc_hba *);
+uint8_t lpfc_sli4_mbox_opcode_get(struct lpfc_hba *, struct lpfcMboxq *);
+
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index e599519..6b8a148 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.1"
+#define LPFC_DRIVER_VERSION "8.3.2"
 
 #define LPFC_DRIVER_NAME		"lpfc"
 #define LPFC_SP_DRIVER_HANDLER_NAME	"lpfc:sp"
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 917ad56..a6313ee 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -32,8 +32,10 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -89,6 +91,8 @@
 		vpi = 0;
 	else
 		set_bit(vpi, phba->vpi_bmask);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->sli4_hba.max_cfg_param.vpi_used++;
 	spin_unlock_irq(&phba->hbalock);
 	return vpi;
 }
@@ -96,8 +100,12 @@
 static void
 lpfc_free_vpi(struct lpfc_hba *phba, int vpi)
 {
+	if (vpi == 0)
+		return;
 	spin_lock_irq(&phba->hbalock);
 	clear_bit(vpi, phba->vpi_bmask);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->sli4_hba.max_cfg_param.vpi_used--;
 	spin_unlock_irq(&phba->hbalock);
 }
 
@@ -113,7 +121,7 @@
 	if (!pmb) {
 		return -ENOMEM;
 	}
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	lpfc_read_sparam(phba, pmb, vport->vpi);
 	/*
@@ -243,23 +251,22 @@
 		    (vport->fc_flag & wait_flags)  ||
 		    ((vport->port_state > LPFC_VPORT_FAILED) &&
 		     (vport->port_state < LPFC_VPORT_READY))) {
-			lpfc_printf_log(phba, KERN_INFO, LOG_VPORT,
+			lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
 					"1833 Vport discovery quiesce Wait:"
-					" vpi x%x state x%x fc_flags x%x"
+					" state x%x fc_flags x%x"
 					" num_nodes x%x, waiting 1000 msecs"
 					" total wait msecs x%x\n",
-					vport->vpi, vport->port_state,
-					vport->fc_flag, vport->num_disc_nodes,
+					vport->port_state, vport->fc_flag,
+					vport->num_disc_nodes,
 					jiffies_to_msecs(jiffies - start_time));
 			msleep(1000);
 		} else {
 			/* Base case.  Wait variants satisfied.  Break out */
-			lpfc_printf_log(phba, KERN_INFO, LOG_VPORT,
+			lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
 					 "1834 Vport discovery quiesced:"
-					 " vpi x%x state x%x fc_flags x%x"
+					 " state x%x fc_flags x%x"
 					 " wait msecs x%x\n",
-					 vport->vpi, vport->port_state,
-					 vport->fc_flag,
+					 vport->port_state, vport->fc_flag,
 					 jiffies_to_msecs(jiffies
 						- start_time));
 			break;
@@ -267,12 +274,10 @@
 	}
 
 	if (time_after(jiffies, wait_time_max))
-		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
 				"1835 Vport discovery quiesce failed:"
-				" vpi x%x state x%x fc_flags x%x"
-				" wait msecs x%x\n",
-				vport->vpi, vport->port_state,
-				vport->fc_flag,
+				" state x%x fc_flags x%x wait msecs x%x\n",
+				vport->port_state, vport->fc_flag,
 				jiffies_to_msecs(jiffies - start_time));
 }
 
@@ -308,6 +313,21 @@
 		goto error_out;
 	}
 
+	/*
+	 * In SLI4, the vpi must be activated before it can be used
+	 * by the port.
+	 */
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rc = lpfc_sli4_init_vpi(phba, vpi);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
+					"1838 Failed to INIT_VPI on vpi %d "
+					"status %d\n", vpi, rc);
+			rc = VPORT_NORESOURCES;
+			lpfc_free_vpi(phba, vpi);
+			goto error_out;
+		}
+	}
 
 	/* Assign an unused board number */
 	if ((instance = lpfc_get_instance()) < 0) {
@@ -535,6 +555,16 @@
 				 "physical host\n");
 		return VPORT_ERROR;
 	}
+
+	/* If the vport is a static vport fail the deletion. */
+	if ((vport->vport_flag & STATIC_VPORT) &&
+		!(phba->pport->load_flag & FC_UNLOADING)) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
+				 "1837 vport_delete failed: Cannot delete "
+				 "static vport.\n");
+		return VPORT_ERROR;
+	}
+
 	/*
 	 * If we are not unloading the driver then prevent the vport_delete
 	 * from happening until after this vport's discovery is finished.
@@ -710,7 +740,7 @@
 	struct lpfc_vport *port_iterator;
 	struct lpfc_vport **vports;
 	int index = 0;
-	vports = kzalloc((phba->max_vpi + 1) * sizeof(struct lpfc_vport *),
+	vports = kzalloc((phba->max_vports + 1) * sizeof(struct lpfc_vport *),
 			 GFP_KERNEL);
 	if (vports == NULL)
 		return NULL;
@@ -734,7 +764,7 @@
 	int i;
 	if (vports == NULL)
 		return;
-	for (i=0; vports[i] != NULL && i <= phba->max_vpi; i++)
+	for (i = 0; vports[i] != NULL && i <= phba->max_vports; i++)
 		scsi_host_put(lpfc_shost_from_vport(vports[i]));
 	kfree(vports);
 }
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 36b1d10..286c185 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -61,6 +61,7 @@
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_transport_sas.h>
 #include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
 
 #include "mpt2sas_debug.h"
 
@@ -68,10 +69,10 @@
 #define MPT2SAS_DRIVER_NAME		"mpt2sas"
 #define MPT2SAS_AUTHOR	"LSI Corporation <DL-MPTFusionLinux@lsi.com>"
 #define MPT2SAS_DESCRIPTION	"LSI MPT Fusion SAS 2.0 Device Driver"
-#define MPT2SAS_DRIVER_VERSION		"01.100.02.00"
+#define MPT2SAS_DRIVER_VERSION		"01.100.03.00"
 #define MPT2SAS_MAJOR_VERSION		01
 #define MPT2SAS_MINOR_VERSION		100
-#define MPT2SAS_BUILD_VERSION		02
+#define MPT2SAS_BUILD_VERSION		03
 #define MPT2SAS_RELEASE_VERSION		00
 
 /*
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index ba6ab17..14e473d 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -473,7 +473,7 @@
 }
 
 /**
- * _ctl_do_task_abort - assign an active smid to the abort_task
+ * _ctl_set_task_mid - assign an active smid to tm request
  * @ioc: per adapter object
  * @karg - (struct mpt2_ioctl_command)
  * @tm_request - pointer to mf from user space
@@ -482,7 +482,7 @@
  * during failure, the reply frame is filled.
  */
 static int
-_ctl_do_task_abort(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
+_ctl_set_task_mid(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
     Mpi2SCSITaskManagementRequest_t *tm_request)
 {
 	u8 found = 0;
@@ -494,6 +494,14 @@
 	Mpi2SCSITaskManagementReply_t *tm_reply;
 	u32 sz;
 	u32 lun;
+	char *desc = NULL;
+
+	if (tm_request->TaskType == MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK)
+		desc = "abort_task";
+	else if (tm_request->TaskType == MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK)
+		desc = "query_task";
+	else
+		return 0;
 
 	lun = scsilun_to_int((struct scsi_lun *)tm_request->LUN);
 
@@ -517,13 +525,13 @@
 	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 
 	if (!found) {
-		dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "ABORT_TASK: "
-		    "DevHandle(0x%04x), lun(%d), no active mid!!\n", ioc->name,
-		    tm_request->DevHandle, lun));
+		dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: "
+		    "handle(0x%04x), lun(%d), no active mid!!\n", ioc->name,
+		    desc, tm_request->DevHandle, lun));
 		tm_reply = ioc->ctl_cmds.reply;
 		tm_reply->DevHandle = tm_request->DevHandle;
 		tm_reply->Function = MPI2_FUNCTION_SCSI_TASK_MGMT;
-		tm_reply->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK;
+		tm_reply->TaskType = tm_request->TaskType;
 		tm_reply->MsgLength = sizeof(Mpi2SCSITaskManagementReply_t)/4;
 		tm_reply->VP_ID = tm_request->VP_ID;
 		tm_reply->VF_ID = tm_request->VF_ID;
@@ -535,9 +543,9 @@
 		return 1;
 	}
 
-	dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "ABORT_TASK: "
-	    "DevHandle(0x%04x), lun(%d), smid(%d)\n", ioc->name,
-	    tm_request->DevHandle, lun, tm_request->TaskMID));
+	dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: "
+	    "handle(0x%04x), lun(%d), task_mid(%d)\n", ioc->name,
+	    desc, tm_request->DevHandle, lun, tm_request->TaskMID));
 	return 0;
 }
 
@@ -739,8 +747,10 @@
 		    (Mpi2SCSITaskManagementRequest_t *)mpi_request;
 
 		if (tm_request->TaskType ==
-		    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
-			if (_ctl_do_task_abort(ioc, &karg, tm_request)) {
+		    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK ||
+		    tm_request->TaskType ==
+		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK) {
+			if (_ctl_set_task_mid(ioc, &karg, tm_request)) {
 				mpt2sas_base_free_smid(ioc, smid);
 				goto out;
 			}
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index e3a7967..2a01a5f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -197,12 +197,12 @@
 MODULE_DEVICE_TABLE(pci, scsih_pci_table);
 
 /**
- * scsih_set_debug_level - global setting of ioc->logging_level.
+ * _scsih_set_debug_level - global setting of ioc->logging_level.
  *
  * Note: The logging levels are defined in mpt2sas_debug.h.
  */
 static int
-scsih_set_debug_level(const char *val, struct kernel_param *kp)
+_scsih_set_debug_level(const char *val, struct kernel_param *kp)
 {
 	int ret = param_set_int(val, kp);
 	struct MPT2SAS_ADAPTER *ioc;
@@ -215,7 +215,7 @@
 		ioc->logging_level = logging_level;
 	return 0;
 }
-module_param_call(logging_level, scsih_set_debug_level, param_get_int,
+module_param_call(logging_level, _scsih_set_debug_level, param_get_int,
     &logging_level, 0644);
 
 /**
@@ -884,6 +884,41 @@
 }
 
 /**
+ * _scsih_scsi_lookup_find_by_lun - search for matching channel:id:lun
+ * @ioc: per adapter object
+ * @id: target id
+ * @lun: lun number
+ * @channel: channel
+ * Context: This function will acquire ioc->scsi_lookup_lock.
+ *
+ * This will search for a matching channel:id:lun in the scsi_lookup array,
+ * returning 1 if found.
+ */
+static u8
+_scsih_scsi_lookup_find_by_lun(struct MPT2SAS_ADAPTER *ioc, int id,
+    unsigned int lun, int channel)
+{
+	u8 found;
+	unsigned long	flags;
+	int i;
+
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	found = 0;
+	for (i = 0 ; i < ioc->request_depth; i++) {
+		if (ioc->scsi_lookup[i].scmd &&
+		    (ioc->scsi_lookup[i].scmd->device->id == id &&
+		    ioc->scsi_lookup[i].scmd->device->channel == channel &&
+		    ioc->scsi_lookup[i].scmd->device->lun == lun)) {
+			found = 1;
+			goto out;
+		}
+	}
+ out:
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+	return found;
+}
+
+/**
  * _scsih_get_chain_buffer_dma - obtain block of chains (dma address)
  * @ioc: per adapter object
  * @smid: system request message index
@@ -1047,14 +1082,14 @@
 }
 
 /**
- * scsih_change_queue_depth - setting device queue depth
+ * _scsih_change_queue_depth - setting device queue depth
  * @sdev: scsi device struct
  * @qdepth: requested queue depth
  *
  * Returns queue depth.
  */
 static int
-scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
+_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 {
 	struct Scsi_Host *shost = sdev->host;
 	int max_depth;
@@ -1079,14 +1114,14 @@
 }
 
 /**
- * scsih_change_queue_depth - changing device queue tag type
+ * _scsih_change_queue_depth - changing device queue tag type
  * @sdev: scsi device struct
  * @tag_type: requested tag type
  *
  * Returns queue tag type.
  */
 static int
-scsih_change_queue_type(struct scsi_device *sdev, int tag_type)
+_scsih_change_queue_type(struct scsi_device *sdev, int tag_type)
 {
 	if (sdev->tagged_supported) {
 		scsi_set_tag_type(sdev, tag_type);
@@ -1101,14 +1136,14 @@
 }
 
 /**
- * scsih_target_alloc - target add routine
+ * _scsih_target_alloc - target add routine
  * @starget: scsi target struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_target_alloc(struct scsi_target *starget)
+_scsih_target_alloc(struct scsi_target *starget)
 {
 	struct Scsi_Host *shost = dev_to_shost(&starget->dev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1163,13 +1198,13 @@
 }
 
 /**
- * scsih_target_destroy - target destroy routine
+ * _scsih_target_destroy - target destroy routine
  * @starget: scsi target struct
  *
  * Returns nothing.
  */
 static void
-scsih_target_destroy(struct scsi_target *starget)
+_scsih_target_destroy(struct scsi_target *starget)
 {
 	struct Scsi_Host *shost = dev_to_shost(&starget->dev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1212,14 +1247,14 @@
 }
 
 /**
- * scsih_slave_alloc - device add routine
+ * _scsih_slave_alloc - device add routine
  * @sdev: scsi device struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_slave_alloc(struct scsi_device *sdev)
+_scsih_slave_alloc(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost;
 	struct MPT2SAS_ADAPTER *ioc;
@@ -1273,13 +1308,13 @@
 }
 
 /**
- * scsih_slave_destroy - device destroy routine
+ * _scsih_slave_destroy - device destroy routine
  * @sdev: scsi device struct
  *
  * Returns nothing.
  */
 static void
-scsih_slave_destroy(struct scsi_device *sdev)
+_scsih_slave_destroy(struct scsi_device *sdev)
 {
 	struct MPT2SAS_TARGET *sas_target_priv_data;
 	struct scsi_target *starget;
@@ -1295,13 +1330,13 @@
 }
 
 /**
- * scsih_display_sata_capabilities - sata capabilities
+ * _scsih_display_sata_capabilities - sata capabilities
  * @ioc: per adapter object
  * @sas_device: the sas_device object
  * @sdev: scsi device struct
  */
 static void
-scsih_display_sata_capabilities(struct MPT2SAS_ADAPTER *ioc,
+_scsih_display_sata_capabilities(struct MPT2SAS_ADAPTER *ioc,
     struct _sas_device *sas_device, struct scsi_device *sdev)
 {
 	Mpi2ConfigReply_t mpi_reply;
@@ -1401,14 +1436,14 @@
 }
 
 /**
- * scsih_slave_configure - device configure routine.
+ * _scsih_slave_configure - device configure routine.
  * @sdev: scsi device struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_slave_configure(struct scsi_device *sdev)
+_scsih_slave_configure(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost = sdev->host;
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1489,7 +1524,7 @@
 		    r_level, raid_device->handle,
 		    (unsigned long long)raid_device->wwid,
 		    raid_device->num_pds, ds);
-		scsih_change_queue_depth(sdev, qdepth);
+		_scsih_change_queue_depth(sdev, qdepth);
 		return 0;
 	}
 
@@ -1532,10 +1567,10 @@
 		    sas_device->slot);
 
 		if (!ssp_target)
-			scsih_display_sata_capabilities(ioc, sas_device, sdev);
+			_scsih_display_sata_capabilities(ioc, sas_device, sdev);
 	}
 
-	scsih_change_queue_depth(sdev, qdepth);
+	_scsih_change_queue_depth(sdev, qdepth);
 
 	if (ssp_target)
 		sas_read_port_mode_page(sdev);
@@ -1543,7 +1578,7 @@
 }
 
 /**
- * scsih_bios_param - fetch head, sector, cylinder info for a disk
+ * _scsih_bios_param - fetch head, sector, cylinder info for a disk
  * @sdev: scsi device struct
  * @bdev: pointer to block device context
  * @capacity: device size (in 512 byte sectors)
@@ -1555,7 +1590,7 @@
  * Return nothing.
  */
 static int
-scsih_bios_param(struct scsi_device *sdev, struct block_device *bdev,
+_scsih_bios_param(struct scsi_device *sdev, struct block_device *bdev,
     sector_t capacity, int params[])
 {
 	int		heads;
@@ -1636,7 +1671,7 @@
 }
 
 /**
- * scsih_tm_done - tm completion routine
+ * _scsih_tm_done - tm completion routine
  * @ioc: per adapter object
  * @smid: system request message index
  * @VF_ID: virtual function id
@@ -1648,7 +1683,7 @@
  * Return nothing.
  */
 static void
-scsih_tm_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
+_scsih_tm_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
 {
 	MPI2DefaultReply_t *mpi_reply;
 
@@ -1823,13 +1858,13 @@
 }
 
 /**
- * scsih_abort - eh threads main abort routine
+ * _scsih_abort - eh threads main abort routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_abort(struct scsi_cmnd *scmd)
+_scsih_abort(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -1889,15 +1924,86 @@
 	return r;
 }
 
-
 /**
- * scsih_dev_reset - eh threads main device reset routine
+ * _scsih_dev_reset - eh threads main device reset routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_dev_reset(struct scsi_cmnd *scmd)
+_scsih_dev_reset(struct scsi_cmnd *scmd)
+{
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
+	struct MPT2SAS_DEVICE *sas_device_priv_data;
+	struct _sas_device *sas_device;
+	unsigned long flags;
+	u16	handle;
+	int r;
+
+	printk(MPT2SAS_INFO_FMT "attempting device reset! scmd(%p)\n",
+	    ioc->name, scmd);
+	scsi_print_command(scmd);
+
+	sas_device_priv_data = scmd->device->hostdata;
+	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+		printk(MPT2SAS_INFO_FMT "device been deleted! scmd(%p)\n",
+		    ioc->name, scmd);
+		scmd->result = DID_NO_CONNECT << 16;
+		scmd->scsi_done(scmd);
+		r = SUCCESS;
+		goto out;
+	}
+
+	/* for hidden raid components obtain the volume_handle */
+	handle = 0;
+	if (sas_device_priv_data->sas_target->flags &
+	    MPT_TARGET_FLAGS_RAID_COMPONENT) {
+		spin_lock_irqsave(&ioc->sas_device_lock, flags);
+		sas_device = _scsih_sas_device_find_by_handle(ioc,
+		   sas_device_priv_data->sas_target->handle);
+		if (sas_device)
+			handle = sas_device->volume_handle;
+		spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+	} else
+		handle = sas_device_priv_data->sas_target->handle;
+
+	if (!handle) {
+		scmd->result = DID_RESET << 16;
+		r = FAILED;
+		goto out;
+	}
+
+	mutex_lock(&ioc->tm_cmds.mutex);
+	mpt2sas_scsih_issue_tm(ioc, handle, 0,
+	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, scmd->device->lun,
+	    30);
+
+	/*
+	 *  sanity check see whether all commands to this device been
+	 *  completed
+	 */
+	if (_scsih_scsi_lookup_find_by_lun(ioc, scmd->device->id,
+	    scmd->device->lun, scmd->device->channel))
+		r = FAILED;
+	else
+		r = SUCCESS;
+	ioc->tm_cmds.status = MPT2_CMD_NOT_USED;
+	mutex_unlock(&ioc->tm_cmds.mutex);
+
+ out:
+	printk(MPT2SAS_INFO_FMT "device reset: %s scmd(%p)\n",
+	    ioc->name, ((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd);
+	return r;
+}
+
+/**
+ * _scsih_target_reset - eh threads main target reset routine
+ * @sdev: scsi device struct
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ */
+static int
+_scsih_target_reset(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -1912,7 +2018,7 @@
 
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
-		printk(MPT2SAS_INFO_FMT "device been deleted! scmd(%p)\n",
+		printk(MPT2SAS_INFO_FMT "target been deleted! scmd(%p)\n",
 		    ioc->name, scmd);
 		scmd->result = DID_NO_CONNECT << 16;
 		scmd->scsi_done(scmd);
@@ -1962,13 +2068,13 @@
 }
 
 /**
- * scsih_abort - eh threads main host reset routine
+ * _scsih_abort - eh threads main host reset routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_host_reset(struct scsi_cmnd *scmd)
+_scsih_host_reset(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	int r, retval;
@@ -2390,7 +2496,107 @@
 }
 
 /**
- * scsih_qcmd - main scsi request entry point
+ * _scsih_setup_eedp - setup MPI request for EEDP transfer
+ * @scmd: pointer to scsi command object
+ * @mpi_request: pointer to the SCSI_IO reqest message frame
+ *
+ * Supporting protection 1 and 3.
+ *
+ * Returns nothing
+ */
+static void
+_scsih_setup_eedp(struct scsi_cmnd *scmd, Mpi2SCSIIORequest_t *mpi_request)
+{
+	u16 eedp_flags;
+	unsigned char prot_op = scsi_get_prot_op(scmd);
+	unsigned char prot_type = scsi_get_prot_type(scmd);
+
+	if (prot_type == SCSI_PROT_DIF_TYPE0 ||
+	   prot_type == SCSI_PROT_DIF_TYPE2 ||
+	   prot_op == SCSI_PROT_NORMAL)
+		return;
+
+	if (prot_op ==  SCSI_PROT_READ_STRIP)
+		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP;
+	else if (prot_op ==  SCSI_PROT_WRITE_INSERT)
+		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
+	else
+		return;
+
+	mpi_request->EEDPBlockSize = scmd->device->sector_size;
+
+	switch (prot_type) {
+	case SCSI_PROT_DIF_TYPE1:
+
+		/*
+		* enable ref/guard checking
+		* auto increment ref tag
+		*/
+		mpi_request->EEDPFlags = eedp_flags |
+		    MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
+		    cpu_to_be32(scsi_get_lba(scmd));
+
+		break;
+
+	case SCSI_PROT_DIF_TYPE3:
+
+		/*
+		* enable guard checking
+		*/
+		mpi_request->EEDPFlags = eedp_flags |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+
+		break;
+	}
+}
+
+/**
+ * _scsih_eedp_error_handling - return sense code for EEDP errors
+ * @scmd: pointer to scsi command object
+ * @ioc_status: ioc status
+ *
+ * Returns nothing
+ */
+static void
+_scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
+{
+	u8 ascq;
+	u8 sk;
+	u8 host_byte;
+
+	switch (ioc_status) {
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+		ascq = 0x01;
+		break;
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		ascq = 0x02;
+		break;
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+		ascq = 0x03;
+		break;
+	default:
+		ascq = 0x00;
+		break;
+	}
+
+	if (scmd->sc_data_direction == DMA_TO_DEVICE) {
+		sk = ILLEGAL_REQUEST;
+		host_byte = DID_ABORT;
+	} else {
+		sk = ABORTED_COMMAND;
+		host_byte = DID_OK;
+	}
+
+	scsi_build_sense_buffer(0, scmd->sense_buffer, sk, 0x10, ascq);
+	scmd->result = DRIVER_SENSE << 24 | (host_byte << 16) |
+	    SAM_STAT_CHECK_CONDITION;
+}
+
+/**
+ * _scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
  * @done: function pointer to be invoked on completion
  *
@@ -2401,7 +2607,7 @@
  * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
  */
 static int
-scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
+_scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -2470,6 +2676,7 @@
 	}
 	mpi_request = mpt2sas_base_get_msg_frame(ioc, smid);
 	memset(mpi_request, 0, sizeof(Mpi2SCSIIORequest_t));
+	_scsih_setup_eedp(scmd, mpi_request);
 	mpi_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
 	if (sas_device_priv_data->sas_target->flags &
 	    MPT_TARGET_FLAGS_RAID_COMPONENT)
@@ -2604,6 +2811,15 @@
 	case MPI2_IOCSTATUS_SCSI_EXT_TERMINATED:
 		desc_ioc_state = "scsi ext terminated";
 		break;
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+		desc_ioc_state = "eedp guard error";
+		break;
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+		desc_ioc_state = "eedp ref tag error";
+		break;
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		desc_ioc_state = "eedp app tag error";
+		break;
 	default:
 		desc_ioc_state = "unknown";
 		break;
@@ -2783,7 +2999,7 @@
 }
 
 /**
- * scsih_io_done - scsi request callback
+ * _scsih_io_done - scsi request callback
  * @ioc: per adapter object
  * @smid: system request message index
  * @VF_ID: virtual function id
@@ -2794,7 +3010,7 @@
  * Return nothing.
  */
 static void
-scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
+_scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
 {
 	Mpi2SCSIIORequest_t *mpi_request;
 	Mpi2SCSIIOReply_t *mpi_reply;
@@ -2939,6 +3155,11 @@
 			scmd->result = DID_RESET << 16;
 		break;
 
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		_scsih_eedp_error_handling(scmd, ioc_status);
+		break;
 	case MPI2_IOCSTATUS_SCSI_PROTOCOL_ERROR:
 	case MPI2_IOCSTATUS_INVALID_FUNCTION:
 	case MPI2_IOCSTATUS_INVALID_SGL:
@@ -5130,18 +5351,19 @@
 	.module				= THIS_MODULE,
 	.name				= "Fusion MPT SAS Host",
 	.proc_name			= MPT2SAS_DRIVER_NAME,
-	.queuecommand			= scsih_qcmd,
-	.target_alloc			= scsih_target_alloc,
-	.slave_alloc			= scsih_slave_alloc,
-	.slave_configure		= scsih_slave_configure,
-	.target_destroy			= scsih_target_destroy,
-	.slave_destroy			= scsih_slave_destroy,
-	.change_queue_depth 		= scsih_change_queue_depth,
-	.change_queue_type		= scsih_change_queue_type,
-	.eh_abort_handler		= scsih_abort,
-	.eh_device_reset_handler	= scsih_dev_reset,
-	.eh_host_reset_handler		= scsih_host_reset,
-	.bios_param			= scsih_bios_param,
+	.queuecommand			= _scsih_qcmd,
+	.target_alloc			= _scsih_target_alloc,
+	.slave_alloc			= _scsih_slave_alloc,
+	.slave_configure		= _scsih_slave_configure,
+	.target_destroy			= _scsih_target_destroy,
+	.slave_destroy			= _scsih_slave_destroy,
+	.change_queue_depth 		= _scsih_change_queue_depth,
+	.change_queue_type		= _scsih_change_queue_type,
+	.eh_abort_handler		= _scsih_abort,
+	.eh_device_reset_handler	= _scsih_dev_reset,
+	.eh_target_reset_handler	= _scsih_target_reset,
+	.eh_host_reset_handler		= _scsih_host_reset,
+	.bios_param			= _scsih_bios_param,
 	.can_queue			= 1,
 	.this_id			= -1,
 	.sg_tablesize			= MPT2SAS_SG_DEPTH,
@@ -5228,13 +5450,13 @@
 }
 
 /**
- * scsih_remove - detach and remove add host
+ * _scsih_remove - detach and remove add host
  * @pdev: PCI device struct
  *
  * Return nothing.
  */
 static void __devexit
-scsih_remove(struct pci_dev *pdev)
+_scsih_remove(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5442,14 +5664,14 @@
 }
 
 /**
- * scsih_probe - attach and add scsi host
+ * _scsih_probe - attach and add scsi host
  * @pdev: PCI device struct
  * @id: pci device id
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+_scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct MPT2SAS_ADAPTER *ioc;
 	struct Scsi_Host *shost;
@@ -5503,6 +5725,9 @@
 		goto out_add_shost_fail;
 	}
 
+	scsi_host_set_prot(shost, SHOST_DIF_TYPE1_PROTECTION
+	    | SHOST_DIF_TYPE3_PROTECTION);
+
 	/* event thread */
 	snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
 	    "fw_event%d", ioc->id);
@@ -5536,14 +5761,14 @@
 
 #ifdef CONFIG_PM
 /**
- * scsih_suspend - power management suspend main entry point
+ * _scsih_suspend - power management suspend main entry point
  * @pdev: PCI device struct
  * @state: PM state change to (usually PCI_D3)
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_suspend(struct pci_dev *pdev, pm_message_t state)
+_scsih_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5564,13 +5789,13 @@
 }
 
 /**
- * scsih_resume - power management resume main entry point
+ * _scsih_resume - power management resume main entry point
  * @pdev: PCI device struct
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_resume(struct pci_dev *pdev)
+_scsih_resume(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5599,22 +5824,22 @@
 static struct pci_driver scsih_driver = {
 	.name		= MPT2SAS_DRIVER_NAME,
 	.id_table	= scsih_pci_table,
-	.probe		= scsih_probe,
-	.remove		= __devexit_p(scsih_remove),
+	.probe		= _scsih_probe,
+	.remove		= __devexit_p(_scsih_remove),
 #ifdef CONFIG_PM
-	.suspend	= scsih_suspend,
-	.resume		= scsih_resume,
+	.suspend	= _scsih_suspend,
+	.resume		= _scsih_resume,
 #endif
 };
 
 
 /**
- * scsih_init - main entry point for this driver.
+ * _scsih_init - main entry point for this driver.
  *
  * Returns 0 success, anything else error.
  */
 static int __init
-scsih_init(void)
+_scsih_init(void)
 {
 	int error;
 
@@ -5630,10 +5855,10 @@
 	mpt2sas_base_initialize_callback_handler();
 
 	 /* queuecommand callback hander */
-	scsi_io_cb_idx = mpt2sas_base_register_callback_handler(scsih_io_done);
+	scsi_io_cb_idx = mpt2sas_base_register_callback_handler(_scsih_io_done);
 
 	/* task managment callback handler */
-	tm_cb_idx = mpt2sas_base_register_callback_handler(scsih_tm_done);
+	tm_cb_idx = mpt2sas_base_register_callback_handler(_scsih_tm_done);
 
 	/* base internal commands callback handler */
 	base_cb_idx = mpt2sas_base_register_callback_handler(mpt2sas_base_done);
@@ -5659,12 +5884,12 @@
 }
 
 /**
- * scsih_exit - exit point for this driver (when it is a module).
+ * _scsih_exit - exit point for this driver (when it is a module).
  *
  * Returns 0 success, anything else error.
  */
 static void __exit
-scsih_exit(void)
+_scsih_exit(void)
 {
 	printk(KERN_INFO "mpt2sas version %s unloading\n",
 	    MPT2SAS_DRIVER_VERSION);
@@ -5682,5 +5907,5 @@
 	mpt2sas_ctl_exit();
 }
 
-module_init(scsih_init);
-module_exit(scsih_exit);
+module_init(_scsih_init);
+module_exit(_scsih_exit);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index e03dc0b..686695b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -264,7 +264,7 @@
 };
 
 /**
- * transport_expander_report_manufacture - obtain SMP report_manufacture
+ * _transport_expander_report_manufacture - obtain SMP report_manufacture
  * @ioc: per adapter object
  * @sas_address: expander sas address
  * @edev: the sas_expander_device object
@@ -274,7 +274,7 @@
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
+_transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
     u64 sas_address, struct sas_expander_device *edev)
 {
 	Mpi2SmpPassthroughRequest_t *mpi_request;
@@ -578,7 +578,7 @@
 	    MPI2_SAS_DEVICE_INFO_EDGE_EXPANDER ||
 	    mpt2sas_port->remote_identify.device_type ==
 	    MPI2_SAS_DEVICE_INFO_FANOUT_EXPANDER)
-		transport_expander_report_manufacture(ioc,
+		_transport_expander_report_manufacture(ioc,
 		    mpt2sas_port->remote_identify.sas_address,
 		    rphy_to_expander_device(rphy));
 
@@ -852,7 +852,7 @@
 }
 
 /**
- * transport_get_linkerrors -
+ * _transport_get_linkerrors -
  * @phy: The sas phy object
  *
  * Only support sas_host direct attached phys.
@@ -860,7 +860,7 @@
  *
  */
 static int
-transport_get_linkerrors(struct sas_phy *phy)
+_transport_get_linkerrors(struct sas_phy *phy)
 {
 	struct MPT2SAS_ADAPTER *ioc = phy_to_ioc(phy);
 	struct _sas_phy *mpt2sas_phy;
@@ -903,14 +903,14 @@
 }
 
 /**
- * transport_get_enclosure_identifier -
+ * _transport_get_enclosure_identifier -
  * @phy: The sas phy object
  *
  * Obtain the enclosure logical id for an expander.
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier)
+_transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier)
 {
 	struct MPT2SAS_ADAPTER *ioc = rphy_to_ioc(rphy);
 	struct _sas_node *sas_expander;
@@ -929,13 +929,13 @@
 }
 
 /**
- * transport_get_bay_identifier -
+ * _transport_get_bay_identifier -
  * @phy: The sas phy object
  *
  * Returns the slot id for a device that resides inside an enclosure.
  */
 static int
-transport_get_bay_identifier(struct sas_rphy *rphy)
+_transport_get_bay_identifier(struct sas_rphy *rphy)
 {
 	struct MPT2SAS_ADAPTER *ioc = rphy_to_ioc(rphy);
 	struct _sas_device *sas_device;
@@ -953,7 +953,7 @@
 }
 
 /**
- * transport_phy_reset -
+ * _transport_phy_reset -
  * @phy: The sas phy object
  * @hard_reset:
  *
@@ -961,7 +961,7 @@
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_phy_reset(struct sas_phy *phy, int hard_reset)
+_transport_phy_reset(struct sas_phy *phy, int hard_reset)
 {
 	struct MPT2SAS_ADAPTER *ioc = phy_to_ioc(phy);
 	struct _sas_phy *mpt2sas_phy;
@@ -1002,7 +1002,7 @@
 }
 
 /**
- * transport_smp_handler - transport portal for smp passthru
+ * _transport_smp_handler - transport portal for smp passthru
  * @shost: shost object
  * @rphy: sas transport rphy object
  * @req:
@@ -1012,7 +1012,7 @@
  *           smp_rep_general /sys/class/bsg/expander-5:0
  */
 static int
-transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
+_transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
     struct request *req)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1041,7 +1041,7 @@
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk(MPT2SAS_ERR_FMT "%s: multiple segments req %u %u, "
 		    "rsp %u %u\n", ioc->name, __func__, req->bio->bi_vcnt,
-		    req->data_len, rsp->bio->bi_vcnt, rsp->data_len);
+		    blk_rq_bytes(req), rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
@@ -1104,7 +1104,7 @@
 	*((u64 *)&mpi_request->SASAddress) = (rphy) ?
 	    cpu_to_le64(rphy->identify.sas_address) :
 	    cpu_to_le64(ioc->sas_hba.sas_address);
-	mpi_request->RequestDataLength = cpu_to_le16(req->data_len - 4);
+	mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
 	psge = &mpi_request->SGL;
 
 	/* WRITE sgel first */
@@ -1112,13 +1112,13 @@
 	    MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
 	dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
-	      req->data_len, PCI_DMA_BIDIRECTIONAL);
+		blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_out) {
 		mpt2sas_base_free_smid(ioc, le16_to_cpu(smid));
 		goto unmap;
 	}
 
-	ioc->base_add_sg_single(psge, sgl_flags | (req->data_len - 4),
+	ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(req) - 4),
 	    dma_addr_out);
 
 	/* incr sgel */
@@ -1129,14 +1129,14 @@
 	    MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
 	    MPI2_SGE_FLAGS_END_OF_LIST);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-	dma_addr_in =  pci_map_single(ioc->pdev, bio_data(rsp->bio),
-	      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
+	dma_addr_in = pci_map_single(ioc->pdev, bio_data(rsp->bio),
+				     blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_in) {
 		mpt2sas_base_free_smid(ioc, le16_to_cpu(smid));
 		goto unmap;
 	}
 
-	ioc->base_add_sg_single(psge, sgl_flags | (rsp->data_len + 4),
+	ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(rsp) + 4),
 	    dma_addr_in);
 
 	dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s - "
@@ -1170,9 +1170,8 @@
 
 		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
 		req->sense_len = sizeof(*mpi_reply);
-		req->data_len = 0;
-		rsp->data_len -= mpi_reply->ResponseDataLength;
-
+		req->resid_len = 0;
+		rsp->resid_len -= mpi_reply->ResponseDataLength;
 	} else {
 		dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT
 		    "%s - no reply\n", ioc->name, __func__));
@@ -1188,10 +1187,10 @@
 
  unmap:
 	if (dma_addr_out)
-		pci_unmap_single(ioc->pdev, dma_addr_out, req->data_len,
+		pci_unmap_single(ioc->pdev, dma_addr_out, blk_rq_bytes(req),
 		    PCI_DMA_BIDIRECTIONAL);
 	if (dma_addr_in)
-		pci_unmap_single(ioc->pdev, dma_addr_in, rsp->data_len,
+		pci_unmap_single(ioc->pdev, dma_addr_in, blk_rq_bytes(rsp),
 		    PCI_DMA_BIDIRECTIONAL);
 
  out:
@@ -1201,11 +1200,11 @@
 }
 
 struct sas_function_template mpt2sas_transport_functions = {
-	.get_linkerrors		= transport_get_linkerrors,
-	.get_enclosure_identifier = transport_get_enclosure_identifier,
-	.get_bay_identifier	= transport_get_bay_identifier,
-	.phy_reset		= transport_phy_reset,
-	.smp_handler		= transport_smp_handler,
+	.get_linkerrors		= _transport_get_linkerrors,
+	.get_enclosure_identifier = _transport_get_enclosure_identifier,
+	.get_bay_identifier	= _transport_get_bay_identifier,
+	.phy_reset		= _transport_phy_reset,
+	.smp_handler		= _transport_smp_handler,
 };
 
 struct scsi_transport_template *mpt2sas_transport_template;
diff --git a/drivers/scsi/mvsas.c b/drivers/scsi/mvsas.c
deleted file mode 100644
index e4acebd..0000000
--- a/drivers/scsi/mvsas.c
+++ /dev/null
@@ -1,3222 +0,0 @@
-/*
-	mvsas.c - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
-	---------------------------------------------------------------
-
-	Random notes:
-	* hardware supports controlling the endian-ness of data
-	  structures.  this permits elimination of all the le32_to_cpu()
-	  and cpu_to_le32() conversions.
-
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <scsi/libsas.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/sas_ata.h>
-#include <asm/io.h>
-
-#define DRV_NAME	"mvsas"
-#define DRV_VERSION	"0.5.2"
-#define _MV_DUMP	0
-#define MVS_DISABLE_NVRAM
-#define MVS_DISABLE_MSI
-
-#define mr32(reg)	readl(regs + MVS_##reg)
-#define mw32(reg,val)	writel((val), regs + MVS_##reg)
-#define mw32_f(reg,val)	do {			\
-	writel((val), regs + MVS_##reg);	\
-	readl(regs + MVS_##reg);		\
-	} while (0)
-
-#define MVS_ID_NOT_MAPPED	0x7f
-#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
-
-/* offset for D2H FIS in the Received FIS List Structure */
-#define SATA_RECEIVED_D2H_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x40)
-#define SATA_RECEIVED_PIO_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x20)
-#define UNASSOC_D2H_FIS(id)		\
-	((void *) mvi->rx_fis + 0x100 * id)
-
-#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
-	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
-					(__mc) != 0 && __rest;		\
-					(++__lseq), (__mc) >>= 1)
-
-/* driver compile-time configuration */
-enum driver_configuration {
-	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
-	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
-					/* software requires power-of-2
-					   ring size */
-
-	MVS_SLOTS		= 512,	/* command slots */
-	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
-	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
-	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
-	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
-
-	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
-
-	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
-	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
-};
-
-/* unchangeable hardware details */
-enum hardware_details {
-	MVS_MAX_PHYS		= 8,	/* max. possible phys */
-	MVS_MAX_PORTS		= 8,	/* max. possible ports */
-	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
-};
-
-/* peripheral registers (BAR2) */
-enum peripheral_registers {
-	SPI_CTL			= 0x10,	/* EEPROM control */
-	SPI_CMD			= 0x14,	/* EEPROM command */
-	SPI_DATA		= 0x18, /* EEPROM data */
-};
-
-enum peripheral_register_bits {
-	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
-	TWSI_RD			= (1U << 4),	/* EEPROM read access */
-
-	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
-};
-
-/* enhanced mode registers (BAR4) */
-enum hw_registers {
-	MVS_GBL_CTL		= 0x04,  /* global control */
-	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
-	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
-	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
-
-	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
-	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
-	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
-	MVS_CMD_LIST_HI		= 0x10C,
-	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
-	MVS_RX_FIS_HI		= 0x114,
-
-	MVS_TX_CFG		= 0x120, /* TX configuration */
-	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
-	MVS_TX_HI		= 0x128,
-
-	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
-	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
-	MVS_RX_CFG		= 0x134, /* RX configuration */
-	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
-	MVS_RX_HI		= 0x13C,
-	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
-
-	MVS_INT_COAL		= 0x148, /* Int coalescing config */
-	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
-	MVS_INT_STAT		= 0x150, /* Central int status */
-	MVS_INT_MASK		= 0x154, /* Central int enable */
-	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
-	MVS_INT_MASK_SRS	= 0x15C,
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
-	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
-	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
-	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
-	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
-
-	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
-	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
-	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
-	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
-	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
-	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
-	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
-	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
-};
-
-enum hw_register_bits {
-	/* MVS_GBL_CTL */
-	INT_EN			= (1U << 1),	/* Global int enable */
-	HBA_RST			= (1U << 0),	/* HBA reset */
-
-	/* MVS_GBL_INT_STAT */
-	INT_XOR			= (1U << 4),	/* XOR engine event */
-	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
-
-	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
-	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
-	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
-	MODE_AUTO_DET_PORT6 = (1U << 14),
-	MODE_AUTO_DET_PORT5 = (1U << 13),
-	MODE_AUTO_DET_PORT4 = (1U << 12),
-	MODE_AUTO_DET_PORT3 = (1U << 11),
-	MODE_AUTO_DET_PORT2 = (1U << 10),
-	MODE_AUTO_DET_PORT1 = (1U << 9),
-	MODE_AUTO_DET_PORT0 = (1U << 8),
-	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
-				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
-				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
-				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
-	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
-	MODE_SAS_PORT6_MASK = (1U << 6),
-	MODE_SAS_PORT5_MASK = (1U << 5),
-	MODE_SAS_PORT4_MASK = (1U << 4),
-	MODE_SAS_PORT3_MASK = (1U << 3),
-	MODE_SAS_PORT2_MASK = (1U << 2),
-	MODE_SAS_PORT1_MASK = (1U << 1),
-	MODE_SAS_PORT0_MASK = (1U << 0),
-	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
-				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
-				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
-				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
-
-				/* SAS_MODE value may be
-				 * dictated (in hw) by values
-				 * of SATA_TARGET & AUTO_DET
-				 */
-
-	/* MVS_TX_CFG */
-	TX_EN			= (1U << 16),	/* Enable TX */
-	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
-
-	/* MVS_RX_CFG */
-	RX_EN			= (1U << 16),	/* Enable RX */
-	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
-
-	/* MVS_INT_COAL */
-	COAL_EN			= (1U << 16),	/* Enable int coalescing */
-
-	/* MVS_INT_STAT, MVS_INT_MASK */
-	CINT_I2C		= (1U << 31),	/* I2C event */
-	CINT_SW0		= (1U << 30),	/* software event 0 */
-	CINT_SW1		= (1U << 29),	/* software event 1 */
-	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
-	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
-	CINT_MEM		= (1U << 26),	/* int mem parity err */
-	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
-	CINT_SRS		= (1U << 3),	/* SRS event */
-	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
-	CINT_DONE		= (1U << 0),	/* cmd completion */
-
-						/* shl for ports 1-3 */
-	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
-	CINT_PORT		= (1U << 8),	/* port0 event */
-	CINT_PORT_MASK_OFFSET	= 8,
-	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
-
-	/* TX (delivery) ring bits */
-	TXQ_CMD_SHIFT		= 29,
-	TXQ_CMD_SSP		= 1,		/* SSP protocol */
-	TXQ_CMD_SMP		= 2,		/* SMP protocol */
-	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
-	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
-	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
-	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
-	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
-	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
-	TXQ_SRS_MASK		= 0x7f,
-	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
-	TXQ_PHY_MASK		= 0xff,
-	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* RX (completion) ring bits */
-	RXQ_GOOD		= (1U << 23),	/* Response good */
-	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
-	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
-	RXQ_ATTN		= (1U << 19),	/* attention */
-	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
-	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
-	RXQ_DONE		= (1U << 16),	/* cmd complete */
-	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* mvs_cmd_hdr bits */
-	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
-	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
-
-						/* SSP initiator only */
-	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
-
-						/* SSP initiator or target */
-	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
-
-						/* SSP target only */
-	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
-	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
-	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
-	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
-
-	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
-	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
-	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
-	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
-	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
-	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
-	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
-	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
-	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
-	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
-
-	CCTL_RST		= (1U << 5),	/* port logic reset */
-
-						/* 0(LSB first), 1(MSB first) */
-	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
-	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
-	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
-	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
-
-	/* MVS_Px_SER_CTLSTAT (per-phy control) */
-	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
-	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
-	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
-	PHY_RST			= (1U << 0),	/* phy reset */
-	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
-	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
-			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
-	PHY_READY_MASK		= (1U << 20),
-
-	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
-	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
-	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
-	PHYEV_AN		= (1U << 18),	/* SATA async notification */
-	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
-	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
-	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
-	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
-	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
-	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
-	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
-	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
-	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
-	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
-	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
-	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
-	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
-	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
-	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
-
-	/* MVS_PCS */
-	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
-	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
-	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
-	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
-	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
-	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
-	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
-	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
-	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
-	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
-
-	/* Port n Attached Device Info */
-	PORT_DEV_SSP_TRGT	= (1U << 19),
-	PORT_DEV_SMP_TRGT	= (1U << 18),
-	PORT_DEV_STP_TRGT	= (1U << 17),
-	PORT_DEV_SSP_INIT	= (1U << 11),
-	PORT_DEV_SMP_INIT	= (1U << 10),
-	PORT_DEV_STP_INIT	= (1U << 9),
-	PORT_PHY_ID_MASK	= (0xFFU << 24),
-	PORT_DEV_TRGT_MASK	= (0x7U << 17),
-	PORT_DEV_INIT_MASK	= (0x7U << 9),
-	PORT_DEV_TYPE_MASK	= (0x7U << 0),
-
-	/* Port n PHY Status */
-	PHY_RDY			= (1U << 2),
-	PHY_DW_SYNC		= (1U << 1),
-	PHY_OOB_DTCTD		= (1U << 0),
-
-	/* VSR */
-	/* PHYMODE 6 (CDB) */
-	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
-	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
-	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
-	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
-	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
-	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
-	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
-	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
-	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
-	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
-	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
-	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
-	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
-	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
-};
-
-enum mvs_info_flags {
-	MVF_MSI			= (1U << 0),	/* MSI is enabled */
-	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
-};
-
-enum sas_cmd_port_registers {
-	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
-	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
-	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
-	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
-	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
-	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
-	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
-	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
-	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
-	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
-	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
-	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
-	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
-	CMD_ID_TEST		= 0x134, /* ID test register */
-	CMD_PL_TIMER		= 0x138, /* PL timer register */
-	CMD_WD_TIMER		= 0x13c, /* WD timer register */
-	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
-	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
-	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
-	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
-	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
-	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
-	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
-	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
-	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
-	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
-	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
-	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
-	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
-	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
-	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
-	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
-	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
-	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
-	CMD_RESET_COUNT		= 0x188, /* Reset Count */
-	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
-	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
-	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
-	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
-	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
-	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
-	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
-	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
-	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
-	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
-	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
-	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
-	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
-	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
-	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
-};
-
-/* SAS/SATA configuration port registers, aka phy registers */
-enum sas_sata_config_port_regs {
-	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
-	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
-	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
-	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
-	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
-	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
-	PHYR_SATA_CTL		= 0x18,	/* SATA control */
-	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
-	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
-	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
-	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
-	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
-	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
-	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
-	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
-	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
-	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
-	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
-};
-
-/*  SAS/SATA Vendor Specific Port Registers */
-enum sas_sata_vsp_regs {
-	VSR_PHY_STAT		= 0x00, /* Phy Status */
-	VSR_PHY_MODE1		= 0x01, /* phy tx */
-	VSR_PHY_MODE2		= 0x02, /* tx scc */
-	VSR_PHY_MODE3		= 0x03, /* pll */
-	VSR_PHY_MODE4		= 0x04, /* VCO */
-	VSR_PHY_MODE5		= 0x05, /* Rx */
-	VSR_PHY_MODE6		= 0x06, /* CDR */
-	VSR_PHY_MODE7		= 0x07, /* Impedance */
-	VSR_PHY_MODE8		= 0x08, /* Voltage */
-	VSR_PHY_MODE9		= 0x09, /* Test */
-	VSR_PHY_MODE10		= 0x0A, /* Power */
-	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
-	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
-	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
-};
-
-enum pci_cfg_registers {
-	PCR_PHY_CTL	= 0x40,
-	PCR_PHY_CTL2	= 0x90,
-	PCR_DEV_CTRL	= 0xE8,
-};
-
-enum pci_cfg_register_bits {
-	PCTL_PWR_ON	= (0xFU << 24),
-	PCTL_OFF	= (0xFU << 12),
-	PRD_REQ_SIZE	= (0x4000),
-	PRD_REQ_MASK	= (0x00007000),
-};
-
-enum nvram_layout_offsets {
-	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
-	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
-};
-
-enum chip_flavors {
-	chip_6320,
-	chip_6440,
-	chip_6480,
-};
-
-enum port_type {
-	PORT_TYPE_SAS	=  (1L << 1),
-	PORT_TYPE_SATA	=  (1L << 0),
-};
-
-/* Command Table Format */
-enum ct_format {
-	/* SSP */
-	SSP_F_H		=  0x00,
-	SSP_F_IU	=  0x18,
-	SSP_F_MAX	=  0x4D,
-	/* STP */
-	STP_CMD_FIS	=  0x00,
-	STP_ATAPI_CMD	=  0x40,
-	STP_F_MAX	=  0x10,
-	/* SMP */
-	SMP_F_T		=  0x00,
-	SMP_F_DEP	=  0x01,
-	SMP_F_MAX	=  0x101,
-};
-
-enum status_buffer {
-	SB_EIR_OFF	=  0x00,	/* Error Information Record */
-	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
-	SB_RFB_MAX	=  0x400,	/* RFB size*/
-};
-
-enum error_info_rec {
-	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
-	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
-	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
-	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
-	UNK_FIS 	= (1U << 27),	/* unknown FIS */
-	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
-	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
-	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
-	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
-	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
-	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
-	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
-	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
-	INTERLOCK	= (1U << 15),	/* interlock error */
-	NAK		= (1U << 14),	/* NAK rx'd */
-	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
-	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
-	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
-	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
-	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
-	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
-	BREAK		= (1U << 7),	/* break received */
-	BAD_DEST	= (1U << 6),	/* bad destination */
-	BAD_PROTO	= (1U << 5),	/* protocol not supported */
-	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
-	WRONG_DEST	= (1U << 3),	/* wrong destination error */
-	CREDIT_TO	= (1U << 2),	/* credit timeout */
-	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
-	BUF_PAR 	= (1U << 0),	/* buffer parity error */
-};
-
-enum error_info_rec_2 {
-	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
-	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
-	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
-	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
-	USR_BLK_NM	= (1U << 0),	/* User Block Number */
-};
-
-struct mvs_chip_info {
-	u32		n_phy;
-	u32		srs_sz;
-	u32		slot_width;
-};
-
-struct mvs_err_info {
-	__le32			flags;
-	__le32			flags2;
-};
-
-struct mvs_prd {
-	__le64			addr;		/* 64-bit buffer address */
-	__le32			reserved;
-	__le32			len;		/* 16-bit length */
-};
-
-struct mvs_cmd_hdr {
-	__le32			flags;		/* PRD tbl len; SAS, SATA ctl */
-	__le32			lens;		/* cmd, max resp frame len */
-	__le32			tags;		/* targ port xfer tag; tag */
-	__le32			data_len;	/* data xfer len */
-	__le64			cmd_tbl;	/* command table address */
-	__le64			open_frame;	/* open addr frame address */
-	__le64			status_buf;	/* status buffer address */
-	__le64			prd_tbl;	/* PRD tbl address */
-	__le32			reserved[4];
-};
-
-struct mvs_port {
-	struct asd_sas_port	sas_port;
-	u8			port_attached;
-	u8			taskfileset;
-	u8			wide_port_phymap;
-	struct list_head	list;
-};
-
-struct mvs_phy {
-	struct mvs_port		*port;
-	struct asd_sas_phy	sas_phy;
-	struct sas_identify	identify;
-	struct scsi_device	*sdev;
-	u64		dev_sas_addr;
-	u64		att_dev_sas_addr;
-	u32		att_dev_info;
-	u32		dev_info;
-	u32		phy_type;
-	u32		phy_status;
-	u32		irq_status;
-	u32		frame_rcvd_size;
-	u8		frame_rcvd[32];
-	u8		phy_attached;
-	enum sas_linkrate	minimum_linkrate;
-	enum sas_linkrate	maximum_linkrate;
-};
-
-struct mvs_slot_info {
-	struct list_head	list;
-	struct sas_task		*task;
-	u32			n_elem;
-	u32			tx;
-
-	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
-	 * and PRD table
-	 */
-	void			*buf;
-	dma_addr_t		buf_dma;
-#if _MV_DUMP
-	u32			cmd_size;
-#endif
-
-	void			*response;
-	struct mvs_port		*port;
-};
-
-struct mvs_info {
-	unsigned long		flags;
-
-	spinlock_t		lock;		/* host-wide lock */
-	struct pci_dev		*pdev;		/* our device */
-	void __iomem		*regs;		/* enhanced mode registers */
-	void __iomem		*peri_regs;	/* peripheral registers */
-
-	u8			sas_addr[SAS_ADDR_SIZE];
-	struct sas_ha_struct	sas;		/* SCSI/SAS glue */
-	struct Scsi_Host	*shost;
-
-	__le32			*tx;		/* TX (delivery) DMA ring */
-	dma_addr_t		tx_dma;
-	u32			tx_prod;	/* cached next-producer idx */
-
-	__le32			*rx;		/* RX (completion) DMA ring */
-	dma_addr_t		rx_dma;
-	u32			rx_cons;	/* RX consumer idx */
-
-	__le32			*rx_fis;	/* RX'd FIS area */
-	dma_addr_t		rx_fis_dma;
-
-	struct mvs_cmd_hdr	*slot;	/* DMA command header slots */
-	dma_addr_t		slot_dma;
-
-	const struct mvs_chip_info *chip;
-
-	u8			tags[MVS_SLOTS];
-	struct mvs_slot_info	slot_info[MVS_SLOTS];
-				/* further per-slot information */
-	struct mvs_phy		phy[MVS_MAX_PHYS];
-	struct mvs_port		port[MVS_MAX_PHYS];
-#ifdef MVS_USE_TASKLET
-	struct tasklet_struct	tasklet;
-#endif
-};
-
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata);
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port);
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port);
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val);
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port);
-
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
-static void mvs_detect_porttype(struct mvs_info *mvi, int i);
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
-static void mvs_release_task(struct mvs_info *mvi, int phy_no);
-
-static int mvs_scan_finished(struct Scsi_Host *, unsigned long);
-static void mvs_scan_start(struct Scsi_Host *);
-static int mvs_slave_configure(struct scsi_device *sdev);
-
-static struct scsi_transport_template *mvs_stt;
-
-static const struct mvs_chip_info mvs_chips[] = {
-	[chip_6320] =		{ 2, 16, 9  },
-	[chip_6440] =		{ 4, 16, 9  },
-	[chip_6480] =		{ 8, 32, 10 },
-};
-
-static struct scsi_host_template mvs_sht = {
-	.module			= THIS_MODULE,
-	.name			= DRV_NAME,
-	.queuecommand		= sas_queuecommand,
-	.target_alloc		= sas_target_alloc,
-	.slave_configure	= mvs_slave_configure,
-	.slave_destroy		= sas_slave_destroy,
-	.scan_finished		= mvs_scan_finished,
-	.scan_start		= mvs_scan_start,
-	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
-	.bios_param		= sas_bios_param,
-	.can_queue		= 1,
-	.cmd_per_lun		= 1,
-	.this_id		= -1,
-	.sg_tablesize		= SG_ALL,
-	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
-	.use_clustering		= ENABLE_CLUSTERING,
-	.eh_device_reset_handler	= sas_eh_device_reset_handler,
-	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
-	.slave_alloc		= sas_slave_alloc,
-	.target_destroy		= sas_target_destroy,
-	.ioctl			= sas_ioctl,
-};
-
-static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
-{
-	u32 i;
-	u32 run;
-	u32 offset;
-
-	offset = 0;
-	while (size) {
-		printk("%08X : ", baseaddr + offset);
-		if (size >= 16)
-			run = 16;
-		else
-			run = size;
-		size -= run;
-		for (i = 0; i < 16; i++) {
-			if (i < run)
-				printk("%02X ", (u32)data[i]);
-			else
-				printk("   ");
-		}
-		printk(": ");
-		for (i = 0; i < run; i++)
-			printk("%c", isalnum(data[i]) ? data[i] : '.');
-		printk("\n");
-		data = &data[16];
-		offset += run;
-	}
-	printk("\n");
-}
-
-#if _MV_DUMP
-static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
-				   enum sas_protocol proto)
-{
-	u32 offset;
-	struct pci_dev *pdev = mvi->pdev;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-
-	offset = slot->cmd_size + MVS_OAF_SZ +
-	    sizeof(struct mvs_prd) * slot->n_elem;
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Status buffer[%d] :\n",
-			tag);
-	mvs_hexdump(32, (u8 *) slot->response,
-		    (u32) slot->buf_dma + offset);
-}
-#endif
-
-static void mvs_hba_memory_dump(struct mvs_info *mvi, u32 tag,
-				enum sas_protocol proto)
-{
-#if _MV_DUMP
-	u32 sz, w_ptr;
-	u64 addr;
-	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-
-	/*Delivery Queue */
-	sz = mr32(TX_CFG) & TX_RING_SZ_MASK;
-	w_ptr = slot->tx;
-	addr = mr32(TX_HI) << 16 << 16 | mr32(TX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Delivery Queue Size=%04d , WRT_PTR=%04X\n", sz, w_ptr);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Delivery Queue Base Address=0x%llX (PA)"
-		"(tx_dma=0x%llX), Entry=%04d\n",
-		addr, mvi->tx_dma, w_ptr);
-	mvs_hexdump(sizeof(u32), (u8 *)(&mvi->tx[mvi->tx_prod]),
-			(u32) mvi->tx_dma + sizeof(u32) * w_ptr);
-	/*Command List */
-	addr = mvi->slot_dma;
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Command List Base Address=0x%llX (PA)"
-		"(slot_dma=0x%llX), Header=%03d\n",
-		addr, slot->buf_dma, tag);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Command Header[%03d]:\n", tag);
-	/*mvs_cmd_hdr */
-	mvs_hexdump(sizeof(struct mvs_cmd_hdr), (u8 *)(&mvi->slot[tag]),
-		(u32) mvi->slot_dma + tag * sizeof(struct mvs_cmd_hdr));
-	/*1.command table area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Command Table :\n");
-	mvs_hexdump(slot->cmd_size, (u8 *) slot->buf, (u32) slot->buf_dma);
-	/*2.open address frame area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Open Address Frame :\n");
-	mvs_hexdump(MVS_OAF_SZ, (u8 *) slot->buf + slot->cmd_size,
-				(u32) slot->buf_dma + slot->cmd_size);
-	/*3.status buffer */
-	mvs_hba_sb_dump(mvi, tag, proto);
-	/*4.PRD table */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->PRD table :\n");
-	mvs_hexdump(sizeof(struct mvs_prd) * slot->n_elem,
-		(u8 *) slot->buf + slot->cmd_size + MVS_OAF_SZ,
-		(u32) slot->buf_dma + slot->cmd_size + MVS_OAF_SZ);
-#endif
-}
-
-static void mvs_hba_cq_dump(struct mvs_info *mvi)
-{
-#if (_MV_DUMP > 2)
-	u64 addr;
-	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
-	u32 entry = mvi->rx_cons + 1;
-	u32 rx_desc = le32_to_cpu(mvi->rx[entry]);
-
-	/*Completion Queue */
-	addr = mr32(RX_HI) << 16 << 16 | mr32(RX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Completion Task = 0x%p\n",
-		   mvi->slot_info[rx_desc & RXQ_SLOT_MASK].task);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Completion List Base Address=0x%llX (PA), "
-		"CQ_Entry=%04d, CQ_WP=0x%08X\n",
-		addr, entry - 1, mvi->rx[0]);
-	mvs_hexdump(sizeof(u32), (u8 *)(&rx_desc),
-		    mvi->rx_dma + sizeof(u32) * entry);
-#endif
-}
-
-static void mvs_hba_interrupt_enable(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(GBL_CTL);
-
-	mw32(GBL_CTL, tmp | INT_EN);
-}
-
-static void mvs_hba_interrupt_disable(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(GBL_CTL);
-
-	mw32(GBL_CTL, tmp & ~INT_EN);
-}
-
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
-
-/* move to PCI layer or libata core? */
-static int pci_go_64(struct pci_dev *pdev)
-{
-	int rc;
-
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-			if (rc) {
-				dev_printk(KERN_ERR, &pdev->dev,
-					   "64-bit DMA enable failed\n");
-				return rc;
-			}
-		}
-	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit DMA enable failed\n");
-			return rc;
-		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit consistent DMA enable failed\n");
-			return rc;
-		}
-	}
-
-	return rc;
-}
-
-static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
-{
-	if (task->lldd_task) {
-		struct mvs_slot_info *slot;
-		slot = (struct mvs_slot_info *) task->lldd_task;
-		*tag = slot - mvi->slot_info;
-		return 1;
-	}
-	return 0;
-}
-
-static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
-{
-	void *bitmap = (void *) &mvi->tags;
-	clear_bit(tag, bitmap);
-}
-
-static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
-{
-	mvs_tag_clear(mvi, tag);
-}
-
-static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
-{
-	void *bitmap = (void *) &mvi->tags;
-	set_bit(tag, bitmap);
-}
-
-static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
-{
-	unsigned int index, tag;
-	void *bitmap = (void *) &mvi->tags;
-
-	index = find_first_zero_bit(bitmap, MVS_SLOTS);
-	tag = index;
-	if (tag >= MVS_SLOTS)
-		return -SAS_QUEUE_FULL;
-	mvs_tag_set(mvi, tag);
-	*tag_out = tag;
-	return 0;
-}
-
-static void mvs_tag_init(struct mvs_info *mvi)
-{
-	int i;
-	for (i = 0; i < MVS_SLOTS; ++i)
-		mvs_tag_clear(mvi, i);
-}
-
-#ifndef MVS_DISABLE_NVRAM
-static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
-{
-	int timeout = 1000;
-
-	if (addr & ~SPI_ADDR_MASK)
-		return -EINVAL;
-
-	writel(addr, regs + SPI_CMD);
-	writel(TWSI_RD, regs + SPI_CTL);
-
-	while (timeout-- > 0) {
-		if (readl(regs + SPI_CTL) & TWSI_RDY) {
-			*data = readl(regs + SPI_DATA);
-			return 0;
-		}
-
-		udelay(10);
-	}
-
-	return -EBUSY;
-}
-
-static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
-			    void *buf, u32 buflen)
-{
-	u32 addr_end, tmp_addr, i, j;
-	u32 tmp = 0;
-	int rc;
-	u8 *tmp8, *buf8 = buf;
-
-	addr_end = addr + buflen;
-	tmp_addr = ALIGN(addr, 4);
-	if (addr > 0xff)
-		return -EINVAL;
-
-	j = addr & 0x3;
-	if (j) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		tmp8 = (u8 *)&tmp;
-		for (i = j; i < 4; i++)
-			*buf8++ = tmp8[i];
-
-		tmp_addr += 4;
-	}
-
-	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		memcpy(buf8, &tmp, 4);
-		buf8 += 4;
-	}
-
-	if (tmp_addr < addr_end) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		tmp8 = (u8 *)&tmp;
-		j = addr_end - tmp_addr;
-		for (i = 0; i < j; i++)
-			*buf8++ = tmp8[i];
-
-		tmp_addr += 4;
-	}
-
-	return 0;
-}
-#endif
-
-static int mvs_nvram_read(struct mvs_info *mvi, u32 addr,
-			  void *buf, u32 buflen)
-{
-#ifndef MVS_DISABLE_NVRAM
-	void __iomem *regs = mvi->regs;
-	int rc, i;
-	u32 sum;
-	u8 hdr[2], *tmp;
-	const char *msg;
-
-	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
-	if (rc) {
-		msg = "nvram hdr read failed";
-		goto err_out;
-	}
-	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
-	if (rc) {
-		msg = "nvram read failed";
-		goto err_out;
-	}
-
-	if (hdr[0] != 0x5A) {
-		/* entry id */
-		msg = "invalid nvram entry id";
-		rc = -ENOENT;
-		goto err_out;
-	}
-
-	tmp = buf;
-	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
-	for (i = 0; i < buflen; i++)
-		sum += ((u32)tmp[i]);
-
-	if (sum) {
-		msg = "nvram checksum failure";
-		rc = -EILSEQ;
-		goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
-	return rc;
-#else
-	/* FIXME , For SAS target mode */
-	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
-	return 0;
-#endif
-}
-
-static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-	if (!phy->phy_attached)
-		return;
-
-	if (sas_phy->phy) {
-		struct sas_phy *sphy = sas_phy->phy;
-
-		sphy->negotiated_linkrate = sas_phy->linkrate;
-		sphy->minimum_linkrate = phy->minimum_linkrate;
-		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
-		sphy->maximum_linkrate = phy->maximum_linkrate;
-		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
-	}
-
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		struct sas_identify_frame *id;
-
-		id = (struct sas_identify_frame *)phy->frame_rcvd;
-		id->dev_type = phy->identify.device_type;
-		id->initiator_bits = SAS_PROTOCOL_ALL;
-		id->target_bits = phy->identify.target_port_protocols;
-	} else if (phy->phy_type & PORT_TYPE_SATA) {
-		/* TODO */
-	}
-	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
-	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
-				   PORTE_BYTES_DMAED);
-}
-
-static int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
-{
-	/* give the phy enabling interrupt event time to come in (1s
-	 * is empirically about all it takes) */
-	if (time < HZ)
-		return 0;
-	/* Wait for discovery to finish */
-	scsi_flush_work(shost);
-	return 1;
-}
-
-static void mvs_scan_start(struct Scsi_Host *shost)
-{
-	int i;
-	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
-
-	for (i = 0; i < mvi->chip->n_phy; ++i) {
-		mvs_bytes_dmaed(mvi, i);
-	}
-}
-
-static int mvs_slave_configure(struct scsi_device *sdev)
-{
-	struct domain_device *dev = sdev_to_domain_dev(sdev);
-	int ret = sas_slave_configure(sdev);
-
-	if (ret)
-		return ret;
-
-	if (dev_is_sata(dev)) {
-		/* struct ata_port *ap = dev->sata_dev.ap; */
-		/* struct ata_device *adev = ap->link.device; */
-
-		/* clamp at no NCQ for the time being */
-		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
-		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
-	}
-	return 0;
-}
-
-static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
-{
-	struct pci_dev *pdev = mvi->pdev;
-	struct sas_ha_struct *sas_ha = &mvi->sas;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
-	/*
-	* events is port event now ,
-	* we need check the interrupt status which belongs to per port.
-	*/
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Port %d Event = %X\n",
-		phy_no, phy->irq_status);
-
-	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
-		mvs_release_task(mvi, phy_no);
-		if (!mvs_is_phy_ready(mvi, phy_no)) {
-			sas_phy_disconnected(sas_phy);
-			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
-			dev_printk(KERN_INFO, &pdev->dev,
-				"Port %d Unplug Notice\n", phy_no);
-
-		} else
-			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
-	}
-	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
-		if (phy->irq_status & PHYEV_COMWAKE) {
-			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
-			mvs_write_port_irq_mask(mvi, phy_no,
-						tmp | PHYEV_SIG_FIS);
-		}
-		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
-			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
-			if (phy->phy_status) {
-				mvs_detect_porttype(mvi, phy_no);
-
-				if (phy->phy_type & PORT_TYPE_SATA) {
-					u32 tmp = mvs_read_port_irq_mask(mvi,
-								phy_no);
-					tmp &= ~PHYEV_SIG_FIS;
-					mvs_write_port_irq_mask(mvi,
-								phy_no, tmp);
-				}
-
-				mvs_update_phyinfo(mvi, phy_no, 0);
-				sas_ha->notify_phy_event(sas_phy,
-							PHYE_OOB_DONE);
-				mvs_bytes_dmaed(mvi, phy_no);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"plugin interrupt but phy is gone\n");
-				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
-							NULL);
-			}
-		} else if (phy->irq_status & PHYEV_BROAD_CH) {
-			mvs_release_task(mvi, phy_no);
-			sas_ha->notify_port_event(sas_phy,
-						PORTE_BROADCAST_RCVD);
-		}
-	}
-	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
-}
-
-static void mvs_int_sata(struct mvs_info *mvi)
-{
-	u32 tmp;
-	void __iomem *regs = mvi->regs;
-	tmp = mr32(INT_STAT_SRS);
-	mw32(INT_STAT_SRS, tmp & 0xFFFF);
-}
-
-static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
-				u32 slot_idx)
-{
-	void __iomem *regs = mvi->regs;
-	struct domain_device *dev = task->dev;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	u32 reg_set, phy_mask;
-
-	if (!sas_protocol_ata(task->task_proto)) {
-		reg_set = 0;
-		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-				sas_port->phy_mask;
-	} else {
-		reg_set = port->taskfileset;
-		phy_mask = sas_port->phy_mask;
-	}
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
-					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
-					(phy_mask << TXQ_PHY_SHIFT) |
-					(reg_set << TXQ_SRS_SHIFT));
-
-	mw32(TX_PROD_IDX, mvi->tx_prod);
-	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-}
-
-static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
-			u32 slot_idx, int err)
-{
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	struct task_status_struct *tstat = &task->task_status;
-	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
-	int stat = SAM_GOOD;
-
-	resp->frame_len = sizeof(struct dev_to_host_fis);
-	memcpy(&resp->ending_fis[0],
-	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
-	       sizeof(struct dev_to_host_fis));
-	tstat->buf_valid_size = sizeof(*resp);
-	if (unlikely(err))
-		stat = SAS_PROTO_RESPONSE;
-	return stat;
-}
-
-static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	mvs_tag_clear(mvi, slot_idx);
-}
-
-static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
-			  struct mvs_slot_info *slot, u32 slot_idx)
-{
-	if (!sas_protocol_ata(task->task_proto))
-		if (slot->n_elem)
-			pci_unmap_sg(mvi->pdev, task->scatter,
-				     slot->n_elem, task->data_dir);
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
-			     PCI_DMA_FROMDEVICE);
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
-			     PCI_DMA_TODEVICE);
-		break;
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SSP:
-	default:
-		/* do nothing */
-		break;
-	}
-	list_del(&slot->list);
-	task->lldd_task = NULL;
-	slot->task = NULL;
-	slot->port = NULL;
-}
-
-static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
-			 u32 slot_idx)
-{
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
-	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
-	int stat = SAM_CHECK_COND;
-
-	if (err_dw1 & SLOT_BSY_ERR) {
-		stat = SAS_QUEUE_FULL;
-		mvs_slot_reset(mvi, task, slot_idx);
-	}
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		break;
-	case SAS_PROTOCOL_SMP:
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-		if (err_dw0 & TFILE_ERR)
-			stat = mvs_sata_done(mvi, task, slot_idx, 1);
-		break;
-	default:
-		break;
-	}
-
-	mvs_hexdump(16, (u8 *) slot->response, 0);
-	return stat;
-}
-
-static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	struct sas_task *task = slot->task;
-	struct task_status_struct *tstat;
-	struct mvs_port *port;
-	bool aborted;
-	void *to;
-
-	if (unlikely(!task || !task->lldd_task))
-		return -1;
-
-	mvs_hba_cq_dump(mvi);
-
-	spin_lock(&task->task_state_lock);
-	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
-	if (!aborted) {
-		task->task_state_flags &=
-		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
-		task->task_state_flags |= SAS_TASK_STATE_DONE;
-	}
-	spin_unlock(&task->task_state_lock);
-
-	if (aborted) {
-		mvs_slot_task_free(mvi, task, slot, slot_idx);
-		mvs_slot_free(mvi, rx_desc);
-		return -1;
-	}
-
-	port = slot->port;
-	tstat = &task->task_status;
-	memset(tstat, 0, sizeof(*tstat));
-	tstat->resp = SAS_TASK_COMPLETE;
-
-	if (unlikely(!port->port_attached || flags)) {
-		mvs_slot_err(mvi, task, slot_idx);
-		if (!sas_protocol_ata(task->task_proto))
-			tstat->stat = SAS_PHY_DOWN;
-		goto out;
-	}
-
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
-		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
-		goto out;
-	}
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		/* hw says status == 0, datapres == 0 */
-		if (rx_desc & RXQ_GOOD) {
-			tstat->stat = SAM_GOOD;
-			tstat->resp = SAS_TASK_COMPLETE;
-		}
-		/* response frame present */
-		else if (rx_desc & RXQ_RSP) {
-			struct ssp_response_iu *iu =
-			    slot->response + sizeof(struct mvs_err_info);
-			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
-		}
-
-		/* should never happen? */
-		else
-			tstat->stat = SAM_CHECK_COND;
-		break;
-
-	case SAS_PROTOCOL_SMP: {
-			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-			tstat->stat = SAM_GOOD;
-			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
-			memcpy(to + sg_resp->offset,
-				slot->response + sizeof(struct mvs_err_info),
-				sg_dma_len(sg_resp));
-			kunmap_atomic(to, KM_IRQ0);
-			break;
-		}
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
-			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
-			break;
-		}
-
-	default:
-		tstat->stat = SAM_CHECK_COND;
-		break;
-	}
-
-out:
-	mvs_slot_task_free(mvi, task, slot, slot_idx);
-	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
-		mvs_slot_free(mvi, rx_desc);
-
-	spin_unlock(&mvi->lock);
-	task->task_done(task);
-	spin_lock(&mvi->lock);
-	return tstat->stat;
-}
-
-static void mvs_release_task(struct mvs_info *mvi, int phy_no)
-{
-	struct list_head *pos, *n;
-	struct mvs_slot_info *slot;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct mvs_port *port = phy->port;
-	u32 rx_desc;
-
-	if (!port)
-		return;
-
-	list_for_each_safe(pos, n, &port->list) {
-		slot = container_of(pos, struct mvs_slot_info, list);
-		rx_desc = (u32) (slot - mvi->slot_info);
-		mvs_slot_complete(mvi, rx_desc, 1);
-	}
-}
-
-static void mvs_int_full(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, stat;
-	int i;
-
-	stat = mr32(INT_STAT);
-
-	mvs_int_rx(mvi, false);
-
-	for (i = 0; i < MVS_MAX_PORTS; i++) {
-		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
-		if (tmp)
-			mvs_int_port(mvi, i, tmp);
-	}
-
-	if (stat & CINT_SRS)
-		mvs_int_sata(mvi);
-
-	mw32(INT_STAT, stat);
-}
-
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
-{
-	void __iomem *regs = mvi->regs;
-	u32 rx_prod_idx, rx_desc;
-	bool attn = false;
-	struct pci_dev *pdev = mvi->pdev;
-
-	/* the first dword in the RX ring is special: it contains
-	 * a mirror of the hardware's RX producer index, so that
-	 * we don't have to stall the CPU reading that register.
-	 * The actual RX ring is offset by one dword, due to this.
-	 */
-	rx_prod_idx = mvi->rx_cons;
-	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
-	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
-		return 0;
-
-	/* The CMPL_Q may come late, read from register and try again
-	* note: if coalescing is enabled,
-	* it will need to read from register every time for sure
-	*/
-	if (mvi->rx_cons == rx_prod_idx)
-		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
-
-	if (mvi->rx_cons == rx_prod_idx)
-		return 0;
-
-	while (mvi->rx_cons != rx_prod_idx) {
-
-		/* increment our internal RX consumer pointer */
-		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
-
-		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
-
-		if (likely(rx_desc & RXQ_DONE))
-			mvs_slot_complete(mvi, rx_desc, 0);
-		if (rx_desc & RXQ_ATTN) {
-			attn = true;
-			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_ERR) {
-			if (!(rx_desc & RXQ_DONE))
-				mvs_slot_complete(mvi, rx_desc, 0);
-			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_SLOT_RESET) {
-			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
-				rx_desc);
-			mvs_slot_free(mvi, rx_desc);
-		}
-	}
-
-	if (attn && self_clear)
-		mvs_int_full(mvi);
-
-	return 0;
-}
-
-#ifdef MVS_USE_TASKLET
-static void mvs_tasklet(unsigned long data)
-{
-	struct mvs_info *mvi = (struct mvs_info *) data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-
-#ifdef MVS_DISABLE_MSI
-	mvs_int_full(mvi);
-#else
-	mvs_int_rx(mvi, true);
-#endif
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
-#endif
-
-static irqreturn_t mvs_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-	void __iomem *regs = mvi->regs;
-	u32 stat;
-
-	stat = mr32(GBL_INT_STAT);
-
-	if (stat == 0 || stat == 0xffffffff)
-		return IRQ_NONE;
-
-	/* clear CMD_CMPLT ASAP */
-	mw32_f(INT_STAT, CINT_DONE);
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_full(mvi);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-
-#ifndef MVS_DISABLE_MSI
-static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_rx(mvi, true);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-#endif
-
-struct mvs_task_exec_info {
-	struct sas_task *task;
-	struct mvs_cmd_hdr *hdr;
-	struct mvs_port *port;
-	u32 tag;
-	int n_elem;
-};
-
-static int mvs_task_prep_smp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	int elem, rc, i;
-	struct sas_task *task = tei->task;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct scatterlist *sg_req, *sg_resp;
-	u32 req_len, resp_len, tag = tei->tag;
-	void *buf_tmp;
-	u8 *buf_oaf;
-	dma_addr_t buf_tmp_dma;
-	struct mvs_prd *buf_prd;
-	struct scatterlist *sg;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-	struct asd_sas_port *sas_port = task->dev->port;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
-#if _MV_DUMP
-	u8 *buf_cmd;
-	void *from;
-#endif
-	/*
-	 * DMA-map SMP request, response buffers
-	 */
-	sg_req = &task->smp_task.smp_req;
-	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
-	if (!elem)
-		return -ENOMEM;
-	req_len = sg_dma_len(sg_req);
-
-	sg_resp = &task->smp_task.smp_resp;
-	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
-	if (!elem) {
-		rc = -ENOMEM;
-		goto err_out;
-	}
-	resp_len = sg_dma_len(sg_resp);
-
-	/* must be in dwords */
-	if ((req_len & 0x3) || (resp_len & 0x3)) {
-		rc = -EINVAL;
-		goto err_out_2;
-	}
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
-	buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-#if _MV_DUMP
-	buf_cmd = buf_tmp;
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-	buf_tmp += req_len;
-	buf_tmp_dma += req_len;
-	slot->cmd_size = req_len;
-#else
-	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	/*
-	 * Fill in TX ring and command slot header
-	 */
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
-					TXQ_MODE_I | tag |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT));
-
-	hdr->flags |= flags;
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
-	hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = 0;
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-#if _MV_DUMP
-	/* copy cmd table */
-	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
-	memcpy(buf_cmd, from + sg_req->offset, req_len);
-	kunmap_atomic(from, KM_IRQ0);
-#endif
-	return 0;
-
-err_out_2:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
-		     PCI_DMA_FROMDEVICE);
-err_out:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
-		     PCI_DMA_TODEVICE);
-	return rc;
-}
-
-static void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, offs;
-	u8 *tfs = &port->taskfileset;
-
-	if (*tfs == MVS_ID_NOT_MAPPED)
-		return;
-
-	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-	if (*tfs < 16) {
-		tmp = mr32(PCS);
-		mw32(PCS, tmp & ~offs);
-	} else {
-		tmp = mr32(CTL);
-		mw32(CTL, tmp & ~offs);
-	}
-
-	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
-	if (tmp)
-		mw32(INT_STAT_SRS, tmp);
-
-	*tfs = MVS_ID_NOT_MAPPED;
-}
-
-static u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	int i;
-	u32 tmp, offs;
-	void __iomem *regs = mvi->regs;
-
-	if (port->taskfileset != MVS_ID_NOT_MAPPED)
-		return 0;
-
-	tmp = mr32(PCS);
-
-	for (i = 0; i < mvi->chip->srs_sz; i++) {
-		if (i == 16)
-			tmp = mr32(CTL);
-		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-		if (!(tmp & offs)) {
-			port->taskfileset = i;
-
-			if (i < 16)
-				mw32(PCS, tmp | offs);
-			else
-				mw32(CTL, tmp | offs);
-			tmp = mr32(INT_STAT_SRS) & (1U << i);
-			if (tmp)
-				mw32(INT_STAT_SRS, tmp);
-			return 0;
-		}
-	}
-	return MVS_ID_NOT_MAPPED;
-}
-
-static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
-{
-	struct ata_queued_cmd *qc = task->uldd_task;
-
-	if (qc) {
-		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
-			qc->tf.command == ATA_CMD_FPDMA_READ) {
-			*tag = qc->tag;
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int mvs_task_prep_ata(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	struct sas_task *task = tei->task;
-	struct domain_device *dev = task->dev;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
-	struct mvs_port *port = tei->port;
-	u32 tag = tei->tag;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
-	void *buf_tmp;
-	u8 *buf_cmd, *buf_oaf;
-	dma_addr_t buf_tmp_dma;
-	u32 i, req_len, resp_len;
-	const u32 max_resp_len = SB_RFB_MAX;
-
-	if (mvs_assign_reg_set(mvi, port) == MVS_ID_NOT_MAPPED)
-		return -EBUSY;
-
-	slot = &mvi->slot_info[tag];
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
-					(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT) |
-					(port->taskfileset << TXQ_SRS_SHIFT));
-
-	if (task->ata_task.use_ncq)
-		flags |= MCH_FPDMA;
-	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) {
-		if (task->ata_task.fis.command != ATA_CMD_ID_ATAPI)
-			flags |= MCH_ATAPI;
-	}
-
-	/* FIXME: fill in port multiplier number */
-
-	hdr->flags = cpu_to_le32(flags);
-
-	/* FIXME: the low order order 5 bits for the TAG if enable NCQ */
-	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr->tags))
-		task->ata_task.fis.sector_count |= hdr->tags << 3;
-	else
-		hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = cpu_to_le32(task->total_xfer_len);
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_ATA_CMD_SZ bytes) ************** */
-	buf_cmd = buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_ATA_CMD_SZ;
-	buf_tmp_dma += MVS_ATA_CMD_SZ;
-#if _MV_DUMP
-	slot->cmd_size = MVS_ATA_CMD_SZ;
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	/* used for STP.  unused for SATA? */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	/* FIXME: probably unused, for SATA.  kept here just in case
-	 * we get a STP/SATA error information record
-	 */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	req_len = sizeof(struct host_to_dev_fis);
-	resp_len = MVS_SLOT_BUF_SZ - MVS_ATA_CMD_SZ -
-	    sizeof(struct mvs_err_info) - i;
-
-	/* request, response lengths */
-	resp_len = min(resp_len, max_resp_len);
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
-
-	task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
-	/* fill in command FIS and ATAPI CDB */
-	memcpy(buf_cmd, &task->ata_task.fis, sizeof(struct host_to_dev_fis));
-	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
-		memcpy(buf_cmd + STP_ATAPI_CMD,
-			task->ata_task.atapi_packet, 16);
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (2 << 4) | 0x1;	/* initiator, STP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-	return 0;
-}
-
-static int mvs_task_prep_ssp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	struct sas_task *task = tei->task;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct mvs_port *port = tei->port;
-	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
-	struct ssp_frame_hdr *ssp_hdr;
-	void *buf_tmp;
-	u8 *buf_cmd, *buf_oaf, fburst = 0;
-	dma_addr_t buf_tmp_dma;
-	u32 flags;
-	u32 resp_len, req_len, i, tag = tei->tag;
-	const u32 max_resp_len = SB_RFB_MAX;
-	u8 phy_mask;
-
-	slot = &mvi->slot_info[tag];
-
-	phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-		task->dev->port->phy_mask;
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
-				(TXQ_CMD_SSP << TXQ_CMD_SHIFT) |
-				(phy_mask << TXQ_PHY_SHIFT));
-
-	flags = MCH_RETRY;
-	if (task->ssp_task.enable_first_burst) {
-		flags |= MCH_FBURST;
-		fburst = (1 << 7);
-	}
-	hdr->flags = cpu_to_le32(flags |
-				 (tei->n_elem << MCH_PRD_LEN_SHIFT) |
-				 (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT));
-
-	hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = cpu_to_le32(task->total_xfer_len);
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
-	buf_cmd = buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_SSP_CMD_SZ;
-	buf_tmp_dma += MVS_SSP_CMD_SZ;
-#if _MV_DUMP
-	slot->cmd_size = MVS_SSP_CMD_SZ;
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	resp_len = MVS_SLOT_BUF_SZ - MVS_SSP_CMD_SZ - MVS_OAF_SZ -
-	    sizeof(struct mvs_err_info) - i;
-	resp_len = min(resp_len, max_resp_len);
-
-	req_len = sizeof(struct ssp_frame_hdr) + 28;
-
-	/* request, response lengths */
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (1 << 4) | 0x1;	/* initiator, SSP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in SSP frame header (Command Table.SSP frame header) */
-	ssp_hdr = (struct ssp_frame_hdr *)buf_cmd;
-	ssp_hdr->frame_type = SSP_COMMAND;
-	memcpy(ssp_hdr->hashed_dest_addr, task->dev->hashed_sas_addr,
-	       HASHED_SAS_ADDR_SIZE);
-	memcpy(ssp_hdr->hashed_src_addr,
-	       task->dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
-	ssp_hdr->tag = cpu_to_be16(tag);
-
-	/* fill in command frame IU */
-	buf_cmd += sizeof(*ssp_hdr);
-	memcpy(buf_cmd, &task->ssp_task.LUN, 8);
-	buf_cmd[9] = fburst | task->ssp_task.task_attr |
-			(task->ssp_task.task_prio << 3);
-	memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-	return 0;
-}
-
-static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
-{
-	struct domain_device *dev = task->dev;
-	struct mvs_info *mvi = dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	void __iomem *regs = mvi->regs;
-	struct mvs_task_exec_info tei;
-	struct sas_task *t = task;
-	struct mvs_slot_info *slot;
-	u32 tag = 0xdeadbeef, rc, n_elem = 0;
-	unsigned long flags;
-	u32 n = num, pass = 0;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-	do {
-		dev = t->dev;
-		tei.port = &mvi->port[dev->port->id];
-
-		if (!tei.port->port_attached) {
-			if (sas_protocol_ata(t->task_proto)) {
-				rc = SAS_PHY_DOWN;
-				goto out_done;
-			} else {
-				struct task_status_struct *ts = &t->task_status;
-				ts->resp = SAS_TASK_UNDELIVERED;
-				ts->stat = SAS_PHY_DOWN;
-				t->task_done(t);
-				if (n > 1)
-					t = list_entry(t->list.next,
-							struct sas_task, list);
-				continue;
-			}
-		}
-
-		if (!sas_protocol_ata(t->task_proto)) {
-			if (t->num_scatter) {
-				n_elem = pci_map_sg(mvi->pdev, t->scatter,
-						    t->num_scatter,
-						    t->data_dir);
-				if (!n_elem) {
-					rc = -ENOMEM;
-					goto err_out;
-				}
-			}
-		} else {
-			n_elem = t->num_scatter;
-		}
-
-		rc = mvs_tag_alloc(mvi, &tag);
-		if (rc)
-			goto err_out;
-
-		slot = &mvi->slot_info[tag];
-		t->lldd_task = NULL;
-		slot->n_elem = n_elem;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-		tei.task = t;
-		tei.hdr = &mvi->slot[tag];
-		tei.tag = tag;
-		tei.n_elem = n_elem;
-
-		switch (t->task_proto) {
-		case SAS_PROTOCOL_SMP:
-			rc = mvs_task_prep_smp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SSP:
-			rc = mvs_task_prep_ssp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SATA:
-		case SAS_PROTOCOL_STP:
-		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-			rc = mvs_task_prep_ata(mvi, &tei);
-			break;
-		default:
-			dev_printk(KERN_ERR, &pdev->dev,
-				"unknown sas_task proto: 0x%x\n",
-				t->task_proto);
-			rc = -EINVAL;
-			break;
-		}
-
-		if (rc)
-			goto err_out_tag;
-
-		slot->task = t;
-		slot->port = tei.port;
-		t->lldd_task = (void *) slot;
-		list_add_tail(&slot->list, &slot->port->list);
-		/* TODO: select normal or high priority */
-
-		spin_lock(&t->task_state_lock);
-		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
-		spin_unlock(&t->task_state_lock);
-
-		mvs_hba_memory_dump(mvi, tag, t->task_proto);
-
-		++pass;
-		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-		if (n > 1)
-			t = list_entry(t->list.next, struct sas_task, list);
-	} while (--n);
-
-	rc = 0;
-	goto out_done;
-
-err_out_tag:
-	mvs_tag_free(mvi, tag);
-err_out:
-	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
-	if (!sas_protocol_ata(t->task_proto))
-		if (n_elem)
-			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
-				     t->data_dir);
-out_done:
-	if (pass)
-		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
-	spin_unlock_irqrestore(&mvi->lock, flags);
-	return rc;
-}
-
-static int mvs_task_abort(struct sas_task *task)
-{
-	int rc;
-	unsigned long flags;
-	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	int tag;
-
-	spin_lock_irqsave(&task->task_state_lock, flags);
-	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
-		rc = TMF_RESP_FUNC_COMPLETE;
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		goto out_done;
-	}
-	spin_unlock_irqrestore(&task->task_state_lock, flags);
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
-		break;
-	case SAS_PROTOCOL_SSP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
-		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
-#if _MV_DUMP
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
-		mvs_hexdump(sizeof(struct host_to_dev_fis),
-				(void *)&task->ata_task.fis, 0);
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
-		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
-#endif
-		spin_lock_irqsave(&task->task_state_lock, flags);
-		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
-			/* TODO */
-			;
-		}
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		break;
-	}
-	default:
-		break;
-	}
-
-	if (mvs_find_tag(mvi, task, &tag)) {
-		spin_lock_irqsave(&mvi->lock, flags);
-		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
-		spin_unlock_irqrestore(&mvi->lock, flags);
-	}
-	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
-		rc = TMF_RESP_FUNC_COMPLETE;
-	else
-		rc = TMF_RESP_FUNC_FAILED;
-out_done:
-	return rc;
-}
-
-static void mvs_free(struct mvs_info *mvi)
-{
-	int i;
-
-	if (!mvi)
-		return;
-
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-
-		if (slot->buf)
-			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
-					  slot->buf, slot->buf_dma);
-	}
-
-	if (mvi->tx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				  mvi->tx, mvi->tx_dma);
-	if (mvi->rx_fis)
-		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
-				  mvi->rx_fis, mvi->rx_fis_dma);
-	if (mvi->rx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				  mvi->rx, mvi->rx_dma);
-	if (mvi->slot)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->slot) * MVS_SLOTS,
-				  mvi->slot, mvi->slot_dma);
-#ifdef MVS_ENABLE_PERI
-	if (mvi->peri_regs)
-		iounmap(mvi->peri_regs);
-#endif
-	if (mvi->regs)
-		iounmap(mvi->regs);
-	if (mvi->shost)
-		scsi_host_put(mvi->shost);
-	kfree(mvi->sas.sas_port);
-	kfree(mvi->sas.sas_phy);
-	kfree(mvi);
-}
-
-/* FIXME: locking? */
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata)
-{
-	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
-	int rc = 0, phy_id = sas_phy->id;
-	u32 tmp;
-
-	tmp = mvs_read_phy_ctl(mvi, phy_id);
-
-	switch (func) {
-	case PHY_FUNC_SET_LINK_RATE:{
-			struct sas_phy_linkrates *rates = funcdata;
-			u32 lrmin = 0, lrmax = 0;
-
-			lrmin = (rates->minimum_linkrate << 8);
-			lrmax = (rates->maximum_linkrate << 12);
-
-			if (lrmin) {
-				tmp &= ~(0xf << 8);
-				tmp |= lrmin;
-			}
-			if (lrmax) {
-				tmp &= ~(0xf << 12);
-				tmp |= lrmax;
-			}
-			mvs_write_phy_ctl(mvi, phy_id, tmp);
-			break;
-		}
-
-	case PHY_FUNC_HARD_RESET:
-		if (tmp & PHY_RST_HARD)
-			break;
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
-		break;
-
-	case PHY_FUNC_LINK_RESET:
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
-		break;
-
-	case PHY_FUNC_DISABLE:
-	case PHY_FUNC_RELEASE_SPINUP_HOLD:
-	default:
-		rc = -EOPNOTSUPP;
-	}
-
-	return rc;
-}
-
-static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
-{
-	struct mvs_phy *phy = &mvi->phy[phy_id];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
-	sas_phy->class = SAS;
-	sas_phy->iproto = SAS_PROTOCOL_ALL;
-	sas_phy->tproto = 0;
-	sas_phy->type = PHY_TYPE_PHYSICAL;
-	sas_phy->role = PHY_ROLE_INITIATOR;
-	sas_phy->oob_mode = OOB_NOT_CONNECTED;
-	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
-
-	sas_phy->id = phy_id;
-	sas_phy->sas_addr = &mvi->sas_addr[0];
-	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
-	sas_phy->ha = &mvi->sas;
-	sas_phy->lldd_phy = phy;
-}
-
-static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
-					    const struct pci_device_id *ent)
-{
-	struct mvs_info *mvi;
-	unsigned long res_start, res_len, res_flag;
-	struct asd_sas_phy **arr_phy;
-	struct asd_sas_port **arr_port;
-	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
-	int i;
-
-	/*
-	 * alloc and init our per-HBA mvs_info struct
-	 */
-
-	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
-	if (!mvi)
-		return NULL;
-
-	spin_lock_init(&mvi->lock);
-#ifdef MVS_USE_TASKLET
-	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
-#endif
-	mvi->pdev = pdev;
-	mvi->chip = chip;
-
-	if (pdev->device == 0x6440 && pdev->revision == 0)
-		mvi->flags |= MVF_PHY_PWR_FIX;
-
-	/*
-	 * alloc and init SCSI, SAS glue
-	 */
-
-	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
-	if (!mvi->shost)
-		goto err_out;
-
-	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	if (!arr_phy || !arr_port)
-		goto err_out;
-
-	for (i = 0; i < MVS_MAX_PHYS; i++) {
-		mvs_phy_init(mvi, i);
-		arr_phy[i] = &mvi->phy[i].sas_phy;
-		arr_port[i] = &mvi->port[i].sas_port;
-		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
-		mvi->port[i].wide_port_phymap = 0;
-		mvi->port[i].port_attached = 0;
-		INIT_LIST_HEAD(&mvi->port[i].list);
-	}
-
-	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
-	mvi->shost->transportt = mvs_stt;
-	mvi->shost->max_id = 21;
-	mvi->shost->max_lun = ~0;
-	mvi->shost->max_channel = 0;
-	mvi->shost->max_cmd_len = 16;
-
-	mvi->sas.sas_ha_name = DRV_NAME;
-	mvi->sas.dev = &pdev->dev;
-	mvi->sas.lldd_module = THIS_MODULE;
-	mvi->sas.sas_addr = &mvi->sas_addr[0];
-	mvi->sas.sas_phy = arr_phy;
-	mvi->sas.sas_port = arr_port;
-	mvi->sas.num_phys = chip->n_phy;
-	mvi->sas.lldd_max_execute_num = 1;
-	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
-	mvi->shost->can_queue = MVS_CAN_QUEUE;
-	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
-	mvi->sas.lldd_ha = mvi;
-	mvi->sas.core.shost = mvi->shost;
-
-	mvs_tag_init(mvi);
-
-	/*
-	 * ioremap main and peripheral registers
-	 */
-
-#ifdef MVS_ENABLE_PERI
-	res_start = pci_resource_start(pdev, 2);
-	res_len = pci_resource_len(pdev, 2);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	mvi->peri_regs = ioremap_nocache(res_start, res_len);
-	if (!mvi->peri_regs)
-		goto err_out;
-#endif
-
-	res_start = pci_resource_start(pdev, 4);
-	res_len = pci_resource_len(pdev, 4);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	res_flag = pci_resource_flags(pdev, 4);
-	if (res_flag & IORESOURCE_CACHEABLE)
-		mvi->regs = ioremap(res_start, res_len);
-	else
-		mvi->regs = ioremap_nocache(res_start, res_len);
-
-	if (!mvi->regs)
-		goto err_out;
-
-	/*
-	 * alloc and init our DMA areas
-	 */
-
-	mvi->tx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				     &mvi->tx_dma, GFP_KERNEL);
-	if (!mvi->tx)
-		goto err_out;
-	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
-
-	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
-					 &mvi->rx_fis_dma, GFP_KERNEL);
-	if (!mvi->rx_fis)
-		goto err_out;
-	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
-
-	mvi->rx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				     &mvi->rx_dma, GFP_KERNEL);
-	if (!mvi->rx)
-		goto err_out;
-	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
-
-	mvi->rx[0] = cpu_to_le32(0xfff);
-	mvi->rx_cons = 0xfff;
-
-	mvi->slot = dma_alloc_coherent(&pdev->dev,
-				       sizeof(*mvi->slot) * MVS_SLOTS,
-				       &mvi->slot_dma, GFP_KERNEL);
-	if (!mvi->slot)
-		goto err_out;
-	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
-
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-
-		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
-					       &slot->buf_dma, GFP_KERNEL);
-		if (!slot->buf)
-			goto err_out;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-	}
-
-	/* finally, read NVRAM to get our SAS address */
-	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
-		goto err_out;
-	return mvi;
-
-err_out:
-	mvs_free(mvi);
-	return NULL;
-}
-
-static u32 mvs_cr32(void __iomem *regs, u32 addr)
-{
-	mw32(CMD_ADDR, addr);
-	return mr32(CMD_DATA);
-}
-
-static void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
-{
-	mw32(CMD_ADDR, addr);
-	mw32(CMD_DATA, val);
-}
-
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
-{
-	void __iomem *regs = mvi->regs;
-	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
-		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
-}
-
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
-{
-	void __iomem *regs = mvi->regs;
-	if (port < 4)
-		mw32(P0_SER_CTLSTAT + port * 4, val);
-	else
-		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
-}
-
-static u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	return (port < 4)?readl(regs + port * 8):
-		readl(regs2 + (port - 4) * 8);
-}
-
-static void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
-				u32 port, u32 val)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	if (port < 4)
-		writel(val, regs + port * 8);
-	else
-		writel(val, regs2 + (port - 4) * 8);
-}
-
-static u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port);
-}
-
-static void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port, val);
-}
-
-static void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_ADDR, MVS_P4_CFG_ADDR, port, addr);
-}
-
-static u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port);
-}
-
-static void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port, val);
-}
-
-static void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_ADDR, MVS_P4_VSR_ADDR, port, addr);
-}
-
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port);
-}
-
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port, val);
-}
-
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port);
-}
-
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port, val);
-}
-
-static void __devinit mvs_phy_hacks(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	/* workaround for SATA R-ERR, to ignore phy glitch */
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= ~(1 << 9);
-	tmp |= (1 << 10);
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
-
-	/* enable retry 127 times */
-	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
-
-	/* extend open frame timeout to max */
-	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
-	tmp &= ~0xffff;
-	tmp |= 0x3fff;
-	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
-
-	/* workaround for WDTIMEOUT , set to 550 ms */
-	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
-
-	/* not to halt for different port op during wideport link change */
-	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
-
-	/* workaround for Seagate disk not-found OOB sequence, recv
-	 * COMINIT before sending out COMWAKE */
-	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
-	tmp &= 0x0000ffff;
-	tmp |= 0x00fa0000;
-	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
-
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= 0x1fffffff;
-	tmp |= (2U << 29);	/* 8 ms retry */
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
-
-	/* TEST - for phy decoding error, adjust voltage levels */
-	mw32(P0_VSR_ADDR + 0, 0x8);
-	mw32(P0_VSR_DATA + 0, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 8, 0x8);
-	mw32(P0_VSR_DATA + 8, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 16, 0x8);
-	mw32(P0_VSR_DATA + 16, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 24, 0x8);
-	mw32(P0_VSR_DATA + 24, 0x2F0);
-
-}
-
-static void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(PCS);
-	if (mvi->chip->n_phy <= 4)
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
-	else
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
-	mw32(PCS, tmp);
-}
-
-static void mvs_detect_porttype(struct mvs_info *mvi, int i)
-{
-	void __iomem *regs = mvi->regs;
-	u32 reg;
-	struct mvs_phy *phy = &mvi->phy[i];
-
-	/* TODO check & save device type */
-	reg = mr32(GBL_PORT_TYPE);
-
-	if (reg & MODE_SAS_SATA & (1 << i))
-		phy->phy_type |= PORT_TYPE_SAS;
-	else
-		phy->phy_type |= PORT_TYPE_SATA;
-}
-
-static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
-{
-	u32 *s = (u32 *) buf;
-
-	if (!s)
-		return NULL;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
-	s[3] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
-	s[2] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
-	s[1] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
-	s[0] = mvs_read_port_cfg_data(mvi, i);
-
-	return (void *)s;
-}
-
-static u32 mvs_is_sig_fis_received(u32 irq_status)
-{
-	return irq_status & PHYEV_SIG_FIS;
-}
-
-static void mvs_update_wideport(struct mvs_info *mvi, int i)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;
-	int j, no;
-
-	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
-		if (no & 1) {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no,
-						port->wide_port_phymap);
-		} else {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no, 0);
-		}
-}
-
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
-{
-	u32 tmp;
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;;
-
-	tmp = mvs_read_phy_ctl(mvi, i);
-
-	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
-		if (!port)
-			phy->phy_attached = 1;
-		return tmp;
-	}
-
-	if (port) {
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			port->wide_port_phymap &= ~(1U << i);
-			if (!port->wide_port_phymap)
-				port->port_attached = 0;
-			mvs_update_wideport(mvi, i);
-		} else if (phy->phy_type & PORT_TYPE_SATA)
-			port->port_attached = 0;
-		mvs_free_reg_set(mvi, phy->port);
-		phy->port = NULL;
-		phy->phy_attached = 0;
-		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
-	}
-	return 0;
-}
-
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
-					int get_st)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct pci_dev *pdev = mvi->pdev;
-	u32 tmp;
-	u64 tmp64;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
-	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-
-	if (get_st) {
-		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
-		phy->phy_status = mvs_is_phy_ready(mvi, i);
-	}
-
-	if (phy->phy_status) {
-		u32 phy_st;
-		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
-		phy_st = mvs_read_port_cfg_data(mvi, i);
-
-		sas_phy->linkrate =
-			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
-		phy->minimum_linkrate =
-			(phy->phy_status &
-				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
-		phy->maximum_linkrate =
-			(phy->phy_status &
-				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
-
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			/* Updated attached_sas_addr */
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
-			phy->att_dev_sas_addr =
-				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
-			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
-			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
-			phy->identify.device_type =
-			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
-
-			if (phy->identify.device_type == SAS_END_DEV)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SSP;
-			else if (phy->identify.device_type != NO_DEVICE)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SMP;
-			if (phy_st & PHY_OOB_DTCTD)
-				sas_phy->oob_mode = SAS_OOB_MODE;
-			phy->frame_rcvd_size =
-			    sizeof(struct sas_identify_frame);
-		} else if (phy->phy_type & PORT_TYPE_SATA) {
-			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
-			if (mvs_is_sig_fis_received(phy->irq_status)) {
-				phy->att_dev_sas_addr = i;	/* temp */
-				if (phy_st & PHY_OOB_DTCTD)
-					sas_phy->oob_mode = SATA_OOB_MODE;
-				phy->frame_rcvd_size =
-				    sizeof(struct dev_to_host_fis);
-				mvs_get_d2h_reg(mvi, i,
-						(void *)sas_phy->frame_rcvd);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"No sig fis\n");
-				phy->phy_type &= ~(PORT_TYPE_SATA);
-				goto out_done;
-			}
-		}
-		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
-		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
-
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"phy[%d] Get Attached Address 0x%llX ,"
-			" SAS Address 0x%llX\n",
-			i,
-			(unsigned long long)phy->att_dev_sas_addr,
-			(unsigned long long)phy->dev_sas_addr);
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"Rate = %x , type = %d\n",
-			sas_phy->linkrate, phy->phy_type);
-
-		/* workaround for HW phy decoding error on 1.5g disk drive */
-		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
-		tmp = mvs_read_port_vsr_data(mvi, i);
-		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
-			SAS_LINK_RATE_1_5_GBPS)
-			tmp &= ~PHY_MODE6_LATECLK;
-		else
-			tmp |= PHY_MODE6_LATECLK;
-		mvs_write_port_vsr_data(mvi, i, tmp);
-
-	}
-out_done:
-	if (get_st)
-		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
-}
-
-static void mvs_port_formed(struct asd_sas_phy *sas_phy)
-{
-	struct sas_ha_struct *sas_ha = sas_phy->ha;
-	struct mvs_info *mvi = sas_ha->lldd_ha;
-	struct asd_sas_port *sas_port = sas_phy->port;
-	struct mvs_phy *phy = sas_phy->lldd_phy;
-	struct mvs_port *port = &mvi->port[sas_port->id];
-	unsigned long flags;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-	port->port_attached = 1;
-	phy->port = port;
-	port->taskfileset = MVS_ID_NOT_MAPPED;
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		port->wide_port_phymap = sas_port->phy_mask;
-		mvs_update_wideport(mvi, sas_phy->id);
-	}
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
-
-static int mvs_I_T_nexus_reset(struct domain_device *dev)
-{
-	return TMF_RESP_FUNC_FAILED;
-}
-
-static int __devinit mvs_hw_init(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	int i;
-	u32 tmp, cctl;
-
-	/* make sure interrupts are masked immediately (paranoia) */
-	mw32(GBL_CTL, 0);
-	tmp = mr32(GBL_CTL);
-
-	/* Reset Controller */
-	if (!(tmp & HBA_RST)) {
-		if (mvi->flags & MVF_PHY_PWR_FIX) {
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
-
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
-		}
-
-		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
-		mw32_f(GBL_CTL, HBA_RST);
-	}
-
-	/* wait for reset to finish; timeout is just a guess */
-	i = 1000;
-	while (i-- > 0) {
-		msleep(10);
-
-		if (!(mr32(GBL_CTL) & HBA_RST))
-			break;
-	}
-	if (mr32(GBL_CTL) & HBA_RST) {
-		dev_printk(KERN_ERR, &mvi->pdev->dev, "HBA reset failed\n");
-		return -EBUSY;
-	}
-
-	/* Init Chip */
-	/* make sure RST is set; HBA_RST /should/ have done that for us */
-	cctl = mr32(CTL);
-	if (cctl & CCTL_RST)
-		cctl &= ~CCTL_RST;
-	else
-		mw32_f(CTL, cctl | CCTL_RST);
-
-	/* write to device control _AND_ device status register? - A.C. */
-	pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
-	tmp &= ~PRD_REQ_MASK;
-	tmp |= PRD_REQ_SIZE;
-	pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
-
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
-
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
-
-	mw32_f(CTL, cctl);
-
-	/* reset control */
-	mw32(PCS, 0);		/*MVS_PCS */
-
-	mvs_phy_hacks(mvi);
-
-	mw32(CMD_LIST_LO, mvi->slot_dma);
-	mw32(CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
-
-	mw32(RX_FIS_LO, mvi->rx_fis_dma);
-	mw32(RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
-
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ);
-	mw32(TX_LO, mvi->tx_dma);
-	mw32(TX_HI, (mvi->tx_dma >> 16) >> 16);
-
-	mw32(RX_CFG, MVS_RX_RING_SZ);
-	mw32(RX_LO, mvi->rx_dma);
-	mw32(RX_HI, (mvi->rx_dma >> 16) >> 16);
-
-	/* enable auto port detection */
-	mw32(GBL_PORT_TYPE, MODE_AUTO_DET_EN);
-	msleep(1100);
-	/* init and reset phys */
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		u32 lo = be32_to_cpu(*(u32 *)&mvi->sas_addr[4]);
-		u32 hi = be32_to_cpu(*(u32 *)&mvi->sas_addr[0]);
-
-		mvs_detect_porttype(mvi, i);
-
-		/* set phy local SAS address */
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-		mvs_write_port_cfg_data(mvi, i, lo);
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-		mvs_write_port_cfg_data(mvi, i, hi);
-
-		/* reset phy */
-		tmp = mvs_read_phy_ctl(mvi, i);
-		tmp |= PHY_RST;
-		mvs_write_phy_ctl(mvi, i, tmp);
-	}
-
-	msleep(100);
-
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		/* clear phy int status */
-		tmp = mvs_read_port_irq_stat(mvi, i);
-		tmp &= ~PHYEV_SIG_FIS;
-		mvs_write_port_irq_stat(mvi, i, tmp);
-
-		/* set phy int mask */
-		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
-			PHYEV_ID_DONE | PHYEV_DEC_ERR;
-		mvs_write_port_irq_mask(mvi, i, tmp);
-
-		msleep(100);
-		mvs_update_phyinfo(mvi, i, 1);
-		mvs_enable_xmt(mvi, i);
-	}
-
-	/* FIXME: update wide port bitmaps */
-
-	/* little endian for open address and command table, etc. */
-	/* A.C.
-	 * it seems that ( from the spec ) turning on big-endian won't
-	 * do us any good on big-endian machines, need further confirmation
-	 */
-	cctl = mr32(CTL);
-	cctl |= CCTL_ENDIAN_CMD;
-	cctl |= CCTL_ENDIAN_DATA;
-	cctl &= ~CCTL_ENDIAN_OPEN;
-	cctl |= CCTL_ENDIAN_RSP;
-	mw32_f(CTL, cctl);
-
-	/* reset CMD queue */
-	tmp = mr32(PCS);
-	tmp |= PCS_CMD_RST;
-	mw32(PCS, tmp);
-	/* interrupt coalescing may cause missing HW interrput in some case,
-	 * and the max count is 0x1ff, while our max slot is 0x200,
-	 * it will make count 0.
-	 */
-	tmp = 0;
-	mw32(INT_COAL, tmp);
-
-	tmp = 0x100;
-	mw32(INT_COAL_TMOUT, tmp);
-
-	/* ladies and gentlemen, start your engines */
-	mw32(TX_CFG, 0);
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
-	mw32(RX_CFG, MVS_RX_RING_SZ | RX_EN);
-	/* enable CMD/CMPL_Q/RESP mode */
-	mw32(PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN | PCS_CMD_EN);
-
-	/* enable completion queue interrupt */
-	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS);
-	mw32(INT_MASK, tmp);
-
-	/* Enable SRS interrupt */
-	mw32(INT_MASK_SRS, 0xFF);
-	return 0;
-}
-
-static void __devinit mvs_print_info(struct mvs_info *mvi)
-{
-	struct pci_dev *pdev = mvi->pdev;
-	static int printed_version;
-
-	if (!printed_version++)
-		dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
-
-	dev_printk(KERN_INFO, &pdev->dev, "%u phys, addr %llx\n",
-		   mvi->chip->n_phy, SAS_ADDR(mvi->sas_addr));
-}
-
-static int __devinit mvs_pci_init(struct pci_dev *pdev,
-				  const struct pci_device_id *ent)
-{
-	int rc;
-	struct mvs_info *mvi;
-	irq_handler_t irq_handler = mvs_interrupt;
-
-	rc = pci_enable_device(pdev);
-	if (rc)
-		return rc;
-
-	pci_set_master(pdev);
-
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_out_disable;
-
-	rc = pci_go_64(pdev);
-	if (rc)
-		goto err_out_regions;
-
-	mvi = mvs_alloc(pdev, ent);
-	if (!mvi) {
-		rc = -ENOMEM;
-		goto err_out_regions;
-	}
-
-	rc = mvs_hw_init(mvi);
-	if (rc)
-		goto err_out_mvi;
-
-#ifndef MVS_DISABLE_MSI
-	if (!pci_enable_msi(pdev)) {
-		u32 tmp;
-		void __iomem *regs = mvi->regs;
-		mvi->flags |= MVF_MSI;
-		irq_handler = mvs_msi_interrupt;
-		tmp = mr32(PCS);
-		mw32(PCS, tmp | PCS_SELF_CLEAR);
-	}
-#endif
-
-	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
-	if (rc)
-		goto err_out_msi;
-
-	rc = scsi_add_host(mvi->shost, &pdev->dev);
-	if (rc)
-		goto err_out_irq;
-
-	rc = sas_register_ha(&mvi->sas);
-	if (rc)
-		goto err_out_shost;
-
-	pci_set_drvdata(pdev, mvi);
-
-	mvs_print_info(mvi);
-
-	mvs_hba_interrupt_enable(mvi);
-
-	scsi_scan_host(mvi->shost);
-
-	return 0;
-
-err_out_shost:
-	scsi_remove_host(mvi->shost);
-err_out_irq:
-	free_irq(pdev->irq, mvi);
-err_out_msi:
-	if (mvi->flags |= MVF_MSI)
-		pci_disable_msi(pdev);
-err_out_mvi:
-	mvs_free(mvi);
-err_out_regions:
-	pci_release_regions(pdev);
-err_out_disable:
-	pci_disable_device(pdev);
-	return rc;
-}
-
-static void __devexit mvs_pci_remove(struct pci_dev *pdev)
-{
-	struct mvs_info *mvi = pci_get_drvdata(pdev);
-
-	pci_set_drvdata(pdev, NULL);
-
-	if (mvi) {
-		sas_unregister_ha(&mvi->sas);
-		mvs_hba_interrupt_disable(mvi);
-		sas_remove_host(mvi->shost);
-		scsi_remove_host(mvi->shost);
-
-		free_irq(pdev->irq, mvi);
-		if (mvi->flags & MVF_MSI)
-			pci_disable_msi(pdev);
-		mvs_free(mvi);
-		pci_release_regions(pdev);
-	}
-	pci_disable_device(pdev);
-}
-
-static struct sas_domain_function_template mvs_transport_ops = {
-	.lldd_execute_task	= mvs_task_exec,
-	.lldd_control_phy	= mvs_phy_control,
-	.lldd_abort_task	= mvs_task_abort,
-	.lldd_port_formed	= mvs_port_formed,
-	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
-};
-
-static struct pci_device_id __devinitdata mvs_pci_table[] = {
-	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
-	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
-	{
-		.vendor 	= PCI_VENDOR_ID_MARVELL,
-		.device 	= 0x6440,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= 0x6480,
-		.class		= 0,
-		.class_mask	= 0,
-		.driver_data	= chip_6480,
-	},
-	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
-	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
-
-	{ }	/* terminate list */
-};
-
-static struct pci_driver mvs_pci_driver = {
-	.name		= DRV_NAME,
-	.id_table	= mvs_pci_table,
-	.probe		= mvs_pci_init,
-	.remove		= __devexit_p(mvs_pci_remove),
-};
-
-static int __init mvs_init(void)
-{
-	int rc;
-
-	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
-	if (!mvs_stt)
-		return -ENOMEM;
-
-	rc = pci_register_driver(&mvs_pci_driver);
-	if (rc)
-		goto err_out;
-
-	return 0;
-
-err_out:
-	sas_release_transport(mvs_stt);
-	return rc;
-}
-
-static void __exit mvs_exit(void)
-{
-	pci_unregister_driver(&mvs_pci_driver);
-	sas_release_transport(mvs_stt);
-}
-
-module_init(mvs_init);
-module_exit(mvs_exit);
-
-MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
-MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
-MODULE_VERSION(DRV_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, mvs_pci_table);
diff --git a/drivers/scsi/mvsas/Kconfig b/drivers/scsi/mvsas/Kconfig
new file mode 100644
index 0000000..6de7af2
--- /dev/null
+++ b/drivers/scsi/mvsas/Kconfig
@@ -0,0 +1,42 @@
+#
+# Kernel configuration file for 88SE64XX/88SE94XX SAS/SATA driver.
+#
+# Copyright 2007 Red Hat, Inc.
+# Copyright 2008 Marvell. <kewei@marvell.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the 88SE64XX/88SE94XX driver.
+#
+# The 88SE64XX/88SE94XX driver is free software; you can redistribute
+# it and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The 88SE64XX/88SE94XX driver is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with 88SE64XX/88SE94XX Driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#
+
+config SCSI_MVSAS
+	tristate "Marvell 88SE64XX/88SE94XX SAS/SATA support"
+	depends on PCI
+	select SCSI_SAS_LIBSAS
+	select FW_LOADER
+	help
+		This driver supports Marvell's SAS/SATA 3Gb/s PCI-E 88SE64XX and 6Gb/s
+		PCI-E 88SE94XX chip based host adapters.
+
+config SCSI_MVSAS_DEBUG
+	bool "Compile in debug mode"
+	default y
+	depends on SCSI_MVSAS
+	help
+		Compiles the 88SE64XX/88SE94XX driver in debug mode.  In debug mode,
+		the driver prints some messages to the console.
diff --git a/drivers/scsi/mvsas/Makefile b/drivers/scsi/mvsas/Makefile
new file mode 100644
index 0000000..52ac426
--- /dev/null
+++ b/drivers/scsi/mvsas/Makefile
@@ -0,0 +1,32 @@
+#
+# Makefile for Marvell 88SE64xx/88SE84xx SAS/SATA driver.
+#
+# Copyright 2007 Red Hat, Inc.
+# Copyright 2008 Marvell. <kewei@marvell.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+ifeq ($(CONFIG_SCSI_MVSAS_DEBUG),y)
+	EXTRA_CFLAGS += -DMV_DEBUG
+endif
+
+obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
+mvsas-y +=  mv_init.o  \
+           mv_sas.o   \
+           mv_64xx.o  \
+           mv_94xx.o
diff --git a/drivers/scsi/mvsas/mv_64xx.c b/drivers/scsi/mvsas/mv_64xx.c
new file mode 100644
index 0000000..10a5077
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_64xx.c
@@ -0,0 +1,793 @@
+/*
+ * Marvell 88SE64xx hardware specific
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#include "mv_sas.h"
+#include "mv_64xx.h"
+#include "mv_chips.h"
+
+static void mvs_64xx_detect_porttype(struct mvs_info *mvi, int i)
+{
+	void __iomem *regs = mvi->regs;
+	u32 reg;
+	struct mvs_phy *phy = &mvi->phy[i];
+
+	/* TODO check & save device type */
+	reg = mr32(MVS_GBL_PORT_TYPE);
+	phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	if (reg & MODE_SAS_SATA & (1 << i))
+		phy->phy_type |= PORT_TYPE_SAS;
+	else
+		phy->phy_type |= PORT_TYPE_SATA;
+}
+
+static void __devinit mvs_64xx_enable_xmt(struct mvs_info *mvi, int phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_PCS);
+	if (mvi->chip->n_phy <= 4)
+		tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT);
+	else
+		tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(MVS_PCS, tmp);
+}
+
+static void __devinit mvs_64xx_phy_hacks(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+
+	mvs_phy_hacks(mvi);
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		/* TEST - for phy decoding error, adjust voltage levels */
+		mw32(MVS_P0_VSR_ADDR + 0, 0x8);
+		mw32(MVS_P0_VSR_DATA + 0, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 8, 0x8);
+		mw32(MVS_P0_VSR_DATA + 8, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 16, 0x8);
+		mw32(MVS_P0_VSR_DATA + 16, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 24, 0x8);
+		mw32(MVS_P0_VSR_DATA + 24, 0x2F0);
+	} else {
+		int i;
+		/* disable auto port detection */
+		mw32(MVS_GBL_PORT_TYPE, 0);
+		for (i = 0; i < mvi->chip->n_phy; i++) {
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE7);
+			mvs_write_port_vsr_data(mvi, i, 0x90000000);
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE9);
+			mvs_write_port_vsr_data(mvi, i, 0x50f2);
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE11);
+			mvs_write_port_vsr_data(mvi, i, 0x0e);
+		}
+	}
+}
+
+static void mvs_64xx_stp_reset(struct mvs_info *mvi, u32 phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 reg, tmp;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		if (phy_id < 4)
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &reg);
+		else
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &reg);
+
+	} else
+		reg = mr32(MVS_PHY_CTL);
+
+	tmp = reg;
+	if (phy_id < 4)
+		tmp |= (1U << phy_id) << PCTL_LINK_OFFS;
+	else
+		tmp |= (1U << (phy_id - 4)) << PCTL_LINK_OFFS;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		if (phy_id < 4) {
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+			mdelay(10);
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, reg);
+		} else {
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+			mdelay(10);
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, reg);
+		}
+	} else {
+		mw32(MVS_PHY_CTL, tmp);
+		mdelay(10);
+		mw32(MVS_PHY_CTL, reg);
+	}
+}
+
+static void mvs_64xx_phy_reset(struct mvs_info *mvi, u32 phy_id, int hard)
+{
+	u32 tmp;
+	tmp = mvs_read_port_irq_stat(mvi, phy_id);
+	tmp &= ~PHYEV_RDY_CH;
+	mvs_write_port_irq_stat(mvi, phy_id, tmp);
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
+	if (hard)
+		tmp |= PHY_RST_HARD;
+	else
+		tmp |= PHY_RST;
+	mvs_write_phy_ctl(mvi, phy_id, tmp);
+	if (hard) {
+		do {
+			tmp = mvs_read_phy_ctl(mvi, phy_id);
+		} while (tmp & PHY_RST_HARD);
+	}
+}
+
+static int __devinit mvs_64xx_chip_reset(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	int i;
+
+	/* make sure interrupts are masked immediately (paranoia) */
+	mw32(MVS_GBL_CTL, 0);
+	tmp = mr32(MVS_GBL_CTL);
+
+	/* Reset Controller */
+	if (!(tmp & HBA_RST)) {
+		if (mvi->flags & MVF_PHY_PWR_FIX) {
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+			tmp &= ~PCTL_PWR_OFF;
+			tmp |= PCTL_PHY_DSBL;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+			tmp &= ~PCTL_PWR_OFF;
+			tmp |= PCTL_PHY_DSBL;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+		}
+	}
+
+	/* make sure interrupts are masked immediately (paranoia) */
+	mw32(MVS_GBL_CTL, 0);
+	tmp = mr32(MVS_GBL_CTL);
+
+	/* Reset Controller */
+	if (!(tmp & HBA_RST)) {
+		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
+		mw32_f(MVS_GBL_CTL, HBA_RST);
+	}
+
+	/* wait for reset to finish; timeout is just a guess */
+	i = 1000;
+	while (i-- > 0) {
+		msleep(10);
+
+		if (!(mr32(MVS_GBL_CTL) & HBA_RST))
+			break;
+	}
+	if (mr32(MVS_GBL_CTL) & HBA_RST) {
+		dev_printk(KERN_ERR, mvi->dev, "HBA reset failed\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+static void mvs_64xx_phy_disable(struct mvs_info *mvi, u32 phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		u32 offs;
+		if (phy_id < 4)
+			offs = PCR_PHY_CTL;
+		else {
+			offs = PCR_PHY_CTL2;
+			phy_id -= 4;
+		}
+		pci_read_config_dword(mvi->pdev, offs, &tmp);
+		tmp |= 1U << (PCTL_PHY_DSBL_OFFS + phy_id);
+		pci_write_config_dword(mvi->pdev, offs, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp |= 1U << (PCTL_PHY_DSBL_OFFS + phy_id);
+		mw32(MVS_PHY_CTL, tmp);
+	}
+}
+
+static void mvs_64xx_phy_enable(struct mvs_info *mvi, u32 phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		u32 offs;
+		if (phy_id < 4)
+			offs = PCR_PHY_CTL;
+		else {
+			offs = PCR_PHY_CTL2;
+			phy_id -= 4;
+		}
+		pci_read_config_dword(mvi->pdev, offs, &tmp);
+		tmp &= ~(1U << (PCTL_PHY_DSBL_OFFS + phy_id));
+		pci_write_config_dword(mvi->pdev, offs, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~(1U << (PCTL_PHY_DSBL_OFFS + phy_id));
+		mw32(MVS_PHY_CTL, tmp);
+	}
+}
+
+static int __devinit mvs_64xx_init(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	int i;
+	u32 tmp, cctl;
+
+	if (mvi->pdev && mvi->pdev->revision == 0)
+		mvi->flags |= MVF_PHY_PWR_FIX;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		mvs_show_pcie_usage(mvi);
+		tmp = mvs_64xx_chip_reset(mvi);
+		if (tmp)
+			return tmp;
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_PHY_DSBL;
+		mw32(MVS_PHY_CTL, tmp);
+	}
+
+	/* Init Chip */
+	/* make sure RST is set; HBA_RST /should/ have done that for us */
+	cctl = mr32(MVS_CTL) & 0xFFFF;
+	if (cctl & CCTL_RST)
+		cctl &= ~CCTL_RST;
+	else
+		mw32_f(MVS_CTL, cctl | CCTL_RST);
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		/* write to device control _AND_ device status register */
+		pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
+		tmp &= ~PRD_REQ_MASK;
+		tmp |= PRD_REQ_SIZE;
+		pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
+
+		pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp &= ~PCTL_PHY_DSBL;
+		pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+		pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+		tmp &= PCTL_PWR_OFF;
+		tmp &= ~PCTL_PHY_DSBL;
+		pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_COM_ON;
+		tmp &= ~PCTL_PHY_DSBL;
+		tmp |= PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+		tmp &= ~PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+	}
+
+	/* reset control */
+	mw32(MVS_PCS, 0);		/* MVS_PCS */
+	/* init phys */
+	mvs_64xx_phy_hacks(mvi);
+
+	/* enable auto port detection */
+	mw32(MVS_GBL_PORT_TYPE, MODE_AUTO_DET_EN);
+
+	mw32(MVS_CMD_LIST_LO, mvi->slot_dma);
+	mw32(MVS_CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
+
+	mw32(MVS_RX_FIS_LO, mvi->rx_fis_dma);
+	mw32(MVS_RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
+
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ);
+	mw32(MVS_TX_LO, mvi->tx_dma);
+	mw32(MVS_TX_HI, (mvi->tx_dma >> 16) >> 16);
+
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ);
+	mw32(MVS_RX_LO, mvi->rx_dma);
+	mw32(MVS_RX_HI, (mvi->rx_dma >> 16) >> 16);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* set phy local SAS address */
+		/* should set little endian SAS address to 64xx chip */
+		mvs_set_sas_addr(mvi, i, PHYR_ADDR_LO, PHYR_ADDR_HI,
+				cpu_to_be64(mvi->phy[i].dev_sas_addr));
+
+		mvs_64xx_enable_xmt(mvi, i);
+
+		mvs_64xx_phy_reset(mvi, i, 1);
+		msleep(500);
+		mvs_64xx_detect_porttype(mvi, i);
+	}
+	if (mvi->flags & MVF_FLAG_SOC) {
+		/* set select registers */
+		writel(0x0E008000, regs + 0x000);
+		writel(0x59000008, regs + 0x004);
+		writel(0x20, regs + 0x008);
+		writel(0x20, regs + 0x00c);
+		writel(0x20, regs + 0x010);
+		writel(0x20, regs + 0x014);
+		writel(0x20, regs + 0x018);
+		writel(0x20, regs + 0x01c);
+	}
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* clear phy int status */
+		tmp = mvs_read_port_irq_stat(mvi, i);
+		tmp &= ~PHYEV_SIG_FIS;
+		mvs_write_port_irq_stat(mvi, i, tmp);
+
+		/* set phy int mask */
+		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
+			PHYEV_ID_DONE | PHYEV_DCDR_ERR | PHYEV_CRC_ERR |
+			PHYEV_DEC_ERR;
+		mvs_write_port_irq_mask(mvi, i, tmp);
+
+		msleep(100);
+		mvs_update_phyinfo(mvi, i, 1);
+	}
+
+	/* FIXME: update wide port bitmaps */
+
+	/* little endian for open address and command table, etc. */
+	/*
+	 * it seems that ( from the spec ) turning on big-endian won't
+	 * do us any good on big-endian machines, need further confirmation
+	 */
+	cctl = mr32(MVS_CTL);
+	cctl |= CCTL_ENDIAN_CMD;
+	cctl |= CCTL_ENDIAN_DATA;
+	cctl &= ~CCTL_ENDIAN_OPEN;
+	cctl |= CCTL_ENDIAN_RSP;
+	mw32_f(MVS_CTL, cctl);
+
+	/* reset CMD queue */
+	tmp = mr32(MVS_PCS);
+	tmp |= PCS_CMD_RST;
+	mw32(MVS_PCS, tmp);
+	/* interrupt coalescing may cause missing HW interrput in some case,
+	 * and the max count is 0x1ff, while our max slot is 0x200,
+	 * it will make count 0.
+	 */
+	tmp = 0;
+	mw32(MVS_INT_COAL, tmp);
+
+	tmp = 0x100;
+	mw32(MVS_INT_COAL_TMOUT, tmp);
+
+	/* ladies and gentlemen, start your engines */
+	mw32(MVS_TX_CFG, 0);
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ | RX_EN);
+	/* enable CMD/CMPL_Q/RESP mode */
+	mw32(MVS_PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN |
+		PCS_CMD_EN | PCS_CMD_STOP_ERR);
+
+	/* enable completion queue interrupt */
+	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS | CINT_CI_STOP |
+		CINT_DMA_PCIE);
+
+	mw32(MVS_INT_MASK, tmp);
+
+	/* Enable SRS interrupt */
+	mw32(MVS_INT_MASK_SRS_0, 0xFFFF);
+
+	return 0;
+}
+
+static int mvs_64xx_ioremap(struct mvs_info *mvi)
+{
+	if (!mvs_ioremap(mvi, 4, 2))
+		return 0;
+	return -1;
+}
+
+static void mvs_64xx_iounmap(struct mvs_info *mvi)
+{
+	mvs_iounmap(mvi->regs);
+	mvs_iounmap(mvi->regs_ex);
+}
+
+static void mvs_64xx_interrupt_enable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+	mw32(MVS_GBL_CTL, tmp | INT_EN);
+}
+
+static void mvs_64xx_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+	mw32(MVS_GBL_CTL, tmp & ~INT_EN);
+}
+
+static u32 mvs_64xx_isr_status(struct mvs_info *mvi, int irq)
+{
+	void __iomem *regs = mvi->regs;
+	u32 stat;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		stat = mr32(MVS_GBL_INT_STAT);
+
+		if (stat == 0 || stat == 0xffffffff)
+			return 0;
+	} else
+		stat = 1;
+	return stat;
+}
+
+static irqreturn_t mvs_64xx_isr(struct mvs_info *mvi, int irq, u32 stat)
+{
+	void __iomem *regs = mvi->regs;
+
+	/* clear CMD_CMPLT ASAP */
+	mw32_f(MVS_INT_STAT, CINT_DONE);
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
+#endif
+	mvs_int_full(mvi);
+#ifndef MVS_USE_TASKLET
+	spin_unlock(&mvi->lock);
+#endif
+	return IRQ_HANDLED;
+}
+
+static void mvs_64xx_command_active(struct mvs_info *mvi, u32 slot_idx)
+{
+	u32 tmp;
+	mvs_cw32(mvi, 0x40 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	mvs_cw32(mvi, 0x00 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x00 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x40 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+}
+
+static void mvs_64xx_issue_stop(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	if (type == PORT_TYPE_SATA) {
+		tmp = mr32(MVS_INT_STAT_SRS_0) | (1U << tfs);
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	}
+	mw32(MVS_INT_STAT, CINT_CI_STOP);
+	tmp = mr32(MVS_PCS) | 0xFF00;
+	mw32(MVS_PCS, tmp);
+}
+
+static void mvs_64xx_free_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, offs;
+
+	if (*tfs == MVS_ID_NOT_MAPPED)
+		return;
+
+	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+	if (*tfs < 16) {
+		tmp = mr32(MVS_PCS);
+		mw32(MVS_PCS, tmp & ~offs);
+	} else {
+		tmp = mr32(MVS_CTL);
+		mw32(MVS_CTL, tmp & ~offs);
+	}
+
+	tmp = mr32(MVS_INT_STAT_SRS_0) & (1U << *tfs);
+	if (tmp)
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+
+	*tfs = MVS_ID_NOT_MAPPED;
+	return;
+}
+
+static u8 mvs_64xx_assign_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	int i;
+	u32 tmp, offs;
+	void __iomem *regs = mvi->regs;
+
+	if (*tfs != MVS_ID_NOT_MAPPED)
+		return 0;
+
+	tmp = mr32(MVS_PCS);
+
+	for (i = 0; i < mvi->chip->srs_sz; i++) {
+		if (i == 16)
+			tmp = mr32(MVS_CTL);
+		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+		if (!(tmp & offs)) {
+			*tfs = i;
+
+			if (i < 16)
+				mw32(MVS_PCS, tmp | offs);
+			else
+				mw32(MVS_CTL, tmp | offs);
+			tmp = mr32(MVS_INT_STAT_SRS_0) & (1U << i);
+			if (tmp)
+				mw32(MVS_INT_STAT_SRS_0, tmp);
+			return 0;
+		}
+	}
+	return MVS_ID_NOT_MAPPED;
+}
+
+void mvs_64xx_make_prd(struct scatterlist *scatter, int nr, void *prd)
+{
+	int i;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd = prd;
+	for_each_sg(scatter, sg, nr, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+}
+
+static int mvs_64xx_oob_done(struct mvs_info *mvi, int i)
+{
+	u32 phy_st;
+	mvs_write_port_cfg_addr(mvi, i,
+			PHYR_PHY_STAT);
+	phy_st = mvs_read_port_cfg_data(mvi, i);
+	if (phy_st & PHY_OOB_DTCTD)
+		return 1;
+	return 0;
+}
+
+static void mvs_64xx_fix_phy_info(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id)
+
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	sas_phy->linkrate =
+		(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+			PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+
+	phy->minimum_linkrate =
+		(phy->phy_status &
+			PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
+	phy->maximum_linkrate =
+		(phy->phy_status &
+			PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
+	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
+	phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
+	phy->att_dev_sas_addr =
+	     (u64) mvs_read_port_cfg_data(mvi, i) << 32;
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
+	phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+	phy->att_dev_sas_addr = SAS_ADDR(&phy->att_dev_sas_addr);
+}
+
+static void mvs_64xx_phy_work_around(struct mvs_info *mvi, int i)
+{
+	u32 tmp;
+	struct mvs_phy *phy = &mvi->phy[i];
+	/* workaround for HW phy decoding error on 1.5g disk drive */
+	mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
+	tmp = mvs_read_port_vsr_data(mvi, i);
+	if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+	     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
+		SAS_LINK_RATE_1_5_GBPS)
+		tmp &= ~PHY_MODE6_LATECLK;
+	else
+		tmp |= PHY_MODE6_LATECLK;
+	mvs_write_port_vsr_data(mvi, i, tmp);
+}
+
+void mvs_64xx_phy_set_link_rate(struct mvs_info *mvi, u32 phy_id,
+			struct sas_phy_linkrates *rates)
+{
+	u32 lrmin = 0, lrmax = 0;
+	u32 tmp;
+
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
+	lrmin = (rates->minimum_linkrate << 8);
+	lrmax = (rates->maximum_linkrate << 12);
+
+	if (lrmin) {
+		tmp &= ~(0xf << 8);
+		tmp |= lrmin;
+	}
+	if (lrmax) {
+		tmp &= ~(0xf << 12);
+		tmp |= lrmax;
+	}
+	mvs_write_phy_ctl(mvi, phy_id, tmp);
+	mvs_64xx_phy_reset(mvi, phy_id, 1);
+}
+
+static void mvs_64xx_clear_active_cmds(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_PCS);
+	mw32(MVS_PCS, tmp & 0xFFFF);
+	mw32(MVS_PCS, tmp);
+	tmp = mr32(MVS_CTL);
+	mw32(MVS_CTL, tmp & 0xFFFF);
+	mw32(MVS_CTL, tmp);
+}
+
+
+u32 mvs_64xx_spi_read_data(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	return ior32(SPI_DATA_REG_64XX);
+}
+
+void mvs_64xx_spi_write_data(struct mvs_info *mvi, u32 data)
+{
+	void __iomem *regs = mvi->regs_ex;
+	 iow32(SPI_DATA_REG_64XX, data);
+}
+
+
+int mvs_64xx_spi_buildcmd(struct mvs_info *mvi,
+			u32      *dwCmd,
+			u8       cmd,
+			u8       read,
+			u8       length,
+			u32      addr
+			)
+{
+	u32  dwTmp;
+
+	dwTmp = ((u32)cmd << 24) | ((u32)length << 19);
+	if (read)
+		dwTmp |= 1U<<23;
+
+	if (addr != MV_MAX_U32) {
+		dwTmp |= 1U<<22;
+		dwTmp |= (addr & 0x0003FFFF);
+	}
+
+	*dwCmd = dwTmp;
+	return 0;
+}
+
+
+int mvs_64xx_spi_issuecmd(struct mvs_info *mvi, u32 cmd)
+{
+	void __iomem *regs = mvi->regs_ex;
+	int     retry;
+
+	for (retry = 0; retry < 1; retry++) {
+		iow32(SPI_CTRL_REG_64XX, SPI_CTRL_VENDOR_ENABLE);
+		iow32(SPI_CMD_REG_64XX, cmd);
+		iow32(SPI_CTRL_REG_64XX,
+			SPI_CTRL_VENDOR_ENABLE | SPI_CTRL_SPISTART);
+	}
+
+	return 0;
+}
+
+int mvs_64xx_spi_waitdataready(struct mvs_info *mvi, u32 timeout)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 i, dwTmp;
+
+	for (i = 0; i < timeout; i++) {
+		dwTmp = ior32(SPI_CTRL_REG_64XX);
+		if (!(dwTmp & SPI_CTRL_SPISTART))
+			return 0;
+		msleep(10);
+	}
+
+	return -1;
+}
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+void mvs_64xx_fix_dma(dma_addr_t buf_dma, int buf_len, int from, void *prd)
+{
+	int i;
+	struct mvs_prd *buf_prd = prd;
+	buf_prd	+= from;
+	for (i = 0; i < MAX_SG_ENTRY - from; i++) {
+		buf_prd->addr = cpu_to_le64(buf_dma);
+		buf_prd->len = cpu_to_le32(buf_len);
+		++buf_prd;
+	}
+}
+#endif
+
+const struct mvs_dispatch mvs_64xx_dispatch = {
+	"mv64xx",
+	mvs_64xx_init,
+	NULL,
+	mvs_64xx_ioremap,
+	mvs_64xx_iounmap,
+	mvs_64xx_isr,
+	mvs_64xx_isr_status,
+	mvs_64xx_interrupt_enable,
+	mvs_64xx_interrupt_disable,
+	mvs_read_phy_ctl,
+	mvs_write_phy_ctl,
+	mvs_read_port_cfg_data,
+	mvs_write_port_cfg_data,
+	mvs_write_port_cfg_addr,
+	mvs_read_port_vsr_data,
+	mvs_write_port_vsr_data,
+	mvs_write_port_vsr_addr,
+	mvs_read_port_irq_stat,
+	mvs_write_port_irq_stat,
+	mvs_read_port_irq_mask,
+	mvs_write_port_irq_mask,
+	mvs_get_sas_addr,
+	mvs_64xx_command_active,
+	mvs_64xx_issue_stop,
+	mvs_start_delivery,
+	mvs_rx_update,
+	mvs_int_full,
+	mvs_64xx_assign_reg_set,
+	mvs_64xx_free_reg_set,
+	mvs_get_prd_size,
+	mvs_get_prd_count,
+	mvs_64xx_make_prd,
+	mvs_64xx_detect_porttype,
+	mvs_64xx_oob_done,
+	mvs_64xx_fix_phy_info,
+	mvs_64xx_phy_work_around,
+	mvs_64xx_phy_set_link_rate,
+	mvs_hw_max_link_rate,
+	mvs_64xx_phy_disable,
+	mvs_64xx_phy_enable,
+	mvs_64xx_phy_reset,
+	mvs_64xx_stp_reset,
+	mvs_64xx_clear_active_cmds,
+	mvs_64xx_spi_read_data,
+	mvs_64xx_spi_write_data,
+	mvs_64xx_spi_buildcmd,
+	mvs_64xx_spi_issuecmd,
+	mvs_64xx_spi_waitdataready,
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvs_64xx_fix_dma,
+#endif
+};
+
diff --git a/drivers/scsi/mvsas/mv_64xx.h b/drivers/scsi/mvsas/mv_64xx.h
new file mode 100644
index 0000000..42e947d
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_64xx.h
@@ -0,0 +1,151 @@
+/*
+ * Marvell 88SE64xx hardware specific head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#ifndef _MVS64XX_REG_H_
+#define _MVS64XX_REG_H_
+
+#include <linux/types.h>
+
+#define MAX_LINK_RATE		SAS_LINK_RATE_3_0_GBPS
+
+/* enhanced mode registers (BAR4) */
+enum hw_registers {
+	MVS_GBL_CTL		= 0x04,  /* global control */
+	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
+	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+
+	MVS_PHY_CTL		= 0x40,  /* SOC PHY Control */
+	MVS_PORTS_IMP		= 0x9C,  /* SOC Port Implemented */
+
+	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
+
+	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
+	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
+	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
+	MVS_CMD_LIST_HI		= 0x10C,
+	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
+	MVS_RX_FIS_HI		= 0x114,
+
+	MVS_TX_CFG		= 0x120, /* TX configuration */
+	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
+	MVS_TX_HI		= 0x128,
+
+	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
+	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
+	MVS_RX_CFG		= 0x134, /* RX configuration */
+	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
+	MVS_RX_HI		= 0x13C,
+	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
+
+	MVS_INT_COAL		= 0x148, /* Int coalescing config */
+	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
+	MVS_INT_STAT		= 0x150, /* Central int status */
+	MVS_INT_MASK		= 0x154, /* Central int enable */
+	MVS_INT_STAT_SRS_0	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS_0	= 0x15C,
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
+	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
+					 /* ports 5-7 follow after this */
+	MVS_P4_INT_STAT		= 0x200, /* Port4 interrupt status */
+	MVS_P4_INT_MASK		= 0x204, /* Port4 interrupt enable mask */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
+					 /* ports 5-7 follow after this */
+	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
+
+	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
+	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
+	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_CFG_ADDR		= 0x230, /* Port4 config address */
+	MVS_P4_CFG_DATA		= 0x234, /* Port4 config data */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
+	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_VSR_ADDR		= 0x250, /* port4 VSR addr */
+	MVS_P4_VSR_DATA		= 0x254, /* port4 VSR data */
+};
+
+enum pci_cfg_registers {
+	PCR_PHY_CTL		= 0x40,
+	PCR_PHY_CTL2		= 0x90,
+	PCR_DEV_CTRL		= 0xE8,
+	PCR_LINK_STAT		= 0xF2,
+};
+
+/*  SAS/SATA Vendor Specific Port Registers */
+enum sas_sata_vsp_regs {
+	VSR_PHY_STAT		= 0x00, /* Phy Status */
+	VSR_PHY_MODE1		= 0x01, /* phy tx */
+	VSR_PHY_MODE2		= 0x02, /* tx scc */
+	VSR_PHY_MODE3		= 0x03, /* pll */
+	VSR_PHY_MODE4		= 0x04, /* VCO */
+	VSR_PHY_MODE5		= 0x05, /* Rx */
+	VSR_PHY_MODE6		= 0x06, /* CDR */
+	VSR_PHY_MODE7		= 0x07, /* Impedance */
+	VSR_PHY_MODE8		= 0x08, /* Voltage */
+	VSR_PHY_MODE9		= 0x09, /* Test */
+	VSR_PHY_MODE10		= 0x0A, /* Power */
+	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
+	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
+	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
+};
+
+enum chip_register_bits {
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+};
+
+#define MAX_SG_ENTRY		64
+
+struct mvs_prd {
+	__le64			addr;		/* 64-bit buffer address */
+	__le32			reserved;
+	__le32			len;		/* 16-bit length */
+};
+
+#define SPI_CTRL_REG				0xc0
+#define SPI_CTRL_VENDOR_ENABLE		(1U<<29)
+#define SPI_CTRL_SPIRDY         		(1U<<22)
+#define SPI_CTRL_SPISTART			(1U<<20)
+
+#define SPI_CMD_REG		0xc4
+#define SPI_DATA_REG		0xc8
+
+#define SPI_CTRL_REG_64XX		0x10
+#define SPI_CMD_REG_64XX		0x14
+#define SPI_DATA_REG_64XX		0x18
+
+#endif
diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c
new file mode 100644
index 0000000..0940fae
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_94xx.c
@@ -0,0 +1,672 @@
+/*
+ * Marvell 88SE94xx hardware specific
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#include "mv_sas.h"
+#include "mv_94xx.h"
+#include "mv_chips.h"
+
+static void mvs_94xx_detect_porttype(struct mvs_info *mvi, int i)
+{
+	u32 reg;
+	struct mvs_phy *phy = &mvi->phy[i];
+	u32 phy_status;
+
+	mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE3);
+	reg = mvs_read_port_vsr_data(mvi, i);
+	phy_status = ((reg & 0x3f0000) >> 16) & 0xff;
+	phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	switch (phy_status) {
+	case 0x10:
+		phy->phy_type |= PORT_TYPE_SAS;
+		break;
+	case 0x1d:
+	default:
+		phy->phy_type |= PORT_TYPE_SATA;
+		break;
+	}
+}
+
+static void __devinit mvs_94xx_enable_xmt(struct mvs_info *mvi, int phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_PCS);
+	tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(MVS_PCS, tmp);
+}
+
+static void mvs_94xx_phy_reset(struct mvs_info *mvi, u32 phy_id, int hard)
+{
+	u32 tmp;
+
+	tmp = mvs_read_port_irq_stat(mvi, phy_id);
+	tmp &= ~PHYEV_RDY_CH;
+	mvs_write_port_irq_stat(mvi, phy_id, tmp);
+	if (hard) {
+		tmp = mvs_read_phy_ctl(mvi, phy_id);
+		tmp |= PHY_RST_HARD;
+		mvs_write_phy_ctl(mvi, phy_id, tmp);
+		do {
+			tmp = mvs_read_phy_ctl(mvi, phy_id);
+		} while (tmp & PHY_RST_HARD);
+	} else {
+		mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_STAT);
+		tmp = mvs_read_port_vsr_data(mvi, phy_id);
+		tmp |= PHY_RST;
+		mvs_write_port_vsr_data(mvi, phy_id, tmp);
+	}
+}
+
+static void mvs_94xx_phy_disable(struct mvs_info *mvi, u32 phy_id)
+{
+	u32 tmp;
+	mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_MODE2);
+	tmp = mvs_read_port_vsr_data(mvi, phy_id);
+	mvs_write_port_vsr_data(mvi, phy_id, tmp | 0x00800000);
+}
+
+static void mvs_94xx_phy_enable(struct mvs_info *mvi, u32 phy_id)
+{
+	mvs_write_port_vsr_addr(mvi, phy_id, 0x1B4);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x8300ffc1);
+	mvs_write_port_vsr_addr(mvi, phy_id, 0x104);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x00018080);
+	mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_MODE2);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x00207fff);
+}
+
+static int __devinit mvs_94xx_init(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	int i;
+	u32 tmp, cctl;
+
+	mvs_show_pcie_usage(mvi);
+	if (mvi->flags & MVF_FLAG_SOC) {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_PHY_DSBL;
+		mw32(MVS_PHY_CTL, tmp);
+	}
+
+	/* Init Chip */
+	/* make sure RST is set; HBA_RST /should/ have done that for us */
+	cctl = mr32(MVS_CTL) & 0xFFFF;
+	if (cctl & CCTL_RST)
+		cctl &= ~CCTL_RST;
+	else
+		mw32_f(MVS_CTL, cctl | CCTL_RST);
+
+	if (mvi->flags & MVF_FLAG_SOC) {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_COM_ON;
+		tmp &= ~PCTL_PHY_DSBL;
+		tmp |= PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+		tmp &= ~PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+	}
+
+	/* reset control */
+	mw32(MVS_PCS, 0);		/* MVS_PCS */
+	mw32(MVS_STP_REG_SET_0, 0);
+	mw32(MVS_STP_REG_SET_1, 0);
+
+	/* init phys */
+	mvs_phy_hacks(mvi);
+
+	/* disable Multiplexing, enable phy implemented */
+	mw32(MVS_PORTS_IMP, 0xFF);
+
+
+	mw32(MVS_PA_VSR_ADDR, 0x00000104);
+	mw32(MVS_PA_VSR_PORT, 0x00018080);
+	mw32(MVS_PA_VSR_ADDR, VSR_PHY_MODE8);
+	mw32(MVS_PA_VSR_PORT, 0x0084ffff);
+
+	/* set LED blink when IO*/
+	mw32(MVS_PA_VSR_ADDR, 0x00000030);
+	tmp = mr32(MVS_PA_VSR_PORT);
+	tmp &= 0xFFFF00FF;
+	tmp |= 0x00003300;
+	mw32(MVS_PA_VSR_PORT, tmp);
+
+	mw32(MVS_CMD_LIST_LO, mvi->slot_dma);
+	mw32(MVS_CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
+
+	mw32(MVS_RX_FIS_LO, mvi->rx_fis_dma);
+	mw32(MVS_RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
+
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ);
+	mw32(MVS_TX_LO, mvi->tx_dma);
+	mw32(MVS_TX_HI, (mvi->tx_dma >> 16) >> 16);
+
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ);
+	mw32(MVS_RX_LO, mvi->rx_dma);
+	mw32(MVS_RX_HI, (mvi->rx_dma >> 16) >> 16);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		mvs_94xx_phy_disable(mvi, i);
+		/* set phy local SAS address */
+		mvs_set_sas_addr(mvi, i, CONFIG_ID_FRAME3, CONFIG_ID_FRAME4,
+						(mvi->phy[i].dev_sas_addr));
+
+		mvs_94xx_enable_xmt(mvi, i);
+		mvs_94xx_phy_enable(mvi, i);
+
+		mvs_94xx_phy_reset(mvi, i, 1);
+		msleep(500);
+		mvs_94xx_detect_porttype(mvi, i);
+	}
+
+	if (mvi->flags & MVF_FLAG_SOC) {
+		/* set select registers */
+		writel(0x0E008000, regs + 0x000);
+		writel(0x59000008, regs + 0x004);
+		writel(0x20, regs + 0x008);
+		writel(0x20, regs + 0x00c);
+		writel(0x20, regs + 0x010);
+		writel(0x20, regs + 0x014);
+		writel(0x20, regs + 0x018);
+		writel(0x20, regs + 0x01c);
+	}
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* clear phy int status */
+		tmp = mvs_read_port_irq_stat(mvi, i);
+		tmp &= ~PHYEV_SIG_FIS;
+		mvs_write_port_irq_stat(mvi, i, tmp);
+
+		/* set phy int mask */
+		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH |
+			PHYEV_ID_DONE  | PHYEV_DCDR_ERR | PHYEV_CRC_ERR ;
+		mvs_write_port_irq_mask(mvi, i, tmp);
+
+		msleep(100);
+		mvs_update_phyinfo(mvi, i, 1);
+	}
+
+	/* FIXME: update wide port bitmaps */
+
+	/* little endian for open address and command table, etc. */
+	/*
+	 * it seems that ( from the spec ) turning on big-endian won't
+	 * do us any good on big-endian machines, need further confirmation
+	 */
+	cctl = mr32(MVS_CTL);
+	cctl |= CCTL_ENDIAN_CMD;
+	cctl |= CCTL_ENDIAN_DATA;
+	cctl &= ~CCTL_ENDIAN_OPEN;
+	cctl |= CCTL_ENDIAN_RSP;
+	mw32_f(MVS_CTL, cctl);
+
+	/* reset CMD queue */
+	tmp = mr32(MVS_PCS);
+	tmp |= PCS_CMD_RST;
+	mw32(MVS_PCS, tmp);
+	/* interrupt coalescing may cause missing HW interrput in some case,
+	 * and the max count is 0x1ff, while our max slot is 0x200,
+	 * it will make count 0.
+	 */
+	tmp = 0;
+	mw32(MVS_INT_COAL, tmp);
+
+	tmp = 0x100;
+	mw32(MVS_INT_COAL_TMOUT, tmp);
+
+	/* ladies and gentlemen, start your engines */
+	mw32(MVS_TX_CFG, 0);
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ | RX_EN);
+	/* enable CMD/CMPL_Q/RESP mode */
+	mw32(MVS_PCS, PCS_SATA_RETRY_2 | PCS_FIS_RX_EN |
+		PCS_CMD_EN | PCS_CMD_STOP_ERR);
+
+	/* enable completion queue interrupt */
+	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS | CINT_CI_STOP |
+		CINT_DMA_PCIE);
+	tmp |= CINT_PHY_MASK;
+	mw32(MVS_INT_MASK, tmp);
+
+	/* Enable SRS interrupt */
+	mw32(MVS_INT_MASK_SRS_0, 0xFFFF);
+
+	return 0;
+}
+
+static int mvs_94xx_ioremap(struct mvs_info *mvi)
+{
+	if (!mvs_ioremap(mvi, 2, -1)) {
+		mvi->regs_ex = mvi->regs + 0x10200;
+		mvi->regs += 0x20000;
+		if (mvi->id == 1)
+			mvi->regs += 0x4000;
+		return 0;
+	}
+	return -1;
+}
+
+static void mvs_94xx_iounmap(struct mvs_info *mvi)
+{
+	if (mvi->regs) {
+		mvi->regs -= 0x20000;
+		if (mvi->id == 1)
+			mvi->regs -= 0x4000;
+		mvs_iounmap(mvi->regs);
+	}
+}
+
+static void mvs_94xx_interrupt_enable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+	tmp |= (IRQ_SAS_A | IRQ_SAS_B);
+	mw32(MVS_GBL_INT_STAT, tmp);
+	writel(tmp, regs + 0x0C);
+	writel(tmp, regs + 0x10);
+	writel(tmp, regs + 0x14);
+	writel(tmp, regs + 0x18);
+	mw32(MVS_GBL_CTL, tmp);
+}
+
+static void mvs_94xx_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+
+	tmp &= ~(IRQ_SAS_A | IRQ_SAS_B);
+	mw32(MVS_GBL_INT_STAT, tmp);
+	writel(tmp, regs + 0x0C);
+	writel(tmp, regs + 0x10);
+	writel(tmp, regs + 0x14);
+	writel(tmp, regs + 0x18);
+	mw32(MVS_GBL_CTL, tmp);
+}
+
+static u32 mvs_94xx_isr_status(struct mvs_info *mvi, int irq)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 stat = 0;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		stat = mr32(MVS_GBL_INT_STAT);
+
+		if (!(stat & (IRQ_SAS_A | IRQ_SAS_B)))
+			return 0;
+	}
+	return stat;
+}
+
+static irqreturn_t mvs_94xx_isr(struct mvs_info *mvi, int irq, u32 stat)
+{
+	void __iomem *regs = mvi->regs;
+
+	if (((stat & IRQ_SAS_A) && mvi->id == 0) ||
+			((stat & IRQ_SAS_B) && mvi->id == 1)) {
+		mw32_f(MVS_INT_STAT, CINT_DONE);
+	#ifndef MVS_USE_TASKLET
+		spin_lock(&mvi->lock);
+	#endif
+		mvs_int_full(mvi);
+	#ifndef MVS_USE_TASKLET
+		spin_unlock(&mvi->lock);
+	#endif
+	}
+	return IRQ_HANDLED;
+}
+
+static void mvs_94xx_command_active(struct mvs_info *mvi, u32 slot_idx)
+{
+	u32 tmp;
+	mvs_cw32(mvi, 0x300 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x300 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+}
+
+static void mvs_94xx_issue_stop(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	if (type == PORT_TYPE_SATA) {
+		tmp = mr32(MVS_INT_STAT_SRS_0) | (1U << tfs);
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	}
+	mw32(MVS_INT_STAT, CINT_CI_STOP);
+	tmp = mr32(MVS_PCS) | 0xFF00;
+	mw32(MVS_PCS, tmp);
+}
+
+static void mvs_94xx_free_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	u8 reg_set = *tfs;
+
+	if (*tfs == MVS_ID_NOT_MAPPED)
+		return;
+
+	mvi->sata_reg_set &= ~bit(reg_set);
+	if (reg_set < 32) {
+		w_reg_set_enable(reg_set, (u32)mvi->sata_reg_set);
+		tmp = mr32(MVS_INT_STAT_SRS_0) & (u32)mvi->sata_reg_set;
+		if (tmp)
+			mw32(MVS_INT_STAT_SRS_0, tmp);
+	} else {
+		w_reg_set_enable(reg_set, mvi->sata_reg_set);
+		tmp = mr32(MVS_INT_STAT_SRS_1) & mvi->sata_reg_set;
+		if (tmp)
+			mw32(MVS_INT_STAT_SRS_1, tmp);
+	}
+
+	*tfs = MVS_ID_NOT_MAPPED;
+
+	return;
+}
+
+static u8 mvs_94xx_assign_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	int i;
+	void __iomem *regs = mvi->regs;
+
+	if (*tfs != MVS_ID_NOT_MAPPED)
+		return 0;
+
+	i = mv_ffc64(mvi->sata_reg_set);
+	if (i > 32) {
+		mvi->sata_reg_set |= bit(i);
+		w_reg_set_enable(i, (u32)(mvi->sata_reg_set >> 32));
+		*tfs = i;
+		return 0;
+	} else if (i >= 0) {
+		mvi->sata_reg_set |= bit(i);
+		w_reg_set_enable(i, (u32)mvi->sata_reg_set);
+		*tfs = i;
+		return 0;
+	}
+	return MVS_ID_NOT_MAPPED;
+}
+
+static void mvs_94xx_make_prd(struct scatterlist *scatter, int nr, void *prd)
+{
+	int i;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd = prd;
+	for_each_sg(scatter, sg, nr, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->im_len.len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+}
+
+static int mvs_94xx_oob_done(struct mvs_info *mvi, int i)
+{
+	u32 phy_st;
+	phy_st = mvs_read_phy_ctl(mvi, i);
+	if (phy_st & PHY_READY_MASK)	/* phy ready */
+		return 1;
+	return 0;
+}
+
+static void mvs_94xx_get_dev_identify_frame(struct mvs_info *mvi, int port_id,
+					struct sas_identify_frame *id)
+{
+	int i;
+	u32 id_frame[7];
+
+	for (i = 0; i < 7; i++) {
+		mvs_write_port_cfg_addr(mvi, port_id,
+					CONFIG_ID_FRAME0 + i * 4);
+		id_frame[i] = mvs_read_port_cfg_data(mvi, port_id);
+	}
+	memcpy(id, id_frame, 28);
+}
+
+static void mvs_94xx_get_att_identify_frame(struct mvs_info *mvi, int port_id,
+					struct sas_identify_frame *id)
+{
+	int i;
+	u32 id_frame[7];
+
+	/* mvs_hexdump(28, (u8 *)id_frame, 0); */
+	for (i = 0; i < 7; i++) {
+		mvs_write_port_cfg_addr(mvi, port_id,
+					CONFIG_ATT_ID_FRAME0 + i * 4);
+		id_frame[i] = mvs_read_port_cfg_data(mvi, port_id);
+		mv_dprintk("94xx phy %d atta frame %d %x.\n",
+			port_id + mvi->id * mvi->chip->n_phy, i, id_frame[i]);
+	}
+	/* mvs_hexdump(28, (u8 *)id_frame, 0); */
+	memcpy(id, id_frame, 28);
+}
+
+static u32 mvs_94xx_make_dev_info(struct sas_identify_frame *id)
+{
+	u32 att_dev_info = 0;
+
+	att_dev_info |= id->dev_type;
+	if (id->stp_iport)
+		att_dev_info |= PORT_DEV_STP_INIT;
+	if (id->smp_iport)
+		att_dev_info |= PORT_DEV_SMP_INIT;
+	if (id->ssp_iport)
+		att_dev_info |= PORT_DEV_SSP_INIT;
+	if (id->stp_tport)
+		att_dev_info |= PORT_DEV_STP_TRGT;
+	if (id->smp_tport)
+		att_dev_info |= PORT_DEV_SMP_TRGT;
+	if (id->ssp_tport)
+		att_dev_info |= PORT_DEV_SSP_TRGT;
+
+	att_dev_info |= (u32)id->phy_id<<24;
+	return att_dev_info;
+}
+
+static u32 mvs_94xx_make_att_info(struct sas_identify_frame *id)
+{
+	return mvs_94xx_make_dev_info(id);
+}
+
+static void mvs_94xx_fix_phy_info(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	mv_dprintk("get all reg link rate is 0x%x\n", phy->phy_status);
+	sas_phy->linkrate =
+		(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+			PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+	sas_phy->linkrate += 0x8;
+	mv_dprintk("get link rate is %d\n", sas_phy->linkrate);
+	phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+	phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
+	mvs_94xx_get_dev_identify_frame(mvi, i, id);
+	phy->dev_info = mvs_94xx_make_dev_info(id);
+
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		mvs_94xx_get_att_identify_frame(mvi, i, id);
+		phy->att_dev_info = mvs_94xx_make_att_info(id);
+		phy->att_dev_sas_addr = *(u64 *)id->sas_addr;
+	} else {
+		phy->att_dev_info = PORT_DEV_STP_TRGT | 1;
+	}
+
+}
+
+void mvs_94xx_phy_set_link_rate(struct mvs_info *mvi, u32 phy_id,
+			struct sas_phy_linkrates *rates)
+{
+	/* TODO */
+}
+
+static void mvs_94xx_clear_active_cmds(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_STP_REG_SET_0);
+	mw32(MVS_STP_REG_SET_0, 0);
+	mw32(MVS_STP_REG_SET_0, tmp);
+	tmp = mr32(MVS_STP_REG_SET_1);
+	mw32(MVS_STP_REG_SET_1, 0);
+	mw32(MVS_STP_REG_SET_1, tmp);
+}
+
+
+u32 mvs_94xx_spi_read_data(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	return mr32(SPI_RD_DATA_REG_94XX);
+}
+
+void mvs_94xx_spi_write_data(struct mvs_info *mvi, u32 data)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	 mw32(SPI_RD_DATA_REG_94XX, data);
+}
+
+
+int mvs_94xx_spi_buildcmd(struct mvs_info *mvi,
+				u32      *dwCmd,
+				u8       cmd,
+				u8       read,
+				u8       length,
+				u32      addr
+				)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	u32  dwTmp;
+
+	dwTmp = ((u32)cmd << 8) | ((u32)length << 4);
+	if (read)
+		dwTmp |= SPI_CTRL_READ_94XX;
+
+	if (addr != MV_MAX_U32) {
+		mw32(SPI_ADDR_REG_94XX, (addr & 0x0003FFFFL));
+		dwTmp |= SPI_ADDR_VLD_94XX;
+	}
+
+	*dwCmd = dwTmp;
+	return 0;
+}
+
+
+int mvs_94xx_spi_issuecmd(struct mvs_info *mvi, u32 cmd)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	mw32(SPI_CTRL_REG_94XX, cmd | SPI_CTRL_SpiStart_94XX);
+
+	return 0;
+}
+
+int mvs_94xx_spi_waitdataready(struct mvs_info *mvi, u32 timeout)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	u32   i, dwTmp;
+
+	for (i = 0; i < timeout; i++) {
+		dwTmp = mr32(SPI_CTRL_REG_94XX);
+		if (!(dwTmp & SPI_CTRL_SpiStart_94XX))
+			return 0;
+		msleep(10);
+	}
+
+	return -1;
+}
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+void mvs_94xx_fix_dma(dma_addr_t buf_dma, int buf_len, int from, void *prd)
+{
+	int i;
+	struct mvs_prd *buf_prd = prd;
+	buf_prd += from;
+	for (i = 0; i < MAX_SG_ENTRY - from; i++) {
+		buf_prd->addr = cpu_to_le64(buf_dma);
+		buf_prd->im_len.len = cpu_to_le32(buf_len);
+		++buf_prd;
+	}
+}
+#endif
+
+const struct mvs_dispatch mvs_94xx_dispatch = {
+	"mv94xx",
+	mvs_94xx_init,
+	NULL,
+	mvs_94xx_ioremap,
+	mvs_94xx_iounmap,
+	mvs_94xx_isr,
+	mvs_94xx_isr_status,
+	mvs_94xx_interrupt_enable,
+	mvs_94xx_interrupt_disable,
+	mvs_read_phy_ctl,
+	mvs_write_phy_ctl,
+	mvs_read_port_cfg_data,
+	mvs_write_port_cfg_data,
+	mvs_write_port_cfg_addr,
+	mvs_read_port_vsr_data,
+	mvs_write_port_vsr_data,
+	mvs_write_port_vsr_addr,
+	mvs_read_port_irq_stat,
+	mvs_write_port_irq_stat,
+	mvs_read_port_irq_mask,
+	mvs_write_port_irq_mask,
+	mvs_get_sas_addr,
+	mvs_94xx_command_active,
+	mvs_94xx_issue_stop,
+	mvs_start_delivery,
+	mvs_rx_update,
+	mvs_int_full,
+	mvs_94xx_assign_reg_set,
+	mvs_94xx_free_reg_set,
+	mvs_get_prd_size,
+	mvs_get_prd_count,
+	mvs_94xx_make_prd,
+	mvs_94xx_detect_porttype,
+	mvs_94xx_oob_done,
+	mvs_94xx_fix_phy_info,
+	NULL,
+	mvs_94xx_phy_set_link_rate,
+	mvs_hw_max_link_rate,
+	mvs_94xx_phy_disable,
+	mvs_94xx_phy_enable,
+	mvs_94xx_phy_reset,
+	NULL,
+	mvs_94xx_clear_active_cmds,
+	mvs_94xx_spi_read_data,
+	mvs_94xx_spi_write_data,
+	mvs_94xx_spi_buildcmd,
+	mvs_94xx_spi_issuecmd,
+	mvs_94xx_spi_waitdataready,
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvs_94xx_fix_dma,
+#endif
+};
+
diff --git a/drivers/scsi/mvsas/mv_94xx.h b/drivers/scsi/mvsas/mv_94xx.h
new file mode 100644
index 0000000..23ed9b1
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_94xx.h
@@ -0,0 +1,222 @@
+/*
+ * Marvell 88SE94xx hardware specific head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#ifndef _MVS94XX_REG_H_
+#define _MVS94XX_REG_H_
+
+#include <linux/types.h>
+
+#define MAX_LINK_RATE		SAS_LINK_RATE_6_0_GBPS
+
+enum hw_registers {
+	MVS_GBL_CTL		= 0x04,  /* global control */
+	MVS_GBL_INT_STAT	= 0x00,  /* global irq status */
+	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+
+	MVS_PHY_CTL		= 0x40,  /* SOC PHY Control */
+	MVS_PORTS_IMP		= 0x9C,  /* SOC Port Implemented */
+
+	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
+
+	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
+	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
+	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
+	MVS_CMD_LIST_HI		= 0x10C,
+	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
+	MVS_RX_FIS_HI		= 0x114,
+	MVS_STP_REG_SET_0	= 0x118, /* STP/SATA Register Set Enable */
+	MVS_STP_REG_SET_1	= 0x11C,
+	MVS_TX_CFG		= 0x120, /* TX configuration */
+	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
+	MVS_TX_HI		= 0x128,
+
+	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
+	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
+	MVS_RX_CFG		= 0x134, /* RX configuration */
+	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
+	MVS_RX_HI		= 0x13C,
+	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
+
+	MVS_INT_COAL		= 0x148, /* Int coalescing config */
+	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
+	MVS_INT_STAT		= 0x150, /* Central int status */
+	MVS_INT_MASK		= 0x154, /* Central int enable */
+	MVS_INT_STAT_SRS_0	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS_0	= 0x15C,
+	MVS_INT_STAT_SRS_1	= 0x160,
+	MVS_INT_MASK_SRS_1	= 0x164,
+	MVS_NON_NCQ_ERR_0	= 0x168, /* SRS Non-specific NCQ Error */
+	MVS_NON_NCQ_ERR_1	= 0x16C,
+	MVS_CMD_ADDR		= 0x170, /* Command register port (addr) */
+	MVS_CMD_DATA		= 0x174, /* Command register port (data) */
+	MVS_MEM_PARITY_ERR	= 0x178, /* Memory parity error */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_INT_STAT		= 0x180, /* port0 interrupt status */
+	MVS_P0_INT_MASK		= 0x184, /* port0 interrupt mask */
+					 /* ports 5-7 follow after this */
+	MVS_P4_INT_STAT		= 0x1A0, /* Port4 interrupt status */
+	MVS_P4_INT_MASK		= 0x1A4, /* Port4 interrupt enable mask */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_SER_CTLSTAT	= 0x1D0, /* port0 serial control/status */
+					 /* ports 5-7 follow after this */
+	MVS_P4_SER_CTLSTAT	= 0x1E0, /* port4 serial control/status */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_CFG_ADDR		= 0x200, /* port0 phy register address */
+	MVS_P0_CFG_DATA		= 0x204, /* port0 phy register data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_CFG_ADDR		= 0x220, /* Port4 config address */
+	MVS_P4_CFG_DATA		= 0x224, /* Port4 config data */
+
+					 /* phys 1-3 follow after this */
+	MVS_P0_VSR_ADDR		= 0x250, /* phy0 VSR address */
+	MVS_P0_VSR_DATA		= 0x254, /* phy0 VSR data */
+					 /* phys 1-3 follow after this */
+					 /* multiplexing */
+	MVS_P4_VSR_ADDR 	= 0x250, /* phy4 VSR address */
+	MVS_P4_VSR_DATA 	= 0x254, /* phy4 VSR data */
+	MVS_PA_VSR_ADDR		= 0x290, /* All port VSR addr */
+	MVS_PA_VSR_PORT		= 0x294, /* All port VSR data */
+};
+
+enum pci_cfg_registers {
+	PCR_PHY_CTL		= 0x40,
+	PCR_PHY_CTL2		= 0x90,
+	PCR_DEV_CTRL		= 0x78,
+	PCR_LINK_STAT		= 0x82,
+};
+
+/*  SAS/SATA Vendor Specific Port Registers */
+enum sas_sata_vsp_regs {
+	VSR_PHY_STAT		= 0x00 * 4, /* Phy Status */
+	VSR_PHY_MODE1		= 0x01 * 4, /* phy tx */
+	VSR_PHY_MODE2		= 0x02 * 4, /* tx scc */
+	VSR_PHY_MODE3		= 0x03 * 4, /* pll */
+	VSR_PHY_MODE4		= 0x04 * 4, /* VCO */
+	VSR_PHY_MODE5		= 0x05 * 4, /* Rx */
+	VSR_PHY_MODE6		= 0x06 * 4, /* CDR */
+	VSR_PHY_MODE7		= 0x07 * 4, /* Impedance */
+	VSR_PHY_MODE8		= 0x08 * 4, /* Voltage */
+	VSR_PHY_MODE9		= 0x09 * 4, /* Test */
+	VSR_PHY_MODE10		= 0x0A * 4, /* Power */
+	VSR_PHY_MODE11		= 0x0B * 4, /* Phy Mode */
+	VSR_PHY_VS0		= 0x0C * 4, /* Vednor Specific 0 */
+	VSR_PHY_VS1		= 0x0D * 4, /* Vednor Specific 1 */
+};
+
+enum chip_register_bits {
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0x7 << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0x7 << 8),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0x3 << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+};
+
+enum pci_interrupt_cause {
+	/*  MAIN_IRQ_CAUSE (R10200) Bits*/
+	IRQ_COM_IN_I2O_IOP0            = (1 << 0),
+	IRQ_COM_IN_I2O_IOP1            = (1 << 1),
+	IRQ_COM_IN_I2O_IOP2            = (1 << 2),
+	IRQ_COM_IN_I2O_IOP3            = (1 << 3),
+	IRQ_COM_OUT_I2O_HOS0           = (1 << 4),
+	IRQ_COM_OUT_I2O_HOS1           = (1 << 5),
+	IRQ_COM_OUT_I2O_HOS2           = (1 << 6),
+	IRQ_COM_OUT_I2O_HOS3           = (1 << 7),
+	IRQ_PCIF_TO_CPU_DRBL0          = (1 << 8),
+	IRQ_PCIF_TO_CPU_DRBL1          = (1 << 9),
+	IRQ_PCIF_TO_CPU_DRBL2          = (1 << 10),
+	IRQ_PCIF_TO_CPU_DRBL3          = (1 << 11),
+	IRQ_PCIF_DRBL0                 = (1 << 12),
+	IRQ_PCIF_DRBL1                 = (1 << 13),
+	IRQ_PCIF_DRBL2                 = (1 << 14),
+	IRQ_PCIF_DRBL3                 = (1 << 15),
+	IRQ_XOR_A                      = (1 << 16),
+	IRQ_XOR_B                      = (1 << 17),
+	IRQ_SAS_A                      = (1 << 18),
+	IRQ_SAS_B                      = (1 << 19),
+	IRQ_CPU_CNTRL                  = (1 << 20),
+	IRQ_GPIO                       = (1 << 21),
+	IRQ_UART                       = (1 << 22),
+	IRQ_SPI                        = (1 << 23),
+	IRQ_I2C                        = (1 << 24),
+	IRQ_SGPIO                      = (1 << 25),
+	IRQ_COM_ERR                    = (1 << 29),
+	IRQ_I2O_ERR                    = (1 << 30),
+	IRQ_PCIE_ERR                   = (1 << 31),
+};
+
+#define MAX_SG_ENTRY		255
+
+struct mvs_prd_imt {
+	__le32			len:22;
+	u8			_r_a:2;
+	u8			misc_ctl:4;
+	u8			inter_sel:4;
+};
+
+struct mvs_prd {
+	/* 64-bit buffer address */
+	__le64			addr;
+	/* 22-bit length */
+	struct mvs_prd_imt	im_len;
+} __attribute__ ((packed));
+
+#define SPI_CTRL_REG_94XX           	0xc800
+#define SPI_ADDR_REG_94XX            	0xc804
+#define SPI_WR_DATA_REG_94XX         0xc808
+#define SPI_RD_DATA_REG_94XX         	0xc80c
+#define SPI_CTRL_READ_94XX         	(1U << 2)
+#define SPI_ADDR_VLD_94XX         	(1U << 1)
+#define SPI_CTRL_SpiStart_94XX     	(1U << 0)
+
+#define mv_ffc(x)   ffz(x)
+
+static inline int
+mv_ffc64(u64 v)
+{
+	int i;
+	i = mv_ffc((u32)v);
+	if (i >= 0)
+		return i;
+	i = mv_ffc((u32)(v>>32));
+
+	if (i != 0)
+		return 32 + i;
+
+	return -1;
+}
+
+#define r_reg_set_enable(i) \
+	(((i) > 31) ? mr32(MVS_STP_REG_SET_1) : \
+	mr32(MVS_STP_REG_SET_0))
+
+#define w_reg_set_enable(i, tmp) \
+	(((i) > 31) ? mw32(MVS_STP_REG_SET_1, tmp) : \
+	mw32(MVS_STP_REG_SET_0, tmp))
+
+extern const struct mvs_dispatch mvs_94xx_dispatch;
+#endif
+
diff --git a/drivers/scsi/mvsas/mv_chips.h b/drivers/scsi/mvsas/mv_chips.h
new file mode 100644
index 0000000..a67e1c4
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_chips.h
@@ -0,0 +1,280 @@
+/*
+ * Marvell 88SE64xx/88SE94xx register IO interface
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+
+#ifndef _MV_CHIPS_H_
+#define _MV_CHIPS_H_
+
+#define mr32(reg)	readl(regs + reg)
+#define mw32(reg, val)	writel((val), regs + reg)
+#define mw32_f(reg, val)	do {			\
+				mw32(reg, val);	\
+				mr32(reg);	\
+			} while (0)
+
+#define iow32(reg, val) 	outl(val, (unsigned long)(regs + reg))
+#define ior32(reg) 		inl((unsigned long)(regs + reg))
+#define iow16(reg, val) 	outw((unsigned long)(val, regs + reg))
+#define ior16(reg) 		inw((unsigned long)(regs + reg))
+#define iow8(reg, val) 		outb((unsigned long)(val, regs + reg))
+#define ior8(reg) 		inb((unsigned long)(regs + reg))
+
+static inline u32 mvs_cr32(struct mvs_info *mvi, u32 addr)
+{
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_CMD_ADDR, addr);
+	return mr32(MVS_CMD_DATA);
+}
+
+static inline void mvs_cw32(struct mvs_info *mvi, u32 addr, u32 val)
+{
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_CMD_ADDR, addr);
+	mw32(MVS_CMD_DATA, val);
+}
+
+static inline u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
+{
+	void __iomem *regs = mvi->regs;
+	return (port < 4) ? mr32(MVS_P0_SER_CTLSTAT + port * 4) :
+		mr32(MVS_P4_SER_CTLSTAT + (port - 4) * 4);
+}
+
+static inline void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs;
+	if (port < 4)
+		mw32(MVS_P0_SER_CTLSTAT + port * 4, val);
+	else
+		mw32(MVS_P4_SER_CTLSTAT + (port - 4) * 4, val);
+}
+
+static inline u32 mvs_read_port(struct mvs_info *mvi, u32 off,
+				u32 off2, u32 port)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	return (port < 4) ? readl(regs + port * 8) :
+		readl(regs2 + (port - 4) * 8);
+}
+
+static inline void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
+				u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	if (port < 4)
+		writel(val, regs + port * 8);
+	else
+		writel(val, regs2 + (port - 4) * 8);
+}
+
+static inline u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_CFG_DATA,
+			MVS_P4_CFG_DATA, port);
+}
+
+static inline void mvs_write_port_cfg_data(struct mvs_info *mvi,
+						u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_DATA,
+			MVS_P4_CFG_DATA, port, val);
+}
+
+static inline void mvs_write_port_cfg_addr(struct mvs_info *mvi,
+						u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_ADDR,
+			MVS_P4_CFG_ADDR, port, addr);
+	mdelay(10);
+}
+
+static inline u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_VSR_DATA,
+			MVS_P4_VSR_DATA, port);
+}
+
+static inline void mvs_write_port_vsr_data(struct mvs_info *mvi,
+						u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_DATA,
+			MVS_P4_VSR_DATA, port, val);
+}
+
+static inline void mvs_write_port_vsr_addr(struct mvs_info *mvi,
+						u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_ADDR,
+			MVS_P4_VSR_ADDR, port, addr);
+	mdelay(10);
+}
+
+static inline u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_STAT,
+			MVS_P4_INT_STAT, port);
+}
+
+static inline void mvs_write_port_irq_stat(struct mvs_info *mvi,
+						u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_STAT,
+			MVS_P4_INT_STAT, port, val);
+}
+
+static inline u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_MASK,
+			MVS_P4_INT_MASK, port);
+
+}
+
+static inline void mvs_write_port_irq_mask(struct mvs_info *mvi,
+						u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_MASK,
+			MVS_P4_INT_MASK, port, val);
+}
+
+static inline void __devinit mvs_phy_hacks(struct mvs_info *mvi)
+{
+	u32 tmp;
+
+	/* workaround for SATA R-ERR, to ignore phy glitch */
+	tmp = mvs_cr32(mvi, CMD_PHY_TIMER);
+	tmp &= ~(1 << 9);
+	tmp |= (1 << 10);
+	mvs_cw32(mvi, CMD_PHY_TIMER, tmp);
+
+	/* enable retry 127 times */
+	mvs_cw32(mvi, CMD_SAS_CTL1, 0x7f7f);
+
+	/* extend open frame timeout to max */
+	tmp = mvs_cr32(mvi, CMD_SAS_CTL0);
+	tmp &= ~0xffff;
+	tmp |= 0x3fff;
+	mvs_cw32(mvi, CMD_SAS_CTL0, tmp);
+
+	/* workaround for WDTIMEOUT , set to 550 ms */
+	mvs_cw32(mvi, CMD_WD_TIMER, 0x7a0000);
+
+	/* not to halt for different port op during wideport link change */
+	mvs_cw32(mvi, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+
+	/* workaround for Seagate disk not-found OOB sequence, recv
+	 * COMINIT before sending out COMWAKE */
+	tmp = mvs_cr32(mvi, CMD_PHY_MODE_21);
+	tmp &= 0x0000ffff;
+	tmp |= 0x00fa0000;
+	mvs_cw32(mvi, CMD_PHY_MODE_21, tmp);
+
+	tmp = mvs_cr32(mvi, CMD_PHY_TIMER);
+	tmp &= 0x1fffffff;
+	tmp |= (2U << 29);	/* 8 ms retry */
+	mvs_cw32(mvi, CMD_PHY_TIMER, tmp);
+}
+
+static inline void mvs_int_sata(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_INT_STAT_SRS_0);
+	if (tmp)
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	MVS_CHIP_DISP->clear_active_cmds(mvi);
+}
+
+static inline void mvs_int_full(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, stat;
+	int i;
+
+	stat = mr32(MVS_INT_STAT);
+	mvs_int_rx(mvi, false);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
+		if (tmp)
+			mvs_int_port(mvi, i, tmp);
+	}
+
+	if (stat & CINT_SRS)
+		mvs_int_sata(mvi);
+
+	mw32(MVS_INT_STAT, stat);
+}
+
+static inline void mvs_start_delivery(struct mvs_info *mvi, u32 tx)
+{
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_TX_PROD_IDX, tx);
+}
+
+static inline u32 mvs_rx_update(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	return mr32(MVS_RX_CONS_IDX);
+}
+
+static inline u32 mvs_get_prd_size(void)
+{
+	return sizeof(struct mvs_prd);
+}
+
+static inline u32 mvs_get_prd_count(void)
+{
+	return MAX_SG_ENTRY;
+}
+
+static inline void mvs_show_pcie_usage(struct mvs_info *mvi)
+{
+	u16 link_stat, link_spd;
+	const char *spd[] = {
+		"UnKnown",
+		"2.5",
+		"5.0",
+	};
+	if (mvi->flags & MVF_FLAG_SOC || mvi->id > 0)
+		return;
+
+	pci_read_config_word(mvi->pdev, PCR_LINK_STAT, &link_stat);
+	link_spd = (link_stat & PLS_LINK_SPD) >> PLS_LINK_SPD_OFFS;
+	if (link_spd >= 3)
+		link_spd = 0;
+	dev_printk(KERN_INFO, mvi->dev,
+		"mvsas: PCI-E x%u, Bandwidth Usage: %s Gbps\n",
+	       (link_stat & PLS_NEG_LINK_WD) >> PLS_NEG_LINK_WD_OFFS,
+	       spd[link_spd]);
+}
+
+static inline u32 mvs_hw_max_link_rate(void)
+{
+	return MAX_LINK_RATE;
+}
+
+#endif  /* _MV_CHIPS_H_ */
+
diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
new file mode 100644
index 0000000..f8cb9de
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -0,0 +1,502 @@
+/*
+ * Marvell 88SE64xx/88SE94xx const head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#ifndef _MV_DEFS_H_
+#define _MV_DEFS_H_
+
+
+enum chip_flavors {
+	chip_6320,
+	chip_6440,
+	chip_6485,
+	chip_9480,
+	chip_9180,
+};
+
+/* driver compile-time configuration */
+enum driver_configuration {
+	MVS_SLOTS		= 512,	/* command slots */
+	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
+	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
+					/* software requires power-of-2
+					   ring size */
+	MVS_SOC_SLOTS		= 64,
+	MVS_SOC_TX_RING_SZ	= MVS_SOC_SLOTS * 2,
+	MVS_SOC_RX_RING_SZ	= MVS_SOC_SLOTS * 2,
+
+	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
+	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
+	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
+	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
+	MVS_QUEUE_SIZE	= 32,	/* Support Queue depth */
+	MVS_CAN_QUEUE		= MVS_SLOTS - 2,	/* SCSI Queue depth */
+	MVS_SOC_CAN_QUEUE	= MVS_SOC_SLOTS - 2,
+};
+
+/* unchangeable hardware details */
+enum hardware_details {
+	MVS_MAX_PHYS		= 8,	/* max. possible phys */
+	MVS_MAX_PORTS		= 8,	/* max. possible ports */
+	MVS_SOC_PHYS		= 4,	/* soc phys */
+	MVS_SOC_PORTS		= 4,	/* soc phys */
+	MVS_MAX_DEVICES	= 1024,	/* max supported device */
+};
+
+/* peripheral registers (BAR2) */
+enum peripheral_registers {
+	SPI_CTL			= 0x10,	/* EEPROM control */
+	SPI_CMD			= 0x14,	/* EEPROM command */
+	SPI_DATA		= 0x18, /* EEPROM data */
+};
+
+enum peripheral_register_bits {
+	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
+	TWSI_RD			= (1U << 4),	/* EEPROM read access */
+
+	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
+};
+
+enum hw_register_bits {
+	/* MVS_GBL_CTL */
+	INT_EN			= (1U << 1),	/* Global int enable */
+	HBA_RST			= (1U << 0),	/* HBA reset */
+
+	/* MVS_GBL_INT_STAT */
+	INT_XOR			= (1U << 4),	/* XOR engine event */
+	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
+
+	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
+	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
+	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
+	MODE_AUTO_DET_PORT6 = (1U << 14),
+	MODE_AUTO_DET_PORT5 = (1U << 13),
+	MODE_AUTO_DET_PORT4 = (1U << 12),
+	MODE_AUTO_DET_PORT3 = (1U << 11),
+	MODE_AUTO_DET_PORT2 = (1U << 10),
+	MODE_AUTO_DET_PORT1 = (1U << 9),
+	MODE_AUTO_DET_PORT0 = (1U << 8),
+	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
+				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
+				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
+				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
+	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
+	MODE_SAS_PORT6_MASK = (1U << 6),
+	MODE_SAS_PORT5_MASK = (1U << 5),
+	MODE_SAS_PORT4_MASK = (1U << 4),
+	MODE_SAS_PORT3_MASK = (1U << 3),
+	MODE_SAS_PORT2_MASK = (1U << 2),
+	MODE_SAS_PORT1_MASK = (1U << 1),
+	MODE_SAS_PORT0_MASK = (1U << 0),
+	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
+				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
+				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
+				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
+
+				/* SAS_MODE value may be
+				 * dictated (in hw) by values
+				 * of SATA_TARGET & AUTO_DET
+				 */
+
+	/* MVS_TX_CFG */
+	TX_EN			= (1U << 16),	/* Enable TX */
+	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
+
+	/* MVS_RX_CFG */
+	RX_EN			= (1U << 16),	/* Enable RX */
+	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
+
+	/* MVS_INT_COAL */
+	COAL_EN			= (1U << 16),	/* Enable int coalescing */
+
+	/* MVS_INT_STAT, MVS_INT_MASK */
+	CINT_I2C		= (1U << 31),	/* I2C event */
+	CINT_SW0		= (1U << 30),	/* software event 0 */
+	CINT_SW1		= (1U << 29),	/* software event 1 */
+	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
+	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
+	CINT_MEM		= (1U << 26),	/* int mem parity err */
+	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
+	CINT_SRS		= (1U << 3),	/* SRS event */
+	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
+	CINT_DONE		= (1U << 0),	/* cmd completion */
+
+						/* shl for ports 1-3 */
+	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
+	CINT_PORT		= (1U << 8),	/* port0 event */
+	CINT_PORT_MASK_OFFSET	= 8,
+	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
+	CINT_PHY_MASK_OFFSET	= 4,
+	CINT_PHY_MASK		= (0x0F << CINT_PHY_MASK_OFFSET),
+
+	/* TX (delivery) ring bits */
+	TXQ_CMD_SHIFT		= 29,
+	TXQ_CMD_SSP		= 1,		/* SSP protocol */
+	TXQ_CMD_SMP		= 2,		/* SMP protocol */
+	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
+	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
+	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
+	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
+	TXQ_MODE_TARGET 	= 0,
+	TXQ_MODE_INITIATOR	= 1,
+	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
+	TXQ_PRI_NORMAL		= 0,
+	TXQ_PRI_HIGH		= 1,
+	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
+	TXQ_SRS_MASK		= 0x7f,
+	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
+	TXQ_PHY_MASK		= 0xff,
+	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* RX (completion) ring bits */
+	RXQ_GOOD		= (1U << 23),	/* Response good */
+	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
+	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
+	RXQ_ATTN		= (1U << 19),	/* attention */
+	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
+	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
+	RXQ_DONE		= (1U << 16),	/* cmd complete */
+	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* mvs_cmd_hdr bits */
+	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
+	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
+
+						/* SSP initiator only */
+	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
+
+						/* SSP initiator or target */
+	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
+
+						/* SSP target only */
+	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
+	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
+	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
+	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
+
+	MCH_SSP_MODE_PASSTHRU	= 1,
+	MCH_SSP_MODE_NORMAL	= 0,
+	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
+	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
+	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
+	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
+	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
+	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
+	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
+	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
+	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
+	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
+
+	CCTL_RST		= (1U << 5),	/* port logic reset */
+
+						/* 0(LSB first), 1(MSB first) */
+	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
+	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
+	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
+	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
+
+	/* MVS_Px_SER_CTLSTAT (per-phy control) */
+	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
+	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
+	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
+	PHY_RST			= (1U << 0),	/* phy reset */
+	PHY_READY_MASK		= (1U << 20),
+
+	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
+	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
+	PHYEV_DCDR_ERR		= (1U << 23),	/* STP Deocder Error */
+	PHYEV_CRC_ERR		= (1U << 22),	/* STP CRC Error */
+	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
+	PHYEV_AN		= (1U << 18),	/* SATA async notification */
+	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
+	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
+	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
+	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
+	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
+	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
+	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
+	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
+	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
+	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
+	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
+	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
+	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
+	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
+	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
+
+	/* MVS_PCS */
+	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
+	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
+	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6485 */
+	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
+	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
+	PCS_SATA_RETRY_2	= (1U << 6),	/* For 9180 */
+	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
+	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
+	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
+	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
+	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
+
+	/* Port n Attached Device Info */
+	PORT_DEV_SSP_TRGT	= (1U << 19),
+	PORT_DEV_SMP_TRGT	= (1U << 18),
+	PORT_DEV_STP_TRGT	= (1U << 17),
+	PORT_DEV_SSP_INIT	= (1U << 11),
+	PORT_DEV_SMP_INIT	= (1U << 10),
+	PORT_DEV_STP_INIT	= (1U << 9),
+	PORT_PHY_ID_MASK	= (0xFFU << 24),
+	PORT_SSP_TRGT_MASK	= (0x1U << 19),
+	PORT_SSP_INIT_MASK	= (0x1U << 11),
+	PORT_DEV_TRGT_MASK	= (0x7U << 17),
+	PORT_DEV_INIT_MASK	= (0x7U << 9),
+	PORT_DEV_TYPE_MASK	= (0x7U << 0),
+
+	/* Port n PHY Status */
+	PHY_RDY			= (1U << 2),
+	PHY_DW_SYNC		= (1U << 1),
+	PHY_OOB_DTCTD		= (1U << 0),
+
+	/* VSR */
+	/* PHYMODE 6 (CDB) */
+	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
+	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
+	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
+	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
+	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
+	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
+	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
+	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
+	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
+	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
+	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
+	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
+	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
+	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
+};
+
+/* SAS/SATA configuration port registers, aka phy registers */
+enum sas_sata_config_port_regs {
+	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
+	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
+	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
+	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
+	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
+	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
+	PHYR_SATA_CTL		= 0x18,	/* SATA control */
+	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
+	PHYR_SATA_SIG0	= 0x20,	/*port SATA signature FIS(Byte 0-3) */
+	PHYR_SATA_SIG1	= 0x24,	/*port SATA signature FIS(Byte 4-7) */
+	PHYR_SATA_SIG2	= 0x28,	/*port SATA signature FIS(Byte 8-11) */
+	PHYR_SATA_SIG3	= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
+	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
+	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
+	PHYR_WIDE_PORT	= 0x38,	/* wide port participating */
+	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
+	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
+	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
+	CONFIG_ID_FRAME0       = 0x100, /* Port device ID frame register 0 */
+	CONFIG_ID_FRAME1       = 0x104, /* Port device ID frame register 1 */
+	CONFIG_ID_FRAME2       = 0x108, /* Port device ID frame register 2 */
+	CONFIG_ID_FRAME3       = 0x10c, /* Port device ID frame register 3 */
+	CONFIG_ID_FRAME4       = 0x110, /* Port device ID frame register 4 */
+	CONFIG_ID_FRAME5       = 0x114, /* Port device ID frame register 5 */
+	CONFIG_ID_FRAME6       = 0x118, /* Port device ID frame register 6 */
+	CONFIG_ATT_ID_FRAME0   = 0x11c, /* attached ID frame register 0 */
+	CONFIG_ATT_ID_FRAME1   = 0x120, /* attached ID frame register 1 */
+	CONFIG_ATT_ID_FRAME2   = 0x124, /* attached ID frame register 2 */
+	CONFIG_ATT_ID_FRAME3   = 0x128, /* attached ID frame register 3 */
+	CONFIG_ATT_ID_FRAME4   = 0x12c, /* attached ID frame register 4 */
+	CONFIG_ATT_ID_FRAME5   = 0x130, /* attached ID frame register 5 */
+	CONFIG_ATT_ID_FRAME6   = 0x134, /* attached ID frame register 6 */
+};
+
+enum sas_cmd_port_registers {
+	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
+	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
+	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
+	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
+	CMD_OOB_SPACE	= 0x110, /* OOB space control register */
+	CMD_OOB_BURST	= 0x114, /* OOB burst control register */
+	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
+	CMD_PHY_CONFIG0	= 0x11c, /* PHY config register 0 */
+	CMD_PHY_CONFIG1	= 0x120, /* PHY config register 1 */
+	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
+	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
+	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
+	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
+	CMD_ID_TEST		= 0x134, /* ID test register */
+	CMD_PL_TIMER		= 0x138, /* PL timer register */
+	CMD_WD_TIMER		= 0x13c, /* WD timer register */
+	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
+	CMD_APP_MEM_CTL	= 0x144, /* Application Memory Control */
+	CMD_XOR_MEM_CTL	= 0x148, /* XOR Block Memory Control */
+	CMD_DMA_MEM_CTL	= 0x14c, /* DMA Block Memory Control */
+	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
+	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
+	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
+	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
+	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
+	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
+	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
+	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
+	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
+	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
+	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
+	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
+	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
+	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
+	CMD_RESET_COUNT		= 0x188, /* Reset Count */
+	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
+	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
+	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
+	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
+	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
+	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
+	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
+	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
+	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
+	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
+	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
+	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
+	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
+	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
+	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
+};
+
+enum mvs_info_flags {
+	MVF_MSI		= (1U << 0),	/* MSI is enabled */
+	MVF_PHY_PWR_FIX	= (1U << 1),	/* bug workaround */
+	MVF_FLAG_SOC		= (1U << 2),	/* SoC integrated controllers */
+};
+
+enum mvs_event_flags {
+	PHY_PLUG_EVENT	= (3U),
+	PHY_PLUG_IN		= (1U << 0),	/* phy plug in */
+	PHY_PLUG_OUT		= (1U << 1),	/* phy plug out */
+};
+
+enum mvs_port_type {
+	PORT_TGT_MASK	=  (1U << 5),
+	PORT_INIT_PORT	=  (1U << 4),
+	PORT_TGT_PORT	=  (1U << 3),
+	PORT_INIT_TGT_PORT = (PORT_INIT_PORT | PORT_TGT_PORT),
+	PORT_TYPE_SAS	=  (1U << 1),
+	PORT_TYPE_SATA	=  (1U << 0),
+};
+
+/* Command Table Format */
+enum ct_format {
+	/* SSP */
+	SSP_F_H		=  0x00,
+	SSP_F_IU	=  0x18,
+	SSP_F_MAX	=  0x4D,
+	/* STP */
+	STP_CMD_FIS	=  0x00,
+	STP_ATAPI_CMD	=  0x40,
+	STP_F_MAX	=  0x10,
+	/* SMP */
+	SMP_F_T		=  0x00,
+	SMP_F_DEP	=  0x01,
+	SMP_F_MAX	=  0x101,
+};
+
+enum status_buffer {
+	SB_EIR_OFF	=  0x00,	/* Error Information Record */
+	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
+	SB_RFB_MAX	=  0x400,	/* RFB size*/
+};
+
+enum error_info_rec {
+	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
+	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
+	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
+	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
+	UNK_FIS 	= (1U << 27),	/* unknown FIS */
+	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
+	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
+	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
+	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
+	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
+	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
+	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
+	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
+	INTERLOCK	= (1U << 15),	/* interlock error */
+	NAK		= (1U << 14),	/* NAK rx'd */
+	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
+	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
+	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
+	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
+	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
+	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
+	BREAK		= (1U << 7),	/* break received */
+	BAD_DEST	= (1U << 6),	/* bad destination */
+	BAD_PROTO	= (1U << 5),	/* protocol not supported */
+	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
+	WRONG_DEST	= (1U << 3),	/* wrong destination error */
+	CREDIT_TO	= (1U << 2),	/* credit timeout */
+	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
+	BUF_PAR 	= (1U << 0),	/* buffer parity error */
+};
+
+enum error_info_rec_2 {
+	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
+	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
+	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
+	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
+	USR_BLK_NM	= (1U << 0),	/* User Block Number */
+};
+
+enum pci_cfg_register_bits {
+	PCTL_PWR_OFF	= (0xFU << 24),
+	PCTL_COM_ON	= (0xFU << 20),
+	PCTL_LINK_RST	= (0xFU << 16),
+	PCTL_LINK_OFFS	= (16),
+	PCTL_PHY_DSBL	= (0xFU << 12),
+	PCTL_PHY_DSBL_OFFS	= (12),
+	PRD_REQ_SIZE	= (0x4000),
+	PRD_REQ_MASK	= (0x00007000),
+	PLS_NEG_LINK_WD		= (0x3FU << 4),
+	PLS_NEG_LINK_WD_OFFS	= 4,
+	PLS_LINK_SPD		= (0x0FU << 0),
+	PLS_LINK_SPD_OFFS	= 0,
+};
+
+enum open_frame_protocol {
+	PROTOCOL_SMP	= 0x0,
+	PROTOCOL_SSP	= 0x1,
+	PROTOCOL_STP	= 0x2,
+};
+
+/* define for response frame datapres field */
+enum datapres_field {
+	NO_DATA		= 0,
+	RESPONSE_DATA	= 1,
+	SENSE_DATA	= 2,
+};
+
+/* define task management IU */
+struct mvs_tmf_task{
+	u8 tmf;
+	u16 tag_of_task_to_be_managed;
+};
+#endif
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
new file mode 100644
index 0000000..8646a19
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -0,0 +1,703 @@
+/*
+ * Marvell 88SE64xx/88SE94xx pci init
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+
+#include "mv_sas.h"
+
+static struct scsi_transport_template *mvs_stt;
+static const struct mvs_chip_info mvs_chips[] = {
+	[chip_6320] =	{ 1, 2, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
+	[chip_6440] =	{ 1, 4, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
+	[chip_6485] =	{ 1, 8, 0x800, 33, 32, 10, &mvs_64xx_dispatch, },
+	[chip_9180] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
+	[chip_9480] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
+};
+
+#define SOC_SAS_NUM 2
+
+static struct scsi_host_template mvs_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= mvs_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.scan_finished		= mvs_scan_finished,
+	.scan_start		= mvs_scan_start,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.eh_device_reset_handler	= sas_eh_device_reset_handler,
+	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
+	.slave_alloc		= mvs_slave_alloc,
+	.target_destroy		= sas_target_destroy,
+	.ioctl			= sas_ioctl,
+};
+
+static struct sas_domain_function_template mvs_transport_ops = {
+	.lldd_dev_found 	= mvs_dev_found,
+	.lldd_dev_gone	= mvs_dev_gone,
+
+	.lldd_execute_task	= mvs_queue_command,
+	.lldd_control_phy	= mvs_phy_control,
+
+	.lldd_abort_task	= mvs_abort_task,
+	.lldd_abort_task_set    = mvs_abort_task_set,
+	.lldd_clear_aca         = mvs_clear_aca,
+       .lldd_clear_task_set    = mvs_clear_task_set,
+	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
+	.lldd_lu_reset 		= mvs_lu_reset,
+	.lldd_query_task	= mvs_query_task,
+
+	.lldd_port_formed	= mvs_port_formed,
+	.lldd_port_deformed     = mvs_port_deformed,
+
+};
+
+static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
+{
+	struct mvs_phy *phy = &mvi->phy[phy_id];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	phy->mvi = mvi;
+	init_timer(&phy->timer);
+	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
+	sas_phy->class = SAS;
+	sas_phy->iproto = SAS_PROTOCOL_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+
+	sas_phy->id = phy_id;
+	sas_phy->sas_addr = &mvi->sas_addr[0];
+	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
+	sas_phy->ha = (struct sas_ha_struct *)mvi->shost->hostdata;
+	sas_phy->lldd_phy = phy;
+}
+
+static void mvs_free(struct mvs_info *mvi)
+{
+	int i;
+	struct mvs_wq *mwq;
+	int slot_nr;
+
+	if (!mvi)
+		return;
+
+	if (mvi->flags & MVF_FLAG_SOC)
+		slot_nr = MVS_SOC_SLOTS;
+	else
+		slot_nr = MVS_SLOTS;
+
+	for (i = 0; i < mvi->tags_num; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+		if (slot->buf)
+			dma_free_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
+					  slot->buf, slot->buf_dma);
+	}
+
+	if (mvi->tx)
+		dma_free_coherent(mvi->dev,
+				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				  mvi->tx, mvi->tx_dma);
+	if (mvi->rx_fis)
+		dma_free_coherent(mvi->dev, MVS_RX_FISL_SZ,
+				  mvi->rx_fis, mvi->rx_fis_dma);
+	if (mvi->rx)
+		dma_free_coherent(mvi->dev,
+				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				  mvi->rx, mvi->rx_dma);
+	if (mvi->slot)
+		dma_free_coherent(mvi->dev,
+				  sizeof(*mvi->slot) * slot_nr,
+				  mvi->slot, mvi->slot_dma);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (mvi->bulk_buffer)
+		dma_free_coherent(mvi->dev, TRASH_BUCKET_SIZE,
+				  mvi->bulk_buffer, mvi->bulk_buffer_dma);
+#endif
+
+	MVS_CHIP_DISP->chip_iounmap(mvi);
+	if (mvi->shost)
+		scsi_host_put(mvi->shost);
+	list_for_each_entry(mwq, &mvi->wq_list, entry)
+		cancel_delayed_work(&mwq->work_q);
+	kfree(mvi);
+}
+
+#ifdef MVS_USE_TASKLET
+struct tasklet_struct	mv_tasklet;
+static void mvs_tasklet(unsigned long opaque)
+{
+	unsigned long flags;
+	u32 stat;
+	u16 core_nr, i = 0;
+
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = (struct sas_ha_struct *)opaque;
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
+
+	if (unlikely(!mvi))
+		BUG_ON(1);
+
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
+		stat = MVS_CHIP_DISP->isr_status(mvi, mvi->irq);
+		if (stat)
+			MVS_CHIP_DISP->isr(mvi, mvi->irq, stat);
+	}
+
+}
+#endif
+
+static irqreturn_t mvs_interrupt(int irq, void *opaque)
+{
+	u32 core_nr, i = 0;
+	u32 stat;
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = opaque;
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
+
+	if (unlikely(!mvi))
+		return IRQ_NONE;
+
+	stat = MVS_CHIP_DISP->isr_status(mvi, irq);
+	if (!stat)
+		return IRQ_NONE;
+
+#ifdef MVS_USE_TASKLET
+	tasklet_schedule(&mv_tasklet);
+#else
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
+		MVS_CHIP_DISP->isr(mvi, irq, stat);
+	}
+#endif
+	return IRQ_HANDLED;
+}
+
+static int __devinit mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
+{
+	int i, slot_nr;
+
+	if (mvi->flags & MVF_FLAG_SOC)
+		slot_nr = MVS_SOC_SLOTS;
+	else
+		slot_nr = MVS_SLOTS;
+
+	spin_lock_init(&mvi->lock);
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		mvs_phy_init(mvi, i);
+		mvi->port[i].wide_port_phymap = 0;
+		mvi->port[i].port_attached = 0;
+		INIT_LIST_HEAD(&mvi->port[i].list);
+	}
+	for (i = 0; i < MVS_MAX_DEVICES; i++) {
+		mvi->devices[i].taskfileset = MVS_ID_NOT_MAPPED;
+		mvi->devices[i].dev_type = NO_DEVICE;
+		mvi->devices[i].device_id = i;
+		mvi->devices[i].dev_status = MVS_DEV_NORMAL;
+	}
+
+	/*
+	 * alloc and init our DMA areas
+	 */
+	mvi->tx = dma_alloc_coherent(mvi->dev,
+				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				     &mvi->tx_dma, GFP_KERNEL);
+	if (!mvi->tx)
+		goto err_out;
+	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
+	mvi->rx_fis = dma_alloc_coherent(mvi->dev, MVS_RX_FISL_SZ,
+					 &mvi->rx_fis_dma, GFP_KERNEL);
+	if (!mvi->rx_fis)
+		goto err_out;
+	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
+
+	mvi->rx = dma_alloc_coherent(mvi->dev,
+				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				     &mvi->rx_dma, GFP_KERNEL);
+	if (!mvi->rx)
+		goto err_out;
+	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
+	mvi->rx[0] = cpu_to_le32(0xfff);
+	mvi->rx_cons = 0xfff;
+
+	mvi->slot = dma_alloc_coherent(mvi->dev,
+				       sizeof(*mvi->slot) * slot_nr,
+				       &mvi->slot_dma, GFP_KERNEL);
+	if (!mvi->slot)
+		goto err_out;
+	memset(mvi->slot, 0, sizeof(*mvi->slot) * slot_nr);
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvi->bulk_buffer = dma_alloc_coherent(mvi->dev,
+				       TRASH_BUCKET_SIZE,
+				       &mvi->bulk_buffer_dma, GFP_KERNEL);
+	if (!mvi->bulk_buffer)
+		goto err_out;
+#endif
+	for (i = 0; i < slot_nr; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+
+		slot->buf = dma_alloc_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
+					       &slot->buf_dma, GFP_KERNEL);
+		if (!slot->buf) {
+			printk(KERN_DEBUG"failed to allocate slot->buf.\n");
+			goto err_out;
+		}
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+		++mvi->tags_num;
+	}
+	/* Initialize tags */
+	mvs_tag_init(mvi);
+	return 0;
+err_out:
+	return 1;
+}
+
+
+int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex)
+{
+	unsigned long res_start, res_len, res_flag, res_flag_ex = 0;
+	struct pci_dev *pdev = mvi->pdev;
+	if (bar_ex != -1) {
+		/*
+		 * ioremap main and peripheral registers
+		 */
+		res_start = pci_resource_start(pdev, bar_ex);
+		res_len = pci_resource_len(pdev, bar_ex);
+		if (!res_start || !res_len)
+			goto err_out;
+
+		res_flag_ex = pci_resource_flags(pdev, bar_ex);
+		if (res_flag_ex & IORESOURCE_MEM) {
+			if (res_flag_ex & IORESOURCE_CACHEABLE)
+				mvi->regs_ex = ioremap(res_start, res_len);
+			else
+				mvi->regs_ex = ioremap_nocache(res_start,
+						res_len);
+		} else
+			mvi->regs_ex = (void *)res_start;
+		if (!mvi->regs_ex)
+			goto err_out;
+	}
+
+	res_start = pci_resource_start(pdev, bar);
+	res_len = pci_resource_len(pdev, bar);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	res_flag = pci_resource_flags(pdev, bar);
+	if (res_flag & IORESOURCE_CACHEABLE)
+		mvi->regs = ioremap(res_start, res_len);
+	else
+		mvi->regs = ioremap_nocache(res_start, res_len);
+
+	if (!mvi->regs) {
+		if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM))
+			iounmap(mvi->regs_ex);
+		mvi->regs_ex = NULL;
+		goto err_out;
+	}
+
+	return 0;
+err_out:
+	return -1;
+}
+
+void mvs_iounmap(void __iomem *regs)
+{
+	iounmap(regs);
+}
+
+static struct mvs_info *__devinit mvs_pci_alloc(struct pci_dev *pdev,
+				const struct pci_device_id *ent,
+				struct Scsi_Host *shost, unsigned int id)
+{
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	mvi = kzalloc(sizeof(*mvi) + MVS_SLOTS * sizeof(struct mvs_slot_info),
+			GFP_KERNEL);
+	if (!mvi)
+		return NULL;
+
+	mvi->pdev = pdev;
+	mvi->dev = &pdev->dev;
+	mvi->chip_id = ent->driver_data;
+	mvi->chip = &mvs_chips[mvi->chip_id];
+	INIT_LIST_HEAD(&mvi->wq_list);
+	mvi->irq = pdev->irq;
+
+	((struct mvs_prv_info *)sha->lldd_ha)->mvi[id] = mvi;
+	((struct mvs_prv_info *)sha->lldd_ha)->n_phy = mvi->chip->n_phy;
+
+	mvi->id = id;
+	mvi->sas = sha;
+	mvi->shost = shost;
+#ifdef MVS_USE_TASKLET
+	tasklet_init(&mv_tasklet, mvs_tasklet, (unsigned long)sha);
+#endif
+
+	if (MVS_CHIP_DISP->chip_ioremap(mvi))
+		goto err_out;
+	if (!mvs_alloc(mvi, shost))
+		return mvi;
+err_out:
+	mvs_free(mvi);
+	return NULL;
+}
+
+/* move to PCI layer or libata core? */
+static int pci_go_64(struct pci_dev *pdev)
+{
+	int rc;
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+		if (rc) {
+			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			if (rc) {
+				dev_printk(KERN_ERR, &pdev->dev,
+					   "64-bit DMA enable failed\n");
+				return rc;
+			}
+		}
+	} else {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit DMA enable failed\n");
+			return rc;
+		}
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit consistent DMA enable failed\n");
+			return rc;
+		}
+	}
+
+	return rc;
+}
+
+static int __devinit mvs_prep_sas_ha_init(struct Scsi_Host *shost,
+				const struct mvs_chip_info *chip_info)
+{
+	int phy_nr, port_nr; unsigned short core_nr;
+	struct asd_sas_phy **arr_phy;
+	struct asd_sas_port **arr_port;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	core_nr = chip_info->n_host;
+	phy_nr  = core_nr * chip_info->n_phy;
+	port_nr = phy_nr;
+
+	memset(sha, 0x00, sizeof(struct sas_ha_struct));
+	arr_phy  = kcalloc(phy_nr, sizeof(void *), GFP_KERNEL);
+	arr_port = kcalloc(port_nr, sizeof(void *), GFP_KERNEL);
+	if (!arr_phy || !arr_port)
+		goto exit_free;
+
+	sha->sas_phy = arr_phy;
+	sha->sas_port = arr_port;
+
+	sha->lldd_ha = kzalloc(sizeof(struct mvs_prv_info), GFP_KERNEL);
+	if (!sha->lldd_ha)
+		goto exit_free;
+
+	((struct mvs_prv_info *)sha->lldd_ha)->n_host = core_nr;
+
+	shost->transportt = mvs_stt;
+	shost->max_id = 128;
+	shost->max_lun = ~0;
+	shost->max_channel = 1;
+	shost->max_cmd_len = 16;
+
+	return 0;
+exit_free:
+	kfree(arr_phy);
+	kfree(arr_port);
+	return -1;
+
+}
+
+static void  __devinit mvs_post_sas_ha_init(struct Scsi_Host *shost,
+			const struct mvs_chip_info *chip_info)
+{
+	int can_queue, i = 0, j = 0;
+	struct mvs_info *mvi = NULL;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	unsigned short nr_core = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+
+	for (j = 0; j < nr_core; j++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j];
+		for (i = 0; i < chip_info->n_phy; i++) {
+			sha->sas_phy[j * chip_info->n_phy  + i] =
+				&mvi->phy[i].sas_phy;
+			sha->sas_port[j * chip_info->n_phy + i] =
+				&mvi->port[i].sas_port;
+		}
+	}
+
+	sha->sas_ha_name = DRV_NAME;
+	sha->dev = mvi->dev;
+	sha->lldd_module = THIS_MODULE;
+	sha->sas_addr = &mvi->sas_addr[0];
+
+	sha->num_phys = nr_core * chip_info->n_phy;
+
+	sha->lldd_max_execute_num = 1;
+
+	if (mvi->flags & MVF_FLAG_SOC)
+		can_queue = MVS_SOC_CAN_QUEUE;
+	else
+		can_queue = MVS_CAN_QUEUE;
+
+	sha->lldd_queue_size = can_queue;
+	shost->can_queue = can_queue;
+	mvi->shost->cmd_per_lun = MVS_SLOTS/sha->num_phys;
+	sha->core.shost = mvi->shost;
+}
+
+static void mvs_init_sas_add(struct mvs_info *mvi)
+{
+	u8 i;
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		mvi->phy[i].dev_sas_addr = 0x5005043011ab0000ULL;
+		mvi->phy[i].dev_sas_addr =
+			cpu_to_be64((u64)(*(u64 *)&mvi->phy[i].dev_sas_addr));
+	}
+
+	memcpy(mvi->sas_addr, &mvi->phy[0].dev_sas_addr, SAS_ADDR_SIZE);
+}
+
+static int __devinit mvs_pci_init(struct pci_dev *pdev,
+				  const struct pci_device_id *ent)
+{
+	unsigned int rc, nhost = 0;
+	struct mvs_info *mvi;
+	irq_handler_t irq_handler = mvs_interrupt;
+	struct Scsi_Host *shost = NULL;
+	const struct mvs_chip_info *chip;
+
+	dev_printk(KERN_INFO, &pdev->dev,
+		"mvsas: driver version %s\n", DRV_VERSION);
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_out_enable;
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, DRV_NAME);
+	if (rc)
+		goto err_out_disable;
+
+	rc = pci_go_64(pdev);
+	if (rc)
+		goto err_out_regions;
+
+	shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
+	if (!shost) {
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+
+	chip = &mvs_chips[ent->driver_data];
+	SHOST_TO_SAS_HA(shost) =
+		kcalloc(1, sizeof(struct sas_ha_struct), GFP_KERNEL);
+	if (!SHOST_TO_SAS_HA(shost)) {
+		kfree(shost);
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+
+	rc = mvs_prep_sas_ha_init(shost, chip);
+	if (rc) {
+		kfree(shost);
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+
+	pci_set_drvdata(pdev, SHOST_TO_SAS_HA(shost));
+
+	do {
+		mvi = mvs_pci_alloc(pdev, ent, shost, nhost);
+		if (!mvi) {
+			rc = -ENOMEM;
+			goto err_out_regions;
+		}
+
+		mvs_init_sas_add(mvi);
+
+		mvi->instance = nhost;
+		rc = MVS_CHIP_DISP->chip_init(mvi);
+		if (rc) {
+			mvs_free(mvi);
+			goto err_out_regions;
+		}
+		nhost++;
+	} while (nhost < chip->n_host);
+
+	mvs_post_sas_ha_init(shost, chip);
+
+	rc = scsi_add_host(shost, &pdev->dev);
+	if (rc)
+		goto err_out_shost;
+
+	rc = sas_register_ha(SHOST_TO_SAS_HA(shost));
+	if (rc)
+		goto err_out_shost;
+	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED,
+		DRV_NAME, SHOST_TO_SAS_HA(shost));
+	if (rc)
+		goto err_not_sas;
+
+	MVS_CHIP_DISP->interrupt_enable(mvi);
+
+	scsi_scan_host(mvi->shost);
+
+	return 0;
+
+err_not_sas:
+	sas_unregister_ha(SHOST_TO_SAS_HA(shost));
+err_out_shost:
+	scsi_remove_host(mvi->shost);
+err_out_regions:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+err_out_enable:
+	return rc;
+}
+
+static void __devexit mvs_pci_remove(struct pci_dev *pdev)
+{
+	unsigned short core_nr, i = 0;
+	struct sas_ha_struct *sha = pci_get_drvdata(pdev);
+	struct mvs_info *mvi = NULL;
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
+
+#ifdef MVS_USE_TASKLET
+	tasklet_kill(&mv_tasklet);
+#endif
+
+	pci_set_drvdata(pdev, NULL);
+	sas_unregister_ha(sha);
+	sas_remove_host(mvi->shost);
+	scsi_remove_host(mvi->shost);
+
+	MVS_CHIP_DISP->interrupt_disable(mvi);
+	free_irq(mvi->irq, sha);
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
+		mvs_free(mvi);
+	}
+	kfree(sha->sas_phy);
+	kfree(sha->sas_port);
+	kfree(sha);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	return;
+}
+
+static struct pci_device_id __devinitdata mvs_pci_table[] = {
+	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
+	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
+	{
+		.vendor 	= PCI_VENDOR_ID_MARVELL,
+		.device 	= 0x6440,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x6480,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_6485,
+	},
+	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
+	{ PCI_VDEVICE(MARVELL, 0x6485), chip_6485 },
+	{ PCI_VDEVICE(MARVELL, 0x9480), chip_9480 },
+	{ PCI_VDEVICE(MARVELL, 0x9180), chip_9180 },
+
+	{ }	/* terminate list */
+};
+
+static struct pci_driver mvs_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= mvs_pci_table,
+	.probe		= mvs_pci_init,
+	.remove		= __devexit_p(mvs_pci_remove),
+};
+
+/* task handler */
+struct task_struct *mvs_th;
+static int __init mvs_init(void)
+{
+	int rc;
+	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
+	if (!mvs_stt)
+		return -ENOMEM;
+
+	rc = pci_register_driver(&mvs_pci_driver);
+
+	if (rc)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	sas_release_transport(mvs_stt);
+	return rc;
+}
+
+static void __exit mvs_exit(void)
+{
+	pci_unregister_driver(&mvs_pci_driver);
+	sas_release_transport(mvs_stt);
+}
+
+module_init(mvs_init);
+module_exit(mvs_exit);
+
+MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
+MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+#ifdef CONFIG_PCI
+MODULE_DEVICE_TABLE(pci, mvs_pci_table);
+#endif
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
new file mode 100644
index 0000000..0d21386
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -0,0 +1,2154 @@
+/*
+ * Marvell 88SE64xx/88SE94xx main function
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#include "mv_sas.h"
+
+static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
+{
+	if (task->lldd_task) {
+		struct mvs_slot_info *slot;
+		slot = task->lldd_task;
+		*tag = slot->slot_tag;
+		return 1;
+	}
+	return 0;
+}
+
+void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
+{
+	void *bitmap = &mvi->tags;
+	clear_bit(tag, bitmap);
+}
+
+void mvs_tag_free(struct mvs_info *mvi, u32 tag)
+{
+	mvs_tag_clear(mvi, tag);
+}
+
+void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+{
+	void *bitmap = &mvi->tags;
+	set_bit(tag, bitmap);
+}
+
+inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
+{
+	unsigned int index, tag;
+	void *bitmap = &mvi->tags;
+
+	index = find_first_zero_bit(bitmap, mvi->tags_num);
+	tag = index;
+	if (tag >= mvi->tags_num)
+		return -SAS_QUEUE_FULL;
+	mvs_tag_set(mvi, tag);
+	*tag_out = tag;
+	return 0;
+}
+
+void mvs_tag_init(struct mvs_info *mvi)
+{
+	int i;
+	for (i = 0; i < mvi->tags_num; ++i)
+		mvs_tag_clear(mvi, i);
+}
+
+void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
+{
+	u32 i;
+	u32 run;
+	u32 offset;
+
+	offset = 0;
+	while (size) {
+		printk(KERN_DEBUG"%08X : ", baseaddr + offset);
+		if (size >= 16)
+			run = 16;
+		else
+			run = size;
+		size -= run;
+		for (i = 0; i < 16; i++) {
+			if (i < run)
+				printk(KERN_DEBUG"%02X ", (u32)data[i]);
+			else
+				printk(KERN_DEBUG"   ");
+		}
+		printk(KERN_DEBUG": ");
+		for (i = 0; i < run; i++)
+			printk(KERN_DEBUG"%c",
+				isalnum(data[i]) ? data[i] : '.');
+		printk(KERN_DEBUG"\n");
+		data = &data[16];
+		offset += run;
+	}
+	printk(KERN_DEBUG"\n");
+}
+
+#if (_MV_DUMP > 1)
+static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
+				   enum sas_protocol proto)
+{
+	u32 offset;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+
+	offset = slot->cmd_size + MVS_OAF_SZ +
+	    MVS_CHIP_DISP->prd_size() * slot->n_elem;
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Status buffer[%d] :\n",
+			tag);
+	mvs_hexdump(32, (u8 *) slot->response,
+		    (u32) slot->buf_dma + offset);
+}
+#endif
+
+static void mvs_hba_memory_dump(struct mvs_info *mvi, u32 tag,
+				enum sas_protocol proto)
+{
+#if (_MV_DUMP > 1)
+	u32 sz, w_ptr;
+	u64 addr;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+
+	/*Delivery Queue */
+	sz = MVS_CHIP_SLOT_SZ;
+	w_ptr = slot->tx;
+	addr = mvi->tx_dma;
+	dev_printk(KERN_DEBUG, mvi->dev,
+		"Delivery Queue Size=%04d , WRT_PTR=%04X\n", sz, w_ptr);
+	dev_printk(KERN_DEBUG, mvi->dev,
+		"Delivery Queue Base Address=0x%llX (PA)"
+		"(tx_dma=0x%llX), Entry=%04d\n",
+		addr, (unsigned long long)mvi->tx_dma, w_ptr);
+	mvs_hexdump(sizeof(u32), (u8 *)(&mvi->tx[mvi->tx_prod]),
+			(u32) mvi->tx_dma + sizeof(u32) * w_ptr);
+	/*Command List */
+	addr = mvi->slot_dma;
+	dev_printk(KERN_DEBUG, mvi->dev,
+		"Command List Base Address=0x%llX (PA)"
+		"(slot_dma=0x%llX), Header=%03d\n",
+		addr, (unsigned long long)slot->buf_dma, tag);
+	dev_printk(KERN_DEBUG, mvi->dev, "Command Header[%03d]:\n", tag);
+	/*mvs_cmd_hdr */
+	mvs_hexdump(sizeof(struct mvs_cmd_hdr), (u8 *)(&mvi->slot[tag]),
+		(u32) mvi->slot_dma + tag * sizeof(struct mvs_cmd_hdr));
+	/*1.command table area */
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Command Table :\n");
+	mvs_hexdump(slot->cmd_size, (u8 *) slot->buf, (u32) slot->buf_dma);
+	/*2.open address frame area */
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Open Address Frame :\n");
+	mvs_hexdump(MVS_OAF_SZ, (u8 *) slot->buf + slot->cmd_size,
+				(u32) slot->buf_dma + slot->cmd_size);
+	/*3.status buffer */
+	mvs_hba_sb_dump(mvi, tag, proto);
+	/*4.PRD table */
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->PRD table :\n");
+	mvs_hexdump(MVS_CHIP_DISP->prd_size() * slot->n_elem,
+		(u8 *) slot->buf + slot->cmd_size + MVS_OAF_SZ,
+		(u32) slot->buf_dma + slot->cmd_size + MVS_OAF_SZ);
+#endif
+}
+
+static void mvs_hba_cq_dump(struct mvs_info *mvi)
+{
+#if (_MV_DUMP > 2)
+	u64 addr;
+	void __iomem *regs = mvi->regs;
+	u32 entry = mvi->rx_cons + 1;
+	u32 rx_desc = le32_to_cpu(mvi->rx[entry]);
+
+	/*Completion Queue */
+	addr = mr32(RX_HI) << 16 << 16 | mr32(RX_LO);
+	dev_printk(KERN_DEBUG, mvi->dev, "Completion Task = 0x%p\n",
+		   mvi->slot_info[rx_desc & RXQ_SLOT_MASK].task);
+	dev_printk(KERN_DEBUG, mvi->dev,
+		"Completion List Base Address=0x%llX (PA), "
+		"CQ_Entry=%04d, CQ_WP=0x%08X\n",
+		addr, entry - 1, mvi->rx[0]);
+	mvs_hexdump(sizeof(u32), (u8 *)(&rx_desc),
+		    mvi->rx_dma + sizeof(u32) * entry);
+#endif
+}
+
+void mvs_get_sas_addr(void *buf, u32 buflen)
+{
+	/*memcpy(buf, "\x50\x05\x04\x30\x11\xab\x64\x40", 8);*/
+}
+
+struct mvs_info *mvs_find_dev_mvi(struct domain_device *dev)
+{
+	unsigned long i = 0, j = 0, hi = 0;
+	struct sas_ha_struct *sha = dev->port->ha;
+	struct mvs_info *mvi = NULL;
+	struct asd_sas_phy *phy;
+
+	while (sha->sas_port[i]) {
+		if (sha->sas_port[i] == dev->port) {
+			phy =  container_of(sha->sas_port[i]->phy_list.next,
+				struct asd_sas_phy, port_phy_el);
+			j = 0;
+			while (sha->sas_phy[j]) {
+				if (sha->sas_phy[j] == phy)
+					break;
+				j++;
+			}
+			break;
+		}
+		i++;
+	}
+	hi = j/((struct mvs_prv_info *)sha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[hi];
+
+	return mvi;
+
+}
+
+/* FIXME */
+int mvs_find_dev_phyno(struct domain_device *dev, int *phyno)
+{
+	unsigned long i = 0, j = 0, n = 0, num = 0;
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+	struct sas_ha_struct *sha = dev->port->ha;
+
+	while (sha->sas_port[i]) {
+		if (sha->sas_port[i] == dev->port) {
+			struct asd_sas_phy *phy;
+			list_for_each_entry(phy,
+				&sha->sas_port[i]->phy_list, port_phy_el) {
+				j = 0;
+				while (sha->sas_phy[j]) {
+					if (sha->sas_phy[j] == phy)
+						break;
+					j++;
+				}
+				phyno[n] = (j >= mvi->chip->n_phy) ?
+					(j - mvi->chip->n_phy) : j;
+				num++;
+				n++;
+			}
+			break;
+		}
+		i++;
+	}
+	return num;
+}
+
+static inline void mvs_free_reg_set(struct mvs_info *mvi,
+				struct mvs_device *dev)
+{
+	if (!dev) {
+		mv_printk("device has been free.\n");
+		return;
+	}
+	if (dev->runing_req != 0)
+		return;
+	if (dev->taskfileset == MVS_ID_NOT_MAPPED)
+		return;
+	MVS_CHIP_DISP->free_reg_set(mvi, &dev->taskfileset);
+}
+
+static inline u8 mvs_assign_reg_set(struct mvs_info *mvi,
+				struct mvs_device *dev)
+{
+	if (dev->taskfileset != MVS_ID_NOT_MAPPED)
+		return 0;
+	return MVS_CHIP_DISP->assign_reg_set(mvi, &dev->taskfileset);
+}
+
+void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard)
+{
+	u32 no;
+	for_each_phy(phy_mask, phy_mask, no) {
+		if (!(phy_mask & 1))
+			continue;
+		MVS_CHIP_DISP->phy_reset(mvi, no, hard);
+	}
+}
+
+/* FIXME: locking? */
+int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			void *funcdata)
+{
+	int rc = 0, phy_id = sas_phy->id;
+	u32 tmp, i = 0, hi;
+	struct sas_ha_struct *sha = sas_phy->ha;
+	struct mvs_info *mvi = NULL;
+
+	while (sha->sas_phy[i]) {
+		if (sha->sas_phy[i] == sas_phy)
+			break;
+		i++;
+	}
+	hi = i/((struct mvs_prv_info *)sha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[hi];
+
+	switch (func) {
+	case PHY_FUNC_SET_LINK_RATE:
+		MVS_CHIP_DISP->phy_set_link_rate(mvi, phy_id, funcdata);
+		break;
+
+	case PHY_FUNC_HARD_RESET:
+		tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_id);
+		if (tmp & PHY_RST_HARD)
+			break;
+		MVS_CHIP_DISP->phy_reset(mvi, phy_id, 1);
+		break;
+
+	case PHY_FUNC_LINK_RESET:
+		MVS_CHIP_DISP->phy_enable(mvi, phy_id);
+		MVS_CHIP_DISP->phy_reset(mvi, phy_id, 0);
+		break;
+
+	case PHY_FUNC_DISABLE:
+		MVS_CHIP_DISP->phy_disable(mvi, phy_id);
+		break;
+	case PHY_FUNC_RELEASE_SPINUP_HOLD:
+	default:
+		rc = -EOPNOTSUPP;
+	}
+	msleep(200);
+	return rc;
+}
+
+void __devinit mvs_set_sas_addr(struct mvs_info *mvi, int port_id,
+				u32 off_lo, u32 off_hi, u64 sas_addr)
+{
+	u32 lo = (u32)sas_addr;
+	u32 hi = (u32)(sas_addr>>32);
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, port_id, off_lo);
+	MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, lo);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, port_id, off_hi);
+	MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, hi);
+}
+
+static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	struct sas_ha_struct *sas_ha;
+	if (!phy->phy_attached)
+		return;
+
+	if (!(phy->att_dev_info & PORT_DEV_TRGT_MASK)
+		&& phy->phy_type & PORT_TYPE_SAS) {
+		return;
+	}
+
+	sas_ha = mvi->sas;
+	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
+
+	if (sas_phy->phy) {
+		struct sas_phy *sphy = sas_phy->phy;
+
+		sphy->negotiated_linkrate = sas_phy->linkrate;
+		sphy->minimum_linkrate = phy->minimum_linkrate;
+		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		sphy->maximum_linkrate = phy->maximum_linkrate;
+		sphy->maximum_linkrate_hw = MVS_CHIP_DISP->phy_max_link_rate();
+	}
+
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		struct sas_identify_frame *id;
+
+		id = (struct sas_identify_frame *)phy->frame_rcvd;
+		id->dev_type = phy->identify.device_type;
+		id->initiator_bits = SAS_PROTOCOL_ALL;
+		id->target_bits = phy->identify.target_port_protocols;
+	} else if (phy->phy_type & PORT_TYPE_SATA) {
+		/*Nothing*/
+	}
+	mv_dprintk("phy %d byte dmaded.\n", i + mvi->id * mvi->chip->n_phy);
+
+	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
+
+	mvi->sas->notify_port_event(sas_phy,
+				   PORTE_BYTES_DMAED);
+}
+
+int mvs_slave_alloc(struct scsi_device *scsi_dev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+	if (dev_is_sata(dev)) {
+		/* We don't need to rescan targets
+		 * if REPORT_LUNS request is failed
+		 */
+		if (scsi_dev->lun > 0)
+			return -ENXIO;
+		scsi_dev->tagged_supported = 1;
+	}
+
+	return sas_slave_alloc(scsi_dev);
+}
+
+int mvs_slave_configure(struct scsi_device *sdev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(sdev);
+	int ret = sas_slave_configure(sdev);
+
+	if (ret)
+		return ret;
+	if (dev_is_sata(dev)) {
+		/* may set PIO mode */
+	#if MV_DISABLE_NCQ
+		struct ata_port *ap = dev->sata_dev.ap;
+		struct ata_device *adev = ap->link.device;
+		adev->flags |= ATA_DFLAG_NCQ_OFF;
+		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
+	#endif
+	}
+	return 0;
+}
+
+void mvs_scan_start(struct Scsi_Host *shost)
+{
+	int i, j;
+	unsigned short core_nr;
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+
+	for (j = 0; j < core_nr; j++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j];
+		for (i = 0; i < mvi->chip->n_phy; ++i)
+			mvs_bytes_dmaed(mvi, i);
+	}
+}
+
+int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
+{
+	/* give the phy enabling interrupt event time to come in (1s
+	 * is empirically about all it takes) */
+	if (time < HZ)
+		return 0;
+	/* Wait for discovery to finish */
+	scsi_flush_work(shost);
+	return 1;
+}
+
+static int mvs_task_prep_smp(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
+{
+	int elem, rc, i;
+	struct sas_task *task = tei->task;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct domain_device *dev = task->dev;
+	struct asd_sas_port *sas_port = dev->port;
+	struct scatterlist *sg_req, *sg_resp;
+	u32 req_len, resp_len, tag = tei->tag;
+	void *buf_tmp;
+	u8 *buf_oaf;
+	dma_addr_t buf_tmp_dma;
+	void *buf_prd;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#if _MV_DUMP
+	u8 *buf_cmd;
+	void *from;
+#endif
+	/*
+	 * DMA-map SMP request, response buffers
+	 */
+	sg_req = &task->smp_task.smp_req;
+	elem = dma_map_sg(mvi->dev, sg_req, 1, PCI_DMA_TODEVICE);
+	if (!elem)
+		return -ENOMEM;
+	req_len = sg_dma_len(sg_req);
+
+	sg_resp = &task->smp_task.smp_resp;
+	elem = dma_map_sg(mvi->dev, sg_resp, 1, PCI_DMA_FROMDEVICE);
+	if (!elem) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+	resp_len = SB_RFB_MAX;
+
+	/* must be in dwords */
+	if ((req_len & 0x3) || (resp_len & 0x3)) {
+		rc = -EINVAL;
+		goto err_out_2;
+	}
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ***** */
+	buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+#if _MV_DUMP
+	buf_cmd = buf_tmp;
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+	buf_tmp += req_len;
+	buf_tmp_dma += req_len;
+	slot->cmd_size = req_len;
+#else
+	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table *********************************** */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+
+	i = MVS_CHIP_DISP->prd_size() * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
+
+	/*
+	 * Fill in TX ring and command slot header
+	 */
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
+					TXQ_MODE_I | tag |
+					(sas_port->phy_mask << TXQ_PHY_SHIFT));
+
+	hdr->flags |= flags;
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
+	hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = 0;
+
+	/* generate open address frame hdr (first 12 bytes) */
+	/* initiator, SMP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_SMP << 4) | 0x01;
+	buf_oaf[1] = dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
+
+#if _MV_DUMP
+	/* copy cmd table */
+	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
+	memcpy(buf_cmd, from + sg_req->offset, req_len);
+	kunmap_atomic(from, KM_IRQ0);
+#endif
+	return 0;
+
+err_out_2:
+	dma_unmap_sg(mvi->dev, &tei->task->smp_task.smp_resp, 1,
+		     PCI_DMA_FROMDEVICE);
+err_out:
+	dma_unmap_sg(mvi->dev, &tei->task->smp_task.smp_req, 1,
+		     PCI_DMA_TODEVICE);
+	return rc;
+}
+
+static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
+{
+	struct ata_queued_cmd *qc = task->uldd_task;
+
+	if (qc) {
+		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
+			qc->tf.command == ATA_CMD_FPDMA_READ) {
+			*tag = qc->tag;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int mvs_task_prep_ata(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
+{
+	struct sas_task *task = tei->task;
+	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct asd_sas_port *sas_port = dev->port;
+	struct mvs_slot_info *slot;
+	void *buf_prd;
+	u32 tag = tei->tag, hdr_tag;
+	u32 flags, del_q;
+	void *buf_tmp;
+	u8 *buf_cmd, *buf_oaf;
+	dma_addr_t buf_tmp_dma;
+	u32 i, req_len, resp_len;
+	const u32 max_resp_len = SB_RFB_MAX;
+
+	if (mvs_assign_reg_set(mvi, mvi_dev) == MVS_ID_NOT_MAPPED) {
+		mv_dprintk("Have not enough regiset for dev %d.\n",
+			mvi_dev->device_id);
+		return -EBUSY;
+	}
+	slot = &mvi->slot_info[tag];
+	slot->tx = mvi->tx_prod;
+	del_q = TXQ_MODE_I | tag |
+		(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
+		(sas_port->phy_mask << TXQ_PHY_SHIFT) |
+		(mvi_dev->taskfileset << TXQ_SRS_SHIFT);
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q);
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (task->data_dir == DMA_FROM_DEVICE)
+		flags = (MVS_CHIP_DISP->prd_count() << MCH_PRD_LEN_SHIFT);
+	else
+		flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#else
+	flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#endif
+	if (task->ata_task.use_ncq)
+		flags |= MCH_FPDMA;
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) {
+		if (task->ata_task.fis.command != ATA_CMD_ID_ATAPI)
+			flags |= MCH_ATAPI;
+	}
+
+	/* FIXME: fill in port multiplier number */
+
+	hdr->flags = cpu_to_le32(flags);
+
+	/* FIXME: the low order order 5 bits for the TAG if enable NCQ */
+	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr_tag))
+		task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
+	else
+		hdr_tag = tag;
+
+	hdr->tags = cpu_to_le32(hdr_tag);
+
+	hdr->data_len = cpu_to_le32(task->total_xfer_len);
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_ATA_CMD_SZ bytes) ************** */
+	buf_cmd = buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_ATA_CMD_SZ;
+	buf_tmp_dma += MVS_ATA_CMD_SZ;
+#if _MV_DUMP
+	slot->cmd_size = MVS_ATA_CMD_SZ;
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	/* used for STP.  unused for SATA? */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+	i = MVS_CHIP_DISP->prd_size() * MVS_CHIP_DISP->prd_count();
+
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	/* FIXME: probably unused, for SATA.  kept here just in case
+	 * we get a STP/SATA error information record
+	 */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
+
+	req_len = sizeof(struct host_to_dev_fis);
+	resp_len = MVS_SLOT_BUF_SZ - MVS_ATA_CMD_SZ -
+	    sizeof(struct mvs_err_info) - i;
+
+	/* request, response lengths */
+	resp_len = min(resp_len, max_resp_len);
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
+
+	if (likely(!task->ata_task.device_control_reg_update))
+		task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
+	/* fill in command FIS and ATAPI CDB */
+	memcpy(buf_cmd, &task->ata_task.fis, sizeof(struct host_to_dev_fis));
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
+		memcpy(buf_cmd + STP_ATAPI_CMD,
+			task->ata_task.atapi_packet, 16);
+
+	/* generate open address frame hdr (first 12 bytes) */
+	/* initiator, STP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_STP << 4) | 0x1;
+	buf_oaf[1] = dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(mvi_dev->device_id + 1);
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (task->data_dir == DMA_FROM_DEVICE)
+		MVS_CHIP_DISP->dma_fix(mvi->bulk_buffer_dma,
+				TRASH_BUCKET_SIZE, tei->n_elem, buf_prd);
+#endif
+	return 0;
+}
+
+static int mvs_task_prep_ssp(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei, int is_tmf,
+			     struct mvs_tmf_task *tmf)
+{
+	struct sas_task *task = tei->task;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct mvs_port *port = tei->port;
+	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
+	struct asd_sas_port *sas_port = dev->port;
+	struct mvs_slot_info *slot;
+	void *buf_prd;
+	struct ssp_frame_hdr *ssp_hdr;
+	void *buf_tmp;
+	u8 *buf_cmd, *buf_oaf, fburst = 0;
+	dma_addr_t buf_tmp_dma;
+	u32 flags;
+	u32 resp_len, req_len, i, tag = tei->tag;
+	const u32 max_resp_len = SB_RFB_MAX;
+	u32 phy_mask;
+
+	slot = &mvi->slot_info[tag];
+
+	phy_mask = ((port->wide_port_phymap) ? port->wide_port_phymap :
+		sas_port->phy_mask) & TXQ_PHY_MASK;
+
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
+				(TXQ_CMD_SSP << TXQ_CMD_SHIFT) |
+				(phy_mask << TXQ_PHY_SHIFT));
+
+	flags = MCH_RETRY;
+	if (task->ssp_task.enable_first_burst) {
+		flags |= MCH_FBURST;
+		fburst = (1 << 7);
+	}
+	if (is_tmf)
+		flags |= (MCH_SSP_FR_TASK << MCH_SSP_FR_TYPE_SHIFT);
+	else
+		flags |= (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT);
+	hdr->flags = cpu_to_le32(flags | (tei->n_elem << MCH_PRD_LEN_SHIFT));
+	hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = cpu_to_le32(task->total_xfer_len);
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
+	buf_cmd = buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_SSP_CMD_SZ;
+	buf_tmp_dma += MVS_SSP_CMD_SZ;
+#if _MV_DUMP
+	slot->cmd_size = MVS_SSP_CMD_SZ;
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+
+	i = MVS_CHIP_DISP->prd_size() * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
+
+	resp_len = MVS_SLOT_BUF_SZ - MVS_SSP_CMD_SZ - MVS_OAF_SZ -
+	    sizeof(struct mvs_err_info) - i;
+	resp_len = min(resp_len, max_resp_len);
+
+	req_len = sizeof(struct ssp_frame_hdr) + 28;
+
+	/* request, response lengths */
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
+
+	/* generate open address frame hdr (first 12 bytes) */
+	/* initiator, SSP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_SSP << 4) | 0x1;
+	buf_oaf[1] = dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(mvi_dev->device_id + 1);
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in SSP frame header (Command Table.SSP frame header) */
+	ssp_hdr = (struct ssp_frame_hdr *)buf_cmd;
+
+	if (is_tmf)
+		ssp_hdr->frame_type = SSP_TASK;
+	else
+		ssp_hdr->frame_type = SSP_COMMAND;
+
+	memcpy(ssp_hdr->hashed_dest_addr, dev->hashed_sas_addr,
+	       HASHED_SAS_ADDR_SIZE);
+	memcpy(ssp_hdr->hashed_src_addr,
+	       dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	ssp_hdr->tag = cpu_to_be16(tag);
+
+	/* fill in IU for TASK and Command Frame */
+	buf_cmd += sizeof(*ssp_hdr);
+	memcpy(buf_cmd, &task->ssp_task.LUN, 8);
+
+	if (ssp_hdr->frame_type != SSP_TASK) {
+		buf_cmd[9] = fburst | task->ssp_task.task_attr |
+				(task->ssp_task.task_prio << 3);
+		memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
+	} else{
+		buf_cmd[10] = tmf->tmf;
+		switch (tmf->tmf) {
+		case TMF_ABORT_TASK:
+		case TMF_QUERY_TASK:
+			buf_cmd[12] =
+				(tmf->tag_of_task_to_be_managed >> 8) & 0xff;
+			buf_cmd[13] =
+				tmf->tag_of_task_to_be_managed & 0xff;
+			break;
+		default:
+			break;
+		}
+	}
+	/* fill in PRD (scatter/gather) table, if any */
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
+	return 0;
+}
+
+#define	DEV_IS_GONE(mvi_dev)	((!mvi_dev || (mvi_dev->dev_type == NO_DEVICE)))
+static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
+				struct completion *completion,int is_tmf,
+				struct mvs_tmf_task *tmf)
+{
+	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+	struct mvs_task_exec_info tei;
+	struct sas_task *t = task;
+	struct mvs_slot_info *slot;
+	u32 tag = 0xdeadbeef, rc, n_elem = 0;
+	u32 n = num, pass = 0;
+	unsigned long flags = 0;
+
+	if (!dev->port) {
+		struct task_status_struct *tsm = &t->task_status;
+
+		tsm->resp = SAS_TASK_UNDELIVERED;
+		tsm->stat = SAS_PHY_DOWN;
+		t->task_done(t);
+		return 0;
+	}
+
+	spin_lock_irqsave(&mvi->lock, flags);
+	do {
+		dev = t->dev;
+		mvi_dev = dev->lldd_dev;
+		if (DEV_IS_GONE(mvi_dev)) {
+			if (mvi_dev)
+				mv_dprintk("device %d not ready.\n",
+					mvi_dev->device_id);
+			else
+				mv_dprintk("device %016llx not ready.\n",
+					SAS_ADDR(dev->sas_addr));
+
+			rc = SAS_PHY_DOWN;
+			goto out_done;
+		}
+
+		if (dev->port->id >= mvi->chip->n_phy)
+			tei.port = &mvi->port[dev->port->id - mvi->chip->n_phy];
+		else
+			tei.port = &mvi->port[dev->port->id];
+
+		if (!tei.port->port_attached) {
+			if (sas_protocol_ata(t->task_proto)) {
+				mv_dprintk("port %d does not"
+					"attached device.\n", dev->port->id);
+				rc = SAS_PHY_DOWN;
+				goto out_done;
+			} else {
+				struct task_status_struct *ts = &t->task_status;
+				ts->resp = SAS_TASK_UNDELIVERED;
+				ts->stat = SAS_PHY_DOWN;
+				t->task_done(t);
+				if (n > 1)
+					t = list_entry(t->list.next,
+							struct sas_task, list);
+				continue;
+			}
+		}
+
+		if (!sas_protocol_ata(t->task_proto)) {
+			if (t->num_scatter) {
+				n_elem = dma_map_sg(mvi->dev,
+						    t->scatter,
+						    t->num_scatter,
+						    t->data_dir);
+				if (!n_elem) {
+					rc = -ENOMEM;
+					goto err_out;
+				}
+			}
+		} else {
+			n_elem = t->num_scatter;
+		}
+
+		rc = mvs_tag_alloc(mvi, &tag);
+		if (rc)
+			goto err_out;
+
+		slot = &mvi->slot_info[tag];
+
+
+		t->lldd_task = NULL;
+		slot->n_elem = n_elem;
+		slot->slot_tag = tag;
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+
+		tei.task = t;
+		tei.hdr = &mvi->slot[tag];
+		tei.tag = tag;
+		tei.n_elem = n_elem;
+		switch (t->task_proto) {
+		case SAS_PROTOCOL_SMP:
+			rc = mvs_task_prep_smp(mvi, &tei);
+			break;
+		case SAS_PROTOCOL_SSP:
+			rc = mvs_task_prep_ssp(mvi, &tei, is_tmf, tmf);
+			break;
+		case SAS_PROTOCOL_SATA:
+		case SAS_PROTOCOL_STP:
+		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+			rc = mvs_task_prep_ata(mvi, &tei);
+			break;
+		default:
+			dev_printk(KERN_ERR, mvi->dev,
+				"unknown sas_task proto: 0x%x\n",
+				t->task_proto);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc) {
+			mv_dprintk("rc is %x\n", rc);
+			goto err_out_tag;
+		}
+		slot->task = t;
+		slot->port = tei.port;
+		t->lldd_task = slot;
+		list_add_tail(&slot->entry, &tei.port->list);
+		/* TODO: select normal or high priority */
+		spin_lock(&t->task_state_lock);
+		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
+		spin_unlock(&t->task_state_lock);
+
+		mvs_hba_memory_dump(mvi, tag, t->task_proto);
+		mvi_dev->runing_req++;
+		++pass;
+		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
+		if (n > 1)
+			t = list_entry(t->list.next, struct sas_task, list);
+	} while (--n);
+	rc = 0;
+	goto out_done;
+
+err_out_tag:
+	mvs_tag_free(mvi, tag);
+err_out:
+
+	dev_printk(KERN_ERR, mvi->dev, "mvsas exec failed[%d]!\n", rc);
+	if (!sas_protocol_ata(t->task_proto))
+		if (n_elem)
+			dma_unmap_sg(mvi->dev, t->scatter, n_elem,
+				     t->data_dir);
+out_done:
+	if (likely(pass)) {
+		MVS_CHIP_DISP->start_delivery(mvi,
+			(mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+	}
+	spin_unlock_irqrestore(&mvi->lock, flags);
+	return rc;
+}
+
+int mvs_queue_command(struct sas_task *task, const int num,
+			gfp_t gfp_flags)
+{
+	return mvs_task_exec(task, num, gfp_flags, NULL, 0, NULL);
+}
+
+static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
+{
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	mvs_tag_clear(mvi, slot_idx);
+}
+
+static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
+			  struct mvs_slot_info *slot, u32 slot_idx)
+{
+	if (!slot->task)
+		return;
+	if (!sas_protocol_ata(task->task_proto))
+		if (slot->n_elem)
+			dma_unmap_sg(mvi->dev, task->scatter,
+				     slot->n_elem, task->data_dir);
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		dma_unmap_sg(mvi->dev, &task->smp_task.smp_resp, 1,
+			     PCI_DMA_FROMDEVICE);
+		dma_unmap_sg(mvi->dev, &task->smp_task.smp_req, 1,
+			     PCI_DMA_TODEVICE);
+		break;
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SSP:
+	default:
+		/* do nothing */
+		break;
+	}
+	list_del_init(&slot->entry);
+	task->lldd_task = NULL;
+	slot->task = NULL;
+	slot->port = NULL;
+	slot->slot_tag = 0xFFFFFFFF;
+	mvs_slot_free(mvi, slot_idx);
+}
+
+static void mvs_update_wideport(struct mvs_info *mvi, int i)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;
+	int j, no;
+
+	for_each_phy(port->wide_port_phymap, j, no) {
+		if (j & 1) {
+			MVS_CHIP_DISP->write_port_cfg_addr(mvi, no,
+						PHYR_WIDE_PORT);
+			MVS_CHIP_DISP->write_port_cfg_data(mvi, no,
+						port->wide_port_phymap);
+		} else {
+			MVS_CHIP_DISP->write_port_cfg_addr(mvi, no,
+						PHYR_WIDE_PORT);
+			MVS_CHIP_DISP->write_port_cfg_data(mvi, no,
+						0);
+		}
+	}
+}
+
+static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
+{
+	u32 tmp;
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;
+
+	tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, i);
+	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
+		if (!port)
+			phy->phy_attached = 1;
+		return tmp;
+	}
+
+	if (port) {
+		if (phy->phy_type & PORT_TYPE_SAS) {
+			port->wide_port_phymap &= ~(1U << i);
+			if (!port->wide_port_phymap)
+				port->port_attached = 0;
+			mvs_update_wideport(mvi, i);
+		} else if (phy->phy_type & PORT_TYPE_SATA)
+			port->port_attached = 0;
+		phy->port = NULL;
+		phy->phy_attached = 0;
+		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	}
+	return 0;
+}
+
+static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
+{
+	u32 *s = (u32 *) buf;
+
+	if (!s)
+		return NULL;
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
+	s[3] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
+	s[2] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
+	s[1] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
+	s[0] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
+
+	/* Workaround: take some ATAPI devices for ATA */
+	if (((s[1] & 0x00FFFFFF) == 0x00EB1401) && (*(u8 *)&s[3] == 0x01))
+		s[1] = 0x00EB1401 | (*((u8 *)&s[1] + 3) & 0x10);
+
+	return s;
+}
+
+static u32 mvs_is_sig_fis_received(u32 irq_status)
+{
+	return irq_status & PHYEV_SIG_FIS;
+}
+
+void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct sas_identify_frame *id;
+
+	id = (struct sas_identify_frame *)phy->frame_rcvd;
+
+	if (get_st) {
+		phy->irq_status = MVS_CHIP_DISP->read_port_irq_stat(mvi, i);
+		phy->phy_status = mvs_is_phy_ready(mvi, i);
+	}
+
+	if (phy->phy_status) {
+		int oob_done = 0;
+		struct asd_sas_phy *sas_phy = &mvi->phy[i].sas_phy;
+
+		oob_done = MVS_CHIP_DISP->oob_done(mvi, i);
+
+		MVS_CHIP_DISP->fix_phy_info(mvi, i, id);
+		if (phy->phy_type & PORT_TYPE_SATA) {
+			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
+			if (mvs_is_sig_fis_received(phy->irq_status)) {
+				phy->phy_attached = 1;
+				phy->att_dev_sas_addr =
+					i + mvi->id * mvi->chip->n_phy;
+				if (oob_done)
+					sas_phy->oob_mode = SATA_OOB_MODE;
+				phy->frame_rcvd_size =
+				    sizeof(struct dev_to_host_fis);
+				mvs_get_d2h_reg(mvi, i, id);
+			} else {
+				u32 tmp;
+				dev_printk(KERN_DEBUG, mvi->dev,
+					"Phy%d : No sig fis\n", i);
+				tmp = MVS_CHIP_DISP->read_port_irq_mask(mvi, i);
+				MVS_CHIP_DISP->write_port_irq_mask(mvi, i,
+						tmp | PHYEV_SIG_FIS);
+				phy->phy_attached = 0;
+				phy->phy_type &= ~PORT_TYPE_SATA;
+				MVS_CHIP_DISP->phy_reset(mvi, i, 0);
+				goto out_done;
+			}
+		}		else if (phy->phy_type & PORT_TYPE_SAS
+			|| phy->att_dev_info & PORT_SSP_INIT_MASK) {
+			phy->phy_attached = 1;
+			phy->identify.device_type =
+				phy->att_dev_info & PORT_DEV_TYPE_MASK;
+
+			if (phy->identify.device_type == SAS_END_DEV)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SSP;
+			else if (phy->identify.device_type != NO_DEVICE)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SMP;
+			if (oob_done)
+				sas_phy->oob_mode = SAS_OOB_MODE;
+			phy->frame_rcvd_size =
+			    sizeof(struct sas_identify_frame);
+		}
+		memcpy(sas_phy->attached_sas_addr,
+			&phy->att_dev_sas_addr, SAS_ADDR_SIZE);
+
+		if (MVS_CHIP_DISP->phy_work_around)
+			MVS_CHIP_DISP->phy_work_around(mvi, i);
+	}
+	mv_dprintk("port %d attach dev info is %x\n",
+		i + mvi->id * mvi->chip->n_phy, phy->att_dev_info);
+	mv_dprintk("port %d attach sas addr is %llx\n",
+		i + mvi->id * mvi->chip->n_phy, phy->att_dev_sas_addr);
+out_done:
+	if (get_st)
+		MVS_CHIP_DISP->write_port_irq_stat(mvi, i, phy->irq_status);
+}
+
+static void mvs_port_notify_formed(struct asd_sas_phy *sas_phy, int lock)
+{
+	struct sas_ha_struct *sas_ha = sas_phy->ha;
+	struct mvs_info *mvi = NULL; int i = 0, hi;
+	struct mvs_phy *phy = sas_phy->lldd_phy;
+	struct asd_sas_port *sas_port = sas_phy->port;
+	struct mvs_port *port;
+	unsigned long flags = 0;
+	if (!sas_port)
+		return;
+
+	while (sas_ha->sas_phy[i]) {
+		if (sas_ha->sas_phy[i] == sas_phy)
+			break;
+		i++;
+	}
+	hi = i/((struct mvs_prv_info *)sas_ha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sas_ha->lldd_ha)->mvi[hi];
+	if (sas_port->id >= mvi->chip->n_phy)
+		port = &mvi->port[sas_port->id - mvi->chip->n_phy];
+	else
+		port = &mvi->port[sas_port->id];
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
+	port->port_attached = 1;
+	phy->port = port;
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		port->wide_port_phymap = sas_port->phy_mask;
+		mv_printk("set wide port phy map %x\n", sas_port->phy_mask);
+		mvs_update_wideport(mvi, sas_phy->id);
+	}
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
+}
+
+static void mvs_port_notify_deformed(struct asd_sas_phy *sas_phy, int lock)
+{
+	/*Nothing*/
+}
+
+
+void mvs_port_formed(struct asd_sas_phy *sas_phy)
+{
+	mvs_port_notify_formed(sas_phy, 1);
+}
+
+void mvs_port_deformed(struct asd_sas_phy *sas_phy)
+{
+	mvs_port_notify_deformed(sas_phy, 1);
+}
+
+struct mvs_device *mvs_alloc_dev(struct mvs_info *mvi)
+{
+	u32 dev;
+	for (dev = 0; dev < MVS_MAX_DEVICES; dev++) {
+		if (mvi->devices[dev].dev_type == NO_DEVICE) {
+			mvi->devices[dev].device_id = dev;
+			return &mvi->devices[dev];
+		}
+	}
+
+	if (dev == MVS_MAX_DEVICES)
+		mv_printk("max support %d devices, ignore ..\n",
+			MVS_MAX_DEVICES);
+
+	return NULL;
+}
+
+void mvs_free_dev(struct mvs_device *mvi_dev)
+{
+	u32 id = mvi_dev->device_id;
+	memset(mvi_dev, 0, sizeof(*mvi_dev));
+	mvi_dev->device_id = id;
+	mvi_dev->dev_type = NO_DEVICE;
+	mvi_dev->dev_status = MVS_DEV_NORMAL;
+	mvi_dev->taskfileset = MVS_ID_NOT_MAPPED;
+}
+
+int mvs_dev_found_notify(struct domain_device *dev, int lock)
+{
+	unsigned long flags = 0;
+	int res = 0;
+	struct mvs_info *mvi = NULL;
+	struct domain_device *parent_dev = dev->parent;
+	struct mvs_device *mvi_device;
+
+	mvi = mvs_find_dev_mvi(dev);
+
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
+
+	mvi_device = mvs_alloc_dev(mvi);
+	if (!mvi_device) {
+		res = -1;
+		goto found_out;
+	}
+	dev->lldd_dev = mvi_device;
+	mvi_device->dev_type = dev->dev_type;
+	mvi_device->mvi_info = mvi;
+	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) {
+		int phy_id;
+		u8 phy_num = parent_dev->ex_dev.num_phys;
+		struct ex_phy *phy;
+		for (phy_id = 0; phy_id < phy_num; phy_id++) {
+			phy = &parent_dev->ex_dev.ex_phy[phy_id];
+			if (SAS_ADDR(phy->attached_sas_addr) ==
+				SAS_ADDR(dev->sas_addr)) {
+				mvi_device->attached_phy = phy_id;
+				break;
+			}
+		}
+
+		if (phy_id == phy_num) {
+			mv_printk("Error: no attached dev:%016llx"
+				"at ex:%016llx.\n",
+				SAS_ADDR(dev->sas_addr),
+				SAS_ADDR(parent_dev->sas_addr));
+			res = -1;
+		}
+	}
+
+found_out:
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	return res;
+}
+
+int mvs_dev_found(struct domain_device *dev)
+{
+	return mvs_dev_found_notify(dev, 1);
+}
+
+void mvs_dev_gone_notify(struct domain_device *dev, int lock)
+{
+	unsigned long flags = 0;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
+
+	if (mvi_dev) {
+		mv_dprintk("found dev[%d:%x] is gone.\n",
+			mvi_dev->device_id, mvi_dev->dev_type);
+		mvs_free_reg_set(mvi, mvi_dev);
+		mvs_free_dev(mvi_dev);
+	} else {
+		mv_dprintk("found dev has gone.\n");
+	}
+	dev->lldd_dev = NULL;
+
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
+}
+
+
+void mvs_dev_gone(struct domain_device *dev)
+{
+	mvs_dev_gone_notify(dev, 1);
+}
+
+static  struct sas_task *mvs_alloc_task(void)
+{
+	struct sas_task *task = kzalloc(sizeof(struct sas_task), GFP_KERNEL);
+
+	if (task) {
+		INIT_LIST_HEAD(&task->list);
+		spin_lock_init(&task->task_state_lock);
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_timer(&task->timer);
+		init_completion(&task->completion);
+	}
+	return task;
+}
+
+static  void mvs_free_task(struct sas_task *task)
+{
+	if (task) {
+		BUG_ON(!list_empty(&task->list));
+		kfree(task);
+	}
+}
+
+static void mvs_task_done(struct sas_task *task)
+{
+	if (!del_timer(&task->timer))
+		return;
+	complete(&task->completion);
+}
+
+static void mvs_tmf_timedout(unsigned long data)
+{
+	struct sas_task *task = (struct sas_task *)data;
+
+	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	complete(&task->completion);
+}
+
+/* XXX */
+#define MVS_TASK_TIMEOUT 20
+static int mvs_exec_internal_tmf_task(struct domain_device *dev,
+			void *parameter, u32 para_len, struct mvs_tmf_task *tmf)
+{
+	int res, retry;
+	struct sas_task *task = NULL;
+
+	for (retry = 0; retry < 3; retry++) {
+		task = mvs_alloc_task();
+		if (!task)
+			return -ENOMEM;
+
+		task->dev = dev;
+		task->task_proto = dev->tproto;
+
+		memcpy(&task->ssp_task, parameter, para_len);
+		task->task_done = mvs_task_done;
+
+		task->timer.data = (unsigned long) task;
+		task->timer.function = mvs_tmf_timedout;
+		task->timer.expires = jiffies + MVS_TASK_TIMEOUT*HZ;
+		add_timer(&task->timer);
+
+		res = mvs_task_exec(task, 1, GFP_KERNEL, NULL, 1, tmf);
+
+		if (res) {
+			del_timer(&task->timer);
+			mv_printk("executing internel task failed:%d\n", res);
+			goto ex_err;
+		}
+
+		wait_for_completion(&task->completion);
+		res = -TMF_RESP_FUNC_FAILED;
+		/* Even TMF timed out, return direct. */
+		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				mv_printk("TMF task[%x] timeout.\n", tmf->tmf);
+				goto ex_err;
+			}
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		    task->task_status.stat == SAM_GOOD) {
+			res = TMF_RESP_FUNC_COMPLETE;
+			break;
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		      task->task_status.stat == SAS_DATA_UNDERRUN) {
+			/* no error, but return the number of bytes of
+			 * underrun */
+			res = task->task_status.residual;
+			break;
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		      task->task_status.stat == SAS_DATA_OVERRUN) {
+			mv_dprintk("blocked task error.\n");
+			res = -EMSGSIZE;
+			break;
+		} else {
+			mv_dprintk(" task to dev %016llx response: 0x%x "
+				    "status 0x%x\n",
+				    SAS_ADDR(dev->sas_addr),
+				    task->task_status.resp,
+				    task->task_status.stat);
+			mvs_free_task(task);
+			task = NULL;
+
+		}
+	}
+ex_err:
+	BUG_ON(retry == 3 && task != NULL);
+	if (task != NULL)
+		mvs_free_task(task);
+	return res;
+}
+
+static int mvs_debug_issue_ssp_tmf(struct domain_device *dev,
+				u8 *lun, struct mvs_tmf_task *tmf)
+{
+	struct sas_ssp_task ssp_task;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	if (!(dev->tproto & SAS_PROTOCOL_SSP))
+		return TMF_RESP_FUNC_ESUPP;
+
+	strncpy((u8 *)&ssp_task.LUN, lun, 8);
+
+	return mvs_exec_internal_tmf_task(dev, &ssp_task,
+				sizeof(ssp_task), tmf);
+}
+
+
+/*  Standard mandates link reset for ATA  (type 0)
+    and hard reset for SSP (type 1) , only for RECOVERY */
+static int mvs_debug_I_T_nexus_reset(struct domain_device *dev)
+{
+	int rc;
+	struct sas_phy *phy = sas_find_local_phy(dev);
+	int reset_type = (dev->dev_type == SATA_DEV ||
+			(dev->tproto & SAS_PROTOCOL_STP)) ? 0 : 1;
+	rc = sas_phy_reset(phy, reset_type);
+	msleep(2000);
+	return rc;
+}
+
+/* mandatory SAM-3 */
+int mvs_lu_reset(struct domain_device *dev, u8 *lun)
+{
+	unsigned long flags;
+	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
+	struct mvs_device * mvi_dev = dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+
+	tmf_task.tmf = TMF_LU_RESET;
+	mvi_dev->dev_status = MVS_DEV_EH;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
+	if (rc == TMF_RESP_FUNC_COMPLETE) {
+		num = mvs_find_dev_phyno(dev, phyno);
+		spin_lock_irqsave(&mvi->lock, flags);
+		for (i = 0; i < num; i++)
+			mvs_release_task(mvi, phyno[i], dev);
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	}
+	/* If failed, fall-through I_T_Nexus reset */
+	mv_printk("%s for device[%x]:rc= %d\n", __func__,
+			mvi_dev->device_id, rc);
+	return rc;
+}
+
+int mvs_I_T_nexus_reset(struct domain_device *dev)
+{
+	unsigned long flags;
+	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_device * mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+
+	if (mvi_dev->dev_status != MVS_DEV_EH)
+		return TMF_RESP_FUNC_COMPLETE;
+	rc = mvs_debug_I_T_nexus_reset(dev);
+	mv_printk("%s for device[%x]:rc= %d\n",
+		__func__, mvi_dev->device_id, rc);
+
+	/* housekeeper */
+	num = mvs_find_dev_phyno(dev, phyno);
+	spin_lock_irqsave(&mvi->lock, flags);
+	for (i = 0; i < num; i++)
+		mvs_release_task(mvi, phyno[i], dev);
+	spin_unlock_irqrestore(&mvi->lock, flags);
+
+	return rc;
+}
+/* optional SAM-3 */
+int mvs_query_task(struct sas_task *task)
+{
+	u32 tag;
+	struct scsi_lun lun;
+	struct mvs_tmf_task tmf_task;
+	int rc = TMF_RESP_FUNC_FAILED;
+
+	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd * cmnd = (struct scsi_cmnd *)task->uldd_task;
+		struct domain_device *dev = task->dev;
+		struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+		struct mvs_info *mvi = mvi_dev->mvi_info;
+
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = mvs_find_tag(mvi, task, &tag);
+		if (rc == 0) {
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+
+		tmf_task.tmf = TMF_QUERY_TASK;
+		tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
+
+		rc = mvs_debug_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
+		switch (rc) {
+		/* The task is still in Lun, release it then */
+		case TMF_RESP_FUNC_SUCC:
+		/* The task is not in Lun or failed, reset the phy */
+		case TMF_RESP_FUNC_FAILED:
+		case TMF_RESP_FUNC_COMPLETE:
+			break;
+		}
+	}
+	mv_printk("%s:rc= %d\n", __func__, rc);
+	return rc;
+}
+
+/*  mandatory SAM-3, still need free task/slot info */
+int mvs_abort_task(struct sas_task *task)
+{
+	struct scsi_lun lun;
+	struct mvs_tmf_task tmf_task;
+	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
+	int rc = TMF_RESP_FUNC_FAILED;
+	unsigned long flags;
+	u32 tag;
+
+	if (mvi->exp_req)
+		mvi->exp_req--;
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		rc = TMF_RESP_FUNC_COMPLETE;
+		goto out;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd * cmnd = (struct scsi_cmnd *)task->uldd_task;
+
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = mvs_find_tag(mvi, task, &tag);
+		if (rc == 0) {
+			mv_printk("No such tag in %s\n", __func__);
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+
+		tmf_task.tmf = TMF_ABORT_TASK;
+		tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
+
+		rc = mvs_debug_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
+
+		/* if successful, clear the task and callback forwards.*/
+		if (rc == TMF_RESP_FUNC_COMPLETE) {
+			u32 slot_no;
+			struct mvs_slot_info *slot;
+
+			if (task->lldd_task) {
+				slot = task->lldd_task;
+				slot_no = (u32) (slot - mvi->slot_info);
+				mvs_slot_complete(mvi, slot_no, 1);
+			}
+		}
+	} else if (task->task_proto & SAS_PROTOCOL_SATA ||
+		task->task_proto & SAS_PROTOCOL_STP) {
+		/* to do free register_set */
+	} else {
+		/* SMP */
+
+	}
+out:
+	if (rc != TMF_RESP_FUNC_COMPLETE)
+		mv_printk("%s:rc= %d\n", __func__, rc);
+	return rc;
+}
+
+int mvs_abort_task_set(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
+
+	tmf_task.tmf = TMF_ABORT_TASK_SET;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
+
+	return rc;
+}
+
+int mvs_clear_aca(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
+
+	tmf_task.tmf = TMF_CLEAR_ACA;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
+
+	return rc;
+}
+
+int mvs_clear_task_set(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
+
+	tmf_task.tmf = TMF_CLEAR_TASK_SET;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
+
+	return rc;
+}
+
+static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
+			u32 slot_idx, int err)
+{
+	struct mvs_device *mvi_dev = task->dev->lldd_dev;
+	struct task_status_struct *tstat = &task->task_status;
+	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
+	int stat = SAM_GOOD;
+
+
+	resp->frame_len = sizeof(struct dev_to_host_fis);
+	memcpy(&resp->ending_fis[0],
+	       SATA_RECEIVED_D2H_FIS(mvi_dev->taskfileset),
+	       sizeof(struct dev_to_host_fis));
+	tstat->buf_valid_size = sizeof(*resp);
+	if (unlikely(err))
+		stat = SAS_PROTO_RESPONSE;
+	return stat;
+}
+
+static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
+			 u32 slot_idx)
+{
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	int stat;
+	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
+	u32 tfs = 0;
+	enum mvs_port_type type = PORT_TYPE_SAS;
+
+	if (err_dw0 & CMD_ISS_STPD)
+		MVS_CHIP_DISP->issue_stop(mvi, type, tfs);
+
+	MVS_CHIP_DISP->command_active(mvi, slot_idx);
+
+	stat = SAM_CHECK_COND;
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		stat = SAS_ABORTED_TASK;
+		break;
+	case SAS_PROTOCOL_SMP:
+		stat = SAM_CHECK_COND;
+		break;
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+	{
+		if (err_dw0 == 0x80400002)
+			mv_printk("find reserved error, why?\n");
+
+		task->ata_task.use_ncq = 0;
+		stat = SAS_PROTO_RESPONSE;
+		mvs_sata_done(mvi, task, slot_idx, 1);
+
+	}
+		break;
+	default:
+		break;
+	}
+
+	return stat;
+}
+
+int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
+{
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	struct sas_task *task = slot->task;
+	struct mvs_device *mvi_dev = NULL;
+	struct task_status_struct *tstat;
+
+	bool aborted;
+	void *to;
+	enum exec_status sts;
+
+	if (mvi->exp_req)
+		mvi->exp_req--;
+	if (unlikely(!task || !task->lldd_task))
+		return -1;
+
+	tstat = &task->task_status;
+	mvi_dev = task->dev->lldd_dev;
+
+	mvs_hba_cq_dump(mvi);
+
+	spin_lock(&task->task_state_lock);
+	task->task_state_flags &=
+		~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
+	task->task_state_flags |= SAS_TASK_STATE_DONE;
+	/* race condition*/
+	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
+	spin_unlock(&task->task_state_lock);
+
+	memset(tstat, 0, sizeof(*tstat));
+	tstat->resp = SAS_TASK_COMPLETE;
+
+	if (unlikely(aborted)) {
+		tstat->stat = SAS_ABORTED_TASK;
+		if (mvi_dev)
+			mvi_dev->runing_req--;
+		if (sas_protocol_ata(task->task_proto))
+			mvs_free_reg_set(mvi, mvi_dev);
+
+		mvs_slot_task_free(mvi, task, slot, slot_idx);
+		return -1;
+	}
+
+	if (unlikely(!mvi_dev || !slot->port->port_attached || flags)) {
+		mv_dprintk("port has not device.\n");
+		tstat->stat = SAS_PHY_DOWN;
+		goto out;
+	}
+
+	/*
+	if (unlikely((rx_desc & RXQ_ERR) || (*(u64 *) slot->response))) {
+		 mv_dprintk("Find device[%016llx] RXQ_ERR %X,
+		 err info:%016llx\n",
+		 SAS_ADDR(task->dev->sas_addr),
+		 rx_desc, (u64)(*(u64 *) slot->response));
+	}
+	*/
+
+	/* error info record present */
+	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
+		goto out;
+	}
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		/* hw says status == 0, datapres == 0 */
+		if (rx_desc & RXQ_GOOD) {
+			tstat->stat = SAM_GOOD;
+			tstat->resp = SAS_TASK_COMPLETE;
+		}
+		/* response frame present */
+		else if (rx_desc & RXQ_RSP) {
+			struct ssp_response_iu *iu = slot->response +
+						sizeof(struct mvs_err_info);
+			sas_ssp_task_response(mvi->dev, task, iu);
+		} else
+			tstat->stat = SAM_CHECK_COND;
+		break;
+
+	case SAS_PROTOCOL_SMP: {
+			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
+			tstat->stat = SAM_GOOD;
+			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
+			memcpy(to + sg_resp->offset,
+				slot->response + sizeof(struct mvs_err_info),
+				sg_dma_len(sg_resp));
+			kunmap_atomic(to, KM_IRQ0);
+			break;
+		}
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
+			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
+			break;
+		}
+
+	default:
+		tstat->stat = SAM_CHECK_COND;
+		break;
+	}
+
+out:
+	if (mvi_dev) {
+		mvi_dev->runing_req--;
+		if (sas_protocol_ata(task->task_proto))
+			mvs_free_reg_set(mvi, mvi_dev);
+	}
+	mvs_slot_task_free(mvi, task, slot, slot_idx);
+	sts = tstat->stat;
+
+	spin_unlock(&mvi->lock);
+	if (task->task_done)
+		task->task_done(task);
+	else
+		mv_dprintk("why has not task_done.\n");
+	spin_lock(&mvi->lock);
+
+	return sts;
+}
+
+void mvs_release_task(struct mvs_info *mvi,
+		int phy_no, struct domain_device *dev)
+{
+	int i = 0; u32 slot_idx;
+	struct mvs_phy *phy;
+	struct mvs_port *port;
+	struct mvs_slot_info *slot, *slot2;
+
+	phy = &mvi->phy[phy_no];
+	port = phy->port;
+	if (!port)
+		return;
+
+	list_for_each_entry_safe(slot, slot2, &port->list, entry) {
+		struct sas_task *task;
+		slot_idx = (u32) (slot - mvi->slot_info);
+		task = slot->task;
+
+		if (dev && task->dev != dev)
+			continue;
+
+		mv_printk("Release slot [%x] tag[%x], task [%p]:\n",
+			slot_idx, slot->slot_tag, task);
+
+		if (task->task_proto & SAS_PROTOCOL_SSP) {
+			mv_printk("attached with SSP task CDB[");
+			for (i = 0; i < 16; i++)
+				mv_printk(" %02x", task->ssp_task.cdb[i]);
+			mv_printk(" ]\n");
+		}
+
+		mvs_slot_complete(mvi, slot_idx, 1);
+	}
+}
+
+static void mvs_phy_disconnected(struct mvs_phy *phy)
+{
+	phy->phy_attached = 0;
+	phy->att_dev_info = 0;
+	phy->att_dev_sas_addr = 0;
+}
+
+static void mvs_work_queue(struct work_struct *work)
+{
+	struct delayed_work *dw = container_of(work, struct delayed_work, work);
+	struct mvs_wq *mwq = container_of(dw, struct mvs_wq, work_q);
+	struct mvs_info *mvi = mwq->mvi;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mvi->lock, flags);
+	if (mwq->handler & PHY_PLUG_EVENT) {
+		u32 phy_no = (unsigned long) mwq->data;
+		struct sas_ha_struct *sas_ha = mvi->sas;
+		struct mvs_phy *phy = &mvi->phy[phy_no];
+		struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+		if (phy->phy_event & PHY_PLUG_OUT) {
+			u32 tmp;
+			struct sas_identify_frame *id;
+			id = (struct sas_identify_frame *)phy->frame_rcvd;
+			tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no);
+			phy->phy_event &= ~PHY_PLUG_OUT;
+			if (!(tmp & PHY_READY_MASK)) {
+				sas_phy_disconnected(sas_phy);
+				mvs_phy_disconnected(phy);
+				sas_ha->notify_phy_event(sas_phy,
+					PHYE_LOSS_OF_SIGNAL);
+				mv_dprintk("phy%d Removed Device\n", phy_no);
+			} else {
+				MVS_CHIP_DISP->detect_porttype(mvi, phy_no);
+				mvs_update_phyinfo(mvi, phy_no, 1);
+				mvs_bytes_dmaed(mvi, phy_no);
+				mvs_port_notify_formed(sas_phy, 0);
+				mv_dprintk("phy%d Attached Device\n", phy_no);
+			}
+		}
+	}
+	list_del(&mwq->entry);
+	spin_unlock_irqrestore(&mvi->lock, flags);
+	kfree(mwq);
+}
+
+static int mvs_handle_event(struct mvs_info *mvi, void *data, int handler)
+{
+	struct mvs_wq *mwq;
+	int ret = 0;
+
+	mwq = kmalloc(sizeof(struct mvs_wq), GFP_ATOMIC);
+	if (mwq) {
+		mwq->mvi = mvi;
+		mwq->data = data;
+		mwq->handler = handler;
+		MV_INIT_DELAYED_WORK(&mwq->work_q, mvs_work_queue, mwq);
+		list_add_tail(&mwq->entry, &mvi->wq_list);
+		schedule_delayed_work(&mwq->work_q, HZ * 2);
+	} else
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+static void mvs_sig_time_out(unsigned long tphy)
+{
+	struct mvs_phy *phy = (struct mvs_phy *)tphy;
+	struct mvs_info *mvi = phy->mvi;
+	u8 phy_no;
+
+	for (phy_no = 0; phy_no < mvi->chip->n_phy; phy_no++) {
+		if (&mvi->phy[phy_no] == phy) {
+			mv_dprintk("Get signature time out, reset phy %d\n",
+				phy_no+mvi->id*mvi->chip->n_phy);
+			MVS_CHIP_DISP->phy_reset(mvi, phy_no, 1);
+		}
+	}
+}
+
+static void mvs_sig_remove_timer(struct mvs_phy *phy)
+{
+	if (phy->timer.function)
+		del_timer(&phy->timer);
+	phy->timer.function = NULL;
+}
+
+void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
+{
+	u32 tmp;
+	struct sas_ha_struct *sas_ha = mvi->sas;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	phy->irq_status = MVS_CHIP_DISP->read_port_irq_stat(mvi, phy_no);
+	mv_dprintk("port %d ctrl sts=0x%X.\n", phy_no+mvi->id*mvi->chip->n_phy,
+		MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no));
+	mv_dprintk("Port %d irq sts = 0x%X\n", phy_no+mvi->id*mvi->chip->n_phy,
+		phy->irq_status);
+
+	/*
+	* events is port event now ,
+	* we need check the interrupt status which belongs to per port.
+	*/
+
+	if (phy->irq_status & PHYEV_DCDR_ERR)
+		mv_dprintk("port %d STP decoding error.\n",
+		phy_no+mvi->id*mvi->chip->n_phy);
+
+	if (phy->irq_status & PHYEV_POOF) {
+		if (!(phy->phy_event & PHY_PLUG_OUT)) {
+			int dev_sata = phy->phy_type & PORT_TYPE_SATA;
+			int ready;
+			mvs_release_task(mvi, phy_no, NULL);
+			phy->phy_event |= PHY_PLUG_OUT;
+			mvs_handle_event(mvi,
+				(void *)(unsigned long)phy_no,
+				PHY_PLUG_EVENT);
+			ready = mvs_is_phy_ready(mvi, phy_no);
+			if (!ready)
+				mv_dprintk("phy%d Unplug Notice\n",
+					phy_no +
+					mvi->id * mvi->chip->n_phy);
+			if (ready || dev_sata) {
+				if (MVS_CHIP_DISP->stp_reset)
+					MVS_CHIP_DISP->stp_reset(mvi,
+							phy_no);
+				else
+					MVS_CHIP_DISP->phy_reset(mvi,
+							phy_no, 0);
+				return;
+			}
+		}
+	}
+
+	if (phy->irq_status & PHYEV_COMWAKE) {
+		tmp = MVS_CHIP_DISP->read_port_irq_mask(mvi, phy_no);
+		MVS_CHIP_DISP->write_port_irq_mask(mvi, phy_no,
+					tmp | PHYEV_SIG_FIS);
+		if (phy->timer.function == NULL) {
+			phy->timer.data = (unsigned long)phy;
+			phy->timer.function = mvs_sig_time_out;
+			phy->timer.expires = jiffies + 10*HZ;
+			add_timer(&phy->timer);
+		}
+	}
+	if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
+		phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
+		mvs_sig_remove_timer(phy);
+		mv_dprintk("notify plug in on phy[%d]\n", phy_no);
+		if (phy->phy_status) {
+			mdelay(10);
+			MVS_CHIP_DISP->detect_porttype(mvi, phy_no);
+			if (phy->phy_type & PORT_TYPE_SATA) {
+				tmp = MVS_CHIP_DISP->read_port_irq_mask(
+						mvi, phy_no);
+				tmp &= ~PHYEV_SIG_FIS;
+				MVS_CHIP_DISP->write_port_irq_mask(mvi,
+							phy_no, tmp);
+			}
+			mvs_update_phyinfo(mvi, phy_no, 0);
+			mvs_bytes_dmaed(mvi, phy_no);
+			/* whether driver is going to handle hot plug */
+			if (phy->phy_event & PHY_PLUG_OUT) {
+				mvs_port_notify_formed(sas_phy, 0);
+				phy->phy_event &= ~PHY_PLUG_OUT;
+			}
+		} else {
+			mv_dprintk("plugin interrupt but phy%d is gone\n",
+				phy_no + mvi->id*mvi->chip->n_phy);
+		}
+	} else if (phy->irq_status & PHYEV_BROAD_CH) {
+		mv_dprintk("port %d broadcast change.\n",
+			phy_no + mvi->id*mvi->chip->n_phy);
+		/* exception for Samsung disk drive*/
+		mdelay(1000);
+		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+	}
+	MVS_CHIP_DISP->write_port_irq_stat(mvi, phy_no, phy->irq_status);
+}
+
+int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
+{
+	u32 rx_prod_idx, rx_desc;
+	bool attn = false;
+
+	/* the first dword in the RX ring is special: it contains
+	 * a mirror of the hardware's RX producer index, so that
+	 * we don't have to stall the CPU reading that register.
+	 * The actual RX ring is offset by one dword, due to this.
+	 */
+	rx_prod_idx = mvi->rx_cons;
+	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
+	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
+		return 0;
+
+	/* The CMPL_Q may come late, read from register and try again
+	* note: if coalescing is enabled,
+	* it will need to read from register every time for sure
+	*/
+	if (unlikely(mvi->rx_cons == rx_prod_idx))
+		mvi->rx_cons = MVS_CHIP_DISP->rx_update(mvi) & RX_RING_SZ_MASK;
+
+	if (mvi->rx_cons == rx_prod_idx)
+		return 0;
+
+	while (mvi->rx_cons != rx_prod_idx) {
+		/* increment our internal RX consumer pointer */
+		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
+		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
+
+		if (likely(rx_desc & RXQ_DONE))
+			mvs_slot_complete(mvi, rx_desc, 0);
+		if (rx_desc & RXQ_ATTN) {
+			attn = true;
+		} else if (rx_desc & RXQ_ERR) {
+			if (!(rx_desc & RXQ_DONE))
+				mvs_slot_complete(mvi, rx_desc, 0);
+		} else if (rx_desc & RXQ_SLOT_RESET) {
+			mvs_slot_free(mvi, rx_desc);
+		}
+	}
+
+	if (attn && self_clear)
+		MVS_CHIP_DISP->int_full(mvi);
+	return 0;
+}
+
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
new file mode 100644
index 0000000..aa2270a
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -0,0 +1,406 @@
+/*
+ * Marvell 88SE64xx/88SE94xx main function head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#ifndef _MV_SAS_H_
+#define _MV_SAS_H_
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <scsi/libsas.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/sas_ata.h>
+#include <linux/version.h>
+#include "mv_defs.h"
+
+#define DRV_NAME		"mvsas"
+#define DRV_VERSION		"0.8.2"
+#define _MV_DUMP		0
+#define MVS_ID_NOT_MAPPED	0x7f
+/* #define DISABLE_HOTPLUG_DMA_FIX */
+#define MAX_EXP_RUNNING_REQ	2
+#define WIDE_PORT_MAX_PHY		4
+#define	MV_DISABLE_NCQ	0
+#define mv_printk(fmt, arg ...)	\
+	printk(KERN_DEBUG"%s %d:" fmt, __FILE__, __LINE__, ## arg)
+#ifdef MV_DEBUG
+#define mv_dprintk(format, arg...)	\
+	printk(KERN_DEBUG"%s %d:" format, __FILE__, __LINE__, ## arg)
+#else
+#define mv_dprintk(format, arg...)
+#endif
+#define MV_MAX_U32			0xffffffff
+
+extern struct mvs_tgt_initiator mvs_tgt;
+extern struct mvs_info *tgt_mvi;
+extern const struct mvs_dispatch mvs_64xx_dispatch;
+extern const struct mvs_dispatch mvs_94xx_dispatch;
+
+#define DEV_IS_EXPANDER(type)	\
+	((type == EDGE_DEV) || (type == FANOUT_DEV))
+
+#define bit(n) ((u32)1 << n)
+
+#define for_each_phy(__lseq_mask, __mc, __lseq)			\
+	for ((__mc) = (__lseq_mask), (__lseq) = 0;		\
+					(__mc) != 0 ;		\
+					(++__lseq), (__mc) >>= 1)
+
+#define MV_INIT_DELAYED_WORK(w, f, d)	INIT_DELAYED_WORK(w, f)
+#define UNASSOC_D2H_FIS(id)		\
+	((void *) mvi->rx_fis + 0x100 * id)
+#define SATA_RECEIVED_FIS_LIST(reg_set)	\
+	((void *) mvi->rx_fis + mvi->chip->fis_offs + 0x100 * reg_set)
+#define SATA_RECEIVED_SDB_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x58)
+#define SATA_RECEIVED_D2H_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x40)
+#define SATA_RECEIVED_PIO_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x20)
+#define SATA_RECEIVED_DMA_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x00)
+
+enum dev_status {
+	MVS_DEV_NORMAL = 0x0,
+	MVS_DEV_EH	= 0x1,
+};
+
+
+struct mvs_info;
+
+struct mvs_dispatch {
+	char *name;
+	int (*chip_init)(struct mvs_info *mvi);
+	int (*spi_init)(struct mvs_info *mvi);
+	int (*chip_ioremap)(struct mvs_info *mvi);
+	void (*chip_iounmap)(struct mvs_info *mvi);
+	irqreturn_t (*isr)(struct mvs_info *mvi, int irq, u32 stat);
+	u32 (*isr_status)(struct mvs_info *mvi, int irq);
+	void (*interrupt_enable)(struct mvs_info *mvi);
+	void (*interrupt_disable)(struct mvs_info *mvi);
+
+	u32 (*read_phy_ctl)(struct mvs_info *mvi, u32 port);
+	void (*write_phy_ctl)(struct mvs_info *mvi, u32 port, u32 val);
+
+	u32 (*read_port_cfg_data)(struct mvs_info *mvi, u32 port);
+	void (*write_port_cfg_data)(struct mvs_info *mvi, u32 port, u32 val);
+	void (*write_port_cfg_addr)(struct mvs_info *mvi, u32 port, u32 addr);
+
+	u32 (*read_port_vsr_data)(struct mvs_info *mvi, u32 port);
+	void (*write_port_vsr_data)(struct mvs_info *mvi, u32 port, u32 val);
+	void (*write_port_vsr_addr)(struct mvs_info *mvi, u32 port, u32 addr);
+
+	u32 (*read_port_irq_stat)(struct mvs_info *mvi, u32 port);
+	void (*write_port_irq_stat)(struct mvs_info *mvi, u32 port, u32 val);
+
+	u32 (*read_port_irq_mask)(struct mvs_info *mvi, u32 port);
+	void (*write_port_irq_mask)(struct mvs_info *mvi, u32 port, u32 val);
+
+	void (*get_sas_addr)(void *buf, u32 buflen);
+	void (*command_active)(struct mvs_info *mvi, u32 slot_idx);
+	void (*issue_stop)(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs);
+	void (*start_delivery)(struct mvs_info *mvi, u32 tx);
+	u32 (*rx_update)(struct mvs_info *mvi);
+	void (*int_full)(struct mvs_info *mvi);
+	u8 (*assign_reg_set)(struct mvs_info *mvi, u8 *tfs);
+	void (*free_reg_set)(struct mvs_info *mvi, u8 *tfs);
+	u32 (*prd_size)(void);
+	u32 (*prd_count)(void);
+	void (*make_prd)(struct scatterlist *scatter, int nr, void *prd);
+	void (*detect_porttype)(struct mvs_info *mvi, int i);
+	int (*oob_done)(struct mvs_info *mvi, int i);
+	void (*fix_phy_info)(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id);
+	void (*phy_work_around)(struct mvs_info *mvi, int i);
+	void (*phy_set_link_rate)(struct mvs_info *mvi, u32 phy_id,
+				struct sas_phy_linkrates *rates);
+	u32 (*phy_max_link_rate)(void);
+	void (*phy_disable)(struct mvs_info *mvi, u32 phy_id);
+	void (*phy_enable)(struct mvs_info *mvi, u32 phy_id);
+	void (*phy_reset)(struct mvs_info *mvi, u32 phy_id, int hard);
+	void (*stp_reset)(struct mvs_info *mvi, u32 phy_id);
+	void (*clear_active_cmds)(struct mvs_info *mvi);
+	u32 (*spi_read_data)(struct mvs_info *mvi);
+	void (*spi_write_data)(struct mvs_info *mvi, u32 data);
+	int (*spi_buildcmd)(struct mvs_info *mvi,
+						u32      *dwCmd,
+						u8       cmd,
+						u8       read,
+						u8       length,
+						u32      addr
+						);
+	int (*spi_issuecmd)(struct mvs_info *mvi, u32 cmd);
+	int (*spi_waitdataready)(struct mvs_info *mvi, u32 timeout);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	void (*dma_fix)(dma_addr_t buf_dma, int buf_len, int from, void *prd);
+#endif
+
+};
+
+struct mvs_chip_info {
+	u32 		n_host;
+	u32 		n_phy;
+	u32 		fis_offs;
+	u32 		fis_count;
+	u32 		srs_sz;
+	u32 		slot_width;
+	const struct mvs_dispatch *dispatch;
+};
+#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+#define MVS_RX_FISL_SZ		\
+	(mvi->chip->fis_offs + (mvi->chip->fis_count * 0x100))
+#define MVS_CHIP_DISP		(mvi->chip->dispatch)
+
+struct mvs_err_info {
+	__le32			flags;
+	__le32			flags2;
+};
+
+struct mvs_cmd_hdr {
+	__le32			flags;	/* PRD tbl len; SAS, SATA ctl */
+	__le32			lens;	/* cmd, max resp frame len */
+	__le32			tags;	/* targ port xfer tag; tag */
+	__le32			data_len;	/* data xfer len */
+	__le64			cmd_tbl;  	/* command table address */
+	__le64			open_frame;	/* open addr frame address */
+	__le64			status_buf;	/* status buffer address */
+	__le64			prd_tbl;		/* PRD tbl address */
+	__le32			reserved[4];
+};
+
+struct mvs_port {
+	struct asd_sas_port	sas_port;
+	u8			port_attached;
+	u8			wide_port_phymap;
+	struct list_head	list;
+};
+
+struct mvs_phy {
+	struct mvs_info 		*mvi;
+	struct mvs_port		*port;
+	struct asd_sas_phy	sas_phy;
+	struct sas_identify	identify;
+	struct scsi_device	*sdev;
+	struct timer_list timer;
+	u64		dev_sas_addr;
+	u64		att_dev_sas_addr;
+	u32		att_dev_info;
+	u32		dev_info;
+	u32		phy_type;
+	u32		phy_status;
+	u32		irq_status;
+	u32		frame_rcvd_size;
+	u8		frame_rcvd[32];
+	u8		phy_attached;
+	u8		phy_mode;
+	u8		reserved[2];
+	u32		phy_event;
+	enum sas_linkrate	minimum_linkrate;
+	enum sas_linkrate	maximum_linkrate;
+};
+
+struct mvs_device {
+	struct list_head		dev_entry;
+	enum sas_dev_type dev_type;
+	struct mvs_info *mvi_info;
+	struct domain_device *sas_device;
+	u32 attached_phy;
+	u32 device_id;
+	u32 runing_req;
+	u8 taskfileset;
+	u8 dev_status;
+	u16 reserved;
+};
+
+struct mvs_slot_info {
+	struct list_head entry;
+	union {
+		struct sas_task *task;
+		void *tdata;
+	};
+	u32 n_elem;
+	u32 tx;
+	u32 slot_tag;
+
+	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
+	 * and PRD table
+	 */
+	void *buf;
+	dma_addr_t buf_dma;
+#if _MV_DUMP
+	u32 cmd_size;
+#endif
+	void *response;
+	struct mvs_port *port;
+	struct mvs_device	*device;
+	void *open_frame;
+};
+
+struct mvs_info {
+	unsigned long flags;
+
+	/* host-wide lock */
+	spinlock_t lock;
+
+	/* our device */
+	struct pci_dev *pdev;
+	struct device *dev;
+
+	/* enhanced mode registers */
+	void __iomem *regs;
+
+	/* peripheral or soc registers */
+	void __iomem *regs_ex;
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* SCSI/SAS glue */
+	struct sas_ha_struct *sas;
+	struct Scsi_Host *shost;
+
+	/* TX (delivery) DMA ring */
+	__le32 *tx;
+	dma_addr_t tx_dma;
+
+	/* cached next-producer idx */
+	u32 tx_prod;
+
+	/* RX (completion) DMA ring */
+	__le32	*rx;
+	dma_addr_t rx_dma;
+
+	/* RX consumer idx */
+	u32 rx_cons;
+
+	/* RX'd FIS area */
+	__le32 *rx_fis;
+	dma_addr_t rx_fis_dma;
+
+	/* DMA command header slots */
+	struct mvs_cmd_hdr *slot;
+	dma_addr_t slot_dma;
+
+	u32 chip_id;
+	const struct mvs_chip_info *chip;
+
+	int tags_num;
+	DECLARE_BITMAP(tags, MVS_SLOTS);
+	/* further per-slot information */
+	struct mvs_phy phy[MVS_MAX_PHYS];
+	struct mvs_port port[MVS_MAX_PHYS];
+	u32 irq;
+	u32 exp_req;
+	u32 id;
+	u64 sata_reg_set;
+	struct list_head *hba_list;
+	struct list_head soc_entry;
+	struct list_head wq_list;
+	unsigned long instance;
+	u16 flashid;
+	u32 flashsize;
+	u32 flashsectSize;
+
+	void *addon;
+	struct mvs_device	devices[MVS_MAX_DEVICES];
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	void *bulk_buffer;
+	dma_addr_t bulk_buffer_dma;
+#define TRASH_BUCKET_SIZE    	0x20000
+#endif
+	struct mvs_slot_info slot_info[0];
+};
+
+struct mvs_prv_info{
+	u8 n_host;
+	u8 n_phy;
+	u16 reserve;
+	struct mvs_info *mvi[2];
+};
+
+struct mvs_wq {
+	struct delayed_work work_q;
+	struct mvs_info *mvi;
+	void *data;
+	int handler;
+	struct list_head entry;
+};
+
+struct mvs_task_exec_info {
+	struct sas_task *task;
+	struct mvs_cmd_hdr *hdr;
+	struct mvs_port *port;
+	u32 tag;
+	int n_elem;
+};
+
+
+/******************** function prototype *********************/
+void mvs_get_sas_addr(void *buf, u32 buflen);
+void mvs_tag_clear(struct mvs_info *mvi, u32 tag);
+void mvs_tag_free(struct mvs_info *mvi, u32 tag);
+void mvs_tag_set(struct mvs_info *mvi, unsigned int tag);
+int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out);
+void mvs_tag_init(struct mvs_info *mvi);
+void mvs_iounmap(void __iomem *regs);
+int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex);
+void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard);
+int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			void *funcdata);
+void __devinit mvs_set_sas_addr(struct mvs_info *mvi, int port_id,
+				u32 off_lo, u32 off_hi, u64 sas_addr);
+int mvs_slave_alloc(struct scsi_device *scsi_dev);
+int mvs_slave_configure(struct scsi_device *sdev);
+void mvs_scan_start(struct Scsi_Host *shost);
+int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time);
+int mvs_queue_command(struct sas_task *task, const int num,
+			gfp_t gfp_flags);
+int mvs_abort_task(struct sas_task *task);
+int mvs_abort_task_set(struct domain_device *dev, u8 *lun);
+int mvs_clear_aca(struct domain_device *dev, u8 *lun);
+int mvs_clear_task_set(struct domain_device *dev, u8 * lun);
+void mvs_port_formed(struct asd_sas_phy *sas_phy);
+void mvs_port_deformed(struct asd_sas_phy *sas_phy);
+int mvs_dev_found(struct domain_device *dev);
+void mvs_dev_gone(struct domain_device *dev);
+int mvs_lu_reset(struct domain_device *dev, u8 *lun);
+int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags);
+int mvs_I_T_nexus_reset(struct domain_device *dev);
+int mvs_query_task(struct sas_task *task);
+void mvs_release_task(struct mvs_info *mvi, int phy_no,
+			struct domain_device *dev);
+void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events);
+void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
+int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
+void mvs_hexdump(u32 size, u8 *data, u32 baseaddr);
+#endif
+
diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
index 0e207aa..5fd73d77 100644
--- a/drivers/scsi/osd/Kbuild
+++ b/drivers/scsi/osd/Kbuild
@@ -11,31 +11,6 @@
 # it under the terms of the GNU General Public License version 2
 #
 
-ifneq ($(OSD_INC),)
-# we are built out-of-tree Kconfigure everything as on
-
-CONFIG_SCSI_OSD_INITIATOR=m
-ccflags-y += -DCONFIG_SCSI_OSD_INITIATOR -DCONFIG_SCSI_OSD_INITIATOR_MODULE
-
-CONFIG_SCSI_OSD_ULD=m
-ccflags-y += -DCONFIG_SCSI_OSD_ULD -DCONFIG_SCSI_OSD_ULD_MODULE
-
-# CONFIG_SCSI_OSD_DPRINT_SENSE =
-#	0 - no print of errors
-#	1 - print errors
-#	2 - errors + warrnings
-ccflags-y += -DCONFIG_SCSI_OSD_DPRINT_SENSE=1
-
-# Uncomment to turn debug on
-# ccflags-y += -DCONFIG_SCSI_OSD_DEBUG
-
-# if we are built out-of-tree and the hosting kernel has OSD headers
-# then "ccflags-y +=" will not pick the out-off-tree headers. Only by doing
-# this it will work. This might break in future kernels
-LINUXINCLUDE := -I$(OSD_INC) $(LINUXINCLUDE)
-
-endif
-
 # libosd.ko - osd-initiator library
 libosd-y := osd_initiator.o
 obj-$(CONFIG_SCSI_OSD_INITIATOR) += libosd.o
diff --git a/drivers/scsi/osd/Makefile b/drivers/scsi/osd/Makefile
deleted file mode 100755
index d905344..0000000
--- a/drivers/scsi/osd/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Makefile for the OSD modules (out of tree)
-#
-# Copyright (C) 2008 Panasas Inc.  All rights reserved.
-#
-# Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
-#   Benny Halevy <bhalevy@panasas.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2
-#
-# This Makefile is used to call the kernel Makefile in case of an out-of-tree
-# build.
-# $KSRC should point to a Kernel source tree otherwise host's default is
-# used. (eg. /lib/modules/`uname -r`/build)
-
-# include path for out-of-tree Headers
-OSD_INC ?= `pwd`/../../../include
-
-# allow users to override these
-# e.g. to compile for a kernel that you aren't currently running
-KSRC ?= /lib/modules/$(shell uname -r)/build
-KBUILD_OUTPUT ?=
-ARCH ?=
-V ?= 0
-
-# this is the basic Kbuild out-of-tree invocation, with the M= option
-KBUILD_BASE = +$(MAKE) -C $(KSRC) M=`pwd` KBUILD_OUTPUT=$(KBUILD_OUTPUT) ARCH=$(ARCH) V=$(V)
-
-all: libosd
-
-libosd: ;
-	$(KBUILD_BASE) OSD_INC=$(OSD_INC) modules
-
-clean:
-	$(KBUILD_BASE) clean
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 1ce6b24..7a117c1 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -118,39 +118,39 @@
 		_osd_ver_desc(or));
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_VENDOR_IDENTIFICATION [%s]\n",
+	OSD_INFO("VENDOR_IDENTIFICATION  [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_IDENTIFICATION [%s]\n",
+	OSD_INFO("PRODUCT_IDENTIFICATION [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_MODEL [%s]\n",
+	OSD_INFO("PRODUCT_MODEL          [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_REVISION_LEVEL [%u]\n",
+	OSD_INFO("PRODUCT_REVISION_LEVEL [%u]\n",
 		pFirst ? get_unaligned_be32(pFirst) : ~0U);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER [%s]\n",
+	OSD_INFO("PRODUCT_SERIAL_NUMBER  [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_OSD_NAME [%s]\n", (char *)pFirst);
+	OSD_INFO("OSD_NAME               [%s]\n", (char *)pFirst);
 	a++;
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_TOTAL_CAPACITY [0x%llx]\n",
+	OSD_INFO("TOTAL_CAPACITY         [0x%llx]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_USED_CAPACITY [0x%llx]\n",
+	OSD_INFO("USED_CAPACITY          [0x%llx]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_NUMBER_OF_PARTITIONS [%llu]\n",
+	OSD_INFO("NUMBER_OF_PARTITIONS   [%llu]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	if (a >= nelem)
@@ -158,7 +158,7 @@
 
 	/* FIXME: Where are the time utilities */
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_CLOCK [0x%02x%02x%02x%02x%02x%02x]\n",
+	OSD_INFO("CLOCK                  [0x%02x%02x%02x%02x%02x%02x]\n",
 		((char *)pFirst)[0], ((char *)pFirst)[1],
 		((char *)pFirst)[2], ((char *)pFirst)[3],
 		((char *)pFirst)[4], ((char *)pFirst)[5]);
@@ -169,7 +169,8 @@
 
 		hex_dump_to_buffer(get_attrs[a].val_ptr, len, 32, 1,
 				   sid_dump, sizeof(sid_dump), true);
-		OSD_INFO("OSD_ATTR_RI_OSD_SYSTEM_ID(%d) [%s]\n", len, sid_dump);
+		OSD_INFO("OSD_SYSTEM_ID(%d)\n"
+			 "        [%s]\n", len, sid_dump);
 		a++;
 	}
 out:
@@ -669,7 +670,7 @@
 	__be16 action, const struct osd_obj_id *obj, osd_id initial_id,
 	struct osd_obj_id_list *list, unsigned nelem)
 {
-	struct request_queue *q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *q = osd_request_queue(or->osd_dev);
 	u64 len = nelem * sizeof(osd_id) + sizeof(*list);
 	struct bio *bio;
 
@@ -778,16 +779,32 @@
 */
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
 	WARN_ON(or->out.bio || or->out.total_bytes);
-	bio->bi_rw |= (1 << BIO_RW);
+	WARN_ON(0 ==  bio_rw_flagged(bio, BIO_RW));
 	or->out.bio = bio;
-	or->out.total_bytes = bio->bi_size;
+	or->out.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_write);
 
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, obj, offset, bio, len);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_write_kern);
+
 /*TODO: void osd_req_append(struct osd_request *,
 	const struct osd_obj_id *, struct bio *data_out); */
 /*TODO: void osd_req_create_write(struct osd_request *,
@@ -813,16 +830,31 @@
 EXPORT_SYMBOL(osd_req_flush_object);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
 	WARN_ON(or->in.bio || or->in.total_bytes);
-	bio->bi_rw &= ~(1 << BIO_RW);
+	WARN_ON(1 == bio_rw_flagged(bio, BIO_RW));
 	or->in.bio = bio;
-	or->in.total_bytes = bio->bi_size;
+	or->in.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_read);
 
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_read(or, obj, offset, bio, len);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_read_kern);
+
 void osd_req_get_attributes(struct osd_request *or,
 	const struct osd_obj_id *obj)
 {
@@ -889,26 +921,6 @@
 }
 EXPORT_SYMBOL(osd_req_add_set_attr_list);
 
-static int _append_map_kern(struct request *req,
-	void *buff, unsigned len, gfp_t flags)
-{
-	struct bio *bio;
-	int ret;
-
-	bio = bio_map_kern(req->q, buff, len, flags);
-	if (IS_ERR(bio)) {
-		OSD_ERR("Failed bio_map_kern(%p, %d) => %ld\n", buff, len,
-			PTR_ERR(bio));
-		return PTR_ERR(bio);
-	}
-	ret = blk_rq_append_bio(req->q, req, bio);
-	if (ret) {
-		OSD_ERR("Failed blk_rq_append_bio(%p) => %d\n", bio, ret);
-		bio_put(bio);
-	}
-	return ret;
-}
-
 static int _req_append_segment(struct osd_request *or,
 	unsigned padding, struct _osd_req_data_segment *seg,
 	struct _osd_req_data_segment *last_seg, struct _osd_io_info *io)
@@ -924,14 +936,14 @@
 		else
 			pad_buff = io->pad_buff;
 
-		ret = _append_map_kern(io->req, pad_buff, padding,
+		ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding,
 				       or->alloc_flags);
 		if (ret)
 			return ret;
 		io->total_bytes += padding;
 	}
 
-	ret = _append_map_kern(io->req, seg->buff, seg->total_bytes,
+	ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes,
 			       or->alloc_flags);
 	if (ret)
 		return ret;
@@ -1233,7 +1245,7 @@
 }
 
 static int _osd_req_finalize_data_integrity(struct osd_request *or,
-	bool has_in, bool has_out, const u8 *cap_key)
+	bool has_in, bool has_out, u64 out_data_bytes, const u8 *cap_key)
 {
 	struct osd_security_parameters *sec_parms = _osd_req_sec_params(or);
 	int ret;
@@ -1248,8 +1260,7 @@
 		};
 		unsigned pad;
 
-		or->out_data_integ.data_bytes = cpu_to_be64(
-			or->out.bio ? or->out.bio->bi_size : 0);
+		or->out_data_integ.data_bytes = cpu_to_be64(out_data_bytes);
 		or->out_data_integ.set_attributes_bytes = cpu_to_be64(
 			or->set_attr.total_bytes);
 		or->out_data_integ.get_attributes_bytes = cpu_to_be64(
@@ -1293,6 +1304,21 @@
 /*
  * osd_finalize_request and helpers
  */
+static struct request *_make_request(struct request_queue *q, bool has_write,
+			      struct _osd_io_info *oii, gfp_t flags)
+{
+	if (oii->bio)
+		return blk_make_request(q, oii->bio, flags);
+	else {
+		struct request *req;
+
+		req = blk_get_request(q, has_write ? WRITE : READ, flags);
+		if (unlikely(!req))
+			return ERR_PTR(-ENOMEM);
+
+		return req;
+	}
+}
 
 static int _init_blk_request(struct osd_request *or,
 	bool has_in, bool has_out)
@@ -1301,14 +1327,18 @@
 	struct scsi_device *scsi_device = or->osd_dev->scsi_device;
 	struct request_queue *q = scsi_device->request_queue;
 	struct request *req;
-	int ret = -ENOMEM;
+	int ret;
 
-	req = blk_get_request(q, has_out, flags);
-	if (!req)
+	req = _make_request(q, has_out, has_out ? &or->out : &or->in, flags);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
 		goto out;
+	}
 
 	or->request = req;
 	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
+
 	req->timeout = or->timeout;
 	req->retries = or->retries;
 	req->sense = or->sense;
@@ -1318,9 +1348,10 @@
 		or->out.req = req;
 		if (has_in) {
 			/* allocate bidi request */
-			req = blk_get_request(q, READ, flags);
-			if (!req) {
+			req = _make_request(q, false, &or->in, flags);
+			if (IS_ERR(req)) {
 				OSD_DEBUG("blk_get_request for bidi failed\n");
+				ret = PTR_ERR(req);
 				goto out;
 			}
 			req->cmd_type = REQ_TYPE_BLOCK_PC;
@@ -1341,6 +1372,7 @@
 {
 	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
 	bool has_in, has_out;
+	u64 out_data_bytes = or->out.total_bytes;
 	int ret;
 
 	if (options & OSD_REQ_FUA)
@@ -1364,26 +1396,6 @@
 		return ret;
 	}
 
-	if (or->out.bio) {
-		ret = blk_rq_append_bio(or->request->q, or->out.req,
-					or->out.bio);
-		if (ret) {
-			OSD_DEBUG("blk_rq_append_bio out failed\n");
-			return ret;
-		}
-		OSD_DEBUG("out bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->out.total_bytes), or->out.req->data_len);
-	}
-	if (or->in.bio) {
-		ret = blk_rq_append_bio(or->request->q, or->in.req, or->in.bio);
-		if (ret) {
-			OSD_DEBUG("blk_rq_append_bio in failed\n");
-			return ret;
-		}
-		OSD_DEBUG("in bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->in.total_bytes), or->in.req->data_len);
-	}
-
 	or->out.pad_buff = sg_out_pad_buffer;
 	or->in.pad_buff = sg_in_pad_buffer;
 
@@ -1410,7 +1422,8 @@
 		}
 	}
 
-	ret = _osd_req_finalize_data_integrity(or, has_in, has_out, cap_key);
+	ret = _osd_req_finalize_data_integrity(or, has_in, has_out,
+					       out_data_bytes, cap_key);
 	if (ret)
 		return ret;
 
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index 22b59e1..0bdef33 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -49,6 +49,7 @@
 #include <linux/device.h>
 #include <linux/idr.h>
 #include <linux/major.h>
+#include <linux/file.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_driver.h>
@@ -175,10 +176,9 @@
 
 struct osd_dev *osduld_path_lookup(const char *name)
 {
-	struct path path;
-	struct inode *inode;
-	struct cdev *cdev;
-	struct osd_uld_device *uninitialized_var(oud);
+	struct osd_uld_device *oud;
+	struct osd_dev *od;
+	struct file *file;
 	int error;
 
 	if (!name || !*name) {
@@ -186,52 +186,46 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	error = kern_path(name, LOOKUP_FOLLOW, &path);
-	if (error) {
-		OSD_ERR("path_lookup of %s failed=>%d\n", name, error);
-		return ERR_PTR(error);
+	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return ERR_PTR(-ENOMEM);
+
+	file = filp_open(name, O_RDWR, 0);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto free_od;
 	}
 
-	inode = path.dentry->d_inode;
-	error = -EINVAL; /* Not the right device e.g osd_uld_device */
-	if (!S_ISCHR(inode->i_mode)) {
-		OSD_DEBUG("!S_ISCHR()\n");
-		goto out;
+	if (file->f_op != &osd_fops){
+		error = -EINVAL;
+		goto close_file;
 	}
 
-	cdev = inode->i_cdev;
-	if (!cdev) {
-		OSD_ERR("Before mounting an OSD Based filesystem\n");
-		OSD_ERR("  user-mode must open+close the %s device\n", name);
-		OSD_ERR("  Example: bash: echo < %s\n", name);
-		goto out;
-	}
+	oud = file->private_data;
 
-	/* The Magic wand. Is it our char-dev */
-	/* TODO: Support sg devices */
-	if (cdev->owner != THIS_MODULE) {
-		OSD_ERR("Error mounting %s - is not an OSD device\n", name);
-		goto out;
-	}
+	*od = oud->od;
+	od->file = file;
 
-	oud = container_of(cdev, struct osd_uld_device, cdev);
+	return od;
 
-	__uld_get(oud);
-	error = 0;
-
-out:
-	path_put(&path);
-	return error ? ERR_PTR(error) : &oud->od;
+close_file:
+	fput(file);
+free_od:
+	kfree(od);
+	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(osduld_path_lookup);
 
 void osduld_put_device(struct osd_dev *od)
 {
-	if (od) {
-		struct osd_uld_device *oud = container_of(od,
-						struct osd_uld_device, od);
 
-		__uld_put(oud);
+	if (od && !IS_ERR(od)) {
+		struct osd_uld_device *oud = od->file->private_data;
+
+		BUG_ON(od->scsi_device != oud->od.scsi_device);
+
+		fput(od->file);
+		kfree(od);
 	}
 }
 EXPORT_SYMBOL(osduld_put_device);
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 5defe5e..8371d91 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -17,9 +17,12 @@
 * General Public License for more details.
 *
 ******************************************************************************/
-#define QLA1280_VERSION      "3.26"
+#define QLA1280_VERSION      "3.27"
 /*****************************************************************************
     Revision History:
+    Rev  3.27, February 10, 2009, Michael Reed
+	- General code cleanup.
+	- Improve error recovery.
     Rev  3.26, January 16, 2006 Jes Sorensen
 	- Ditch all < 2.6 support
     Rev  3.25.1, February 10, 2005 Christoph Hellwig
@@ -435,7 +438,6 @@
 				   uint8_t, uint16_t *);
 static int qla1280_bus_reset(struct scsi_qla_host *, int);
 static int qla1280_device_reset(struct scsi_qla_host *, int, int);
-static int qla1280_abort_device(struct scsi_qla_host *, int, int, int);
 static int qla1280_abort_command(struct scsi_qla_host *, struct srb *, int);
 static int qla1280_abort_isp(struct scsi_qla_host *);
 #ifdef QLA_64BIT_PTR
@@ -698,7 +700,7 @@
 }
 
 /**************************************************************************
- *   qla1200_queuecommand
+ *   qla1280_queuecommand
  *     Queue a command to the controller.
  *
  * Note:
@@ -713,12 +715,14 @@
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct scsi_qla_host *ha = (struct scsi_qla_host *)host->hostdata;
-	struct srb *sp = (struct srb *)&cmd->SCp;
+	struct srb *sp = (struct srb *)CMD_SP(cmd);
 	int status;
 
 	cmd->scsi_done = fn;
 	sp->cmd = cmd;
 	sp->flags = 0;
+	sp->wait = NULL;
+	CMD_HANDLE(cmd) = (unsigned char *)NULL;
 
 	qla1280_print_scsi_cmd(5, cmd);
 
@@ -738,21 +742,11 @@
 
 enum action {
 	ABORT_COMMAND,
-	ABORT_DEVICE,
 	DEVICE_RESET,
 	BUS_RESET,
 	ADAPTER_RESET,
-	FAIL
 };
 
-/* timer action for error action processor */
-static void qla1280_error_wait_timeout(unsigned long __data)
-{
-	struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data;
-	struct srb *sp = (struct srb *)CMD_SP(cmd);
-
-	complete(sp->wait);
-}
 
 static void qla1280_mailbox_timeout(unsigned long __data)
 {
@@ -767,8 +761,67 @@
 	complete(ha->mailbox_wait);
 }
 
+static int
+_qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp,
+				 struct completion *wait)
+{
+	int	status = FAILED;
+	struct scsi_cmnd *cmd = sp->cmd;
+
+	spin_unlock_irq(ha->host->host_lock);
+	wait_for_completion_timeout(wait, 4*HZ);
+	spin_lock_irq(ha->host->host_lock);
+	sp->wait = NULL;
+	if(CMD_HANDLE(cmd) == COMPLETED_HANDLE) {
+		status = SUCCESS;
+		(*cmd->scsi_done)(cmd);
+	}
+	return status;
+}
+
+static int
+qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	sp->wait = &wait;
+	return _qla1280_wait_for_single_command(ha, sp, &wait);
+}
+
+static int
+qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target)
+{
+	int		cnt;
+	int		status;
+	struct srb	*sp;
+	struct scsi_cmnd *cmd;
+
+	status = SUCCESS;
+
+	/*
+	 * Wait for all commands with the designated bus/target
+	 * to be completed by the firmware
+	 */
+	for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		sp = ha->outstanding_cmds[cnt];
+		if (sp) {
+			cmd = sp->cmd;
+
+			if (bus >= 0 && SCSI_BUS_32(cmd) != bus)
+				continue;
+			if (target >= 0 && SCSI_TCN_32(cmd) != target)
+				continue;
+
+			status = qla1280_wait_for_single_command(ha, sp);
+			if (status == FAILED)
+				break;
+		}
+	}
+	return status;
+}
+
 /**************************************************************************
- * qla1200_error_action
+ * qla1280_error_action
  *    The function will attempt to perform a specified error action and
  *    wait for the results (or time out).
  *
@@ -780,11 +833,6 @@
  * Returns:
  *      SUCCESS or FAILED
  *
- * Note:
- *      Resetting the bus always succeeds - is has to, otherwise the
- *      kernel will panic! Try a surgical technique - sending a BUS
- *      DEVICE RESET message - on the offending target before pulling
- *      the SCSI bus reset line.
  **************************************************************************/
 static int
 qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
@@ -792,13 +840,19 @@
 	struct scsi_qla_host *ha;
 	int bus, target, lun;
 	struct srb *sp;
-	uint16_t data;
-	unsigned char *handle;
-	int result, i;
+	int i, found;
+	int result=FAILED;
+	int wait_for_bus=-1;
+	int wait_for_target = -1;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	struct timer_list timer;
+
+	ENTER("qla1280_error_action");
 
 	ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata);
+	sp = (struct srb *)CMD_SP(cmd);
+	bus = SCSI_BUS_32(cmd);
+	target = SCSI_TCN_32(cmd);
+	lun = SCSI_LUN_32(cmd);
 
 	dprintk(4, "error_action %i, istatus 0x%04x\n", action,
 		RD_REG_WORD(&ha->iobase->istatus));
@@ -807,99 +861,47 @@
 		RD_REG_WORD(&ha->iobase->host_cmd),
 		RD_REG_WORD(&ha->iobase->ictrl), jiffies);
 
-	ENTER("qla1280_error_action");
 	if (qla1280_verbose)
 		printk(KERN_INFO "scsi(%li): Resetting Cmnd=0x%p, "
 		       "Handle=0x%p, action=0x%x\n",
 		       ha->host_no, cmd, CMD_HANDLE(cmd), action);
 
-	if (cmd == NULL) {
-		printk(KERN_WARNING "(scsi?:?:?:?) Reset called with NULL "
-		       "si_Cmnd pointer, failing.\n");
-		LEAVE("qla1280_error_action");
-		return FAILED;
-	}
-
-	ha = (struct scsi_qla_host *)cmd->device->host->hostdata;
-	sp = (struct srb *)CMD_SP(cmd);
-	handle = CMD_HANDLE(cmd);
-
-	/* Check for pending interrupts. */
-	data = qla1280_debounce_register(&ha->iobase->istatus);
 	/*
-	 * The io_request_lock is held when the reset handler is called, hence
-	 * the interrupt handler cannot be running in parallel as it also
-	 * grabs the lock. /Jes
+	 * Check to see if we have the command in the outstanding_cmds[]
+	 * array.  If not then it must have completed before this error
+	 * action was initiated.  If the error_action isn't ABORT_COMMAND
+	 * then the driver must proceed with the requested action.
 	 */
-	if (data & RISC_INT)
-		qla1280_isr(ha, &ha->done_q);
-
-	/*
-	 * Determine the suggested action that the mid-level driver wants
-	 * us to perform.
-	 */
-	if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) {
-		if(action == ABORT_COMMAND) {
-			/* we never got this command */
-			printk(KERN_INFO "qla1280: Aborting a NULL handle\n");
-			return SUCCESS;	/* no action - we don't have command */
-		}
-	} else {
-		sp->wait = &wait;
-	}
-
-	bus = SCSI_BUS_32(cmd);
-	target = SCSI_TCN_32(cmd);
-	lun = SCSI_LUN_32(cmd);
-
-	/* Overloading result.  Here it means the success or fail of the
-	 * *issue* of the action.  When we return from the routine, it must
-	 * mean the actual success or fail of the action */
-	result = FAILED;
-	switch (action) {
-	case FAIL:
-		break;
-
-	case ABORT_COMMAND:
-		if ((sp->flags & SRB_ABORT_PENDING)) {
-			printk(KERN_WARNING
-			       "scsi(): Command has a pending abort "
-			       "message - ABORT_PENDING.\n");
-			/* This should technically be impossible since we
-			 * now wait for abort completion */
+	found = -1;
+	for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+		if (sp == ha->outstanding_cmds[i]) {
+			found = i;
+			sp->wait = &wait; /* we'll wait for it to complete */
 			break;
 		}
+	}
 
-		for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
-			if (sp == ha->outstanding_cmds[i]) {
-				dprintk(1, "qla1280: RISC aborting command\n");
-				if (qla1280_abort_command(ha, sp, i) == 0)
-					result = SUCCESS;
-				else {
-					/*
-					 * Since we don't know what might
-					 * have happend to the command, it
-					 * is unsafe to remove it from the
-					 * device's queue at this point.
-					 * Wait and let the escalation
-					 * process take care of it.
-					 */
-					printk(KERN_WARNING
-					       "scsi(%li:%i:%i:%i): Unable"
-					       " to abort command!\n",
-					       ha->host_no, bus, target, lun);
-				}
-			}
-		}
-		break;
-
-	case ABORT_DEVICE:
-		if (qla1280_verbose)
+	if (found < 0) {	/* driver doesn't have command */
+		result = SUCCESS;
+		if (qla1280_verbose) {
 			printk(KERN_INFO
-			       "scsi(%ld:%d:%d:%d): Queueing abort device "
-			       "command.\n", ha->host_no, bus, target, lun);
-		if (qla1280_abort_device(ha, bus, target, lun) == 0)
-			result = SUCCESS;
+			       "scsi(%ld:%d:%d:%d): specified command has "
+			       "already completed.\n", ha->host_no, bus,
+				target, lun);
+		}
+	}
+
+	switch (action) {
+
+	case ABORT_COMMAND:
+		dprintk(1, "qla1280: RISC aborting command\n");
+		/*
+		 * The abort might fail due to race when the host_lock
+		 * is released to issue the abort.  As such, we
+		 * don't bother to check the return status.
+		 */
+		if (found >= 0)
+			qla1280_abort_command(ha, sp, found);
 		break;
 
 	case DEVICE_RESET:
@@ -907,16 +909,21 @@
 			printk(KERN_INFO
 			       "scsi(%ld:%d:%d:%d): Queueing device reset "
 			       "command.\n", ha->host_no, bus, target, lun);
-		if (qla1280_device_reset(ha, bus, target) == 0)
-			result = SUCCESS;
+		if (qla1280_device_reset(ha, bus, target) == 0) {
+			/* issued device reset, set wait conditions */
+			wait_for_bus = bus;
+			wait_for_target = target;
+		}
 		break;
 
 	case BUS_RESET:
 		if (qla1280_verbose)
 			printk(KERN_INFO "qla1280(%ld:%d): Issued bus "
 			       "reset.\n", ha->host_no, bus);
-		if (qla1280_bus_reset(ha, bus) == 0)
-			result = SUCCESS;
+		if (qla1280_bus_reset(ha, bus) == 0) {
+			/* issued bus reset, set wait conditions */
+			wait_for_bus = bus;
+		}
 		break;
 
 	case ADAPTER_RESET:
@@ -929,55 +936,48 @@
 			       "continue automatically\n", ha->host_no);
 		}
 		ha->flags.reset_active = 1;
-		/*
-		 * We restarted all of the commands automatically, so the
-		 * mid-level code can expect completions momentitarily.
-		 */
-		if (qla1280_abort_isp(ha) == 0)
-			result = SUCCESS;
+
+		if (qla1280_abort_isp(ha) != 0) {	/* it's dead */
+			result = FAILED;
+		}
 
 		ha->flags.reset_active = 0;
 	}
 
-	if (!list_empty(&ha->done_q))
-		qla1280_done(ha);
+	/*
+	 * At this point, the host_lock has been released and retaken
+	 * by the issuance of the mailbox command.
+	 * Wait for the command passed in by the mid-layer if it
+	 * was found by the driver.  It might have been returned
+	 * between eh recovery steps, hence the check of the "found"
+	 * variable.
+	 */
 
-	/* If we didn't manage to issue the action, or we have no
-	 * command to wait for, exit here */
-	if (result == FAILED || handle == NULL ||
-	    handle == (unsigned char *)INVALID_HANDLE) {
-		/*
-		 * Clear completion queue to avoid qla1280_done() trying
-		 * to complete the command at a later stage after we
-		 * have exited the current context
-		 */
-		sp->wait = NULL;
-		goto leave;
+	if (found >= 0)
+		result = _qla1280_wait_for_single_command(ha, sp, &wait);
+
+	if (action == ABORT_COMMAND && result != SUCCESS) {
+		printk(KERN_WARNING
+		       "scsi(%li:%i:%i:%i): "
+		       "Unable to abort command!\n",
+		       ha->host_no, bus, target, lun);
 	}
 
-	/* set up a timer just in case we're really jammed */
-	init_timer(&timer);
-	timer.expires = jiffies + 4*HZ;
-	timer.data = (unsigned long)cmd;
-	timer.function = qla1280_error_wait_timeout;
-	add_timer(&timer);
-
-	/* wait for the action to complete (or the timer to expire) */
-	spin_unlock_irq(ha->host->host_lock);
-	wait_for_completion(&wait);
-	del_timer_sync(&timer);
-	spin_lock_irq(ha->host->host_lock);
-	sp->wait = NULL;
-
-	/* the only action we might get a fail for is abort */
-	if (action == ABORT_COMMAND) {
-		if(sp->flags & SRB_ABORTED)
-			result = SUCCESS;
-		else
-			result = FAILED;
+	/*
+	 * If the command passed in by the mid-layer has been
+	 * returned by the board, then wait for any additional
+	 * commands which are supposed to complete based upon
+	 * the error action.
+	 *
+	 * All commands are unconditionally returned during a
+	 * call to qla1280_abort_isp(), ADAPTER_RESET.  No need
+	 * to wait for them.
+	 */
+	if (result == SUCCESS && wait_for_bus >= 0) {
+		result = qla1280_wait_for_pending_commands(ha,
+					wait_for_bus, wait_for_target);
 	}
 
- leave:
 	dprintk(1, "RESET returning %d\n", result);
 
 	LEAVE("qla1280_error_action");
@@ -1280,13 +1280,12 @@
 		switch ((CMD_RESULT(cmd) >> 16)) {
 		case DID_RESET:
 			/* Issue marker command. */
-			qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
+			if (!ha->flags.abort_isp_active)
+				qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
 			break;
 		case DID_ABORT:
 			sp->flags &= ~SRB_ABORT_PENDING;
 			sp->flags |= SRB_ABORTED;
-			if (sp->flags & SRB_TIMEOUT)
-				CMD_RESULT(sp->cmd) = DID_TIME_OUT << 16;
 			break;
 		default:
 			break;
@@ -1296,12 +1295,11 @@
 		scsi_dma_unmap(cmd);
 
 		/* Call the mid-level driver interrupt handler */
-		CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE;
 		ha->actthreads--;
 
-		(*(cmd)->scsi_done)(cmd);
-
-		if(sp->wait != NULL)
+		if (sp->wait == NULL)
+			(*(cmd)->scsi_done)(cmd);
+		else
 			complete(sp->wait);
 	}
 	LEAVE("qla1280_done");
@@ -2417,9 +2415,6 @@
 qla1280_mailbox_command(struct scsi_qla_host *ha, uint8_t mr, uint16_t *mb)
 {
 	struct device_reg __iomem *reg = ha->iobase;
-#if 0
-	LIST_HEAD(done_q);
-#endif
 	int status = 0;
 	int cnt;
 	uint16_t *optr, *iptr;
@@ -2493,19 +2488,9 @@
 	mr = MAILBOX_REGISTER_COUNT;
 	memcpy(optr, iptr, MAILBOX_REGISTER_COUNT * sizeof(uint16_t));
 
-#if 0
-	/* Go check for any response interrupts pending. */
-	qla1280_isr(ha, &done_q);
-#endif
-
 	if (ha->flags.reset_marker)
 		qla1280_rst_aen(ha);
 
-#if 0
-	if (!list_empty(&done_q))
-		qla1280_done(ha, &done_q);
-#endif
-
 	if (status)
 		dprintk(2, "qla1280_mailbox_command: **** FAILED, mailbox0 = "
 			"0x%x ****\n", mb[0]);
@@ -2641,41 +2626,6 @@
 }
 
 /*
- * qla1280_abort_device
- *      Issue an abort message to the device
- *
- * Input:
- *      ha     = adapter block pointer.
- *      bus    = SCSI BUS.
- *      target = SCSI ID.
- *      lun    = SCSI LUN.
- *
- * Returns:
- *      0 = success
- */
-static int
-qla1280_abort_device(struct scsi_qla_host *ha, int bus, int target, int lun)
-{
-	uint16_t mb[MAILBOX_REGISTER_COUNT];
-	int status;
-
-	ENTER("qla1280_abort_device");
-
-	mb[0] = MBC_ABORT_DEVICE;
-	mb[1] = (bus ? target | BIT_7 : target) << 8 | lun;
-	status = qla1280_mailbox_command(ha, BIT_1 | BIT_0, &mb[0]);
-
-	/* Issue marker command. */
-	qla1280_marker(ha, bus, target, lun, MK_SYNC_ID_LUN);
-
-	if (status)
-		dprintk(2, "qla1280_abort_device: **** FAILED ****\n");
-
-	LEAVE("qla1280_abort_device");
-	return status;
-}
-
-/*
  * qla1280_abort_command
  *      Abort command aborts a specified IOCB.
  *
@@ -2833,7 +2783,7 @@
 
 	/* If room for request in request ring. */
 	if ((req_cnt + 2) >= ha->req_q_cnt) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_start_scsi: in-ptr=0x%x  req_q_cnt="
 			"0x%xreq_cnt=0x%x", ha->req_ring_index, ha->req_q_cnt,
 			req_cnt);
@@ -2845,7 +2795,7 @@
 		     ha->outstanding_cmds[cnt] != NULL; cnt++);
 
 	if (cnt >= MAX_OUTSTANDING_COMMANDS) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_start_scsi: NO ROOM IN "
 			"OUTSTANDING ARRAY, req_q_cnt=0x%x", ha->req_q_cnt);
 		goto out;
@@ -3108,7 +3058,7 @@
 		ha->req_q_cnt, seg_cnt);
 	/* If room for request in request ring. */
 	if ((req_cnt + 2) >= ha->req_q_cnt) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_32bit_start_scsi: in-ptr=0x%x, "
 			"req_q_cnt=0x%x, req_cnt=0x%x", ha->req_ring_index,
 			ha->req_q_cnt, req_cnt);
@@ -3120,7 +3070,7 @@
 		     (ha->outstanding_cmds[cnt] != 0); cnt++) ;
 
 	if (cnt >= MAX_OUTSTANDING_COMMANDS) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_32bit_start_scsi: NO ROOM IN OUTSTANDING "
 			"ARRAY, req_q_cnt=0x%x\n", ha->req_q_cnt);
 		goto out;
@@ -3487,6 +3437,7 @@
 
 					/* Save ISP completion status */
 					CMD_RESULT(sp->cmd) = 0;
+					CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
 
 					/* Place block on done queue */
 					list_add_tail(&sp->list, done_q);
@@ -3495,7 +3446,7 @@
 					 * If we get here we have a real problem!
 					 */
 					printk(KERN_WARNING
-					       "qla1280: ISP invalid handle");
+					       "qla1280: ISP invalid handle\n");
 				}
 			}
 			break;
@@ -3753,6 +3704,8 @@
 		}
 	}
 
+	CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
 	/* Place command on done queue. */
 	list_add_tail(&sp->list, done_q);
  out:
@@ -3808,6 +3761,8 @@
 			CMD_RESULT(sp->cmd) = DID_ERROR << 16;
 		}
 
+		CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
 		/* Place command on done queue. */
 		list_add_tail(&sp->list, done_q);
 	}
@@ -3858,19 +3813,16 @@
 		struct scsi_cmnd *cmd;
 		sp = ha->outstanding_cmds[cnt];
 		if (sp) {
-
 			cmd = sp->cmd;
 			CMD_RESULT(cmd) = DID_RESET << 16;
-
-			sp->cmd = NULL;
+			CMD_HANDLE(cmd) = COMPLETED_HANDLE;
 			ha->outstanding_cmds[cnt] = NULL;
-
-			(*cmd->scsi_done)(cmd);
-
-			sp->flags = 0;
+			list_add_tail(&sp->list, &ha->done_q);
 		}
 	}
 
+	qla1280_done(ha);
+
 	status = qla1280_load_firmware(ha);
 	if (status)
 		goto out;
@@ -3955,13 +3907,6 @@
 
 		if (scsi_control == SCSI_PHASE_INVALID) {
 			ha->bus_settings[bus].scsi_bus_dead = 1;
-#if 0
-			CMD_RESULT(cp) = DID_NO_CONNECT << 16;
-			CMD_HANDLE(cp) = INVALID_HANDLE;
-			/* ha->actthreads--; */
-
-			(*(cp)->scsi_done)(cp);
-#endif
 			return 1;	/* bus is dead */
 		} else {
 			ha->bus_settings[bus].scsi_bus_dead = 0;
diff --git a/drivers/scsi/qla1280.h b/drivers/scsi/qla1280.h
index d7c44b8..834884b 100644
--- a/drivers/scsi/qla1280.h
+++ b/drivers/scsi/qla1280.h
@@ -88,7 +88,8 @@
 
 /* Maximum outstanding commands in ISP queues */
 #define MAX_OUTSTANDING_COMMANDS	512
-#define INVALID_HANDLE			(MAX_OUTSTANDING_COMMANDS + 2)
+#define COMPLETED_HANDLE		((unsigned char *) \
+					(MAX_OUTSTANDING_COMMANDS + 2))
 
 /* ISP request and response entry counts (37-65535) */
 #define REQUEST_ENTRY_CNT		255 /* Number of request entries. */
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index b09993a..0f87962 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -97,7 +97,7 @@
 		return 0;
 
 	if (IS_NOCACHE_VPD_TYPE(ha))
-		ha->isp_ops->read_optrom(vha, ha->vpd, ha->flt_region_nvram << 2,
+		ha->isp_ops->read_optrom(vha, ha->nvram, ha->flt_region_nvram << 2,
 		    ha->nvram_size);
 	return memory_read_from_buffer(buf, count, &off, ha->nvram,
 					ha->nvram_size);
@@ -692,6 +692,109 @@
 	.read = qla2x00_sysfs_read_edc_status,
 };
 
+static ssize_t
+qla2x00_sysfs_read_xgmac_stats(struct kobject *kobj,
+		       struct bin_attribute *bin_attr,
+		       char *buf, loff_t off, size_t count)
+{
+	struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+	    struct device, kobj)));
+	struct qla_hw_data *ha = vha->hw;
+	int rval;
+	uint16_t actual_size;
+
+	if (!capable(CAP_SYS_ADMIN) || off != 0 || count > XGMAC_DATA_SIZE)
+		return 0;
+
+	if (ha->xgmac_data)
+		goto do_read;
+
+	ha->xgmac_data = dma_alloc_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
+	    &ha->xgmac_data_dma, GFP_KERNEL);
+	if (!ha->xgmac_data) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to allocate memory for XGMAC read-data.\n");
+		return 0;
+	}
+
+do_read:
+	actual_size = 0;
+	memset(ha->xgmac_data, 0, XGMAC_DATA_SIZE);
+
+	rval = qla2x00_get_xgmac_stats(vha, ha->xgmac_data_dma,
+	    XGMAC_DATA_SIZE, &actual_size);
+	if (rval != QLA_SUCCESS) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to read XGMAC data (%x).\n", rval);
+		count = 0;
+	}
+
+	count = actual_size > count ? count: actual_size;
+	memcpy(buf, ha->xgmac_data, count);
+
+	return count;
+}
+
+static struct bin_attribute sysfs_xgmac_stats_attr = {
+	.attr = {
+		.name = "xgmac_stats",
+		.mode = S_IRUSR,
+	},
+	.size = 0,
+	.read = qla2x00_sysfs_read_xgmac_stats,
+};
+
+static ssize_t
+qla2x00_sysfs_read_dcbx_tlv(struct kobject *kobj,
+		       struct bin_attribute *bin_attr,
+		       char *buf, loff_t off, size_t count)
+{
+	struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+	    struct device, kobj)));
+	struct qla_hw_data *ha = vha->hw;
+	int rval;
+	uint16_t actual_size;
+
+	if (!capable(CAP_SYS_ADMIN) || off != 0 || count > DCBX_TLV_DATA_SIZE)
+		return 0;
+
+	if (ha->dcbx_tlv)
+		goto do_read;
+
+	ha->dcbx_tlv = dma_alloc_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
+	    &ha->dcbx_tlv_dma, GFP_KERNEL);
+	if (!ha->dcbx_tlv) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to allocate memory for DCBX TLV read-data.\n");
+		return 0;
+	}
+
+do_read:
+	actual_size = 0;
+	memset(ha->dcbx_tlv, 0, DCBX_TLV_DATA_SIZE);
+
+	rval = qla2x00_get_dcbx_params(vha, ha->dcbx_tlv_dma,
+	    DCBX_TLV_DATA_SIZE);
+	if (rval != QLA_SUCCESS) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to read DCBX TLV data (%x).\n", rval);
+		count = 0;
+	}
+
+	memcpy(buf, ha->dcbx_tlv, count);
+
+	return count;
+}
+
+static struct bin_attribute sysfs_dcbx_tlv_attr = {
+	.attr = {
+		.name = "dcbx_tlv",
+		.mode = S_IRUSR,
+	},
+	.size = 0,
+	.read = qla2x00_sysfs_read_dcbx_tlv,
+};
+
 static struct sysfs_entry {
 	char *name;
 	struct bin_attribute *attr;
@@ -706,6 +809,8 @@
 	{ "reset", &sysfs_reset_attr, },
 	{ "edc", &sysfs_edc_attr, 2 },
 	{ "edc_status", &sysfs_edc_status_attr, 2 },
+	{ "xgmac_stats", &sysfs_xgmac_stats_attr, 3 },
+	{ "dcbx_tlv", &sysfs_dcbx_tlv_attr, 3 },
 	{ NULL },
 };
 
@@ -721,6 +826,8 @@
 			continue;
 		if (iter->is4GBp_only == 2 && !IS_QLA25XX(vha->hw))
 			continue;
+		if (iter->is4GBp_only == 3 && !IS_QLA81XX(vha->hw))
+			continue;
 
 		ret = sysfs_create_bin_file(&host->shost_gendev.kobj,
 		    iter->attr);
@@ -743,6 +850,8 @@
 			continue;
 		if (iter->is4GBp_only == 2 && !IS_QLA25XX(ha))
 			continue;
+		if (iter->is4GBp_only == 3 && !IS_QLA81XX(ha))
+			continue;
 
 		sysfs_remove_bin_file(&host->shost_gendev.kobj,
 		    iter->attr);
@@ -1088,6 +1197,58 @@
 	return snprintf(buf, PAGE_SIZE, "0x%x\n", ha->fdt_block_size);
 }
 
+static ssize_t
+qla2x00_vlan_id_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	if (!IS_QLA81XX(vha->hw))
+		return snprintf(buf, PAGE_SIZE, "\n");
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vha->fcoe_vlan_id);
+}
+
+static ssize_t
+qla2x00_vn_port_mac_address_show(struct device *dev,
+    struct device_attribute *attr, char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	if (!IS_QLA81XX(vha->hw))
+		return snprintf(buf, PAGE_SIZE, "\n");
+
+	return snprintf(buf, PAGE_SIZE, "%02x:%02x:%02x:%02x:%02x:%02x\n",
+	    vha->fcoe_vn_port_mac[5], vha->fcoe_vn_port_mac[4],
+	    vha->fcoe_vn_port_mac[3], vha->fcoe_vn_port_mac[2],
+	    vha->fcoe_vn_port_mac[1], vha->fcoe_vn_port_mac[0]);
+}
+
+static ssize_t
+qla2x00_fabric_param_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vha->hw->switch_cap);
+}
+
+static ssize_t
+qla2x00_fw_state_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	int rval;
+	uint16_t state[5];
+
+	rval = qla2x00_get_firmware_state(vha, state);
+	if (rval != QLA_SUCCESS)
+		memset(state, -1, sizeof(state));
+
+	return snprintf(buf, PAGE_SIZE, "0x%x 0x%x 0x%x 0x%x 0x%x\n", state[0],
+	    state[1], state[2], state[3], state[4]);
+}
+
 static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
 static DEVICE_ATTR(fw_version, S_IRUGO, qla2x00_fw_version_show, NULL);
 static DEVICE_ATTR(serial_num, S_IRUGO, qla2x00_serial_num_show, NULL);
@@ -1116,6 +1277,11 @@
 static DEVICE_ATTR(phy_version, S_IRUGO, qla2x00_phy_version_show, NULL);
 static DEVICE_ATTR(flash_block_size, S_IRUGO, qla2x00_flash_block_size_show,
 		   NULL);
+static DEVICE_ATTR(vlan_id, S_IRUGO, qla2x00_vlan_id_show, NULL);
+static DEVICE_ATTR(vn_port_mac_address, S_IRUGO,
+		   qla2x00_vn_port_mac_address_show, NULL);
+static DEVICE_ATTR(fabric_param, S_IRUGO, qla2x00_fabric_param_show, NULL);
+static DEVICE_ATTR(fw_state, S_IRUGO, qla2x00_fw_state_show, NULL);
 
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
@@ -1138,6 +1304,10 @@
 	&dev_attr_mpi_version,
 	&dev_attr_phy_version,
 	&dev_attr_flash_block_size,
+	&dev_attr_vlan_id,
+	&dev_attr_vn_port_mac_address,
+	&dev_attr_fabric_param,
+	&dev_attr_fw_state,
 	NULL,
 };
 
@@ -1313,7 +1483,8 @@
 	 * At this point all fcport's software-states are cleared.  Perform any
 	 * final cleanup of firmware resources (PCBs and XCBs).
 	 */
-	if (fcport->loop_id != FC_NO_LOOP_ID)
+	if (fcport->loop_id != FC_NO_LOOP_ID &&
+	    !test_bit(UNLOADING, &fcport->vha->dpc_flags))
 		fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
 			fcport->loop_id, fcport->d_id.b.domain,
 			fcport->d_id.b.area, fcport->d_id.b.al_pa);
@@ -1437,11 +1608,13 @@
 qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 {
 	int	ret = 0;
-	int	cnt = 0;
-	uint8_t	qos = QLA_DEFAULT_QUE_QOS;
+	uint8_t	qos = 0;
 	scsi_qla_host_t *base_vha = shost_priv(fc_vport->shost);
 	scsi_qla_host_t *vha = NULL;
 	struct qla_hw_data *ha = base_vha->hw;
+	uint16_t options = 0;
+	int	cnt;
+	struct req_que *req = ha->req_q_map[0];
 
 	ret = qla24xx_vport_create_req_sanity_check(fc_vport);
 	if (ret) {
@@ -1497,23 +1670,39 @@
 
 	qla24xx_vport_disable(fc_vport, disable);
 
-	/* Create a queue pair for the vport */
-	if (ha->mqenable) {
-		if (ha->npiv_info) {
-			for (; cnt < ha->nvram_npiv_size; cnt++) {
-				if (ha->npiv_info[cnt].port_name ==
-					vha->port_name &&
-					ha->npiv_info[cnt].node_name ==
-					vha->node_name) {
-					qos = ha->npiv_info[cnt].q_qos;
-					break;
-				}
-			}
+	if (ql2xmultique_tag) {
+		req = ha->req_q_map[1];
+		goto vport_queue;
+	} else if (ql2xmaxqueues == 1 || !ha->npiv_info)
+		goto vport_queue;
+	/* Create a request queue in QoS mode for the vport */
+	for (cnt = 0; cnt < ha->nvram_npiv_size; cnt++) {
+		if (memcmp(ha->npiv_info[cnt].port_name, vha->port_name, 8) == 0
+			&& memcmp(ha->npiv_info[cnt].node_name, vha->node_name,
+					8) == 0) {
+			qos = ha->npiv_info[cnt].q_qos;
+			break;
 		}
-		qla25xx_create_queues(vha, qos);
+	}
+	if (qos) {
+		ret = qla25xx_create_req_que(ha, options, vha->vp_idx, 0, 0,
+			qos);
+		if (!ret)
+			qla_printk(KERN_WARNING, ha,
+			"Can't create request queue for vp_idx:%d\n",
+			vha->vp_idx);
+		else {
+			DEBUG2(qla_printk(KERN_INFO, ha,
+			"Request Que:%d (QoS: %d) created for vp_idx:%d\n",
+			ret, qos, vha->vp_idx));
+			req = ha->req_q_map[ret];
+		}
 	}
 
+vport_queue:
+	vha->req = req;
 	return 0;
+
 vport_create_failed_2:
 	qla24xx_disable_vp(vha);
 	qla24xx_deallocate_vp_id(vha);
@@ -1554,8 +1743,8 @@
 		    vha->host_no, vha->vp_idx, vha));
         }
 
-	if (ha->mqenable) {
-		if (qla25xx_delete_queues(vha, 0) != QLA_SUCCESS)
+	if (vha->req->id && !ql2xmultique_tag) {
+		if (qla25xx_delete_req_que(vha, vha->req) != QLA_SUCCESS)
 			qla_printk(KERN_WARNING, ha,
 				"Queue delete failed.\n");
 	}
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 34760f8..4a990f4 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -149,11 +149,9 @@
 	int rval = QLA_SUCCESS;
 	uint32_t cnt;
 
-	if (RD_REG_DWORD(&reg->hccr) & HCCRX_RISC_PAUSE)
-		return rval;
-
 	WRT_REG_DWORD(&reg->hccr, HCCRX_SET_RISC_PAUSE);
-	for (cnt = 30000; (RD_REG_DWORD(&reg->hccr) & HCCRX_RISC_PAUSE) == 0 &&
+	for (cnt = 30000;
+	    ((RD_REG_DWORD(&reg->host_status) & HSRX_RISC_PAUSED) == 0) &&
 	    rval == QLA_SUCCESS; cnt--) {
 		if (cnt)
 			udelay(100);
@@ -351,7 +349,7 @@
 qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain)
 {
 	uint32_t cnt, que_idx;
-	uint8_t req_cnt, rsp_cnt, que_cnt;
+	uint8_t que_cnt;
 	struct qla2xxx_mq_chain *mq = ptr;
 	struct device_reg_25xxmq __iomem *reg;
 
@@ -363,9 +361,8 @@
 	mq->type = __constant_htonl(DUMP_CHAIN_MQ);
 	mq->chain_size = __constant_htonl(sizeof(struct qla2xxx_mq_chain));
 
-	req_cnt = find_first_zero_bit(ha->req_qid_map, ha->max_queues);
-	rsp_cnt = find_first_zero_bit(ha->rsp_qid_map, ha->max_queues);
-	que_cnt = req_cnt > rsp_cnt ? req_cnt : rsp_cnt;
+	que_cnt = ha->max_req_queues > ha->max_rsp_queues ?
+		ha->max_req_queues : ha->max_rsp_queues;
 	mq->count = htonl(que_cnt);
 	for (cnt = 0; cnt < que_cnt; cnt++) {
 		reg = (struct device_reg_25xxmq *) ((void *)
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 714ee67..00aa48d 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -93,6 +93,7 @@
 #define LSD(x)	((uint32_t)((uint64_t)(x)))
 #define MSD(x)	((uint32_t)((((uint64_t)(x)) >> 16) >> 16))
 
+#define MAKE_HANDLE(x, y) ((uint32_t)((((uint32_t)(x)) << 16) | (uint32_t)(y)))
 
 /*
  * I/O register
@@ -179,6 +180,7 @@
 #define REQUEST_ENTRY_CNT_24XX		2048	/* Number of request entries. */
 #define RESPONSE_ENTRY_CNT_2100		64	/* Number of response entries.*/
 #define RESPONSE_ENTRY_CNT_2300		512	/* Number of response entries.*/
+#define RESPONSE_ENTRY_CNT_MQ		128	/* Number of response entries.*/
 
 struct req_que;
 
@@ -186,7 +188,6 @@
  * SCSI Request Block
  */
 typedef struct srb {
-	struct req_que *que;
 	struct fc_port *fcport;
 
 	struct scsi_cmnd *cmd;		/* Linux SCSI command pkt */
@@ -2008,7 +2009,7 @@
 #define VP_RET_CODE_NOT_FOUND		6
 
 struct qla_hw_data;
-
+struct rsp_que;
 /*
  * ISP operations
  */
@@ -2030,10 +2031,9 @@
 	void (*enable_intrs) (struct qla_hw_data *);
 	void (*disable_intrs) (struct qla_hw_data *);
 
-	int (*abort_command) (struct scsi_qla_host *, srb_t *,
-		struct req_que *);
-	int (*target_reset) (struct fc_port *, unsigned int);
-	int (*lun_reset) (struct fc_port *, unsigned int);
+	int (*abort_command) (srb_t *);
+	int (*target_reset) (struct fc_port *, unsigned int, int);
+	int (*lun_reset) (struct fc_port *, unsigned int, int);
 	int (*fabric_login) (struct scsi_qla_host *, uint16_t, uint8_t,
 		uint8_t, uint8_t, uint16_t *, uint8_t);
 	int (*fabric_logout) (struct scsi_qla_host *, uint16_t, uint8_t,
@@ -2079,7 +2079,6 @@
 #define QLA_PCI_MSIX_CONTROL	0xa2
 
 struct scsi_qla_host;
-struct rsp_que;
 
 struct qla_msix_entry {
 	int have_irq;
@@ -2140,7 +2139,6 @@
 #define MBC_INITIALIZE_MULTIQ 0x1f
 #define QLA_QUE_PAGE 0X1000
 #define QLA_MQ_SIZE 32
-#define QLA_MAX_HOST_QUES 16
 #define QLA_MAX_QUEUES 256
 #define ISP_QUE_REG(ha, id) \
 	((ha->mqenable) ? \
@@ -2170,6 +2168,8 @@
 	struct qla_hw_data *hw;
 	struct qla_msix_entry *msix;
 	struct req_que *req;
+	srb_t *status_srb; /* status continuation entry */
+	struct work_struct q_work;
 };
 
 /* Request queue data structure */
@@ -2222,6 +2222,8 @@
 		uint32_t	fce_enabled		:1;
 		uint32_t	fac_supported		:1;
 		uint32_t	chip_reset_done		:1;
+		uint32_t	port0			:1;
+		uint32_t	running_gold_fw		:1;
 	} flags;
 
 	/* This spinlock is used to protect "io transactions", you must
@@ -2246,7 +2248,8 @@
 	struct rsp_que **rsp_q_map;
 	unsigned long req_qid_map[(QLA_MAX_QUEUES / 8) / sizeof(unsigned long)];
 	unsigned long rsp_qid_map[(QLA_MAX_QUEUES / 8) / sizeof(unsigned long)];
-	uint16_t 	max_queues;
+	uint8_t 	max_req_queues;
+	uint8_t 	max_rsp_queues;
 	struct qla_npiv_entry *npiv_info;
 	uint16_t	nvram_npiv_size;
 
@@ -2255,6 +2258,9 @@
 #define FLOGI_MID_SUPPORT       BIT_10
 #define FLOGI_VSAN_SUPPORT      BIT_12
 #define FLOGI_SP_SUPPORT        BIT_13
+
+	uint8_t		port_no;		/* Physical port of adapter */
+
 	/* Timeout timers. */
 	uint8_t 	loop_down_abort_time;    /* port down timer */
 	atomic_t	loop_down_timer;         /* loop down timer */
@@ -2392,6 +2398,14 @@
 	dma_addr_t	edc_data_dma;
 	uint16_t	edc_data_len;
 
+#define XGMAC_DATA_SIZE	PAGE_SIZE
+	void		*xgmac_data;
+	dma_addr_t	xgmac_data_dma;
+
+#define DCBX_TLV_DATA_SIZE PAGE_SIZE
+	void		*dcbx_tlv;
+	dma_addr_t	dcbx_tlv_dma;
+
 	struct task_struct	*dpc_thread;
 	uint8_t dpc_active;                  /* DPC routine is active */
 
@@ -2510,6 +2524,7 @@
 	uint32_t        flt_region_vpd;
 	uint32_t        flt_region_nvram;
 	uint32_t        flt_region_npiv_conf;
+	uint32_t	flt_region_gold_fw;
 
 	/* Needed for BEACON */
 	uint16_t        beacon_blink_led;
@@ -2536,6 +2551,7 @@
 	struct qla_chip_state_84xx *cs84xx;
 	struct qla_statistics qla_stats;
 	struct isp_operations *isp_ops;
+	struct workqueue_struct *wq;
 };
 
 /*
@@ -2545,6 +2561,8 @@
 	struct list_head list;
 	struct list_head vp_fcports;	/* list of fcports */
 	struct list_head work_list;
+	spinlock_t work_lock;
+
 	/* Commonly used flags and state information. */
 	struct Scsi_Host *host;
 	unsigned long	host_no;
@@ -2591,8 +2609,6 @@
 #define SWITCH_FOUND		BIT_0
 #define DFLG_NO_CABLE		BIT_1
 
-	srb_t		*status_srb;	/* Status continuation entry. */
-
 	/* ISP configuration data. */
 	uint16_t	loop_id;		/* Host adapter loop id */
 
@@ -2618,6 +2634,11 @@
 	uint8_t		node_name[WWN_SIZE];
 	uint8_t		port_name[WWN_SIZE];
 	uint8_t		fabric_node_name[WWN_SIZE];
+
+	uint16_t	fcoe_vlan_id;
+	uint16_t	fcoe_fcf_idx;
+	uint8_t		fcoe_vn_port_mac[6];
+
 	uint32_t   	vp_abort_cnt;
 
 	struct fc_vport	*fc_vport;	/* holds fc_vport * for each vport */
@@ -2643,7 +2664,7 @@
 #define VP_ERR_FAB_LOGOUT	4
 #define VP_ERR_ADAP_NORESOURCES	5
 	struct qla_hw_data *hw;
-	int	req_ques[QLA_MAX_HOST_QUES];
+	struct req_que *req;
 } scsi_qla_host_t;
 
 /*
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 96ccb96..dfde2dd 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -878,7 +878,6 @@
 					/* HCCR statuses. */
 #define HCCRX_HOST_INT		BIT_6	/* Host to RISC interrupt bit. */
 #define HCCRX_RISC_RESET	BIT_5	/* RISC Reset mode bit. */
-#define HCCRX_RISC_PAUSE	BIT_4	/* RISC Pause mode bit. */
 					/* HCCR commands. */
 					/* NOOP. */
 #define HCCRX_NOOP		0x00000000
@@ -1241,6 +1240,7 @@
 #define FLT_REG_HW_EVENT_1	0x1f
 #define FLT_REG_NPIV_CONF_0	0x29
 #define FLT_REG_NPIV_CONF_1	0x2a
+#define FLT_REG_GOLD_FW		0x2f
 
 struct qla_flt_region {
 	uint32_t code;
@@ -1405,6 +1405,8 @@
 #define MBC_IDC_ACK		0x101
 #define MBC_RESTART_MPI_FW	0x3d
 #define MBC_FLASH_ACCESS_CTRL	0x3e	/* Control flash access. */
+#define MBC_GET_XGMAC_STATS	0x7a
+#define MBC_GET_DCBX_PARAMS	0x51
 
 /* Flash access control option field bit definitions */
 #define FAC_OPT_FORCE_SEMAPHORE		BIT_15
@@ -1711,7 +1713,7 @@
 #define FA_VPD0_ADDR_81		0xD0000
 #define FA_VPD1_ADDR_81		0xD0400
 #define FA_NVRAM0_ADDR_81	0xD0080
-#define FA_NVRAM1_ADDR_81	0xD0480
+#define FA_NVRAM1_ADDR_81	0xD0180
 #define FA_FEATURE_ADDR_81	0xD4000
 #define FA_FLASH_DESCR_ADDR_81	0xD8000
 #define FA_FLASH_LAYOUT_ADDR_81	0xD8400
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 528913f..65b12d8 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -65,8 +65,11 @@
 extern int ql2xallocfwdump;
 extern int ql2xextended_error_logging;
 extern int ql2xqfullrampup;
+extern int ql2xqfulltracking;
 extern int ql2xiidmaenable;
 extern int ql2xmaxqueues;
+extern int ql2xmultique_tag;
+extern int ql2xfwloadbin;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
@@ -145,7 +148,7 @@
 extern int
 qla2x00_execute_fw(scsi_qla_host_t *, uint32_t);
 
-extern void
+extern int
 qla2x00_get_fw_version(scsi_qla_host_t *, uint16_t *, uint16_t *, uint16_t *,
     uint16_t *, uint32_t *, uint8_t *, uint32_t *, uint8_t *);
 
@@ -165,13 +168,13 @@
 qla2x00_issue_iocb(scsi_qla_host_t *, void *, dma_addr_t, size_t);
 
 extern int
-qla2x00_abort_command(scsi_qla_host_t *, srb_t *, struct req_que *);
+qla2x00_abort_command(srb_t *);
 
 extern int
-qla2x00_abort_target(struct fc_port *, unsigned int);
+qla2x00_abort_target(struct fc_port *, unsigned int, int);
 
 extern int
-qla2x00_lun_reset(struct fc_port *, unsigned int);
+qla2x00_lun_reset(struct fc_port *, unsigned int, int);
 
 extern int
 qla2x00_get_adapter_id(scsi_qla_host_t *, uint16_t *, uint8_t *, uint8_t *,
@@ -236,9 +239,11 @@
 qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
     dma_addr_t);
 
-extern int qla24xx_abort_command(scsi_qla_host_t *, srb_t *, struct req_que *);
-extern int qla24xx_abort_target(struct fc_port *, unsigned int);
-extern int qla24xx_lun_reset(struct fc_port *, unsigned int);
+extern int qla24xx_abort_command(srb_t *);
+extern int
+qla24xx_abort_target(struct fc_port *, unsigned int, int);
+extern int
+qla24xx_lun_reset(struct fc_port *, unsigned int, int);
 
 extern int
 qla2x00_system_error(scsi_qla_host_t *);
@@ -288,6 +293,18 @@
 extern int
 qla81xx_fac_erase_sector(scsi_qla_host_t *, uint32_t, uint32_t);
 
+extern int
+qla2x00_get_xgmac_stats(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t *);
+
+extern int
+qla2x00_get_dcbx_params(scsi_qla_host_t *, dma_addr_t, uint16_t);
+
+extern int
+qla2x00_read_ram_word(scsi_qla_host_t *, uint32_t, uint32_t *);
+
+extern int
+qla2x00_write_ram_word(scsi_qla_host_t *, uint32_t, uint32_t);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
@@ -295,8 +312,8 @@
 extern irqreturn_t qla2300_intr_handler(int, void *);
 extern irqreturn_t qla24xx_intr_handler(int, void *);
 extern void qla2x00_process_response_queue(struct rsp_que *);
-extern void qla24xx_process_response_queue(struct rsp_que *);
-
+extern void
+qla24xx_process_response_queue(struct scsi_qla_host *, struct rsp_que *);
 extern int qla2x00_request_irqs(struct qla_hw_data *, struct rsp_que *);
 extern void qla2x00_free_irqs(scsi_qla_host_t *);
 
@@ -401,19 +418,21 @@
 extern int qla25xx_init_req_que(struct scsi_qla_host *, struct req_que *);
 extern int qla25xx_init_rsp_que(struct scsi_qla_host *, struct rsp_que *);
 extern int qla25xx_create_req_que(struct qla_hw_data *, uint16_t, uint8_t,
-	uint16_t, uint8_t, uint8_t);
+	uint16_t, int, uint8_t);
 extern int qla25xx_create_rsp_que(struct qla_hw_data *, uint16_t, uint8_t,
-	uint16_t);
+	uint16_t, int);
 extern int qla25xx_update_req_que(struct scsi_qla_host *, uint8_t, uint8_t);
 extern void qla2x00_init_response_q_entries(struct rsp_que *);
 extern int qla25xx_delete_req_que(struct scsi_qla_host *, struct req_que *);
 extern int qla25xx_delete_rsp_que(struct scsi_qla_host *, struct rsp_que *);
 extern int qla25xx_create_queues(struct scsi_qla_host *, uint8_t);
-extern int qla25xx_delete_queues(struct scsi_qla_host *, uint8_t);
+extern int qla25xx_delete_queues(struct scsi_qla_host *);
 extern uint16_t qla24xx_rd_req_reg(struct qla_hw_data *, uint16_t);
 extern uint16_t qla25xx_rd_req_reg(struct qla_hw_data *, uint16_t);
 extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
+extern struct scsi_qla_host * qla25xx_get_host(struct rsp_que *);
+
 #endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 557f58d..917534b 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1107,7 +1107,7 @@
 		return ret;
 
 	ha->isp_ops->fabric_login(vha, vha->mgmt_svr_loop_id, 0xff, 0xff, 0xfa,
-	    mb, BIT_1);
+	    mb, BIT_1|BIT_0);
 	if (mb[0] != MBS_COMMAND_COMPLETE) {
 		DEBUG2_13(printk("%s(%ld): Failed MANAGEMENT_SERVER login: "
 		    "loop_id=%x mb[0]=%x mb[1]=%x mb[2]=%x mb[6]=%x mb[7]=%x\n",
@@ -1879,6 +1879,9 @@
 			case BIT_13:
 				list[i].fp_speed = PORT_SPEED_4GB;
 				break;
+			case BIT_12:
+				list[i].fp_speed = PORT_SPEED_10GB;
+				break;
 			case BIT_11:
 				list[i].fp_speed = PORT_SPEED_8GB;
 				break;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index bd7dd84..2620261 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -634,7 +634,7 @@
 		goto chip_diag_failed;
 
 	DEBUG3(printk("scsi(%ld): Reset register cleared by chip reset\n",
-	    ha->host_no));
+	    vha->host_no));
 
 	/* Reset RISC processor. */
 	WRT_REG_WORD(&reg->hccr, HCCR_RESET_RISC);
@@ -655,7 +655,7 @@
 		goto chip_diag_failed;
 
 	/* Check product ID of chip */
-	DEBUG3(printk("scsi(%ld): Checking product ID of chip\n", ha->host_no));
+	DEBUG3(printk("scsi(%ld): Checking product ID of chip\n", vha->host_no));
 
 	mb[1] = RD_MAILBOX_REG(ha, reg, 1);
 	mb[2] = RD_MAILBOX_REG(ha, reg, 2);
@@ -730,9 +730,6 @@
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = ha->req_q_map[0];
 
-	/* Perform RISC reset. */
-	qla24xx_reset_risc(vha);
-
 	ha->fw_transfer_size = REQUEST_ENTRY_SIZE * req->length;
 
 	rval = qla2x00_mbx_reg_test(vha);
@@ -786,7 +783,6 @@
 		    sizeof(uint32_t);
 		if (ha->mqenable)
 			mq_size = sizeof(struct qla2xxx_mq_chain);
-
 		/* Allocate memory for Fibre Channel Event Buffer. */
 		if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))
 			goto try_eft;
@@ -850,8 +846,7 @@
 	rsp_q_size = rsp->length * sizeof(response_t);
 
 	dump_size = offsetof(struct qla2xxx_fw_dump, isp);
-	dump_size += fixed_size + mem_size + req_q_size + rsp_q_size +
-	    eft_size;
+	dump_size += fixed_size + mem_size + req_q_size + rsp_q_size + eft_size;
 	ha->chain_offset = dump_size;
 	dump_size += mq_size + fce_size;
 
@@ -891,6 +886,56 @@
 	    htonl(offsetof(struct qla2xxx_fw_dump, isp));
 }
 
+static int
+qla81xx_mpi_sync(scsi_qla_host_t *vha)
+{
+#define MPS_MASK	0xe0
+	int rval;
+	uint16_t dc;
+	uint32_t dw;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_SUCCESS;
+
+	rval = qla2x00_write_ram_word(vha, 0x7c00, 1);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to acquire semaphore.\n"));
+		goto done;
+	}
+
+	pci_read_config_word(vha->hw->pdev, 0x54, &dc);
+	rval = qla2x00_read_ram_word(vha, 0x7a15, &dw);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to read sync.\n"));
+		goto done_release;
+	}
+
+	dc &= MPS_MASK;
+	if (dc == (dw & MPS_MASK))
+		goto done_release;
+
+	dw &= ~MPS_MASK;
+	dw |= dc;
+	rval = qla2x00_write_ram_word(vha, 0x7a15, dw);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to gain sync.\n"));
+	}
+
+done_release:
+	rval = qla2x00_write_ram_word(vha, 0x7c00, 0);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to release semaphore.\n"));
+	}
+
+done:
+	return rval;
+}
+
 /**
  * qla2x00_setup_chip() - Load and start RISC firmware.
  * @ha: HA context
@@ -915,6 +960,8 @@
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	}
 
+	qla81xx_mpi_sync(vha);
+
 	/* Load firmware sequences */
 	rval = ha->isp_ops->load_risc(vha, &srisc_address);
 	if (rval == QLA_SUCCESS) {
@@ -931,13 +978,16 @@
 			/* Retrieve firmware information. */
 			if (rval == QLA_SUCCESS) {
 				fw_major_version = ha->fw_major_version;
-				qla2x00_get_fw_version(vha,
+				rval = qla2x00_get_fw_version(vha,
 				    &ha->fw_major_version,
 				    &ha->fw_minor_version,
 				    &ha->fw_subminor_version,
 				    &ha->fw_attributes, &ha->fw_memory_size,
 				    ha->mpi_version, &ha->mpi_capabilities,
 				    ha->phy_version);
+				if (rval != QLA_SUCCESS)
+					goto failed;
+
 				ha->flags.npiv_supported = 0;
 				if (IS_QLA2XXX_MIDTYPE(ha) &&
 					 (ha->fw_attributes & BIT_2)) {
@@ -989,7 +1039,7 @@
 			    ha->fw_subminor_version);
 		}
 	}
-
+failed:
 	if (rval) {
 		DEBUG2_3(printk("scsi(%ld): Setup chip **** FAILED ****.\n",
 		    vha->host_no));
@@ -1013,12 +1063,14 @@
 	uint16_t cnt;
 	response_t *pkt;
 
+	rsp->ring_ptr = rsp->ring;
+	rsp->ring_index    = 0;
+	rsp->status_srb = NULL;
 	pkt = rsp->ring_ptr;
 	for (cnt = 0; cnt < rsp->length; cnt++) {
 		pkt->signature = RESPONSE_PROCESSED;
 		pkt++;
 	}
-
 }
 
 /**
@@ -1176,7 +1228,7 @@
 		if (ha->flags.msix_enabled) {
 			msix = &ha->msix_entries[1];
 			DEBUG2_17(printk(KERN_INFO
-			"Reistering vector 0x%x for base que\n", msix->entry));
+			"Registering vector 0x%x for base que\n", msix->entry));
 			icb->msix = cpu_to_le16(msix->entry);
 		}
 		/* Use alternate PCI bus number */
@@ -1230,14 +1282,14 @@
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
 	/* Clear outstanding commands array. */
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_req_queues; que++) {
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
-		for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
 			req->outstanding_cmds[cnt] = NULL;
 
-		req->current_outstanding_cmd = 0;
+		req->current_outstanding_cmd = 1;
 
 		/* Initialize firmware. */
 		req->ring_ptr  = req->ring;
@@ -1245,13 +1297,10 @@
 		req->cnt      = req->length;
 	}
 
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_rsp_queues; que++) {
 		rsp = ha->rsp_q_map[que];
 		if (!rsp)
 			continue;
-		rsp->ring_ptr = rsp->ring;
-		rsp->ring_index    = 0;
-
 		/* Initialize response queue entries */
 		qla2x00_init_response_q_entries(rsp);
 	}
@@ -1307,7 +1356,7 @@
 	unsigned long	wtime, mtime, cs84xx_time;
 	uint16_t	min_wait;	/* Minimum wait time if loop is down */
 	uint16_t	wait_time;	/* Wait time if loop is coming ready */
-	uint16_t	state[3];
+	uint16_t	state[5];
 	struct qla_hw_data *ha = vha->hw;
 
 	rval = QLA_SUCCESS;
@@ -1406,8 +1455,9 @@
 		    vha->host_no, state[0], jiffies));
 	} while (1);
 
-	DEBUG(printk("scsi(%ld): fw_state=%x curr time=%lx.\n",
-	    vha->host_no, state[0], jiffies));
+	DEBUG(printk("scsi(%ld): fw_state=%x (%x, %x, %x, %x) curr time=%lx.\n",
+	    vha->host_no, state[0], state[1], state[2], state[3], state[4],
+	    jiffies));
 
 	if (rval) {
 		DEBUG2_3(printk("scsi(%ld): Firmware ready **** FAILED ****.\n",
@@ -1541,6 +1591,7 @@
 	char *st, *en;
 	uint16_t index;
 	struct qla_hw_data *ha = vha->hw;
+	int use_tbl = !IS_QLA25XX(ha) && !IS_QLA81XX(ha);
 
 	if (memcmp(model, BINZERO, len) != 0) {
 		strncpy(ha->model_number, model, len);
@@ -1553,14 +1604,16 @@
 		}
 
 		index = (ha->pdev->subsystem_device & 0xff);
-		if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
+		if (use_tbl &&
+		    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
 		    index < QLA_MODEL_NAMES)
 			strncpy(ha->model_desc,
 			    qla2x00_model_name[index * 2 + 1],
 			    sizeof(ha->model_desc) - 1);
 	} else {
 		index = (ha->pdev->subsystem_device & 0xff);
-		if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
+		if (use_tbl &&
+		    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
 		    index < QLA_MODEL_NAMES) {
 			strcpy(ha->model_number,
 			    qla2x00_model_name[index * 2]);
@@ -2061,8 +2114,10 @@
 	if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) {
 		if (test_bit(LOCAL_LOOP_UPDATE, &save_flags))
 			set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
-		if (test_bit(RSCN_UPDATE, &save_flags))
+		if (test_bit(RSCN_UPDATE, &save_flags)) {
 			set_bit(RSCN_UPDATE, &vha->dpc_flags);
+			vha->flags.rscn_queue_overflow = 1;
+		}
 	}
 
 	return (rval);
@@ -2110,7 +2165,7 @@
 		goto cleanup_allocation;
 
 	DEBUG3(printk("scsi(%ld): Entries in ID list (%d)\n",
-	    ha->host_no, entries));
+	    vha->host_no, entries));
 	DEBUG3(qla2x00_dump_buffer((uint8_t *)ha->gid_list,
 	    entries * sizeof(struct gid_list_info)));
 
@@ -2243,7 +2298,8 @@
 qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
 {
 #define LS_UNKNOWN      2
-	static char *link_speeds[5] = { "1", "2", "?", "4", "8" };
+	static char *link_speeds[] = { "1", "2", "?", "4", "8", "10" };
+	char *link_speed;
 	int rval;
 	uint16_t mb[6];
 	struct qla_hw_data *ha = vha->hw;
@@ -2266,10 +2322,15 @@
 		    fcport->port_name[6], fcport->port_name[7], rval,
 		    fcport->fp_speed, mb[0], mb[1]));
 	} else {
+		link_speed = link_speeds[LS_UNKNOWN];
+		if (fcport->fp_speed < 5)
+			link_speed = link_speeds[fcport->fp_speed];
+		else if (fcport->fp_speed == 0x13)
+			link_speed = link_speeds[5];
 		DEBUG2(qla_printk(KERN_INFO, ha,
 		    "iIDMA adjusted to %s GB/s on "
 		    "%02x%02x%02x%02x%02x%02x%02x%02x.\n",
-		    link_speeds[fcport->fp_speed], fcport->port_name[0],
+		    link_speed, fcport->port_name[0],
 		    fcport->port_name[1], fcport->port_name[2],
 		    fcport->port_name[3], fcport->port_name[4],
 		    fcport->port_name[5], fcport->port_name[6],
@@ -3180,9 +3241,14 @@
 {
 	int rval = QLA_SUCCESS;
 	uint32_t wait_time;
-	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
-	struct rsp_que *rsp = req->rsp;
+	struct req_que *req;
+	struct rsp_que *rsp;
+
+	if (ql2xmultique_tag)
+		req = vha->hw->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
 
 	atomic_set(&vha->loop_state, LOOP_UPDATE);
 	clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
@@ -3448,7 +3514,7 @@
 	int ret = -1;
 	int i;
 
-	for (i = 1; i < ha->max_queues; i++) {
+	for (i = 1; i < ha->max_rsp_queues; i++) {
 		rsp = ha->rsp_q_map[i];
 		if (rsp) {
 			rsp->options &= ~BIT_0;
@@ -3462,6 +3528,8 @@
 					"%s Rsp que:%d inited\n", __func__,
 						rsp->id));
 		}
+	}
+	for (i = 1; i < ha->max_req_queues; i++) {
 		req = ha->req_q_map[i];
 		if (req) {
 		/* Clear outstanding commands array. */
@@ -3566,14 +3634,15 @@
 	nv = ha->nvram;
 
 	/* Determine NVRAM starting address. */
-	ha->nvram_size = sizeof(struct nvram_24xx);
-	ha->nvram_base = FA_NVRAM_FUNC0_ADDR;
-	ha->vpd_size = FA_NVRAM_VPD_SIZE;
-	ha->vpd_base = FA_NVRAM_VPD0_ADDR;
-	if (PCI_FUNC(ha->pdev->devfn)) {
+	if (ha->flags.port0) {
+		ha->nvram_base = FA_NVRAM_FUNC0_ADDR;
+		ha->vpd_base = FA_NVRAM_VPD0_ADDR;
+	} else {
 		ha->nvram_base = FA_NVRAM_FUNC1_ADDR;
 		ha->vpd_base = FA_NVRAM_VPD1_ADDR;
 	}
+	ha->nvram_size = sizeof(struct nvram_24xx);
+	ha->vpd_size = FA_NVRAM_VPD_SIZE;
 
 	/* Get VPD data into cache */
 	ha->vpd = ha->nvram + VPD_OFFSET;
@@ -3587,7 +3656,7 @@
 	for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++)
 		chksum += le32_to_cpu(*dptr++);
 
-	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", ha->host_no));
+	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", vha->host_no));
 	DEBUG5(qla2x00_dump_buffer((uint8_t *)nv, ha->nvram_size));
 
 	/* Bad NVRAM data, set defaults parameters. */
@@ -3612,7 +3681,7 @@
 		nv->exchange_count = __constant_cpu_to_le16(0);
 		nv->hard_address = __constant_cpu_to_le16(124);
 		nv->port_name[0] = 0x21;
-		nv->port_name[1] = 0x00 + PCI_FUNC(ha->pdev->devfn);
+		nv->port_name[1] = 0x00 + ha->port_no;
 		nv->port_name[2] = 0x00;
 		nv->port_name[3] = 0xe0;
 		nv->port_name[4] = 0x8b;
@@ -3798,11 +3867,11 @@
 }
 
 static int
-qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr)
+qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr,
+    uint32_t faddr)
 {
 	int	rval = QLA_SUCCESS;
 	int	segments, fragment;
-	uint32_t faddr;
 	uint32_t *dcode, dlen;
 	uint32_t risc_addr;
 	uint32_t risc_size;
@@ -3811,12 +3880,11 @@
 	struct req_que *req = ha->req_q_map[0];
 
 	qla_printk(KERN_INFO, ha,
-	    "FW: Loading from flash (%x)...\n", ha->flt_region_fw);
+	    "FW: Loading from flash (%x)...\n", faddr);
 
 	rval = QLA_SUCCESS;
 
 	segments = FA_RISC_CODE_SEGMENTS;
-	faddr = ha->flt_region_fw;
 	dcode = (uint32_t *)req->ring;
 	*srisc_addr = 0;
 
@@ -4104,6 +4172,9 @@
 {
 	int rval;
 
+	if (ql2xfwloadbin == 1)
+		return qla81xx_load_risc(vha, srisc_addr);
+
 	/*
 	 * FW Load priority:
 	 * 1) Firmware via request-firmware interface (.bin file).
@@ -4113,24 +4184,45 @@
 	if (rval == QLA_SUCCESS)
 		return rval;
 
-	return qla24xx_load_risc_flash(vha, srisc_addr);
+	return qla24xx_load_risc_flash(vha, srisc_addr,
+	    vha->hw->flt_region_fw);
 }
 
 int
 qla81xx_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 {
 	int rval;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (ql2xfwloadbin == 2)
+		goto try_blob_fw;
 
 	/*
 	 * FW Load priority:
 	 * 1) Firmware residing in flash.
 	 * 2) Firmware via request-firmware interface (.bin file).
+	 * 3) Golden-Firmware residing in flash -- limited operation.
 	 */
-	rval = qla24xx_load_risc_flash(vha, srisc_addr);
+	rval = qla24xx_load_risc_flash(vha, srisc_addr, ha->flt_region_fw);
 	if (rval == QLA_SUCCESS)
 		return rval;
 
-	return qla24xx_load_risc_blob(vha, srisc_addr);
+try_blob_fw:
+	rval = qla24xx_load_risc_blob(vha, srisc_addr);
+	if (rval == QLA_SUCCESS || !ha->flt_region_gold_fw)
+		return rval;
+
+	qla_printk(KERN_ERR, ha,
+	    "FW: Attempting to fallback to golden firmware...\n");
+	rval = qla24xx_load_risc_flash(vha, srisc_addr, ha->flt_region_gold_fw);
+	if (rval != QLA_SUCCESS)
+		return rval;
+
+	qla_printk(KERN_ERR, ha,
+	    "FW: Please update operational firmware...\n");
+	ha->flags.running_gold_fw = 1;
+
+	return rval;
 }
 
 void
@@ -4146,7 +4238,7 @@
 
 	ret = qla2x00_stop_firmware(vha);
 	for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
-	    retries ; retries--) {
+	    ret != QLA_INVALID_COMMAND && retries ; retries--) {
 		ha->isp_ops->reset_chip(vha);
 		if (ha->isp_ops->chip_diag(vha) != QLA_SUCCESS)
 			continue;
@@ -4165,13 +4257,19 @@
 	uint16_t mb[MAILBOX_REGISTER_COUNT];
 	struct qla_hw_data *ha = vha->hw;
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
-	struct rsp_que *rsp = req->rsp;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	if (!vha->vp_idx)
 		return -EINVAL;
 
 	rval = qla2x00_fw_ready(base_vha);
+	if (ql2xmultique_tag)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
+
 	if (rval == QLA_SUCCESS) {
 		clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
 		qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL);
@@ -4305,7 +4403,7 @@
 	for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++)
 		chksum += le32_to_cpu(*dptr++);
 
-	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", ha->host_no));
+	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", vha->host_no));
 	DEBUG5(qla2x00_dump_buffer((uint8_t *)nv, ha->nvram_size));
 
 	/* Bad NVRAM data, set defaults parameters. */
@@ -4329,7 +4427,7 @@
 		nv->execution_throttle = __constant_cpu_to_le16(0xFFFF);
 		nv->exchange_count = __constant_cpu_to_le16(0);
 		nv->port_name[0] = 0x21;
-		nv->port_name[1] = 0x00 + PCI_FUNC(ha->pdev->devfn);
+		nv->port_name[1] = 0x00 + ha->port_no;
 		nv->port_name[2] = 0x00;
 		nv->port_name[3] = 0xe0;
 		nv->port_name[4] = 0x8b;
@@ -4358,12 +4456,12 @@
 		nv->max_luns_per_target = __constant_cpu_to_le16(128);
 		nv->port_down_retry_count = __constant_cpu_to_le16(30);
 		nv->link_down_timeout = __constant_cpu_to_le16(30);
-		nv->enode_mac[0] = 0x01;
+		nv->enode_mac[0] = 0x00;
 		nv->enode_mac[1] = 0x02;
 		nv->enode_mac[2] = 0x03;
 		nv->enode_mac[3] = 0x04;
 		nv->enode_mac[4] = 0x05;
-		nv->enode_mac[5] = 0x06 + PCI_FUNC(ha->pdev->devfn);
+		nv->enode_mac[5] = 0x06 + ha->port_no;
 
 		rval = 1;
 	}
@@ -4396,7 +4494,7 @@
 		icb->enode_mac[2] = 0x03;
 		icb->enode_mac[3] = 0x04;
 		icb->enode_mac[4] = 0x05;
-		icb->enode_mac[5] = 0x06 + PCI_FUNC(ha->pdev->devfn);
+		icb->enode_mac[5] = 0x06 + ha->port_no;
 	}
 
 	/* Use extended-initialization control block. */
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index a8abbb9..13396be 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -15,6 +15,7 @@
 							struct rsp_que *rsp);
 static void qla2x00_isp_cmd(struct scsi_qla_host *, struct req_que *);
 
+static void qla25xx_set_que(srb_t *, struct rsp_que **);
 /**
  * qla2x00_get_cmd_direction() - Determine control_flag data direction.
  * @cmd: SCSI command
@@ -92,9 +93,10 @@
  * Returns a pointer to the Continuation Type 0 IOCB packet.
  */
 static inline cont_entry_t *
-qla2x00_prep_cont_type0_iocb(struct req_que *req, struct scsi_qla_host *vha)
+qla2x00_prep_cont_type0_iocb(struct scsi_qla_host *vha)
 {
 	cont_entry_t *cont_pkt;
+	struct req_que *req = vha->req;
 	/* Adjust ring index. */
 	req->ring_index++;
 	if (req->ring_index == req->length) {
@@ -120,10 +122,11 @@
  * Returns a pointer to the continuation type 1 IOCB packet.
  */
 static inline cont_a64_entry_t *
-qla2x00_prep_cont_type1_iocb(struct req_que *req, scsi_qla_host_t *vha)
+qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha)
 {
 	cont_a64_entry_t *cont_pkt;
 
+	struct req_que *req = vha->req;
 	/* Adjust ring index. */
 	req->ring_index++;
 	if (req->ring_index == req->length) {
@@ -159,7 +162,6 @@
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
-	struct req_que *req;
 
 	cmd = sp->cmd;
 
@@ -174,8 +176,6 @@
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
-
 	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Three DSDs are available in the Command Type 2 IOCB */
@@ -192,7 +192,7 @@
 			 * Seven DSDs are available in the Continuation
 			 * Type 0 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type0_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type0_iocb(vha);
 			cur_dsd = (uint32_t *)&cont_pkt->dseg_0_address;
 			avail_dsds = 7;
 		}
@@ -220,7 +220,6 @@
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
-	struct req_que *req;
 
 	cmd = sp->cmd;
 
@@ -235,8 +234,6 @@
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
-
 	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Two DSDs are available in the Command Type 3 IOCB */
@@ -254,7 +251,7 @@
 			 * Five DSDs are available in the Continuation
 			 * Type 1 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type1_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
 			cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
 			avail_dsds = 5;
 		}
@@ -353,7 +350,6 @@
 	/* Build command packet */
 	req->current_outstanding_cmd = handle;
 	req->outstanding_cmds[handle] = sp;
-	sp->que = req;
 	sp->cmd->host_scribble = (unsigned char *)(unsigned long)handle;
 	req->cnt -= req_cnt;
 
@@ -453,6 +449,7 @@
 			mrk24->lun[2] = MSB(lun);
 			host_to_fcp_swap(mrk24->lun, sizeof(mrk24->lun));
 			mrk24->vp_index = vha->vp_idx;
+			mrk24->handle = MAKE_HANDLE(req->id, mrk24->handle);
 		} else {
 			SET_TARGET_ID(ha, mrk->target, loop_id);
 			mrk->lun = cpu_to_le16(lun);
@@ -531,9 +528,6 @@
 			for (cnt = 0; cnt < REQUEST_ENTRY_SIZE / 4; cnt++)
 				*dword_ptr++ = 0;
 
-			/* Set system defined field. */
-			pkt->sys_define = (uint8_t)req->ring_index;
-
 			/* Set entry count. */
 			pkt->entry_count = 1;
 
@@ -656,7 +650,7 @@
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
+	req = vha->req;
 
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
@@ -687,7 +681,7 @@
 			 * Five DSDs are available in the Continuation
 			 * Type 1 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type1_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
 			cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
 			avail_dsds = 5;
 		}
@@ -724,19 +718,13 @@
 	struct scsi_cmnd *cmd = sp->cmd;
 	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-	uint16_t que_id;
 
 	/* Setup device pointers. */
 	ret = 0;
-	que_id = vha->req_ques[0];
 
-	req = ha->req_q_map[que_id];
-	sp->que = req;
+	qla25xx_set_que(sp, &rsp);
+	req = vha->req;
 
-	if (req->rsp)
-		rsp = req->rsp;
-	else
-		rsp = ha->rsp_q_map[que_id];
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
 
@@ -794,7 +782,7 @@
 	req->cnt -= req_cnt;
 
 	cmd_pkt = (struct cmd_type_7 *)req->ring_ptr;
-	cmd_pkt->handle = handle;
+	cmd_pkt->handle = MAKE_HANDLE(req->id, handle);
 
 	/* Zero out remaining portion of packet. */
 	/*    tagged queuing modifier -- default is TSK_SIMPLE (0). */
@@ -823,6 +811,8 @@
 
 	/* Set total data segment count. */
 	cmd_pkt->entry_count = (uint8_t)req_cnt;
+	/* Specify response queue number where completion should happen */
+	cmd_pkt->entry_status = (uint8_t) rsp->id;
 	wmb();
 
 	/* Adjust ring index. */
@@ -842,7 +832,7 @@
 	/* Manage unprocessed RIO/ZIO commands in response queue. */
 	if (vha->flags.process_response_queue &&
 		rsp->ring_ptr->signature != RESPONSE_PROCESSED)
-		qla24xx_process_response_queue(rsp);
+		qla24xx_process_response_queue(vha, rsp);
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	return QLA_SUCCESS;
@@ -855,3 +845,16 @@
 
 	return QLA_FUNCTION_FAILED;
 }
+
+static void qla25xx_set_que(srb_t *sp, struct rsp_que **rsp)
+{
+	struct scsi_cmnd *cmd = sp->cmd;
+	struct qla_hw_data *ha = sp->fcport->vha->hw;
+	int affinity = cmd->request->cpu;
+
+	if (ql2xmultique_tag && affinity >= 0 &&
+		affinity < ha->max_rsp_queues - 1)
+		*rsp = ha->rsp_q_map[affinity + 1];
+	 else
+		*rsp = ha->rsp_q_map[0];
+}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d0498184..c8d0a17 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -13,10 +13,9 @@
 static void qla2x00_process_completed_request(struct scsi_qla_host *,
 	struct req_que *, uint32_t);
 static void qla2x00_status_entry(scsi_qla_host_t *, struct rsp_que *, void *);
-static void qla2x00_status_cont_entry(scsi_qla_host_t *, sts_cont_entry_t *);
+static void qla2x00_status_cont_entry(struct rsp_que *, sts_cont_entry_t *);
 static void qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *,
 	sts_entry_t *);
-static struct scsi_qla_host *qla2x00_get_rsp_host(struct rsp_que *);
 
 /**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
@@ -51,7 +50,7 @@
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		hccr = RD_REG_WORD(&reg->hccr);
 		if (hccr & HCCR_RISC_PAUSE) {
@@ -147,7 +146,7 @@
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		stat = RD_REG_DWORD(&reg->u.isp2300.host_status);
 		if (stat & HSR_RISC_PAUSED) {
@@ -685,7 +684,7 @@
 		    vha->host_no));
 
 		if (IS_FWI2_CAPABLE(ha))
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 		else
 			qla2x00_process_response_queue(rsp);
 		break;
@@ -766,7 +765,10 @@
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = NULL;
 
-	req = ha->req_q_map[vha->req_ques[0]];
+	if (!ql2xqfulltracking)
+		return;
+
+	req = vha->req;
 	if (!req)
 		return;
 	if (req->max_q_depth <= sdev->queue_depth)
@@ -808,6 +810,9 @@
 	fc_port_t *fcport;
 	struct scsi_device *sdev;
 
+	if (!ql2xqfulltracking)
+		return;
+
 	sdev = sp->cmd->device;
 	if (sdev->queue_depth >= req->max_q_depth)
 		return;
@@ -858,8 +863,8 @@
 		qla2x00_ramp_up_queue_depth(vha, req, sp);
 		qla2x00_sp_compl(ha, sp);
 	} else {
-		DEBUG2(printk("scsi(%ld): Invalid ISP SCSI completion handle\n",
-		    vha->host_no));
+		DEBUG2(printk("scsi(%ld) Req:%d: Invalid ISP SCSI completion"
+			" handle(%d)\n", vha->host_no, req->id, index));
 		qla_printk(KERN_WARNING, ha,
 		    "Invalid ISP SCSI completion handle\n");
 
@@ -881,7 +886,7 @@
 	uint16_t        handle_cnt;
 	uint16_t        cnt;
 
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 
 	if (!vha->flags.online)
 		return;
@@ -926,7 +931,7 @@
 			}
 			break;
 		case STATUS_CONT_TYPE:
-			qla2x00_status_cont_entry(vha, (sts_cont_entry_t *)pkt);
+			qla2x00_status_cont_entry(rsp, (sts_cont_entry_t *)pkt);
 			break;
 		default:
 			/* Type Not Supported. */
@@ -945,7 +950,8 @@
 }
 
 static inline void
-qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t sense_len)
+qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t sense_len,
+	struct rsp_que *rsp)
 {
 	struct scsi_cmnd *cp = sp->cmd;
 
@@ -962,7 +968,7 @@
 	sp->request_sense_ptr += sense_len;
 	sp->request_sense_length -= sense_len;
 	if (sp->request_sense_length != 0)
-		sp->fcport->vha->status_srb = sp;
+		rsp->status_srb = sp;
 
 	DEBUG5(printk("%s(): Check condition Sense data, scsi(%ld:%d:%d:%d) "
 	    "cmd=%p pid=%ld\n", __func__, sp->fcport->vha->host_no,
@@ -992,7 +998,9 @@
 	uint32_t	sense_len, rsp_info_len, resid_len, fw_resid_len;
 	uint8_t		*rsp_info, *sense_data;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = rsp->req;
+	uint32_t handle;
+	uint16_t que;
+	struct req_que *req;
 
 	sts = (sts_entry_t *) pkt;
 	sts24 = (struct sts_entry_24xx *) pkt;
@@ -1003,18 +1011,20 @@
 		comp_status = le16_to_cpu(sts->comp_status);
 		scsi_status = le16_to_cpu(sts->scsi_status) & SS_MASK;
 	}
-
+	handle = (uint32_t) LSW(sts->handle);
+	que = MSW(sts->handle);
+	req = ha->req_q_map[que];
 	/* Fast path completion. */
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
-		qla2x00_process_completed_request(vha, req, sts->handle);
+		qla2x00_process_completed_request(vha, req, handle);
 
 		return;
 	}
 
 	/* Validate handle. */
-	if (sts->handle < MAX_OUTSTANDING_COMMANDS) {
-		sp = req->outstanding_cmds[sts->handle];
-		req->outstanding_cmds[sts->handle] = NULL;
+	if (handle < MAX_OUTSTANDING_COMMANDS) {
+		sp = req->outstanding_cmds[handle];
+		req->outstanding_cmds[handle] = NULL;
 	} else
 		sp = NULL;
 
@@ -1030,7 +1040,7 @@
 	cp = sp->cmd;
 	if (cp == NULL) {
 		DEBUG2(printk("scsi(%ld): Command already returned back to OS "
-		    "pkt->handle=%d sp=%p.\n", vha->host_no, sts->handle, sp));
+		    "pkt->handle=%d sp=%p.\n", vha->host_no, handle, sp));
 		qla_printk(KERN_WARNING, ha,
 		    "Command is NULL: already returned to OS (sp=%p)\n", sp);
 
@@ -1121,6 +1131,8 @@
 			    scsi_status));
 
 			/* Adjust queue depth for all luns on the port. */
+			if (!ql2xqfulltracking)
+				break;
 			fcport->last_queue_full = jiffies;
 			starget_for_each_device(cp->device->sdev_target,
 			    fcport, qla2x00_adjust_sdev_qdepth_down);
@@ -1133,7 +1145,7 @@
 		if (!(scsi_status & SS_SENSE_LEN_VALID))
 			break;
 
-		qla2x00_handle_sense(sp, sense_data, sense_len);
+		qla2x00_handle_sense(sp, sense_data, sense_len, rsp);
 		break;
 
 	case CS_DATA_UNDERRUN:
@@ -1179,6 +1191,8 @@
 				 * Adjust queue depth for all luns on the
 				 * port.
 				 */
+				if (!ql2xqfulltracking)
+					break;
 				fcport->last_queue_full = jiffies;
 				starget_for_each_device(
 				    cp->device->sdev_target, fcport,
@@ -1192,12 +1206,12 @@
 			if (!(scsi_status & SS_SENSE_LEN_VALID))
 				break;
 
-			qla2x00_handle_sense(sp, sense_data, sense_len);
+			qla2x00_handle_sense(sp, sense_data, sense_len, rsp);
 		} else {
 			/*
 			 * If RISC reports underrun and target does not report
 			 * it then we must have a lost frame, so tell upper
-			 * layer to retry it by reporting a bus busy.
+			 * layer to retry it by reporting an error.
 			 */
 			if (!(scsi_status & SS_RESIDUAL_UNDER)) {
 				DEBUG2(printk("scsi(%ld:%d:%d:%d) Dropped "
@@ -1207,7 +1221,7 @@
 					cp->device->id, cp->device->lun, resid,
 					scsi_bufflen(cp)));
 
-				cp->result = DID_BUS_BUSY << 16;
+				cp->result = DID_ERROR << 16;
 				break;
 			}
 
@@ -1334,7 +1348,7 @@
 	}
 
 	/* Place command on done queue. */
-	if (vha->status_srb == NULL)
+	if (rsp->status_srb == NULL)
 		qla2x00_sp_compl(ha, sp);
 }
 
@@ -1346,11 +1360,11 @@
  * Extended sense data.
  */
 static void
-qla2x00_status_cont_entry(scsi_qla_host_t *vha, sts_cont_entry_t *pkt)
+qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
 {
 	uint8_t		sense_sz = 0;
-	struct qla_hw_data *ha = vha->hw;
-	srb_t		*sp = vha->status_srb;
+	struct qla_hw_data *ha = rsp->hw;
+	srb_t		*sp = rsp->status_srb;
 	struct scsi_cmnd *cp;
 
 	if (sp != NULL && sp->request_sense_length != 0) {
@@ -1362,7 +1376,7 @@
 			    "cmd is NULL: already returned to OS (sp=%p)\n",
 			    sp);
 
-			vha->status_srb = NULL;
+			rsp->status_srb = NULL;
 			return;
 		}
 
@@ -1383,7 +1397,7 @@
 
 		/* Place command on done queue. */
 		if (sp->request_sense_length == 0) {
-			vha->status_srb = NULL;
+			rsp->status_srb = NULL;
 			qla2x00_sp_compl(ha, sp);
 		}
 	}
@@ -1399,7 +1413,9 @@
 {
 	srb_t *sp;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = rsp->req;
+	uint32_t handle = LSW(pkt->handle);
+	uint16_t que = MSW(pkt->handle);
+	struct req_que *req = ha->req_q_map[que];
 #if defined(QL_DEBUG_LEVEL_2)
 	if (pkt->entry_status & RF_INV_E_ORDER)
 		qla_printk(KERN_ERR, ha, "%s: Invalid Entry Order\n", __func__);
@@ -1417,14 +1433,14 @@
 #endif
 
 	/* Validate handle. */
-	if (pkt->handle < MAX_OUTSTANDING_COMMANDS)
-		sp = req->outstanding_cmds[pkt->handle];
+	if (handle < MAX_OUTSTANDING_COMMANDS)
+		sp = req->outstanding_cmds[handle];
 	else
 		sp = NULL;
 
 	if (sp) {
 		/* Free outstanding command slot. */
-		req->outstanding_cmds[pkt->handle] = NULL;
+		req->outstanding_cmds[handle] = NULL;
 
 		/* Bad payload or header */
 		if (pkt->entry_status &
@@ -1486,13 +1502,10 @@
  * qla24xx_process_response_queue() - Process response queue entries.
  * @ha: SCSI driver HA context
  */
-void
-qla24xx_process_response_queue(struct rsp_que *rsp)
+void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+	struct rsp_que *rsp)
 {
 	struct sts_entry_24xx *pkt;
-	struct scsi_qla_host *vha;
-
-	vha = qla2x00_get_rsp_host(rsp);
 
 	if (!vha->flags.online)
 		return;
@@ -1523,7 +1536,7 @@
 			qla2x00_status_entry(vha, rsp, pkt);
 			break;
 		case STATUS_CONT_TYPE:
-			qla2x00_status_cont_entry(vha, (sts_cont_entry_t *)pkt);
+			qla2x00_status_cont_entry(rsp, (sts_cont_entry_t *)pkt);
 			break;
 		case VP_RPT_ID_IOCB_TYPE:
 			qla24xx_report_id_acquisition(vha,
@@ -1626,7 +1639,7 @@
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		stat = RD_REG_DWORD(&reg->host_status);
 		if (stat & HSRX_RISC_PAUSED) {
@@ -1664,7 +1677,7 @@
 			break;
 		case 0x13:
 		case 0x14:
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 			break;
 		default:
 			DEBUG2(printk("scsi(%ld): Unrecognized interrupt type "
@@ -1692,6 +1705,7 @@
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_24xx __iomem *reg;
+	struct scsi_qla_host *vha;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1704,7 +1718,8 @@
 
 	spin_lock_irq(&ha->hardware_lock);
 
-	qla24xx_process_response_queue(rsp);
+	vha = qla25xx_get_host(rsp);
+	qla24xx_process_response_queue(vha, rsp);
 	WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
 
 	spin_unlock_irq(&ha->hardware_lock);
@@ -1717,7 +1732,6 @@
 {
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
-	struct device_reg_24xx __iomem *reg;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1726,13 +1740,8 @@
 		return IRQ_NONE;
 	}
 	ha = rsp->hw;
-	reg = &ha->iobase->isp24;
 
-	spin_lock_irq(&ha->hardware_lock);
-
-	qla24xx_process_response_queue(rsp);
-
-	spin_unlock_irq(&ha->hardware_lock);
+	queue_work_on((int) (rsp->id - 1), ha->wq, &rsp->q_work);
 
 	return IRQ_HANDLED;
 }
@@ -1760,7 +1769,7 @@
 	status = 0;
 
 	spin_lock_irq(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	do {
 		stat = RD_REG_DWORD(&reg->host_status);
 		if (stat & HSRX_RISC_PAUSED) {
@@ -1798,7 +1807,7 @@
 			break;
 		case 0x13:
 		case 0x14:
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 			break;
 		default:
 			DEBUG2(printk("scsi(%ld): Unrecognized interrupt type "
@@ -1822,31 +1831,14 @@
 /* Interrupt handling helpers. */
 
 struct qla_init_msix_entry {
-	uint16_t entry;
-	uint16_t index;
 	const char *name;
 	irq_handler_t handler;
 };
 
-static struct qla_init_msix_entry base_queue = {
-	.entry = 0,
-	.index = 0,
-	.name = "qla2xxx (default)",
-	.handler = qla24xx_msix_default,
-};
-
-static struct qla_init_msix_entry base_rsp_queue = {
-	.entry = 1,
-	.index = 1,
-	.name = "qla2xxx (rsp_q)",
-	.handler = qla24xx_msix_rsp_q,
-};
-
-static struct qla_init_msix_entry multi_rsp_queue = {
-	.entry = 1,
-	.index = 1,
-	.name = "qla2xxx (multi_q)",
-	.handler = qla25xx_msix_rsp_q,
+static struct qla_init_msix_entry msix_entries[3] = {
+	{ "qla2xxx (default)", qla24xx_msix_default },
+	{ "qla2xxx (rsp_q)", qla24xx_msix_rsp_q },
+	{ "qla2xxx (multiq)", qla25xx_msix_rsp_q },
 };
 
 static void
@@ -1873,7 +1865,6 @@
 	int i, ret;
 	struct msix_entry *entries;
 	struct qla_msix_entry *qentry;
-	struct qla_init_msix_entry *msix_queue;
 
 	entries = kzalloc(sizeof(struct msix_entry) * ha->msix_count,
 					GFP_KERNEL);
@@ -1900,7 +1891,7 @@
 				ha->msix_count, ret);
 			goto msix_out;
 		}
-		ha->max_queues = ha->msix_count - 1;
+		ha->max_rsp_queues = ha->msix_count - 1;
 	}
 	ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) *
 				ha->msix_count, GFP_KERNEL);
@@ -1918,45 +1909,27 @@
 		qentry->rsp = NULL;
 	}
 
-	/* Enable MSI-X for AENs for queue 0 */
-	qentry = &ha->msix_entries[0];
-	ret = request_irq(qentry->vector, base_queue.handler, 0,
-					base_queue.name, rsp);
-	if (ret) {
-		qla_printk(KERN_WARNING, ha,
+	/* Enable MSI-X vectors for the base queue */
+	for (i = 0; i < 2; i++) {
+		qentry = &ha->msix_entries[i];
+		ret = request_irq(qentry->vector, msix_entries[i].handler,
+					0, msix_entries[i].name, rsp);
+		if (ret) {
+			qla_printk(KERN_WARNING, ha,
 			"MSI-X: Unable to register handler -- %x/%d.\n",
 			qentry->vector, ret);
-		qla24xx_disable_msix(ha);
-		goto msix_out;
+			qla24xx_disable_msix(ha);
+			ha->mqenable = 0;
+			goto msix_out;
+		}
+		qentry->have_irq = 1;
+		qentry->rsp = rsp;
+		rsp->msix = qentry;
 	}
-	qentry->have_irq = 1;
-	qentry->rsp = rsp;
 
 	/* Enable MSI-X vector for response queue update for queue 0 */
-	if (ha->max_queues > 1 && ha->mqiobase) {
+	if (ha->mqiobase &&  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1))
 		ha->mqenable = 1;
-		msix_queue = &multi_rsp_queue;
-		qla_printk(KERN_INFO, ha,
-				"MQ enabled, Number of Queue Resources: %d \n",
-				ha->max_queues);
-	} else {
-		ha->mqenable = 0;
-		msix_queue = &base_rsp_queue;
-	}
-
-	qentry = &ha->msix_entries[1];
-	ret = request_irq(qentry->vector, msix_queue->handler, 0,
-						msix_queue->name, rsp);
-	if (ret) {
-		qla_printk(KERN_WARNING, ha,
-			"MSI-X: Unable to register handler -- %x/%d.\n",
-			qentry->vector, ret);
-		qla24xx_disable_msix(ha);
-		ha->mqenable = 0;
-		goto msix_out;
-	}
-	qentry->have_irq = 1;
-	qentry->rsp = rsp;
 
 msix_out:
 	kfree(entries);
@@ -2063,35 +2036,11 @@
 	}
 }
 
-static struct scsi_qla_host *
-qla2x00_get_rsp_host(struct rsp_que *rsp)
-{
-	srb_t *sp;
-	struct qla_hw_data *ha = rsp->hw;
-	struct scsi_qla_host *vha = NULL;
-	struct sts_entry_24xx *pkt;
-	struct req_que *req;
-
-	if (rsp->id) {
-		pkt = (struct sts_entry_24xx *) rsp->ring_ptr;
-		req = rsp->req;
-		if (pkt && pkt->handle < MAX_OUTSTANDING_COMMANDS) {
-			sp = req->outstanding_cmds[pkt->handle];
-			if (sp)
-				vha = sp->fcport->vha;
-		}
-	}
-	if (!vha)
-	/* handle it in base queue */
-		vha = pci_get_drvdata(ha->pdev);
-
-	return vha;
-}
 
 int qla25xx_request_irq(struct rsp_que *rsp)
 {
 	struct qla_hw_data *ha = rsp->hw;
-	struct qla_init_msix_entry *intr = &multi_rsp_queue;
+	struct qla_init_msix_entry *intr = &msix_entries[2];
 	struct qla_msix_entry *msix = rsp->msix;
 	int ret;
 
@@ -2106,3 +2055,30 @@
 	msix->rsp = rsp;
 	return ret;
 }
+
+struct scsi_qla_host *
+qla25xx_get_host(struct rsp_que *rsp)
+{
+	srb_t *sp;
+	struct qla_hw_data *ha = rsp->hw;
+	struct scsi_qla_host *vha = NULL;
+	struct sts_entry_24xx *pkt;
+	struct req_que *req;
+	uint16_t que;
+	uint32_t handle;
+
+	pkt = (struct sts_entry_24xx *) rsp->ring_ptr;
+	que = MSW(pkt->handle);
+	handle = (uint32_t) LSW(pkt->handle);
+	req = ha->req_q_map[que];
+	if (handle < MAX_OUTSTANDING_COMMANDS) {
+		sp = req->outstanding_cmds[handle];
+		if (sp)
+			return  sp->fcport->vha;
+		else
+			goto base_que;
+	}
+base_que:
+	vha = pci_get_drvdata(ha->pdev);
+	return vha;
+}
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index e67c166..451ece0 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -408,7 +408,7 @@
  * Context:
  *	Kernel context.
  */
-void
+int
 qla2x00_get_fw_version(scsi_qla_host_t *vha, uint16_t *major, uint16_t *minor,
     uint16_t *subminor, uint16_t *attributes, uint32_t *memory, uint8_t *mpi,
     uint32_t *mpi_caps, uint8_t *phy)
@@ -427,6 +427,8 @@
 	mcp->flags = 0;
 	mcp->tov = MBX_TOV_SECONDS;
 	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS)
+		goto failed;
 
 	/* Return mailbox data. */
 	*major = mcp->mb[1];
@@ -446,7 +448,7 @@
 		phy[1] = mcp->mb[9] >> 8;
 		phy[2] = mcp->mb[9] & 0xff;
 	}
-
+failed:
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n", __func__,
@@ -455,6 +457,7 @@
 		/*EMPTY*/
 		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
+	return rval;
 }
 
 /*
@@ -748,20 +751,20 @@
  *	Kernel context.
  */
 int
-qla2x00_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
+qla2x00_abort_command(srb_t *sp)
 {
 	unsigned long   flags = 0;
-	fc_port_t	*fcport;
 	int		rval;
 	uint32_t	handle = 0;
 	mbx_cmd_t	mc;
 	mbx_cmd_t	*mcp = &mc;
+	fc_port_t	*fcport = sp->fcport;
+	scsi_qla_host_t *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req = vha->req;
 
 	DEBUG11(printk("qla2x00_abort_command(%ld): entered.\n", vha->host_no));
 
-	fcport = sp->fcport;
-
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
@@ -800,7 +803,7 @@
 }
 
 int
-qla2x00_abort_target(struct fc_port *fcport, unsigned int l)
+qla2x00_abort_target(struct fc_port *fcport, unsigned int l, int tag)
 {
 	int rval, rval2;
 	mbx_cmd_t  mc;
@@ -813,8 +816,8 @@
 
 	l = l;
 	vha = fcport->vha;
-	req = vha->hw->req_q_map[0];
-	rsp = vha->hw->rsp_q_map[0];
+	req = vha->hw->req_q_map[tag];
+	rsp = vha->hw->rsp_q_map[tag];
 	mcp->mb[0] = MBC_ABORT_TARGET;
 	mcp->out_mb = MBX_9|MBX_2|MBX_1|MBX_0;
 	if (HAS_EXTENDED_IDS(vha->hw)) {
@@ -850,7 +853,7 @@
 }
 
 int
-qla2x00_lun_reset(struct fc_port *fcport, unsigned int l)
+qla2x00_lun_reset(struct fc_port *fcport, unsigned int l, int tag)
 {
 	int rval, rval2;
 	mbx_cmd_t  mc;
@@ -862,8 +865,8 @@
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->vha->host_no));
 
 	vha = fcport->vha;
-	req = vha->hw->req_q_map[0];
-	rsp = vha->hw->rsp_q_map[0];
+	req = vha->hw->req_q_map[tag];
+	rsp = vha->hw->rsp_q_map[tag];
 	mcp->mb[0] = MBC_LUN_RESET;
 	mcp->out_mb = MBX_9|MBX_3|MBX_2|MBX_1|MBX_0;
 	if (HAS_EXTENDED_IDS(vha->hw))
@@ -931,6 +934,8 @@
 	mcp->mb[9] = vha->vp_idx;
 	mcp->out_mb = MBX_9|MBX_0;
 	mcp->in_mb = MBX_9|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	if (IS_QLA81XX(vha->hw))
+		mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -952,9 +957,19 @@
 		DEBUG2_3_11(printk("qla2x00_get_adapter_id(%ld): failed=%x.\n",
 		    vha->host_no, rval));
 	} else {
-		/*EMPTY*/
 		DEBUG11(printk("qla2x00_get_adapter_id(%ld): done.\n",
 		    vha->host_no));
+
+		if (IS_QLA81XX(vha->hw)) {
+			vha->fcoe_vlan_id = mcp->mb[9] & 0xfff;
+			vha->fcoe_fcf_idx = mcp->mb[10];
+			vha->fcoe_vn_port_mac[5] = mcp->mb[11] >> 8;
+			vha->fcoe_vn_port_mac[4] = mcp->mb[11] & 0xff;
+			vha->fcoe_vn_port_mac[3] = mcp->mb[12] >> 8;
+			vha->fcoe_vn_port_mac[2] = mcp->mb[12] & 0xff;
+			vha->fcoe_vn_port_mac[1] = mcp->mb[13] >> 8;
+			vha->fcoe_vn_port_mac[0] = mcp->mb[13] & 0xff;
+		}
 	}
 
 	return rval;
@@ -1252,7 +1267,7 @@
 
 	mcp->mb[0] = MBC_GET_FIRMWARE_STATE;
 	mcp->out_mb = MBX_0;
-	mcp->in_mb = MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -1261,6 +1276,8 @@
 	states[0] = mcp->mb[1];
 	states[1] = mcp->mb[2];
 	states[2] = mcp->mb[3];
+	states[3] = mcp->mb[4];
+	states[4] = mcp->mb[5];
 
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
@@ -1480,9 +1497,17 @@
 	dma_addr_t	lg_dma;
 	uint32_t	iop[2];
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
+	if (ql2xmultique_tag)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
+
 	lg = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &lg_dma);
 	if (lg == NULL) {
 		DEBUG2_3(printk("%s(%ld): failed to allocate Login IOCB.\n",
@@ -1493,6 +1518,7 @@
 
 	lg->entry_type = LOGINOUT_PORT_IOCB_TYPE;
 	lg->entry_count = 1;
+	lg->handle = MAKE_HANDLE(req->id, lg->handle);
 	lg->nport_handle = cpu_to_le16(loop_id);
 	lg->control_flags = __constant_cpu_to_le16(LCF_COMMAND_PLOGI);
 	if (opt & BIT_0)
@@ -1741,6 +1767,8 @@
 	struct logio_entry_24xx *lg;
 	dma_addr_t	lg_dma;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
@@ -1752,8 +1780,14 @@
 	}
 	memset(lg, 0, sizeof(struct logio_entry_24xx));
 
+	if (ql2xmaxqueues > 1)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
 	lg->entry_type = LOGINOUT_PORT_IOCB_TYPE;
 	lg->entry_count = 1;
+	lg->handle = MAKE_HANDLE(req->id, lg->handle);
 	lg->nport_handle = cpu_to_le16(loop_id);
 	lg->control_flags =
 	    __constant_cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO);
@@ -1864,9 +1898,6 @@
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	if (IS_QLA81XX(vha->hw))
-	    return QLA_SUCCESS;
-
 	DEBUG11(printk("qla2x00_full_login_lip(%ld): entered.\n",
 	    vha->host_no));
 
@@ -2195,21 +2226,21 @@
 }
 
 int
-qla24xx_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
+qla24xx_abort_command(srb_t *sp)
 {
 	int		rval;
-	fc_port_t	*fcport;
 	unsigned long   flags = 0;
 
 	struct abort_entry_24xx *abt;
 	dma_addr_t	abt_dma;
 	uint32_t	handle;
+	fc_port_t	*fcport = sp->fcport;
+	struct scsi_qla_host *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req = vha->req;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
-	fcport = sp->fcport;
-
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
@@ -2231,6 +2262,7 @@
 
 	abt->entry_type = ABORT_IOCB_TYPE;
 	abt->entry_count = 1;
+	abt->handle = MAKE_HANDLE(req->id, abt->handle);
 	abt->nport_handle = cpu_to_le16(fcport->loop_id);
 	abt->handle_to_abort = handle;
 	abt->port_id[0] = fcport->d_id.b.al_pa;
@@ -2272,7 +2304,7 @@
 
 static int
 __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
-    unsigned int l)
+    unsigned int l, int tag)
 {
 	int		rval, rval2;
 	struct tsk_mgmt_cmd *tsk;
@@ -2286,8 +2318,11 @@
 
 	vha = fcport->vha;
 	ha = vha->hw;
-	req = ha->req_q_map[0];
-	rsp = ha->rsp_q_map[0];
+	req = vha->req;
+	if (ql2xmultique_tag)
+		rsp = ha->rsp_q_map[tag + 1];
+	else
+		rsp = req->rsp;
 	tsk = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &tsk_dma);
 	if (tsk == NULL) {
 		DEBUG2_3(printk("%s(%ld): failed to allocate Task Management "
@@ -2298,6 +2333,7 @@
 
 	tsk->p.tsk.entry_type = TSK_MGMT_IOCB_TYPE;
 	tsk->p.tsk.entry_count = 1;
+	tsk->p.tsk.handle = MAKE_HANDLE(req->id, tsk->p.tsk.handle);
 	tsk->p.tsk.nport_handle = cpu_to_le16(fcport->loop_id);
 	tsk->p.tsk.timeout = cpu_to_le16(ha->r_a_tov / 10 * 2);
 	tsk->p.tsk.control_flags = cpu_to_le32(type);
@@ -2344,15 +2380,15 @@
 }
 
 int
-qla24xx_abort_target(struct fc_port *fcport, unsigned int l)
+qla24xx_abort_target(struct fc_port *fcport, unsigned int l, int tag)
 {
-	return __qla24xx_issue_tmf("Target", TCF_TARGET_RESET, fcport, l);
+	return __qla24xx_issue_tmf("Target", TCF_TARGET_RESET, fcport, l, tag);
 }
 
 int
-qla24xx_lun_reset(struct fc_port *fcport, unsigned int l)
+qla24xx_lun_reset(struct fc_port *fcport, unsigned int l, int tag)
 {
-	return __qla24xx_issue_tmf("Lun", TCF_LUN_RESET, fcport, l);
+	return __qla24xx_issue_tmf("Lun", TCF_LUN_RESET, fcport, l, tag);
 }
 
 int
@@ -2446,6 +2482,8 @@
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n", __func__,
 		    vha->host_no, rval));
+		if (mcp->mb[0] == MBS_INVALID_COMMAND)
+			rval = QLA_INVALID_COMMAND;
 	} else {
 		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
@@ -2717,8 +2755,11 @@
 		if (vp_idx == 0)
 			return;
 
-		if (MSB(stat) == 1)
+		if (MSB(stat) == 1) {
+			DEBUG2(printk("scsi(%ld): Could not acquire ID for "
+			    "VP[%d].\n", vha->host_no, vp_idx));
 			return;
+		}
 
 		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list)
 			if (vp_idx == vp->vp_idx)
@@ -3141,6 +3182,8 @@
 		WRT_REG_DWORD(&reg->req_q_in, 0);
 		WRT_REG_DWORD(&reg->req_q_out, 0);
 	}
+	req->req_q_in = &reg->req_q_in;
+	req->req_q_out = &reg->req_q_out;
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -3167,7 +3210,6 @@
 	mcp->mb[6] = MSW(MSD(rsp->dma));
 	mcp->mb[7] = LSW(MSD(rsp->dma));
 	mcp->mb[5] = rsp->length;
-	mcp->mb[11] = rsp->vp_idx;
 	mcp->mb[14] = rsp->msix->entry;
 	mcp->mb[13] = rsp->rid;
 
@@ -3179,7 +3221,7 @@
 	mcp->mb[8] = 0;
 	/* que out ptr index */
 	mcp->mb[9] = 0;
-	mcp->out_mb = MBX_14|MBX_13|MBX_12|MBX_11|MBX_10|MBX_9|MBX_8|MBX_7
+	mcp->out_mb = MBX_14|MBX_13|MBX_9|MBX_8|MBX_7
 			|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->in_mb = MBX_0;
 	mcp->flags = MBX_DMA_OUT;
@@ -3384,7 +3426,7 @@
 		DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__,
 		    vha->host_no, rval, mcp->mb[0]));
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
 
 	return rval;
@@ -3428,3 +3470,141 @@
 
 	return rval;
 }
+
+int
+qla2x00_get_xgmac_stats(scsi_qla_host_t *vha, dma_addr_t stats_dma,
+    uint16_t size_in_bytes, uint16_t *actual_size)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_GET_XGMAC_STATS;
+	mcp->mb[2] = MSW(stats_dma);
+	mcp->mb[3] = LSW(stats_dma);
+	mcp->mb[6] = MSW(MSD(stats_dma));
+	mcp->mb[7] = LSW(MSD(stats_dma));
+	mcp->mb[8] = size_in_bytes >> 2;
+	mcp->out_mb = MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
+	mcp->in_mb = MBX_2|MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=0x%x "
+		    "mb[1]=0x%x mb[2]=0x%x.\n", __func__, vha->host_no, rval,
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+
+		*actual_size = mcp->mb[2] << 2;
+	}
+
+	return rval;
+}
+
+int
+qla2x00_get_dcbx_params(scsi_qla_host_t *vha, dma_addr_t tlv_dma,
+    uint16_t size)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_GET_DCBX_PARAMS;
+	mcp->mb[1] = 0;
+	mcp->mb[2] = MSW(tlv_dma);
+	mcp->mb[3] = LSW(tlv_dma);
+	mcp->mb[6] = MSW(MSD(tlv_dma));
+	mcp->mb[7] = LSW(MSD(tlv_dma));
+	mcp->mb[8] = size;
+	mcp->out_mb = MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_2|MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=0x%x "
+		    "mb[1]=0x%x mb[2]=0x%x.\n", __func__, vha->host_no, rval,
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+	}
+
+	return rval;
+}
+
+int
+qla2x00_read_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t *data)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_FWI2_CAPABLE(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_READ_RAM_EXTENDED;
+	mcp->mb[1] = LSW(risc_addr);
+	mcp->mb[8] = MSW(risc_addr);
+	mcp->out_mb = MBX_8|MBX_1|MBX_0;
+	mcp->in_mb = MBX_3|MBX_2|MBX_0;
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=%x.\n", __func__,
+		    vha->host_no, rval, mcp->mb[0]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+		*data = mcp->mb[3] << 16 | mcp->mb[2];
+	}
+
+	return rval;
+}
+
+int
+qla2x00_write_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t data)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_FWI2_CAPABLE(vha->hw))
+                return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_WRITE_RAM_WORD_EXTENDED;
+	mcp->mb[1] = LSW(risc_addr);
+	mcp->mb[2] = LSW(data);
+	mcp->mb[3] = MSW(data);
+	mcp->mb[8] = MSW(risc_addr);
+	mcp->out_mb = MBX_8|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=%x.\n", __func__,
+		    vha->host_no, rval, mcp->mb[0]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+	}
+
+	return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 51716c7..650bcef 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -398,9 +398,8 @@
 
 	qla2x00_start_timer(vha, qla2x00_timer, WATCH_INTERVAL);
 
-	memset(vha->req_ques, 0, sizeof(vha->req_ques));
-	vha->req_ques[0] = ha->req_q_map[0]->id;
-	host->can_queue = ha->req_q_map[0]->length + 128;
+	vha->req = base_vha->req;
+	host->can_queue = base_vha->req->length + 128;
 	host->this_id = 255;
 	host->cmd_per_lun = 3;
 	host->max_cmd_len = MAX_CMDSZ;
@@ -515,76 +514,53 @@
 
 /* Delete all queues for a given vhost */
 int
-qla25xx_delete_queues(struct scsi_qla_host *vha, uint8_t que_no)
+qla25xx_delete_queues(struct scsi_qla_host *vha)
 {
 	int cnt, ret = 0;
 	struct req_que *req = NULL;
 	struct rsp_que *rsp = NULL;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (que_no) {
-	/* Delete request queue */
-		req = ha->req_q_map[que_no];
+	/* Delete request queues */
+	for (cnt = 1; cnt < ha->max_req_queues; cnt++) {
+		req = ha->req_q_map[cnt];
 		if (req) {
-			rsp = req->rsp;
 			ret = qla25xx_delete_req_que(vha, req);
 			if (ret != QLA_SUCCESS) {
 				qla_printk(KERN_WARNING, ha,
-				"Couldn't delete req que %d\n", req->id);
+				"Couldn't delete req que %d\n",
+				req->id);
 				return ret;
 			}
-			/* Delete associated response queue */
-			if (rsp) {
-				ret = qla25xx_delete_rsp_que(vha, rsp);
-				if (ret != QLA_SUCCESS) {
-					qla_printk(KERN_WARNING, ha,
-						"Couldn't delete rsp que %d\n",
-						rsp->id);
-					return ret;
-				}
-			}
-		}
-	} else {  /* delete all queues of this host */
-		for (cnt = 0; cnt < QLA_MAX_HOST_QUES; cnt++) {
-			/* Delete request queues */
-			req = ha->req_q_map[vha->req_ques[cnt]];
-			if (req && req->id) {
-				rsp = req->rsp;
-				ret = qla25xx_delete_req_que(vha, req);
-				if (ret != QLA_SUCCESS) {
-					qla_printk(KERN_WARNING, ha,
-						"Couldn't delete req que %d\n",
-						vha->req_ques[cnt]);
-					return ret;
-				}
-				vha->req_ques[cnt] = ha->req_q_map[0]->id;
-			/* Delete associated response queue */
-				if (rsp && rsp->id) {
-					ret = qla25xx_delete_rsp_que(vha, rsp);
-					if (ret != QLA_SUCCESS) {
-						qla_printk(KERN_WARNING, ha,
-						"Couldn't delete rsp que %d\n",
-						rsp->id);
-						return ret;
-					}
-				}
-			}
 		}
 	}
-	qla_printk(KERN_INFO, ha, "Queues deleted for vport:%d\n",
-		vha->vp_idx);
+
+	/* Delete response queues */
+	for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) {
+		rsp = ha->rsp_q_map[cnt];
+		if (rsp) {
+			ret = qla25xx_delete_rsp_que(vha, rsp);
+			if (ret != QLA_SUCCESS) {
+				qla_printk(KERN_WARNING, ha,
+				"Couldn't delete rsp que %d\n",
+				rsp->id);
+				return ret;
+			}
+		}
+	}
 	return ret;
 }
 
 int
 qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
-	uint8_t vp_idx, uint16_t rid, uint8_t rsp_que, uint8_t qos)
+	uint8_t vp_idx, uint16_t rid, int rsp_que, uint8_t qos)
 {
 	int ret = 0;
 	struct req_que *req = NULL;
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
 	uint16_t que_id = 0;
 	device_reg_t __iomem *reg;
+	uint32_t cnt;
 
 	req = kzalloc(sizeof(struct req_que), GFP_KERNEL);
 	if (req == NULL) {
@@ -604,8 +580,8 @@
 	}
 
 	mutex_lock(&ha->vport_lock);
-	que_id = find_first_zero_bit(ha->req_qid_map, ha->max_queues);
-	if (que_id >= ha->max_queues) {
+	que_id = find_first_zero_bit(ha->req_qid_map, ha->max_req_queues);
+	if (que_id >= ha->max_req_queues) {
 		mutex_unlock(&ha->vport_lock);
 		qla_printk(KERN_INFO, ha, "No resources to create "
 			 "additional request queue\n");
@@ -617,10 +593,10 @@
 	req->vp_idx = vp_idx;
 	req->qos = qos;
 
-	if (ha->rsp_q_map[rsp_que]) {
+	if (rsp_que < 0)
+		req->rsp = NULL;
+	else
 		req->rsp = ha->rsp_q_map[rsp_que];
-		req->rsp->req = req;
-	}
 	/* Use alternate PCI bus number */
 	if (MSB(req->rid))
 		options |= BIT_4;
@@ -628,13 +604,16 @@
 	if (LSB(req->rid))
 		options |= BIT_5;
 	req->options = options;
+
+	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+		req->outstanding_cmds[cnt] = NULL;
+	req->current_outstanding_cmd = 1;
+
 	req->ring_ptr = req->ring;
 	req->ring_index = 0;
 	req->cnt = req->length;
 	req->id = que_id;
 	reg = ISP_QUE_REG(ha, que_id);
-	req->req_q_in = &reg->isp25mq.req_q_in;
-	req->req_q_out = &reg->isp25mq.req_q_out;
 	req->max_q_depth = ha->req_q_map[0]->max_q_depth;
 	mutex_unlock(&ha->vport_lock);
 
@@ -654,10 +633,19 @@
 	return 0;
 }
 
+static void qla_do_work(struct work_struct *work)
+{
+	struct rsp_que *rsp = container_of(work, struct rsp_que, q_work);
+	struct scsi_qla_host *vha;
+
+	vha = qla25xx_get_host(rsp);
+	qla24xx_process_response_queue(vha, rsp);
+}
+
 /* create response queue */
 int
 qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
-	uint8_t vp_idx, uint16_t rid)
+	uint8_t vp_idx, uint16_t rid, int req)
 {
 	int ret = 0;
 	struct rsp_que *rsp = NULL;
@@ -672,7 +660,7 @@
 		goto que_failed;
 	}
 
-	rsp->length = RESPONSE_ENTRY_CNT_2300;
+	rsp->length = RESPONSE_ENTRY_CNT_MQ;
 	rsp->ring = dma_alloc_coherent(&ha->pdev->dev,
 			(rsp->length + 1) * sizeof(response_t),
 			&rsp->dma, GFP_KERNEL);
@@ -683,8 +671,8 @@
 	}
 
 	mutex_lock(&ha->vport_lock);
-	que_id = find_first_zero_bit(ha->rsp_qid_map, ha->max_queues);
-	if (que_id >= ha->max_queues) {
+	que_id = find_first_zero_bit(ha->rsp_qid_map, ha->max_rsp_queues);
+	if (que_id >= ha->max_rsp_queues) {
 		mutex_unlock(&ha->vport_lock);
 		qla_printk(KERN_INFO, ha, "No resources to create "
 			 "additional response queue\n");
@@ -708,8 +696,6 @@
 	if (LSB(rsp->rid))
 		options |= BIT_5;
 	rsp->options = options;
-	rsp->ring_ptr = rsp->ring;
-	rsp->ring_index = 0;
 	rsp->id = que_id;
 	reg = ISP_QUE_REG(ha, que_id);
 	rsp->rsp_q_in = &reg->isp25mq.rsp_q_in;
@@ -728,9 +714,14 @@
 		mutex_unlock(&ha->vport_lock);
 		goto que_failed;
 	}
+	if (req >= 0)
+		rsp->req = ha->req_q_map[req];
+	else
+		rsp->req = NULL;
 
 	qla2x00_init_response_q_entries(rsp);
-
+	if (rsp->hw->wq)
+		INIT_WORK(&rsp->q_work, qla_do_work);
 	return rsp->id;
 
 que_failed:
@@ -744,14 +735,16 @@
 	uint16_t options = 0;
 	uint8_t ret = 0;
 	struct qla_hw_data *ha = vha->hw;
+	struct rsp_que *rsp;
 
 	options |= BIT_1;
-	ret = qla25xx_create_rsp_que(ha, options, vha->vp_idx, 0);
+	ret = qla25xx_create_rsp_que(ha, options, vha->vp_idx, 0, -1);
 	if (!ret) {
 		qla_printk(KERN_WARNING, ha, "Response Que create failed\n");
 		return ret;
 	} else
 		qla_printk(KERN_INFO, ha, "Response Que:%d created.\n", ret);
+	rsp = ha->rsp_q_map[ret];
 
 	options = 0;
 	if (qos & BIT_7)
@@ -759,10 +752,11 @@
 	ret = qla25xx_create_req_que(ha, options, vha->vp_idx, 0, ret,
 					qos & ~BIT_7);
 	if (ret) {
-		vha->req_ques[0] = ret;
+		vha->req = ha->req_q_map[ret];
 		qla_printk(KERN_INFO, ha, "Request Que:%d created.\n", ret);
 	} else
 		qla_printk(KERN_WARNING, ha, "Request Que create failed\n");
+	rsp->req = ha->req_q_map[ret];
 
 	return ret;
 }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index e4fdcda..dcf0116 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -77,6 +77,14 @@
 MODULE_PARM_DESC(ql2xmaxqdepth,
 		"Maximum queue depth to report for target devices.");
 
+int ql2xqfulltracking = 1;
+module_param(ql2xqfulltracking, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(ql2xqfulltracking,
+		"Controls whether the driver tracks queue full status "
+		"returns and dynamically adjusts a scsi device's queue "
+		"depth.  Default is 1, perform tracking.  Set to 0 to "
+		"disable dynamic tracking and adjustment of queue depth.");
+
 int ql2xqfullrampup = 120;
 module_param(ql2xqfullrampup, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xqfullrampup,
@@ -96,6 +104,23 @@
 		"Enables MQ settings "
 		"Default is 1 for single queue. Set it to number \
 			of queues in MQ mode.");
+
+int ql2xmultique_tag;
+module_param(ql2xmultique_tag, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(ql2xmultique_tag,
+		"Enables CPU affinity settings for the driver "
+		"Default is 0 for no affinity of request and response IO. "
+		"Set it to 1 to turn on the cpu affinity.");
+
+int ql2xfwloadbin;
+module_param(ql2xfwloadbin, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(ql2xfwloadbin,
+		"Option to specify location from which to load ISP firmware:\n"
+		" 2 -- load firmware via the request_firmware() (hotplug)\n"
+		"      interface.\n"
+		" 1 -- load firmware from flash.\n"
+		" 0 -- use default semantics.\n");
+
 /*
  * SCSI host template entry points
  */
@@ -187,7 +212,7 @@
 /* -------------------------------------------------------------------------- */
 static int qla2x00_alloc_queues(struct qla_hw_data *ha)
 {
-	ha->req_q_map = kzalloc(sizeof(struct req_que *) * ha->max_queues,
+	ha->req_q_map = kzalloc(sizeof(struct req_que *) * ha->max_req_queues,
 				GFP_KERNEL);
 	if (!ha->req_q_map) {
 		qla_printk(KERN_WARNING, ha,
@@ -195,7 +220,7 @@
 		goto fail_req_map;
 	}
 
-	ha->rsp_q_map = kzalloc(sizeof(struct rsp_que *) * ha->max_queues,
+	ha->rsp_q_map = kzalloc(sizeof(struct rsp_que *) * ha->max_rsp_queues,
 				GFP_KERNEL);
 	if (!ha->rsp_q_map) {
 		qla_printk(KERN_WARNING, ha,
@@ -213,16 +238,8 @@
 	return -ENOMEM;
 }
 
-static void qla2x00_free_que(struct qla_hw_data *ha, struct req_que *req,
-	struct rsp_que *rsp)
+static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 {
-	if (rsp && rsp->ring)
-		dma_free_coherent(&ha->pdev->dev,
-		(rsp->length + 1) * sizeof(response_t),
-		rsp->ring, rsp->dma);
-
-	kfree(rsp);
-	rsp = NULL;
 	if (req && req->ring)
 		dma_free_coherent(&ha->pdev->dev,
 		(req->length + 1) * sizeof(request_t),
@@ -232,22 +249,77 @@
 	req = NULL;
 }
 
+static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
+{
+	if (rsp && rsp->ring)
+		dma_free_coherent(&ha->pdev->dev,
+		(rsp->length + 1) * sizeof(response_t),
+		rsp->ring, rsp->dma);
+
+	kfree(rsp);
+	rsp = NULL;
+}
+
 static void qla2x00_free_queues(struct qla_hw_data *ha)
 {
 	struct req_que *req;
 	struct rsp_que *rsp;
 	int cnt;
 
-	for (cnt = 0; cnt < ha->max_queues; cnt++) {
-		rsp = ha->rsp_q_map[cnt];
+	for (cnt = 0; cnt < ha->max_req_queues; cnt++) {
 		req = ha->req_q_map[cnt];
-		qla2x00_free_que(ha, req, rsp);
+		qla2x00_free_req_que(ha, req);
+	}
+	kfree(ha->req_q_map);
+	ha->req_q_map = NULL;
+
+	for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) {
+		rsp = ha->rsp_q_map[cnt];
+		qla2x00_free_rsp_que(ha, rsp);
 	}
 	kfree(ha->rsp_q_map);
 	ha->rsp_q_map = NULL;
+}
 
-	kfree(ha->req_q_map);
-	ha->req_q_map = NULL;
+static int qla25xx_setup_mode(struct scsi_qla_host *vha)
+{
+	uint16_t options = 0;
+	int ques, req, ret;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (ql2xmultique_tag) {
+		/* CPU affinity mode */
+		ha->wq = create_workqueue("qla2xxx_wq");
+		/* create a request queue for IO */
+		options |= BIT_7;
+		req = qla25xx_create_req_que(ha, options, 0, 0, -1,
+			QLA_DEFAULT_QUE_QOS);
+		if (!req) {
+			qla_printk(KERN_WARNING, ha,
+				"Can't create request queue\n");
+			goto fail;
+		}
+		vha->req = ha->req_q_map[req];
+		options |= BIT_1;
+		for (ques = 1; ques < ha->max_rsp_queues; ques++) {
+			ret = qla25xx_create_rsp_que(ha, options, 0, 0, req);
+			if (!ret) {
+				qla_printk(KERN_WARNING, ha,
+					"Response Queue create failed\n");
+				goto fail2;
+			}
+		}
+		DEBUG2(qla_printk(KERN_INFO, ha,
+			"CPU affinity mode enabled, no. of response"
+			" queues:%d, no. of request queues:%d\n",
+			ha->max_rsp_queues, ha->max_req_queues));
+	}
+	return 0;
+fail2:
+	qla25xx_delete_queues(vha);
+fail:
+	ha->mqenable = 0;
+	return 1;
 }
 
 static char *
@@ -387,7 +459,6 @@
 
 	sp->fcport = fcport;
 	sp->cmd = cmd;
-	sp->que = ha->req_q_map[0];
 	sp->flags = 0;
 	CMD_SP(cmd) = (void *)sp;
 	cmd->scsi_done = done;
@@ -612,7 +683,7 @@
 void
 qla2x00_abort_fcport_cmds(fc_port_t *fcport)
 {
-	int cnt, que, id;
+	int cnt;
 	unsigned long flags;
 	srb_t *sp;
 	scsi_qla_host_t *vha = fcport->vha;
@@ -620,32 +691,27 @@
 	struct req_que *req;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (que = 0; que < QLA_MAX_HOST_QUES; que++) {
-		id = vha->req_ques[que];
-		req = ha->req_q_map[id];
-		if (!req)
+	req = vha->req;
+	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		sp = req->outstanding_cmds[cnt];
+		if (!sp)
 			continue;
-		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
-			sp = req->outstanding_cmds[cnt];
-			if (!sp)
-				continue;
-			if (sp->fcport != fcport)
-				continue;
+		if (sp->fcport != fcport)
+			continue;
 
-			spin_unlock_irqrestore(&ha->hardware_lock, flags);
-			if (ha->isp_ops->abort_command(vha, sp, req)) {
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		if (ha->isp_ops->abort_command(sp)) {
+			DEBUG2(qla_printk(KERN_WARNING, ha,
+			"Abort failed --  %lx\n",
+			sp->cmd->serial_number));
+		} else {
+			if (qla2x00_eh_wait_on_command(sp->cmd) !=
+				QLA_SUCCESS)
 				DEBUG2(qla_printk(KERN_WARNING, ha,
-				"Abort failed --  %lx\n",
+				"Abort failed while waiting --  %lx\n",
 				sp->cmd->serial_number));
-			} else {
-				if (qla2x00_eh_wait_on_command(sp->cmd) !=
-					QLA_SUCCESS)
-					DEBUG2(qla_printk(KERN_WARNING, ha,
-					"Abort failed while waiting --  %lx\n",
-					sp->cmd->serial_number));
-			}
-			spin_lock_irqsave(&ha->hardware_lock, flags);
 		}
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
@@ -693,7 +759,7 @@
 	unsigned long flags;
 	int wait = 0;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req;
+	struct req_que *req = vha->req;
 	srb_t *spt;
 
 	qla2x00_block_error_handler(cmd);
@@ -709,7 +775,6 @@
 	spt = (srb_t *) CMD_SP(cmd);
 	if (!spt)
 		return SUCCESS;
-	req = spt->que;
 
 	/* Check active list for command command. */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -726,7 +791,7 @@
 		" pid=%ld.\n", __func__, vha->host_no, sp, serial));
 
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
-		if (ha->isp_ops->abort_command(vha, sp, req)) {
+		if (ha->isp_ops->abort_command(sp)) {
 			DEBUG2(printk("%s(%ld): abort_command "
 			"mbx failed.\n", __func__, vha->host_no));
 			ret = FAILED;
@@ -777,7 +842,7 @@
 		return status;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	req = sp->que;
+	req = vha->req;
 	for (cnt = 1; status == QLA_SUCCESS &&
 		cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
 		sp = req->outstanding_cmds[cnt];
@@ -820,7 +885,7 @@
 
 static int
 __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
-    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, unsigned int))
+    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, unsigned int, int))
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
@@ -841,7 +906,8 @@
 	if (qla2x00_wait_for_loop_ready(vha) != QLA_SUCCESS)
 		goto eh_reset_failed;
 	err = 2;
-	if (do_reset(fcport, cmd->device->lun) != QLA_SUCCESS)
+	if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1)
+		!= QLA_SUCCESS)
 		goto eh_reset_failed;
 	err = 3;
 	if (qla2x00_eh_wait_for_pending_commands(vha, cmd->device->id,
@@ -996,6 +1062,9 @@
 		if (qla2x00_vp_abort_isp(vha))
 			goto eh_host_reset_lock;
 	} else {
+		if (ha->wq)
+			flush_workqueue(ha->wq);
+
 		set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 		if (qla2x00_abort_isp(base_vha)) {
 			clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
@@ -1037,7 +1106,8 @@
 	struct fc_port *fcport;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (ha->flags.enable_lip_full_login && !vha->vp_idx) {
+	if (ha->flags.enable_lip_full_login && !vha->vp_idx &&
+	    !IS_QLA81XX(ha)) {
 		ret = qla2x00_full_login_lip(vha);
 		if (ret != QLA_SUCCESS) {
 			DEBUG2_3(printk("%s(%ld): failed: "
@@ -1064,7 +1134,7 @@
 			if (fcport->port_type != FCT_TARGET)
 				continue;
 
-			ret = ha->isp_ops->target_reset(fcport, 0);
+			ret = ha->isp_ops->target_reset(fcport, 0, 0);
 			if (ret != QLA_SUCCESS) {
 				DEBUG2_3(printk("%s(%ld): bus_reset failed: "
 				    "target_reset=%d d_id=%x.\n", __func__,
@@ -1088,7 +1158,7 @@
 	struct req_que *req;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_req_queues; que++) {
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
@@ -1123,7 +1193,7 @@
 	scsi_qla_host_t *vha = shost_priv(sdev->host);
 	struct qla_hw_data *ha = vha->hw;
 	struct fc_rport *rport = starget_to_rport(sdev->sdev_target);
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
+	struct req_que *req = vha->req;
 
 	if (sdev->tagged_supported)
 		scsi_activate_tcq(sdev, req->max_q_depth);
@@ -1511,6 +1581,13 @@
 		ha->fw_srisc_address = RISC_START_ADDRESS_2400;
 		break;
 	}
+
+	/* Get adapter physical port no from interrupt pin register. */
+	pci_read_config_byte(ha->pdev, PCI_INTERRUPT_PIN, &ha->port_no);
+	if (ha->port_no & 1)
+		ha->flags.port0 = 1;
+	else
+		ha->flags.port0 = 0;
 }
 
 static int
@@ -1518,6 +1595,7 @@
 {
 	resource_size_t pio;
 	uint16_t msix;
+	int cpus;
 
 	if (pci_request_selected_regions(ha->pdev, ha->bars,
 	    QLA2XXX_DRIVER_NAME)) {
@@ -1571,8 +1649,9 @@
 	}
 
 	/* Determine queue resources */
-	ha->max_queues = 1;
-	if (ql2xmaxqueues <= 1 || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha)))
+	ha->max_req_queues = ha->max_rsp_queues = 1;
+	if ((ql2xmaxqueues <= 1 || ql2xmultique_tag < 1) &&
+		(!IS_QLA25XX(ha) && !IS_QLA81XX(ha)))
 		goto mqiobase_exit;
 	ha->mqiobase = ioremap(pci_resource_start(ha->pdev, 3),
 			pci_resource_len(ha->pdev, 3));
@@ -1582,18 +1661,24 @@
 		ha->msix_count = msix;
 		/* Max queues are bounded by available msix vectors */
 		/* queue 0 uses two msix vectors */
-		if (ha->msix_count - 1 < ql2xmaxqueues)
-			ha->max_queues = ha->msix_count - 1;
-		else if (ql2xmaxqueues > QLA_MQ_SIZE)
-			ha->max_queues = QLA_MQ_SIZE;
-		else
-			ha->max_queues = ql2xmaxqueues;
+		if (ql2xmultique_tag) {
+			cpus = num_online_cpus();
+			ha->max_rsp_queues = (ha->msix_count - 1 - cpus) ?
+				(cpus + 1) : (ha->msix_count - 1);
+			ha->max_req_queues = 2;
+		} else if (ql2xmaxqueues > 1) {
+			ha->max_req_queues = ql2xmaxqueues > QLA_MQ_SIZE ?
+						QLA_MQ_SIZE : ql2xmaxqueues;
+			DEBUG2(qla_printk(KERN_INFO, ha, "QoS mode set, max no"
+			" of request queues:%d\n", ha->max_req_queues));
+		}
 		qla_printk(KERN_INFO, ha,
 			"MSI-X vector count: %d\n", msix);
-	}
+	} else
+		qla_printk(KERN_INFO, ha, "BAR 3 not enabled\n");
 
 mqiobase_exit:
-	ha->msix_count = ha->max_queues + 1;
+	ha->msix_count = ha->max_rsp_queues + 1;
 	return (0);
 
 iospace_error_exit:
@@ -1605,6 +1690,9 @@
 {
 	scsi_qla_host_t *vha = shost_priv(shost);
 
+	if (vha->hw->flags.running_gold_fw)
+		return;
+
 	set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 	set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
 	set_bit(RSCN_UPDATE, &vha->dpc_flags);
@@ -1768,6 +1856,7 @@
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
 		ha->gid_list_info_size = 8;
 		ha->optrom_size = OPTROM_SIZE_81XX;
+		ha->nvram_npiv_size = QLA_MAX_VPORTS_QLA25XX;
 		ha->isp_ops = &qla81xx_isp_ops;
 		ha->flash_conf_off = FARX_ACCESS_FLASH_CONF_81XX;
 		ha->flash_data_off = FARX_ACCESS_FLASH_DATA_81XX;
@@ -1803,14 +1892,15 @@
 
 		ret = -ENOMEM;
 		qla2x00_mem_free(ha);
-		qla2x00_free_que(ha, req, rsp);
+		qla2x00_free_req_que(ha, req);
+		qla2x00_free_rsp_que(ha, rsp);
 		goto probe_hw_failed;
 	}
 
 	pci_set_drvdata(pdev, base_vha);
 
 	host = base_vha->host;
-	base_vha->req_ques[0] = req->id;
+	base_vha->req = req;
 	host->can_queue = req->length + 128;
 	if (IS_QLA2XXX_MIDTYPE(ha))
 		base_vha->mgmt_svr_loop_id = 10 + base_vha->vp_idx;
@@ -1841,7 +1931,10 @@
 	}
 	ha->rsp_q_map[0] = rsp;
 	ha->req_q_map[0] = req;
-
+	rsp->req = req;
+	req->rsp = rsp;
+	set_bit(0, ha->req_qid_map);
+	set_bit(0, ha->rsp_qid_map);
 	/* FWI2-capable only. */
 	req->req_q_in = &ha->iobase->isp24.req_q_in;
 	req->req_q_out = &ha->iobase->isp24.req_q_out;
@@ -1866,6 +1959,15 @@
 		goto probe_failed;
 	}
 
+	if (ha->mqenable)
+		if (qla25xx_setup_mode(base_vha))
+			qla_printk(KERN_WARNING, ha,
+				"Can't create queues, falling back to single"
+				" queue mode\n");
+
+	if (ha->flags.running_gold_fw)
+		goto skip_dpc;
+
 	/*
 	 * Startup the kernel thread for this host adapter
 	 */
@@ -1878,6 +1980,7 @@
 		goto probe_failed;
 	}
 
+skip_dpc:
 	list_add_tail(&base_vha->list, &ha->vp_list);
 	base_vha->host->irq = ha->pdev->irq;
 
@@ -1917,8 +2020,9 @@
 	return 0;
 
 probe_init_failed:
-	qla2x00_free_que(ha, req, rsp);
-	ha->max_queues = 0;
+	qla2x00_free_req_que(ha, req);
+	qla2x00_free_rsp_que(ha, rsp);
+	ha->max_req_queues = ha->max_rsp_queues = 0;
 
 probe_failed:
 	if (base_vha->timer_active)
@@ -1976,6 +2080,13 @@
 
 	base_vha->flags.online = 0;
 
+	/* Flush the work queue and remove it */
+	if (ha->wq) {
+		flush_workqueue(ha->wq);
+		destroy_workqueue(ha->wq);
+		ha->wq = NULL;
+	}
+
 	/* Kill the kernel thread for this host */
 	if (ha->dpc_thread) {
 		struct task_struct *t = ha->dpc_thread;
@@ -2017,6 +2128,8 @@
 {
 	struct qla_hw_data *ha = vha->hw;
 
+	qla25xx_delete_queues(vha);
+
 	if (ha->flags.fce_enabled)
 		qla2x00_disable_fce_trace(vha, NULL, NULL);
 
@@ -2329,6 +2442,14 @@
 		vfree(ha->fw_dump);
 	}
 
+	if (ha->dcbx_tlv)
+		dma_free_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
+		    ha->dcbx_tlv, ha->dcbx_tlv_dma);
+
+	if (ha->xgmac_data)
+		dma_free_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
+		    ha->xgmac_data, ha->xgmac_data_dma);
+
 	if (ha->sns_cmd)
 		dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt),
 		ha->sns_cmd, ha->sns_cmd_dma);
@@ -2412,6 +2533,8 @@
 	INIT_LIST_HEAD(&vha->work_list);
 	INIT_LIST_HEAD(&vha->list);
 
+	spin_lock_init(&vha->work_lock);
+
 	sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no);
 	return vha;
 
@@ -2420,13 +2543,11 @@
 }
 
 static struct qla_work_evt *
-qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type,
-    int locked)
+qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type)
 {
 	struct qla_work_evt *e;
 
-	e = kzalloc(sizeof(struct qla_work_evt), locked ? GFP_ATOMIC:
-	    GFP_KERNEL);
+	e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC);
 	if (!e)
 		return NULL;
 
@@ -2437,17 +2558,15 @@
 }
 
 static int
-qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e, int locked)
+qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
 {
-	unsigned long uninitialized_var(flags);
-	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
 
-	if (!locked)
-		spin_lock_irqsave(&ha->hardware_lock, flags);
+	spin_lock_irqsave(&vha->work_lock, flags);
 	list_add_tail(&e->list, &vha->work_list);
+	spin_unlock_irqrestore(&vha->work_lock, flags);
 	qla2xxx_wake_dpc(vha);
-	if (!locked)
-		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	return QLA_SUCCESS;
 }
 
@@ -2457,13 +2576,13 @@
 {
 	struct qla_work_evt *e;
 
-	e = qla2x00_alloc_work(vha, QLA_EVT_AEN, 1);
+	e = qla2x00_alloc_work(vha, QLA_EVT_AEN);
 	if (!e)
 		return QLA_FUNCTION_FAILED;
 
 	e->u.aen.code = code;
 	e->u.aen.data = data;
-	return qla2x00_post_work(vha, e, 1);
+	return qla2x00_post_work(vha, e);
 }
 
 int
@@ -2471,25 +2590,27 @@
 {
 	struct qla_work_evt *e;
 
-	e = qla2x00_alloc_work(vha, QLA_EVT_IDC_ACK, 1);
+	e = qla2x00_alloc_work(vha, QLA_EVT_IDC_ACK);
 	if (!e)
 		return QLA_FUNCTION_FAILED;
 
 	memcpy(e->u.idc_ack.mb, mb, QLA_IDC_ACK_REGS * sizeof(uint16_t));
-	return qla2x00_post_work(vha, e, 1);
+	return qla2x00_post_work(vha, e);
 }
 
 static void
 qla2x00_do_work(struct scsi_qla_host *vha)
 {
-	struct qla_work_evt *e;
-	struct qla_hw_data *ha = vha->hw;
+	struct qla_work_evt *e, *tmp;
+	unsigned long flags;
+	LIST_HEAD(work);
 
-	spin_lock_irq(&ha->hardware_lock);
-	while (!list_empty(&vha->work_list)) {
-		e = list_entry(vha->work_list.next, struct qla_work_evt, list);
+	spin_lock_irqsave(&vha->work_lock, flags);
+	list_splice_init(&vha->work_list, &work);
+	spin_unlock_irqrestore(&vha->work_lock, flags);
+
+	list_for_each_entry_safe(e, tmp, &work, list) {
 		list_del_init(&e->list);
-		spin_unlock_irq(&ha->hardware_lock);
 
 		switch (e->type) {
 		case QLA_EVT_AEN:
@@ -2502,10 +2623,9 @@
 		}
 		if (e->flags & QLA_EVT_FLAG_FREE)
 			kfree(e);
-		spin_lock_irq(&ha->hardware_lock);
 	}
-	spin_unlock_irq(&ha->hardware_lock);
 }
+
 /* Relogins all the fcports of a vport
  * Context: dpc thread
  */
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 152ecfc..6260505 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -219,8 +219,8 @@
 	wait_cnt = NVR_WAIT_CNT;
 	do {
 		if (!--wait_cnt) {
-			DEBUG9_10(printk("%s(%ld): NVRAM didn't go ready...\n",
-			    __func__, vha->host_no));
+			DEBUG9_10(qla_printk(KERN_WARNING, ha,
+			    "NVRAM didn't go ready...\n"));
 			break;
 		}
 		NVRAM_DELAY();
@@ -349,7 +349,7 @@
 		wait_cnt = NVR_WAIT_CNT;
 		do {
 			if (!--wait_cnt) {
-				DEBUG9_10(qla_printk(
+				DEBUG9_10(qla_printk(KERN_WARNING, ha,
 				    "NVRAM didn't go ready...\n"));
 				break;
 			}
@@ -408,7 +408,8 @@
 	wait_cnt = NVR_WAIT_CNT;
 	do {
 		if (!--wait_cnt) {
-			DEBUG9_10(qla_printk("NVRAM didn't go ready...\n"));
+			DEBUG9_10(qla_printk(KERN_WARNING, ha,
+			    "NVRAM didn't go ready...\n"));
 			break;
 		}
 		NVRAM_DELAY();
@@ -701,32 +702,35 @@
 			break;
 		case FLT_REG_VPD_0:
 			ha->flt_region_vpd_nvram = start;
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_vpd = start;
 			break;
 		case FLT_REG_VPD_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_vpd = start;
 			break;
 		case FLT_REG_NVRAM_0:
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_nvram = start;
 			break;
 		case FLT_REG_NVRAM_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_nvram = start;
 			break;
 		case FLT_REG_FDT:
 			ha->flt_region_fdt = start;
 			break;
 		case FLT_REG_NPIV_CONF_0:
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_npiv_conf = start;
 			break;
 		case FLT_REG_NPIV_CONF_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_npiv_conf = start;
 			break;
+		case FLT_REG_GOLD_FW:
+			ha->flt_region_gold_fw = start;
+			break;
 		}
 	}
 	goto done;
@@ -744,12 +748,12 @@
 	ha->flt_region_fw = def_fw[def];
 	ha->flt_region_boot = def_boot[def];
 	ha->flt_region_vpd_nvram = def_vpd_nvram[def];
-	ha->flt_region_vpd = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_vpd = ha->flags.port0 ?
 	    def_vpd0[def]: def_vpd1[def];
-	ha->flt_region_nvram = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_nvram = ha->flags.port0 ?
 	    def_nvram0[def]: def_nvram1[def];
 	ha->flt_region_fdt = def_fdt[def];
-	ha->flt_region_npiv_conf = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_npiv_conf = ha->flags.port0 ?
 	    def_npiv_conf0[def]: def_npiv_conf1[def];
 done:
 	DEBUG2(qla_printk(KERN_DEBUG, ha, "FLT[%s]: boot=0x%x fw=0x%x "
@@ -924,6 +928,8 @@
 		struct fc_vport_identifiers vid;
 		struct fc_vport *vport;
 
+		memcpy(&ha->npiv_info[i], entry, sizeof(struct qla_npiv_entry));
+
 		flags = le16_to_cpu(entry->flags);
 		if (flags == 0xffff)
 			continue;
@@ -937,9 +943,7 @@
 		vid.port_name = wwn_to_u64(entry->port_name);
 		vid.node_name = wwn_to_u64(entry->node_name);
 
-		memcpy(&ha->npiv_info[i], entry, sizeof(struct qla_npiv_entry));
-
-		DEBUG2(qla_printk(KERN_DEBUG, ha, "NPIV[%02x]: wwpn=%llx "
+		DEBUG2(qla_printk(KERN_INFO, ha, "NPIV[%02x]: wwpn=%llx "
 			"wwnn=%llx vf_id=0x%x Q_qos=0x%x F_qos=0x%x.\n", cnt,
 			vid.port_name, vid.node_name, le16_to_cpu(entry->vf_id),
 			entry->q_qos, entry->f_qos));
@@ -955,7 +959,6 @@
 	}
 done:
 	kfree(data);
-	ha->npiv_info = NULL;
 }
 
 static int
@@ -1079,8 +1082,9 @@
 				    0xff0000) | ((fdata >> 16) & 0xff));
 			ret = qla24xx_erase_sector(vha, fdata);
 			if (ret != QLA_SUCCESS) {
-				DEBUG9(qla_printk("Unable to erase sector: "
-				    "address=%x.\n", faddr));
+				DEBUG9(qla_printk(KERN_WARNING, ha,
+				    "Unable to erase sector: address=%x.\n",
+				    faddr));
 				break;
 			}
 		}
@@ -1240,8 +1244,9 @@
 		ret = qla24xx_write_flash_dword(ha,
 		    nvram_data_addr(ha, naddr), cpu_to_le32(*dwptr));
 		if (ret != QLA_SUCCESS) {
-			DEBUG9(qla_printk("Unable to program nvram address=%x "
-			    "data=%x.\n", naddr, *dwptr));
+			DEBUG9(qla_printk(KERN_WARNING, ha,
+			    "Unable to program nvram address=%x data=%x.\n",
+			    naddr, *dwptr));
 			break;
 		}
 	}
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 19d1afc..b63feaf 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.01-k1"
+#define QLA2XXX_VERSION      "8.03.01-k3"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 166417a..2de5f3a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1225,8 +1225,8 @@
  * @starget:	SCSI target pointer
  * @lun:	SCSI Logical Unit Number
  *
- * Description: Looks up the scsi_device with the specified @channel, @id, @lun
- * for a given host.  The returned scsi_device has an additional reference that
+ * Description: Looks up the scsi_device with the specified @lun for a given
+ * @starget.  The returned scsi_device has an additional reference that
  * needs to be released with scsi_device_put once you're done with it.
  **/
 struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget,
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 213123b..41a2177 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -887,7 +887,7 @@
 static sector_t get_sdebug_capacity(void)
 {
 	if (scsi_debug_virtual_gb > 0)
-		return 2048 * 1024 * scsi_debug_virtual_gb;
+		return 2048 * 1024 * (sector_t)scsi_debug_virtual_gb;
 	else
 		return sdebug_store_sectors;
 }
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 0c2c73b..a168935 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -641,9 +641,9 @@
 /**
  * scsi_eh_restore_cmnd  - Restore a scsi command info as part of error recory
  * @scmd:       SCSI command structure to restore
- * @ses:        saved information from a coresponding call to scsi_prep_eh_cmnd
+ * @ses:        saved information from a coresponding call to scsi_eh_prep_cmnd
  *
- * Undo any damage done by above scsi_prep_eh_cmnd().
+ * Undo any damage done by above scsi_eh_prep_cmnd().
  */
 void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
 {
@@ -1451,28 +1451,21 @@
  * @sdev:	SCSI device to prevent medium removal
  *
  * Locking:
- * 	We must be called from process context; scsi_allocate_request()
- * 	may sleep.
+ * 	We must be called from process context.
  *
  * Notes:
  * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
  * 	head of the devices request queue, and continue.
- *
- * Bugs:
- * 	scsi_allocate_request() may sleep waiting for existing requests to
- * 	be processed.  However, since we haven't kicked off any request
- * 	processing for this host, this may deadlock.
- *
- *	If scsi_allocate_request() fails for what ever reason, we
- *	completely forget to lock the door.
  */
 static void scsi_eh_lock_door(struct scsi_device *sdev)
 {
 	struct request *req;
 
+	/*
+	 * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a
+	 * request becomes available
+	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
-	if (!req)
-		return;
 
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bb218c8..30f3275 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -240,11 +240,11 @@
 	 * is invalid.  Prevent the garbage from being misinterpreted
 	 * and prevent security leaks by zeroing out the excess data.
 	 */
-	if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
-		memset(buffer + (bufflen - req->data_len), 0, req->data_len);
+	if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen))
+		memset(buffer + (bufflen - req->resid_len), 0, req->resid_len);
 
 	if (resid)
-		*resid = req->data_len;
+		*resid = req->resid_len;
 	ret = req->errors;
  out:
 	blk_put_request(req);
@@ -546,14 +546,9 @@
 	 * to queue the remainder of them.
 	 */
 	if (blk_end_request(req, error, bytes)) {
-		int leftover = (req->hard_nr_sectors << 9);
-
-		if (blk_pc_request(req))
-			leftover = req->data_len;
-
 		/* kill remainder if no retrys */
 		if (error && scsi_noretry_cmd(cmd))
-			blk_end_request(req, error, leftover);
+			blk_end_request_all(req, error);
 		else {
 			if (requeue) {
 				/*
@@ -673,34 +668,6 @@
 EXPORT_SYMBOL(scsi_release_buffers);
 
 /*
- * Bidi commands Must be complete as a whole, both sides at once.
- * If part of the bytes were written and lld returned
- * scsi_in()->resid and/or scsi_out()->resid this information will be left
- * in req->data_len and req->next_rq->data_len. The upper-layer driver can
- * decide what to do with this information.
- */
-static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
-{
-	struct request *req = cmd->request;
-	unsigned int dlen = req->data_len;
-	unsigned int next_dlen = req->next_rq->data_len;
-
-	req->data_len = scsi_out(cmd)->resid;
-	req->next_rq->data_len = scsi_in(cmd)->resid;
-
-	/* The req and req->next_rq have not been completed */
-	BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen));
-
-	scsi_release_buffers(cmd);
-
-	/*
-	 * This will goose the queue request function at the end, so we don't
-	 * need to worry about launching another command.
-	 */
-	scsi_next_command(cmd);
-}
-
-/*
  * Function:    scsi_io_completion()
  *
  * Purpose:     Completion processing for block device I/O requests.
@@ -739,7 +706,6 @@
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
-	int this_count;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
 	int error = 0;
@@ -773,12 +739,22 @@
 			if (!sense_deferred)
 				error = -EIO;
 		}
+
+		req->resid_len = scsi_get_resid(cmd);
+
 		if (scsi_bidi_cmnd(cmd)) {
-			/* will also release_buffers */
-			scsi_end_bidi_request(cmd);
+			/*
+			 * Bidi commands Must be complete as a whole,
+			 * both sides at once.
+			 */
+			req->next_rq->resid_len = scsi_in(cmd)->resid;
+
+			blk_end_request_all(req, 0);
+
+			scsi_release_buffers(cmd);
+			scsi_next_command(cmd);
 			return;
 		}
-		req->data_len = scsi_get_resid(cmd);
 	}
 
 	BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
@@ -787,9 +763,9 @@
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
+	SCSI_LOG_HLCOMPLETE(1, printk("%u sectors total, "
 				      "%d bytes done.\n",
-				      req->nr_sectors, good_bytes));
+				      blk_rq_sectors(req), good_bytes));
 
 	/*
 	 * Recovered errors need reporting, but they're always treated
@@ -812,7 +788,6 @@
 	 */
 	if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
 		return;
-	this_count = blk_rq_bytes(req);
 
 	error = -EIO;
 
@@ -922,7 +897,7 @@
 			if (driver_byte(result) & DRIVER_SENSE)
 				scsi_print_sense("", cmd);
 		}
-		blk_end_request(req, -EIO, blk_rq_bytes(req));
+		blk_end_request_all(req, -EIO);
 		scsi_next_command(cmd);
 		break;
 	case ACTION_REPREP:
@@ -965,10 +940,7 @@
 	count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
-	if (blk_pc_request(req))
-		sdb->length = req->data_len;
-	else
-		sdb->length = req->nr_sectors << 9;
+	sdb->length = blk_rq_bytes(req);
 	return BLKPREP_OK;
 }
 
@@ -1087,22 +1059,21 @@
 		if (unlikely(ret))
 			return ret;
 	} else {
-		BUG_ON(req->data_len);
-		BUG_ON(req->data);
+		BUG_ON(blk_rq_bytes(req));
 
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 		req->buffer = NULL;
 	}
 
 	cmd->cmd_len = req->cmd_len;
-	if (!req->data_len)
+	if (!blk_rq_bytes(req))
 		cmd->sc_data_direction = DMA_NONE;
 	else if (rq_data_dir(req) == WRITE)
 		cmd->sc_data_direction = DMA_TO_DEVICE;
 	else
 		cmd->sc_data_direction = DMA_FROM_DEVICE;
 	
-	cmd->transfersize = req->data_len;
+	cmd->transfersize = blk_rq_bytes(req);
 	cmd->allowed = req->retries;
 	return BLKPREP_OK;
 }
@@ -1212,7 +1183,7 @@
 		break;
 	case BLKPREP_DEFER:
 		/*
-		 * If we defer, the elv_next_request() returns NULL, but the
+		 * If we defer, the blk_peek_request() returns NULL, but the
 		 * queue must be restarted, so we plug here if no returning
 		 * command will automatically do that.
 		 */
@@ -1388,7 +1359,7 @@
 	struct scsi_target *starget = scsi_target(sdev);
 	struct Scsi_Host *shost = sdev->host;
 
-	blkdev_dequeue_request(req);
+	blk_start_request(req);
 
 	if (unlikely(cmd == NULL)) {
 		printk(KERN_CRIT "impossible request in %s.\n",
@@ -1480,7 +1451,7 @@
 
 	if (!sdev) {
 		printk("scsi: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL)
+		while ((req = blk_peek_request(q)) != NULL)
 			scsi_kill_request(req, q);
 		return;
 	}
@@ -1501,7 +1472,7 @@
 		 * that the request is fully prepared even if we cannot 
 		 * accept it.
 		 */
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req || !scsi_dev_queue_ready(q, sdev))
 			break;
 
@@ -1517,7 +1488,7 @@
 		 * Remove the request from the request list.
 		 */
 		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 		sdev->device_busy++;
 
 		spin_unlock(q->queue_lock);
@@ -2441,20 +2412,18 @@
 scsi_internal_device_unblock(struct scsi_device *sdev)
 {
 	struct request_queue *q = sdev->request_queue; 
-	int err;
 	unsigned long flags;
 	
 	/* 
 	 * Try to transition the scsi device to SDEV_RUNNING
 	 * and goose the device queue if successful.  
 	 */
-	err = scsi_device_set_state(sdev, SDEV_RUNNING);
-	if (err) {
-		err = scsi_device_set_state(sdev, SDEV_CREATED);
-
-		if (err)
-			return err;
-	}
+	if (sdev->sdev_state == SDEV_BLOCK)
+		sdev->sdev_state = SDEV_RUNNING;
+	else if (sdev->sdev_state == SDEV_CREATED_BLOCK)
+		sdev->sdev_state = SDEV_CREATED;
+	else
+		return -EINVAL;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_start_queue(q);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index e2b50d8..c447838 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -115,12 +115,12 @@
 		 "REPORT LUNS maximum number of LUNS received (should be"
 		 " between 1 and 16384)");
 
-static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ+3;
+static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ + 18;
 
 module_param_named(inq_timeout, scsi_inq_timeout, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(inq_timeout, 
 		 "Timeout (in seconds) waiting for devices to answer INQUIRY."
-		 " Default is 5. Some non-compliant devices need more.");
+		 " Default is 20. Some devices may need more; most need less.");
 
 /* This lock protects only this list */
 static DEFINE_SPINLOCK(async_scan_lock);
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 48ba413..1030327 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -387,7 +387,7 @@
 	 * we use REQ_TYPE_BLOCK_PC so scsi_init_io doesn't set the
 	 * length for us.
 	 */
-	cmd->sdb.length = rq->data_len;
+	cmd->sdb.length = blk_rq_bytes(rq);
 
 	return 0;
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0a2ce7b..f3e6646 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -37,7 +37,6 @@
 #define ISCSI_TRANSPORT_VERSION "2.0-870"
 
 struct iscsi_internal {
-	int daemon_pid;
 	struct scsi_transport_template t;
 	struct iscsi_transport *iscsi_transport;
 	struct list_head list;
@@ -938,23 +937,9 @@
 }
 
 static int
-iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp)
+iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp)
 {
-	return netlink_broadcast(nls, skb, 0, 1, gfp);
-}
-
-static int
-iscsi_unicast_skb(struct sk_buff *skb, int pid)
-{
-	int rc;
-
-	rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
-	if (rc < 0) {
-		printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc);
-		return rc;
-	}
-
-	return 0;
+	return nlmsg_multicast(nls, skb, 0, group, gfp);
 }
 
 int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
@@ -980,7 +965,7 @@
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	memset(ev, 0, sizeof(*ev));
 	ev->transport_handle = iscsi_handle(conn->transport);
@@ -991,10 +976,45 @@
 	memcpy(pdu, hdr, sizeof(struct iscsi_hdr));
 	memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size);
 
-	return iscsi_unicast_skb(skb, priv->daemon_pid);
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 }
 EXPORT_SYMBOL_GPL(iscsi_recv_pdu);
 
+int iscsi_offload_mesg(struct Scsi_Host *shost,
+		       struct iscsi_transport *transport, uint32_t type,
+		       char *data, uint16_t data_size)
+{
+	struct nlmsghdr	*nlh;
+	struct sk_buff *skb;
+	struct iscsi_uevent *ev;
+	int len = NLMSG_SPACE(sizeof(*ev) + data_size);
+
+	skb = alloc_skb(len, GFP_NOIO);
+	if (!skb) {
+		printk(KERN_ERR "can not deliver iscsi offload message:OOM\n");
+		return -ENOMEM;
+	}
+
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
+	ev = NLMSG_DATA(nlh);
+	memset(ev, 0, sizeof(*ev));
+	ev->type = type;
+	ev->transport_handle = iscsi_handle(transport);
+	switch (type) {
+	case ISCSI_KEVENT_PATH_REQ:
+		ev->r.req_path.host_no = shost->host_no;
+		break;
+	case ISCSI_KEVENT_IF_DOWN:
+		ev->r.notify_if_down.host_no = shost->host_no;
+		break;
+	}
+
+	memcpy((char *)ev + sizeof(*ev), data, data_size);
+
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO);
+}
+EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
+
 void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
 	struct nlmsghdr	*nlh;
@@ -1014,7 +1034,7 @@
 		return;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(conn->transport);
 	ev->type = ISCSI_KEVENT_CONN_ERROR;
@@ -1022,7 +1042,7 @@
 	ev->r.connerror.cid = conn->cid;
 	ev->r.connerror.sid = iscsi_conn_get_sid(conn);
 
-	iscsi_broadcast_skb(skb, GFP_ATOMIC);
+	iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 
 	iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n",
 			      error);
@@ -1030,8 +1050,8 @@
 EXPORT_SYMBOL_GPL(iscsi_conn_error_event);
 
 static int
-iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
-		      void *payload, int size)
+iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi,
+		    void *payload, int size)
 {
 	struct sk_buff	*skb;
 	struct nlmsghdr	*nlh;
@@ -1045,10 +1065,10 @@
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0);
 	nlh->nlmsg_flags = flags;
 	memcpy(NLMSG_DATA(nlh), payload, size);
-	return iscsi_unicast_skb(skb, pid);
+	return iscsi_multicast_skb(skb, group, GFP_ATOMIC);
 }
 
 static int
@@ -1085,7 +1105,7 @@
 			return -ENOMEM;
 		}
 
-		nlhstat = __nlmsg_put(skbstat, priv->daemon_pid, 0, 0,
+		nlhstat = __nlmsg_put(skbstat, 0, 0, 0,
 				      (len - sizeof(*nlhstat)), 0);
 		evstat = NLMSG_DATA(nlhstat);
 		memset(evstat, 0, sizeof(*evstat));
@@ -1109,7 +1129,8 @@
 		skb_trim(skbstat, NLMSG_ALIGN(actual_size));
 		nlhstat->nlmsg_len = actual_size;
 
-		err = iscsi_unicast_skb(skbstat, priv->daemon_pid);
+		err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID,
+					  GFP_ATOMIC);
 	} while (err < 0 && err != -ECONNREFUSED);
 
 	return err;
@@ -1143,7 +1164,7 @@
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(session->transport);
 
@@ -1172,7 +1193,7 @@
 	 * this will occur if the daemon is not up, so we just warn
 	 * the user and when the daemon is restarted it will handle it
 	 */
-	rc = iscsi_broadcast_skb(skb, GFP_KERNEL);
+	rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL);
 	if (rc == -ESRCH)
 		iscsi_cls_session_printk(KERN_ERR, session,
 					 "Cannot notify userspace of session "
@@ -1268,26 +1289,54 @@
 	return err;
 }
 
+static int iscsi_if_ep_connect(struct iscsi_transport *transport,
+			       struct iscsi_uevent *ev, int msg_type)
+{
+	struct iscsi_endpoint *ep;
+	struct sockaddr *dst_addr;
+	struct Scsi_Host *shost = NULL;
+	int non_blocking, err = 0;
+
+	if (!transport->ep_connect)
+		return -EINVAL;
+
+	if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) {
+		shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no);
+		if (!shost) {
+			printk(KERN_ERR "ep connect failed. Could not find "
+			       "host no %u\n",
+			       ev->u.ep_connect_through_host.host_no);
+			return -ENODEV;
+		}
+		non_blocking = ev->u.ep_connect_through_host.non_blocking;
+	} else
+		non_blocking = ev->u.ep_connect.non_blocking;
+
+	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+	ep = transport->ep_connect(shost, dst_addr, non_blocking);
+	if (IS_ERR(ep)) {
+		err = PTR_ERR(ep);
+		goto release_host;
+	}
+
+	ev->r.ep_connect_ret.handle = ep->id;
+release_host:
+	if (shost)
+		scsi_host_put(shost);
+	return err;
+}
+
 static int
 iscsi_if_transport_ep(struct iscsi_transport *transport,
 		      struct iscsi_uevent *ev, int msg_type)
 {
 	struct iscsi_endpoint *ep;
-	struct sockaddr *dst_addr;
 	int rc = 0;
 
 	switch (msg_type) {
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
-		if (!transport->ep_connect)
-			return -EINVAL;
-
-		dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
-		ep = transport->ep_connect(dst_addr,
-					   ev->u.ep_connect.non_blocking);
-		if (IS_ERR(ep))
-			return PTR_ERR(ep);
-
-		ev->r.ep_connect_ret.handle = ep->id;
+		rc = iscsi_if_ep_connect(transport, ev, msg_type);
 		break;
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 		if (!transport->ep_poll)
@@ -1365,7 +1414,31 @@
 }
 
 static int
-iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+{
+	struct Scsi_Host *shost;
+	struct iscsi_path *params;
+	int err;
+
+	if (!transport->set_path)
+		return -ENOSYS;
+
+	shost = scsi_host_lookup(ev->u.set_path.host_no);
+	if (!shost) {
+		printk(KERN_ERR "set path could not find host no %u\n",
+		       ev->u.set_path.host_no);
+		return -ENODEV;
+	}
+
+	params = (struct iscsi_path *)((char *)ev + sizeof(*ev));
+	err = transport->set_path(shost, params);
+
+	scsi_host_put(shost);
+	return err;
+}
+
+static int
+iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
 	int err = 0;
 	struct iscsi_uevent *ev = NLMSG_DATA(nlh);
@@ -1375,6 +1448,11 @@
 	struct iscsi_cls_conn *conn;
 	struct iscsi_endpoint *ep = NULL;
 
+	if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
+		*group = ISCSI_NL_GRP_UIP;
+	else
+		*group = ISCSI_NL_GRP_ISCSID;
+
 	priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle));
 	if (!priv)
 		return -EINVAL;
@@ -1383,8 +1461,6 @@
 	if (!try_module_get(transport->owner))
 		return -EINVAL;
 
-	priv->daemon_pid = NETLINK_CREDS(skb)->pid;
-
 	switch (nlh->nlmsg_type) {
 	case ISCSI_UEVENT_CREATE_SESSION:
 		err = iscsi_if_create_session(priv, ep, ev,
@@ -1469,6 +1545,7 @@
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 		err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type);
 		break;
 	case ISCSI_UEVENT_TGT_DSCVR:
@@ -1477,6 +1554,9 @@
 	case ISCSI_UEVENT_SET_HOST_PARAM:
 		err = iscsi_set_host_param(transport, ev);
 		break;
+	case ISCSI_UEVENT_PATH_UPDATE:
+		err = iscsi_set_path(transport, ev);
+		break;
 	default:
 		err = -ENOSYS;
 		break;
@@ -1499,6 +1579,7 @@
 		uint32_t rlen;
 		struct nlmsghdr	*nlh;
 		struct iscsi_uevent *ev;
+		uint32_t group;
 
 		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) ||
@@ -1511,7 +1592,7 @@
 		if (rlen > skb->len)
 			rlen = skb->len;
 
-		err = iscsi_if_recv_msg(skb, nlh);
+		err = iscsi_if_recv_msg(skb, nlh, &group);
 		if (err) {
 			ev->type = ISCSI_KEVENT_IF_ERROR;
 			ev->iferror = err;
@@ -1525,8 +1606,7 @@
 			 */
 			if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
 				break;
-			err = iscsi_if_send_reply(
-				NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
+			err = iscsi_if_send_reply(group, nlh->nlmsg_seq,
 				nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
 		} while (err < 0 && err != -ECONNREFUSED);
 		skb_pull(skb, rlen);
@@ -1774,7 +1854,6 @@
 	if (!priv)
 		return NULL;
 	INIT_LIST_HEAD(&priv->list);
-	priv->daemon_pid = -1;
 	priv->iscsi_transport = tt;
 	priv->t.user_scan = iscsi_user_scan;
 	priv->t.create_work_queue = 1;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 50988cb..d606452 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -163,12 +163,10 @@
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 8404423..878b17a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -384,9 +384,9 @@
 	struct scsi_device *sdp = q->queuedata;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp;
-	sector_t block = rq->sector;
+	sector_t block = blk_rq_pos(rq);
 	sector_t threshold;
-	unsigned int this_count = rq->nr_sectors;
+	unsigned int this_count = blk_rq_sectors(rq);
 	int ret, host_dif;
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -413,10 +413,10 @@
 					this_count));
 
 	if (!sdp || !scsi_device_online(sdp) ||
- 	    block + rq->nr_sectors > get_capacity(disk)) {
+	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-						"Finishing %ld sectors\n",
-						rq->nr_sectors));
+						"Finishing %u sectors\n",
+						blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 						"Retry with 0x%p\n", SCpnt));
 		goto out;
@@ -463,7 +463,7 @@
 	 * for this.
 	 */
 	if (sdp->sector_size == 1024) {
-		if ((block & 1) || (rq->nr_sectors & 1)) {
+		if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -473,7 +473,7 @@
 		}
 	}
 	if (sdp->sector_size == 2048) {
-		if ((block & 3) || (rq->nr_sectors & 3)) {
+		if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -483,7 +483,7 @@
 		}
 	}
 	if (sdp->sector_size == 4096) {
-		if ((block & 7) || (rq->nr_sectors & 7)) {
+		if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -512,10 +512,10 @@
 	}
 
 	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-					"%s %d/%ld 512 byte blocks.\n",
+					"%s %d/%u 512 byte blocks.\n",
 					(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading", this_count,
-					rq->nr_sectors));
+					blk_rq_sectors(rq)));
 
 	/* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
 	host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
@@ -971,8 +971,8 @@
 
 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
 {
-	u64 start_lba = scmd->request->sector;
-	u64 end_lba = scmd->request->sector + (scsi_bufflen(scmd) / 512);
+	u64 start_lba = blk_rq_pos(scmd->request);
+	u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
 	u64 bad_lba;
 	int info_valid;
 
@@ -1510,7 +1510,7 @@
 		 */
 		sector_size = 512;
 	}
-	blk_queue_hardsect_size(sdp->request_queue, sector_size);
+	blk_queue_logical_block_size(sdp->request_queue, sector_size);
 
 	{
 		char cap_str_2[10], cap_str_10[10];
@@ -1902,24 +1902,6 @@
 	index = sdkp->index;
 	dev = &sdp->sdev_gendev;
 
-	if (!sdp->request_queue->rq_timeout) {
-		if (sdp->type != TYPE_MOD)
-			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
-		else
-			blk_queue_rq_timeout(sdp->request_queue,
-					     SD_MOD_TIMEOUT);
-	}
-
-	device_initialize(&sdkp->dev);
-	sdkp->dev.parent = &sdp->sdev_gendev;
-	sdkp->dev.class = &sd_disk_class;
-	dev_set_name(&sdkp->dev, dev_name(&sdp->sdev_gendev));
-
-	if (device_add(&sdkp->dev))
-		goto out_free_index;
-
-	get_device(&sdp->sdev_gendev);
-
 	if (index < SD_MAX_DISKS) {
 		gd->major = sd_major((index & 0xf0) >> 4);
 		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
@@ -1954,11 +1936,6 @@
 
 	sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
 		  sdp->removable ? "removable " : "");
-
-	return;
-
- out_free_index:
-	ida_remove(&sd_index_ida, index);
 }
 
 /**
@@ -2026,6 +2003,24 @@
 	sdkp->openers = 0;
 	sdkp->previous_state = 1;
 
+	if (!sdp->request_queue->rq_timeout) {
+		if (sdp->type != TYPE_MOD)
+			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
+		else
+			blk_queue_rq_timeout(sdp->request_queue,
+					     SD_MOD_TIMEOUT);
+	}
+
+	device_initialize(&sdkp->dev);
+	sdkp->dev.parent = &sdp->sdev_gendev;
+	sdkp->dev.class = &sd_disk_class;
+	dev_set_name(&sdkp->dev, dev_name(&sdp->sdev_gendev));
+
+	if (device_add(&sdkp->dev))
+		goto out_free_index;
+
+	get_device(&sdp->sdev_gendev);
+
 	async_schedule(sd_probe_async, sdkp);
 
 	return 0;
@@ -2055,8 +2050,10 @@
  **/
 static int sd_remove(struct device *dev)
 {
-	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_disk *sdkp;
 
+	async_synchronize_full();
+	sdkp = dev_get_drvdata(dev);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 184dff4..82f14a9 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -507,7 +507,7 @@
 	sector_sz = scmd->device->sector_size;
 	sectors = good_bytes / sector_sz;
 
-	phys = scmd->request->sector & 0xffffffff;
+	phys = blk_rq_pos(scmd->request) & 0xffffffff;
 	if (sector_sz == 4096)
 		phys >>= 3;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e1716f1..8201387 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -289,8 +289,8 @@
 	if (list_empty(&sdp->sfds)) {	/* no existing opens on this device */
 		sdp->sgdebug = 0;
 		q = sdp->device->request_queue;
-		sdp->sg_tablesize = min(q->max_hw_segments,
-					q->max_phys_segments);
+		sdp->sg_tablesize = min(queue_max_hw_segments(q),
+					queue_max_phys_segments(q));
 	}
 	if ((sfp = sg_add_sfp(sdp, dev)))
 		filp->private_data = sfp;
@@ -909,7 +909,7 @@
                 if (val < 0)
                         return -EINVAL;
 		val = min_t(int, val,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		if (val != sfp->reserve.bufflen) {
 			if (sg_res_in_use(sfp) || sfp->mmap_called)
 				return -EBUSY;
@@ -919,7 +919,7 @@
 		return 0;
 	case SG_GET_RESERVED_SIZE:
 		val = min_t(int, sfp->reserve.bufflen,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		return put_user(val, ip);
 	case SG_SET_COMMAND_Q:
 		result = get_user(val, ip);
@@ -1059,12 +1059,13 @@
 			return -ENODEV;
 		return scsi_ioctl(sdp->device, cmd_in, p);
 	case BLKSECTGET:
-		return put_user(sdp->device->request_queue->max_sectors * 512,
+		return put_user(queue_max_sectors(sdp->device->request_queue) * 512,
 				ip);
 	case BLKTRACESETUP:
 		return blk_trace_setup(sdp->device->request_queue,
 				       sdp->disk->disk_name,
 				       MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
+				       NULL,
 				       (char *)arg);
 	case BLKTRACESTART:
 		return blk_trace_startstop(sdp->device->request_queue, 1);
@@ -1260,7 +1261,7 @@
 
 	sense = rq->sense;
 	result = rq->errors;
-	resid = rq->data_len;
+	resid = rq->resid_len;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
 		sdp->disk->disk_name, srp->header.pack_id, result));
@@ -1377,7 +1378,8 @@
 	sdp->device = scsidp;
 	INIT_LIST_HEAD(&sdp->sfds);
 	init_waitqueue_head(&sdp->o_excl_wait);
-	sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+	sdp->sg_tablesize = min(queue_max_hw_segments(q),
+				queue_max_phys_segments(q));
 	sdp->index = k;
 	kref_init(&sdp->d_ref);
 
@@ -2055,7 +2057,7 @@
 		sg_big_buff = def_reserved_size;
 
 	bufflen = min_t(int, sg_big_buff,
-			sdp->device->request_queue->max_sectors * 512);
+			queue_max_sectors(sdp->device->request_queue) * 512);
 	sg_build_reserve(sfp, bufflen);
 	SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp:   bufflen=%d, k_use_sg=%d\n",
 			   sfp->reserve.bufflen, sfp->reserve.k_use_sg));
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0e1a0f2..cd350df 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -292,7 +292,8 @@
 			if (cd->device->sector_size == 2048)
 				error_sector <<= 2;
 			error_sector &= ~(block_sectors - 1);
-			good_bytes = (error_sector - SCpnt->request->sector) << 9;
+			good_bytes = (error_sector -
+				      blk_rq_pos(SCpnt->request)) << 9;
 			if (good_bytes < 0 || good_bytes >= this_count)
 				good_bytes = 0;
 			/*
@@ -349,8 +350,8 @@
 				cd->disk->disk_name, block));
 
 	if (!cd->device || !scsi_device_online(cd->device)) {
-		SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n",
-					rq->nr_sectors));
+		SCSI_LOG_HLQUEUE(2, printk("Finishing %u sectors\n",
+					   blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
 		goto out;
 	}
@@ -413,7 +414,7 @@
 	/*
 	 * request doesn't start on hw block boundary, add scatter pads
 	 */
-	if (((unsigned int)rq->sector % (s_size >> 9)) ||
+	if (((unsigned int)blk_rq_pos(rq) % (s_size >> 9)) ||
 	    (scsi_bufflen(SCpnt) % s_size)) {
 		scmd_printk(KERN_NOTICE, SCpnt, "unaligned transfer\n");
 		goto out;
@@ -422,14 +423,14 @@
 	this_count = (scsi_bufflen(SCpnt) >> 9) / (s_size >> 9);
 
 
-	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n",
+	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%u 512 byte blocks.\n",
 				cd->cdi.name,
 				(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading",
-				this_count, rq->nr_sectors));
+				this_count, blk_rq_sectors(rq)));
 
 	SCpnt->cmnd[1] = 0;
-	block = (unsigned int)rq->sector / (s_size >> 9);
+	block = (unsigned int)blk_rq_pos(rq) / (s_size >> 9);
 
 	if (this_count > 0xffff) {
 		this_count = 0xffff;
@@ -726,7 +727,7 @@
 	}
 
 	queue = cd->device->request_queue;
-	blk_queue_hardsect_size(queue, sector_size);
+	blk_queue_logical_block_size(queue, sector_size);
 
 	return;
 }
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index eb24efe..b33d042 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -463,7 +463,7 @@
 	struct scsi_tape *STp = SRpnt->stp;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
-	STp->buffer->cmdstat.residual = req->data_len;
+	STp->buffer->cmdstat.residual = req->resid_len;
 
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
@@ -2964,7 +2964,7 @@
 			    !(STp->use_pf & PF_TESTED)) {
 				/* Try the other possible state of Page Format if not
 				   already tried */
-				STp->use_pf = !STp->use_pf | PF_TESTED;
+				STp->use_pf = (STp->use_pf ^ USE_PF) | PF_TESTED;
 				st_release_request(SRpnt);
 				SRpnt = NULL;
 				return st_int_ioctl(STp, cmd_in, arg);
@@ -3983,8 +3983,8 @@
 		return -ENODEV;
 	}
 
-	i = min(SDp->request_queue->max_hw_segments,
-		SDp->request_queue->max_phys_segments);
+	i = min(queue_max_hw_segments(SDp->request_queue),
+		queue_max_phys_segments(SDp->request_queue));
 	if (st_max_sg_segs < i)
 		i = st_max_sg_segs;
 	buffer = new_tape_buffer((SDp->host)->unchecked_isa_dma, i);
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 583966e..45374d6 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -737,11 +737,14 @@
 	struct sym_hcb *np = sym_get_hcb(sdev->host);
 	struct sym_tcb *tp = &np->target[sdev->id];
 	struct sym_lcb *lp;
+	unsigned long flags;
+	int error;
 
 	if (sdev->id >= SYM_CONF_MAX_TARGET || sdev->lun >= SYM_CONF_MAX_LUN)
 		return -ENXIO;
 
-	tp->starget = sdev->sdev_target;
+	spin_lock_irqsave(np->s.host->host_lock, flags);
+
 	/*
 	 * Fail the device init if the device is flagged NOSCAN at BOOT in
 	 * the NVRAM.  This may speed up boot and maintain coherency with
@@ -753,26 +756,37 @@
 
 	if (tp->usrflags & SYM_SCAN_BOOT_DISABLED) {
 		tp->usrflags &= ~SYM_SCAN_BOOT_DISABLED;
-		starget_printk(KERN_INFO, tp->starget,
+		starget_printk(KERN_INFO, sdev->sdev_target,
 				"Scan at boot disabled in NVRAM\n");
-		return -ENXIO;
+		error = -ENXIO;
+		goto out;
 	}
 
 	if (tp->usrflags & SYM_SCAN_LUNS_DISABLED) {
-		if (sdev->lun != 0)
-			return -ENXIO;
-		starget_printk(KERN_INFO, tp->starget,
+		if (sdev->lun != 0) {
+			error = -ENXIO;
+			goto out;
+		}
+		starget_printk(KERN_INFO, sdev->sdev_target,
 				"Multiple LUNs disabled in NVRAM\n");
 	}
 
 	lp = sym_alloc_lcb(np, sdev->id, sdev->lun);
-	if (!lp)
-		return -ENOMEM;
+	if (!lp) {
+		error = -ENOMEM;
+		goto out;
+	}
+	if (tp->nlcb == 1)
+		tp->starget = sdev->sdev_target;
 
 	spi_min_period(tp->starget) = tp->usr_period;
 	spi_max_width(tp->starget) = tp->usr_width;
 
-	return 0;
+	error = 0;
+out:
+	spin_unlock_irqrestore(np->s.host->host_lock, flags);
+
+	return error;
 }
 
 /*
@@ -819,12 +833,34 @@
 static void sym53c8xx_slave_destroy(struct scsi_device *sdev)
 {
 	struct sym_hcb *np = sym_get_hcb(sdev->host);
-	struct sym_lcb *lp = sym_lp(&np->target[sdev->id], sdev->lun);
+	struct sym_tcb *tp = &np->target[sdev->id];
+	struct sym_lcb *lp = sym_lp(tp, sdev->lun);
+	unsigned long flags;
 
-	if (lp->itlq_tbl)
-		sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK * 4, "ITLQ_TBL");
-	kfree(lp->cb_tags);
-	sym_mfree_dma(lp, sizeof(*lp), "LCB");
+	spin_lock_irqsave(np->s.host->host_lock, flags);
+
+	if (lp->busy_itlq || lp->busy_itl) {
+		/*
+		 * This really shouldn't happen, but we can't return an error
+		 * so let's try to stop all on-going I/O.
+		 */
+		starget_printk(KERN_WARNING, tp->starget,
+			       "Removing busy LCB (%d)\n", sdev->lun);
+		sym_reset_scsi_bus(np, 1);
+	}
+
+	if (sym_free_lcb(np, sdev->id, sdev->lun) == 0) {
+		/*
+		 * It was the last unit for this target.
+		 */
+		tp->head.sval        = 0;
+		tp->head.wval        = np->rv_scntl3;
+		tp->head.uval        = 0;
+		tp->tgoal.check_nego = 1;
+		tp->starget	     = NULL;
+	}
+
+	spin_unlock_irqrestore(np->s.host->host_lock, flags);
 }
 
 /*
@@ -890,6 +926,8 @@
 			if (!((uc->target >> t) & 1))
 				continue;
 			tp = &np->target[t];
+			if (!tp->nlcb)
+				continue;
 
 			switch (uc->cmd) {
 
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index ffa70d1..69ad494 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -1896,6 +1896,15 @@
 		tp->head.sval = 0;
 		tp->head.wval = np->rv_scntl3;
 		tp->head.uval = 0;
+		if (tp->lun0p)
+			tp->lun0p->to_clear = 0;
+		if (tp->lunmp) {
+			int ln;
+
+			for (ln = 1; ln < SYM_CONF_MAX_LUN; ln++)
+				if (tp->lunmp[ln])
+					tp->lunmp[ln]->to_clear = 0;
+		}
 	}
 
 	/*
@@ -4988,7 +4997,7 @@
 	 */
 	if (ln && !tp->lunmp) {
 		tp->lunmp = kcalloc(SYM_CONF_MAX_LUN, sizeof(struct sym_lcb *),
-				GFP_KERNEL);
+				GFP_ATOMIC);
 		if (!tp->lunmp)
 			goto fail;
 	}
@@ -5008,6 +5017,7 @@
 		tp->lun0p = lp;
 		tp->head.lun0_sa = cpu_to_scr(vtobus(lp));
 	}
+	tp->nlcb++;
 
 	/*
 	 *  Let the itl task point to error handling.
@@ -5085,6 +5095,43 @@
 }
 
 /*
+ *  Lun control block deallocation. Returns the number of valid remaing LCBs
+ *  for the target.
+ */
+int sym_free_lcb(struct sym_hcb *np, u_char tn, u_char ln)
+{
+	struct sym_tcb *tp = &np->target[tn];
+	struct sym_lcb *lp = sym_lp(tp, ln);
+
+	tp->nlcb--;
+
+	if (ln) {
+		if (!tp->nlcb) {
+			kfree(tp->lunmp);
+			sym_mfree_dma(tp->luntbl, 256, "LUNTBL");
+			tp->lunmp = NULL;
+			tp->luntbl = NULL;
+			tp->head.luntbl_sa = cpu_to_scr(vtobus(np->badluntbl));
+		} else {
+			tp->luntbl[ln] = cpu_to_scr(vtobus(&np->badlun_sa));
+			tp->lunmp[ln] = NULL;
+		}
+	} else {
+		tp->lun0p = NULL;
+		tp->head.lun0_sa = cpu_to_scr(vtobus(&np->badlun_sa));
+	}
+
+	if (lp->itlq_tbl) {
+		sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, "ITLQ_TBL");
+		kfree(lp->cb_tags);
+	}
+
+	sym_mfree_dma(lp, sizeof(*lp), "LCB");
+
+	return tp->nlcb;
+}
+
+/*
  *  Queue a SCSI IO to the controller.
  */
 int sym_queue_scsiio(struct sym_hcb *np, struct scsi_cmnd *cmd, struct sym_ccb *cp)
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h
index 9ebc870..053e63c 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.h
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h
@@ -401,6 +401,7 @@
 	 *  An array of bus addresses is used on reselection.
 	 */
 	u32	*luntbl;	/* LCBs bus address table	*/
+	int	nlcb;		/* Number of valid LCBs (including LUN #0) */
 
 	/*
 	 *  LUN table used by the C code.
@@ -1065,6 +1066,7 @@
 struct sym_ccb *sym_get_ccb(struct sym_hcb *np, struct scsi_cmnd *cmd, u_char tag_order);
 void sym_free_ccb(struct sym_hcb *np, struct sym_ccb *cp);
 struct sym_lcb *sym_alloc_lcb(struct sym_hcb *np, u_char tn, u_char ln);
+int sym_free_lcb(struct sym_hcb *np, u_char tn, u_char ln);
 int sym_queue_scsiio(struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp);
 int sym_abort_scsiio(struct sym_hcb *np, struct scsi_cmnd *ccb, int timed_out);
 int sym_reset_scsi_target(struct sym_hcb *np, int target);
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index 601e951..54023d41 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -1306,7 +1306,7 @@
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type)) {
       HD(j)->cp_stat[i] = READY;
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, FALSE);
       return 0;
       }
 
@@ -1610,11 +1610,13 @@
 
       if (!(cpp->xdir == DTD_IN)) input_only = FALSE;
 
-      if (SCpnt->request->sector < minsec) minsec = SCpnt->request->sector;
-      if (SCpnt->request->sector > maxsec) maxsec = SCpnt->request->sector;
+      if (blk_rq_pos(SCpnt->request) < minsec)
+	 minsec = blk_rq_pos(SCpnt->request);
+      if (blk_rq_pos(SCpnt->request) > maxsec)
+	 maxsec = blk_rq_pos(SCpnt->request);
 
-      sl[n] = SCpnt->request->sector;
-      ioseek += SCpnt->request->nr_sectors;
+      sl[n] = blk_rq_pos(SCpnt->request);
+      ioseek += blk_rq_sectors(SCpnt->request);
 
       if (!n) continue;
 
@@ -1642,7 +1644,7 @@
 
    if (!input_only) for (n = 0; n < n_ready; n++) {
       k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-      ll[n] = SCpnt->request->nr_sectors; pl[n] = SCpnt->serial_number;
+      ll[n] = blk_rq_sectors(SCpnt->request); pl[n] = SCpnt->serial_number;
 
       if (!n) continue;
 
@@ -1666,12 +1668,12 @@
    if (link_statistics && (overlap || !(flushcount % link_statistics)))
       for (n = 0; n < n_ready; n++) {
          k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %ld"\
+         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %u"\
                 " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
                 (ihdlr ? "ihdlr" : "qcomm"), SCpnt->channel, SCpnt->target,
                 SCpnt->lun, SCpnt->serial_number, k, flushcount, n_ready,
-                SCpnt->request->sector, SCpnt->request->nr_sectors, cursec,
-                YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
+                blk_rq_pos(SCpnt->request), blk_rq_sectors(SCpnt->request),
+		cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
                 YESNO(overlap), cpp->xdir);
          }
 #endif
@@ -1799,7 +1801,7 @@
 
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type))
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, TRUE);
 
    tstatus = status_byte(spp->target_status);
 
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a0127e9..fb867a9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -287,6 +287,13 @@
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_AR7] = {
+		.name		= "AR7",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 938bc1b..e371a9c 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2776,6 +2776,9 @@
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
+	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
+		PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
+		pbn_b0_1_921600 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_4_115200 },
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 343e3a3..641e800 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -833,6 +833,7 @@
 	bool "IMX serial port support"
 	depends on ARM && (ARCH_IMX || ARCH_MXC)
 	select SERIAL_CORE
+	select RATIONAL
 	help
 	  If you have a machine based on a Motorola IMX CPU you
 	  can enable its onboard serial port by enabling this option.
@@ -1433,4 +1434,11 @@
 	default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
 	default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
 
+config SERIAL_TIMBERDALE
+	tristate "Support for timberdale UART"
+	depends on MFD_TIMBERDALE
+	select SERIAL_CORE
+	---help---
+	Add support for UART controller on timberdale.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d438eb2..45a8658 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -77,3 +77,4 @@
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
+obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index d86123e..e2f6b1b 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -330,6 +330,11 @@
 		/* Clear TFI bit */
 		UART_PUT_LSR(uart, TFI);
 #endif
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		return;
 	}
@@ -415,6 +420,7 @@
 	set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail));
 	set_dma_x_count(uart->tx_dma_channel, uart->tx_count);
 	set_dma_x_modify(uart->tx_dma_channel, 1);
+	SSYNC();
 	enable_dma(uart->tx_dma_channel);
 
 	UART_SET_IER(uart, ETBEI);
@@ -473,27 +479,41 @@
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
 	int x_pos, pos;
-	unsigned long flags;
 
-	spin_lock_irqsave(&uart->port.lock, flags);
+	dma_disable_irq(uart->rx_dma_channel);
+	spin_lock_bh(&uart->port.lock);
 
+	/* 2D DMA RX buffer ring is used. Because curr_y_count and
+	 * curr_x_count can't be read as an atomic operation,
+	 * curr_y_count should be read before curr_x_count. When
+	 * curr_x_count is read, curr_y_count may already indicate
+	 * next buffer line. But, the position calculated here is
+	 * still indicate the old line. The wrong position data may
+	 * be smaller than current buffer tail, which cause garbages
+	 * are received if it is not prohibit.
+	 */
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
 	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
-	if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
 		uart->rx_dma_nrows = 0;
 	x_pos = DMA_RX_XCOUNT - x_pos;
 	if (x_pos == DMA_RX_XCOUNT)
 		x_pos = 0;
 
 	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos;
-	if (pos != uart->rx_dma_buf.tail) {
+	/* Ignore receiving data if new position is in the same line of
+	 * current buffer tail and small.
+	 */
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
 		uart->rx_dma_buf.head = pos;
 		bfin_serial_dma_rx_chars(uart);
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
 	}
 
-	spin_unlock_irqrestore(&uart->port.lock, flags);
+	spin_unlock_bh(&uart->port.lock);
+	dma_enable_irq(uart->rx_dma_channel);
 
 	mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
@@ -514,6 +534,11 @@
 	if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) {
 		disable_dma(uart->tx_dma_channel);
 		clear_dma_irqstat(uart->tx_dma_channel);
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1);
 		uart->port.icount.tx += uart->tx_count;
@@ -532,11 +557,26 @@
 {
 	struct bfin_serial_port *uart = dev_id;
 	unsigned short irqstat;
+	int x_pos, pos;
 
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
-	bfin_serial_dma_rx_chars(uart);
+
+	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
+	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
+	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
+		uart->rx_dma_nrows = 0;
+
+	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
+		uart->rx_dma_buf.head = pos;
+		bfin_serial_dma_rx_chars(uart);
+		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
+	}
+
 	spin_unlock(&uart->port.lock);
 
 	return IRQ_HANDLED;
@@ -789,8 +829,16 @@
 			__func__);
 	}
 
-	if (termios->c_cflag & CSTOPB)
-		lcr |= STB;
+	/* Anomaly notes:
+	 *  05000231 -  STOP bit is always set to 1 whatever the user is set.
+	 */
+	if (termios->c_cflag & CSTOPB) {
+		if (ANOMALY_05000231)
+			printk(KERN_WARNING "STOP bits other than 1 is not "
+				"supported in case of anomaly 05000231.\n");
+		else
+			lcr |= STB;
+	}
 	if (termios->c_cflag & PARENB)
 		lcr |= PEN;
 	if (!(termios->c_cflag & PARODD))
@@ -940,6 +988,10 @@
 }
 
 #ifdef CONFIG_CONSOLE_POLL
+/* Anomaly notes:
+ *  05000099 -  Because we only use THRE in poll_put and DR in poll_get,
+ *		losing other bits of UART_LSR is not a problem here.
+ */
 static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
@@ -1245,12 +1297,17 @@
 	}
 }
 
+/*
+ * This should have a .setup or .early_setup in it, but then things get called
+ * without the command line options, and the baud rate gets messed up - so
+ * don't let the common infrastructure play with things. (see calls to setup
+ * & earlysetup in ./kernel/printk.c:register_console()
+ */
 static struct __initdata console bfin_early_serial_console = {
 	.name = "early_BFuart",
 	.write = early_serial_write,
 	.device = uart_console_device,
 	.flags = CON_PRINTBUFFER,
-	.setup = bfin_serial_console_setup,
 	.index = -1,
 	.data  = &bfin_serial_reg,
 };
diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index 529c0ff..34b4ae0 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -101,15 +101,16 @@
 {
 	pr_debug("%s value:%x\n", __func__, value);
 	/* Place a Start and Stop bit */
-	__asm__ volatile (
-		"R2 = b#01111111100;\n\t"
-		"R3 = b#10000000001;\n\t"
-		"%0 <<= 2;\n\t"
-		"%0 = %0 & R2;\n\t"
-		"%0 = %0 | R3;\n\t"
-		:"=r"(value)
-		:"0"(value)
-		:"R2", "R3");
+	__asm__ __volatile__ (
+		"R2 = b#01111111100;"
+		"R3 = b#10000000001;"
+		"%0 <<= 2;"
+		"%0 = %0 & R2;"
+		"%0 = %0 | R3;"
+		: "=d"(value)
+		: "d"(value)
+		: "ASTAT", "R2", "R3"
+	);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	SPORT_PUT_TX(up, value);
@@ -118,27 +119,30 @@
 static inline unsigned int rx_one_byte(struct sport_uart_port *up)
 {
 	unsigned int value, extract;
+	u32 tmp_mask1, tmp_mask2, tmp_shift, tmp;
 
 	value = SPORT_GET_RX32(up);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	/* Extract 8 bits data */
-	__asm__ volatile (
-		"R5 = 0;\n\t"
-		"P0 = 8;\n\t"
-		"R1 = 0x1801(Z);\n\t"
-		"R3 = 0x0300(Z);\n\t"
-		"R4 = 0;\n\t"
-		"LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t"
-		"R2 = extract(%1, R1.L)(Z);\n\t"
-		"R2 <<= R4;\n\t"
-		"R5 = R5 | R2;\n\t"
-		"R1 = R1 - R3;\nloop_e:\t"
-		"R4 += 1;\n\t"
-		"%0 = R5;\n\t"
-		:"=r"(extract)
-		:"r"(value)
-		:"P0", "R1", "R2","R3","R4", "R5");
+	__asm__ __volatile__ (
+		"%[extr] = 0;"
+		"%[mask1] = 0x1801(Z);"
+		"%[mask2] = 0x0300(Z);"
+		"%[shift] = 0;"
+		"LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];"
+		".Lloop_s:"
+		"%[tmp] = extract(%[val], %[mask1].L)(Z);"
+		"%[tmp] <<= %[shift];"
+		"%[extr] = %[extr] | %[tmp];"
+		"%[mask1] = %[mask1] - %[mask2];"
+		".Lloop_e:"
+		"%[shift] += 1;"
+		: [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp),
+		  [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2)
+		: "d"(value), [lc]"a"(8)
+		: "ASTAT", "LB0", "LC0", "LT0"
+	);
 
 	pr_debug("	extract:%x\n", extract);
 	return extract;
@@ -149,7 +153,7 @@
 	int tclkdiv, tfsdiv, rclkdiv;
 
 	/* Set TCR1 and TCR2 */
-	SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
+	SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK));
 	SPORT_PUT_TCR2(up, 10);
 	pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
@@ -419,7 +423,7 @@
 }
 
 static void sport_set_termios(struct uart_port *port,
-		struct termios *termios, struct termios *old)
+		struct ktermios *termios, struct ktermios *old)
 {
 	pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
 	uart_update_timeout(port, CS8 ,port->uartclk);
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index a461b3b..9f2891c 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -137,7 +137,12 @@
 static spinlock_t icom_lock;
 
 #ifdef ICOM_TRACE
-static inline void trace(struct icom_port *, char *, unsigned long) {};
+static inline void trace(struct icom_port *icom_port, char *trace_pt,
+			unsigned long trace_data)
+{
+	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
+	icom_port->port, trace_pt, trace_data);
+}
 #else
 static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {};
 #endif
@@ -408,7 +413,7 @@
 	release_firmware(fw);
 
 	/* Set Hardware level */
-	if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2)
+	if (icom_port->adapter->version == ADAPTER_V2)
 		writeb(V2_HARDWARE, &(icom_port->dram->misc_flags));
 
 	/* Start the processor in Adapter */
@@ -861,7 +866,7 @@
 	/* find icom_port for this interrupt */
 	icom_adapter = (struct icom_adapter *) dev_id;
 
-	if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) {
+	if (icom_adapter->version == ADAPTER_V2) {
 		int_reg = icom_adapter->base_addr + 0x8024;
 
 		adapter_interrupts = readl(int_reg);
@@ -1647,15 +1652,6 @@
 module_init(icom_init);
 module_exit(icom_exit);
 
-#ifdef ICOM_TRACE
-static inline void trace(struct icom_port *icom_port, char *trace_pt,
-		  unsigned long trace_data)
-{
-	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
-		 icom_port->port, trace_pt, trace_data);
-}
-#endif
-
 MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
 MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
 MODULE_SUPPORTED_DEVICE
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 9f460b1..7b5d1de 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -8,6 +8,9 @@
  *  Author: Sascha Hauer <sascha@saschahauer.de>
  *  Copyright (C) 2004 Pengutronix
  *
+ *  Copyright (C) 2009 emlix GmbH
+ *  Author: Fabian Godehardt (added IrDA support for iMX)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -41,6 +44,8 @@
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/rational.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -148,6 +153,7 @@
 #define  UCR4_DREN  	 (1<<0)  /* Recv data ready interrupt enable */
 #define  UFCR_RXTL_SHF   0       /* Receiver trigger level shift */
 #define  UFCR_RFDIV      (7<<7)  /* Reference freq divider mask */
+#define  UFCR_RFDIV_REG(x)	(((x) < 7 ? 6 - (x) : 6) << 7)
 #define  UFCR_TXTL_SHF   10      /* Transmitter trigger level shift */
 #define  USR1_PARITYERR  (1<<15) /* Parity error interrupt flag */
 #define  USR1_RTSS  	 (1<<14) /* RTS pin status */
@@ -211,10 +217,20 @@
 	struct timer_list	timer;
 	unsigned int		old_status;
 	int			txirq,rxirq,rtsirq;
-	int			have_rtscts:1;
+	unsigned int		have_rtscts:1;
+	unsigned int		use_irda:1;
+	unsigned int		irda_inv_rx:1;
+	unsigned int		irda_inv_tx:1;
+	unsigned short		trcv_delay; /* transceiver delay */
 	struct clk		*clk;
 };
 
+#ifdef CONFIG_IRDA
+#define USE_IRDA(sport)	((sport)->use_irda)
+#else
+#define USE_IRDA(sport)	(0)
+#endif
+
 /*
  * Handle any change of modem status signal since we were last called.
  */
@@ -268,6 +284,48 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex - wait for end of transmission */
+		int n = 256;
+		while ((--n > 0) &&
+		      !(readl(sport->port.membase + USR2) & USR2_TXDC)) {
+			udelay(5);
+			barrier();
+		}
+		/*
+		 * irda transceiver - wait a bit more to avoid
+		 * cutoff, hardware dependent
+		 */
+		udelay(sport->trcv_delay);
+
+		/*
+		 * half duplex - reactivate receive mode,
+		 * flush receive pipe echo crap
+		 */
+		if (readl(sport->port.membase + USR2) & USR2_TXDC) {
+			temp = readl(sport->port.membase + UCR1);
+			temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN);
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp &= ~(UCR4_TCEN);
+			writel(temp, sport->port.membase + UCR4);
+
+			while (readl(sport->port.membase + URXD0) &
+			       URXD_CHARRDY)
+				barrier();
+
+			temp = readl(sport->port.membase + UCR1);
+			temp |= UCR1_RRDYEN;
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp |= UCR4_DREN;
+			writel(temp, sport->port.membase + UCR4);
+		}
+		return;
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1);
 }
@@ -302,13 +360,15 @@
 		/* send xmit->buf[xmit->tail]
 		 * out the port here */
 		writel(xmit->buf[xmit->tail], sport->port.membase + URTX0);
-		xmit->tail = (xmit->tail + 1) &
-		         (UART_XMIT_SIZE - 1);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
 		sport->port.icount.tx++;
 		if (uart_circ_empty(xmit))
 			break;
 	}
 
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&sport->port);
+
 	if (uart_circ_empty(xmit))
 		imx_stop_tx(&sport->port);
 }
@@ -321,9 +381,30 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex in IrDA mode; have to disable receive mode */
+		temp = readl(sport->port.membase + UCR4);
+		temp &= ~(UCR4_DREN);
+		writel(temp, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR1);
+		temp &= ~(UCR1_RRDYEN);
+		writel(temp, sport->port.membase + UCR1);
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1);
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR1);
+		temp |= UCR1_TRDYEN;
+		writel(temp, sport->port.membase + UCR1);
+
+		temp = readl(sport->port.membase + UCR4);
+		temp |= UCR4_TCEN;
+		writel(temp, sport->port.membase + UCR4);
+	}
+
 	if (readl(sport->port.membase + UTS) & UTS_TXEMPTY)
 		imx_transmit_buffer(sport);
 }
@@ -395,8 +476,7 @@
 				continue;
 		}
 
-		if (uart_handle_sysrq_char
-		            (&sport->port, (unsigned char)rx))
+		if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx))
 			continue;
 
 		if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) {
@@ -471,26 +551,26 @@
  */
 static unsigned int imx_get_mctrl(struct uart_port *port)
 {
-        struct imx_port *sport = (struct imx_port *)port;
-        unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
+	struct imx_port *sport = (struct imx_port *)port;
+	unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
 
-        if (readl(sport->port.membase + USR1) & USR1_RTSS)
-                tmp |= TIOCM_CTS;
+	if (readl(sport->port.membase + USR1) & USR1_RTSS)
+		tmp |= TIOCM_CTS;
 
-        if (readl(sport->port.membase + UCR2) & UCR2_CTS)
-                tmp |= TIOCM_RTS;
+	if (readl(sport->port.membase + UCR2) & UCR2_CTS)
+		tmp |= TIOCM_RTS;
 
-        return tmp;
+	return tmp;
 }
 
 static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-        struct imx_port *sport = (struct imx_port *)port;
+	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
 	temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS;
 
-        if (mctrl & TIOCM_RTS)
+	if (mctrl & TIOCM_RTS)
 		temp |= UCR2_CTS;
 
 	writel(temp, sport->port.membase + UCR2);
@@ -534,12 +614,7 @@
 	if(!ufcr_rfdiv)
 		ufcr_rfdiv = 1;
 
-	if(ufcr_rfdiv >= 7)
-		ufcr_rfdiv = 6;
-	else
-		ufcr_rfdiv = 6 - ufcr_rfdiv;
-
-	val |= UFCR_RFDIV & (ufcr_rfdiv << 7);
+	val |= UFCR_RFDIV_REG(ufcr_rfdiv);
 
 	writel(val, sport->port.membase + UFCR);
 
@@ -558,8 +633,24 @@
 	 * requesting IRQs
 	 */
 	temp = readl(sport->port.membase + UCR4);
+
+	if (USE_IRDA(sport))
+		temp |= UCR4_IRSC;
+
 	writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
+	if (USE_IRDA(sport)) {
+		/* reset fifo's and state machines */
+		int i = 100;
+		temp = readl(sport->port.membase + UCR2);
+		temp &= ~UCR2_SRST;
+		writel(temp, sport->port.membase + UCR2);
+		while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) &&
+		    (--i > 0)) {
+			udelay(1);
+		}
+	}
+
 	/*
 	 * Allocate the IRQ(s) i.MX1 has three interrupts whereas later
 	 * chips only have one interrupt.
@@ -575,12 +666,16 @@
 		if (retval)
 			goto error_out2;
 
-		retval = request_irq(sport->rtsirq, imx_rtsint,
-			     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
-			       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				DRIVER_NAME, sport);
-		if (retval)
-			goto error_out3;
+		/* do not use RTS IRQ on IrDA */
+		if (!USE_IRDA(sport)) {
+			retval = request_irq(sport->rtsirq, imx_rtsint,
+				     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
+				       IRQF_TRIGGER_FALLING |
+				       IRQF_TRIGGER_RISING,
+					DRIVER_NAME, sport);
+			if (retval)
+				goto error_out3;
+		}
 	} else {
 		retval = request_irq(sport->port.irq, imx_int, 0,
 				DRIVER_NAME, sport);
@@ -597,18 +692,49 @@
 
 	temp = readl(sport->port.membase + UCR1);
 	temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN;
+
+	if (USE_IRDA(sport)) {
+		temp |= UCR1_IREN;
+		temp &= ~(UCR1_RTSDEN);
+	}
+
 	writel(temp, sport->port.membase + UCR1);
 
 	temp = readl(sport->port.membase + UCR2);
 	temp |= (UCR2_RXEN | UCR2_TXEN);
 	writel(temp, sport->port.membase + UCR2);
 
+	if (USE_IRDA(sport)) {
+		/* clear RX-FIFO */
+		int i = 64;
+		while ((--i > 0) &&
+			(readl(sport->port.membase + URXD0) & URXD_CHARRDY)) {
+			barrier();
+		}
+	}
+
 #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3
 	temp = readl(sport->port.membase + UCR3);
 	temp |= UCR3_RXDMUXSEL;
 	writel(temp, sport->port.membase + UCR3);
 #endif
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR4);
+		if (sport->irda_inv_rx)
+			temp |= UCR4_INVR;
+		else
+			temp &= ~(UCR4_INVR);
+		writel(temp | UCR4_DREN, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR3);
+		if (sport->irda_inv_tx)
+			temp |= UCR3_INVT;
+		else
+			temp &= ~(UCR3_INVT);
+		writel(temp, sport->port.membase + UCR3);
+	}
+
 	/*
 	 * Enable modem status interrupts
 	 */
@@ -616,6 +742,16 @@
 	imx_enable_ms(&sport->port);
 	spin_unlock_irqrestore(&sport->port.lock,flags);
 
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		sport->irda_inv_rx = pdata->irda_inv_rx;
+		sport->irda_inv_tx = pdata->irda_inv_tx;
+		sport->trcv_delay = pdata->transceiver_delay;
+		if (pdata->irda_enable)
+			pdata->irda_enable(1);
+	}
+
 	return 0;
 
 error_out3:
@@ -633,6 +769,17 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	temp = readl(sport->port.membase + UCR2);
+	temp &= ~(UCR2_TXEN);
+	writel(temp, sport->port.membase + UCR2);
+
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		if (pdata->irda_enable)
+			pdata->irda_enable(0);
+	}
+
 	/*
 	 * Stop our timer.
 	 */
@@ -642,7 +789,8 @@
 	 * Free the interrupts
 	 */
 	if (sport->txirq > 0) {
-		free_irq(sport->rtsirq, sport);
+		if (!USE_IRDA(sport))
+			free_irq(sport->rtsirq, sport);
 		free_irq(sport->txirq, sport);
 		free_irq(sport->rxirq, sport);
 	} else
@@ -654,6 +802,9 @@
 
 	temp = readl(sport->port.membase + UCR1);
 	temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
+	if (USE_IRDA(sport))
+		temp &= ~(UCR1_IREN);
+
 	writel(temp, sport->port.membase + UCR1);
 }
 
@@ -665,7 +816,9 @@
 	unsigned long flags;
 	unsigned int ucr2, old_ucr1, old_txrxen, baud, quot;
 	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
-	unsigned int div, num, denom, ufcr;
+	unsigned int div, ufcr;
+	unsigned long num, denom;
+	uint64_t tdiv64;
 
 	/*
 	 * If we don't support modem control lines, don't allow
@@ -761,38 +914,39 @@
 			sport->port.membase + UCR2);
 	old_txrxen &= (UCR2_TXEN | UCR2_RXEN);
 
-	div = sport->port.uartclk / (baud * 16);
-	if (div > 7)
-		div = 7;
-	if (!div)
+	if (USE_IRDA(sport)) {
+		/*
+		 * use maximum available submodule frequency to
+		 * avoid missing short pulses due to low sampling rate
+		 */
 		div = 1;
-
-	num = baud;
-	denom = port->uartclk / div / 16;
-
-	/* shift num and denom right until they fit into 16 bits */
-	while (num > 0x10000 || denom > 0x10000) {
-		num >>= 1;
-		denom >>= 1;
+	} else {
+		div = sport->port.uartclk / (baud * 16);
+		if (div > 7)
+			div = 7;
+		if (!div)
+			div = 1;
 	}
-	if (num > 0)
-		num -= 1;
-	if (denom > 0)
-		denom -= 1;
+
+	rational_best_approximation(16 * div * baud, sport->port.uartclk,
+		1 << 16, 1 << 16, &num, &denom);
+
+	tdiv64 = sport->port.uartclk;
+	tdiv64 *= num;
+	do_div(tdiv64, denom * 16 * div);
+	tty_encode_baud_rate(sport->port.info->port.tty,
+		(speed_t)tdiv64, (speed_t)tdiv64);
+
+	num -= 1;
+	denom -= 1;
+
+	ufcr = readl(sport->port.membase + UFCR);
+	ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div);
+	writel(ufcr, sport->port.membase + UFCR);
 
 	writel(num, sport->port.membase + UBIR);
 	writel(denom, sport->port.membase + UBMR);
 
-	if (div == 7)
-		div = 6; /* 6 in RFDIV means divide by 7 */
-	else
-		div = 6 - div;
-
-	ufcr = readl(sport->port.membase + UFCR);
-	ufcr = (ufcr & (~UFCR_RFDIV)) |
-	    (div << 7);
-	writel(ufcr, sport->port.membase + UFCR);
-
 #ifdef ONEMS
 	writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS);
 #endif
@@ -1031,6 +1185,8 @@
 	if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports))
 		co->index = 0;
 	sport = imx_ports[co->index];
+	if(sport == NULL)
+		return -ENODEV;
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
@@ -1070,22 +1226,22 @@
 
 static int serial_imx_suspend(struct platform_device *dev, pm_message_t state)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_suspend_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_suspend_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_resume(struct platform_device *dev)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_resume_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_resume_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_probe(struct platform_device *pdev)
@@ -1141,19 +1297,29 @@
 	imx_ports[pdev->id] = sport;
 
 	pdata = pdev->dev.platform_data;
-	if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
+	if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
 		sport->have_rtscts = 1;
 
+#ifdef CONFIG_IRDA
+	if (pdata && (pdata->flags & IMXUART_IRDA))
+		sport->use_irda = 1;
+#endif
+
 	if (pdata->init) {
 		ret = pdata->init(pdev);
 		if (ret)
 			goto clkput;
 	}
 
-	uart_add_one_port(&imx_reg, &sport->port);
+	ret = uart_add_one_port(&imx_reg, &sport->port);
+	if (ret)
+		goto deinit;
 	platform_set_drvdata(pdev, &sport->port);
 
 	return 0;
+deinit:
+	if (pdata->exit)
+		pdata->exit(pdev);
 clkput:
 	clk_put(sport->clk);
 	clk_disable(sport->clk);
@@ -1191,13 +1357,13 @@
 }
 
 static struct platform_driver serial_imx_driver = {
-        .probe          = serial_imx_probe,
-        .remove         = serial_imx_remove,
+	.probe		= serial_imx_probe,
+	.remove		= serial_imx_remove,
 
 	.suspend	= serial_imx_suspend,
 	.resume		= serial_imx_resume,
 	.driver		= {
-	        .name	= "imx-uart",
+		.name	= "imx-uart",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h
index c0a3e27..4e5f3bd 100644
--- a/drivers/serial/jsm/jsm.h
+++ b/drivers/serial/jsm/jsm.h
@@ -61,6 +61,7 @@
 	if ((DBG_##nlevel & jsm_debug))			\
 	dev_printk(KERN_##klevel, pdev->dev, fmt, ## args)
 
+#define	MAXLINES	256
 #define MAXPORTS	8
 #define MAX_STOPS_SENT	5
 
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index 31496dc0..107ce2e 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -33,6 +33,8 @@
 
 #include "jsm.h"
 
+static DECLARE_BITMAP(linemap, MAXLINES);
+
 static void jsm_carrier(struct jsm_channel *ch);
 
 static inline int jsm_get_mstat(struct jsm_channel *ch)
@@ -433,6 +435,7 @@
 int __devinit jsm_uart_port_init(struct jsm_board *brd)
 {
 	int i;
+	unsigned int line;
 	struct jsm_channel *ch;
 
 	if (!brd)
@@ -459,9 +462,15 @@
 		brd->channels[i]->uart_port.membase = brd->re_map_membase;
 		brd->channels[i]->uart_port.fifosize = 16;
 		brd->channels[i]->uart_port.ops = &jsm_ops;
-		brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2;
+		line = find_first_zero_bit(linemap, MAXLINES);
+		if (line >= MAXLINES) {
+			printk(KERN_INFO "jsm: linemap is full, added device failed\n");
+			continue;
+		} else
+			set_bit((int)line, linemap);
+		brd->channels[i]->uart_port.line = line;
 		if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
-			printk(KERN_INFO "Added device failed\n");
+			printk(KERN_INFO "jsm: add device failed\n");
 		else
 			printk(KERN_INFO "Added device \n");
 	}
@@ -494,6 +503,7 @@
 
 		ch = brd->channels[i];
 
+		clear_bit((int)(ch->uart_port.line), linemap);
 		uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
 	}
 
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index dbf5357..a4cf107 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -47,12 +47,17 @@
 #include <linux/clk.h>
 #include <linux/ctype.h>
 #include <linux/err.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/clock.h>
 #include <asm/sh_bios.h>
 #endif
 
+#ifdef CONFIG_H8300
+#include <asm/gpio.h>
+#endif
+
 #include "sh-sci.h"
 
 struct sci_port {
@@ -75,14 +80,22 @@
 	int			break_flag;
 
 #ifdef CONFIG_HAVE_CLK
-	/* Port clock */
-	struct clk		*clk;
+	/* Interface clock */
+	struct clk		*iclk;
+	/* Data clock */
+	struct clk		*dclk;
 #endif
+	struct list_head	node;
 };
 
-#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
-static struct sci_port *serial_console_port;
+struct sh_sci_priv {
+	spinlock_t lock;
+	struct list_head ports;
+
+#ifdef CONFIG_HAVE_CLK
+	struct notifier_block clk_nb;
 #endif
+};
 
 /* Function prototypes */
 static void sci_stop_tx(struct uart_port *port);
@@ -138,9 +151,8 @@
 		status = sci_in(port, SCxSR);
 	} while (!(status & SCxSR_TDxE(port)));
 
-	sci_in(port, SCxSR);            /* Dummy read */
-	sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port));
 	sci_out(port, SCxTDR, c);
+	sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port));
 }
 #endif /* CONFIG_CONSOLE_POLL || CONFIG_SERIAL_SH_SCI_CONSOLE */
 
@@ -159,12 +171,12 @@
 		*mstpcrl &= ~mask;
 }
 
-static inline void h8300_sci_enable(struct uart_port *port)
+static void h8300_sci_enable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_enable);
 }
 
-static inline void h8300_sci_disable(struct uart_port *port)
+static void h8300_sci_disable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_disable);
 }
@@ -611,7 +623,7 @@
 	int copied = 0;
 	unsigned short status = sci_in(port, SCxSR);
 	struct tty_struct *tty = port->info->port.tty;
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (uart_handle_break(port))
 		return 0;
@@ -726,19 +738,43 @@
 static int sci_notifier(struct notifier_block *self,
 			unsigned long phase, void *p)
 {
-	int i;
+	struct sh_sci_priv *priv = container_of(self,
+						struct sh_sci_priv, clk_nb);
+	struct sci_port *sci_port;
+	unsigned long flags;
 
 	if ((phase == CPUFREQ_POSTCHANGE) ||
-	    (phase == CPUFREQ_RESUMECHANGE))
-		for (i = 0; i < SCI_NPORTS; i++) {
-			struct sci_port *s = &sci_ports[i];
-			s->port.uartclk = clk_get_rate(s->clk);
-		}
+	    (phase == CPUFREQ_RESUMECHANGE)) {
+		spin_lock_irqsave(&priv->lock, flags);
+		list_for_each_entry(sci_port, &priv->ports, node)
+			sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
+
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
 
 	return NOTIFY_OK;
 }
 
-static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 };
+static void sci_clk_enable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	clk_enable(sci_port->dclk);
+	sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
+
+	if (sci_port->iclk)
+		clk_enable(sci_port->iclk);
+}
+
+static void sci_clk_disable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	if (sci_port->iclk)
+		clk_disable(sci_port->iclk);
+
+	clk_disable(sci_port->dclk);
+}
 #endif
 
 static int sci_request_irq(struct sci_port *port)
@@ -865,15 +901,11 @@
 
 static int sci_startup(struct uart_port *port)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (s->enable)
 		s->enable(port);
 
-#ifdef CONFIG_HAVE_CLK
-	s->clk = clk_get(NULL, "module_clk");
-#endif
-
 	sci_request_irq(s);
 	sci_start_tx(port);
 	sci_start_rx(port, 1);
@@ -883,7 +915,7 @@
 
 static void sci_shutdown(struct uart_port *port)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	sci_stop_rx(port);
 	sci_stop_tx(port);
@@ -891,11 +923,6 @@
 
 	if (s->disable)
 		s->disable(port);
-
-#ifdef CONFIG_HAVE_CLK
-	clk_put(s->clk);
-	s->clk = NULL;
-#endif
 }
 
 static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
@@ -980,25 +1007,31 @@
 
 static void sci_config_port(struct uart_port *port, int flags)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	port->type = s->type;
 
-	if (port->flags & UPF_IOREMAP && !port->membase) {
-#if defined(CONFIG_SUPERH64)
-		port->mapbase = onchip_remap(SCIF_ADDR_SH5, 1024, "SCIF");
-		port->membase = (void __iomem *)port->mapbase;
-#else
-		port->membase = ioremap_nocache(port->mapbase, 0x40);
-#endif
+	if (port->membase)
+		return;
 
-		dev_err(port->dev, "can't remap port#%d\n", port->line);
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = ioremap_nocache(port->mapbase, 0x40);
+
+		if (IS_ERR(port->membase))
+			dev_err(port->dev, "can't remap port#%d\n", port->line);
+	} else {
+		/*
+		 * For the simple (and majority of) cases where we don't
+		 * need to do any remapping, just cast the cookie
+		 * directly.
+		 */
+		port->membase = (void __iomem *)port->mapbase;
 	}
 }
 
 static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
 		return -EINVAL;
@@ -1032,63 +1065,60 @@
 #endif
 };
 
-static void __init sci_init_ports(void)
+static void __devinit sci_init_single(struct platform_device *dev,
+				      struct sci_port *sci_port,
+				      unsigned int index,
+				      struct plat_sci_port *p)
 {
-	static int first = 1;
-	int i;
-
-	if (!first)
-		return;
-
-	first = 0;
-
-	for (i = 0; i < SCI_NPORTS; i++) {
-		sci_ports[i].port.ops		= &sci_uart_ops;
-		sci_ports[i].port.iotype	= UPIO_MEM;
-		sci_ports[i].port.line		= i;
-		sci_ports[i].port.fifosize	= 1;
+	sci_port->port.ops	= &sci_uart_ops;
+	sci_port->port.iotype	= UPIO_MEM;
+	sci_port->port.line	= index;
+	sci_port->port.fifosize	= 1;
 
 #if defined(__H8300H__) || defined(__H8300S__)
 #ifdef __H8300S__
-		sci_ports[i].enable	= h8300_sci_enable;
-		sci_ports[i].disable	= h8300_sci_disable;
+	sci_port->enable	= h8300_sci_enable;
+	sci_port->disable	= h8300_sci_disable;
 #endif
-		sci_ports[i].port.uartclk = CONFIG_CPU_CLOCK;
+	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
 #elif defined(CONFIG_HAVE_CLK)
-		/*
-		 * XXX: We should use a proper SCI/SCIF clock
-		 */
-		{
-			struct clk *clk = clk_get(NULL, "module_clk");
-			sci_ports[i].port.uartclk = clk_get_rate(clk);
-			clk_put(clk);
-		}
+	sci_port->iclk		= p->clk ? clk_get(&dev->dev, p->clk) : NULL;
+	sci_port->dclk		= clk_get(&dev->dev, "peripheral_clk");
+	sci_port->enable	= sci_clk_enable;
+	sci_port->disable	= sci_clk_disable;
 #else
 #error "Need a valid uartclk"
 #endif
 
-		sci_ports[i].break_timer.data = (unsigned long)&sci_ports[i];
-		sci_ports[i].break_timer.function = sci_break_timer;
+	sci_port->break_timer.data = (unsigned long)sci_port;
+	sci_port->break_timer.function = sci_break_timer;
+	init_timer(&sci_port->break_timer);
 
-		init_timer(&sci_ports[i].break_timer);
-	}
-}
+	sci_port->port.mapbase	= p->mapbase;
+	sci_port->port.membase	= p->membase;
 
-int __init early_sci_setup(struct uart_port *port)
-{
-	if (unlikely(port->line > SCI_NPORTS))
-		return -ENODEV;
+	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
+	sci_port->port.flags	= p->flags;
+	sci_port->port.dev	= &dev->dev;
+	sci_port->type		= sci_port->port.type = p->type;
 
-	sci_init_ports();
+	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
 
-	sci_ports[port->line].port.membase	= port->membase;
-	sci_ports[port->line].port.mapbase	= port->mapbase;
-	sci_ports[port->line].port.type		= port->type;
-
-	return 0;
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
+static struct tty_driver *serial_console_device(struct console *co, int *index)
+{
+	struct uart_driver *p = &sci_uart_driver;
+	*index = co->index;
+	return p->tty_driver;
+}
+
+static void serial_console_putchar(struct uart_port *port, int ch)
+{
+	sci_poll_put_char(port, ch);
+}
+
 /*
  *	Print a string to the serial port trying not to disturb
  *	any possible real use of the port...
@@ -1096,25 +1126,27 @@
 static void serial_console_write(struct console *co, const char *s,
 				 unsigned count)
 {
-	struct uart_port *port = &serial_console_port->port;
+	struct uart_port *port = co->data;
+	struct sci_port *sci_port = to_sci_port(port);
 	unsigned short bits;
-	int i;
 
-	for (i = 0; i < count; i++) {
-		if (*s == 10)
-			sci_poll_put_char(port, '\r');
+	if (sci_port->enable)
+		sci_port->enable(port);
 
-		sci_poll_put_char(port, *s++);
-	}
+	uart_console_write(port, s, count, serial_console_putchar);
 
 	/* wait until fifo is empty and last bit has been transmitted */
 	bits = SCxSR_TDxE(port) | SCxSR_TEND(port);
 	while ((sci_in(port, SCxSR) & bits) != bits)
 		cpu_relax();
+
+	if (sci_port->disable);
+		sci_port->disable(port);
 }
 
 static int __init serial_console_setup(struct console *co, char *options)
 {
+	struct sci_port *sci_port;
 	struct uart_port *port;
 	int baud = 115200;
 	int bits = 8;
@@ -1130,8 +1162,9 @@
 	if (co->index >= SCI_NPORTS)
 		co->index = 0;
 
-	serial_console_port = &sci_ports[co->index];
-	port = &serial_console_port->port;
+	sci_port = &sci_ports[co->index];
+	port = &sci_port->port;
+	co->data = port;
 
 	/*
 	 * Also need to check port->type, we don't actually have any
@@ -1141,21 +1174,11 @@
 	 */
 	if (!port->type)
 		return -ENODEV;
-	if (!port->membase || !port->mapbase)
-		return -ENODEV;
 
-	port->type = serial_console_port->type;
+	sci_config_port(port, 0);
 
-#ifdef CONFIG_HAVE_CLK
-	if (!serial_console_port->clk)
-		serial_console_port->clk = clk_get(NULL, "module_clk");
-#endif
-
-	if (port->flags & UPF_IOREMAP)
-		sci_config_port(port, 0);
-
-	if (serial_console_port->enable)
-		serial_console_port->enable(port);
+	if (sci_port->enable)
+		sci_port->enable(port);
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
@@ -1166,22 +1189,21 @@
 	if (ret == 0)
 		sci_stop_rx(port);
 #endif
+	/* TODO: disable clock */
 	return ret;
 }
 
 static struct console serial_console = {
 	.name		= "ttySC",
-	.device		= uart_console_device,
+	.device		= serial_console_device,
 	.write		= serial_console_write,
 	.setup		= serial_console_setup,
 	.flags		= CON_PRINTBUFFER,
 	.index		= -1,
-	.data		= &sci_uart_driver,
 };
 
 static int __init sci_console_init(void)
 {
-	sci_init_ports();
 	register_console(&serial_console);
 	return 0;
 }
@@ -1207,6 +1229,61 @@
 	.cons		= SCI_CONSOLE,
 };
 
+
+static int sci_remove(struct platform_device *dev)
+{
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
+
+#ifdef CONFIG_HAVE_CLK
+	cpufreq_unregister_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_remove_one_port(&sci_uart_driver, &p->port);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	kfree(priv);
+	return 0;
+}
+
+static int __devinit sci_probe_single(struct platform_device *dev,
+				      unsigned int index,
+				      struct plat_sci_port *p,
+				      struct sci_port *sciport)
+{
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	unsigned long flags;
+	int ret;
+
+	/* Sanity check */
+	if (unlikely(index >= SCI_NPORTS)) {
+		dev_notice(&dev->dev, "Attempting to register port "
+			   "%d when only %d are available.\n",
+			   index+1, SCI_NPORTS);
+		dev_notice(&dev->dev, "Consider bumping "
+			   "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n");
+		return 0;
+	}
+
+	sci_init_single(dev, sciport, index, p);
+
+	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&sciport->node);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_add(&sciport->node, &priv->ports);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
 /*
  * Register a set of serial devices attached to a platform device.  The
  * list is terminated with a zero flags entry, which means we expect
@@ -1216,57 +1293,34 @@
 static int __devinit sci_probe(struct platform_device *dev)
 {
 	struct plat_sci_port *p = dev->dev.platform_data;
+	struct sh_sci_priv *priv;
 	int i, ret = -EINVAL;
 
-	for (i = 0; p && p->flags != 0; p++, i++) {
-		struct sci_port *sciport = &sci_ports[i];
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-		/* Sanity check */
-		if (unlikely(i == SCI_NPORTS)) {
-			dev_notice(&dev->dev, "Attempting to register port "
-				   "%d when only %d are available.\n",
-				   i+1, SCI_NPORTS);
-			dev_notice(&dev->dev, "Consider bumping "
-				   "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n");
-			break;
-		}
-
-		sciport->port.mapbase	= p->mapbase;
-
-		if (p->mapbase && !p->membase) {
-			if (p->flags & UPF_IOREMAP) {
-				p->membase = ioremap_nocache(p->mapbase, 0x40);
-				if (IS_ERR(p->membase)) {
-					ret = PTR_ERR(p->membase);
-					goto err_unreg;
-				}
-			} else {
-				/*
-				 * For the simple (and majority of) cases
-				 * where we don't need to do any remapping,
-				 * just cast the cookie directly.
-				 */
-				p->membase = (void __iomem *)p->mapbase;
-			}
-		}
-
-		sciport->port.membase	= p->membase;
-
-		sciport->port.irq	= p->irqs[SCIx_TXI_IRQ];
-		sciport->port.flags	= p->flags;
-		sciport->port.dev	= &dev->dev;
-
-		sciport->type		= sciport->port.type = p->type;
-
-		memcpy(&sciport->irqs, &p->irqs, sizeof(p->irqs));
-
-		uart_add_one_port(&sci_uart_driver, &sciport->port);
-	}
+	INIT_LIST_HEAD(&priv->ports);
+	spin_lock_init(&priv->lock);
+	platform_set_drvdata(dev, priv);
 
 #ifdef CONFIG_HAVE_CLK
-	cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
+	priv->clk_nb.notifier_call = sci_notifier;
+	cpufreq_register_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 
+	if (dev->id != -1) {
+		ret = sci_probe_single(dev, dev->id, p, &sci_ports[dev->id]);
+		if (ret)
+			goto err_unreg;
+	} else {
+		for (i = 0; p && p->flags != 0; p++, i++) {
+			ret = sci_probe_single(dev, i, p, &sci_ports[i]);
+			if (ret)
+				goto err_unreg;
+		}
+	}
+
 #ifdef CONFIG_SH_STANDARD_BIOS
 	sh_bios_gdb_detach();
 #endif
@@ -1274,50 +1328,36 @@
 	return 0;
 
 err_unreg:
-	for (i = i - 1; i >= 0; i--)
-		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
-
+	sci_remove(dev);
 	return ret;
 }
 
-static int __devexit sci_remove(struct platform_device *dev)
-{
-	int i;
-
-#ifdef CONFIG_HAVE_CLK
-	cpufreq_unregister_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
-
-	for (i = 0; i < SCI_NPORTS; i++)
-		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
-
-	return 0;
-}
-
 static int sci_suspend(struct platform_device *dev, pm_message_t state)
 {
-	int i;
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
 
-	for (i = 0; i < SCI_NPORTS; i++) {
-		struct sci_port *p = &sci_ports[i];
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_suspend_port(&sci_uart_driver, &p->port);
 
-		if (p->type != PORT_UNKNOWN && p->port.dev == &dev->dev)
-			uart_suspend_port(&sci_uart_driver, &p->port);
-	}
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
 
 static int sci_resume(struct platform_device *dev)
 {
-	int i;
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
 
-	for (i = 0; i < SCI_NPORTS; i++) {
-		struct sci_port *p = &sci_ports[i];
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_resume_port(&sci_uart_driver, &p->port);
 
-		if (p->type != PORT_UNKNOWN && p->port.dev == &dev->dev)
-			uart_resume_port(&sci_uart_driver, &p->port);
-	}
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
@@ -1339,8 +1379,6 @@
 
 	printk(banner);
 
-	sci_init_ports();
-
 	ret = uart_register_driver(&sci_uart_driver);
 	if (likely(ret == 0)) {
 		ret = platform_driver_register(&sci_driver);
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index d0aa82d..38072c1 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -91,6 +91,9 @@
 # define SCSPTR5                0xa4050128
 # define SCIF_ORER              0x0001  /* overrun error bit */
 # define SCSCR_INIT(port)       0x0038  /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+# define SCIF_ORER              0x0001  /* overrun error bit */
+# define SCSCR_INIT(port)       0x0038  /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
 #elif defined(CONFIG_CPU_SUBTYPE_SH4_202)
 # define SCSPTR2 0xffe80020 /* 16 bit SCIF */
 # define SCIF_ORER 0x0001   /* overrun error bit */
@@ -314,7 +317,18 @@
     }									\
   }
 
-#define CPU_SCIF_FNS(name, scif_offset, scif_size)				\
+#ifdef CONFIG_H8300
+/* h8300 don't have SCIF */
+#define CPU_SCIF_FNS(name)						\
+  static inline unsigned int sci_##name##_in(struct uart_port *port)	\
+  {									\
+    return 0;								\
+  }									\
+  static inline void sci_##name##_out(struct uart_port *port, unsigned int value) \
+  {									\
+  }
+#else
+#define CPU_SCIF_FNS(name, scif_offset, scif_size)			\
   static inline unsigned int sci_##name##_in(struct uart_port *port)	\
   {									\
     SCI_IN(scif_size, scif_offset);					\
@@ -323,6 +337,7 @@
   {									\
     SCI_OUT(scif_size, scif_offset, value);				\
   }
+#endif
 
 #define CPU_SCI_FNS(name, sci_offset, sci_size)				\
   static inline unsigned int sci_##name##_in(struct uart_port* port)	\
@@ -360,8 +375,10 @@
 		 sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \
                  h8_sci_offset, h8_sci_size) \
   CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size)
-#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \
+  CPU_SCIF_FNS(name)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
         #define SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size) \
                 CPU_SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size)
         #define SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) \
@@ -390,7 +407,8 @@
 SCIF_FNS(SCxTDR, 0x20,  8)
 SCIF_FNS(SCxRDR, 0x24,  8)
 SCIF_FNS(SCLSR,  0x24, 16)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
 SCIx_FNS(SCSMR,  0x00, 16, 0x00, 16)
 SCIx_FNS(SCBRR,  0x04,  8, 0x04,  8)
 SCIx_FNS(SCSCR,  0x08, 16, 0x08, 16)
@@ -604,10 +622,21 @@
                 return ctrl_inb(SCSPTR5) & 0x0008 ? 1 : 0; /* SCIF5 */
         return 1;
 }
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+#  define SCFSR    0x0010
+#  define SCASSR   0x0014
+static inline int sci_rxd_in(struct uart_port *port)
+{
+	if (port->type == PORT_SCIF)
+		return ctrl_inw((port->mapbase + SCFSR))  & SCIF_BRK ? 1 : 0;
+	if (port->type == PORT_SCIFA)
+		return ctrl_inw((port->mapbase + SCASSR)) & SCIF_BRK ? 1 : 0;
+	return 1;
+}
 #elif defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103)
 static inline int sci_rxd_in(struct uart_port *port)
 {
-         return sci_in(port, SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */
+         return sci_in(port, SCSPTR)&0x0001 ? 1 : 0; /* SCIF */
 }
 #elif defined(__H8300H__) || defined(__H8300S__)
 static inline int sci_rxd_in(struct uart_port *port)
@@ -757,7 +786,8 @@
       defined(CONFIG_CPU_SUBTYPE_SH7720) || \
       defined(CONFIG_CPU_SUBTYPE_SH7721)
 #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
 static inline int scbrr_calc(struct uart_port *port, int bps, int clk)
 {
 	if (port->type == PORT_SCIF)
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644
index 0000000..ac9e5d5
--- /dev/null
+++ b/drivers/serial/timbuart.c
@@ -0,0 +1,526 @@
+/*
+ * timbuart.c timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+
+#include "timbuart.h"
+
+struct timbuart_port {
+	struct uart_port	port;
+	struct tasklet_struct	tasklet;
+	int			usedma;
+	u8			last_ier;
+	struct platform_device  *dev;
+};
+
+static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
+	921600, 1843200, 3250000};
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
+
+static void timbuart_stop_rx(struct uart_port *port)
+{
+	/* spin lock held by upper layer, disable all RX interrupts */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_stop_tx(struct uart_port *port)
+{
+	/* spinlock held by upper layer, disable TX interrupt */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_start_tx(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	/* do not transfer anything here -> fire off the tasklet */
+	tasklet_schedule(&uart->tasklet);
+}
+
+static void timbuart_flush_buffer(struct uart_port *port)
+{
+	u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX;
+
+	iowrite8(ctl, port->membase + TIMBUART_CTRL);
+	iowrite8(TXBF, port->membase + TIMBUART_ISR);
+}
+
+static void timbuart_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+
+	while (ioread8(port->membase + TIMBUART_ISR) & RXDP) {
+		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
+		port->icount.rx++;
+		tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	}
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(port->info->port.tty);
+	spin_lock(&port->lock);
+
+	dev_dbg(port->dev, "%s - total read %d bytes\n",
+		__func__, port->icount.rx);
+}
+
+static void timbuart_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->info->xmit;
+
+	while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) &&
+		!uart_circ_empty(xmit)) {
+		iowrite8(xmit->buf[xmit->tail],
+			port->membase + TIMBUART_TXFIFO);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	dev_dbg(port->dev,
+		"%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
+		 __func__,
+		port->icount.tx,
+		ioread8(port->membase + TIMBUART_CTRL),
+		port->mctrl & TIOCM_RTS,
+		ioread8(port->membase + TIMBUART_BAUDRATE));
+}
+
+static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	struct circ_buf *xmit = &port->info->xmit;
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+		return;
+
+	if (port->x_char)
+		return;
+
+	if (isr & TXFLAGS) {
+		timbuart_tx_chars(port);
+		/* clear all TX interrupts */
+		iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);
+
+		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
+	} else
+		/* Re-enable any tx interrupt */
+		*ier |= uart->last_ier & TXFLAGS;
+
+	/* enable interrupts if there are chars in the transmit buffer,
+	 * Or if we delivered some bytes and want the almost empty interrupt
+	 * we wake up the upper layer later when we got the interrupt
+	 * to give it some time to go out...
+	 */
+	if (!uart_circ_empty(xmit))
+		*ier |= TXBAE;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	if (isr & RXFLAGS) {
+		/* Some RX status is set */
+		if (isr & RXBF) {
+			u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
+				TIMBUART_CTRL_FLSHRX;
+			iowrite8(ctl, port->membase + TIMBUART_CTRL);
+			port->icount.overrun++;
+		} else if (isr & (RXDP))
+			timbuart_rx_chars(port);
+
+		/* ack all RX interrupts */
+		iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
+	}
+
+	/* always have the RX interrupts enabled */
+	*ier |= RXBAF | RXBF | RXTT;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_tasklet(unsigned long arg)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)arg;
+	u8 isr, ier = 0;
+
+	spin_lock(&uart->port.lock);
+
+	isr = ioread8(uart->port.membase + TIMBUART_ISR);
+	dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);
+
+	if (!uart->usedma)
+		timbuart_handle_tx_port(&uart->port, isr, &ier);
+
+	timbuart_mctrl_check(&uart->port, isr, &ier);
+
+	if (!uart->usedma)
+		timbuart_handle_rx_port(&uart->port, isr, &ier);
+
+	iowrite8(ier, uart->port.membase + TIMBUART_IER);
+
+	spin_unlock(&uart->port.lock);
+	dev_dbg(uart->port.dev, "%s leaving\n", __func__);
+}
+
+static unsigned int timbuart_tx_empty(struct uart_port *port)
+{
+	u8 isr = ioread8(port->membase + TIMBUART_ISR);
+
+	return (isr & TXBAE) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int timbuart_get_mctrl(struct uart_port *port)
+{
+	u8 cts = ioread8(port->membase + TIMBUART_CTRL);
+	dev_dbg(port->dev, "%s - cts %x\n", __func__, cts);
+
+	if (cts & TIMBUART_CTRL_CTS)
+		return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
+	else
+		return TIOCM_DSR | TIOCM_CAR;
+}
+
+static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
+
+	if (mctrl & TIOCM_RTS)
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+	else
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+}
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
+{
+	unsigned int cts;
+
+	if (isr & CTS_DELTA) {
+		/* ack */
+		iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
+		cts = timbuart_get_mctrl(port);
+		uart_handle_cts_change(port, cts & TIOCM_CTS);
+		wake_up_interruptible(&port->info->delta_msr_wait);
+	}
+
+	*ier |= CTS_DELTA;
+}
+
+static void timbuart_enable_ms(struct uart_port *port)
+{
+	/* N/A */
+}
+
+static void timbuart_break_ctl(struct uart_port *port, int ctl)
+{
+	/* N/A */
+}
+
+static int timbuart_startup(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	dev_dbg(port->dev, "%s\n", __func__);
+
+	iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
+	iowrite8(0xff, port->membase + TIMBUART_ISR);
+	/* Enable all but TX interrupts */
+	iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
+		port->membase + TIMBUART_IER);
+
+	return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
+		"timb-uart", uart);
+}
+
+static void timbuart_shutdown(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	dev_dbg(port->dev, "%s\n", __func__);
+	free_irq(port->irq, uart);
+	iowrite8(0, port->membase + TIMBUART_IER);
+}
+
+static int get_bindex(int baud)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(baudrates); i++)
+		if (baud <= baudrates[i])
+			return i;
+
+	return -1;
+}
+
+static void timbuart_set_termios(struct uart_port *port,
+	struct ktermios *termios,
+	struct ktermios *old)
+{
+	unsigned int baud;
+	short bindex;
+	unsigned long flags;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+	bindex = get_bindex(baud);
+	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
+
+	if (bindex < 0)
+		bindex = 0;
+	baud = baudrates[bindex];
+
+	/* The serial layer calls into this once with old = NULL when setting
+	   up initially */
+	if (old)
+		tty_termios_copy_hw(termios, old);
+	tty_termios_encode_baud_rate(termios, baud, baud);
+
+	spin_lock_irqsave(&port->lock, flags);
+	iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+	uart_update_timeout(port, termios->c_cflag, baud);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *timbuart_type(struct uart_port *port)
+{
+	return port->type == PORT_UNKNOWN ? "timbuart" : NULL;
+}
+
+/* We do not request/release mappings of the registers here,
+ * currently it's done in the proble function.
+ */
+static void timbuart_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (port->flags & UPF_IOREMAP) {
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+
+	release_mem_region(port->mapbase, size);
+}
+
+static int timbuart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (!request_mem_region(port->mapbase, size, "timb-uart"))
+		return -EBUSY;
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = ioremap(port->mapbase, size);
+		if (port->membase == NULL) {
+			release_mem_region(port->mapbase, size);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)devid;
+
+	if (ioread8(uart->port.membase + TIMBUART_IPR)) {
+		uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER);
+
+		/* disable interrupts, the tasklet enables them again */
+		iowrite8(0, uart->port.membase + TIMBUART_IER);
+
+		/* fire off bottom half */
+		tasklet_schedule(&uart->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * Configure/autoconfigure the port.
+ */
+static void timbuart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_TIMBUART;
+		timbuart_request_port(port);
+	}
+}
+
+static int timbuart_verify_port(struct uart_port *port,
+	struct serial_struct *ser)
+{
+	/* we don't want the core code to modify any port params */
+	return -EINVAL;
+}
+
+static struct uart_ops timbuart_ops = {
+	.tx_empty = timbuart_tx_empty,
+	.set_mctrl = timbuart_set_mctrl,
+	.get_mctrl = timbuart_get_mctrl,
+	.stop_tx = timbuart_stop_tx,
+	.start_tx = timbuart_start_tx,
+	.flush_buffer = timbuart_flush_buffer,
+	.stop_rx = timbuart_stop_rx,
+	.enable_ms = timbuart_enable_ms,
+	.break_ctl = timbuart_break_ctl,
+	.startup = timbuart_startup,
+	.shutdown = timbuart_shutdown,
+	.set_termios = timbuart_set_termios,
+	.type = timbuart_type,
+	.release_port = timbuart_release_port,
+	.request_port = timbuart_request_port,
+	.config_port = timbuart_config_port,
+	.verify_port = timbuart_verify_port
+};
+
+static struct uart_driver timbuart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "timberdale_uart",
+	.dev_name = "ttyTU",
+	.major = TIMBUART_MAJOR,
+	.minor = TIMBUART_MINOR,
+	.nr = 1
+};
+
+static int timbuart_probe(struct platform_device *dev)
+{
+	int err;
+	struct timbuart_port *uart;
+	struct resource *iomem;
+
+	dev_dbg(&dev->dev, "%s\n", __func__);
+
+	uart = kzalloc(sizeof(*uart), GFP_KERNEL);
+	if (!uart) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	uart->usedma = 0;
+
+	uart->port.uartclk = 3250000 * 16;
+	uart->port.fifosize  = TIMBUART_FIFO_SIZE;
+	uart->port.regshift  = 2;
+	uart->port.iotype  = UPIO_MEM;
+	uart->port.ops = &timbuart_ops;
+	uart->port.irq = 0;
+	uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+	uart->port.line  = 0;
+	uart->port.dev	= &dev->dev;
+
+	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		err = -ENOMEM;
+		goto err_register;
+	}
+	uart->port.mapbase = iomem->start;
+	uart->port.membase = NULL;
+
+	uart->port.irq = platform_get_irq(dev, 0);
+	if (uart->port.irq < 0) {
+		err = -EINVAL;
+		goto err_register;
+	}
+
+	tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
+
+	err = uart_register_driver(&timbuart_driver);
+	if (err)
+		goto err_register;
+
+	err = uart_add_one_port(&timbuart_driver, &uart->port);
+	if (err)
+		goto err_add_port;
+
+	platform_set_drvdata(dev, uart);
+
+	return 0;
+
+err_add_port:
+	uart_unregister_driver(&timbuart_driver);
+err_register:
+	kfree(uart);
+err_mem:
+	printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
+		err);
+
+	return err;
+}
+
+static int timbuart_remove(struct platform_device *dev)
+{
+	struct timbuart_port *uart = platform_get_drvdata(dev);
+
+	tasklet_kill(&uart->tasklet);
+	uart_remove_one_port(&timbuart_driver, &uart->port);
+	uart_unregister_driver(&timbuart_driver);
+	kfree(uart);
+
+	return 0;
+}
+
+static struct platform_driver timbuart_platform_driver = {
+	.driver = {
+		.name	= "timb-uart",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= timbuart_probe,
+	.remove		= timbuart_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __init timbuart_init(void)
+{
+	return platform_driver_register(&timbuart_platform_driver);
+}
+
+static void __exit timbuart_exit(void)
+{
+	platform_driver_unregister(&timbuart_platform_driver);
+}
+
+module_init(timbuart_init);
+module_exit(timbuart_exit);
+
+MODULE_DESCRIPTION("Timberdale UART driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:timb-uart");
+
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644
index 0000000..7e56676
--- /dev/null
+++ b/drivers/serial/timbuart.h
@@ -0,0 +1,58 @@
+/*
+ * timbuart.c timberdale FPGA GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#ifndef _TIMBUART_H
+#define _TIMBUART_H
+
+#define TIMBUART_FIFO_SIZE	2048
+
+#define TIMBUART_RXFIFO		0x08
+#define TIMBUART_TXFIFO		0x0c
+#define TIMBUART_IER		0x10
+#define TIMBUART_IPR		0x14
+#define TIMBUART_ISR		0x18
+#define TIMBUART_CTRL		0x1c
+#define TIMBUART_BAUDRATE	0x20
+
+#define TIMBUART_CTRL_RTS	0x01
+#define TIMBUART_CTRL_CTS	0x02
+#define TIMBUART_CTRL_FLSHTX	0x40
+#define TIMBUART_CTRL_FLSHRX	0x80
+
+#define TXBF		0x01
+#define TXBAE		0x02
+#define CTS_DELTA	0x04
+#define RXDP		0x08
+#define RXBAF		0x10
+#define RXBF		0x20
+#define RXTT		0x40
+#define RXBNAE		0x80
+#define TXBE		0x100
+
+#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
+#define TXFLAGS (TXBF | TXBAE)
+
+#define TIMBUART_MAJOR 204
+#define TIMBUART_MINOR 192
+
+#endif /* _TIMBUART_H */
+
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 12d13d9..d687a9b 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -24,6 +24,7 @@
 #include <linux/sh_intc.h>
 #include <linux/sysdev.h>
 #include <linux/list.h>
+#include <linux/topology.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
@@ -770,11 +771,19 @@
 	/* register the vectors one by one */
 	for (i = 0; i < desc->nr_vectors; i++) {
 		struct intc_vect *vect = desc->vectors + i;
+		unsigned int irq = evt2irq(vect->vect);
+		struct irq_desc *irq_desc;
 
 		if (!vect->enum_id)
 			continue;
 
-		intc_register_irq(desc, d, vect->enum_id, evt2irq(vect->vect));
+		irq_desc = irq_to_desc_alloc_node(irq, numa_node_id());
+		if (unlikely(!irq_desc)) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
+		}
+
+		intc_register_irq(desc, d, vect->enum_id, irq);
 	}
 }
 
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 83a185d..9574947 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -212,7 +212,7 @@
 
 config SPI_XILINX
 	tristate "Xilinx SPI controller"
-	depends on XILINX_VIRTEX && EXPERIMENTAL
+	depends on (XILINX_VIRTEX || MICROBLAZE) && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  This exposes the SPI controller IP from the Xilinx EDK.
diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index 7dc3a6b..a0e0d24 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -29,6 +29,7 @@
 	}
 	return -ENODEV;
 }
+EXPORT_SYMBOL(ssb_watchdog_timer_set);
 
 u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 {
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index c6c816b..5eee3f8 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -22,6 +22,7 @@
 	default y if PCMCIA && !M32R			# sl811_cs
 	default y if ARM				# SL-811
 	default y if SUPERH				# r8a66597-hcd
+	default y if MICROBLAZE
 	default PCI
 
 # many non-PCI SOC chips embed OHCI
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7a1164d..ddeb691 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -16,7 +16,8 @@
  *	v0.9  - thorough cleaning, URBification, almost a rewrite
  *	v0.10 - some more cleanups
  *	v0.11 - fixed flow control, read error doesn't stop reads
- *	v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced
+ *	v0.12 - added TIOCM ioctls, added break handling, made struct acm
+ *		kmalloced
  *	v0.13 - added termios, added hangup
  *	v0.14 - sized down struct acm
  *	v0.15 - fixed flow control again - characters could be lost
@@ -62,7 +63,7 @@
 #include <linux/tty_flip.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
 #include <asm/byteorder.h>
@@ -87,7 +88,10 @@
 
 static DEFINE_MUTEX(open_mutex);
 
-#define ACM_READY(acm)	(acm && acm->dev && acm->used)
+#define ACM_READY(acm)	(acm && acm->dev && acm->port.count)
+
+static const struct tty_port_operations acm_port_ops = {
+};
 
 #ifdef VERBOSE_DEBUG
 #define verbose	1
@@ -99,13 +103,15 @@
  * Functions for ACM control messages.
  */
 
-static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len)
+static int acm_ctrl_msg(struct acm *acm, int request, int value,
+							void *buf, int len)
 {
 	int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0),
 		request, USB_RT_ACM, value,
 		acm->control->altsetting[0].desc.bInterfaceNumber,
 		buf, len, 5000);
-	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval);
+	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d",
+						request, value, len, retval);
 	return retval < 0 ? retval : 0;
 }
 
@@ -150,9 +156,8 @@
 
 	n = ACM_NW;
 	spin_lock_irqsave(&acm->write_lock, flags);
-	for (i = 0; i < ACM_NW; i++) {
+	for (i = 0; i < ACM_NW; i++)
 		n -= acm->wb[i].use;
-	}
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 	return n;
 }
@@ -183,7 +188,8 @@
 	wb->urb->transfer_buffer_length = wb->len;
 	wb->urb->dev = acm->dev;
 
-	if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) {
+	rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
+	if (rc < 0) {
 		dbg("usb_submit_urb(write bulk) failed: %d", rc);
 		acm_write_done(acm, wb);
 	}
@@ -262,6 +268,7 @@
 {
 	struct acm *acm = urb->context;
 	struct usb_cdc_notification *dr = urb->transfer_buffer;
+	struct tty_struct *tty;
 	unsigned char *data;
 	int newctrl;
 	int retval;
@@ -287,40 +294,45 @@
 
 	data = (unsigned char *)(dr + 1);
 	switch (dr->bNotificationType) {
+	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+		dbg("%s network", dr->wValue ?
+					"connected to" : "disconnected from");
+		break;
 
-		case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+	case USB_CDC_NOTIFY_SERIAL_STATE:
+		tty = tty_port_tty_get(&acm->port);
+		newctrl = get_unaligned_le16(data);
 
-			dbg("%s network", dr->wValue ? "connected to" : "disconnected from");
-			break;
-
-		case USB_CDC_NOTIFY_SERIAL_STATE:
-
-			newctrl = get_unaligned_le16(data);
-
-			if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
+		if (tty) {
+			if (!acm->clocal &&
+				(acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
 				dbg("calling hangup");
-				tty_hangup(acm->tty);
+				tty_hangup(tty);
 			}
+			tty_kref_put(tty);
+		}
 
-			acm->ctrlin = newctrl;
+		acm->ctrlin = newctrl;
 
-			dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
-				acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',	acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',	acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',	acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
-
+		dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
+			acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
 			break;
 
-		default:
-			dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
-				dr->bNotificationType, dr->wIndex,
-				dr->wLength, data[0], data[1]);
-			break;
+	default:
+		dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
+			dr->bNotificationType, dr->wIndex,
+			dr->wLength, data[0], data[1]);
+		break;
 	}
 exit:
 	usb_mark_last_busy(acm->dev);
-	retval = usb_submit_urb (urb, GFP_ATOMIC);
+	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval)
 		dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with "
 			"result %d", __func__, retval);
@@ -371,15 +383,14 @@
 {
 	struct acm *acm = (void *)_acm;
 	struct acm_rb *buf;
-	struct tty_struct *tty = acm->tty;
+	struct tty_struct *tty;
 	struct acm_ru *rcv;
 	unsigned long flags;
 	unsigned char throttled;
 
 	dbg("Entering acm_rx_tasklet");
 
-	if (!ACM_READY(acm))
-	{
+	if (!ACM_READY(acm)) {
 		dbg("acm_rx_tasklet: ACM not ready");
 		return;
 	}
@@ -387,12 +398,13 @@
 	spin_lock_irqsave(&acm->throttle_lock, flags);
 	throttled = acm->throttle;
 	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (throttled)
-	{
+	if (throttled) {
 		dbg("acm_rx_tasklet: throttled");
 		return;
 	}
 
+	tty = tty_port_tty_get(&acm->port);
+
 next_buffer:
 	spin_lock_irqsave(&acm->read_lock, flags);
 	if (list_empty(&acm->filled_read_bufs)) {
@@ -406,20 +418,22 @@
 
 	dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size);
 
-	tty_buffer_request_room(tty, buf->size);
-	spin_lock_irqsave(&acm->throttle_lock, flags);
-	throttled = acm->throttle;
-	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (!throttled)
-		tty_insert_flip_string(tty, buf->base, buf->size);
-	tty_flip_buffer_push(tty);
-
-	if (throttled) {
-		dbg("Throttling noticed");
-		spin_lock_irqsave(&acm->read_lock, flags);
-		list_add(&buf->list, &acm->filled_read_bufs);
-		spin_unlock_irqrestore(&acm->read_lock, flags);
-		return;
+	if (tty) {
+		spin_lock_irqsave(&acm->throttle_lock, flags);
+		throttled = acm->throttle;
+		spin_unlock_irqrestore(&acm->throttle_lock, flags);
+		if (!throttled) {
+			tty_buffer_request_room(tty, buf->size);
+			tty_insert_flip_string(tty, buf->base, buf->size);
+			tty_flip_buffer_push(tty);
+		} else {
+			tty_kref_put(tty);
+			dbg("Throttling noticed");
+			spin_lock_irqsave(&acm->read_lock, flags);
+			list_add(&buf->list, &acm->filled_read_bufs);
+			spin_unlock_irqrestore(&acm->read_lock, flags);
+			return;
+		}
 	}
 
 	spin_lock_irqsave(&acm->read_lock, flags);
@@ -428,6 +442,8 @@
 	goto next_buffer;
 
 urbs:
+	tty_kref_put(tty);
+
 	while (!list_empty(&acm->spare_read_bufs)) {
 		spin_lock_irqsave(&acm->read_lock, flags);
 		if (list_empty(&acm->spare_read_urbs)) {
@@ -454,10 +470,11 @@
 		rcv->urb->transfer_dma = buf->dma;
 		rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 
-		/* This shouldn't kill the driver as unsuccessful URBs are returned to the
-		   free-urbs-pool and resubmited ASAP */
+		/* This shouldn't kill the driver as unsuccessful URBs are
+		   returned to the free-urbs-pool and resubmited ASAP */
 		spin_lock_irqsave(&acm->read_lock, flags);
-		if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
+		if (acm->susp_count ||
+				usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
 			list_add(&buf->list, &acm->spare_read_bufs);
 			list_add(&rcv->list, &acm->spare_read_urbs);
 			acm->processing = 0;
@@ -499,11 +516,14 @@
 static void acm_softint(struct work_struct *work)
 {
 	struct acm *acm = container_of(work, struct acm, work);
+	struct tty_struct *tty;
 
 	dev_vdbg(&acm->data->dev, "tx work\n");
 	if (!ACM_READY(acm))
 		return;
-	tty_wakeup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	tty_wakeup(tty);
+	tty_kref_put(tty);
 }
 
 static void acm_waker(struct work_struct *waker)
@@ -543,8 +563,9 @@
 		rv = 0;
 
 	set_bit(TTY_NO_WRITE_SPLIT, &tty->flags);
+
 	tty->driver_data = acm;
-	acm->tty = tty;
+	tty_port_tty_set(&acm->port, tty);
 
 	if (usb_autopm_get_interface(acm->control) < 0)
 		goto early_bail;
@@ -552,11 +573,10 @@
 		acm->control->needs_remote_wakeup = 1;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used++) {
+	if (acm->port.count++) {
 		usb_autopm_put_interface(acm->control);
 		goto done;
-        }
-
+	}
 
 	acm->ctrlurb->dev = acm->dev;
 	if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) {
@@ -567,22 +587,22 @@
 	if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) &&
 	    (acm->ctrl_caps & USB_CDC_CAP_LINE))
 		goto full_bailout;
+
 	usb_autopm_put_interface(acm->control);
 
 	INIT_LIST_HEAD(&acm->spare_read_urbs);
 	INIT_LIST_HEAD(&acm->spare_read_bufs);
 	INIT_LIST_HEAD(&acm->filled_read_bufs);
-	for (i = 0; i < acm->rx_buflimit; i++) {
+
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
-	}
-	for (i = 0; i < acm->rx_buflimit; i++) {
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
-	}
 
 	acm->throttle = 0;
 
 	tasklet_schedule(&acm->urb_task);
-
+	rv = tty_port_block_til_ready(&acm->port, tty, filp);
 done:
 	mutex_unlock(&acm->mutex);
 err_out:
@@ -593,16 +613,17 @@
 	usb_kill_urb(acm->ctrlurb);
 bail_out:
 	usb_autopm_put_interface(acm->control);
-	acm->used--;
+	acm->port.count--;
 	mutex_unlock(&acm->mutex);
 early_bail:
 	mutex_unlock(&open_mutex);
+	tty_port_tty_set(&acm->port, NULL);
 	return -EIO;
 }
 
 static void acm_tty_unregister(struct acm *acm)
 {
-	int i,nr;
+	int i, nr;
 
 	nr = acm->rx_buflimit;
 	tty_unregister_device(acm_tty_driver, acm->minor);
@@ -619,41 +640,56 @@
 
 static int acm_tty_chars_in_buffer(struct tty_struct *tty);
 
-static void acm_tty_close(struct tty_struct *tty, struct file *filp)
+static void acm_port_down(struct acm *acm, int drain)
 {
-	struct acm *acm = tty->driver_data;
-	int i,nr;
-
-	if (!acm || !acm->used)
-		return;
-
-	nr = acm->rx_buflimit;
+	int i, nr = acm->rx_buflimit;
 	mutex_lock(&open_mutex);
-	if (!--acm->used) {
-		if (acm->dev) {
-			usb_autopm_get_interface(acm->control);
-			acm_set_control(acm, acm->ctrlout = 0);
-
-			/* try letting the last writes drain naturally */
+	if (acm->dev) {
+		usb_autopm_get_interface(acm->control);
+		acm_set_control(acm, acm->ctrlout = 0);
+		/* try letting the last writes drain naturally */
+		if (drain) {
 			wait_event_interruptible_timeout(acm->drain_wait,
-					(ACM_NW == acm_wb_is_avail(acm))
-						|| !acm->dev,
+				(ACM_NW == acm_wb_is_avail(acm)) || !acm->dev,
 					ACM_CLOSE_TIMEOUT * HZ);
-
-			usb_kill_urb(acm->ctrlurb);
-			for (i = 0; i < ACM_NW; i++)
-				usb_kill_urb(acm->wb[i].urb);
-			for (i = 0; i < nr; i++)
-				usb_kill_urb(acm->ru[i].urb);
-			acm->control->needs_remote_wakeup = 0;
-			usb_autopm_put_interface(acm->control);
-		} else
-			acm_tty_unregister(acm);
+		}
+		usb_kill_urb(acm->ctrlurb);
+		for (i = 0; i < ACM_NW; i++)
+			usb_kill_urb(acm->wb[i].urb);
+		for (i = 0; i < nr; i++)
+			usb_kill_urb(acm->ru[i].urb);
+		acm->control->needs_remote_wakeup = 0;
+		usb_autopm_put_interface(acm->control);
 	}
 	mutex_unlock(&open_mutex);
 }
 
-static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+static void acm_tty_hangup(struct tty_struct *tty)
+{
+	struct acm *acm = tty->driver_data;
+	tty_port_hangup(&acm->port);
+	acm_port_down(acm, 0);
+}
+
+static void acm_tty_close(struct tty_struct *tty, struct file *filp)
+{
+	struct acm *acm = tty->driver_data;
+
+	/* Perform the closing process and see if we need to do the hardware
+	   shutdown */
+	if (tty_port_close_start(&acm->port, tty, filp) == 0)
+		return;
+	acm_port_down(acm, 0);
+	tty_port_close_end(&acm->port, tty);
+	mutex_lock(&open_mutex);
+	tty_port_tty_set(&acm->port, NULL);
+	if (!acm->dev)
+		acm_tty_unregister(acm);
+	mutex_unlock(&open_mutex);
+}
+
+static int acm_tty_write(struct tty_struct *tty,
+					const unsigned char *buf, int count)
 {
 	struct acm *acm = tty->driver_data;
 	int stat;
@@ -669,7 +705,8 @@
 		return 0;
 
 	spin_lock_irqsave(&acm->write_lock, flags);
-	if ((wbn = acm_wb_alloc(acm)) < 0) {
+	wbn = acm_wb_alloc(acm);
+	if (wbn < 0) {
 		spin_unlock_irqrestore(&acm->write_lock, flags);
 		return 0;
 	}
@@ -681,7 +718,8 @@
 	wb->len = count;
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 
-	if ((stat = acm_write_start(acm, wbn)) < 0)
+	stat = acm_write_start(acm, wbn);
+	if (stat < 0)
 		return stat;
 	return count;
 }
@@ -767,8 +805,10 @@
 		return -EINVAL;
 
 	newctrl = acm->ctrlout;
-	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
-	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
 
 	newctrl = (newctrl & ~clear) | set;
 
@@ -777,7 +817,8 @@
 	return acm_set_control(acm, acm->ctrlout = newctrl);
 }
 
-static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+static int acm_tty_ioctl(struct tty_struct *tty, struct file *file,
+					unsigned int cmd, unsigned long arg)
 {
 	struct acm *acm = tty->driver_data;
 
@@ -799,7 +840,8 @@
 	5, 6, 7, 8
 };
 
-static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old)
+static void acm_tty_set_termios(struct tty_struct *tty,
+						struct ktermios *termios_old)
 {
 	struct acm *acm = tty->driver_data;
 	struct ktermios *termios = tty->termios;
@@ -809,19 +851,23 @@
 	if (!ACM_READY(acm))
 		return;
 
+	/* FIXME: Needs to support the tty_baud interface */
+	/* FIXME: Broken on sparc */
 	newline.dwDTERate = cpu_to_le32p(acm_tty_speed +
 		(termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0));
 	newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0;
 	newline.bParityType = termios->c_cflag & PARENB ?
-		(termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
+				(termios->c_cflag & PARODD ? 1 : 2) +
+				(termios->c_cflag & CMSPAR ? 2 : 0) : 0;
 	newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4];
-
+	/* FIXME: Needs to clear unsupported bits in the termios */
 	acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
 
 	if (!newline.dwDTERate) {
 		newline.dwDTERate = acm->line.dwDTERate;
 		newctrl &= ~ACM_CTRL_DTR;
-	} else  newctrl |=  ACM_CTRL_DTR;
+	} else
+		newctrl |=  ACM_CTRL_DTR;
 
 	if (newctrl != acm->ctrlout)
 		acm_set_control(acm, acm->ctrlout = newctrl);
@@ -846,9 +892,8 @@
 	struct acm_wb *wb;
 	struct usb_device *usb_dev = interface_to_usbdev(acm->control);
 
-	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) {
+	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++)
 		usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah);
-	}
 }
 
 static void acm_read_buffers_free(struct acm *acm)
@@ -857,7 +902,8 @@
 	int i, n = acm->rx_buflimit;
 
 	for (i = 0; i < n; i++)
-		usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
+		usb_buffer_free(usb_dev, acm->readsize,
+					acm->rb[i].base, acm->rb[i].dma);
 }
 
 /* Little helper: write buffers allocate */
@@ -882,8 +928,8 @@
 	return 0;
 }
 
-static int acm_probe (struct usb_interface *intf,
-		      const struct usb_device_id *id)
+static int acm_probe(struct usb_interface *intf,
+		     const struct usb_device_id *id)
 {
 	struct usb_cdc_union_desc *union_header = NULL;
 	struct usb_cdc_country_functional_desc *cfd = NULL;
@@ -897,7 +943,7 @@
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
 	struct acm *acm;
 	int minor;
-	int ctrlsize,readsize;
+	int ctrlsize, readsize;
 	u8 *buf;
 	u8 ac_management_function = 0;
 	u8 call_management_function = 0;
@@ -917,7 +963,7 @@
 		control_interface = usb_ifnum_to_if(usb_dev, 0);
 		goto skip_normal_probe;
 	}
-	
+
 	/* normal probing*/
 	if (!buffer) {
 		dev_err(&intf->dev, "Weird descriptor references\n");
@@ -925,8 +971,10 @@
 	}
 
 	if (!buflen) {
-		if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) {
-			dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n");
+		if (intf->cur_altsetting->endpoint->extralen &&
+				intf->cur_altsetting->endpoint->extra) {
+			dev_dbg(&intf->dev,
+				"Seeking extra descriptors on endpoint\n");
 			buflen = intf->cur_altsetting->endpoint->extralen;
 			buffer = intf->cur_altsetting->endpoint->extra;
 		} else {
@@ -937,47 +985,43 @@
 	}
 
 	while (buflen > 0) {
-		if (buffer [1] != USB_DT_CS_INTERFACE) {
+		if (buffer[1] != USB_DT_CS_INTERFACE) {
 			dev_err(&intf->dev, "skipping garbage\n");
 			goto next_desc;
 		}
 
-		switch (buffer [2]) {
-			case USB_CDC_UNION_TYPE: /* we've found it */
-				if (union_header) {
-					dev_err(&intf->dev, "More than one "
-						"union descriptor, "
-						"skipping ...\n");
-					goto next_desc;
-				}
-				union_header = (struct usb_cdc_union_desc *)
-							buffer;
-				break;
-			case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
-				cfd = (struct usb_cdc_country_functional_desc *)buffer;
-				break;
-			case USB_CDC_HEADER_TYPE: /* maybe check version */ 
-				break; /* for now we ignore it */ 
-			case USB_CDC_ACM_TYPE:
-				ac_management_function = buffer[3];
-				break;
-			case USB_CDC_CALL_MANAGEMENT_TYPE:
-				call_management_function = buffer[3];
-				call_interface_num = buffer[4];
-				if ((call_management_function & 3) != 3)
-					dev_err(&intf->dev, "This device "
-						"cannot do calls on its own. "
-						"It is no modem.\n");
-				break;
-			default:
-				/* there are LOTS more CDC descriptors that
-				 * could legitimately be found here.
-				 */
-				dev_dbg(&intf->dev, "Ignoring descriptor: "
-						"type %02x, length %d\n",
-						buffer[2], buffer[0]);
-				break;
+		switch (buffer[2]) {
+		case USB_CDC_UNION_TYPE: /* we've found it */
+			if (union_header) {
+				dev_err(&intf->dev, "More than one "
+					"union descriptor, skipping ...\n");
+				goto next_desc;
 			}
+			union_header = (struct usb_cdc_union_desc *)buffer;
+			break;
+		case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
+			cfd = (struct usb_cdc_country_functional_desc *)buffer;
+			break;
+		case USB_CDC_HEADER_TYPE: /* maybe check version */
+			break; /* for now we ignore it */
+		case USB_CDC_ACM_TYPE:
+			ac_management_function = buffer[3];
+			break;
+		case USB_CDC_CALL_MANAGEMENT_TYPE:
+			call_management_function = buffer[3];
+			call_interface_num = buffer[4];
+			if ((call_management_function & 3) != 3)
+				dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n");
+			break;
+		default:
+			/* there are LOTS more CDC descriptors that
+			 * could legitimately be found here.
+			 */
+			dev_dbg(&intf->dev, "Ignoring descriptor: "
+					"type %02x, length %d\n",
+					buffer[2], buffer[0]);
+			break;
+		}
 next_desc:
 		buflen -= buffer[0];
 		buffer += buffer[0];
@@ -985,33 +1029,36 @@
 
 	if (!union_header) {
 		if (call_interface_num > 0) {
-			dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n");
+			dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n");
 			data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
 			control_interface = intf;
 		} else {
-			dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+			dev_dbg(&intf->dev,
+					"No union descriptor, giving up\n");
 			return -ENODEV;
 		}
 	} else {
 		control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0);
 		data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0));
 		if (!control_interface || !data_interface) {
-			dev_dbg(&intf->dev,"no interfaces\n");
+			dev_dbg(&intf->dev, "no interfaces\n");
 			return -ENODEV;
 		}
 	}
-	
+
 	if (data_interface_num != call_interface_num)
-		dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n");
+		dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
 
 skip_normal_probe:
 
 	/*workaround for switched interfaces */
-	if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) {
-		if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) {
+	if (data_interface->cur_altsetting->desc.bInterfaceClass
+						!= CDC_DATA_INTERFACE_TYPE) {
+		if (control_interface->cur_altsetting->desc.bInterfaceClass
+						== CDC_DATA_INTERFACE_TYPE) {
 			struct usb_interface *t;
-			dev_dbg(&intf->dev,"Your device has switched interfaces.\n");
-
+			dev_dbg(&intf->dev,
+				"Your device has switched interfaces.\n");
 			t = control_interface;
 			control_interface = data_interface;
 			data_interface = t;
@@ -1023,9 +1070,9 @@
 	/* Accept probe requests only for the control interface */
 	if (intf != control_interface)
 		return -ENODEV;
-	
+
 	if (usb_interface_claimed(data_interface)) { /* valid in this context */
-		dev_dbg(&intf->dev,"The data interface isn't available\n");
+		dev_dbg(&intf->dev, "The data interface isn't available\n");
 		return -EBUSY;
 	}
 
@@ -1042,8 +1089,8 @@
 	if (!usb_endpoint_dir_in(epread)) {
 		/* descriptors are swapped */
 		struct usb_endpoint_descriptor *t;
-		dev_dbg(&intf->dev,"The data interface has switched endpoints\n");
-		
+		dev_dbg(&intf->dev,
+			"The data interface has switched endpoints\n");
 		t = epread;
 		epread = epwrite;
 		epwrite = t;
@@ -1056,13 +1103,15 @@
 		return -ENODEV;
 	}
 
-	if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) {
+	acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
+	if (acm == NULL) {
 		dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n");
 		goto alloc_fail;
 	}
 
 	ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
-	readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2);
+	readsize = le16_to_cpu(epread->wMaxPacketSize) *
+				(quirks == SINGLE_RX_URB ? 1 : 2);
 	acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20;
 	acm->control = control_interface;
 	acm->data = data_interface;
@@ -1082,6 +1131,8 @@
 	spin_lock_init(&acm->read_lock);
 	mutex_init(&acm->mutex);
 	acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
+	tty_port_init(&acm->port);
+	acm->port.ops = &acm_port_ops;
 
 	buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
 	if (!buf) {
@@ -1103,8 +1154,10 @@
 	for (i = 0; i < num_rx_buf; i++) {
 		struct acm_ru *rcv = &(acm->ru[i]);
 
-		if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n");
+		rcv->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (rcv->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (read urbs usb_alloc_urb)\n");
 			goto alloc_fail7;
 		}
 
@@ -1117,26 +1170,29 @@
 		rb->base = usb_buffer_alloc(acm->dev, readsize,
 				GFP_KERNEL, &rb->dma);
 		if (!rb->base) {
-			dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n");
+			dev_dbg(&intf->dev,
+				"out of memory (read bufs usb_buffer_alloc)\n");
 			goto alloc_fail7;
 		}
 	}
-	for(i = 0; i < ACM_NW; i++)
-	{
+	for (i = 0; i < ACM_NW; i++) {
 		struct acm_wb *snd = &(acm->wb[i]);
 
-		if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)");
+		snd->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (snd->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (write urbs usb_alloc_urb)");
 			goto alloc_fail7;
 		}
 
-		usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
-				NULL, acm->writesize, acm_write_bulk, snd);
+		usb_fill_bulk_urb(snd->urb, usb_dev,
+			usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
+			NULL, acm->writesize, acm_write_bulk, snd);
 		snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 		snd->instance = acm;
 	}
 
-	usb_set_intfdata (intf, acm);
+	usb_set_intfdata(intf, acm);
 
 	i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
 	if (i < 0)
@@ -1147,7 +1203,8 @@
 		if (!acm->country_codes)
 			goto skip_countries;
 		acm->country_code_size = cfd->bLength - 4;
-		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4);
+		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0,
+							cfd->bLength - 4);
 		acm->country_rel_date = cfd->iCountryCodeRelDate;
 
 		i = device_create_file(&intf->dev, &dev_attr_wCountryCodes);
@@ -1156,7 +1213,8 @@
 			goto skip_countries;
 		}
 
-		i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate);
+		i = device_create_file(&intf->dev,
+						&dev_attr_iCountryCodeRelDate);
 		if (i < 0) {
 			kfree(acm->country_codes);
 			goto skip_countries;
@@ -1164,8 +1222,10 @@
 	}
 
 skip_countries:
-	usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
-			 acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval);
+	usb_fill_int_urb(acm->ctrlurb, usb_dev,
+			usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
+			acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm,
+			epctrl->bInterval);
 	acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 	acm->ctrlurb->transfer_dma = acm->ctrl_dma;
 
@@ -1212,7 +1272,7 @@
 	tasklet_disable(&acm->urb_task);
 
 	usb_kill_urb(acm->ctrlurb);
-	for(i = 0; i < ACM_NW; i++)
+	for (i = 0; i < ACM_NW; i++)
 		usb_kill_urb(acm->wb[i].urb);
 	for (i = 0; i < acm->rx_buflimit; i++)
 		usb_kill_urb(acm->ru[i].urb);
@@ -1227,13 +1287,14 @@
 {
 	struct acm *acm = usb_get_intfdata(intf);
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
+	struct tty_struct *tty;
 
 	/* sibling interface is already cleaning up */
 	if (!acm)
 		return;
 
 	mutex_lock(&open_mutex);
-	if (acm->country_codes){
+	if (acm->country_codes) {
 		device_remove_file(&acm->control->dev,
 				&dev_attr_wCountryCodes);
 		device_remove_file(&acm->control->dev,
@@ -1247,22 +1308,25 @@
 	stop_data_traffic(acm);
 
 	acm_write_buffers_free(acm);
-	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
+	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer,
+								acm->ctrl_dma);
 	acm_read_buffers_free(acm);
 
 	usb_driver_release_interface(&acm_driver, intf == acm->control ?
 					acm->data : acm->control);
 
-	if (!acm->used) {
+	if (acm->port.count == 0) {
 		acm_tty_unregister(acm);
 		mutex_unlock(&open_mutex);
 		return;
 	}
 
 	mutex_unlock(&open_mutex);
-
-	if (acm->tty)
-		tty_hangup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	if (tty) {
+		tty_hangup(tty);
+		tty_kref_put(tty);
+	}
 }
 
 #ifdef CONFIG_PM
@@ -1297,7 +1361,7 @@
 	*/
 	mutex_lock(&acm->mutex);
 
-	if (acm->used)
+	if (acm->port.count)
 		stop_data_traffic(acm);
 
 	mutex_unlock(&acm->mutex);
@@ -1319,7 +1383,7 @@
 		return 0;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used) {
+	if (acm->port.count) {
 		rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO);
 		if (rv < 0)
 			goto err_out;
@@ -1398,7 +1462,7 @@
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_GSM) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
-		USB_CDC_ACM_PROTO_AT_3G	) },
+		USB_CDC_ACM_PROTO_AT_3G) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_CDMA) },
 
@@ -1406,7 +1470,7 @@
 	{ }
 };
 
-MODULE_DEVICE_TABLE (usb, acm_ids);
+MODULE_DEVICE_TABLE(usb, acm_ids);
 
 static struct usb_driver acm_driver = {
 	.name =		"cdc_acm",
@@ -1429,6 +1493,7 @@
 static const struct tty_operations acm_ops = {
 	.open =			acm_tty_open,
 	.close =		acm_tty_close,
+	.hangup =		acm_tty_hangup,
 	.write =		acm_tty_write,
 	.write_room =		acm_tty_write_room,
 	.ioctl =		acm_tty_ioctl,
@@ -1460,7 +1525,8 @@
 	acm_tty_driver->subtype = SERIAL_TYPE_NORMAL,
 	acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	acm_tty_driver->init_termios = tty_std_termios;
-	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD |
+								HUPCL | CLOCAL;
 	tty_set_operations(acm_tty_driver, &acm_ops);
 
 	retval = tty_register_driver(acm_tty_driver);
@@ -1492,7 +1558,7 @@
 module_init(acm_init);
 module_exit(acm_exit);
 
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 1f95e7a..4c38564 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -89,8 +89,8 @@
 	struct usb_device *dev;				/* the corresponding usb device */
 	struct usb_interface *control;			/* control interface */
 	struct usb_interface *data;			/* data interface */
-	struct tty_struct *tty;				/* the corresponding tty */
-	struct urb *ctrlurb;			/* urbs */
+	struct tty_port port;			 	/* our tty port data */
+	struct urb *ctrlurb;				/* urbs */
 	u8 *ctrl_buffer;				/* buffers of urbs */
 	dma_addr_t ctrl_dma;				/* dma handles of buffers */
 	u8 *country_codes;				/* country codes from device */
@@ -120,7 +120,6 @@
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
 	unsigned int writesize;				/* max packet size for the output bulk endpoint */
 	unsigned int readsize,ctrlsize;			/* buffer sizes for freeing */
-	unsigned int used;				/* someone has this acm's device open */
 	unsigned int minor;				/* acm minor number */
 	unsigned char throttle;				/* throttled by tty layer */
 	unsigned char clocal;				/* termios CLOCAL */
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index dff5760..ffe75e8 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -39,6 +39,7 @@
 #include <linux/parser.h>
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <asm/byteorder.h>
 #include "usb.h"
 #include "hcd.h"
@@ -265,9 +266,13 @@
 		return -EINVAL;
 	}
 
+	lock_kernel();
+
 	if (usbfs_mount && usbfs_mount->mnt_sb)
 		update_sb(usbfs_mount->mnt_sb);
 
+	unlock_kernel();
+
 	return 0;
 }
 
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b7eacad..2bfd6dd 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -93,8 +93,7 @@
 static void belkin_sa_shutdown(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void belkin_sa_close(struct usb_serial_port *port);
 static void belkin_sa_read_int_callback(struct urb *urb);
 static void belkin_sa_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@
 } /* belkin_sa_open */
 
 
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void belkin_sa_close(struct usb_serial_port *port)
 {
 	dbg("%s port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index ab4cc27..2830766 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -262,32 +262,40 @@
 	return r;
 }
 
-static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static int ch341_carrier_raised(struct usb_serial_port *port)
+{
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & CH341_BIT_DCD)
+		return 1;
+	return 0;
+}
+
+static void ch341_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct ch341_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
 
 	dbg("%s - port %d", __func__, port->number);
+	/* drop DTR and RTS */
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
+	else
+		priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	ch341_set_handshake(port->serial->dev, priv->line_control);
+	wake_up_interruptible(&priv->delta_msr_wait);
+}
+
+static void ch341_close(struct usb_serial_port *port)
+{
+	dbg("%s - port %d", __func__, port->number);
 
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			ch341_set_handshake(port->serial->dev, 0);
-		}
-	}
-	wake_up_interruptible(&priv->delta_msr_wait);
 }
 
 
@@ -302,7 +310,6 @@
 	dbg("ch341_open()");
 
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
@@ -322,7 +329,7 @@
 	if (r) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, r);
-		ch341_close(tty, port, NULL);
+		ch341_close(port);
 		return -EPROTO;
 	}
 
@@ -343,9 +350,6 @@
 
 	dbg("ch341_set_termios()");
 
-	if (!tty || !tty->termios)
-		return;
-
 	baud_rate = tty_get_baud_rate(tty);
 
 	priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@
 	.usb_driver        = &ch341_driver,
 	.num_ports         = 1,
 	.open              = ch341_open,
+	.dtr_rts	   = ch341_dtr_rts,
+	.carrier_raised	   = ch341_carrier_raised,
 	.close             = ch341_close,
 	.ioctl             = ch341_ioctl,
 	.set_termios       = ch341_set_termios,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 19e2404..247b61b 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -169,7 +169,9 @@
 			kfree(tty);
 		}
 	}
-
+	/* So we know not to kill the hardware on a hangup on this
+	   port. We have also bumped the use count by one so it won't go
+	   idle */
 	port->console = 1;
 	retval = 0;
 
@@ -182,7 +184,7 @@
 	kfree(tty);
 reset_open_count:
 	port->port.count = 0;
-goto out;
+	goto out;
 }
 
 static void usb_console_write(struct console *co,
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index e8d5133..16a154d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -1,5 +1,5 @@
 /*
- * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver
+ * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
  *
  * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
  *
@@ -27,44 +27,46 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.08"
-#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver"
+#define DRIVER_VERSION "v0.09"
+#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
 
 /*
  * Function Prototypes
  */
-static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
+static int cp210x_open(struct tty_struct *, struct usb_serial_port *,
 							struct file *);
-static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct tty_struct *, struct usb_serial_port *,
-							struct file*);
-static void cp2101_get_termios(struct tty_struct *,
+static void cp210x_cleanup(struct usb_serial_port *);
+static void cp210x_close(struct usb_serial_port *);
+static void cp210x_get_termios(struct tty_struct *,
 	struct usb_serial_port *port);
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp);
-static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *,
+static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
 							struct ktermios*);
-static int cp2101_tiocmget(struct tty_struct *, struct file *);
-static int cp2101_tiocmset(struct tty_struct *, struct file *,
+static int cp210x_tiocmget(struct tty_struct *, struct file *);
+static int cp210x_tiocmset(struct tty_struct *, struct file *,
 		unsigned int, unsigned int);
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
 		unsigned int, unsigned int);
-static void cp2101_break_ctl(struct tty_struct *, int);
-static int cp2101_startup(struct usb_serial *);
-static void cp2101_shutdown(struct usb_serial *);
+static void cp210x_break_ctl(struct tty_struct *, int);
+static int cp210x_startup(struct usb_serial *);
+static void cp210x_shutdown(struct usb_serial *);
 
 static int debug;
 
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
 	{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
+	{ USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
 	{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
+	{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
 	{ USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
 	{ USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
 	{ USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
 	{ USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
+	{ USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */
 	{ USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
 	{ USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
 	{ USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
@@ -85,10 +87,12 @@
 	{ USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
 	{ USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
 	{ USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
+	{ USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */
 	{ USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */
 	{ USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */
 	{ USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
@@ -99,7 +103,9 @@
 	{ USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */
 	{ USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
 	{ USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
+	{ USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
 	{ USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
+	{ USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
 	{ USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
 	{ USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
@@ -108,53 +114,70 @@
 
 MODULE_DEVICE_TABLE(usb, id_table);
 
-static struct usb_driver cp2101_driver = {
-	.name		= "cp2101",
+static struct usb_driver cp210x_driver = {
+	.name		= "cp210x",
 	.probe		= usb_serial_probe,
 	.disconnect	= usb_serial_disconnect,
 	.id_table	= id_table,
 	.no_dynamic_id	= 	1,
 };
 
-static struct usb_serial_driver cp2101_device = {
+static struct usb_serial_driver cp210x_device = {
 	.driver = {
 		.owner =	THIS_MODULE,
-		.name = 	"cp2101",
+		.name = 	"cp210x",
 	},
-	.usb_driver		= &cp2101_driver,
+	.usb_driver		= &cp210x_driver,
 	.id_table		= id_table,
 	.num_ports		= 1,
-	.open			= cp2101_open,
-	.close			= cp2101_close,
-	.break_ctl		= cp2101_break_ctl,
-	.set_termios		= cp2101_set_termios,
-	.tiocmget 		= cp2101_tiocmget,
-	.tiocmset		= cp2101_tiocmset,
-	.attach			= cp2101_startup,
-	.shutdown		= cp2101_shutdown,
+	.open			= cp210x_open,
+	.close			= cp210x_close,
+	.break_ctl		= cp210x_break_ctl,
+	.set_termios		= cp210x_set_termios,
+	.tiocmget 		= cp210x_tiocmget,
+	.tiocmset		= cp210x_tiocmset,
+	.attach			= cp210x_startup,
+	.shutdown		= cp210x_shutdown,
 };
 
 /* Config request types */
 #define REQTYPE_HOST_TO_DEVICE	0x41
 #define REQTYPE_DEVICE_TO_HOST	0xc1
 
-/* Config SET requests. To GET, add 1 to the request number */
-#define CP2101_UART 		0x00	/* Enable / Disable */
-#define CP2101_BAUDRATE		0x01	/* (BAUD_RATE_GEN_FREQ / baudrate) */
-#define CP2101_BITS		0x03	/* 0x(0)(databits)(parity)(stopbits) */
-#define CP2101_BREAK		0x05	/* On / Off */
-#define CP2101_CONTROL		0x07	/* Flow control line states */
-#define CP2101_MODEMCTL		0x13	/* Modem controls */
-#define CP2101_CONFIG_6		0x19	/* 6 bytes of config data ??? */
+/* Config request codes */
+#define CP210X_IFC_ENABLE	0x00
+#define CP210X_SET_BAUDDIV	0x01
+#define CP210X_GET_BAUDDIV	0x02
+#define CP210X_SET_LINE_CTL	0x03
+#define CP210X_GET_LINE_CTL	0x04
+#define CP210X_SET_BREAK	0x05
+#define CP210X_IMM_CHAR		0x06
+#define CP210X_SET_MHS		0x07
+#define CP210X_GET_MDMSTS	0x08
+#define CP210X_SET_XON		0x09
+#define CP210X_SET_XOFF		0x0A
+#define CP210X_SET_EVENTMASK	0x0B
+#define CP210X_GET_EVENTMASK	0x0C
+#define CP210X_SET_CHAR		0x0D
+#define CP210X_GET_CHARS	0x0E
+#define CP210X_GET_PROPS	0x0F
+#define CP210X_GET_COMM_STATUS	0x10
+#define CP210X_RESET		0x11
+#define CP210X_PURGE		0x12
+#define CP210X_SET_FLOW		0x13
+#define CP210X_GET_FLOW		0x14
+#define CP210X_EMBED_EVENTS	0x15
+#define CP210X_GET_EVENTSTATE	0x16
+#define CP210X_SET_CHARS	0x19
 
-/* CP2101_UART */
+/* CP210X_IFC_ENABLE */
 #define UART_ENABLE		0x0001
 #define UART_DISABLE		0x0000
 
-/* CP2101_BAUDRATE */
+/* CP210X_(SET|GET)_BAUDDIV */
 #define BAUD_RATE_GEN_FREQ	0x384000
 
-/* CP2101_BITS */
+/* CP210X_(SET|GET)_LINE_CTL */
 #define BITS_DATA_MASK		0X0f00
 #define BITS_DATA_5		0X0500
 #define BITS_DATA_6		0X0600
@@ -174,11 +197,11 @@
 #define BITS_STOP_1_5		0x0001
 #define BITS_STOP_2		0x0002
 
-/* CP2101_BREAK */
+/* CP210X_SET_BREAK */
 #define BREAK_ON		0x0000
 #define BREAK_OFF		0x0001
 
-/* CP2101_CONTROL */
+/* CP210X_(SET_MHS|GET_MDMSTS) */
 #define CONTROL_DTR		0x0001
 #define CONTROL_RTS		0x0002
 #define CONTROL_CTS		0x0010
@@ -189,13 +212,13 @@
 #define CONTROL_WRITE_RTS	0x0200
 
 /*
- * cp2101_get_config
- * Reads from the CP2101 configuration registers
+ * cp210x_get_config
+ * Reads from the CP210x configuration registers
  * 'size' is specified in bytes.
  * 'data' is a pointer to a pre-allocated array of integers large
  * enough to hold 'size' bytes (with 4 bytes to each integer)
  */
-static int cp2101_get_config(struct usb_serial_port *port, u8 request,
+static int cp210x_get_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -211,9 +234,6 @@
 		return -ENOMEM;
 	}
 
-	/* For get requests, the request number must be incremented */
-	request++;
-
 	/* Issue the request, attempting to read 'size' bytes */
 	result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
 				request, REQTYPE_DEVICE_TO_HOST, 0x0000,
@@ -236,12 +256,12 @@
 }
 
 /*
- * cp2101_set_config
- * Writes to the CP2101 configuration registers
+ * cp210x_set_config
+ * Writes to the CP210x configuration registers
  * Values less than 16 bits wide are sent directly
  * 'size' is specified in bytes.
  */
-static int cp2101_set_config(struct usb_serial_port *port, u8 request,
+static int cp210x_set_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -292,21 +312,21 @@
 }
 
 /*
- * cp2101_set_config_single
- * Convenience function for calling cp2101_set_config on single data values
+ * cp210x_set_config_single
+ * Convenience function for calling cp210x_set_config on single data values
  * without requiring an integer pointer
  */
-static inline int cp2101_set_config_single(struct usb_serial_port *port,
+static inline int cp210x_set_config_single(struct usb_serial_port *port,
 		u8 request, unsigned int data)
 {
-	return cp2101_set_config(port, request, &data, 2);
+	return cp210x_set_config(port, request, &data, 2);
 }
 
 /*
- * cp2101_quantise_baudrate
+ * cp210x_quantise_baudrate
  * Quantises the baud rate as per AN205 Table 1
  */
-static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
+static unsigned int cp210x_quantise_baudrate(unsigned int baud) {
 	if      (baud <= 56)       baud = 0;
 	else if (baud <= 300)      baud = 300;
 	else if (baud <= 600)      baud = 600;
@@ -343,7 +363,7 @@
 	return baud;
 }
 
-static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
+static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
 {
 	struct usb_serial *serial = port->serial;
@@ -351,7 +371,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) {
+	if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
 		dev_err(&port->dev, "%s - Unable to enable UART\n",
 				__func__);
 		return -EPROTO;
@@ -373,17 +393,17 @@
 	}
 
 	/* Configure the termios structure */
-	cp2101_get_termios(tty, port);
+	cp210x_get_termios(tty, port);
 
 	/* Set the DTR and RTS pins low */
-	cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
+	cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
 			: port,
 		NULL, TIOCM_DTR | TIOCM_RTS, 0);
 
 	return 0;
 }
 
-static void cp2101_cleanup(struct usb_serial_port *port)
+static void cp210x_cleanup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
@@ -398,8 +418,7 @@
 	}
 }
 
-static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp)
+static void cp210x_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
@@ -410,23 +429,23 @@
 
 	mutex_lock(&port->serial->disc_mutex);
 	if (!port->serial->disconnected)
-		cp2101_set_config_single(port, CP2101_UART, UART_DISABLE);
+		cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
 	mutex_unlock(&port->serial->disc_mutex);
 }
 
 /*
- * cp2101_get_termios
+ * cp210x_get_termios
  * Reads the baud rate, data bits, parity, stop bits and flow control mode
  * from the device, corrects any unsupported values, and configures the
  * termios structure to reflect the state of the device
  */
-static void cp2101_get_termios(struct tty_struct *tty,
+static void cp210x_get_termios(struct tty_struct *tty,
 	struct usb_serial_port *port)
 {
 	unsigned int baud;
 
 	if (tty) {
-		cp2101_get_termios_port(tty->driver_data,
+		cp210x_get_termios_port(tty->driver_data,
 			&tty->termios->c_cflag, &baud);
 		tty_encode_baud_rate(tty, baud, baud);
 	}
@@ -434,15 +453,15 @@
 	else {
 		unsigned int cflag;
 		cflag = 0;
-		cp2101_get_termios_port(port, &cflag, &baud);
+		cp210x_get_termios_port(port, &cflag, &baud);
 	}
 }
 
 /*
- * cp2101_get_termios_port
- * This is the heart of cp2101_get_termios which always uses a &usb_serial_port.
+ * cp210x_get_termios_port
+ * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
  */
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp)
 {
 	unsigned int cflag, modem_ctl[4];
@@ -451,17 +470,17 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2);
+	cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
 	/* Convert to baudrate */
 	if (baud)
-		baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
+		baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
 
 	dbg("%s - baud rate = %d", __func__, baud);
 	*baudp = baud;
 
 	cflag = *cflagp;
 
-	cp2101_get_config(port, CP2101_BITS, &bits, 2);
+	cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 	cflag &= ~CSIZE;
 	switch (bits & BITS_DATA_MASK) {
 	case BITS_DATA_5:
@@ -486,14 +505,14 @@
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown number of data bits, using 8", __func__);
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -516,20 +535,20 @@
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_PARITY_SPACE:
 		dbg("%s - parity = SPACE (not supported, disabling parity)",
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown parity mode, disabling parity", __func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -542,7 +561,7 @@
 		dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_STOP_2:
 		dbg("%s - stop bits = 2", __func__);
@@ -552,11 +571,11 @@
 		dbg("%s - Unknown number of stop bits, using 1 stop bit",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
-	cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+	cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 	if (modem_ctl[0] & 0x0008) {
 		dbg("%s - flow control = CRTSCTS", __func__);
 		cflag |= CRTSCTS;
@@ -568,7 +587,7 @@
 	*cflagp = cflag;
 }
 
-static void cp2101_set_termios(struct tty_struct *tty,
+static void cp210x_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
 	unsigned int cflag, old_cflag;
@@ -583,13 +602,13 @@
 	tty->termios->c_cflag &= ~CMSPAR;
 	cflag = tty->termios->c_cflag;
 	old_cflag = old_termios->c_cflag;
-	baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty));
+	baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty));
 
 	/* If the baud rate is to be updated*/
 	if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
 		dbg("%s - Setting baud rate to %d baud", __func__,
 				baud);
-		if (cp2101_set_config_single(port, CP2101_BAUDRATE,
+		if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV,
 					((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
 			dbg("Baud rate requested not supported by device\n");
 			baud = tty_termios_baud_rate(old_termios);
@@ -600,7 +619,7 @@
 
 	/* If the number of data bits is to be updated */
 	if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_DATA_MASK;
 		switch (cflag & CSIZE) {
 		case CS5:
@@ -624,19 +643,19 @@
 			dbg("%s - data bits = 9", __func__);
 			break;*/
 		default:
-			dbg("cp2101 driver does not "
+			dbg("cp210x driver does not "
 					"support the number of bits requested,"
 					" using 8 bit mode\n");
 				bits |= BITS_DATA_8;
 				break;
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of data bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_PARITY_MASK;
 		if (cflag & PARENB) {
 			if (cflag & PARODD) {
@@ -647,13 +666,13 @@
 				dbg("%s - parity = EVEN", __func__);
 			}
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Parity mode not supported "
 					"by device\n");
 	}
 
 	if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_STOP_MASK;
 		if (cflag & CSTOPB) {
 			bits |= BITS_STOP_2;
@@ -662,13 +681,13 @@
 			bits |= BITS_STOP_1;
 			dbg("%s - stop bits = 1", __func__);
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of stop bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
-		cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 		dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
@@ -688,19 +707,19 @@
 		dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
-		cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
 	}
 
 }
 
-static int cp2101_tiocmset (struct tty_struct *tty, struct file *file,
+static int cp210x_tiocmset (struct tty_struct *tty, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	struct usb_serial_port *port = tty->driver_data;
-	return cp2101_tiocmset_port(port, file, set, clear);
+	return cp210x_tiocmset_port(port, file, set, clear);
 }
 
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	unsigned int control = 0;
@@ -726,10 +745,10 @@
 
 	dbg("%s - control = 0x%.4x", __func__, control);
 
-	return cp2101_set_config(port, CP2101_CONTROL, &control, 2);
+	return cp210x_set_config(port, CP210X_SET_MHS, &control, 2);
 }
 
-static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
+static int cp210x_tiocmget (struct tty_struct *tty, struct file *file)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int control;
@@ -737,7 +756,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_CONTROL, &control, 1);
+	cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1);
 
 	result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
 		|((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -751,7 +770,7 @@
 	return result;
 }
 
-static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
+static void cp210x_break_ctl (struct tty_struct *tty, int break_state)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int state;
@@ -763,17 +782,17 @@
 		state = BREAK_ON;
 	dbg("%s - turning break %s", __func__,
 			state == BREAK_OFF ? "off" : "on");
-	cp2101_set_config(port, CP2101_BREAK, &state, 2);
+	cp210x_set_config(port, CP210X_SET_BREAK, &state, 2);
 }
 
-static int cp2101_startup(struct usb_serial *serial)
+static int cp210x_startup(struct usb_serial *serial)
 {
-	/* CP2101 buffers behave strangely unless device is reset */
+	/* cp210x buffers behave strangely unless device is reset */
 	usb_reset_device(serial->dev);
 	return 0;
 }
 
-static void cp2101_shutdown(struct usb_serial *serial)
+static void cp210x_shutdown(struct usb_serial *serial)
 {
 	int i;
 
@@ -781,21 +800,21 @@
 
 	/* Stop reads and writes on all ports */
 	for (i = 0; i < serial->num_ports; ++i)
-		cp2101_cleanup(serial->port[i]);
+		cp210x_cleanup(serial->port[i]);
 }
 
-static int __init cp2101_init(void)
+static int __init cp210x_init(void)
 {
 	int retval;
 
-	retval = usb_serial_register(&cp2101_device);
+	retval = usb_serial_register(&cp210x_device);
 	if (retval)
 		return retval; /* Failed to register */
 
-	retval = usb_register(&cp2101_driver);
+	retval = usb_register(&cp210x_driver);
 	if (retval) {
 		/* Failed to register */
-		usb_serial_deregister(&cp2101_device);
+		usb_serial_deregister(&cp210x_device);
 		return retval;
 	}
 
@@ -805,14 +824,14 @@
 	return 0;
 }
 
-static void __exit cp2101_exit(void)
+static void __exit cp210x_exit(void)
 {
-	usb_deregister(&cp2101_driver);
-	usb_serial_deregister(&cp2101_device);
+	usb_deregister(&cp210x_driver);
+	usb_serial_deregister(&cp210x_device);
 }
 
-module_init(cp2101_init);
-module_exit(cp2101_exit);
+module_init(cp210x_init);
+module_exit(cp210x_exit);
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index dd501bb..933ba91 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -61,8 +61,7 @@
 static void cyberjack_shutdown(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cyberjack_close(struct usb_serial_port *port);
 static int cyberjack_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@
 	return result;
 }
 
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cyberjack_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e568710..669f938 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -174,8 +174,8 @@
 static void cypress_shutdown(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cypress_close(struct usb_serial_port *port);
+static void cypress_dtr_rts(struct usb_serial_port *port, int on);
 static int  cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -244,6 +245,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -270,6 +272,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -656,11 +659,7 @@
 	priv->rx_flags = 0;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* raise both lines and set termios */
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->line_control = CONTROL_DTR | CONTROL_RTS;
-	priv->cmd_ctrl = 1;
-	spin_unlock_irqrestore(&priv->lock, flags);
+	/* Set termios */
 	result = cypress_write(tty, port, NULL, 0);
 
 	if (result) {
@@ -694,76 +693,42 @@
 							__func__, result);
 		cypress_set_dead(port);
 	}
-
+	port->port.drain_delay = 256;
 	return result;
 } /* cypress_open */
 
-
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cypress_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct cypress_private *priv = usb_get_serial_port_data(port);
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	/* drop dtr and rts */
+	priv = usb_get_serial_port_data(port);
+	spin_lock_irq(&priv->lock);
+	if (on == 0)
+		priv->line_control = 0;
+	else 
+		priv->line_control = CONTROL_DTR | CONTROL_RTS;
+	priv->cmd_ctrl = 1;
+	spin_unlock_irq(&priv->lock);
+	cypress_write(NULL, port, NULL, 0);
+}
+
+static void cypress_close(struct usb_serial_port *port)
+{
+	struct cypress_private *priv = usb_get_serial_port_data(port);
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from buffer */
-	spin_lock_irq(&priv->lock);
-	timeout = CYPRESS_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (cypress_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		/* without mutex, allowed due to harmless failure mode */
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irq(&priv->lock);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irq(&priv->lock);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	/* clear out any remaining data in the buffer */
-	cypress_buf_clear(priv->buf);
-	spin_unlock_irq(&priv->lock);
-
 	/* writing is potentially harmful, lock must be taken */
 	mutex_lock(&port->serial->disc_mutex);
 	if (port->serial->disconnected) {
 		mutex_unlock(&port->serial->disc_mutex);
 		return;
 	}
-	/* wait for characters to drain from device */
-	if (tty) {
-		bps = tty_get_baud_rate(tty);
-		if (bps > 1200)
-			timeout = max((HZ * 2560) / bps, HZ / 10);
-		else
-			timeout = 2 * HZ;
-		schedule_timeout_interruptible(timeout);
-	}
-
+	cypress_buf_clear(priv->buf);
 	dbg("%s - stopping urbs", __func__);
 	usb_kill_urb(port->interrupt_in_urb);
 	usb_kill_urb(port->interrupt_out_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop dtr and rts */
-			priv = usb_get_serial_port_data(port);
-			spin_lock_irq(&priv->lock);
-			priv->line_control = 0;
-			priv->cmd_ctrl = 1;
-			spin_unlock_irq(&priv->lock);
-			cypress_write(tty, port, NULL, 0);
-		}
-	}
 
 	if (stats)
 		dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 38ba4ea..30f5140 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -422,7 +422,6 @@
 	int dp_throttled;
 	int dp_throttle_restart;
 	wait_queue_head_t dp_flush_wait;
-	int dp_in_close;			/* close in progress */
 	wait_queue_head_t dp_close_wait;	/* wait queue for close */
 	struct work_struct dp_wakeup_work;
 	struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@
 static int digi_chars_in_buffer(struct tty_struct *tty);
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	struct file *filp);
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-	struct file *filp);
+static void digi_close(struct usb_serial_port *port);
+static int digi_carrier_raised(struct usb_serial_port *port);
+static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
 static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@
 	.num_ports =			3,
 	.open =				digi_open,
 	.close =			digi_close,
+	.dtr_rts =			digi_dtr_rts,
+	.carrier_raised =		digi_carrier_raised,
 	.write =			digi_write,
 	.write_room =			digi_write_room,
 	.write_bulk_callback = 		digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@
 
 }
 
+static void digi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	/* Adjust DTR and RTS */
+	digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1);
+}
+
+static int digi_carrier_raised(struct usb_serial_port *port)
+{
+	struct digi_port *priv = usb_get_serial_port_data(port);
+	if (priv->dp_modem_signals & TIOCM_CD)
+		return 1;
+	return 0;
+}
 
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
@@ -1336,7 +1351,6 @@
 	unsigned char buf[32];
 	struct digi_port *priv = usb_get_serial_port_data(port);
 	struct ktermios not_termios;
-	unsigned long flags = 0;
 
 	dbg("digi_open: TOP: port=%d, open_count=%d",
 		priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@
 	if (digi_startup_device(port->serial) != 0)
 		return -ENXIO;
 
-	spin_lock_irqsave(&priv->dp_port_lock, flags);
-
-	/* don't wait on a close in progress for non-blocking opens */
-	if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
-		spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-		return -EAGAIN;
-	}
-
-	/* wait for a close in progress to finish */
-	while (priv->dp_in_close) {
-		cond_wait_interruptible_timeout_irqrestore(
-			&priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
-			&priv->dp_port_lock, flags);
-		if (signal_pending(current))
-			return -EINTR;
-		spin_lock_irqsave(&priv->dp_port_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-
 	/* read modem signals automatically whenever they change */
 	buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
 	buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@
 		not_termios.c_iflag = ~tty->termios->c_iflag;
 		digi_set_termios(tty, port, &not_termios);
 	}
-
-	/* set DTR and RTS */
-	digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
-
 	return 0;
 }
 
 
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void digi_close(struct usb_serial_port *port)
 {
 	DEFINE_WAIT(wait);
 	int ret;
@@ -1411,28 +1400,9 @@
 	if (port->serial->disconnected)
 		goto exit;
 
-	/* do cleanup only after final close on this port */
-	spin_lock_irq(&priv->dp_port_lock);
-	priv->dp_in_close = 1;
-	spin_unlock_irq(&priv->dp_port_lock);
-
-	/* tell line discipline to process only XON/XOFF */
-	tty->closing = 1;
-
-	/* wait for output to drain */
-	if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-		tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
-
-	/* flush driver and line discipline buffers */
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	if (port->serial->dev) {
-		/* wait for transmit idle */
-		if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-			digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
-		/* drop DTR and RTS */
-		digi_set_modem_signals(port, 0, 0);
+		/* FIXME: Transmit idle belongs in the wait_unti_sent path */
+		digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
 
 		/* disable input flow control */
 		buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@
 		/* shutdown any outstanding bulk writes */
 		usb_kill_urb(port->write_urb);
 	}
-	tty->closing = 0;
 exit:
 	spin_lock_irq(&priv->dp_port_lock);
 	priv->dp_write_urb_in_use = 0;
-	priv->dp_in_close = 0;
 	wake_up_interruptible(&priv->dp_close_wait);
 	spin_unlock_irq(&priv->dp_port_lock);
 	mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@
 		priv->dp_throttled = 0;
 		priv->dp_throttle_restart = 0;
 		init_waitqueue_head(&priv->dp_flush_wait);
-		priv->dp_in_close = 0;
 		init_waitqueue_head(&priv->dp_close_wait);
 		INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
 		priv->dp_port = serial->port[i];
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index c709ec4..2b141cc 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -81,8 +81,7 @@
 /* function prototypes for an empeg-car player */
 static int  empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 						struct file *filp);
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-						struct file *filp);
+static void empeg_close(struct usb_serial_port *port);
 static int  empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
 						const unsigned char *buf,
 						int count);
@@ -181,8 +180,7 @@
 }
 
 
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void empeg_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9fcdae..683304d 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -89,6 +89,7 @@
 	int force_rtscts;	/* if non-zero, force RTS-CTS to always
 				   be enabled */
 
+	unsigned int latency;		/* latency setting in use */
 	spinlock_t tx_lock;	/* spinlock for transmit state */
 	unsigned long tx_bytes;
 	unsigned long tx_outstanding_bytes;
@@ -719,8 +720,8 @@
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ftdi_close(struct usb_serial_port *port);
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static int  ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static int  ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +759,7 @@
 	.port_remove =		ftdi_sio_port_remove,
 	.open =			ftdi_open,
 	.close =		ftdi_close,
+	.dtr_rts =		ftdi_dtr_rts,
 	.throttle =		ftdi_throttle,
 	.unthrottle =		ftdi_unthrottle,
 	.write =		ftdi_write,
@@ -1037,7 +1039,54 @@
 	return rv;
 }
 
+static int write_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	char buf[1];
+	int rv = 0;
+	int l = priv->latency;
 
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		l = 1;
+
+	dbg("%s: setting latency timer = %i", __func__, l);
+
+	rv = usb_control_msg(udev,
+			     usb_sndctrlpipe(udev, 0),
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
+			     l, priv->interface,
+			     buf, 0, WDR_TIMEOUT);
+
+	if (rv < 0)
+		dev_err(&port->dev, "Unable to write latency timer: %i\n", rv);
+	return rv;
+}
+
+static int read_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	unsigned short latency = 0;
+	int rv = 0;
+
+
+	dbg("%s", __func__);
+
+	rv = usb_control_msg(udev,
+			     usb_rcvctrlpipe(udev, 0),
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
+			     0, priv->interface,
+			     (char *) &latency, 1, WDR_TIMEOUT);
+
+	if (rv < 0) {
+		dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
+		return -EIO;
+	}
+	return latency;
+}
 
 static int get_serial_info(struct usb_serial_port *port,
 				struct serial_struct __user *retinfo)
@@ -1097,6 +1146,7 @@
 	priv->custom_divisor = new_serial.custom_divisor;
 
 	tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	write_latency_timer(port);
 
 check_and_exit:
 	if ((old_priv.flags & ASYNC_SPD_MASK) !=
@@ -1192,27 +1242,13 @@
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	unsigned short latency = 0;
-	int rv = 0;
-
-
-	dbg("%s", __func__);
-
-	rv = usb_control_msg(udev,
-			     usb_rcvctrlpipe(udev, 0),
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
-			     0, priv->interface,
-			     (char *) &latency, 1, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to read latency timer: %i\n", rv);
-		return -EIO;
-	}
-	return sprintf(buf, "%i\n", latency);
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		return sprintf(buf, "1\n");
+	else
+		return sprintf(buf, "%i\n", priv->latency);
 }
 
+
 /* Write a new value of the latency timer, in units of milliseconds. */
 static ssize_t store_latency_timer(struct device *dev,
 			struct device_attribute *attr, const char *valbuf,
@@ -1220,25 +1256,13 @@
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	char buf[1];
 	int v = simple_strtoul(valbuf, NULL, 10);
 	int rv = 0;
 
-	dbg("%s: setting latency timer = %i", __func__, v);
-
-	rv = usb_control_msg(udev,
-			     usb_sndctrlpipe(udev, 0),
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
-			     v, priv->interface,
-			     buf, 0, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to write latency timer: %i\n", rv);
+	priv->latency = v;
+	rv = write_latency_timer(port);
+	if (rv < 0)
 		return -EIO;
-	}
-
 	return count;
 }
 
@@ -1392,6 +1416,7 @@
 	usb_set_serial_port_data(port, priv);
 
 	ftdi_determine_type(port);
+	read_latency_timer(port);
 	create_sysfs_attrs(port);
 	return 0;
 }
@@ -1514,6 +1539,8 @@
 	if (tty)
 		tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
+	write_latency_timer(port);
+
 	/* No error checking for this (will get errors later anyway) */
 	/* See ftdi_sio.h for description of what is reset */
 	usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
@@ -1529,11 +1556,6 @@
 	if (tty)
 		ftdi_set_termios(tty, port, tty->termios);
 
-	/* FIXME: Flow control might be enabled, so it should be checked -
-	   we have no control of defaults! */
-	/* Turn on RTS and DTR since we are not flow controlling by default */
-	set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-
 	/* Not throttled */
 	spin_lock_irqsave(&priv->rx_lock, flags);
 	priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
@@ -1558,6 +1580,30 @@
 } /* ftdi_open */
 
 
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	char buf[1];
+
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* Disable flow control */
+		if (!on && usb_control_msg(port->serial->dev,
+			    usb_sndctrlpipe(port->serial->dev, 0),
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+			    0, priv->interface, buf, 0,
+			    WDR_TIMEOUT) < 0) {
+			    dev_err(&port->dev, "error from flowcontrol urb\n");
+		}
+		/* drop RTS and DTR */
+		if (on)
+			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+		else
+			clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
 /*
  * usbserial:__serial_close  only calls ftdi_close if the point is open
@@ -1567,31 +1613,12 @@
  *
  */
 
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ftdi_close(struct usb_serial_port *port)
 { /* ftdi_close */
-	unsigned int c_cflag = tty->termios->c_cflag;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	char buf[1];
 
 	dbg("%s", __func__);
 
-	mutex_lock(&port->serial->disc_mutex);
-	if (c_cflag & HUPCL && !port->serial->disconnected) {
-		/* Disable flow control */
-		if (usb_control_msg(port->serial->dev,
-				    usb_sndctrlpipe(port->serial->dev, 0),
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-				    0, priv->interface, buf, 0,
-				    WDR_TIMEOUT) < 0) {
-			dev_err(&port->dev, "error from flowcontrol urb\n");
-		}
-
-		/* drop RTS and DTR */
-		clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-	} /* Note change no line if hupcl is off */
-	mutex_unlock(&port->serial->disc_mutex);
 
 	/* cancel any scheduled reading */
 	cancel_delayed_work_sync(&priv->rx_work);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 586d30f..ee25a3f 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -993,8 +993,7 @@
 }
 
 
-static void garmin_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void garmin_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 4cec990..be82ea9 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -184,8 +184,7 @@
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
 
-void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+void usb_serial_generic_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	generic_cleanup(port);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index fb4a73d..53ef599 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -207,8 +207,7 @@
 /* function prototypes for the usbserial callbacks */
 static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void edge_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void edge_close(struct usb_serial_port *port);
 static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@
 
 	if (!edge_port->txfifo.fifo) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -975,7 +974,7 @@
 
 	if (!edge_port->write_urb) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -1099,8 +1098,7 @@
  * edge_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 513b25e..eabf20e 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2009,8 +2009,7 @@
 	return status;
 }
 
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index cd62825..c610a99 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -76,8 +76,7 @@
 /* Function prototypes for an ipaq */
 static int  ipaq_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
 static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@
 }
 
 
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipaq_close(struct usb_serial_port *port)
 {
 	struct ipaq_private	*priv = usb_get_serial_port_data(port);
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index da2a2b4..29ad038 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -302,23 +302,17 @@
 	return 0;
 }
 
-static void ipw_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipw_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct usb_device *dev = port->serial->dev;
 	int result;
 
-	if (tty_hung_up_p(filp)) {
-		dbg("%s: tty_hung_up_p ...", __func__);
-		return;
-	}
-
 	/*--1: drop the dtr */
 	dbg("%s:dropping dtr", __func__);
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN,
 			 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRDTR,
+			 on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
 			 0,
 			 NULL,
 			 0,
@@ -332,7 +326,7 @@
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
 			 		USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRRTS,
+			 on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
 			 0,
 			 NULL,
 			 0,
@@ -340,7 +334,12 @@
 	if (result < 0)
 		dev_err(&port->dev,
 				"dropping rts failed (error = %d)\n", result);
+}
 
+static void ipw_close(struct usb_serial_port *port)
+{
+	struct usb_device *dev = port->serial->dev;
+	int result;
 
 	/*--3: purge */
 	dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@
 	.num_ports =		1,
 	.open =			ipw_open,
 	.close =		ipw_close,
+	.dtr_rts =		ipw_dtr_rts,
 	.port_probe = 		ipw_probe,
 	.port_remove =		ipw_disconnect,
 	.write =		ipw_write,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 4e2cda9..66009b6 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -88,8 +88,7 @@
 static int  ir_startup (struct usb_serial *serial);
 static int  ir_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filep);
-static void ir_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filep);
+static void ir_close(struct usb_serial_port *port);
 static int  ir_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@
 	return result;
 }
 
-static void ir_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file * filp)
+static void ir_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 4473d44..76a3cc3 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -40,7 +40,7 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.5"
+#define DRIVER_VERSION "v0.10"
 #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
 
 static struct usb_device_id id_table[] = {
@@ -70,7 +70,6 @@
 struct iuu_private {
 	spinlock_t lock;	/* store irq state */
 	wait_queue_head_t delta_msr_wait;
-	u8 line_control;
 	u8 line_status;
 	u8 termios_initialized;
 	int tiostatus;		/* store IUART SIGNAL for tiocmget call */
@@ -651,32 +650,33 @@
 	unsigned long flags;
 	int result;
 	int i;
+	int buf_len;
 	char *buf_ptr = port->write_urb->transfer_buffer;
 	dbg("%s - enter", __func__);
 
+	spin_lock_irqsave(&priv->lock, flags);
 	*buf_ptr++ = IUU_UART_ESC;
 	*buf_ptr++ = IUU_UART_TX;
 	*buf_ptr++ = priv->writelen;
 
-	memcpy(buf_ptr, priv->writebuf,
-	       priv->writelen);
+	memcpy(buf_ptr, priv->writebuf, priv->writelen);
+	buf_len = priv->writelen;
+	priv->writelen = 0;
+	spin_unlock_irqrestore(&priv->lock, flags);
 	if (debug == 1) {
-		for (i = 0; i < priv->writelen; i++)
+		for (i = 0; i < buf_len; i++)
 			sprintf(priv->dbgbuf + i*2 ,
 				"%02X", priv->writebuf[i]);
-		priv->dbgbuf[priv->writelen+i*2] = 0;
+		priv->dbgbuf[buf_len+i*2] = 0;
 		dbg("%s - writing %i chars : %s", __func__,
-		    priv->writelen, priv->dbgbuf);
+		    buf_len, priv->dbgbuf);
 	}
 	usb_fill_bulk_urb(port->write_urb, port->serial->dev,
 			  usb_sndbulkpipe(port->serial->dev,
 					  port->bulk_out_endpointAddress),
-			  port->write_urb->transfer_buffer, priv->writelen + 3,
+			  port->write_urb->transfer_buffer, buf_len + 3,
 			  iuu_rxcmd, port);
 	result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->writelen = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
 	usb_serial_port_softint(port);
 	return result;
 }
@@ -770,14 +770,10 @@
 		return -ENOMEM;
 
 	spin_lock_irqsave(&priv->lock, flags);
-	if (priv->writelen > 0) {
-		/* buffer already filled but not commited */
-		spin_unlock_irqrestore(&priv->lock, flags);
-		return 0;
-	}
+
 	/* fill the buffer */
-	memcpy(priv->writebuf, buf, count);
-	priv->writelen = count;
+	memcpy(priv->writebuf + priv->writelen, buf, count);
+	priv->writelen += count;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return count;
@@ -819,7 +815,7 @@
 	buf[0] = IUU_UART_ENABLE;
 	buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF);
 	buf[2] = (u8) (0x00FF & IUU_BAUD_9600);
-	buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN);
+	buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN);
 
 	status = bulk_immediate(port, buf, 4);
 	if (status != IUU_OPERATION_OK) {
@@ -946,19 +942,59 @@
 	return status;
 }
 
-static int set_control_lines(struct usb_device *dev, u8 value)
+static void iuu_set_termios(struct tty_struct *tty,
+		struct usb_serial_port *port, struct ktermios *old_termios)
 {
-	return 0;
+	const u32 supported_mask = CMSPAR|PARENB|PARODD;
+
+	unsigned int cflag = tty->termios->c_cflag;
+	int status;
+	u32 actual;
+	u32 parity;
+	int csize = CS7;
+	int baud = 9600;	/* Fixed for the moment */
+	u32 newval = cflag & supported_mask;
+
+	/* compute the parity parameter */
+	parity = 0;
+	if (cflag & CMSPAR) {	/* Using mark space */
+		if (cflag & PARODD)
+			parity |= IUU_PARITY_SPACE;
+		else
+			parity |= IUU_PARITY_MARK;
+	} else if (!(cflag & PARENB)) {
+		parity |= IUU_PARITY_NONE;
+		csize = CS8;
+	} else if (cflag & PARODD)
+		parity |= IUU_PARITY_ODD;
+	else
+		parity |= IUU_PARITY_EVEN;
+
+	parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT);
+
+	/* set it */
+	status = iuu_uart_baud(port,
+			(clockmode == 2) ? 16457 : 9600 * boost / 100,
+			&actual, parity);
+
+	/* set the termios value to the real one, so the user now what has
+	 * changed. We support few fields so its easies to copy the old hw
+	 * settings back over and then adjust them
+	 */
+ 	if (old_termios)
+ 		tty_termios_copy_hw(tty->termios, old_termios);
+	if (status != 0)	/* Set failed - return old bits */
+		return;
+	/* Re-encode speed, parity and csize */
+	tty_encode_baud_rate(tty, baud, baud);
+	tty->termios->c_cflag &= ~(supported_mask|CSIZE);
+	tty->termios->c_cflag |= newval | csize;
 }
 
-static void iuu_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void iuu_close(struct usb_serial_port *port)
 {
 	/* iuu_led (port,255,0,0,0); */
 	struct usb_serial *serial;
-	struct iuu_private *priv = usb_get_serial_port_data(port);
-	unsigned long flags;
-	unsigned int c_cflag;
 
 	serial = port->serial;
 	if (!serial)
@@ -968,17 +1004,6 @@
 
 	iuu_uart_off(port);
 	if (serial->dev) {
-		if (tty) {
-			c_cflag = tty->termios->c_cflag;
-			if (c_cflag & HUPCL) {
-				/* drop DTR and RTS */
-				priv = usb_get_serial_port_data(port);
-				spin_lock_irqsave(&priv->lock, flags);
-				priv->line_control = 0;
-				spin_unlock_irqrestore(&priv->lock, flags);
-				set_control_lines(port->serial->dev, 0);
-			}
-		}
 		/* free writebuf */
 		/* shutdown our urbs */
 		dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1179,7 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		iuu_close(tty, port, NULL);
+		iuu_close(port);
 		return -EPROTO;
 	} else {
 		dbg("%s - rxcmd OK", __func__);
@@ -1175,6 +1200,7 @@
 	.read_bulk_callback = iuu_uart_read_callback,
 	.tiocmget = iuu_tiocmget,
 	.tiocmset = iuu_tiocmset,
+	.set_termios = iuu_set_termios,
 	.attach = iuu_startup,
 	.shutdown = iuu_shutdown,
 };
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 00daa8f..f1195a9 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1298,8 +1298,16 @@
 		usb_kill_urb(urb);
 }
 
-static void keyspan_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
+
+	p_priv->rts_state = on;
+	p_priv->dtr_state = on;
+	keyspan_send_setup(port, 0);
+}
+
+static void keyspan_close(struct usb_serial_port *port)
 {
 	int			i;
 	struct usb_serial	*serial = port->serial;
@@ -1336,7 +1344,6 @@
 			stop_urb(p_priv->out_urbs[i]);
 		}
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* download the firmware to a pre-renumeration device */
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 38b4582..0d4569b 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -38,9 +38,8 @@
 static int  keyspan_open		(struct tty_struct *tty,
 					 struct usb_serial_port *port,
 					 struct file *filp);
-static void keyspan_close		(struct tty_struct *tty,
-					 struct usb_serial_port *port,
-					 struct file *filp);
+static void keyspan_close		(struct usb_serial_port *port);
+static void keyspan_dtr_rts		(struct usb_serial_port *port, int on);
 static int  keyspan_startup		(struct usb_serial *serial);
 static void keyspan_shutdown		(struct usb_serial *serial);
 static int  keyspan_write_room		(struct tty_struct *tty);
@@ -562,6 +561,7 @@
 	.num_ports		= 1,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -582,6 +582,7 @@
 	.num_ports		= 2,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -602,6 +603,7 @@
 	.num_ports		= 4,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index bf1ae24..ab769db 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -651,6 +651,35 @@
 }
 
 
+static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+
+	if (serial->dev) {
+		if (on)
+			keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
+		else
+			keyspan_pda_set_modem_info(serial, 0);
+	}
+}
+
+static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
+{
+	struct usb_serial *serial = port->serial;
+	unsigned char modembits;
+
+	/* If we can read the modem status and the DCD is low then
+	   carrier is not raised yet */
+	if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
+		if (!(modembits & (1>>6)))
+			return 0;
+	}
+	/* Carrier raised, or we failed (eg disconnected) so
+	   progress accordingly */
+	return 1;
+}
+
+
 static int keyspan_pda_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
@@ -682,13 +711,6 @@
 	priv->tx_room = room;
 	priv->tx_throttled = room ? 0 : 1;
 
-	/* the normal serial device seems to always turn on DTR and RTS here,
-	   so do the same */
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
-	else
-		keyspan_pda_set_modem_info(serial, 0);
-
 	/*Start reading from the device*/
 	port->interrupt_in_urb->dev = serial->dev;
 	rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@
 error:
 	return rc;
 }
-
-
-static void keyspan_pda_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_pda_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
 	if (serial->dev) {
-		/* the normal serial device seems to always shut
-		   off DTR and RTS now */
-		if (tty->termios->c_cflag & HUPCL)
-			keyspan_pda_set_modem_info(serial, 0);
-
 		/* shutdown our bulk reads and writes */
 		usb_kill_urb(port->write_urb);
 		usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@
 	.usb_driver = 		&keyspan_pda_driver,
 	.id_table =		id_table_std,
 	.num_ports =		1,
+	.dtr_rts =		keyspan_pda_dtr_rts,
+	.carrier_raised	=	keyspan_pda_carrier_raised,
 	.open =			keyspan_pda_open,
 	.close =		keyspan_pda_close,
 	.write =		keyspan_pda_write,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fcd9082..fa817c6 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -76,8 +76,7 @@
 static void klsi_105_shutdown(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void klsi_105_close(struct usb_serial_port *port);
 static int  klsi_105_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@
 } /* klsi_105_open */
 
 
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void klsi_105_close(struct usb_serial_port *port)
 {
 	struct klsi_105_private *priv = usb_get_serial_port_data(port);
 	int rc;
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index c148544..6b57049 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -72,8 +72,7 @@
 static void kobil_shutdown(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port,
-			struct file *filp);
+static void kobil_close(struct usb_serial_port *port);
 static int  kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
 			 const unsigned char *buf, int count);
 static int  kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@
 
 	for (i = 0; i < serial->num_ports; ++i) {
 		while (serial->port[i]->port.count > 0)
-			kobil_close(NULL, serial->port[i], NULL);
+			kobil_close(serial->port[i]);
 		kfree(usb_get_serial_port_data(serial->port[i]));
 		usb_set_serial_port_data(serial->port[i], NULL);
 	}
@@ -346,11 +345,11 @@
 }
 
 
-static void kobil_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void kobil_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
+	/* FIXME: Add rts/dtr methods */
 	if (port->write_urb) {
 		usb_kill_urb(port->write_urb);
 		usb_free_urb(port->write_urb);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 82930a7..8737955 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -95,8 +95,8 @@
 static void mct_u232_shutdown(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void mct_u232_close(struct usb_serial_port *port);
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
 static void mct_u232_read_int_callback(struct urb *urb);
 static void mct_u232_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@
 	.num_ports =	     1,
 	.open =		     mct_u232_open,
 	.close =	     mct_u232_close,
+	.dtr_rts =	     mct_u232_dtr_rts,
 	.throttle =	     mct_u232_throttle,
 	.unthrottle =	     mct_u232_unthrottle,
 	.read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@
 	return retval;
 } /* mct_u232_open */
 
-
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
 {
-	unsigned int c_cflag;
 	unsigned int control_state;
 	struct mct_u232_private *priv = usb_get_serial_port_data(port);
-	dbg("%s port %d", __func__, port->number);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		mutex_lock(&port->serial->disc_mutex);
-		if (c_cflag & HUPCL && !port->serial->disconnected) {
-			/* drop DTR and RTS */
-			spin_lock_irq(&priv->lock);
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* drop DTR and RTS */
+		spin_lock_irq(&priv->lock);
+		if (on)
+			priv->control_state |= TIOCM_DTR | TIOCM_RTS;
+		else
 			priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
-			control_state = priv->control_state;
-			spin_unlock_irq(&priv->lock);
-			mct_u232_set_modem_ctrl(port->serial, control_state);
-		}
-		mutex_unlock(&port->serial->disc_mutex);
+		control_state = priv->control_state;
+		spin_unlock_irq(&priv->lock);
+		mct_u232_set_modem_ctrl(port->serial, control_state);
 	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
+static void mct_u232_close(struct usb_serial_port *port)
+{
+	dbg("%s port %d", __func__, port->number);
 
 	if (port->serial->dev) {
 		/* shutdown our urbs */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 24e3b5d..9e1a013 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -533,8 +533,7 @@
 	return chars;
 }
 
-static void mos7720_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7720_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7720_port;
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 84fb1dc..10b78a3 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1135,54 +1135,12 @@
 
 }
 
-/************************************************************************
- *
- * mos7840_block_until_tx_empty
- *
- *	This function will block the close until one of the following:
- *		1. TX count are 0
- *		2. The mos7840 has stopped
- *		3. A timeout of 3 seconds without activity has expired
- *
- ************************************************************************/
-static void mos7840_block_until_tx_empty(struct tty_struct *tty,
-				struct moschip_port *mos7840_port)
-{
-	int timeout = HZ / 10;
-	int wait = 30;
-	int count;
-
-	while (1) {
-
-		count = mos7840_chars_in_buffer(tty);
-
-		/* Check for Buffer status */
-		if (count <= 0)
-			return;
-
-		/* Block the thread for a while */
-		interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
-					       timeout);
-
-		/* No activity.. count down section */
-		wait--;
-		if (wait == 0) {
-			dbg("%s - TIMEOUT", __func__);
-			return;
-		} else {
-			/* Reset timeout value back to seconds */
-			wait = 30;
-		}
-	}
-}
-
 /*****************************************************************************
  * mos7840_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
 
-static void mos7840_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7840_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@
 		}
 	}
 
-	if (serial->dev)
-		/* flush and block until tx is empty */
-		mos7840_block_until_tx_empty(tty, mos7840_port);
-
 	/* While closing port, shutdown all bulk read, write  *
 	 * and interrupt read if they exists                  */
 	if (serial->dev) {
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index bcdcbb8..f5f3751a8 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -98,8 +98,7 @@
 	return result;
 }
 
-static void navman_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void navman_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index df65397..1104617 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -66,8 +66,7 @@
 /* function prototypes */
 static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void omninet_close(struct usb_serial_port *port);
 static void omninet_read_bulk_callback(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@
 	return result;
 }
 
-static void omninet_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void omninet_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	usb_kill_urb(port->read_urb);
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index b500ad1..c20480a 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -173,8 +173,7 @@
 	return result;
 }
 
-static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void opticon_close(struct usb_serial_port *port)
 {
 	struct opticon_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7817b82..a16d69f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -45,8 +45,9 @@
 /* Function prototypes */
 static int  option_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void option_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void option_close(struct usb_serial_port *port);
+static void option_dtr_rts(struct usb_serial_port *port, int on);
+
 static int  option_startup(struct usb_serial *serial);
 static void option_shutdown(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@
 static int  option_tiocmget(struct tty_struct *tty, struct file *file);
 static int  option_tiocmset(struct tty_struct *tty, struct file *file,
 				unsigned int set, unsigned int clear);
-static int  option_send_setup(struct tty_struct *tty, struct usb_serial_port *port);
+static int  option_send_setup(struct usb_serial_port *port);
 static int  option_suspend(struct usb_serial *serial, pm_message_t message);
 static int  option_resume(struct usb_serial *serial);
 
@@ -551,6 +552,7 @@
 	.num_ports         = 1,
 	.open              = option_open,
 	.close             = option_close,
+	.dtr_rts	   = option_dtr_rts,
 	.write             = option_write,
 	.write_room        = option_write_room,
 	.chars_in_buffer   = option_chars_in_buffer,
@@ -630,7 +632,7 @@
 	dbg("%s", __func__);
 	/* Doesn't support option setting */
 	tty_termios_copy_hw(tty->termios, old_termios);
-	option_send_setup(tty, port);
+	option_send_setup(port);
 }
 
 static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return option_send_setup(tty, port);
+	return option_send_setup(port);
 }
 
 /* Write */
@@ -897,10 +899,6 @@
 
 	dbg("%s", __func__);
 
-	/* Set some sane defaults */
-	portdata->rts_state = 1;
-	portdata->dtr_state = 1;
-
 	/* Reset low level data toggle and start reading from endpoints */
 	for (i = 0; i < N_IN_URB; i++) {
 		urb = portdata->in_urbs[i];
@@ -936,13 +934,28 @@
 				usb_pipeout(urb->pipe), 0); */
 	}
 
-	option_send_setup(tty, port);
+	option_send_setup(port);
 
 	return 0;
 }
 
-static void option_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void option_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+	struct option_port_private *portdata;
+
+	dbg("%s", __func__);
+	portdata = usb_get_serial_port_data(port);
+	mutex_lock(&serial->disc_mutex);
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
+	if (serial->dev)
+		option_send_setup(port);
+	mutex_unlock(&serial->disc_mutex);
+}
+
+
+static void option_close(struct usb_serial_port *port)
 {
 	int i;
 	struct usb_serial *serial = port->serial;
@@ -951,22 +964,13 @@
 	dbg("%s", __func__);
 	portdata = usb_get_serial_port_data(port);
 
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
-
 	if (serial->dev) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			option_send_setup(tty, port);
-		mutex_unlock(&serial->disc_mutex);
-
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 		for (i = 0; i < N_OUT_URB; i++)
 			usb_kill_urb(portdata->out_urbs[i]);
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@
  * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
  * CDC.
 */
-static int option_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int option_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 	int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
+	int val = 0;
 	dbg("%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		return usb_control_msg(serial->dev,
-			usb_rcvctrlpipe(serial->dev, 0),
-			0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	}
-	return 0;
+	return usb_control_msg(serial->dev,
+		usb_rcvctrlpipe(serial->dev, 0),
+		0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
 }
 
 static int option_startup(struct usb_serial *serial)
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index ba551f0..7de5478 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -143,8 +143,7 @@
 /* function prototypes */
 static int oti6858_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void oti6858_close(struct usb_serial_port *port);
 static void oti6858_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@
 	if (result != 0) {
 		dev_err(&port->dev, "%s(): usb_submit_urb() failed"
 			       " with error %d\n", __func__, result);
-		oti6858_close(tty, port, NULL);
+		oti6858_close(port);
 		return -EPROTO;
 	}
 
 	/* setup termios */
 	if (tty)
 		oti6858_set_termios(tty, port, &tmp_termios);
-
+	port->port.drain_delay = 256;	/* FIXME: check the FIFO length */
 	return 0;
 }
 
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void oti6858_close(struct usb_serial_port *port)
 {
 	struct oti6858_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s(port = %d)", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = 30 * HZ;	/* PL2303_CLOSING_WAIT */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): entering wait loop", __func__);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (oti6858_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): after wait loop", __func__);
-
 	/* clear out any remaining data in the buffer */
 	oti6858_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	/* FIXME
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps,HZ/10);
-	else
-	*/
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-	dbg("%s(): after schedule_timeout_interruptible()", __func__);
+	dbg("%s(): after buf_clear()", __func__);
 
 	/* cancel scheduled setup */
 	cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	/*
-	if (tty && (tty->termios->c_cflag) & HUPCL) {
-		// drop DTR and RTS
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->pending_setup.control &= ~CONTROL_MASK;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-	*/
 }
 
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 751a533..e02dc3d 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -652,69 +652,41 @@
 	kfree(buf);
 }
 
-static void pl2303_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	/* Change DTR and RTS */
+	if (on)
+		priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
+	else
+		priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	set_control_lines(port->serial->dev, control);
+}
+
+static void pl2303_close(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = PL2303_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (pl2303_buf_data_avail(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current) ||
-		    port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
 	/* clear out any remaining data in the buffer */
 	pl2303_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			set_control_lines(port->serial->dev, 0);
-		}
-	}
 }
 
 static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
 
@@ -758,9 +730,10 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -821,6 +794,14 @@
 	return result;
 }
 
+static int pl2303_carrier_raised(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & UART_DCD)
+		return 1;
+	return 0;
+}
+
 static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@
 	.num_ports =		1,
 	.open =			pl2303_open,
 	.close =		pl2303_close,
+	.dtr_rts = 		pl2303_dtr_rts,
+	.carrier_raised =	pl2303_carrier_raised,
 	.write =		pl2303_write,
 	.ioctl =		pl2303_ioctl,
 	.break_ctl =		pl2303_break_ctl,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 913225c..17ac34f 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -26,12 +26,10 @@
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
-#include <linux/usb/ch9.h>
 
 #define SWIMS_USB_REQUEST_SetPower	0x00
 #define SWIMS_USB_REQUEST_SetNmea	0x07
 
-/* per port private data */
 #define N_IN_URB	4
 #define N_OUT_URB	4
 #define IN_BUFLEN	4096
@@ -39,6 +37,12 @@
 static int debug;
 static int nmea;
 
+/* Used in interface blacklisting */
+struct sierra_iface_info {
+	const u32 infolen;	/* number of interface numbers on blacklist */
+	const u8  *ifaceinfo;	/* pointer to the array holding the numbers */
+};
+
 static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
 {
 	int result;
@@ -85,6 +89,23 @@
 	return result;
 }
 
+static int is_blacklisted(const u8 ifnum,
+				const struct sierra_iface_info *blacklist)
+{
+	const u8  *info;
+	int i;
+
+	if (blacklist) {
+		info = blacklist->ifaceinfo;
+
+		for (i = 0; i < blacklist->infolen; i++) {
+			if (info[i] == ifnum)
+				return 1;
+		}
+	}
+	return 0;
+}
+
 static int sierra_calc_interface(struct usb_serial *serial)
 {
 	int interface;
@@ -153,9 +174,25 @@
 	 */
 	usb_set_serial_data(serial, (void *)num_ports);
 
+	/* ifnum could have changed - by calling usb_set_interface */
+	ifnum = sierra_calc_interface(serial);
+
+	if (is_blacklisted(ifnum,
+				(struct sierra_iface_info *)id->driver_info)) {
+		dev_dbg(&serial->dev->dev,
+			"Ignoring blacklisted interface #%d\n", ifnum);
+		return -ENODEV;
+	}
+
 	return result;
 }
 
+static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 };
+static const struct sierra_iface_info direct_ip_interface_blacklist = {
+	.infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces),
+	.ifaceinfo = direct_ip_non_serial_ifaces,
+};
+
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x0017) },	/* Sierra Wireless EM5625 */
 	{ USB_DEVICE(0x1199, 0x0018) },	/* Sierra Wireless MC5720 */
@@ -188,9 +225,11 @@
 	{ USB_DEVICE(0x1199, 0x6833) },	/* Sierra Wireless MC8781 */
 	{ USB_DEVICE(0x1199, 0x683A) },	/* Sierra Wireless MC8785 */
 	{ USB_DEVICE(0x1199, 0x683B) },	/* Sierra Wireless MC8785 Composite */
-	{ USB_DEVICE(0x1199, 0x683C) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683E) },	/* Sierra Wireless MC8790 */
+	/* Sierra Wireless MC8790, MC8791, MC8792 Composite */
+	{ USB_DEVICE(0x1199, 0x683C) },
+	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8791 Composite */
+	/* Sierra Wireless MC8790, MC8791, MC8792 */
+	{ USB_DEVICE(0x1199, 0x683E) },
 	{ USB_DEVICE(0x1199, 0x6850) },	/* Sierra Wireless AirCard 880 */
 	{ USB_DEVICE(0x1199, 0x6851) },	/* Sierra Wireless AirCard 881 */
 	{ USB_DEVICE(0x1199, 0x6852) },	/* Sierra Wireless AirCard 880 E */
@@ -211,6 +250,10 @@
 	{ USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
 	{ USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */
 
+	{ USB_DEVICE(0x1199, 0x68A3), 	/* Sierra Wireless Direct IP modems */
+	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+	},
+
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, id_table);
@@ -229,7 +272,6 @@
 
 	/* Input endpoints and buffers for this port */
 	struct urb *in_urbs[N_IN_URB];
-	char *in_buffer[N_IN_URB];
 
 	/* Settings for the port */
 	int rts_state;	/* Handshaking pins (outputs) */
@@ -240,57 +282,50 @@
 	int ri_state;
 };
 
-static int sierra_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int sierra_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 	__u16 interface = 0;
+	int val = 0;
 
 	dev_dbg(&port->dev, "%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		/* If composite device then properly report interface */
-		if (serial->num_ports == 1) {
-			interface = sierra_calc_interface(serial);
-
-			/* Control message is sent only to interfaces with
-			 * interrupt_in endpoints
-			 */
-			if (port->interrupt_in_urb) {
-				/* send control message */
-				return usb_control_msg(serial->dev,
-					usb_rcvctrlpipe(serial->dev, 0),
-					0x22, 0x21, val, interface,
-					NULL, 0, USB_CTRL_SET_TIMEOUT);
-			}
-		}
-
-		/* Otherwise the need to do non-composite mapping */
-		else {
-			if (port->bulk_out_endpointAddress == 2)
-				interface = 0;
-			else if (port->bulk_out_endpointAddress == 4)
-				interface = 1;
-			else if (port->bulk_out_endpointAddress == 5)
-				interface = 2;
-
+	/* If composite device then properly report interface */
+	if (serial->num_ports == 1) {
+		interface = sierra_calc_interface(serial);
+		/* Control message is sent only to interfaces with
+		 * interrupt_in endpoints
+		 */
+		if (port->interrupt_in_urb) {
+			/* send control message */
 			return usb_control_msg(serial->dev,
 				usb_rcvctrlpipe(serial->dev, 0),
 				0x22, 0x21, val, interface,
 				NULL, 0, USB_CTRL_SET_TIMEOUT);
-
 		}
 	}
 
+	/* Otherwise the need to do non-composite mapping */
+	else {
+		if (port->bulk_out_endpointAddress == 2)
+			interface = 0;
+		else if (port->bulk_out_endpointAddress == 4)
+			interface = 1;
+		else if (port->bulk_out_endpointAddress == 5)
+			interface = 2;
+		return usb_control_msg(serial->dev,
+			usb_rcvctrlpipe(serial->dev, 0),
+			0x22, 0x21, val, interface,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	}
 	return 0;
 }
 
@@ -299,7 +334,7 @@
 {
 	dev_dbg(&port->dev, "%s", __func__);
 	tty_termios_copy_hw(tty->termios, old_termios);
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 }
 
 static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +373,18 @@
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return sierra_send_setup(tty, port);
+	return sierra_send_setup(port);
+}
+
+static void sierra_release_urb(struct urb *urb)
+{
+	struct usb_serial_port *port;
+	if (urb) {
+		port =  urb->context;
+		dev_dbg(&port->dev, "%s: %p\n", __func__, urb);
+		kfree(urb->transfer_buffer);
+		usb_free_urb(urb);
+	}
 }
 
 static void sierra_outdat_callback(struct urb *urb)
@@ -465,7 +511,7 @@
 				" received", __func__);
 
 		/* Resubmit urb so we continue receiving */
-		if (port->port.count && status != -ESHUTDOWN) {
+		if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
 			err = usb_submit_urb(urb, GFP_ATOMIC);
 			if (err)
 				dev_err(&port->dev, "resubmit read urb failed."
@@ -557,14 +603,129 @@
 	return 2048;
 }
 
+static void sierra_stop_rx_urbs(struct usb_serial_port *port)
+{
+	int i;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++)
+		usb_kill_urb(portdata->in_urbs[i]);
+
+	usb_kill_urb(port->interrupt_in_urb);
+}
+
+static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags)
+{
+	int ok_cnt;
+	int err = -EINVAL;
+	int i;
+	struct urb *urb;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+
+	ok_cnt = 0;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+		urb = portdata->in_urbs[i];
+		if (!urb)
+			continue;
+		err = usb_submit_urb(urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit urb failed: %d\n",
+				__func__, err);
+		} else {
+			ok_cnt++;
+		}
+	}
+
+	if (ok_cnt && port->interrupt_in_urb) {
+		err = usb_submit_urb(port->interrupt_in_urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit intr urb failed: %d\n",
+				__func__, err);
+		}
+	}
+
+	if (ok_cnt > 0) /* at least one rx urb submitted */
+		return 0;
+	else
+		return err;
+}
+
+static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint,
+					int dir, void *ctx, int len,
+					gfp_t mem_flags,
+					usb_complete_t callback)
+{
+	struct urb	*urb;
+	u8		*buf;
+
+	if (endpoint == -1)
+		return NULL;
+
+	urb = usb_alloc_urb(0, mem_flags);
+	if (urb == NULL) {
+		dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n",
+			__func__, endpoint);
+		return NULL;
+	}
+
+	buf = kmalloc(len, mem_flags);
+	if (buf) {
+		/* Fill URB using supplied data */
+		usb_fill_bulk_urb(urb, serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | dir,
+			buf, len, callback, ctx);
+
+		/* debug */
+		dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+	} else {
+		dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+
+		sierra_release_urb(urb);
+		urb = NULL;
+	}
+
+	return urb;
+}
+
+static void sierra_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct sierra_port_private *portdata;
+
+	dev_dbg(&port->dev, "%s\n", __func__);
+	portdata = usb_get_serial_port_data(port);
+
+	portdata->rts_state = 0;
+	portdata->dtr_state = 0;
+
+	if (serial->dev) {
+		mutex_lock(&serial->disc_mutex);
+		if (!serial->disconnected)
+			sierra_send_setup(port);
+		mutex_unlock(&serial->disc_mutex);
+
+		/* Stop reading urbs */
+		sierra_stop_rx_urbs(port);
+		/* .. and release them */
+		for (i = 0; i < N_IN_URB; i++) {
+			sierra_release_urb(portdata->in_urbs[i]);
+			portdata->in_urbs[i] = NULL;
+		}
+	}
+}
+
 static int sierra_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
 	struct sierra_port_private *portdata;
 	struct usb_serial *serial = port->serial;
 	int i;
+	int err;
+	int endpoint;
 	struct urb *urb;
-	int result;
 
 	portdata = usb_get_serial_port_data(port);
 
@@ -574,77 +735,52 @@
 	portdata->rts_state = 1;
 	portdata->dtr_state = 1;
 
-	/* Reset low level data toggle and start reading from endpoints */
-	for (i = 0; i < N_IN_URB; i++) {
-		urb = portdata->in_urbs[i];
-		if (!urb)
-			continue;
-		if (urb->dev != serial->dev) {
-			dev_dbg(&port->dev, "%s: dev %p != %p",
-				 __func__, urb->dev, serial->dev);
-			continue;
-		}
 
-		/*
-		 * make sure endpoint data toggle is synchronized with the
-		 * device
-		 */
-		usb_clear_halt(urb->dev, urb->pipe);
-
-		result = usb_submit_urb(urb, GFP_KERNEL);
-		if (result) {
-			dev_err(&port->dev, "submit urb %d failed (%d) %d\n",
-				i, result, urb->transfer_buffer_length);
-		}
+	endpoint = port->bulk_in_endpointAddress;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+		urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port,
+					IN_BUFLEN, GFP_KERNEL,
+					sierra_indat_callback);
+		portdata->in_urbs[i] = urb;
 	}
+	/* clear halt condition */
+	usb_clear_halt(serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN);
 
-	sierra_send_setup(tty, port);
-
-	/* start up the interrupt endpoint if we have one */
-	if (port->interrupt_in_urb) {
-		result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-		if (result)
-			dev_err(&port->dev, "submit irq_in urb failed %d\n",
-				result);
+	err = sierra_submit_rx_urbs(port, GFP_KERNEL);
+	if (err) {
+		/* get rid of everything as in close */
+		sierra_close(port);
+		return err;
 	}
+	sierra_send_setup(port);
+
 	return 0;
 }
 
-static void sierra_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
-	dev_dbg(&port->dev, "%s", __func__);
 	portdata = usb_get_serial_port_data(port);
-
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
 
 	if (serial->dev) {
 		mutex_lock(&serial->disc_mutex);
 		if (!serial->disconnected)
-			sierra_send_setup(tty, port);
+			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
-
-		/* Stop reading/writing urbs */
-		for (i = 0; i < N_IN_URB; i++)
-			usb_kill_urb(portdata->in_urbs[i]);
 	}
-
-	usb_kill_urb(port->interrupt_in_urb);
-	tty_port_tty_set(&port->port, NULL);
 }
 
 static int sierra_startup(struct usb_serial *serial)
 {
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
-	struct urb *urb;
 	int i;
-	int j;
 
 	dev_dbg(&serial->dev->dev, "%s", __func__);
 
@@ -666,34 +802,8 @@
 			return -ENOMEM;
 		}
 		spin_lock_init(&portdata->lock);
-		for (j = 0; j < N_IN_URB; j++) {
-			portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL);
-			if (!portdata->in_buffer[j]) {
-				for (--j; j >= 0; j--)
-					kfree(portdata->in_buffer[j]);
-				kfree(portdata);
-				return -ENOMEM;
-			}
-		}
-
+		/* Set the port private data pointer */
 		usb_set_serial_port_data(port, portdata);
-
-		/* initialize the in urbs */
-		for (j = 0; j < N_IN_URB; ++j) {
-			urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (urb == NULL) {
-				dev_dbg(&port->dev, "%s: alloc for in "
-					"port failed.", __func__);
-				continue;
-			}
-			/* Fill URB using supplied data. */
-			usb_fill_bulk_urb(urb, serial->dev,
-					  usb_rcvbulkpipe(serial->dev,
-						port->bulk_in_endpointAddress),
-					  portdata->in_buffer[j], IN_BUFLEN,
-					  sierra_indat_callback, port);
-			portdata->in_urbs[j] = urb;
-		}
 	}
 
 	return 0;
@@ -701,7 +811,7 @@
 
 static void sierra_shutdown(struct usb_serial *serial)
 {
-	int i, j;
+	int i;
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
 
@@ -714,12 +824,6 @@
 		portdata = usb_get_serial_port_data(port);
 		if (!portdata)
 			continue;
-
-		for (j = 0; j < N_IN_URB; j++) {
-			usb_kill_urb(portdata->in_urbs[j]);
-			usb_free_urb(portdata->in_urbs[j]);
-			kfree(portdata->in_buffer[j]);
-		}
 		kfree(portdata);
 		usb_set_serial_port_data(port, NULL);
 	}
@@ -737,6 +841,7 @@
 	.probe		   = sierra_probe,
 	.open              = sierra_open,
 	.close             = sierra_close,
+	.dtr_rts	   = sierra_dtr_rts,
 	.write             = sierra_write,
 	.write_room        = sierra_write_room,
 	.set_termios       = sierra_set_termios,
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 5e7528c..8f7ed8f 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -446,66 +446,47 @@
 			"RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
 }
 
-/* close the serial port. We should wait for data sending to device 1st and
- * then kill all urb. */
-static void spcp8x5_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static int spcp8x5_carrier_raised(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & MSR_STATUS_LINE_DCD)
+		return 1;
+	return 0;
+}
+
+static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control = MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS;
+	else
+		priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
+}
+
+/* close the serial port. We should wait for data sending to device 1st and
+ * then kill all urb. */
+static void spcp8x5_close(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
 	int result;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = SPCP8x5_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (ringbuf_avail_data(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current))
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-
 	/* clear out any remaining data in the buffer */
 	clear_ringbuf(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device (this is long enough
-	 * for the entire all byte spcp8x5 hardware buffer to drain with no
-	 * flow control for data rates of 1200 bps or more, for lower rates we
-	 * should really know how much data is in the buffer to compute a delay
-	 * that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560) / bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	set_current_state(TASK_INTERRUPTIBLE);
-	schedule_timeout(timeout);
-
-	/* clear control lines */
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
-		}
-	}
-
 	/* kill urb */
 	if (port->write_urb != NULL) {
 		result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		priv->line_control = MCR_DTR | MCR_RTS;
-	else
-		priv->line_control = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
 
 	/* Setup termios */
@@ -691,9 +665,10 @@
 	port->read_urb->dev = serial->dev;
 	ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
 	if (ret) {
-		spcp8x5_close(tty, port, NULL);
+		spcp8x5_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -1033,6 +1008,8 @@
 	.num_ports		= 1,
 	.open 			= spcp8x5_open,
 	.close 			= spcp8x5_close,
+	.dtr_rts		= spcp8x5_dtr_rts,
+	.carrier_raised		= spcp8x5_carrier_raised,
 	.write 			= spcp8x5_write,
 	.set_termios 		= spcp8x5_set_termios,
 	.ioctl 			= spcp8x5_ioctl,
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 69879e4..8b07ebc 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -152,8 +152,7 @@
 	return result;
 }
 
-static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void symbol_close(struct usb_serial_port *port)
 {
 	struct symbol_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 0a64bac..42cb04c 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -100,8 +100,7 @@
 static void ti_shutdown(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
 		struct file *file);
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-		struct file *file);
+static void ti_close(struct usb_serial_port *port);
 static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
 		const unsigned char *data, int count);
 static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@
 }
 
 
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *file)
+static void ti_close(struct usb_serial_port *port)
 {
 	struct ti_device *tdev;
 	struct ti_port *tport;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f331e2b..1967a7e 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -238,9 +238,11 @@
 			goto bailout_interface_put;
 		mutex_unlock(&serial->disc_mutex);
 	}
-
 	mutex_unlock(&port->mutex);
-	return 0;
+	/* Now do the correct tty layer semantics */
+	retval = tty_port_block_til_ready(&port->port, tty, filp);
+	if (retval == 0)
+		return 0;
 
 bailout_interface_put:
 	usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@
 	return retval;
 }
 
-static void serial_close(struct tty_struct *tty, struct file *filp)
+/**
+ *	serial_do_down		-	shut down hardware
+ *	@port: port to shut down
+ *
+ *	Shut down a USB port unless it is the console. We never shut down the
+ *	console hardware as it will always be in use.
+ *
+ *	Don't free any resources at this point
+ */
+static void serial_do_down(struct usb_serial_port *port)
 {
-	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial_driver *drv = port->serial->type;
 	struct usb_serial *serial;
 	struct module *owner;
-	int count;
 
-	if (!port)
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
 		return;
 
-	dbg("%s - port %d", __func__, port->number);
-
 	mutex_lock(&port->mutex);
 	serial = port->serial;
 	owner = serial->type->driver.owner;
 
-	if (port->port.count == 0) {
-		mutex_unlock(&port->mutex);
-		return;
-	}
+	if (drv->close)
+		drv->close(port);
 
-	if (port->port.count == 1)
-		/* only call the device specific close if this
-		 * port is being closed by the last owner. Ensure we do
-		 * this before we drop the port count. The call is protected
-		 * by the port mutex
-		 */
-		serial->type->close(tty, port, filp);
-
-	if (port->port.count == (port->console ? 2 : 1)) {
-		struct tty_struct *tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			/* We must do this before we drop the port count to
-			   zero. */
-			if (tty->driver_data)
-				tty->driver_data = NULL;
-			tty_port_tty_set(&port->port, NULL);
-			tty_kref_put(tty);
-		}
-	}
-
-	--port->port.count;
-	count = port->port.count;
 	mutex_unlock(&port->mutex);
+}
+
+/**
+ *	serial_do_free		-	free resources post close/hangup
+ *	@port: port to free up
+ *
+ *	Do the resource freeing and refcount dropping for the port. We must
+ *	be careful about ordering and we must avoid freeing up the console.
+ */
+
+static void serial_do_free(struct usb_serial_port *port)
+{
+	struct usb_serial *serial;
+	struct module *owner;
+
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
+		return;
+
+	serial = port->serial;
+	owner = serial->type->driver.owner;
 	put_device(&port->dev);
-
 	/* Mustn't dereference port any more */
-	if (count == 0) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			usb_autopm_put_interface(serial->interface);
-		mutex_unlock(&serial->disc_mutex);
-	}
+	mutex_lock(&serial->disc_mutex);
+	if (!serial->disconnected)
+		usb_autopm_put_interface(serial->interface);
+	mutex_unlock(&serial->disc_mutex);
 	usb_serial_put(serial);
-
 	/* Mustn't dereference serial any more */
-	if (count == 0)
-		module_put(owner);
+	module_put(owner);
+}
+
+static void serial_close(struct tty_struct *tty, struct file *filp)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	dbg("%s - port %d", __func__, port->number);
+
+
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+
+	serial_do_down(port);		
+	tty_port_close_end(&port->port, tty);
+	tty_port_tty_set(&port->port, NULL);
+	serial_do_free(port);
+}
+
+static void serial_hangup(struct tty_struct *tty)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	serial_do_down(port);
+	tty_port_hangup(&port->port);
+	serial_do_free(port);
 }
 
 static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@
 	return NULL;
 }
 
+static int serial_carrier_raised(struct tty_port *port)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->carrier_raised)
+		return drv->carrier_raised(p);
+	/* No carrier control - don't block */
+	return 1;	
+}
+
+static void serial_dtr_rts(struct tty_port *port, int on)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->dtr_rts)
+		drv->dtr_rts(p, on);
+}
+
+static const struct tty_port_operations serial_port_ops = {
+	.carrier_raised = serial_carrier_raised,
+	.dtr_rts = serial_dtr_rts,
+};
+
 int usb_serial_probe(struct usb_interface *interface,
 			       const struct usb_device_id *id)
 {
@@ -841,6 +891,7 @@
 		if (!port)
 			goto probe_error;
 		tty_port_init(&port->port);
+		port->port.ops = &serial_port_ops;
 		port->serial = serial;
 		spin_lock_init(&port->lock);
 		mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@
 		if (port) {
 			struct tty_struct *tty = tty_port_tty_get(&port->port);
 			if (tty) {
+				/* The hangup will occur asynchronously but
+				   the object refcounts will sort out all the
+				   cleanup */
 				tty_hangup(tty);
 				tty_kref_put(tty);
 			}
@@ -1135,6 +1189,7 @@
 	.open =			serial_open,
 	.close =		serial_close,
 	.write =		serial_write,
+	.hangup = 		serial_hangup,
 	.write_room =		serial_write_room,
 	.ioctl =		serial_ioctl,
 	.set_termios =		serial_set_termios,
@@ -1147,6 +1202,7 @@
 	.proc_fops =		&serial_proc_fops,
 };
 
+
 struct tty_driver *usb_serial_tty_driver;
 
 static int __init usb_serial_init(void)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 5ac414b..b15f1c0 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -38,8 +38,7 @@
 /* function prototypes for a handspring visor */
 static int  visor_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void visor_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void visor_close(struct usb_serial_port *port);
 static int  visor_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int  visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@
 }
 
 
-static void visor_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void visor_close(struct usb_serial_port *port)
 {
 	struct visor_private *priv = usb_get_serial_port_data(port);
 	unsigned char *transfer_buffer;
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5335d32..7c7295d 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -147,8 +147,7 @@
 static void whiteheat_shutdown(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void whiteheat_close(struct usb_serial_port *port);
 static int  whiteheat_write(struct tty_struct *tty,
 			struct usb_serial_port *port,
 			const unsigned char *buf, int count);
@@ -712,8 +711,7 @@
 }
 
 
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void whiteheat_close(struct usb_serial_port *port)
 {
 	struct whiteheat_private *info = usb_get_serial_port_data(port);
 	struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	mutex_lock(&port->serial->disc_mutex);
-	/* filp is NULL when called from usb_serial_disconnect */
-	if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
-		mutex_unlock(&port->serial->disc_mutex);
-		return;
-	}
-	mutex_unlock(&port->serial->disc_mutex);
-
-	tty->closing = 1;
-
-/*
- * Not currently in use; tty_wait_until_sent() calls
- * serial_chars_in_buffer() which deadlocks on the second semaphore
- * acquisition. This should be fixed at some point. Greg's been
- * notified.
-	if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
-		tty_wait_until_sent(tty, CLOSING_DELAY);
-	}
-*/
-
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	firm_report_tx_done(port);
-
 	firm_close(port);
 
 	/* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@
 	}
 	spin_unlock_irq(&info->lock);
 	mutex_unlock(&info->deathwarrant);
-
 	stop_command_port(port->serial);
-
-	tty->closing = 0;
 }
 
 
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 4ca3b58..cfa26d5 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -132,7 +132,7 @@
 
 		if (us->fflags & US_FL_MAX_SECTORS_MIN)
 			max_sectors = PAGE_CACHE_SIZE >> 9;
-		if (sdev->request_queue->max_sectors > max_sectors)
+		if (queue_max_sectors(sdev->request_queue) > max_sectors)
 			blk_queue_max_sectors(sdev->request_queue,
 					      max_sectors);
 	} else if (sdev->type == TYPE_TAPE) {
@@ -483,7 +483,7 @@
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 
-	return sprintf(buf, "%u\n", sdev->request_queue->max_sectors);
+	return sprintf(buf, "%u\n", queue_max_sectors(sdev->request_queue));
 }
 
 /* Input routine for the sysfs max_sectors file */
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0048f11..74712cb 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1996,7 +1996,7 @@
 
 config FB_XILINX
 	tristate "Xilinx frame buffer support"
-	depends on FB && XILINX_VIRTEX
+	depends on FB && (XILINX_VIRTEX || MICROBLAZE)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 35e8eb0..e4e4d43 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -354,7 +354,7 @@
 static int default_lcd_on __devinitdata = 1;
 
 #ifdef CONFIG_MTRR
-static int mtrr = 1;
+static bool mtrr = true;
 #endif
 
 #ifdef CONFIG_PMAC_BACKLIGHT
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 38e86b8..59d7d5e 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -180,7 +180,7 @@
 }
 
 #ifdef CONFIG_VGACON_SOFT_SCROLLBACK
-#include <linux/bootmem.h>
+#include <linux/slab.h>
 /* software scrollback */
 static void *vgacon_scrollback;
 static int vgacon_scrollback_tail;
@@ -210,8 +210,7 @@
  */
 static void __init_refok vgacon_scrollback_startup(void)
 {
-	vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE
-					  * 1024);
+	vgacon_scrollback = kcalloc(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, GFP_NOWAIT);
 	vgacon_scrollback_init(vga_video_num_columns * 2);
 }
 
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 83c5cef..da7c01b 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -1736,10 +1736,8 @@
 
 #ifdef CONFIG_ARCH_SHARK
 	err = cyberpro_vl_probe();
-	if (!err) {
+	if (!err)
 		ret = 0;
-		__module_get(THIS_MODULE);
-	}
 #endif
 #ifdef CONFIG_PCI
 	err = pci_register_driver(&cyberpro_driver);
@@ -1749,14 +1747,15 @@
 
 	return ret ? err : 0;
 }
+module_init(cyber2000fb_init);
 
+#ifndef CONFIG_ARCH_SHARK
 static void __exit cyberpro_exit(void)
 {
 	pci_unregister_driver(&cyberpro_driver);
 }
-
-module_init(cyber2000fb_init);
 module_exit(cyberpro_exit);
+#endif
 
 MODULE_AUTHOR("Russell King");
 MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver");
diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c
index e6467cf..020db7f 100644
--- a/drivers/video/hitfb.c
+++ b/drivers/video/hitfb.c
@@ -335,9 +335,9 @@
 	if (fb_get_options("hitfb", NULL))
 		return -ENODEV;
 
-	hitfb_fix.mmio_start = CONFIG_HD64461_IOBASE+0x1000;
+	hitfb_fix.mmio_start = HD64461_IO_OFFSET(0x1000);
 	hitfb_fix.mmio_len = 0x1000;
-	hitfb_fix.smem_start = CONFIG_HD64461_IOBASE + 0x02000000;
+	hitfb_fix.smem_start = HD64461_IO_OFFSET(0x02000000);
 	hitfb_fix.smem_len = 512 * 1024;
 
 	lcdclor = fb_readw(HD64461_LCDCLOR);
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 421770b..ca5b464 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -45,7 +45,7 @@
 static int mtrr		__devinitdata = 3; /* enable mtrr by default */
 static int blank	= 1;		   /* enable blanking by default */
 static int ypan		= 1; 		 /* 0: scroll, 1: ypan, 2: ywrap */
-static int pmi_setpal	__devinitdata = 1; /* use PMI for palette changes */
+static bool pmi_setpal	__devinitdata = true; /* use PMI for palette changes */
 static int nocrtc	__devinitdata; /* ignore CRTC settings */
 static int noedid	__devinitdata; /* don't try DDC transfers */
 static int vram_remap	__devinitdata; /* set amt. of memory to be used */
@@ -2002,11 +2002,7 @@
 
 module_exit(uvesafb_exit);
 
-static int param_get_scroll(char *buffer, struct kernel_param *kp)
-{
-	return 0;
-}
-
+#define param_get_scroll NULL
 static int param_set_scroll(const char *val, struct kernel_param *kp)
 {
 	ypan = 0;
@@ -2017,6 +2013,8 @@
 		ypan = 1;
 	else if (!strcmp(val, "ywrap"))
 		ypan = 2;
+	else
+		return -EINVAL;
 
 	return 0;
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 018c070..3a43ebf 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -31,21 +31,37 @@
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
+static ssize_t features_show(struct device *_d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	unsigned int i;
+	ssize_t len = 0;
+
+	/* We actually represent this as a bitstring, as it could be
+	 * arbitrary length in future. */
+	for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
+		len += sprintf(buf+len, "%c",
+			       test_bit(i, dev->features) ? '1' : '0');
+	len += sprintf(buf+len, "\n");
+	return len;
+}
 static struct device_attribute virtio_dev_attrs[] = {
 	__ATTR_RO(device),
 	__ATTR_RO(vendor),
 	__ATTR_RO(status),
 	__ATTR_RO(modalias),
+	__ATTR_RO(features),
 	__ATTR_NULL
 };
 
 static inline int virtio_id_match(const struct virtio_device *dev,
 				  const struct virtio_device_id *id)
 {
-	if (id->device != dev->id.device)
+	if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID)
 		return 0;
 
-	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor != dev->id.vendor;
+	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
 }
 
 /* This looks through all the IDs a driver claims to support.  If any of them
@@ -118,13 +134,14 @@
 		if (device_features & (1 << i))
 			set_bit(i, dev->features);
 
+	dev->config->finalize_features(dev);
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else {
-		dev->config->finalize_features(dev);
+	else
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-	}
+
 	return err;
 }
 
@@ -185,6 +202,8 @@
 	/* Acknowledge that we've seen the device. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
+	INIT_LIST_HEAD(&dev->vqs);
+
 	/* device_register() causes the bus infrastructure to look for a
 	 * matching driver. */
 	err = device_register(&dev->dev);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9c76a06..26b2782 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -204,6 +204,9 @@
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	struct virtqueue *vqs[2];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
+	const char *names[] = { "inflate", "deflate" };
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -218,22 +221,17 @@
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
-	if (IS_ERR(vb->inflate_vq)) {
-		err = PTR_ERR(vb->inflate_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto out_free_vb;
-	}
 
-	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
-	if (IS_ERR(vb->deflate_vq)) {
-		err = PTR_ERR(vb->deflate_vq);
-		goto out_del_inflate_vq;
-	}
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
-		goto out_del_deflate_vq;
+		goto out_del_vqs;
 	}
 
 	vb->tell_host_first
@@ -241,10 +239,8 @@
 
 	return 0;
 
-out_del_deflate_vq:
-	vdev->config->del_vq(vb->deflate_vq);
-out_del_inflate_vq:
-	vdev->config->del_vq(vb->inflate_vq);
+out_del_vqs:
+	vdev->config->del_vqs(vdev);
 out_free_vb:
 	kfree(vb);
 out:
@@ -264,8 +260,7 @@
 	/* Now we reset the device so we can clean up the queues. */
 	vdev->config->reset(vdev);
 
-	vdev->config->del_vq(vb->deflate_vq);
-	vdev->config->del_vq(vb->inflate_vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vb);
 }
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 330aacb..193c8f0 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -42,6 +42,26 @@
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
+
+	/* MSI-X support */
+	int msix_enabled;
+	int intx_enabled;
+	struct msix_entry *msix_entries;
+	/* Name strings for interrupts. This size should be enough,
+	 * and I'm too lazy to allocate each name separately. */
+	char (*msix_names)[256];
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated */
+	unsigned msix_used_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
 };
 
 struct virtio_pci_vq_info
@@ -60,6 +80,9 @@
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -109,7 +132,8 @@
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	u8 *ptr = buf;
 	int i;
 
@@ -123,7 +147,8 @@
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	const u8 *ptr = buf;
 	int i;
 
@@ -164,37 +189,26 @@
 	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
-/* A small wrapper to also acknowledge the interrupt when it's handled.
- * I really need an EIO hook for the vring so I can ack the interrupt once we
- * know that we'll be handling the IRQ but before we invoke the callback since
- * the callback may notify the host which results in the host attempting to
- * raise an interrupt that we would then mask once we acknowledged the
- * interrupt. */
-static irqreturn_t vp_interrupt(int irq, void *opaque)
+/* Handle a configuration change: Tell driver if it wants to know. */
+static irqreturn_t vp_config_changed(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_driver *drv;
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->config_changed)
+		drv->config_changed(&vp_dev->vdev);
+	return IRQ_HANDLED;
+}
+
+/* Notify all virtqueues on an interrupt. */
+static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
 	struct virtio_pci_vq_info *info;
 	irqreturn_t ret = IRQ_NONE;
 	unsigned long flags;
-	u8 isr;
-
-	/* reading the ISR has the effect of also clearing it so it's very
-	 * important to save off the value. */
-	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
-
-	/* It's definitely not us if the ISR was not high */
-	if (!isr)
-		return IRQ_NONE;
-
-	/* Configuration change?  Tell driver if it wants to know. */
-	if (isr & VIRTIO_PCI_ISR_CONFIG) {
-		struct virtio_driver *drv;
-		drv = container_of(vp_dev->vdev.dev.driver,
-				   struct virtio_driver, driver);
-
-		if (drv && drv->config_changed)
-			drv->config_changed(&vp_dev->vdev);
-	}
 
 	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_for_each_entry(info, &vp_dev->virtqueues, node) {
@@ -206,15 +220,157 @@
 	return ret;
 }
 
-/* the config->find_vq() implementation */
+/* A small wrapper to also acknowledge the interrupt when it's handled.
+ * I really need an EIO hook for the vring so I can ack the interrupt once we
+ * know that we'll be handling the IRQ but before we invoke the callback since
+ * the callback may notify the host which results in the host attempting to
+ * raise an interrupt that we would then mask once we acknowledged the
+ * interrupt. */
+static irqreturn_t vp_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	u8 isr;
+
+	/* reading the ISR has the effect of also clearing it so it's very
+	 * important to save off the value. */
+	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+
+	/* It's definitely not us if the ISR was not high */
+	if (!isr)
+		return IRQ_NONE;
+
+	/* Configuration change?  Tell driver if it wants to know. */
+	if (isr & VIRTIO_PCI_ISR_CONFIG)
+		vp_config_changed(irq, opaque);
+
+	return vp_vring_interrupt(irq, opaque);
+}
+
+static void vp_free_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
+
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+	vp_dev->msix_used_vectors = 0;
+
+	if (vp_dev->msix_enabled) {
+		/* Disable the vector used for configuration */
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Flush the write out to device */
+		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+
+		vp_dev->msix_enabled = 0;
+		pci_disable_msix(vp_dev->pci_dev);
+	}
+}
+
+static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			  int *options, int noptions)
+{
+	int i;
+	for (i = 0; i < noptions; ++i)
+		if (!pci_enable_msix(dev, entries, options[i]))
+			return options[i];
+	return -EBUSY;
+}
+
+static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+	/* We want at most one vector per queue and one for config changes.
+	 * Fallback to separate vectors for config and a shared for queues.
+	 * Finally fall back to regular interrupts. */
+	int options[] = { max_vqs + 1, 2 };
+	int nvectors = max(options[0], options[1]);
+
+	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
+				       GFP_KERNEL);
+	if (!vp_dev->msix_entries)
+		goto error_entries;
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error_names;
+
+	for (i = 0; i < nvectors; ++i)
+		vp_dev->msix_entries[i].entry = i;
+
+	err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries,
+			     options, ARRAY_SIZE(options));
+	if (err < 0) {
+		/* Can't allocate enough MSI-X vectors, use regular interrupt */
+		vp_dev->msix_vectors = 0;
+		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+				  IRQF_SHARED, name, vp_dev);
+		if (err)
+			goto error_irq;
+		vp_dev->intx_enabled = 1;
+	} else {
+		vp_dev->msix_vectors = err;
+		vp_dev->msix_enabled = 1;
+
+		/* Set the vector used for configuration */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-config", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_config_changed, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+
+		iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Verify we had enough resources to assign the vector */
+		v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		if (v == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto error_irq;
+		}
+	}
+
+	if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error_irq:
+	vp_free_vectors(vdev);
+	kfree(vp_dev->msix_names);
+error_names:
+	kfree(vp_dev->msix_entries);
+error_entries:
+	return err;
+}
+
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags, size;
-	u16 num;
+	u16 num, vector;
 	int err;
 
 	/* Select the queue we're interested in */
@@ -233,6 +389,7 @@
 
 	info->queue_index = index;
 	info->num = num;
+	info->vector = VIRTIO_MSI_NO_VECTOR;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -247,7 +404,7 @@
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback);
+				 vdev, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
@@ -256,12 +413,43 @@
 	vq->priv = info;
 	info->vq = vq;
 
+	/* allocate per-vq vector if available and necessary */
+	if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) {
+		vector = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
+			 "%s-%s", dev_name(&vp_dev->vdev.dev), name);
+		err = request_irq(vp_dev->msix_entries[vector].vector,
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[vector], vq);
+		if (err)
+			goto out_request_irq;
+		info->vector = vector;
+		++vp_dev->msix_used_vectors;
+	} else
+		vector = VP_MSIX_VQ_VECTOR;
+
+	 if (callback && vp_dev->msix_enabled) {
+		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		if (vector == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto out_assign;
+		}
+	}
+
 	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_add(&info->node, &vp_dev->virtqueues);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return vq;
 
+out_assign:
+	if (info->vector != VIRTIO_MSI_NO_VECTOR) {
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+		--vp_dev->msix_used_vectors;
+	}
+out_request_irq:
+	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 	free_pages_exact(info->queue, size);
@@ -270,21 +458,27 @@
 	return ERR_PTR(err);
 }
 
-/* the config->del_vq() implementation */
 static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
-	unsigned long flags, size;
+	unsigned long size;
 
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_del(&info->node);
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
+	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+
+	if (info->vector != VIRTIO_MSI_NO_VECTOR)
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+
+	if (vp_dev->msix_enabled) {
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		/* Flush the write out to device */
+		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+	}
 
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
@@ -292,14 +486,57 @@
 	kfree(info);
 }
 
+/* the config->del_vqs() implementation */
+static void vp_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		vp_del_vq(vq);
+
+	vp_free_vectors(vdev);
+}
+
+/* the config->find_vqs() implementation */
+static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	int vectors = 0;
+	int i, err;
+
+	/* How many vectors would we like? */
+	for (i = 0; i < nvqs; ++i)
+		if (callbacks[i])
+			++vectors;
+
+	err = vp_request_vectors(vdev, vectors);
+	if (err)
+		goto error_request;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error_find;
+	}
+	return 0;
+
+error_find:
+	vp_del_vqs(vdev);
+
+error_request:
+	return PTR_ERR(vqs[i]);
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
 	.set_status	= vp_set_status,
 	.reset		= vp_reset,
-	.find_vq	= vp_find_vq,
-	.del_vq		= vp_del_vq,
+	.find_vqs	= vp_find_vqs,
+	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 };
@@ -310,7 +547,7 @@
 	struct virtio_pci_device *vp_dev = to_vp_device(dev);
 	struct pci_dev *pci_dev = vp_dev->pci_dev;
 
-	free_irq(pci_dev->irq, vp_dev);
+	vp_del_vqs(dev);
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
 	pci_release_regions(pci_dev);
@@ -369,21 +606,13 @@
 	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 	vp_dev->vdev.id.device = pci_dev->subsystem_device;
 
-	/* register a handler for the queue with the PCI device's interrupt */
-	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
-			  dev_name(&vp_dev->vdev.dev), vp_dev);
-	if (err)
-		goto out_set_drvdata;
-
 	/* finally register the virtio device */
 	err = register_virtio_device(&vp_dev->vdev);
 	if (err)
-		goto out_req_irq;
+		goto out_set_drvdata;
 
 	return 0;
 
-out_req_irq:
-	free_irq(pci_dev->irq, vp_dev);
 out_set_drvdata:
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5c52369..a882f26 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,21 +23,30 @@
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&(_vq)->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		BUG();						\
+	} while (0)
 /* Caller is supposed to guarantee no reentry. */
 #define START_USE(_vq)						\
 	do {							\
 		if ((_vq)->in_use)				\
-			panic("in_use = %i\n", (_vq)->in_use);	\
+			panic("%s:in_use = %i\n",		\
+			      (_vq)->vq.name, (_vq)->in_use);	\
 		(_vq)->in_use = __LINE__;			\
 		mb();						\
-	} while(0)
+	} while (0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0)
 #else
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&_vq->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		(_vq)->broken = true;				\
+	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
 #endif
@@ -52,6 +61,9 @@
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
+	/* Host supports indirect buffers */
+	bool indirect;
+
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -76,6 +88,55 @@
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/* Set up an indirect table of descriptors and add it to the queue. */
+static int vring_add_indirect(struct vring_virtqueue *vq,
+			      struct scatterlist sg[],
+			      unsigned int out,
+			      unsigned int in)
+{
+	struct vring_desc *desc;
+	unsigned head;
+	int i;
+
+	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
+	if (!desc)
+		return vq->vring.num;
+
+	/* Transfer entries from the sg list into the indirect page */
+	for (i = 0; i < out; i++) {
+		desc[i].flags = VRING_DESC_F_NEXT;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+	for (; i < (out + in); i++) {
+		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+
+	/* Last one doesn't continue. */
+	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
+	desc[i-1].next = 0;
+
+	/* We're about to use a buffer */
+	vq->num_free--;
+
+	/* Use a single buffer which doesn't continue */
+	head = vq->free_head;
+	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+	vq->vring.desc[head].addr = virt_to_phys(desc);
+	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
+
+	/* Update free pointer */
+	vq->free_head = vq->vring.desc[head].next;
+
+	return head;
+}
+
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
 			 unsigned int out,
@@ -85,12 +146,21 @@
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, head, uninitialized_var(prev);
 
+	START_USE(vq);
+
 	BUG_ON(data == NULL);
+
+	/* If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold */
+	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+		head = vring_add_indirect(vq, sg, out, in);
+		if (head != vq->vring.num)
+			goto add_head;
+	}
+
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	START_USE(vq);
-
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
@@ -127,6 +197,7 @@
 	/* Update free pointer */
 	vq->free_head = i;
 
+add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
@@ -170,6 +241,11 @@
 
 	/* Put back on free list: find end */
 	i = head;
+
+	/* Free the indirect table */
+	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
+		kfree(phys_to_virt(vq->vring.desc[i].addr));
+
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
 		vq->num_free++;
@@ -284,7 +360,8 @@
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
-				      void (*callback)(struct virtqueue *))
+				      void (*callback)(struct virtqueue *),
+				      const char *name)
 {
 	struct vring_virtqueue *vq;
 	unsigned int i;
@@ -303,14 +380,18 @@
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
+	vq->vq.name = name;
 	vq->notify = notify;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
 #endif
 
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -327,6 +408,7 @@
 
 void vring_del_virtqueue(struct virtqueue *vq)
 {
+	list_del(&vq->list);
 	kfree(to_vvq(vq));
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
@@ -338,6 +420,8 @@
 
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 		switch (i) {
+		case VIRTIO_RING_F_INDIRECT_DESC:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8ac9cdd..cab100a 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@
 	  secure, but slightly less efficient.
 	  If in doubt, say yes.
 
+config XEN_DEV_EVTCHN
+	tristate "Xen /dev/xen/evtchn device"
+	depends on XEN
+	default y
+	help
+	  The evtchn driver allows a userspace process to triger event
+	  channels and to receive notification of an event channel
+	  firing.
+	  If in doubt, say yes.
+
 config XENFS
 	tristate "Xen filesystem"
 	depends on XEN
@@ -41,3 +51,13 @@
          a xen platform.
          If in doubt, say yes.
 
+config XEN_SYS_HYPERVISOR
+       bool "Create xen entries under /sys/hypervisor"
+       depends on XEN && SYSFS
+       select SYS_HYPERVISOR
+       default y
+       help
+         Create entries under /sys/hypervisor describing the Xen
+	 hypervisor environment.  When running native or in another
+	 virtual environment, /sys/hypervisor will still be present,
+	 but will have no xen contents.
\ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc..ec2a39b 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,6 @@
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
+obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
\ No newline at end of file
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..891d2e9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@
 	return info_for_irq(irq)->evtchn;
 }
 
+unsigned irq_from_evtchn(unsigned int evtchn)
+{
+	return evtchn_to_irq[evtchn];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
@@ -335,7 +341,7 @@
 	if (irq == nr_irqs)
 		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
-	desc = irq_to_desc_alloc_cpu(irq, 0);
+	desc = irq_to_desc_alloc_node(irq, 0);
 	if (WARN_ON(desc == NULL))
 		return -1;
 
@@ -688,13 +694,13 @@
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
 	struct evtchn_bind_vcpu bind_vcpu;
 	int evtchn = evtchn_from_irq(irq);
 
 	if (!VALID_EVTCHN(evtchn))
-		return;
+		return -1;
 
 	/* Send future instances of this interrupt to other vcpu. */
 	bind_vcpu.port = evtchn;
@@ -707,13 +713,15 @@
 	 */
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
 		bind_evtchn_to_cpu(evtchn, tcpu);
+
+	return 0;
 }
 
-
-static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
 	unsigned tcpu = cpumask_first(dest);
-	rebind_irq_to_cpu(irq, tcpu);
+
+	return rebind_irq_to_cpu(irq, tcpu);
 }
 
 int resend_irq_on_evtchn(unsigned int irq)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 0000000..af03195
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,507 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+
+struct per_user_data {
+	struct mutex bind_mutex; /* serialize bind/unbind operations */
+
+	/* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	evtchn_port_t *ring;
+	unsigned int ring_cons, ring_prod, ring_overflow;
+	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+
+	/* Processes wait on this queue when ring is empty. */
+	wait_queue_head_t evtchn_wait;
+	struct fasync_struct *evtchn_async_queue;
+	const char *name;
+};
+
+/* Who's bound to each port? */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+
+irqreturn_t evtchn_interrupt(int irq, void *data)
+{
+	unsigned int port = (unsigned long)data;
+	struct per_user_data *u;
+
+	spin_lock(&port_user_lock);
+
+	u = port_user[port];
+
+	disable_irq_nosync(irq);
+
+	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		wmb(); /* Ensure ring contents visible */
+		if (u->ring_cons == u->ring_prod++) {
+			wake_up_interruptible(&u->evtchn_wait);
+			kill_fasync(&u->evtchn_async_queue,
+				    SIGIO, POLL_IN);
+		}
+	} else {
+		u->ring_overflow = 1;
+	}
+
+	spin_unlock(&port_user_lock);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int rc;
+	unsigned int c, p, bytes1 = 0, bytes2 = 0;
+	struct per_user_data *u = file->private_data;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	if (count == 0)
+		return 0;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	for (;;) {
+		mutex_lock(&u->ring_cons_mutex);
+
+		rc = -EFBIG;
+		if (u->ring_overflow)
+			goto unlock_out;
+
+		c = u->ring_cons;
+		p = u->ring_prod;
+		if (c != p)
+			break;
+
+		mutex_unlock(&u->ring_cons_mutex);
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		rc = wait_event_interruptible(u->evtchn_wait,
+					      u->ring_cons != u->ring_prod);
+		if (rc)
+			return rc;
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+			sizeof(evtchn_port_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+	} else {
+		bytes1 = (p - c) * sizeof(evtchn_port_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if (bytes1 > count) {
+		bytes1 = count;
+		bytes2 = 0;
+	} else if ((bytes1 + bytes2) > count) {
+		bytes2 = count - bytes1;
+	}
+
+	rc = -EFAULT;
+	rmb(); /* Ensure that we see the port before we copy it. */
+	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	    ((bytes2 != 0) &&
+	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+		goto unlock_out;
+
+	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	rc = bytes1 + bytes2;
+
+ unlock_out:
+	mutex_unlock(&u->ring_cons_mutex);
+	return rc;
+}
+
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int rc, i;
+	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	struct per_user_data *u = file->private_data;
+
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	rc = 0;
+	if (count == 0)
+		goto out;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	rc = -EFAULT;
+	if (copy_from_user(kbuf, buf, count) != 0)
+		goto out;
+
+	spin_lock_irq(&port_user_lock);
+	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
+		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+			enable_irq(irq_from_evtchn(kbuf[i]));
+	spin_unlock_irq(&port_user_lock);
+
+	rc = count;
+
+ out:
+	free_page((unsigned long)kbuf);
+	return rc;
+}
+
+static int evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+	int rc = 0;
+
+	/*
+	 * Ports are never reused, so every caller should pass in a
+	 * unique port.
+	 *
+	 * (Locking not necessary because we haven't registered the
+	 * interrupt handler yet, and our caller has already
+	 * serialized bind operations.)
+	 */
+	BUG_ON(port_user[port] != NULL);
+	port_user[port] = u;
+
+	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+				       u->name, (void *)(unsigned long)port);
+	if (rc >= 0)
+		rc = 0;
+
+	return rc;
+}
+
+static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+{
+	int irq = irq_from_evtchn(port);
+
+	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+
+	/* make sure we unbind the irq handler before clearing the port */
+	barrier();
+
+	port_user[port] = NULL;
+}
+
+static long evtchn_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	int rc;
+	struct per_user_data *u = file->private_data;
+	void __user *uarg = (void __user *) arg;
+
+	/* Prevent bind from racing with unbind */
+	mutex_lock(&u->bind_mutex);
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_BIND_VIRQ: {
+		struct ioctl_evtchn_bind_virq bind;
+		struct evtchn_bind_virq bind_virq;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_virq.virq = bind.virq;
+		bind_virq.vcpu = 0;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						 &bind_virq);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_virq.port);
+		if (rc == 0)
+			rc = bind_virq.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+		struct ioctl_evtchn_bind_interdomain bind;
+		struct evtchn_bind_interdomain bind_interdomain;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_interdomain.remote_dom  = bind.remote_domain;
+		bind_interdomain.remote_port = bind.remote_port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+						 &bind_interdomain);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+		if (rc == 0)
+			rc = bind_interdomain.local_port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+		struct ioctl_evtchn_bind_unbound_port bind;
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = bind.remote_domain;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						 &alloc_unbound);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, alloc_unbound.port);
+		if (rc == 0)
+			rc = alloc_unbound.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_UNBIND: {
+		struct ioctl_evtchn_unbind unbind;
+
+		rc = -EFAULT;
+		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+			break;
+
+		rc = -EINVAL;
+		if (unbind.port >= NR_EVENT_CHANNELS)
+			break;
+
+		spin_lock_irq(&port_user_lock);
+
+		rc = -ENOTCONN;
+		if (port_user[unbind.port] != u) {
+			spin_unlock_irq(&port_user_lock);
+			break;
+		}
+
+		evtchn_unbind_from_user(u, unbind.port);
+
+		spin_unlock_irq(&port_user_lock);
+
+		rc = 0;
+		break;
+	}
+
+	case IOCTL_EVTCHN_NOTIFY: {
+		struct ioctl_evtchn_notify notify;
+
+		rc = -EFAULT;
+		if (copy_from_user(&notify, uarg, sizeof(notify)))
+			break;
+
+		if (notify.port >= NR_EVENT_CHANNELS) {
+			rc = -EINVAL;
+		} else if (port_user[notify.port] != u) {
+			rc = -ENOTCONN;
+		} else {
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	}
+
+	case IOCTL_EVTCHN_RESET: {
+		/* Initialise the ring to empty. Clear errors. */
+		mutex_lock(&u->ring_cons_mutex);
+		spin_lock_irq(&port_user_lock);
+		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+		spin_unlock_irq(&port_user_lock);
+		mutex_unlock(&u->ring_cons_mutex);
+		rc = 0;
+		break;
+	}
+
+	default:
+		rc = -ENOSYS;
+		break;
+	}
+	mutex_unlock(&u->bind_mutex);
+
+	return rc;
+}
+
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = POLLOUT | POLLWRNORM;
+	struct per_user_data *u = file->private_data;
+
+	poll_wait(file, &u->evtchn_wait, wait);
+	if (u->ring_cons != u->ring_prod)
+		mask |= POLLIN | POLLRDNORM;
+	if (u->ring_overflow)
+		mask = POLLERR;
+	return mask;
+}
+
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+	struct per_user_data *u = filp->private_data;
+	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+	struct per_user_data *u;
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (u == NULL)
+		return -ENOMEM;
+
+	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+	if (u->name == NULL) {
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	init_waitqueue_head(&u->evtchn_wait);
+
+	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	if (u->ring == NULL) {
+		kfree(u->name);
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	mutex_init(&u->bind_mutex);
+	mutex_init(&u->ring_cons_mutex);
+
+	filp->private_data = u;
+
+	return 0;
+}
+
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+	int i;
+	struct per_user_data *u = filp->private_data;
+
+	spin_lock_irq(&port_user_lock);
+
+	free_page((unsigned long)u->ring);
+
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (port_user[i] != u)
+			continue;
+
+		evtchn_unbind_from_user(port_user[i], i);
+	}
+
+	spin_unlock_irq(&port_user_lock);
+
+	kfree(u->name);
+	kfree(u);
+
+	return 0;
+}
+
+static const struct file_operations evtchn_fops = {
+	.owner   = THIS_MODULE,
+	.read    = evtchn_read,
+	.write   = evtchn_write,
+	.unlocked_ioctl = evtchn_ioctl,
+	.poll    = evtchn_poll,
+	.fasync  = evtchn_fasync,
+	.open    = evtchn_open,
+	.release = evtchn_release,
+};
+
+static struct miscdevice evtchn_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = "evtchn",
+	.fops         = &evtchn_fops,
+};
+static int __init evtchn_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	spin_lock_init(&port_user_lock);
+	memset(port_user, 0, sizeof(port_user));
+
+	/* Create '/dev/misc/evtchn'. */
+	err = misc_register(&evtchn_miscdev);
+	if (err != 0) {
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	printk(KERN_INFO "Event-channel device installed.\n");
+
+	return 0;
+}
+
+static void __exit evtchn_cleanup(void)
+{
+	misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 4b5b848..10d03d7 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,7 @@
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		device_power_up(PMSG_RESUME);
+		dpm_resume_noirq(PMSG_RESUME);
 		return err;
 	}
 
@@ -69,7 +69,7 @@
 	}
 
 	sysdev_resume();
-	device_power_up(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 	return 0;
 }
@@ -92,19 +92,18 @@
 	}
 #endif
 
-	err = device_suspend(PMSG_SUSPEND);
+	err = dpm_suspend_start(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+		printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
 		goto out;
 	}
 
-	printk("suspending xenbus...\n");
-	/* XXX use normal device tree? */
-	xenbus_suspend();
+	printk(KERN_DEBUG "suspending xenstore...\n");
+	xs_suspend();
 
-	err = device_power_down(PMSG_SUSPEND);
+	err = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "device_power_down failed: %d\n", err);
+		printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
 		goto resume_devices;
 	}
 
@@ -116,14 +115,14 @@
 
 	if (!cancelled) {
 		xen_arch_resume();
-		xenbus_resume();
+		xs_resume();
 	} else
-		xenbus_suspend_cancel();
+		xs_suspend_cancel();
 
-	device_power_up(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 resume_devices:
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 
 	/* Make sure timer events get retriggered on all CPUs */
 	clock_was_set();
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 0000000..88a60e0
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,445 @@
+/*
+ *  copyright (c) 2006 IBM Corporation
+ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xenbus.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+
+#define HYPERVISOR_ATTR_RO(_name) \
+static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
+
+#define HYPERVISOR_ATTR_RW(_name) \
+static struct hyp_sysfs_attr _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+struct hyp_sysfs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct hyp_sysfs_attr *, char *);
+	ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
+	void *hyp_attr_data;
+};
+
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return sprintf(buffer, "xen\n");
+}
+
+HYPERVISOR_ATTR_RO(type);
+
+static int __init xen_sysfs_type_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
+}
+
+static void xen_sysfs_type_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* xen version attributes */
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version >> 16);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(major);
+
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version & 0xff);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(minor);
+
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *extra;
+
+	extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
+	if (extra) {
+		ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", extra);
+		kfree(extra);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(extra);
+
+static struct attribute *version_attrs[] = {
+	&major_attr.attr,
+	&minor_attr.attr,
+	&extra_attr.attr,
+	NULL
+};
+
+static struct attribute_group version_group = {
+	.name = "version",
+	.attrs = version_attrs,
+};
+
+static int __init xen_sysfs_version_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &version_group);
+}
+
+static void xen_sysfs_version_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &version_group);
+}
+
+/* UUID */
+
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	char *vm, *val;
+	int ret;
+	extern int xenstored_ready;
+
+	if (!xenstored_ready)
+		return -EBUSY;
+
+	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
+	kfree(vm);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
+	ret = sprintf(buffer, "%s\n", val);
+	kfree(val);
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(uuid);
+
+static int __init xen_sysfs_uuid_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+static void xen_sysfs_uuid_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* xen compilation attributes */
+
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compiler);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiler);
+
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_by);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiled_by);
+
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_date);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+static struct attribute *xen_compile_attrs[] = {
+	&compiler_attr.attr,
+	&compiled_by_attr.attr,
+	&compile_date_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_compilation_group = {
+	.name = "compilation",
+	.attrs = xen_compile_attrs,
+};
+
+int __init static xen_compilation_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+static void xen_compilation_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *caps;
+
+	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+	if (caps) {
+		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", caps);
+		kfree(caps);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *cset;
+
+	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+	if (cset) {
+		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", cset);
+		kfree(cset);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_platform_parameters *parms;
+
+	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
+	if (parms) {
+		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
+					     parms);
+		if (!ret)
+			ret = sprintf(buffer, "%lx\n", parms->virt_start);
+		kfree(parms);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret;
+
+	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+	if (ret > 0)
+		ret = sprintf(buffer, "%x\n", ret);
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+	ssize_t ret;
+	struct xen_feature_info info;
+
+	info.submap_idx = index;
+	ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
+	if (!ret)
+		ret = sprintf(buffer, "%08x", info.submap);
+
+	return ret;
+}
+
+static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	ssize_t len;
+	int i;
+
+	len = 0;
+	for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
+		int ret = xen_feature_show(i, buffer + len);
+		if (ret < 0) {
+			if (len == 0)
+				len = ret;
+			break;
+		}
+		len += ret;
+	}
+	if (len > 0)
+		buffer[len++] = '\n';
+
+	return len;
+}
+
+HYPERVISOR_ATTR_RO(features);
+
+static struct attribute *xen_properties_attrs[] = {
+	&capabilities_attr.attr,
+	&changeset_attr.attr,
+	&virtual_start_attr.attr,
+	&pagesize_attr.attr,
+	&features_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_properties_group = {
+	.name = "properties",
+	.attrs = xen_properties_attrs,
+};
+
+static int __init xen_properties_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static void xen_properties_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static int __init hyper_sysfs_init(void)
+{
+	int ret;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	ret = xen_sysfs_type_init();
+	if (ret)
+		goto out;
+	ret = xen_sysfs_version_init();
+	if (ret)
+		goto version_out;
+	ret = xen_compilation_init();
+	if (ret)
+		goto comp_out;
+	ret = xen_sysfs_uuid_init();
+	if (ret)
+		goto uuid_out;
+	ret = xen_properties_init();
+	if (ret)
+		goto prop_out;
+
+	goto out;
+
+prop_out:
+	xen_sysfs_uuid_destroy();
+uuid_out:
+	xen_compilation_destroy();
+comp_out:
+	xen_sysfs_version_destroy();
+version_out:
+	xen_sysfs_type_destroy();
+out:
+	return ret;
+}
+
+static void __exit hyper_sysfs_exit(void)
+{
+	xen_properties_destroy();
+	xen_compilation_destroy();
+	xen_sysfs_uuid_destroy();
+	xen_sysfs_version_destroy();
+	xen_sysfs_type_destroy();
+
+}
+module_init(hyper_sysfs_init);
+module_exit(hyper_sysfs_exit);
+
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buffer)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->show)
+		return hyp_attr->show(hyp_attr, buffer);
+	return 0;
+}
+
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buffer,
+			       size_t len)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->store)
+		return hyp_attr->store(hyp_attr, buffer, len);
+	return 0;
+}
+
+static struct sysfs_ops hyp_sysfs_ops = {
+	.show = hyp_sysfs_show,
+	.store = hyp_sysfs_store,
+};
+
+static struct kobj_type hyp_sysfs_kobj_type = {
+	.sysfs_ops = &hyp_sysfs_ops,
+};
+
+static int __init hypervisor_subsys_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
+	return 0;
+}
+device_initcall(hypervisor_subsys_init);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf..d42e25d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@
 		.remove    = xenbus_dev_remove,
 		.shutdown  = xenbus_dev_shutdown,
 		.dev_attrs = xenbus_dev_attrs,
+
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
 	},
 };
 
@@ -654,6 +660,7 @@
 
 	kfree(root);
 }
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
 
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
@@ -669,7 +676,7 @@
 	.callback = frontend_changed,
 };
 
-static int suspend_dev(struct device *dev, void *data)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -682,35 +689,14 @@
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev);
+		err = drv->suspend(xdev, state);
 	if (err)
 		printk(KERN_WARNING
 		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
 	return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
-{
-	int err = 0;
-	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
-
-	DPRINTK("");
-
-	if (dev->driver == NULL)
-		return 0;
-	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
-	if (drv->suspend_cancel)
-		err = drv->suspend_cancel(xdev);
-	if (err)
-		printk(KERN_WARNING
-		       "xenbus: suspend_cancel %s failed: %i\n",
-		       dev_name(dev), err);
-	return 0;
-}
-
-static int resume_dev(struct device *dev, void *data)
+static int xenbus_dev_resume(struct device *dev)
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -755,33 +741,6 @@
 	return 0;
 }
 
-void xenbus_suspend(void)
-{
-	DPRINTK("");
-
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-	xenbus_backend_suspend(suspend_dev);
-	xs_suspend();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend);
-
-void xenbus_resume(void)
-{
-	xb_init_comms();
-	xs_resume();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-	xenbus_backend_resume(resume_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_resume);
-
-void xenbus_suspend_cancel(void)
-{
-	xs_suspend_cancel();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
-	xenbus_backend_resume(suspend_cancel_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
-
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 int xenstored_ready = 0;
 
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab..eab33f1 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+	xb_init_comms();
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	up_write(&xs_state.transaction_mutex);
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 515741a..6559e0c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -20,10 +20,27 @@
 MODULE_DESCRIPTION("Xen filesystem");
 MODULE_LICENSE("GPL");
 
+static ssize_t capabilities_read(struct file *file, char __user *buf,
+				 size_t size, loff_t *off)
+{
+	char *tmp = "";
+
+	if (xen_initial_domain())
+		tmp = "control_d\n";
+
+	return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
+}
+
+static const struct file_operations capabilities_file_ops = {
+	.read = capabilities_read,
+};
+
 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr xenfs_files[] = {
-		[2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
+		[1] = {},
+		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+		{ "capabilities", &capabilities_file_ops, S_IRUGO },
 		{""},
 	};
 
diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f..525da2e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
+config CUSE
+	tristate "Character device in Userpace support"
+	depends on FUSE_FS
+	help
+	  This FUSE extension allows character devices to be
+	  implemented in userspace.
+
+	  If you want to develop or use userspace character device
+	  based on CUSE, answer Y or M.
+
 config GENERIC_ACL
 	bool
 	select FS_POSIX_ACL
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index e0a85db..a6665f3 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -53,6 +53,7 @@
 	int	(*update)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*create)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*remove)(struct adfs_dir *dir, struct object_info *obj);
+	int	(*sync)(struct adfs_dir *dir);
 	void	(*free)(struct adfs_dir *dir);
 };
 
@@ -90,7 +91,8 @@
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
 
-extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
+extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
+			   int wait);
 
 /* file.c */
 extern const struct inode_operations adfs_file_inode_operations;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index e867ccf..4d40734 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -83,7 +83,7 @@
 }
 
 int
-adfs_dir_update(struct super_block *sb, struct object_info *obj)
+adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_ADFS_FS_RW
@@ -106,6 +106,12 @@
 	ret = ops->update(&dir, obj);
 	write_unlock(&adfs_dir_lock);
 
+	if (wait) {
+		int err = ops->sync(&dir);
+		if (!ret)
+			ret = err;
+	}
+
 	ops->free(&dir);
 out:
 #endif
@@ -199,7 +205,7 @@
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= adfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 static int
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index ea7df21..31df6ad 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -437,6 +437,22 @@
 #endif
 }
 
+static int
+adfs_f_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_f_free(struct adfs_dir *dir)
 {
@@ -456,5 +472,6 @@
 	.setpos		= adfs_f_setpos,
 	.getnext	= adfs_f_getnext,
 	.update		= adfs_f_update,
+	.sync		= adfs_f_sync,
 	.free		= adfs_f_free
 };
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1ec644e..139e0f3 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -161,6 +161,22 @@
 	return ret;
 }
 
+static int
+adfs_fplus_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_fplus_free(struct adfs_dir *dir)
 {
@@ -175,5 +191,6 @@
 	.read		= adfs_fplus_read,
 	.setpos		= adfs_fplus_setpos,
 	.getnext	= adfs_fplus_getnext,
+	.sync		= adfs_fplus_sync,
 	.free		= adfs_fplus_free
 };
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 36e381c..8224d54 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -30,7 +30,7 @@
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.mmap		= generic_file_mmap,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e647200..05b3a67 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -376,7 +376,7 @@
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int unused)
+int adfs_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct object_info obj;
@@ -391,7 +391,7 @@
 	obj.attr	= ADFS_I(inode)->attr;
 	obj.size	= inode->i_size;
 
-	ret = adfs_dir_update(sb, &obj);
+	ret = adfs_dir_update(sb, &obj, wait);
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 92ab4fb..568081b 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -62,7 +62,7 @@
 #define GET_FRAG_ID(_map,_start,_idmask)				\
 	({								\
 		unsigned char *_m = _map + (_start >> 3);		\
-		u32 _frag = get_unaligned((u32 *)_m);			\
+		u32 _frag = get_unaligned_le32(_m);			\
 		_frag >>= (_start & 7);					\
 		_frag & _idmask;					\
 	})
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index dd9becc..0ec5aaf 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -132,11 +132,15 @@
 	int i;
 	struct adfs_sb_info *asb = ADFS_SB(sb);
 
+	lock_kernel();
+
 	for (i = 0; i < asb->s_map_size; i++)
 		brelse(asb->s_map[i].dm_bh);
 	kfree(asb->s_map);
 	kfree(asb);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1a2d5e3..e511dc6 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,6 +182,7 @@
 
 void		affs_free_prealloc(struct inode *inode);
 extern void	affs_truncate(struct inode *);
+int		affs_file_fsync(struct file *, struct dentry *, int);
 
 /* dir.c */
 
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 7b36904..8ca8f3a 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -21,7 +21,7 @@
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= affs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 };
 
 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 9246cb4..184e55c 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -34,7 +34,7 @@
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -915,3 +915,15 @@
 	}
 	affs_free_prealloc(inode);
 }
+
+int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode * inode = dentry->d_inode;
+	int ret, err;
+
+	ret = write_inode_now(inode, 0);
+	err = sync_blockdev(inode->i_sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 63f5183..104fdcb 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -16,6 +16,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
@@ -24,49 +25,67 @@
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
+affs_commit_super(struct super_block *sb, int clean)
+{
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+	struct buffer_head *bh = sbi->s_root_bh;
+	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
+
+	tail->bm_flag = cpu_to_be32(clean);
+	secs_to_datestamp(get_seconds(), &tail->disk_change);
+	affs_fix_checksum(sb, bh);
+	mark_buffer_dirty(bh);
+}
+
+static void
 affs_put_super(struct super_block *sb)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	pr_debug("AFFS: put_super()\n");
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
-	}
+	lock_kernel();
+
+	if (!(sb->s_flags & MS_RDONLY))
+		affs_commit_super(sb, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
 	affs_brelse(sbi->s_root_bh);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 static void
 affs_write_super(struct super_block *sb)
 {
 	int clean = 2;
-	struct affs_sb_info *sbi = AFFS_SB(sb);
 
+	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		//	if (sbi->s_bitmap[i].bm_bh) {
 		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
 		//			clean = 0;
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
+		affs_commit_super(sb, clean);
 		sb->s_dirt = !clean;	/* redo until bitmap synced */
 	} else
 		sb->s_dirt = 0;
+	unlock_super(sb);
 
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
+static int
+affs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	affs_commit_super(sb, 2);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+	return 0;
+}
+
 static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -124,6 +143,7 @@
 	.clear_inode	= affs_clear_inode,
 	.put_super	= affs_put_super,
 	.write_super	= affs_write_super,
+	.sync_fs	= affs_sync_fs,
 	.statfs		= affs_statfs,
 	.remount_fs	= affs_remount,
 	.show_options	= generic_show_options,
@@ -507,6 +527,7 @@
 		kfree(new_opts);
 		return -EINVAL;
 	}
+	lock_kernel();
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
@@ -514,8 +535,10 @@
 	sbi->s_uid   = uid;
 	sbi->s_gid   = gid;
 
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		sb->s_dirt = 1;
 		while (sb->s_dirt)
@@ -524,6 +547,7 @@
 	} else
 		res = affs_init_bitmap(sb, flags);
 
+	unlock_kernel();
 	return res;
 }
 
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2b9e2d0..c52be53 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -244,7 +244,7 @@
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 76828e5..ad0514d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -440,8 +440,12 @@
 
 	_enter("");
 
+	lock_kernel();
+
 	afs_put_volume(as->volume);
 
+	unlock_kernel();
+
 	_leave("");
 }
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 4eb4d8d..2316e94 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -85,13 +85,12 @@
 		}
 		path.mnt = mnt;
 		path_get(&path);
-		if (!follow_down(&path.mnt, &path.dentry)) {
+		if (!follow_down(&path)) {
 			path_put(&path);
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) &&
-		       follow_down(&path.mnt, &path.dentry))
+		while (d_mountpoint(path.dentry) && follow_down(&path));
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b7ff33c..8f7cdde 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -223,12 +223,12 @@
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
 {
 	int res = 0;
 
-	while (d_mountpoint(*dentry)) {
-		int followed = follow_down(mnt, dentry);
+	while (d_mountpoint(path->dentry)) {
+		int followed = follow_down(path);
 		if (!followed)
 			break;
 		res = 1;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 84168c0..f3da2eb 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -192,77 +192,42 @@
 	return 0;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested device number (aka. new_encode_dev(sb->s_dev).
- */
-static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+static int find_autofs_mount(const char *pathname,
+			     struct path *res,
+			     int test(struct path *path, void *data),
+			     void *data)
 {
-	struct dentry *dentry;
-	struct inode *inode;
-	struct super_block *sb;
-	dev_t s_dev;
-	unsigned int err;
-
+	struct path path;
+	int err = kern_path(pathname, 0, &path);
+	if (err)
+		return err;
 	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
-
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		inode = nd->path.dentry->d_inode;
-		if (!inode)
-			break;
-
-		sb = inode->i_sb;
-		s_dev = new_encode_dev(sb->s_dev);
-		if (devno == s_dev) {
-			if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+	while (path.dentry == path.mnt->mnt_root) {
+		if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+			if (test(&path, data)) {
+				path_get(&path);
+				if (!err) /* already found some */
+					path_put(res);
+				*res = path;
 				err = 0;
-				break;
 			}
 		}
+		if (!follow_up(&path))
+			break;
 	}
-out:
+	path_put(&path);
 	return err;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested mount type (ie. indirect, direct or offset).
- */
-static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+static int test_by_dev(struct path *path, void *p)
 {
-	struct dentry *dentry;
-	struct autofs_info *ino;
-	unsigned int err;
+	return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+}
 
-	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
-
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		ino = autofs4_dentry_ino(nd->path.dentry);
-		if (ino && ino->sbi->type & type) {
-			err = 0;
-			break;
-		}
-	}
-out:
-	return err;
+static int test_by_type(struct path *path, void *p)
+{
+	struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+	return ino && ino->sbi->type & *(unsigned *)p;
 }
 
 static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
@@ -283,31 +248,25 @@
  * Open a file descriptor on the autofs mount point corresponding
  * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
  */
-static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 {
-	struct file *filp;
-	struct nameidata nd;
 	int err, fd;
 
 	fd = get_unused_fd();
 	if (likely(fd >= 0)) {
-		/* Get nameidata of the parent directory */
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		struct file *filp;
+		struct path path;
+
+		err = find_autofs_mount(name, &path, test_by_dev, &devid);
 		if (err)
 			goto out;
 
 		/*
-		 * Search down, within the parent, looking for an
-		 * autofs super block that has the device number
+		 * Find autofs super block that has the device number
 		 * corresponding to the autofs fs we want to open.
 		 */
-		err = autofs_dev_ioctl_find_super(&nd, devid);
-		if (err) {
-			path_put(&nd.path);
-			goto out;
-		}
 
-		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+		filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
 				   current_cred());
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
@@ -340,7 +299,7 @@
 	param->ioctlfd = -1;
 
 	path = param->path;
-	devid = param->openmount.devid;
+	devid = new_decode_dev(param->openmount.devid);
 
 	err = 0;
 	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -475,8 +434,7 @@
 				      struct autofs_dev_ioctl *param)
 {
 	struct autofs_info *ino;
-	struct nameidata nd;
-	const char *path;
+	struct path path;
 	dev_t devid;
 	int err = -ENOENT;
 
@@ -485,32 +443,24 @@
 		goto out;
 	}
 
-	path = param->path;
-	devid = new_encode_dev(sbi->sb->s_dev);
+	devid = sbi->sb->s_dev;
 
 	param->requester.uid = param->requester.gid = -1;
 
-	/* Get nameidata of the parent directory */
-	err = path_lookup(path, LOOKUP_PARENT, &nd);
+	err = find_autofs_mount(param->path, &path, test_by_dev, &devid);
 	if (err)
 		goto out;
 
-	err = autofs_dev_ioctl_find_super(&nd, devid);
-	if (err)
-		goto out_release;
-
-	ino = autofs4_dentry_ino(nd.path.dentry);
+	ino = autofs4_dentry_ino(path.dentry);
 	if (ino) {
 		err = 0;
-		autofs4_expire_wait(nd.path.dentry);
+		autofs4_expire_wait(path.dentry);
 		spin_lock(&sbi->fs_lock);
 		param->requester.uid = ino->uid;
 		param->requester.gid = ino->gid;
 		spin_unlock(&sbi->fs_lock);
 	}
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }
@@ -569,8 +519,8 @@
 					 struct autofs_sb_info *sbi,
 					 struct autofs_dev_ioctl *param)
 {
-	struct nameidata nd;
-	const char *path;
+	struct path path;
+	const char *name;
 	unsigned int type;
 	unsigned int devid, magic;
 	int err = -ENOENT;
@@ -580,71 +530,46 @@
 		goto out;
 	}
 
-	path = param->path;
+	name = param->path;
 	type = param->ismountpoint.in.type;
 
 	param->ismountpoint.out.devid = devid = 0;
 	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
-		if (autofs_type_any(type)) {
-			struct super_block *sb;
-
-			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
-			if (err)
-				goto out;
-
-			sb = nd.path.dentry->d_sb;
-			devid = new_encode_dev(sb->s_dev);
-		} else {
-			struct autofs_info *ino;
-
-			err = path_lookup(path, LOOKUP_PARENT, &nd);
-			if (err)
-				goto out;
-
-			err = autofs_dev_ioctl_find_sbi_type(&nd, type);
-			if (err)
-				goto out_release;
-
-			ino = autofs4_dentry_ino(nd.path.dentry);
-			devid = autofs4_get_dev(ino->sbi);
-		}
-
+		if (autofs_type_any(type))
+			err = kern_path(name, LOOKUP_FOLLOW, &path);
+		else
+			err = find_autofs_mount(name, &path, test_by_type, &type);
+		if (err)
+			goto out;
+		devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
 		err = 0;
-		if (nd.path.dentry->d_inode &&
-		    nd.path.mnt->mnt_root == nd.path.dentry) {
+		if (path.dentry->d_inode &&
+		    path.mnt->mnt_root == path.dentry) {
 			err = 1;
-			magic = nd.path.dentry->d_inode->i_sb->s_magic;
+			magic = path.dentry->d_inode->i_sb->s_magic;
 		}
 	} else {
-		dev_t dev = autofs4_get_dev(sbi);
+		dev_t dev = sbi->sb->s_dev;
 
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		err = find_autofs_mount(name, &path, test_by_dev, &dev);
 		if (err)
 			goto out;
 
-		err = autofs_dev_ioctl_find_super(&nd, dev);
-		if (err)
-			goto out_release;
+		devid = new_encode_dev(dev);
 
-		devid = dev;
+		err = have_submounts(path.dentry);
 
-		err = have_submounts(nd.path.dentry);
-
-		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
-			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
-				struct inode *inode = nd.path.dentry->d_inode;
-				magic = inode->i_sb->s_magic;
-			}
+		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
+			if (follow_down(&path))
+				magic = path.mnt->mnt_sb->s_magic;
 		}
 	}
 
 	param->ismountpoint.out.devid = devid;
 	param->ismountpoint.out.magic = magic;
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3077d8f..aa39ae8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -48,19 +48,19 @@
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-	mntget(mnt);
-	dget(dentry);
+	path_get(&path);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!follow_down(&path))
 		goto done;
 
-	if (is_autofs4_dentry(dentry)) {
-		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	if (is_autofs4_dentry(path.dentry)) {
+		struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
 		if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@
 		 * Otherwise it's an offset mount and we need to check
 		 * if we can umount its mount, if there is one.
 		 */
-		if (!d_mountpoint(dentry)) {
+		if (!d_mountpoint(path.dentry)) {
 			status = 0;
 			goto done;
 		}
@@ -86,8 +86,7 @@
 	status = 0;
 done:
 	DPRINTK("returning = %d", status);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path);
 	return status;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e383bf0..b96a3c5 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -181,7 +181,7 @@
 		nd->flags);
 	/*
 	 * For an expire of a covered direct or offset mount we need
-	 * to beeak out of follow_down() at the autofs mount trigger
+	 * to break out of follow_down() at the autofs mount trigger
 	 * (d_mounted--), so we can see the expiring flag, and manage
 	 * the blocking and following here until the expire is completed.
 	 */
@@ -190,7 +190,7 @@
 		if (ino->flags & AUTOFS_INF_EXPIRING) {
 			spin_unlock(&sbi->fs_lock);
 			/* Follow down to our covering mount. */
-			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+			if (!follow_down(&nd->path))
 				goto done;
 			goto follow;
 		}
@@ -230,8 +230,7 @@
 	 * to follow it.
 	 */
 	if (d_mountpoint(dentry)) {
-		if (!autofs4_follow_mount(&nd->path.mnt,
-					  &nd->path.dentry)) {
+		if (!autofs4_follow_mount(&nd->path)) {
 			status = -ENOENT;
 			goto out_error;
 		}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index eeb2468..2341375 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -297,20 +297,14 @@
 	 */
 	if (notify == NFY_MOUNT) {
 		/*
-		 * If the dentry isn't hashed just go ahead and try the
-		 * mount again with a new wait (not much else we can do).
-		*/
-		if (!d_unhashed(dentry)) {
-			/*
-			 * But if the dentry is hashed, that means that we
-			 * got here through the revalidate path.  Thus, we
-			 * need to check if the dentry has been mounted
-			 * while we waited on the wq_mutex. If it has,
-			 * simply return success.
-			 */
-			if (d_mountpoint(dentry))
-				return 0;
-		}
+		 * If the dentry was successfully mounted while we slept
+		 * on the wait queue mutex we can return success. If it
+		 * isn't mounted (doesn't have submounts for the case of
+		 * a multi-mount with no mount at it's base) we can
+		 * continue on and create a new request.
+		 */
+		if (have_submounts(dentry))
+			return 0;
 	}
 
 	return 1;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 76afd0d..9367b62 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,6 +737,8 @@
 static void
 befs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	kfree(BEFS_SB(sb)->mount_opts.iocharset);
 	BEFS_SB(sb)->mount_opts.iocharset = NULL;
 
@@ -747,7 +749,8 @@
 
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 /* Allocate private field of the superblock, fill it.
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 4dd1b62..54bd07d 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -79,7 +79,7 @@
 const struct file_operations bfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= bfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.llseek		= generic_file_llseek,
 };
 
@@ -205,7 +205,7 @@
 		inode->i_nlink = 1;
 	}
 	de->ino = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
@@ -267,7 +267,7 @@
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		inode_dec_link_count(new_inode);
 	}
-	mark_buffer_dirty(old_bh);
+	mark_buffer_dirty_inode(old_bh, old_dir);
 	error = 0;
 
 end_rename:
@@ -320,7 +320,7 @@
 				for (i = 0; i < BFS_NAMELEN; i++)
 					de->name[i] =
 						(i < namelen) ? name[i] : 0;
-				mark_buffer_dirty(bh);
+				mark_buffer_dirty_inode(bh, dir);
 				brelse(bh);
 				return 0;
 			}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cc4062d..6f60336 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,6 +30,7 @@
 #define dprintf(x...)
 #endif
 
+static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -97,14 +98,15 @@
 	return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int unused)
+static int bfs_write_inode(struct inode *inode, int wait)
 {
+	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 	unsigned int ino = (u16)inode->i_ino;
         unsigned long i_sblock;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
 	int block, off;
-	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
+	int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
@@ -145,9 +147,14 @@
 	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
+	if (wait) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
 	brelse(bh);
 	mutex_unlock(&info->bfs_lock);
-	return 0;
+	return err;
 }
 
 static void bfs_delete_inode(struct inode *inode)
@@ -209,6 +216,26 @@
 	clear_inode(inode);
 }
 
+static int bfs_sync_fs(struct super_block *sb, int wait)
+{
+	struct bfs_sb_info *info = BFS_SB(sb);
+
+	mutex_lock(&info->bfs_lock);
+	mark_buffer_dirty(info->si_sbh);
+	sb->s_dirt = 0;
+	mutex_unlock(&info->bfs_lock);
+
+	return 0;
+}
+
+static void bfs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		bfs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
+}
+
 static void bfs_put_super(struct super_block *s)
 {
 	struct bfs_sb_info *info = BFS_SB(s);
@@ -216,11 +243,18 @@
 	if (!info)
 		return;
 
+	lock_kernel();
+
+	if (s->s_dirt)
+		bfs_write_super(s);
+
 	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
 	kfree(info);
 	s->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -240,17 +274,6 @@
 	return 0;
 }
 
-static void bfs_write_super(struct super_block *s)
-{
-	struct bfs_sb_info *info = BFS_SB(s);
-
-	mutex_lock(&info->bfs_lock);
-	if (!(s->s_flags & MS_RDONLY))
-		mark_buffer_dirty(info->si_sbh);
-	s->s_dirt = 0;
-	mutex_unlock(&info->bfs_lock);
-}
-
 static struct kmem_cache *bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
@@ -298,6 +321,7 @@
 	.delete_inode	= bfs_delete_inode,
 	.put_super	= bfs_put_super,
 	.write_super	= bfs_write_super,
+	.sync_fs	= bfs_sync_fs,
 	.statfs		= bfs_statfs,
 };
 
diff --git a/fs/bio.c b/fs/bio.c
index 9871164..5900021 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
@@ -499,11 +498,11 @@
 	struct request_queue *q = bdev_get_queue(bdev);
 	int nr_pages;
 
-	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > q->max_phys_segments)
-		nr_pages = q->max_phys_segments;
-	if (nr_pages > q->max_hw_segments)
-		nr_pages = q->max_hw_segments;
+	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (nr_pages > queue_max_phys_segments(q))
+		nr_pages = queue_max_phys_segments(q);
+	if (nr_pages > queue_max_hw_segments(q))
+		nr_pages = queue_max_hw_segments(q);
 
 	return nr_pages;
 }
@@ -562,8 +561,8 @@
 	 * make this too complex.
 	 */
 
-	while (bio->bi_phys_segments >= q->max_phys_segments
-	       || bio->bi_phys_segments >= q->max_hw_segments) {
+	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
+	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
 
 		if (retried_segments)
 			return 0;
@@ -634,7 +633,8 @@
 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
 		    unsigned int len, unsigned int offset)
 {
-	return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
+	return __bio_add_page(q, bio, page, len, offset,
+			      queue_max_hw_sectors(q));
 }
 
 /**
@@ -654,7 +654,7 @@
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
+	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
 
 struct bio_map_data {
@@ -721,7 +721,7 @@
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
-			char *iov_addr;
+			char __user *iov_addr;
 
 			bytes = min_t(unsigned int,
 				      iov[iov_idx].iov_len - iov_off, bv_len);
@@ -1201,7 +1201,7 @@
 		char *addr = page_address(bvec->bv_page);
 		int len = bmd->iovecs[i].bv_len;
 
-		if (read && !err)
+		if (read)
 			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
@@ -1490,11 +1490,12 @@
 sector_t bio_sector_offset(struct bio *bio, unsigned short index,
 			   unsigned int offset)
 {
-	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	unsigned int sector_sz;
 	struct bio_vec *bv;
 	sector_t sectors;
 	int i;
 
+	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
 	sectors = 0;
 
 	if (index >= bio->bi_idx)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f45dbc1..3a6d4fb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -25,6 +25,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <linux/log2.h>
+#include <linux/kmemleak.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -76,7 +77,7 @@
 		return -EINVAL;
 
 	/* Size cannot be smaller than the size supported by the device */
-	if (size < bdev_hardsect_size(bdev))
+	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
 	/* Don't change the size if it is same as current */
@@ -106,7 +107,7 @@
 
 int sb_min_blocksize(struct super_block *sb, int size)
 {
-	int minsize = bdev_hardsect_size(sb->s_bdev);
+	int minsize = bdev_logical_block_size(sb->s_bdev);
 	if (size < minsize)
 		size = minsize;
 	return sb_set_blocksize(sb, size);
@@ -175,17 +176,22 @@
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	if (!bdev)
+		return 0;
+	if (!wait)
+		return filemap_flush(bdev->bd_inode->i_mapping);
+	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping.  Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
+	return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
@@ -198,7 +204,7 @@
 {
 	struct super_block *sb = get_super(bdev);
 	if (sb) {
-		int res = fsync_super(sb);
+		int res = sync_filesystem(sb);
 		drop_super(sb);
 		return res;
 	}
@@ -240,7 +246,7 @@
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		__fsync_super(sb);
+		sync_filesystem(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
@@ -492,6 +498,11 @@
 	bd_mnt = kern_mount(&bd_type);
 	if (IS_ERR(bd_mnt))
 		panic("Cannot create bdev pseudo-fs");
+	/*
+	 * This vfsmount structure is only used to obtain the
+	 * blockdev_superblock, so tell kmemleak not to report it.
+	 */
+	kmemleak_not_leak(bd_mnt);
 	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
 }
 
@@ -1111,7 +1122,7 @@
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-	unsigned bsize = bdev_hardsect_size(bdev);
+	unsigned bsize = bdev_logical_block_size(bdev);
 
 	bdev->bd_inode->i_size = size;
 	while (bsize < PAGE_CACHE_SIZE) {
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9421284..a35eb36 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o delayed-ref.o
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index cbba000..6039725 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -351,9 +351,4 @@
 	return 0;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask)
-{
-	return 0;
-}
-
 #endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 502c3d6..7f88628 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -294,10 +294,10 @@
 		INIT_LIST_HEAD(&worker->worker_list);
 		spin_lock_init(&worker->lock);
 		atomic_set(&worker->num_pending, 0);
+		worker->workers = workers;
 		worker->task = kthread_run(worker_loop, worker,
 					   "btrfs-%s-%d", workers->name,
 					   workers->num_workers + i);
-		worker->workers = workers;
 		if (IS_ERR(worker->task)) {
 			kfree(worker);
 			ret = PTR_ERR(worker->task);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b30986f..acb4f35 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -72,6 +72,9 @@
 	 */
 	struct list_head ordered_operations;
 
+	/* node for the red-black tree that links inodes in subvolume root */
+	struct rb_node rb_node;
+
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
@@ -154,5 +157,4 @@
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
-
 #endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ab07627..de1e2fd 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -123,7 +123,7 @@
 	u32 csum;
 	u32 *cb_sum = &cb->sums;
 
-	if (btrfs_test_flag(inode, NODATASUM))
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
 	for (i = 0; i < cb->nr_pages; i++) {
@@ -670,7 +670,7 @@
 			 */
 			atomic_inc(&cb->pending_bios);
 
-			if (!btrfs_test_flag(inode, NODATASUM)) {
+			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				btrfs_lookup_bio_sums(root, inode, comp_bio,
 						      sums);
 			}
@@ -697,7 +697,7 @@
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!btrfs_test_flag(inode, NODATASUM))
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
 		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
deleted file mode 100644
index 6e1b3de..0000000
--- a/fs/btrfs/crc32c.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_CRC32C__
-#define __BTRFS_CRC32C__
-#include <linux/crc32c.h>
-
-/*
- * this file used to do more for selecting the HW version of crc32c,
- * perhaps it will one day again soon.
- */
-#define btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
-#endif
-
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9..60a45f3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@
 	u32 nritems;
 	int ret = 0;
 	int level;
-	struct btrfs_root *new_root;
-
-	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
-	if (!new_root)
-		return -ENOMEM;
-
-	memcpy(new_root, root, sizeof(*new_root));
-	new_root->root_key.objectid = new_root_objectid;
+	struct btrfs_disk_key disk_key;
 
 	WARN_ON(root->ref_cows && trans->transid !=
 		root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@
 
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);
+	if (level == 0)
+		btrfs_item_key(buf, &disk_key, 0);
+	else
+		btrfs_node_key(buf, &disk_key, 0);
 
-	cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
-				     new_root_objectid, trans->transid,
-				     level, buf->start, 0);
-	if (IS_ERR(cow)) {
-		kfree(new_root);
+	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
+				     new_root_objectid, &disk_key, level,
+				     buf->start, 0);
+	if (IS_ERR(cow))
 		return PTR_ERR(cow);
-	}
 
 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
 	btrfs_set_header_bytenr(cow, cow->start);
 	btrfs_set_header_generation(cow, trans->transid);
-	btrfs_set_header_owner(cow, new_root_objectid);
-	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+				     BTRFS_HEADER_FLAG_RELOC);
+	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+	else
+		btrfs_set_header_owner(cow, new_root_objectid);
 
 	write_extent_buffer(cow, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
-	ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
-	kfree(new_root);
+	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+		ret = btrfs_inc_ref(trans, root, cow, 1);
+	else
+		ret = btrfs_inc_ref(trans, root, cow, 0);
 
 	if (ret)
 		return ret;
@@ -244,6 +246,125 @@
 }
 
 /*
+ * check if the tree block can be shared by multiple trees
+ */
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+			      struct extent_buffer *buf)
+{
+	/*
+	 * Tree blocks not in refernece counted trees and tree roots
+	 * are never shared. If a block was allocated after the last
+	 * snapshot and the block was not allocated by tree relocation,
+	 * we know the block is not shared.
+	 */
+	if (root->ref_cows &&
+	    buf != root->node && buf != root->commit_root &&
+	    (btrfs_header_generation(buf) <=
+	     btrfs_root_last_snapshot(&root->root_item) ||
+	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+		return 1;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (root->ref_cows &&
+	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+		return 1;
+#endif
+	return 0;
+}
+
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct extent_buffer *buf,
+				       struct extent_buffer *cow)
+{
+	u64 refs;
+	u64 owner;
+	u64 flags;
+	u64 new_flags = 0;
+	int ret;
+
+	/*
+	 * Backrefs update rules:
+	 *
+	 * Always use full backrefs for extent pointers in tree block
+	 * allocated by tree relocation.
+	 *
+	 * If a shared tree block is no longer referenced by its owner
+	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
+	 * use full backrefs for extent pointers in tree block.
+	 *
+	 * If a tree block is been relocating
+	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+	 * use full backrefs for extent pointers in tree block.
+	 * The reason for this is some operations (such as drop tree)
+	 * are only allowed for blocks use full backrefs.
+	 */
+
+	if (btrfs_block_can_be_shared(root, buf)) {
+		ret = btrfs_lookup_extent_info(trans, root, buf->start,
+					       buf->len, &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+	} else {
+		refs = 1;
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		else
+			flags = 0;
+	}
+
+	owner = btrfs_header_owner(buf);
+	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
+	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
+	if (refs > 1) {
+		if ((owner == root->root_key.objectid ||
+		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
+			ret = btrfs_inc_ref(trans, root, buf, 1);
+			BUG_ON(ret);
+
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID) {
+				ret = btrfs_dec_ref(trans, root, buf, 0);
+				BUG_ON(ret);
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+				BUG_ON(ret);
+			}
+			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		} else {
+
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID)
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+			else
+				ret = btrfs_inc_ref(trans, root, cow, 0);
+			BUG_ON(ret);
+		}
+		if (new_flags != 0) {
+			ret = btrfs_set_disk_extent_flags(trans, root,
+							  buf->start,
+							  buf->len,
+							  new_flags, 0);
+			BUG_ON(ret);
+		}
+	} else {
+		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID)
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+			else
+				ret = btrfs_inc_ref(trans, root, cow, 0);
+			BUG_ON(ret);
+			ret = btrfs_dec_ref(trans, root, buf, 1);
+			BUG_ON(ret);
+		}
+		clean_tree_block(trans, root, buf);
+	}
+	return 0;
+}
+
+/*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
  * dirty and returned locked.  If you modify the block it needs to be marked
@@ -262,34 +383,39 @@
 			     struct extent_buffer **cow_ret,
 			     u64 search_start, u64 empty_size)
 {
-	u64 parent_start;
+	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
-	u32 nritems;
-	int ret = 0;
 	int level;
 	int unlock_orig = 0;
+	u64 parent_start;
 
 	if (*cow_ret == buf)
 		unlock_orig = 1;
 
 	btrfs_assert_tree_locked(buf);
 
-	if (parent)
-		parent_start = parent->start;
-	else
-		parent_start = 0;
-
 	WARN_ON(root->ref_cows && trans->transid !=
 		root->fs_info->running_transaction->transid);
 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
-	nritems = btrfs_header_nritems(buf);
 
-	cow = btrfs_alloc_free_block(trans, root, buf->len,
-				     parent_start, root->root_key.objectid,
-				     trans->transid, level,
-				     search_start, empty_size);
+	if (level == 0)
+		btrfs_item_key(buf, &disk_key, 0);
+	else
+		btrfs_node_key(buf, &disk_key, 0);
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (parent)
+			parent_start = parent->start;
+		else
+			parent_start = 0;
+	} else
+		parent_start = 0;
+
+	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
+				     root->root_key.objectid, &disk_key,
+				     level, search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -298,83 +424,53 @@
 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
 	btrfs_set_header_bytenr(cow, cow->start);
 	btrfs_set_header_generation(cow, trans->transid);
-	btrfs_set_header_owner(cow, root->root_key.objectid);
-	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+				     BTRFS_HEADER_FLAG_RELOC);
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+	else
+		btrfs_set_header_owner(cow, root->root_key.objectid);
 
 	write_extent_buffer(cow, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
-	WARN_ON(btrfs_header_generation(buf) > trans->transid);
-	if (btrfs_header_generation(buf) != trans->transid) {
-		u32 nr_extents;
-		ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
-		if (ret)
-			return ret;
-
-		ret = btrfs_cache_ref(trans, root, buf, nr_extents);
-		WARN_ON(ret);
-	} else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
-		/*
-		 * There are only two places that can drop reference to
-		 * tree blocks owned by living reloc trees, one is here,
-		 * the other place is btrfs_drop_subtree. In both places,
-		 * we check reference count while tree block is locked.
-		 * Furthermore, if reference count is one, it won't get
-		 * increased by someone else.
-		 */
-		u32 refs;
-		ret = btrfs_lookup_extent_ref(trans, root, buf->start,
-					      buf->len, &refs);
-		BUG_ON(ret);
-		if (refs == 1) {
-			ret = btrfs_update_ref(trans, root, buf, cow,
-					       0, nritems);
-			clean_tree_block(trans, root, buf);
-		} else {
-			ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
-		}
-		BUG_ON(ret);
-	} else {
-		ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
-		if (ret)
-			return ret;
-		clean_tree_block(trans, root, buf);
-	}
-
-	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
-		ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
-		WARN_ON(ret);
-	}
+	update_ref_for_cow(trans, root, buf, cow);
 
 	if (buf == root->node) {
 		WARN_ON(parent && parent != buf);
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+			parent_start = buf->start;
+		else
+			parent_start = 0;
 
 		spin_lock(&root->node_lock);
 		root->node = cow;
 		extent_buffer_get(cow);
 		spin_unlock(&root->node_lock);
 
-		if (buf != root->commit_root) {
-			btrfs_free_extent(trans, root, buf->start,
-					  buf->len, buf->start,
-					  root->root_key.objectid,
-					  btrfs_header_generation(buf),
-					  level, 1);
-		}
+		btrfs_free_extent(trans, root, buf->start, buf->len,
+				  parent_start, root->root_key.objectid,
+				  level, 0);
 		free_extent_buffer(buf);
 		add_root_to_dirty_list(root);
 	} else {
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+			parent_start = parent->start;
+		else
+			parent_start = 0;
+
+		WARN_ON(trans->transid != btrfs_header_generation(parent));
 		btrfs_set_node_blockptr(parent, parent_slot,
 					cow->start);
-		WARN_ON(trans->transid == 0);
 		btrfs_set_node_ptr_generation(parent, parent_slot,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
-		WARN_ON(btrfs_header_generation(parent) != trans->transid);
 		btrfs_free_extent(trans, root, buf->start, buf->len,
-				  parent_start, btrfs_header_owner(parent),
-				  btrfs_header_generation(parent), level, 1);
+				  parent_start, root->root_key.objectid,
+				  level, 0);
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@
 	return 0;
 }
 
+static inline int should_cow_block(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct extent_buffer *buf)
+{
+	if (btrfs_header_generation(buf) == trans->transid &&
+	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
+	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+		return 0;
+	return 1;
+}
+
 /*
  * cows a single block, see __btrfs_cow_block for the real work.
  * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@
 		WARN_ON(1);
 	}
 
-	if (btrfs_header_generation(buf) == trans->transid &&
-	    btrfs_header_owner(buf) == root->root_key.objectid &&
-	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+	if (!should_cow_block(trans, root, buf)) {
 		*cow_ret = buf;
 		return 0;
 	}
@@ -469,7 +575,7 @@
 /*
  * same as comp_keys only with two btrfs_key's
  */
-static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 {
 	if (k1->objectid > k2->objectid)
 		return 1;
@@ -845,6 +951,12 @@
 	return -1;
 }
 
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		     int level, int *slot)
+{
+	return bin_search(eb, key, level, slot);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -921,13 +1033,6 @@
 		root->node = child;
 		spin_unlock(&root->node_lock);
 
-		ret = btrfs_update_extent_ref(trans, root, child->start,
-					      child->len,
-					      mid->start, child->start,
-					      root->root_key.objectid,
-					      trans->transid, level - 1);
-		BUG_ON(ret);
-
 		add_root_to_dirty_list(root);
 		btrfs_tree_unlock(child);
 
@@ -938,9 +1043,7 @@
 		/* once for the path */
 		free_extent_buffer(mid);
 		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
-					mid->start, root->root_key.objectid,
-					btrfs_header_generation(mid),
-					level, 1);
+					0, root->root_key.objectid, level, 1);
 		/* once for the root ptr */
 		free_extent_buffer(mid);
 		return ret;
@@ -949,8 +1052,7 @@
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	if (trans->transaction->delayed_refs.flushing &&
-	    btrfs_header_nritems(mid) > 2)
+	if (btrfs_header_nritems(mid) > 2)
 		return 0;
 
 	if (btrfs_header_nritems(mid) < 2)
@@ -998,7 +1100,6 @@
 			ret = wret;
 		if (btrfs_header_nritems(right) == 0) {
 			u64 bytenr = right->start;
-			u64 generation = btrfs_header_generation(parent);
 			u32 blocksize = right->len;
 
 			clean_tree_block(trans, root, right);
@@ -1010,9 +1111,9 @@
 			if (wret)
 				ret = wret;
 			wret = btrfs_free_extent(trans, root, bytenr,
-						 blocksize, parent->start,
-						 btrfs_header_owner(parent),
-						 generation, level, 1);
+						 blocksize, 0,
+						 root->root_key.objectid,
+						 level, 0);
 			if (wret)
 				ret = wret;
 		} else {
@@ -1047,7 +1148,6 @@
 	}
 	if (btrfs_header_nritems(mid) == 0) {
 		/* we've managed to empty the middle node, drop it */
-		u64 root_gen = btrfs_header_generation(parent);
 		u64 bytenr = mid->start;
 		u32 blocksize = mid->len;
 
@@ -1059,9 +1159,8 @@
 		if (wret)
 			ret = wret;
 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
-					 parent->start,
-					 btrfs_header_owner(parent),
-					 root_gen, level, 1);
+					 0, root->root_key.objectid,
+					 level, 0);
 		if (wret)
 			ret = wret;
 	} else {
@@ -1437,7 +1536,7 @@
 {
 	int i;
 
-	if (path->keep_locks || path->lowest_level)
+	if (path->keep_locks)
 		return;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1552,7 +1651,7 @@
 		}
 		b = p->nodes[level];
 	} else if (ins_len < 0 && btrfs_header_nritems(b) <
-		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
 		sret = reada_for_balance(root, p, level);
@@ -1614,10 +1713,17 @@
 		lowest_unlock = 2;
 
 again:
-	if (p->skip_locking)
-		b = btrfs_root_node(root);
-	else
-		b = btrfs_lock_root_node(root);
+	if (p->search_commit_root) {
+		b = root->commit_root;
+		extent_buffer_get(b);
+		if (!p->skip_locking)
+			btrfs_tree_lock(b);
+	} else {
+		if (p->skip_locking)
+			b = btrfs_root_node(root);
+		else
+			b = btrfs_lock_root_node(root);
+	}
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1638,11 +1744,9 @@
 			 * then we don't want to set the path blocking,
 			 * so we test it here
 			 */
-			if (btrfs_header_generation(b) == trans->transid &&
-			    btrfs_header_owner(b) == root->root_key.objectid &&
-			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+			if (!should_cow_block(trans, root, b))
 				goto cow_done;
-			}
+
 			btrfs_set_path_blocking(p);
 
 			wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1868,6 @@
 	return ret;
 }
 
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_key *node_keys,
-		     u64 *nodes, int lowest_level)
-{
-	struct extent_buffer *eb;
-	struct extent_buffer *parent;
-	struct btrfs_key key;
-	u64 bytenr;
-	u64 generation;
-	u32 blocksize;
-	int level;
-	int slot;
-	int key_match;
-	int ret;
-
-	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
-	BUG_ON(ret);
-
-	btrfs_set_lock_blocking(eb);
-
-	parent = eb;
-	while (1) {
-		level = btrfs_header_level(parent);
-		if (level == 0 || level <= lowest_level)
-			break;
-
-		ret = bin_search(parent, &node_keys[lowest_level], level,
-				 &slot);
-		if (ret && slot > 0)
-			slot--;
-
-		bytenr = btrfs_node_blockptr(parent, slot);
-		if (nodes[level - 1] == bytenr)
-			break;
-
-		blocksize = btrfs_level_size(root, level - 1);
-		generation = btrfs_node_ptr_generation(parent, slot);
-		btrfs_node_key_to_cpu(eb, &key, slot);
-		key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
-
-		if (generation == trans->transid) {
-			eb = read_tree_block(root, bytenr, blocksize,
-					     generation);
-			btrfs_tree_lock(eb);
-			btrfs_set_lock_blocking(eb);
-		}
-
-		/*
-		 * if node keys match and node pointer hasn't been modified
-		 * in the running transaction, we can merge the path. for
-		 * blocks owened by reloc trees, the node pointer check is
-		 * skipped, this is because these blocks are fully controlled
-		 * by the space balance code, no one else can modify them.
-		 */
-		if (!nodes[level - 1] || !key_match ||
-		    (generation == trans->transid &&
-		     btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
-			if (level == 1 || level == lowest_level + 1) {
-				if (generation == trans->transid) {
-					btrfs_tree_unlock(eb);
-					free_extent_buffer(eb);
-				}
-				break;
-			}
-
-			if (generation != trans->transid) {
-				eb = read_tree_block(root, bytenr, blocksize,
-						generation);
-				btrfs_tree_lock(eb);
-				btrfs_set_lock_blocking(eb);
-			}
-
-			ret = btrfs_cow_block(trans, root, eb, parent, slot,
-					      &eb);
-			BUG_ON(ret);
-
-			if (root->root_key.objectid ==
-			    BTRFS_TREE_RELOC_OBJECTID) {
-				if (!nodes[level - 1]) {
-					nodes[level - 1] = eb->start;
-					memcpy(&node_keys[level - 1], &key,
-					       sizeof(node_keys[0]));
-				} else {
-					WARN_ON(1);
-				}
-			}
-
-			btrfs_tree_unlock(parent);
-			free_extent_buffer(parent);
-			parent = eb;
-			continue;
-		}
-
-		btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
-		btrfs_set_node_ptr_generation(parent, slot, trans->transid);
-		btrfs_mark_buffer_dirty(parent);
-
-		ret = btrfs_inc_extent_ref(trans, root,
-					nodes[level - 1],
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					level - 1);
-		BUG_ON(ret);
-
-		/*
-		 * If the block was created in the running transaction,
-		 * it's possible this is the last reference to it, so we
-		 * should drop the subtree.
-		 */
-		if (generation == trans->transid) {
-			ret = btrfs_drop_subtree(trans, root, eb, parent);
-			BUG_ON(ret);
-			btrfs_tree_unlock(eb);
-			free_extent_buffer(eb);
-		} else {
-			ret = btrfs_free_extent(trans, root, bytenr,
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					level - 1, 1);
-			BUG_ON(ret);
-		}
-		break;
-	}
-	btrfs_tree_unlock(parent);
-	free_extent_buffer(parent);
-	return 0;
-}
-
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1993,6 @@
 	btrfs_mark_buffer_dirty(src);
 	btrfs_mark_buffer_dirty(dst);
 
-	ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
-	BUG_ON(ret);
-
 	return ret;
 }
 
@@ -2083,9 +2052,6 @@
 	btrfs_mark_buffer_dirty(src);
 	btrfs_mark_buffer_dirty(dst);
 
-	ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
-	BUG_ON(ret);
-
 	return ret;
 }
 
@@ -2105,7 +2071,6 @@
 	struct extent_buffer *c;
 	struct extent_buffer *old;
 	struct btrfs_disk_key lower_key;
-	int ret;
 
 	BUG_ON(path->nodes[level]);
 	BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2082,17 @@
 		btrfs_node_key(lower, &lower_key, 0);
 
 	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
-				   root->root_key.objectid, trans->transid,
+				   root->root_key.objectid, &lower_key,
 				   level, root->node->start, 0);
 	if (IS_ERR(c))
 		return PTR_ERR(c);
 
-	memset_extent_buffer(c, 0, 0, root->nodesize);
+	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_nritems(c, 1);
 	btrfs_set_header_level(c, level);
 	btrfs_set_header_bytenr(c, c->start);
 	btrfs_set_header_generation(c, trans->transid);
+	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(c, root->root_key.objectid);
 
 	write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2117,6 @@
 	root->node = c;
 	spin_unlock(&root->node_lock);
 
-	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->len, lower->start, c->start,
-				      root->root_key.objectid,
-				      trans->transid, level - 1);
-	BUG_ON(ret);
-
 	/* the super has an extra ref to root->node */
 	free_extent_buffer(old);
 
@@ -2233,7 +2193,7 @@
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
-	} else if (!trans->transaction->delayed_refs.flushing) {
+	} else {
 		ret = push_nodes_for_insert(trans, root, path, level);
 		c = path->nodes[level];
 		if (!ret && btrfs_header_nritems(c) <
@@ -2244,20 +2204,21 @@
 	}
 
 	c_nritems = btrfs_header_nritems(c);
+	mid = (c_nritems + 1) / 2;
+	btrfs_node_key(c, &disk_key, mid);
 
-	split = btrfs_alloc_free_block(trans, root, root->nodesize,
-					path->nodes[level + 1]->start,
+	split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
 					root->root_key.objectid,
-					trans->transid, level, c->start, 0);
+					&disk_key, level, c->start, 0);
 	if (IS_ERR(split))
 		return PTR_ERR(split);
 
-	btrfs_set_header_flags(split, btrfs_header_flags(c));
+	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_level(split, btrfs_header_level(c));
 	btrfs_set_header_bytenr(split, split->start);
 	btrfs_set_header_generation(split, trans->transid);
+	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(split, root->root_key.objectid);
-	btrfs_set_header_flags(split, 0);
 	write_extent_buffer(split, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(split),
 			    BTRFS_FSID_SIZE);
@@ -2265,7 +2226,6 @@
 			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
 			    BTRFS_UUID_SIZE);
 
-	mid = (c_nritems + 1) / 2;
 
 	copy_extent_buffer(split, c,
 			   btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2238,12 @@
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
 
-	btrfs_node_key(split, &disk_key, 0);
 	wret = insert_ptr(trans, root, path, &disk_key, split->start,
 			  path->slots[level + 1] + 1,
 			  level + 1);
 	if (wret)
 		ret = wret;
 
-	ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
-	BUG_ON(ret);
-
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
 		btrfs_tree_unlock(c);
@@ -2360,7 +2316,6 @@
 	u32 right_nritems;
 	u32 data_end;
 	u32 this_item_size;
-	int ret;
 
 	if (empty)
 		nr = 0;
@@ -2473,9 +2428,6 @@
 		btrfs_mark_buffer_dirty(left);
 	btrfs_mark_buffer_dirty(right);
 
-	ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
-	BUG_ON(ret);
-
 	btrfs_item_key(right, &disk_key, 0);
 	btrfs_set_node_key(upper, &disk_key, slot + 1);
 	btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2672,6 @@
 	if (right_nritems)
 		btrfs_mark_buffer_dirty(right);
 
-	ret = btrfs_update_ref(trans, root, right, left,
-			       old_left_nritems, push_items);
-	BUG_ON(ret);
-
 	btrfs_item_key(right, &disk_key, 0);
 	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	if (wret)
@@ -2880,9 +2828,6 @@
 	btrfs_mark_buffer_dirty(l);
 	BUG_ON(path->slots[0] != slot);
 
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
-	BUG_ON(ret);
-
 	if (mid <= slot) {
 		btrfs_tree_unlock(path->nodes[0]);
 		free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2856,7 @@
 			       struct btrfs_path *path, int data_size,
 			       int extend)
 {
+	struct btrfs_disk_key disk_key;
 	struct extent_buffer *l;
 	u32 nritems;
 	int mid;
@@ -2918,12 +2864,11 @@
 	struct extent_buffer *right;
 	int ret = 0;
 	int wret;
-	int double_split;
+	int split;
 	int num_doubles = 0;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
-	    !trans->transaction->delayed_refs.flushing) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
 		wret = push_leaf_right(trans, root, path, data_size, 0);
 		if (wret < 0)
 			return wret;
@@ -2945,16 +2890,53 @@
 			return ret;
 	}
 again:
-	double_split = 0;
+	split = 1;
 	l = path->nodes[0];
 	slot = path->slots[0];
 	nritems = btrfs_header_nritems(l);
 	mid = (nritems + 1) / 2;
 
-	right = btrfs_alloc_free_block(trans, root, root->leafsize,
-					path->nodes[1]->start,
+	if (mid <= slot) {
+		if (nritems == 1 ||
+		    leaf_space_used(l, mid, nritems - mid) + data_size >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (slot >= nritems) {
+				split = 0;
+			} else {
+				mid = slot;
+				if (mid != nritems &&
+				    leaf_space_used(l, mid, nritems - mid) +
+				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+					split = 2;
+				}
+			}
+		}
+	} else {
+		if (leaf_space_used(l, 0, mid) + data_size >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (!extend && data_size && slot == 0) {
+				split = 0;
+			} else if ((extend || !data_size) && slot == 0) {
+				mid = 1;
+			} else {
+				mid = slot;
+				if (mid != nritems &&
+				    leaf_space_used(l, mid, nritems - mid) +
+				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+					split = 2 ;
+				}
+			}
+		}
+	}
+
+	if (split == 0)
+		btrfs_cpu_key_to_disk(&disk_key, ins_key);
+	else
+		btrfs_item_key(l, &disk_key, mid);
+
+	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
 					root->root_key.objectid,
-					trans->transid, 0, l->start, 0);
+					&disk_key, 0, l->start, 0);
 	if (IS_ERR(right)) {
 		BUG_ON(1);
 		return PTR_ERR(right);
@@ -2963,6 +2945,7 @@
 	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(right, right->start);
 	btrfs_set_header_generation(right, trans->transid);
+	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(right, root->root_key.objectid);
 	btrfs_set_header_level(right, 0);
 	write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2956,47 @@
 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
 
-	if (mid <= slot) {
-		if (nritems == 1 ||
-		    leaf_space_used(l, mid, nritems - mid) + data_size >
-			BTRFS_LEAF_DATA_SIZE(root)) {
-			if (slot >= nritems) {
-				struct btrfs_disk_key disk_key;
+	if (split == 0) {
+		if (mid <= slot) {
+			btrfs_set_header_nritems(right, 0);
+			wret = insert_ptr(trans, root, path,
+					  &disk_key, right->start,
+					  path->slots[1] + 1, 1);
+			if (wret)
+				ret = wret;
 
-				btrfs_cpu_key_to_disk(&disk_key, ins_key);
-				btrfs_set_header_nritems(right, 0);
-				wret = insert_ptr(trans, root, path,
-						  &disk_key, right->start,
-						  path->slots[1] + 1, 1);
+			btrfs_tree_unlock(path->nodes[0]);
+			free_extent_buffer(path->nodes[0]);
+			path->nodes[0] = right;
+			path->slots[0] = 0;
+			path->slots[1] += 1;
+		} else {
+			btrfs_set_header_nritems(right, 0);
+			wret = insert_ptr(trans, root, path,
+					  &disk_key,
+					  right->start,
+					  path->slots[1], 1);
+			if (wret)
+				ret = wret;
+			btrfs_tree_unlock(path->nodes[0]);
+			free_extent_buffer(path->nodes[0]);
+			path->nodes[0] = right;
+			path->slots[0] = 0;
+			if (path->slots[1] == 0) {
+				wret = fixup_low_keys(trans, root,
+						path, &disk_key, 1);
 				if (wret)
 					ret = wret;
-
-				btrfs_tree_unlock(path->nodes[0]);
-				free_extent_buffer(path->nodes[0]);
-				path->nodes[0] = right;
-				path->slots[0] = 0;
-				path->slots[1] += 1;
-				btrfs_mark_buffer_dirty(right);
-				return ret;
-			}
-			mid = slot;
-			if (mid != nritems &&
-			    leaf_space_used(l, mid, nritems - mid) +
-			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-				double_split = 1;
 			}
 		}
-	} else {
-		if (leaf_space_used(l, 0, mid) + data_size >
-			BTRFS_LEAF_DATA_SIZE(root)) {
-			if (!extend && data_size && slot == 0) {
-				struct btrfs_disk_key disk_key;
-
-				btrfs_cpu_key_to_disk(&disk_key, ins_key);
-				btrfs_set_header_nritems(right, 0);
-				wret = insert_ptr(trans, root, path,
-						  &disk_key,
-						  right->start,
-						  path->slots[1], 1);
-				if (wret)
-					ret = wret;
-				btrfs_tree_unlock(path->nodes[0]);
-				free_extent_buffer(path->nodes[0]);
-				path->nodes[0] = right;
-				path->slots[0] = 0;
-				if (path->slots[1] == 0) {
-					wret = fixup_low_keys(trans, root,
-						      path, &disk_key, 1);
-					if (wret)
-						ret = wret;
-				}
-				btrfs_mark_buffer_dirty(right);
-				return ret;
-			} else if ((extend || !data_size) && slot == 0) {
-				mid = 1;
-			} else {
-				mid = slot;
-				if (mid != nritems &&
-				    leaf_space_used(l, mid, nritems - mid) +
-				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-					double_split = 1;
-				}
-			}
-		}
+		btrfs_mark_buffer_dirty(right);
+		return ret;
 	}
 
 	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 	BUG_ON(ret);
 
-	if (double_split) {
+	if (split == 2) {
 		BUG_ON(num_doubles != 0);
 		num_doubles++;
 		goto again;
@@ -3447,7 +3398,7 @@
 		/* figure out how many keys we can insert in here */
 		total_data = data_size[0];
 		for (i = 1; i < nr; i++) {
-			if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
+			if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
 				break;
 			total_data += data_size[i];
 		}
@@ -3745,9 +3696,7 @@
 
 /*
  * a helper function to delete the leaf pointed to by path->slots[1] and
- * path->nodes[1].  bytenr is the node block pointer, but since the callers
- * already know it, it is faster to have them pass it down than to
- * read it out of the node again.
+ * path->nodes[1].
  *
  * This deletes the pointer in path->nodes[1] and frees the leaf
  * block extent.  zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3704,14 @@
  * The path must have already been setup for deleting the leaf, including
  * all the proper balancing.  path->nodes[1] must be locked.
  */
-noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path, u64 bytenr)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct extent_buffer *leaf)
 {
 	int ret;
-	u64 root_gen = btrfs_header_generation(path->nodes[1]);
-	u64 parent_start = path->nodes[1]->start;
-	u64 parent_owner = btrfs_header_owner(path->nodes[1]);
 
+	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
 	ret = del_ptr(trans, root, path, 1, path->slots[1]);
 	if (ret)
 		return ret;
@@ -3774,10 +3722,8 @@
 	 */
 	btrfs_unlock_up_safe(path, 0);
 
-	ret = btrfs_free_extent(trans, root, bytenr,
-				btrfs_level_size(root, 0),
-				parent_start, parent_owner,
-				root_gen, 0, 1);
+	ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+				0, root->root_key.objectid, 0, 0);
 	return ret;
 }
 /*
@@ -3845,7 +3791,7 @@
 		if (leaf == root->node) {
 			btrfs_set_header_level(leaf, 0);
 		} else {
-			ret = btrfs_del_leaf(trans, root, path, leaf->start);
+			ret = btrfs_del_leaf(trans, root, path, leaf);
 			BUG_ON(ret);
 		}
 	} else {
@@ -3861,8 +3807,7 @@
 		}
 
 		/* delete the leaf if it is mostly empty */
-		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
-		    !trans->transaction->delayed_refs.flushing) {
+		if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
@@ -3884,8 +3829,7 @@
 
 			if (btrfs_header_nritems(leaf) == 0) {
 				path->slots[1] = slot;
-				ret = btrfs_del_leaf(trans, root, path,
-						     leaf->start);
+				ret = btrfs_del_leaf(trans, root, path, leaf);
 				BUG_ON(ret);
 				free_extent_buffer(leaf);
 			} else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4414a5d..03441a9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,8 @@
 
 #define BTRFS_MAX_LEVEL 8
 
+#define BTRFS_COMPAT_EXTENT_TREE_V0
+
 /*
  * files bigger than this get some pre-flushing when they are added
  * to the ordered operations list.  That way we limit the total
@@ -267,7 +269,18 @@
 }
 
 #define BTRFS_FSID_SIZE 16
-#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
+#define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
+#define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
+
+#define BTRFS_BACKREF_REV_MAX		256
+#define BTRFS_BACKREF_REV_SHIFT		56
+#define BTRFS_BACKREF_REV_MASK		(((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+					 BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV		0
+#define BTRFS_MIXED_BACKREF_REV		1
 
 /*
  * every tree block (leaf or node) starts with this header.
@@ -296,7 +309,6 @@
 					sizeof(struct btrfs_item) - \
 					sizeof(struct btrfs_file_extent_item))
 
-#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
 
 /*
  * this is a very generous portion of the super block, giving us
@@ -355,9 +367,12 @@
  * Compat flags that we support.  If any incompat flags are set other than the
  * ones specified below then we will fail to mount
  */
-#define BTRFS_FEATURE_COMPAT_SUPP	0x0
-#define BTRFS_FEATURE_COMPAT_RO_SUPP	0x0
-#define BTRFS_FEATURE_INCOMPAT_SUPP	0x0
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
+
+#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
+#define BTRFS_FEATURE_INCOMPAT_SUPP		\
+	BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -421,23 +436,65 @@
 	unsigned int keep_locks:1;
 	unsigned int skip_locking:1;
 	unsigned int leave_spinning:1;
+	unsigned int search_commit_root:1;
 };
 
 /*
  * items in the extent btree are used to record the objectid of the
  * owner of the block and the number of references
  */
+
 struct btrfs_extent_item {
+	__le64 refs;
+	__le64 generation;
+	__le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
 	__le32 refs;
 } __attribute__ ((__packed__));
 
-struct btrfs_extent_ref {
+#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
+					sizeof(struct btrfs_item))
+
+#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)
+
+/* following flags only apply to tree blocks */
+
+/* use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
+
+struct btrfs_tree_block_info {
+	struct btrfs_disk_key key;
+	u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+	__le64 root;
+	__le64 objectid;
+	__le64 offset;
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+	u8 type;
+	u64 offset;
+} __attribute__ ((__packed__));
+
+/* old style backrefs item */
+struct btrfs_extent_ref_v0 {
 	__le64 root;
 	__le64 generation;
 	__le64 objectid;
-	__le32 num_refs;
+	__le32 count;
 } __attribute__ ((__packed__));
 
+
 /* dev extents record free space on individual devices.  The owner
  * field points back to the chunk allocation mapping tree that allocated
  * the extent.  The chunk tree uuid field is a way to double check the owner
@@ -695,12 +752,7 @@
 	struct list_head cluster_list;
 };
 
-struct btrfs_leaf_ref_tree {
-	struct rb_root root;
-	struct list_head list;
-	spinlock_t lock;
-};
-
+struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
 struct btrfs_fs_info {
@@ -831,18 +883,11 @@
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
 
-	/* tree relocation relocated fields */
-	struct list_head dead_reloc_roots;
-	struct btrfs_leaf_ref_tree reloc_ref_tree;
-	struct btrfs_leaf_ref_tree shared_ref_tree;
-
 	struct kobject super_kobj;
 	struct completion kobj_unregister;
 	int do_barriers;
 	int closing;
 	int log_root_recovering;
-	atomic_t throttles;
-	atomic_t throttle_gen;
 
 	u64 total_pinned;
 
@@ -861,6 +906,8 @@
 	 */
 	struct list_head space_info;
 
+	struct reloc_control *reloc_ctl;
+
 	spinlock_t delalloc_lock;
 	spinlock_t new_trans_lock;
 	u64 delalloc_bytes;
@@ -891,7 +938,6 @@
  * in ram representation of the tree.  extent_root is used for all allocations
  * and for the extent tree extent_root root.
  */
-struct btrfs_dirty_root;
 struct btrfs_root {
 	struct extent_buffer *node;
 
@@ -899,9 +945,6 @@
 	spinlock_t node_lock;
 
 	struct extent_buffer *commit_root;
-	struct btrfs_leaf_ref_tree *ref_tree;
-	struct btrfs_leaf_ref_tree ref_tree_struct;
-	struct btrfs_dirty_root *dirty_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
 
@@ -952,10 +995,15 @@
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
 
+	struct list_head root_list;
+
 	spinlock_t list_lock;
-	struct list_head dead_list;
 	struct list_head orphan_list;
 
+	spinlock_t inode_lock;
+	/* red-black tree that keeps track of in-memory inodes */
+	struct rb_root inode_tree;
+
 	/*
 	 * right now this just gets used so that a root has its own devid
 	 * for stat.  It may be used for more later
@@ -1017,7 +1065,16 @@
  * are used, and how many references there are to each block
  */
 #define BTRFS_EXTENT_ITEM_KEY	168
-#define BTRFS_EXTENT_REF_KEY	180
+
+#define BTRFS_TREE_BLOCK_REF_KEY	176
+
+#define BTRFS_EXTENT_DATA_REF_KEY	178
+
+#define BTRFS_EXTENT_REF_V0_KEY		180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY	182
+
+#define BTRFS_SHARED_DATA_REF_KEY	184
 
 /*
  * block groups give us hints into the extent allocation trees.  Which
@@ -1043,6 +1100,8 @@
 #define BTRFS_MOUNT_COMPRESS		(1 << 5)
 #define BTRFS_MOUNT_NOTREELOG           (1 << 6)
 #define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
+#define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
+#define BTRFS_MOUNT_NOSSD		(1 << 9)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1056,12 +1115,14 @@
 #define BTRFS_INODE_READONLY		(1 << 2)
 #define BTRFS_INODE_NOCOMPRESS		(1 << 3)
 #define BTRFS_INODE_PREALLOC		(1 << 4)
-#define btrfs_clear_flag(inode, flag)	(BTRFS_I(inode)->flags &= \
-					 ~BTRFS_INODE_##flag)
-#define btrfs_set_flag(inode, flag)	(BTRFS_I(inode)->flags |= \
-					 BTRFS_INODE_##flag)
-#define btrfs_test_flag(inode, flag)	(BTRFS_I(inode)->flags & \
-					 BTRFS_INODE_##flag)
+#define BTRFS_INODE_SYNC		(1 << 5)
+#define BTRFS_INODE_IMMUTABLE		(1 << 6)
+#define BTRFS_INODE_APPEND		(1 << 7)
+#define BTRFS_INODE_NODUMP		(1 << 8)
+#define BTRFS_INODE_NOATIME		(1 << 9)
+#define BTRFS_INODE_DIRSYNC		(1 << 10)
+
+
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
  * one for u8:
@@ -1317,24 +1378,67 @@
 	return (u8 *)((unsigned long)dev + ptr);
 }
 
-/* struct btrfs_extent_ref */
-BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
-BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
-BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
-BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
 
-BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
-			 generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
-			 objectid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
-			 num_refs, 32);
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
 
-/* struct btrfs_extent_item */
-BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
-			 refs, 32);
+
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+					struct btrfs_tree_block_info *item,
+					struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+					    struct btrfs_tree_block_info *item,
+					    struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+		   root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+		   objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+		   offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+		   type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+		   offset, 64);
+
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    type == BTRFS_SHARED_BLOCK_REF_KEY)
+		return sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_SHARED_DATA_REF_KEY)
+		return sizeof(struct btrfs_shared_data_ref) +
+		       sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_EXTENT_DATA_REF_KEY)
+		return sizeof(struct btrfs_extent_data_ref) +
+		       offsetof(struct btrfs_extent_inline_ref, offset);
+	BUG();
+	return 0;
+}
+
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
 
 /* struct btrfs_node */
 BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
@@ -1558,6 +1662,21 @@
 	return (flags & flag) == flag;
 }
 
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+	u64 flags = btrfs_header_flags(eb);
+	return flags >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+						int rev)
+{
+	u64 flags = btrfs_header_flags(eb);
+	flags &= ~BTRFS_BACKREF_REV_MASK;
+	flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
+	btrfs_set_header_flags(eb, flags);
+}
+
 static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
 {
 	unsigned long ptr = offsetof(struct btrfs_header, fsid);
@@ -1790,39 +1909,32 @@
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 objectid, u64 bytenr);
+			  struct btrfs_root *root,
+			  u64 objectid, u64 offset, u64 bytenr);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
-					     struct btrfs_root *root,
-					     u32 blocksize, u64 parent,
-					     u64 root_objectid,
-					     u64 ref_generation,
-					     int level,
-					     u64 hint,
-					     u64 empty_size);
+					struct btrfs_root *root, u32 blocksize,
+					u64 parent, u64 root_objectid,
+					struct btrfs_disk_key *key, int level,
+					u64 hint, u64 empty_size);
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 					    struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize,
 					    int level);
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       u64 num_bytes, u64 parent, u64 min_bytes,
-		       u64 root_objectid, u64 ref_generation,
-		       u64 owner, u64 empty_size, u64 hint_byte,
-		       u64 search_end, struct btrfs_key *ins, u64 data);
-int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins);
-int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins);
+int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 root_objectid, u64 owner,
+				     u64 offset, struct btrfs_key *ins);
+int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   u64 root_objectid, u64 owner, u64 offset,
+				   struct btrfs_key *ins);
 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
 				  u64 num_bytes, u64 min_alloc_size,
@@ -1830,18 +1942,18 @@
 				  u64 search_end, struct btrfs_key *ins,
 				  u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
-		  u32 *nr_extents);
-int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct extent_buffer *buf, u32 nr_extents);
-int btrfs_update_ref(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root, struct extent_buffer *orig_buf,
-		     struct extent_buffer *buf, int start_slot, int nr);
+		  struct extent_buffer *buf, int full_backref);
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref);
+int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 flags,
+				int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent,
-		      u64 root_objectid, u64 ref_generation,
-		      u64 owner_objectid, int pin);
+		      u64 root_objectid, u64 owner, u64 offset);
+
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -1849,13 +1961,8 @@
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 ref_generation,
-			 u64 owner_objectid);
-int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,
-			    u64 orig_parent, u64 parent,
-			    u64 root_objectid, u64 ref_generation,
-			    u64 owner_objectid);
+			 u64 root_objectid, u64 owner, u64 offset);
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				    struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
@@ -1867,16 +1974,9 @@
 			   u64 size);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start);
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
-int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root);
-int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
-int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct extent_buffer *buf, u64 orig_start);
-int btrfs_add_dead_reloc_root(struct btrfs_root *root);
-int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
+int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
+				struct btrfs_block_group_cache *group);
+
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -1891,13 +1991,12 @@
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			      u64 bytes);
 /* ctree.c */
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		     int level, int *slot);
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
 			struct btrfs_path *path, u64 min_objectid,
 			int type);
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_key *node_keys,
-		     u64 *nodes, int lowest_level);
 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, struct btrfs_path *path,
 			    struct btrfs_key *new_key);
@@ -1918,6 +2017,8 @@
 		      struct btrfs_root *root,
 		      struct extent_buffer *buf,
 		      struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+			      struct extent_buffer *buf);
 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, u32 data_size);
 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1944,9 +2045,6 @@
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int slot, int nr);
-int btrfs_del_leaf(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path, u64 bytenr);
 static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path)
@@ -2005,8 +2103,9 @@
 			 btrfs_root_item *item, struct btrfs_key *key);
 int btrfs_search_root(struct btrfs_root *root, u64 search_start,
 		      u64 *found_objectid);
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
-			  struct btrfs_root *latest_root);
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_set_root_node(struct btrfs_root_item *item,
+			struct extent_buffer *node);
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
@@ -2139,7 +2238,6 @@
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
-void btrfs_read_locked_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, int wait);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2147,12 +2245,8 @@
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
-struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
-			    struct btrfs_root *root, int wait);
-struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
-				struct btrfs_root *root);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *is_new);
+			 struct btrfs_root *root);
 int btrfs_commit_write(struct file *file, struct page *page,
 		       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2168,6 +2262,8 @@
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+void btrfs_update_iflags(struct inode *inode);
+void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
 
 /* file.c */
 int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
@@ -2205,8 +2301,20 @@
 int btrfs_sync_fs(struct super_block *sb, int wait);
 
 /* acl.c */
+#ifdef CONFIG_FS_POSIX_ACL
 int btrfs_check_acl(struct inode *inode, int mask);
+#else
+#define btrfs_check_acl NULL
+#endif
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
 
+/* relocation.c */
+int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root);
+int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
 #endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c0..84e6781 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
  * add extents in the middle of btrfs_search_slot, and it allows
  * us to buffer up frequently modified backrefs in an rb tree instead
  * of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
  */
 
 /*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
+ * compare two delayed tree backrefs with same bytenr and type
  */
-static int comp_entry(struct btrfs_delayed_ref_node *ref,
-		      u64 bytenr, u64 parent)
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
+			  struct btrfs_delayed_tree_ref *ref1)
 {
-	if (bytenr < ref->bytenr)
+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * compare two delayed data backrefs with same bytenr and type
+ */
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
+			  struct btrfs_delayed_data_ref *ref1)
+{
+	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+		if (ref1->objectid < ref2->objectid)
+			return -1;
+		if (ref1->objectid > ref2->objectid)
+			return 1;
+		if (ref1->offset < ref2->offset)
+			return -1;
+		if (ref1->offset > ref2->offset)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent,
+ * type of the delayed backrefs and content of delayed backrefs.
+ */
+static int comp_entry(struct btrfs_delayed_ref_node *ref2,
+		      struct btrfs_delayed_ref_node *ref1)
+{
+	if (ref1->bytenr < ref2->bytenr)
 		return -1;
-	if (bytenr > ref->bytenr)
+	if (ref1->bytenr > ref2->bytenr)
 		return 1;
-	if (parent < ref->parent)
+	if (ref1->is_head && ref2->is_head)
+		return 0;
+	if (ref2->is_head)
 		return -1;
-	if (parent > ref->parent)
+	if (ref1->is_head)
 		return 1;
+	if (ref1->type < ref2->type)
+		return -1;
+	if (ref1->type > ref2->type)
+		return 1;
+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
+				      btrfs_delayed_node_to_tree_ref(ref1));
+	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
+		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
+				      btrfs_delayed_node_to_data_ref(ref1));
+	}
+	BUG();
 	return 0;
 }
 
@@ -59,20 +119,21 @@
  * inserted.
  */
 static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-						  u64 bytenr, u64 parent,
 						  struct rb_node *node)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent_node = NULL;
 	struct btrfs_delayed_ref_node *entry;
+	struct btrfs_delayed_ref_node *ins;
 	int cmp;
 
+	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	while (*p) {
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 				 rb_node);
 
-		cmp = comp_entry(entry, bytenr, parent);
+		cmp = comp_entry(entry, ins);
 		if (cmp < 0)
 			p = &(*p)->rb_left;
 		else if (cmp > 0)
@@ -81,18 +142,17 @@
 			return entry;
 	}
 
-	entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	rb_link_node(node, parent_node, p);
 	rb_insert_color(node, root);
 	return NULL;
 }
 
 /*
- * find an entry based on (bytenr,parent).  This returns the delayed
- * ref if it was able to find one, or NULL if nothing was in that spot
+ * find an head entry based on bytenr. This returns the delayed ref
+ * head if it was able to find one, or NULL if nothing was in that spot
  */
-static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
-				  u64 bytenr, u64 parent,
+static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
+				  u64 bytenr,
 				  struct btrfs_delayed_ref_node **last)
 {
 	struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@
 		if (last)
 			*last = entry;
 
-		cmp = comp_entry(entry, bytenr, parent);
+		if (bytenr < entry->bytenr)
+			cmp = -1;
+		else if (bytenr > entry->bytenr)
+			cmp = 1;
+		else if (!btrfs_delayed_ref_is_head(entry))
+			cmp = 1;
+		else
+			cmp = 0;
+
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
@@ -154,7 +222,7 @@
 		node = rb_first(&delayed_refs->root);
 	} else {
 		ref = NULL;
-		tree_search(&delayed_refs->root, start, (u64)-1, &ref);
+		find_ref_head(&delayed_refs->root, start, &ref);
 		if (ref) {
 			struct btrfs_delayed_ref_node *tmp;
 
@@ -234,7 +302,7 @@
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		prev_node = rb_prev(&ref->rb_node);
 		if (!prev_node)
@@ -250,25 +318,28 @@
 }
 
 /*
- * helper function to lookup reference count
+ * helper function to lookup reference count and flags of extent.
  *
  * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree.  This way you
- * can check to see what the reference count would be if all of the
- * delayed refs are processed.
+ * reference count modifications queued up in the rbtree. the head
+ * node may also store the extent flags to set. This way you can check
+ * to see what the reference count and extent flags would be if all of
+ * the delayed refs are not processed.
  */
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs)
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags)
 {
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *head;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_path *path;
-	struct extent_buffer *leaf;
 	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
 	struct btrfs_key key;
-	u32 num_refs;
+	u32 item_size;
+	u64 num_refs;
+	u64 extent_flags;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -287,37 +358,60 @@
 
 	if (ret == 0) {
 		leaf = path->nodes[0];
-		ei = btrfs_item_ptr(leaf, path->slots[0],
-				    struct btrfs_extent_item);
-		num_refs = btrfs_extent_refs(leaf, ei);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			num_refs = btrfs_extent_refs(leaf, ei);
+			extent_flags = btrfs_extent_flags(leaf, ei);
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			struct btrfs_extent_item_v0 *ei0;
+			BUG_ON(item_size != sizeof(*ei0));
+			ei0 = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_item_v0);
+			num_refs = btrfs_extent_refs_v0(leaf, ei0);
+			/* FIXME: this isn't correct for data */
+			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+			BUG();
+#endif
+		}
+		BUG_ON(num_refs == 0);
 	} else {
 		num_refs = 0;
+		extent_flags = 0;
 		ret = 0;
 	}
 
 	spin_lock(&delayed_refs->lock);
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		head = btrfs_delayed_node_to_head(ref);
-		if (mutex_trylock(&head->mutex)) {
-			num_refs += ref->ref_mod;
+		if (!mutex_trylock(&head->mutex)) {
+			atomic_inc(&ref->refs);
+			spin_unlock(&delayed_refs->lock);
+
+			btrfs_release_path(root->fs_info->extent_root, path);
+
+			mutex_lock(&head->mutex);
 			mutex_unlock(&head->mutex);
-			*refs = num_refs;
-			goto out;
+			btrfs_put_delayed_ref(ref);
+			goto again;
 		}
+		if (head->extent_op && head->extent_op->update_flags)
+			extent_flags |= head->extent_op->flags_to_set;
+		else
+			BUG_ON(num_refs == 0);
 
-		atomic_inc(&ref->refs);
-		spin_unlock(&delayed_refs->lock);
-
-		btrfs_release_path(root->fs_info->extent_root, path);
-
-		mutex_lock(&head->mutex);
+		num_refs += ref->ref_mod;
 		mutex_unlock(&head->mutex);
-		btrfs_put_delayed_ref(ref);
-		goto again;
-	} else {
-		*refs = num_refs;
 	}
+	WARN_ON(num_refs == 0);
+	if (refs)
+		*refs = num_refs;
+	if (flags)
+		*flags = extent_flags;
 out:
 	spin_unlock(&delayed_refs->lock);
 	btrfs_free_path(path);
@@ -338,16 +432,7 @@
 		    struct btrfs_delayed_ref_node *existing,
 		    struct btrfs_delayed_ref_node *update)
 {
-	struct btrfs_delayed_ref *existing_ref;
-	struct btrfs_delayed_ref *ref;
-
-	existing_ref = btrfs_delayed_node_to_ref(existing);
-	ref = btrfs_delayed_node_to_ref(update);
-
-	if (ref->pin)
-		existing_ref->pin = 1;
-
-	if (ref->action != existing_ref->action) {
+	if (update->action != existing->action) {
 		/*
 		 * this is effectively undoing either an add or a
 		 * drop.  We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@
 			delayed_refs->num_entries--;
 			if (trans->delayed_ref_updates)
 				trans->delayed_ref_updates--;
+		} else {
+			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		}
 	} else {
-		if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
-			/* if we're adding refs, make sure all the
-			 * details match up.  The extent could
-			 * have been totally freed and reallocated
-			 * by a different owner before the delayed
-			 * ref entries were removed.
-			 */
-			existing_ref->owner_objectid = ref->owner_objectid;
-			existing_ref->generation = ref->generation;
-			existing_ref->root = ref->root;
-			existing->num_bytes = update->num_bytes;
-		}
+		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		/*
 		 * the action on the existing ref matches
 		 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@
 
 	existing_ref = btrfs_delayed_node_to_head(existing);
 	ref = btrfs_delayed_node_to_head(update);
+	BUG_ON(existing_ref->is_data != ref->is_data);
 
 	if (ref->must_insert_reserved) {
 		/* if the extent was freed and then
@@ -420,6 +499,24 @@
 
 	}
 
+	if (ref->extent_op) {
+		if (!existing_ref->extent_op) {
+			existing_ref->extent_op = ref->extent_op;
+		} else {
+			if (ref->extent_op->update_key) {
+				memcpy(&existing_ref->extent_op->key,
+				       &ref->extent_op->key,
+				       sizeof(ref->extent_op->key));
+				existing_ref->extent_op->update_key = 1;
+			}
+			if (ref->extent_op->update_flags) {
+				existing_ref->extent_op->flags_to_set |=
+					ref->extent_op->flags_to_set;
+				existing_ref->extent_op->update_flags = 1;
+			}
+			kfree(ref->extent_op);
+		}
+	}
 	/*
 	 * update the reference mod on the head to reflect this new operation
 	 */
@@ -427,19 +524,16 @@
 }
 
 /*
- * helper function to actually insert a delayed ref into the rbtree.
+ * helper function to actually insert a head node into the rbtree.
  * this does all the dirty work in terms of maintaining the correct
- * overall modification count in the head node and properly dealing
- * with updating existing nodes as new modifications are queued.
+ * overall modification count.
  */
-static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  struct btrfs_delayed_ref_node *ref,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin)
+static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
+					struct btrfs_delayed_ref_node *ref,
+					u64 bytenr, u64 num_bytes,
+					int action, int is_data)
 {
 	struct btrfs_delayed_ref_node *existing;
-	struct btrfs_delayed_ref *full_ref;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
@@ -449,12 +543,10 @@
 	 * the head node stores the sum of all the mods, so dropping a ref
 	 * should drop the sum in the head node by one.
 	 */
-	if (parent == (u64)-1) {
-		if (action == BTRFS_DROP_DELAYED_REF)
-			count_mod = -1;
-		else if (action == BTRFS_UPDATE_DELAYED_HEAD)
-			count_mod = 0;
-	}
+	if (action == BTRFS_UPDATE_DELAYED_HEAD)
+		count_mod = 0;
+	else if (action == BTRFS_DROP_DELAYED_REF)
+		count_mod = -1;
 
 	/*
 	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,42 @@
 	 * Once we record must_insert_reserved, switch the action to
 	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
 	 */
-	if (action == BTRFS_ADD_DELAYED_EXTENT) {
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
 		must_insert_reserved = 1;
-		action = BTRFS_ADD_DELAYED_REF;
-	} else {
+	else
 		must_insert_reserved = 0;
-	}
-
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
 	atomic_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
-	ref->parent = parent;
-	ref->ref_mod = count_mod;
-	ref->in_tree = 1;
 	ref->num_bytes = num_bytes;
+	ref->ref_mod = count_mod;
+	ref->type  = 0;
+	ref->action  = 0;
+	ref->is_head = 1;
+	ref->in_tree = 1;
 
-	if (btrfs_delayed_ref_is_head(ref)) {
-		head_ref = btrfs_delayed_node_to_head(ref);
-		head_ref->must_insert_reserved = must_insert_reserved;
-		INIT_LIST_HEAD(&head_ref->cluster);
-		mutex_init(&head_ref->mutex);
-	} else {
-		full_ref = btrfs_delayed_node_to_ref(ref);
-		full_ref->root = ref_root;
-		full_ref->generation = ref_generation;
-		full_ref->owner_objectid = owner_objectid;
-		full_ref->pin = pin;
-		full_ref->action = action;
-	}
+	head_ref = btrfs_delayed_node_to_head(ref);
+	head_ref->must_insert_reserved = must_insert_reserved;
+	head_ref->is_data = is_data;
 
-	existing = tree_insert(&delayed_refs->root, bytenr,
-			       parent, &ref->rb_node);
+	INIT_LIST_HEAD(&head_ref->cluster);
+	mutex_init(&head_ref->mutex);
+
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
-		if (btrfs_delayed_ref_is_head(ref))
-			update_existing_head_ref(existing, ref);
-		else
-			update_existing_ref(trans, delayed_refs, existing, ref);
-
+		update_existing_head_ref(existing, ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kfree(ref);
 	} else {
-		if (btrfs_delayed_ref_is_head(ref)) {
-			delayed_refs->num_heads++;
-			delayed_refs->num_heads_ready++;
-		}
+		delayed_refs->num_heads++;
+		delayed_refs->num_heads_ready++;
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
 	}
@@ -525,37 +602,139 @@
 }
 
 /*
- * add a delayed ref to the tree.  This does all of the accounting required
+ * helper to insert a delayed tree ref into the rbtree.
+ */
+static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, int level, int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_tree_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
+	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
+
+	full_ref = btrfs_delayed_node_to_tree_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
+	} else {
+		full_ref->root = ref_root;
+		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
+	}
+	full_ref->level = level;
+
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_ref(trans, delayed_refs, existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * helper to insert a delayed data ref into the rbtree.
+ */
+static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, u64 owner, u64 offset,
+					 int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_data_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
+	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
+
+	full_ref = btrfs_delayed_node_to_data_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_DATA_REF_KEY;
+	} else {
+		full_ref->root = ref_root;
+		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
+	}
+	full_ref->objectid = owner;
+	full_ref->offset = offset;
+
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_ref(trans, delayed_refs, existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * add a delayed tree ref.  This does all of the accounting required
  * to make sure the delayed ref is eventually processed before this
  * transaction commits.
  */
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin)
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root,  int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_delayed_ref *ref;
+	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int ret;
 
+	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmalloc(sizeof(*ref), GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
-	/*
-	 * the parent = 0 case comes from cases where we don't actually
-	 * know the parent yet.  It will get updated later via a add/drop
-	 * pair.
-	 */
-	if (parent == 0)
-		parent = bytenr;
-
 	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
 	if (!head_ref) {
 		kfree(ref);
 		return -ENOMEM;
 	}
+
+	head_ref->extent_op = extent_op;
+
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
@@ -563,19 +742,89 @@
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
-				      (u64)-1, 0, 0, 0, action, pin);
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 0);
 	BUG_ON(ret);
 
-	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
-				      parent, ref_root, ref_generation,
-				      owner_objectid, action, pin);
+	ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, level, action);
 	BUG_ON(ret);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
 
 /*
+ * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
+ */
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes,
+			       u64 parent, u64 ref_root,
+			       u64 owner, u64 offset, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_data_ref *ref;
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	BUG_ON(extent_op && !extent_op->is_data);
+	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	if (!ref)
+		return -ENOMEM;
+
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref) {
+		kfree(ref);
+		return -ENOMEM;
+	}
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
+	/*
+	 * insert both the head node and the new ref without dropping
+	 * the spin lock
+	 */
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 1);
+	BUG_ON(ret);
+
+	ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, owner, offset, action);
+	BUG_ON(ret);
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+				u64 bytenr, u64 num_bytes,
+				struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref)
+		return -ENOMEM;
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
+				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+				   extent_op->is_data);
+	BUG_ON(ret);
+
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+/*
  * this does a simple search for the head node for a given extent.
  * It must be called with the delayed ref spinlock held, and it returns
  * the head node if any where found, or NULL if not.
@@ -587,7 +836,7 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref)
 		return btrfs_delayed_node_to_head(ref);
 	return NULL;
@@ -603,6 +852,7 @@
  *
  * It is the same as doing a ref add and delete in two separate calls.
  */
+#if 0
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 			  u64 bytenr, u64 num_bytes, u64 orig_parent,
 			  u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
+#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 3bec2ff..f6fc67d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -30,9 +30,6 @@
 	/* the starting bytenr of the extent */
 	u64 bytenr;
 
-	/* the parent our backref will point to */
-	u64 parent;
-
 	/* the size of the extent */
 	u64 num_bytes;
 
@@ -50,10 +47,21 @@
 	 */
 	int ref_mod;
 
+	unsigned int action:8;
+	unsigned int type:8;
 	/* is this node still in the rbtree? */
+	unsigned int is_head:1;
 	unsigned int in_tree:1;
 };
 
+struct btrfs_delayed_extent_op {
+	struct btrfs_disk_key key;
+	u64 flags_to_set;
+	unsigned int update_key:1;
+	unsigned int update_flags:1;
+	unsigned int is_data:1;
+};
+
 /*
  * the head refs are used to hold a lock on a given extent, which allows us
  * to make sure that only one process is running the delayed refs
@@ -71,6 +79,7 @@
 
 	struct list_head cluster;
 
+	struct btrfs_delayed_extent_op *extent_op;
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
@@ -84,27 +93,26 @@
 	 * the free has happened.
 	 */
 	unsigned int must_insert_reserved:1;
+	unsigned int is_data:1;
 };
 
-struct btrfs_delayed_ref {
+struct btrfs_delayed_tree_ref {
 	struct btrfs_delayed_ref_node node;
+	union {
+		u64 root;
+		u64 parent;
+	};
+	int level;
+};
 
-	/* the root objectid our ref will point to */
-	u64 root;
-
-	/* the generation for the backref */
-	u64 generation;
-
-	/* owner_objectid of the backref  */
-	u64 owner_objectid;
-
-	/* operation done by this entry in the rbtree */
-	u8 action;
-
-	/* if pin == 1, when the extent is freed it will be pinned until
-	 * transaction commit
-	 */
-	unsigned int pin:1;
+struct btrfs_delayed_data_ref {
+	struct btrfs_delayed_ref_node node;
+	union {
+		u64 root;
+		u64 parent;
+	};
+	u64 objectid;
+	u64 offset;
 };
 
 struct btrfs_delayed_ref_root {
@@ -143,17 +151,25 @@
 	}
 }
 
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin);
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root, int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op);
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes,
+			       u64 parent, u64 ref_root,
+			       u64 owner, u64 offset, int action,
+			       struct btrfs_delayed_extent_op *extent_op);
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+				u64 bytenr, u64 num_bytes,
+				struct btrfs_delayed_extent_op *extent_op);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
 int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs);
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 			  u64 bytenr, u64 num_bytes, u64 orig_parent,
 			  u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -169,18 +185,24 @@
  */
 static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
 {
-	return node->parent == (u64)-1;
+	return node->is_head;
 }
 
 /*
  * helper functions to cast a node into its container
  */
-static inline struct btrfs_delayed_ref *
-btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
+static inline struct btrfs_delayed_tree_ref *
+btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
 {
 	WARN_ON(btrfs_delayed_ref_is_head(node));
-	return container_of(node, struct btrfs_delayed_ref, node);
+	return container_of(node, struct btrfs_delayed_tree_ref, node);
+}
 
+static inline struct btrfs_delayed_data_ref *
+btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
+{
+	WARN_ON(btrfs_delayed_ref_is_head(node));
+	return container_of(node, struct btrfs_delayed_data_ref, node);
 }
 
 static inline struct btrfs_delayed_ref_head *
@@ -188,6 +210,5 @@
 {
 	WARN_ON(!btrfs_delayed_ref_is_head(node));
 	return container_of(node, struct btrfs_delayed_ref_head, node);
-
 }
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b0ea0b..0d50d49 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,8 +26,8 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/crc32c.h>
 #include "compat.h"
-#include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -36,7 +36,6 @@
 #include "print-tree.h"
 #include "async-thread.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "tree-log.h"
 #include "free-space-cache.h"
 
@@ -172,7 +171,7 @@
 
 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
 {
-	return btrfs_crc32c(seed, data, len);
+	return crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, char *result)
@@ -884,7 +883,6 @@
 {
 	root->node = NULL;
 	root->commit_root = NULL;
-	root->ref_tree = NULL;
 	root->sectorsize = sectorsize;
 	root->nodesize = nodesize;
 	root->leafsize = leafsize;
@@ -899,12 +897,14 @@
 	root->last_inode_alloc = 0;
 	root->name = NULL;
 	root->in_sysfs = 0;
+	root->inode_tree.rb_node = NULL;
 
 	INIT_LIST_HEAD(&root->dirty_list);
 	INIT_LIST_HEAD(&root->orphan_list);
-	INIT_LIST_HEAD(&root->dead_list);
+	INIT_LIST_HEAD(&root->root_list);
 	spin_lock_init(&root->node_lock);
 	spin_lock_init(&root->list_lock);
+	spin_lock_init(&root->inode_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
 	init_waitqueue_head(&root->log_writer_wait);
@@ -918,9 +918,6 @@
 	extent_io_tree_init(&root->dirty_log_pages,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 
-	btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
-	root->ref_tree = &root->ref_tree_struct;
-
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -959,6 +956,7 @@
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
+	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 	return 0;
 }
@@ -1025,20 +1023,19 @@
 	 */
 	root->ref_cows = 0;
 
-	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-				      0, BTRFS_TREE_LOG_OBJECTID,
-				      trans->transid, 0, 0, 0);
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+				      BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
 	if (IS_ERR(leaf)) {
 		kfree(root);
 		return ERR_CAST(leaf);
 	}
 
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
 	root->node = leaf;
-	btrfs_set_header_nritems(root->node, 0);
-	btrfs_set_header_level(root->node, 0);
-	btrfs_set_header_bytenr(root->node, root->node->start);
-	btrfs_set_header_generation(root->node, trans->transid);
-	btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
 
 	write_extent_buffer(root->node, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(root->node),
@@ -1081,8 +1078,7 @@
 	inode_item->nbytes = cpu_to_le64(root->leafsize);
 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
 
-	btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
-	btrfs_set_root_generation(&log_root->root_item, trans->transid);
+	btrfs_set_root_node(&log_root->root_item, log_root->node);
 
 	WARN_ON(root->log_root);
 	root->log_root = log_root;
@@ -1144,6 +1140,7 @@
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
+	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 insert:
 	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1210,7 +1207,7 @@
 	}
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_find_dead_roots(fs_info->tree_root,
-					    root->root_key.objectid, root);
+					    root->root_key.objectid);
 		BUG_ON(ret);
 		btrfs_orphan_cleanup(root);
 	}
@@ -1569,8 +1566,6 @@
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
-	atomic_set(&fs_info->throttles, 0);
-	atomic_set(&fs_info->throttle_gen, 0);
 	fs_info->sb = sb;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
@@ -1598,6 +1593,7 @@
 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
 	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
 
+	RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
 	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
 			     fs_info->btree_inode->i_mapping,
 			     GFP_NOFS);
@@ -1613,10 +1609,6 @@
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 
-	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
-	btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
-	btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
-
 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
 	       sizeof(struct btrfs_key));
@@ -1674,6 +1666,12 @@
 		goto fail_iput;
 	}
 
+	features = btrfs_super_incompat_flags(disk_super);
+	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+		btrfs_set_super_incompat_flags(disk_super, features);
+	}
+
 	features = btrfs_super_compat_ro_flags(disk_super) &
 		~BTRFS_FEATURE_COMPAT_RO_SUPP;
 	if (!(sb->s_flags & MS_RDONLY) && features) {
@@ -1771,7 +1769,7 @@
 	if (ret) {
 		printk(KERN_WARNING "btrfs: failed to read the system "
 		       "array on %s\n", sb->s_id);
-		goto fail_sys_array;
+		goto fail_sb_buffer;
 	}
 
 	blocksize = btrfs_level_size(tree_root,
@@ -1785,6 +1783,8 @@
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
 	BUG_ON(!chunk_root->node);
+	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
+	chunk_root->commit_root = btrfs_root_node(chunk_root);
 
 	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
 	   (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1810,7 +1810,8 @@
 					  blocksize, generation);
 	if (!tree_root->node)
 		goto fail_chunk_root;
-
+	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
+	tree_root->commit_root = btrfs_root_node(tree_root);
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1820,14 +1821,14 @@
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
-	dev_root->track_dirty = 1;
 	if (ret)
 		goto fail_extent_root;
+	dev_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
-		goto fail_extent_root;
+		goto fail_dev_root;
 
 	csum_root->track_dirty = 1;
 
@@ -1849,6 +1850,14 @@
 	if (IS_ERR(fs_info->transaction_kthread))
 		goto fail_cleaner;
 
+	if (!btrfs_test_opt(tree_root, SSD) &&
+	    !btrfs_test_opt(tree_root, NOSSD) &&
+	    !fs_info->fs_devices->rotating) {
+		printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+		       "mode\n");
+		btrfs_set_opt(fs_info->mount_opt, SSD);
+	}
+
 	if (btrfs_super_log_root(disk_super) != 0) {
 		u64 bytenr = btrfs_super_log_root(disk_super);
 
@@ -1881,7 +1890,7 @@
 	}
 
 	if (!(sb->s_flags & MS_RDONLY)) {
-		ret = btrfs_cleanup_reloc_trees(tree_root);
+		ret = btrfs_recover_relocation(tree_root);
 		BUG_ON(ret);
 	}
 
@@ -1892,6 +1901,7 @@
 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
+
 	return tree_root;
 
 fail_trans_kthread:
@@ -1908,14 +1918,19 @@
 
 fail_csum_root:
 	free_extent_buffer(csum_root->node);
+	free_extent_buffer(csum_root->commit_root);
+fail_dev_root:
+	free_extent_buffer(dev_root->node);
+	free_extent_buffer(dev_root->commit_root);
 fail_extent_root:
 	free_extent_buffer(extent_root->node);
+	free_extent_buffer(extent_root->commit_root);
 fail_tree_root:
 	free_extent_buffer(tree_root->node);
+	free_extent_buffer(tree_root->commit_root);
 fail_chunk_root:
 	free_extent_buffer(chunk_root->node);
-fail_sys_array:
-	free_extent_buffer(dev_root->node);
+	free_extent_buffer(chunk_root->commit_root);
 fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->fixup_workers);
 	btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2005,6 +2020,17 @@
 	return latest;
 }
 
+/*
+ * this should be called twice, once with wait == 0 and
+ * once with wait == 1.  When wait == 0 is done, all the buffer heads
+ * we write are pinned.
+ *
+ * They are released when wait == 1 is done.
+ * max_mirrors must be the same for both runs, and it indicates how
+ * many supers on this one device should be written.
+ *
+ * max_mirrors == 0 means to write them all.
+ */
 static int write_dev_supers(struct btrfs_device *device,
 			    struct btrfs_super_block *sb,
 			    int do_barriers, int wait, int max_mirrors)
@@ -2040,12 +2066,16 @@
 			bh = __find_get_block(device->bdev, bytenr / 4096,
 					      BTRFS_SUPER_INFO_SIZE);
 			BUG_ON(!bh);
-			brelse(bh);
 			wait_on_buffer(bh);
-			if (buffer_uptodate(bh)) {
-				brelse(bh);
-				continue;
-			}
+			if (!buffer_uptodate(bh))
+				errors++;
+
+			/* drop our reference */
+			brelse(bh);
+
+			/* drop the reference from the wait == 0 run */
+			brelse(bh);
+			continue;
 		} else {
 			btrfs_set_super_bytenr(sb, bytenr);
 
@@ -2056,12 +2086,18 @@
 					      BTRFS_CSUM_SIZE);
 			btrfs_csum_final(crc, sb->csum);
 
+			/*
+			 * one reference for us, and we leave it for the
+			 * caller
+			 */
 			bh = __getblk(device->bdev, bytenr / 4096,
 				      BTRFS_SUPER_INFO_SIZE);
 			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
 
-			set_buffer_uptodate(bh);
+			/* one reference for submit_bh */
 			get_bh(bh);
+
+			set_buffer_uptodate(bh);
 			lock_buffer(bh);
 			bh->b_end_io = btrfs_end_buffer_write_sync;
 		}
@@ -2073,6 +2109,7 @@
 				       device->name);
 				set_buffer_uptodate(bh);
 				device->barriers = 0;
+				/* one reference for submit_bh */
 				get_bh(bh);
 				lock_buffer(bh);
 				ret = submit_bh(WRITE_SYNC, bh);
@@ -2081,22 +2118,15 @@
 			ret = submit_bh(WRITE_SYNC, bh);
 		}
 
-		if (!ret && wait) {
-			wait_on_buffer(bh);
-			if (!buffer_uptodate(bh))
-				errors++;
-		} else if (ret) {
+		if (ret)
 			errors++;
-		}
-		if (wait)
-			brelse(bh);
 	}
 	return errors < i ? 0 : -1;
 }
 
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
-	struct list_head *head = &root->fs_info->fs_devices->devices;
+	struct list_head *head;
 	struct btrfs_device *dev;
 	struct btrfs_super_block *sb;
 	struct btrfs_dev_item *dev_item;
@@ -2111,6 +2141,9 @@
 
 	sb = &root->fs_info->super_for_commit;
 	dev_item = &sb->dev_item;
+
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+	head = &root->fs_info->fs_devices->devices;
 	list_for_each_entry(dev, head, dev_list) {
 		if (!dev->bdev) {
 			total_errors++;
@@ -2154,6 +2187,7 @@
 		if (ret)
 			total_errors++;
 	}
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 	if (total_errors > max_errors) {
 		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
 		       total_errors);
@@ -2173,6 +2207,7 @@
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
+	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
 	radix_tree_delete(&fs_info->fs_roots_radix,
 			  (unsigned long)root->root_key.objectid);
 	if (root->anon_super.s_dev) {
@@ -2219,10 +2254,12 @@
 					     ARRAY_SIZE(gang));
 		if (!ret)
 			break;
+
+		root_objectid = gang[ret - 1]->root_key.objectid + 1;
 		for (i = 0; i < ret; i++) {
 			root_objectid = gang[i]->root_key.objectid;
 			ret = btrfs_find_dead_roots(fs_info->tree_root,
-						    root_objectid, gang[i]);
+						    root_objectid);
 			BUG_ON(ret);
 			btrfs_orphan_cleanup(gang[i]);
 		}
@@ -2278,20 +2315,16 @@
 		       (unsigned long long)fs_info->total_ref_cache_size);
 	}
 
-	if (fs_info->extent_root->node)
-		free_extent_buffer(fs_info->extent_root->node);
-
-	if (fs_info->tree_root->node)
-		free_extent_buffer(fs_info->tree_root->node);
-
-	if (root->fs_info->chunk_root->node)
-		free_extent_buffer(root->fs_info->chunk_root->node);
-
-	if (root->fs_info->dev_root->node)
-		free_extent_buffer(root->fs_info->dev_root->node);
-
-	if (root->fs_info->csum_root->node)
-		free_extent_buffer(root->fs_info->csum_root->node);
+	free_extent_buffer(fs_info->extent_root->node);
+	free_extent_buffer(fs_info->extent_root->commit_root);
+	free_extent_buffer(fs_info->tree_root->node);
+	free_extent_buffer(fs_info->tree_root->commit_root);
+	free_extent_buffer(root->fs_info->chunk_root->node);
+	free_extent_buffer(root->fs_info->chunk_root->commit_root);
+	free_extent_buffer(root->fs_info->dev_root->node);
+	free_extent_buffer(root->fs_info->dev_root->commit_root);
+	free_extent_buffer(root->fs_info->csum_root->node);
+	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 	btrfs_free_block_groups(root->fs_info);
 
@@ -2373,17 +2406,14 @@
 	 * looks as though older kernels can get into trouble with
 	 * this code, they end up stuck in balance_dirty_pages forever
 	 */
-	struct extent_io_tree *tree;
 	u64 num_dirty;
-	u64 start = 0;
 	unsigned long thresh = 32 * 1024 * 1024;
-	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 
 	if (current->flags & PF_MEMALLOC)
 		return;
 
-	num_dirty = count_range_bits(tree, &start, (u64)-1,
-				     thresh, EXTENT_DIRTY);
+	num_dirty = root->fs_info->dirty_metadata_bytes;
+
 	if (num_dirty > thresh) {
 		balance_dirty_pages_ratelimited_nr(
 				   root->fs_info->btree_inode->i_mapping, 1);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 85315d2..9596b40 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -78,7 +78,7 @@
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
 
-	inode = btrfs_iget(sb, &key, root, NULL);
+	inode = btrfs_iget(sb, &key, root);
 	if (IS_ERR(inode))
 		return (void *)inode;
 
@@ -192,7 +192,7 @@
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
 
-	return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
+	return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
 }
 
 const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e2c7c7..edc7d20 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,50 +23,39 @@
 #include <linux/rcupdate.h>
 #include "compat.h"
 #include "hash.h"
-#include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
 #include "volumes.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "free-space-cache.h"
 
-#define PENDING_EXTENT_INSERT 0
-#define PENDING_EXTENT_DELETE 1
-#define PENDING_BACKREF_UPDATE 2
-
-struct pending_extent_op {
-	int type;
-	u64 bytenr;
-	u64 num_bytes;
-	u64 parent;
-	u64 orig_parent;
-	u64 generation;
-	u64 orig_generation;
-	int level;
-	struct list_head list;
-	int del;
-};
-
-static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-					 struct btrfs_root *root, u64 parent,
-					 u64 root_objectid, u64 ref_generation,
-					 u64 owner, struct btrfs_key *ins,
-					 int ref_mod);
 static int update_reserved_extents(struct btrfs_root *root,
 				   u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
-static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					u64 bytenr, u64 num_bytes, u64 parent,
-					u64 root_objectid, u64 ref_generation,
-					u64 owner_objectid, int pin,
-					int ref_to_drop);
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 parent,
+				u64 root_objectid, u64 owner_objectid,
+				u64 owner_offset, int refs_to_drop,
+				struct btrfs_delayed_extent_op *extra_op);
+static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
+				    struct extent_buffer *leaf,
+				    struct btrfs_extent_item *ei);
+static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      u64 parent, u64 root_objectid,
+				      u64 flags, u64 owner, u64 offset,
+				      struct btrfs_key *ins, int ref_mod);
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 parent, u64 root_objectid,
+				     u64 flags, struct btrfs_disk_key *key,
+				     int level, struct btrfs_key *ins);
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -453,199 +442,969 @@
  *    maintenance.  This is actually the same as #2, but with a slightly
  *    different use case.
  *
+ * There are two kinds of back refs. The implicit back refs is optimized
+ * for pointers in non-shared tree blocks. For a given pointer in a block,
+ * back refs of this kind provide information about the block's owner tree
+ * and the pointer's key. These information allow us to find the block by
+ * b-tree searching. The full back refs is for pointers in tree blocks not
+ * referenced by their owner trees. The location of tree block is recorded
+ * in the back refs. Actually the full back refs is generic, and can be
+ * used in all cases the implicit back refs is used. The major shortcoming
+ * of the full back refs is its overhead. Every time a tree block gets
+ * COWed, we have to update back refs entry for all pointers in it.
+ *
+ * For a newly allocated tree block, we use implicit back refs for
+ * pointers in it. This means most tree related operations only involve
+ * implicit back refs. For a tree block created in old transaction, the
+ * only way to drop a reference to it is COW it. So we can detect the
+ * event that tree block loses its owner tree's reference and do the
+ * back refs conversion.
+ *
+ * When a tree block is COW'd through a tree, there are four cases:
+ *
+ * The reference count of the block is one and the tree is the block's
+ * owner tree. Nothing to do in this case.
+ *
+ * The reference count of the block is one and the tree is not the
+ * block's owner tree. In this case, full back refs is used for pointers
+ * in the block. Remove these full back refs, add implicit back refs for
+ * every pointers in the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * the block's owner tree. In this case, implicit back refs is used for
+ * pointers in the block. Add full back refs for every pointers in the
+ * block, increase lower level extents' reference counts. The original
+ * implicit back refs are entailed to the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * not the block's owner tree. Add implicit back refs for every pointer in
+ * the new block, increase lower level extents' reference count.
+ *
+ * Back Reference Key composing:
+ *
+ * The key objectid corresponds to the first byte in the extent,
+ * The key type is used to differentiate between types of back refs.
+ * There are different meanings of the key offset for different types
+ * of back refs.
+ *
  * File extents can be referenced by:
  *
  * - multiple snapshots, subvolumes, or different generations in one subvol
  * - different files inside a single subvolume
  * - different offsets inside a file (bookend extents in file.c)
  *
- * The extent ref structure has fields for:
+ * The extent ref structure for the implicit back refs has fields for:
  *
  * - Objectid of the subvolume root
- * - Generation number of the tree holding the reference
  * - objectid of the file holding the reference
- * - number of references holding by parent node (alway 1 for tree blocks)
+ * - original offset in the file
+ * - how many bookend extents
  *
- * Btree leaf may hold multiple references to a file extent. In most cases,
- * these references are from same file and the corresponding offsets inside
- * the file are close together.
+ * The key offset for the implicit back refs is hash of the first
+ * three fields.
  *
- * When a file extent is allocated the fields are filled in:
- *     (root_key.objectid, trans->transid, inode objectid, 1)
+ * The extent ref structure for the full back refs has field for:
  *
- * When a leaf is cow'd new references are added for every file extent found
- * in the leaf.  It looks similar to the create case, but trans->transid will
- * be different when the block is cow'd.
+ * - number of pointers in the tree leaf
  *
- *     (root_key.objectid, trans->transid, inode objectid,
- *      number of references in the leaf)
+ * The key offset for the implicit back refs is the first byte of
+ * the tree leaf
  *
- * When a file extent is removed either during snapshot deletion or
- * file truncation, we find the corresponding back reference and check
- * the following fields:
+ * When a file extent is allocated, The implicit back refs is used.
+ * the fields are filled in:
  *
- *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
- *      inode objectid)
+ *     (root_key.objectid, inode objectid, offset in file, 1)
+ *
+ * When a file extent is removed file truncation, we find the
+ * corresponding implicit back refs and check the following fields:
+ *
+ *     (btrfs_header_owner(leaf), inode objectid, offset in file)
  *
  * Btree extents can be referenced by:
  *
  * - Different subvolumes
- * - Different generations of the same subvolume
  *
- * When a tree block is created, back references are inserted:
+ * Both the implicit back refs and the full back refs for tree blocks
+ * only consist of key. The key offset for the implicit back refs is
+ * objectid of block's owner tree. The key offset for the full back refs
+ * is the first byte of parent block.
  *
- * (root->root_key.objectid, trans->transid, level, 1)
- *
- * When a tree block is cow'd, new back references are added for all the
- * blocks it points to. If the tree block isn't in reference counted root,
- * the old back references are removed. These new back references are of
- * the form (trans->transid will have increased since creation):
- *
- * (root->root_key.objectid, trans->transid, level, 1)
- *
- * When a backref is in deleting, the following fields are checked:
- *
- * if backref was for a tree root:
- *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
- * else
- *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
- *
- * Back Reference Key composing:
- *
- * The key objectid corresponds to the first byte in the extent, the key
- * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
- * byte of parent extent. If a extent is tree root, the key offset is set
- * to the key objectid.
+ * When implicit back refs is used, information about the lowest key and
+ * level of the tree block are required. These information are stored in
+ * tree block info structure.
  */
 
-static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, u64 parent,
-					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid, int del)
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_path *path,
+				  u64 owner, u32 extra_size)
 {
-	struct btrfs_key key;
-	struct btrfs_extent_ref *ref;
+	struct btrfs_extent_item *item;
+	struct btrfs_extent_item_v0 *ei0;
+	struct btrfs_extent_ref_v0 *ref0;
+	struct btrfs_tree_block_info *bi;
 	struct extent_buffer *leaf;
-	u64 ref_objectid;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	u32 new_size = sizeof(*item);
+	u64 refs;
 	int ret;
 
-	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_REF_KEY;
-	key.offset = parent;
+	leaf = path->nodes[0];
+	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
 
-	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	ei0 = btrfs_item_ptr(leaf, path->slots[0],
+			     struct btrfs_extent_item_v0);
+	refs = btrfs_extent_refs_v0(leaf, ei0);
+
+	if (owner == (u64)-1) {
+		while (1) {
+			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret < 0)
+					return ret;
+				BUG_ON(ret > 0);
+				leaf = path->nodes[0];
+			}
+			btrfs_item_key_to_cpu(leaf, &found_key,
+					      path->slots[0]);
+			BUG_ON(key.objectid != found_key.objectid);
+			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
+				path->slots[0]++;
+				continue;
+			}
+			ref0 = btrfs_item_ptr(leaf, path->slots[0],
+					      struct btrfs_extent_ref_v0);
+			owner = btrfs_ref_objectid_v0(leaf, ref0);
+			break;
+		}
+	}
+	btrfs_release_path(root, path);
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID)
+		new_size += sizeof(*bi);
+
+	new_size -= sizeof(*ei0);
+	ret = btrfs_search_slot(trans, root, &key, path,
+				new_size + extra_size, 1);
 	if (ret < 0)
-		goto out;
-	if (ret > 0) {
-		ret = -ENOENT;
-		goto out;
+		return ret;
+	BUG_ON(ret);
+
+	ret = btrfs_extend_item(trans, root, path, new_size);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, item, refs);
+	/* FIXME: get real generation */
+	btrfs_set_extent_generation(leaf, item, 0);
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		btrfs_set_extent_flags(leaf, item,
+				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
+				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
+		bi = (struct btrfs_tree_block_info *)(item + 1);
+		/* FIXME: get first key of the block */
+		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
+		btrfs_set_tree_block_level(leaf, bi, (int)owner);
+	} else {
+		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+#endif
+
+static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
+{
+	u32 high_crc = ~(u32)0;
+	u32 low_crc = ~(u32)0;
+	__le64 lenum;
+
+	lenum = cpu_to_le64(root_objectid);
+	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+	lenum = cpu_to_le64(owner);
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+	lenum = cpu_to_le64(offset);
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+
+	return ((u64)high_crc << 31) ^ (u64)low_crc;
+}
+
+static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
+				     struct btrfs_extent_data_ref *ref)
+{
+	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
+				    btrfs_extent_data_ref_objectid(leaf, ref),
+				    btrfs_extent_data_ref_offset(leaf, ref));
+}
+
+static int match_extent_data_ref(struct extent_buffer *leaf,
+				 struct btrfs_extent_data_ref *ref,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
+	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
+	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
+		return 0;
+	return 1;
+}
+
+static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   u64 bytenr, u64 parent,
+					   u64 root_objectid,
+					   u64 owner, u64 offset)
+{
+	struct btrfs_key key;
+	struct btrfs_extent_data_ref *ref;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int ret;
+	int recow;
+	int err = -ENOENT;
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_DATA_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		key.offset = hash_extent_data_ref(root_objectid,
+						  owner, offset);
+	}
+again:
+	recow = 0;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0) {
+		err = ret;
+		goto fail;
+	}
+
+	if (parent) {
+		if (!ret)
+			return 0;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		key.type = BTRFS_EXTENT_REF_V0_KEY;
+		btrfs_release_path(root, path);
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret < 0) {
+			err = ret;
+			goto fail;
+		}
+		if (!ret)
+			return 0;
+#endif
+		goto fail;
 	}
 
 	leaf = path->nodes[0];
-	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
-	ref_objectid = btrfs_ref_objectid(leaf, ref);
-	if (btrfs_ref_root(leaf, ref) != ref_root ||
-	    btrfs_ref_generation(leaf, ref) != ref_generation ||
-	    (ref_objectid != owner_objectid &&
-	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
-		ret = -EIO;
-		WARN_ON(1);
-		goto out;
+	nritems = btrfs_header_nritems(leaf);
+	while (1) {
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				err = ret;
+			if (ret)
+				goto fail;
+
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+			recow = 1;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != bytenr ||
+		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
+			goto fail;
+
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_data_ref);
+
+		if (match_extent_data_ref(leaf, ref, root_objectid,
+					  owner, offset)) {
+			if (recow) {
+				btrfs_release_path(root, path);
+				goto again;
+			}
+			err = 0;
+			break;
+		}
+		path->slots[0]++;
 	}
-	ret = 0;
-out:
-	return ret;
+fail:
+	return err;
 }
 
-static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, u64 parent,
-					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid,
-					  int refs_to_add)
+static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   u64 bytenr, u64 parent,
+					   u64 root_objectid, u64 owner,
+					   u64 offset, int refs_to_add)
 {
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref;
+	u32 size;
 	u32 num_refs;
 	int ret;
 
 	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_REF_KEY;
-	key.offset = parent;
-
-	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
-	if (ret == 0) {
-		leaf = path->nodes[0];
-		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		btrfs_set_ref_root(leaf, ref, ref_root);
-		btrfs_set_ref_generation(leaf, ref, ref_generation);
-		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
-		btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
-	} else if (ret == -EEXIST) {
-		u64 existing_owner;
-
-		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
-		leaf = path->nodes[0];
-		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		if (btrfs_ref_root(leaf, ref) != ref_root ||
-		    btrfs_ref_generation(leaf, ref) != ref_generation) {
-			ret = -EIO;
-			WARN_ON(1);
-			goto out;
-		}
-
-		num_refs = btrfs_ref_num_refs(leaf, ref);
-		BUG_ON(num_refs == 0);
-		btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
-
-		existing_owner = btrfs_ref_objectid(leaf, ref);
-		if (existing_owner != owner_objectid &&
-		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
-			btrfs_set_ref_objectid(leaf, ref,
-					BTRFS_MULTIPLE_OBJECTIDS);
-		}
-		ret = 0;
+	if (parent) {
+		key.type = BTRFS_SHARED_DATA_REF_KEY;
+		key.offset = parent;
+		size = sizeof(struct btrfs_shared_data_ref);
 	} else {
-		goto out;
+		key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		key.offset = hash_extent_data_ref(root_objectid,
+						  owner, offset);
+		size = sizeof(struct btrfs_extent_data_ref);
 	}
-	btrfs_unlock_up_safe(path, 1);
-	btrfs_mark_buffer_dirty(path->nodes[0]);
-out:
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
+	if (ret && ret != -EEXIST)
+		goto fail;
+
+	leaf = path->nodes[0];
+	if (parent) {
+		struct btrfs_shared_data_ref *ref;
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_shared_data_ref);
+		if (ret == 0) {
+			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
+		} else {
+			num_refs = btrfs_shared_data_ref_count(leaf, ref);
+			num_refs += refs_to_add;
+			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
+		}
+	} else {
+		struct btrfs_extent_data_ref *ref;
+		while (ret == -EEXIST) {
+			ref = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_data_ref);
+			if (match_extent_data_ref(leaf, ref, root_objectid,
+						  owner, offset))
+				break;
+			btrfs_release_path(root, path);
+			key.offset++;
+			ret = btrfs_insert_empty_item(trans, root, path, &key,
+						      size);
+			if (ret && ret != -EEXIST)
+				goto fail;
+
+			leaf = path->nodes[0];
+		}
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_data_ref);
+		if (ret == 0) {
+			btrfs_set_extent_data_ref_root(leaf, ref,
+						       root_objectid);
+			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
+			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
+			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
+		} else {
+			num_refs = btrfs_extent_data_ref_count(leaf, ref);
+			num_refs += refs_to_add;
+			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
+		}
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+fail:
 	btrfs_release_path(root, path);
 	return ret;
 }
 
-static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  int refs_to_drop)
+static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   int refs_to_drop)
 {
+	struct btrfs_key key;
+	struct btrfs_extent_data_ref *ref1 = NULL;
+	struct btrfs_shared_data_ref *ref2 = NULL;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref;
-	u32 num_refs;
+	u32 num_refs = 0;
 	int ret = 0;
 
 	leaf = path->nodes[0];
-	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
-	num_refs = btrfs_ref_num_refs(leaf, ref);
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		ref1 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+		ref2 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		struct btrfs_extent_ref_v0 *ref0;
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		num_refs = btrfs_ref_count_v0(leaf, ref0);
+#endif
+	} else {
+		BUG();
+	}
+
 	BUG_ON(num_refs < refs_to_drop);
 	num_refs -= refs_to_drop;
+
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
 	} else {
-		btrfs_set_ref_num_refs(leaf, ref, num_refs);
+		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
+			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
+		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
+			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		else {
+			struct btrfs_extent_ref_v0 *ref0;
+			ref0 = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_extent_ref_v0);
+			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
+		}
+#endif
 		btrfs_mark_buffer_dirty(leaf);
 	}
+	return ret;
+}
+
+static noinline u32 extent_data_ref_count(struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  struct btrfs_extent_inline_ref *iref)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_data_ref *ref1;
+	struct btrfs_shared_data_ref *ref2;
+	u32 num_refs = 0;
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (iref) {
+		if (btrfs_extent_inline_ref_type(leaf, iref) ==
+		    BTRFS_EXTENT_DATA_REF_KEY) {
+			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
+			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+		} else {
+			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
+			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+		}
+	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		ref1 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+		ref2 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		struct btrfs_extent_ref_v0 *ref0;
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		num_refs = btrfs_ref_count_v0(leaf, ref0);
+#endif
+	} else {
+		WARN_ON(1);
+	}
+	return num_refs;
+}
+
+static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
+{
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
+	}
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (ret == -ENOENT && parent) {
+		btrfs_release_path(root, path);
+		key.type = BTRFS_EXTENT_REF_V0_KEY;
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret > 0)
+			ret = -ENOENT;
+	}
+#endif
+	return ret;
+}
+
+static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
+{
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
+	}
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 	btrfs_release_path(root, path);
 	return ret;
 }
 
+static inline int extent_ref_type(u64 parent, u64 owner)
+{
+	int type;
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		if (parent > 0)
+			type = BTRFS_SHARED_BLOCK_REF_KEY;
+		else
+			type = BTRFS_TREE_BLOCK_REF_KEY;
+	} else {
+		if (parent > 0)
+			type = BTRFS_SHARED_DATA_REF_KEY;
+		else
+			type = BTRFS_EXTENT_DATA_REF_KEY;
+	}
+	return type;
+}
+
+static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+
+{
+	int level;
+	BUG_ON(!path->keep_locks);
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		if (!path->nodes[level])
+			break;
+		btrfs_assert_tree_locked(path->nodes[level]);
+		if (path->slots[level] + 1 >=
+		    btrfs_header_nritems(path->nodes[level]))
+			continue;
+		if (level == 0)
+			btrfs_item_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		else
+			btrfs_node_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * look for inline back ref. if back ref is found, *ref_ret is set
+ * to the address of inline back ref, and 0 is returned.
+ *
+ * if back ref isn't found, *ref_ret is set to the address where it
+ * should be inserted, and -ENOENT is returned.
+ *
+ * if insert is true and there are too many inline back refs, the path
+ * points to the extent item, and -EAGAIN is returned.
+ *
+ * NOTE: inline back refs are ordered in the same way that back ref
+ *	 items in the tree are ordered.
+ */
+static noinline_for_stack
+int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes,
+				 u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int insert)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	u64 flags;
+	u64 item_size;
+	unsigned long ptr;
+	unsigned long end;
+	int extra_size;
+	int type;
+	int want;
+	int ret;
+	int err = 0;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	want = extent_ref_type(parent, owner);
+	if (insert) {
+		extra_size = btrfs_extent_inline_ref_size(want);
+		path->keep_locks = 1;
+	} else
+		extra_size = -1;
+	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		if (!insert) {
+			err = -ENOENT;
+			goto out;
+		}
+		ret = convert_extent_item_v0(trans, root, path, owner,
+					     extra_size);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	flags = btrfs_extent_flags(leaf, ei);
+
+	ptr = (unsigned long)(ei + 1);
+	end = (unsigned long)ei + item_size;
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		ptr += sizeof(struct btrfs_tree_block_info);
+		BUG_ON(ptr > end);
+	} else {
+		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+	}
+
+	err = -ENOENT;
+	while (1) {
+		if (ptr >= end) {
+			WARN_ON(ptr > end);
+			break;
+		}
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(leaf, iref);
+		if (want < type)
+			break;
+		if (want > type) {
+			ptr += btrfs_extent_inline_ref_size(type);
+			continue;
+		}
+
+		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+			struct btrfs_extent_data_ref *dref;
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			if (match_extent_data_ref(leaf, dref, root_objectid,
+						  owner, offset)) {
+				err = 0;
+				break;
+			}
+			if (hash_extent_data_ref_item(leaf, dref) <
+			    hash_extent_data_ref(root_objectid, owner, offset))
+				break;
+		} else {
+			u64 ref_offset;
+			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+			if (parent > 0) {
+				if (parent == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < parent)
+					break;
+			} else {
+				if (root_objectid == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < root_objectid)
+					break;
+			}
+		}
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+	if (err == -ENOENT && insert) {
+		if (item_size + extra_size >=
+		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
+			err = -EAGAIN;
+			goto out;
+		}
+		/*
+		 * To add new inline back ref, we have to make sure
+		 * there is no corresponding back ref item.
+		 * For simplicity, we just do not add new inline back
+		 * ref if there is any kind of item for this block
+		 */
+		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			err = -EAGAIN;
+			goto out;
+		}
+	}
+	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
+out:
+	if (insert) {
+		path->keep_locks = 0;
+		btrfs_unlock_up_safe(path, 1);
+	}
+	return err;
+}
+
+/*
+ * helper to add new inline back ref
+ */
+static noinline_for_stack
+int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_extent_inline_ref *iref,
+				u64 parent, u64 root_objectid,
+				u64 owner, u64 offset, int refs_to_add,
+				struct btrfs_delayed_extent_op *extent_op)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	unsigned long ptr;
+	unsigned long end;
+	unsigned long item_offset;
+	u64 refs;
+	int size;
+	int type;
+	int ret;
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	item_offset = (unsigned long)iref - (unsigned long)ei;
+
+	type = extent_ref_type(parent, owner);
+	size = btrfs_extent_inline_ref_size(type);
+
+	ret = btrfs_extend_item(trans, root, path, size);
+	BUG_ON(ret);
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	refs += refs_to_add;
+	btrfs_set_extent_refs(leaf, ei, refs);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, ei);
+
+	ptr = (unsigned long)ei + item_offset;
+	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
+	if (ptr < end - size)
+		memmove_extent_buffer(leaf, ptr + size, ptr,
+				      end - size - ptr);
+
+	iref = (struct btrfs_extent_inline_ref *)ptr;
+	btrfs_set_extent_inline_ref_type(leaf, iref, type);
+	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+		struct btrfs_extent_data_ref *dref;
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
+		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
+		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
+		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
+	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
+		struct btrfs_shared_data_ref *sref;
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else {
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static int lookup_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	int ret;
+
+	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+					   bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 0);
+	if (ret != -ENOENT)
+		return ret;
+
+	btrfs_release_path(root, path);
+	*ref_ret = NULL;
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
+					    root_objectid);
+	} else {
+		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
+					     root_objectid, owner, offset);
+	}
+	return ret;
+}
+
+/*
+ * helper to update/remove inline back ref
+ */
+static noinline_for_stack
+int update_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_mod,
+				 struct btrfs_delayed_extent_op *extent_op)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_data_ref *dref = NULL;
+	struct btrfs_shared_data_ref *sref = NULL;
+	unsigned long ptr;
+	unsigned long end;
+	u32 item_size;
+	int size;
+	int type;
+	int ret;
+	u64 refs;
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+	refs += refs_to_mod;
+	btrfs_set_extent_refs(leaf, ei, refs);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, ei);
+
+	type = btrfs_extent_inline_ref_type(leaf, iref);
+
+	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		refs = btrfs_extent_data_ref_count(leaf, dref);
+	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		refs = btrfs_shared_data_ref_count(leaf, sref);
+	} else {
+		refs = 1;
+		BUG_ON(refs_to_mod != -1);
+	}
+
+	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+	refs += refs_to_mod;
+
+	if (refs > 0) {
+		if (type == BTRFS_EXTENT_DATA_REF_KEY)
+			btrfs_set_extent_data_ref_count(leaf, dref, refs);
+		else
+			btrfs_set_shared_data_ref_count(leaf, sref, refs);
+	} else {
+		size =  btrfs_extent_inline_ref_size(type);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		ptr = (unsigned long)iref;
+		end = (unsigned long)ei + item_size;
+		if (ptr + size < end)
+			memmove_extent_buffer(leaf, ptr, ptr + size,
+					      end - ptr - size);
+		item_size -= size;
+		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
+		BUG_ON(ret);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static noinline_for_stack
+int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner,
+				 u64 offset, int refs_to_add,
+				 struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_extent_inline_ref *iref;
+	int ret;
+
+	ret = lookup_inline_extent_backref(trans, root, path, &iref,
+					   bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 1);
+	if (ret == 0) {
+		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
+		ret = update_inline_extent_backref(trans, root, path, iref,
+						   refs_to_add, extent_op);
+	} else if (ret == -ENOENT) {
+		ret = setup_inline_extent_backref(trans, root, path, iref,
+						  parent, root_objectid,
+						  owner, offset, refs_to_add,
+						  extent_op);
+	}
+	return ret;
+}
+
+static int insert_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int refs_to_add)
+{
+	int ret;
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		BUG_ON(refs_to_add != 1);
+		ret = insert_tree_block_ref(trans, root, path, bytenr,
+					    parent, root_objectid);
+	} else {
+		ret = insert_extent_data_ref(trans, root, path, bytenr,
+					     parent, root_objectid,
+					     owner, offset, refs_to_add);
+	}
+	return ret;
+}
+
+static int remove_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_drop, int is_data)
+{
+	int ret;
+
+	BUG_ON(!is_data && refs_to_drop != 1);
+	if (iref) {
+		ret = update_inline_extent_backref(trans, root, path, iref,
+						   -refs_to_drop, NULL);
+	} else if (is_data) {
+		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+	} else {
+		ret = btrfs_del_item(trans, root, path);
+	}
+	return ret;
+}
+
 #ifdef BIO_RW_DISCARD
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
@@ -686,71 +1445,40 @@
 #endif
 }
 
-static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root, u64 bytenr,
-				     u64 num_bytes,
-				     u64 orig_parent, u64 parent,
-				     u64 orig_root, u64 ref_root,
-				     u64 orig_generation, u64 ref_generation,
-				     u64 owner_objectid)
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 root_objectid, u64 owner, u64 offset)
 {
 	int ret;
-	int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
+	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
+	       root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
-	ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
-				       orig_parent, parent, orig_root,
-				       ref_root, orig_generation,
-				       ref_generation, owner_objectid, pin);
-	BUG_ON(ret);
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, (int)owner,
+					BTRFS_ADD_DELAYED_REF, NULL);
+	} else {
+		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, owner, offset,
+					BTRFS_ADD_DELAYED_REF, NULL);
+	}
 	return ret;
 }
 
-int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u64 orig_parent, u64 parent,
-			    u64 ref_root, u64 ref_generation,
-			    u64 owner_objectid)
-{
-	int ret;
-	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
-		return 0;
-
-	ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
-					orig_parent, parent, ref_root,
-					ref_root, ref_generation,
-					ref_generation, owner_objectid);
-	return ret;
-}
 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root, u64 bytenr,
-				  u64 num_bytes,
-				  u64 orig_parent, u64 parent,
-				  u64 orig_root, u64 ref_root,
-				  u64 orig_generation, u64 ref_generation,
-				  u64 owner_objectid)
-{
-	int ret;
-
-	ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
-				    ref_generation, owner_objectid,
-				    BTRFS_ADD_DELAYED_REF, 0);
-	BUG_ON(ret);
-	return ret;
-}
-
-static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 bytenr,
-			  u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid,
-			  int refs_to_add)
+				  struct btrfs_root *root,
+				  u64 bytenr, u64 num_bytes,
+				  u64 parent, u64 root_objectid,
+				  u64 owner, u64 offset, int refs_to_add,
+				  struct btrfs_delayed_extent_op *extent_op)
 {
 	struct btrfs_path *path;
-	int ret;
-	struct btrfs_key key;
-	struct extent_buffer *l;
+	struct extent_buffer *leaf;
 	struct btrfs_extent_item *item;
-	u32 refs;
+	u64 refs;
+	int ret;
+	int err = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -758,43 +1486,27 @@
 
 	path->reada = 1;
 	path->leave_spinning = 1;
-	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
-	key.offset = num_bytes;
+	/* this will setup the path even if it fails to insert the back ref */
+	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
+					   path, bytenr, num_bytes, parent,
+					   root_objectid, owner, offset,
+					   refs_to_add, extent_op);
+	if (ret == 0)
+		goto out;
 
-	/* first find the extent item and update its reference count */
-	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
-				path, 0, 1);
-	if (ret < 0) {
-		btrfs_set_path_blocking(path);
-		return ret;
+	if (ret != -EAGAIN) {
+		err = ret;
+		goto out;
 	}
 
-	if (ret > 0) {
-		WARN_ON(1);
-		btrfs_free_path(path);
-		return -EIO;
-	}
-	l = path->nodes[0];
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, item);
+	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, item);
 
-	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
-	if (key.objectid != bytenr) {
-		btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
-		printk(KERN_ERR "btrfs wanted %llu found %llu\n",
-		       (unsigned long long)bytenr,
-		       (unsigned long long)key.objectid);
-		BUG();
-	}
-	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
-
-	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
-
-	refs = btrfs_extent_refs(l, item);
-	btrfs_set_extent_refs(l, item, refs + refs_to_add);
-	btrfs_unlock_up_safe(path, 1);
-
-	btrfs_mark_buffer_dirty(path->nodes[0]);
-
+	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(root->fs_info->extent_root, path);
 
 	path->reada = 1;
@@ -802,56 +1514,197 @@
 
 	/* now insert the actual backref */
 	ret = insert_extent_backref(trans, root->fs_info->extent_root,
-				    path, bytenr, parent,
-				    ref_root, ref_generation,
-				    owner_objectid, refs_to_add);
+				    path, bytenr, parent, root_objectid,
+				    owner, offset, refs_to_add);
 	BUG_ON(ret);
+out:
 	btrfs_free_path(path);
-	return 0;
+	return err;
 }
 
-int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 ref_root, u64 ref_generation,
-			 u64 owner_objectid)
-{
-	int ret;
-	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
-		return 0;
-
-	ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
-				     0, ref_root, 0, ref_generation,
-				     owner_objectid);
-	return ret;
-}
-
-static int drop_delayed_ref(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					struct btrfs_delayed_ref_node *node)
+static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_delayed_ref_node *node,
+				struct btrfs_delayed_extent_op *extent_op,
+				int insert_reserved)
 {
 	int ret = 0;
-	struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
+	struct btrfs_delayed_data_ref *ref;
+	struct btrfs_key ins;
+	u64 parent = 0;
+	u64 ref_root = 0;
+	u64 flags = 0;
 
-	BUG_ON(node->ref_mod == 0);
-	ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
-				  node->parent, ref->root, ref->generation,
-				  ref->owner_objectid, ref->pin, node->ref_mod);
+	ins.objectid = node->bytenr;
+	ins.offset = node->num_bytes;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
 
+	ref = btrfs_delayed_node_to_data_ref(node);
+	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
+		parent = ref->parent;
+	else
+		ref_root = ref->root;
+
+	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+		if (extent_op) {
+			BUG_ON(extent_op->update_key);
+			flags |= extent_op->flags_to_set;
+		}
+		ret = alloc_reserved_file_extent(trans, root,
+						 parent, ref_root, flags,
+						 ref->objectid, ref->offset,
+						 &ins, node->ref_mod);
+		update_reserved_extents(root, ins.objectid, ins.offset, 0);
+	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
+		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
+					     node->num_bytes, parent,
+					     ref_root, ref->objectid,
+					     ref->offset, node->ref_mod,
+					     extent_op);
+	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
+		ret = __btrfs_free_extent(trans, root, node->bytenr,
+					  node->num_bytes, parent,
+					  ref_root, ref->objectid,
+					  ref->offset, node->ref_mod,
+					  extent_op);
+	} else {
+		BUG();
+	}
 	return ret;
 }
 
+static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
+				    struct extent_buffer *leaf,
+				    struct btrfs_extent_item *ei)
+{
+	u64 flags = btrfs_extent_flags(leaf, ei);
+	if (extent_op->update_flags) {
+		flags |= extent_op->flags_to_set;
+		btrfs_set_extent_flags(leaf, ei, flags);
+	}
+
+	if (extent_op->update_key) {
+		struct btrfs_tree_block_info *bi;
+		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
+	}
+}
+
+static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_delayed_ref_node *node,
+				 struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
+	u32 item_size;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = node->bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = node->num_bytes;
+
+	path->reada = 1;
+	path->leave_spinning = 1;
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
+				path, 0, 1);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret > 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
+					     path, (u64)-1, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	__run_delayed_extent_op(extent_op, leaf, ei);
+
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return err;
+}
+
+static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_delayed_ref_node *node,
+				struct btrfs_delayed_extent_op *extent_op,
+				int insert_reserved)
+{
+	int ret = 0;
+	struct btrfs_delayed_tree_ref *ref;
+	struct btrfs_key ins;
+	u64 parent = 0;
+	u64 ref_root = 0;
+
+	ins.objectid = node->bytenr;
+	ins.offset = node->num_bytes;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+	ref = btrfs_delayed_node_to_tree_ref(node);
+	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		parent = ref->parent;
+	else
+		ref_root = ref->root;
+
+	BUG_ON(node->ref_mod != 1);
+	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+		BUG_ON(!extent_op || !extent_op->update_flags ||
+		       !extent_op->update_key);
+		ret = alloc_reserved_tree_block(trans, root,
+						parent, ref_root,
+						extent_op->flags_to_set,
+						&extent_op->key,
+						ref->level, &ins);
+		update_reserved_extents(root, ins.objectid, ins.offset, 0);
+	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
+		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
+					     node->num_bytes, parent, ref_root,
+					     ref->level, 0, 1, extent_op);
+	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
+		ret = __btrfs_free_extent(trans, root, node->bytenr,
+					  node->num_bytes, parent, ref_root,
+					  ref->level, 0, 1, extent_op);
+	} else {
+		BUG();
+	}
+	return ret;
+}
+
+
 /* helper function to actually process a single delayed ref entry */
-static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					struct btrfs_delayed_ref_node *node,
-					int insert_reserved)
+static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_delayed_ref_node *node,
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int insert_reserved)
 {
 	int ret;
-	struct btrfs_delayed_ref *ref;
-
-	if (node->parent == (u64)-1) {
+	if (btrfs_delayed_ref_is_head(node)) {
 		struct btrfs_delayed_ref_head *head;
 		/*
 		 * we've hit the end of the chain and we were supposed
@@ -859,44 +1712,35 @@
 		 * deleted before we ever needed to insert it, so all
 		 * we have to do is clean up the accounting
 		 */
+		BUG_ON(extent_op);
+		head = btrfs_delayed_node_to_head(node);
 		if (insert_reserved) {
+			if (head->is_data) {
+				ret = btrfs_del_csums(trans, root,
+						      node->bytenr,
+						      node->num_bytes);
+				BUG_ON(ret);
+			}
+			btrfs_update_pinned_extents(root, node->bytenr,
+						    node->num_bytes, 1);
 			update_reserved_extents(root, node->bytenr,
 						node->num_bytes, 0);
 		}
-		head = btrfs_delayed_node_to_head(node);
 		mutex_unlock(&head->mutex);
 		return 0;
 	}
 
-	ref = btrfs_delayed_node_to_ref(node);
-	if (ref->action == BTRFS_ADD_DELAYED_REF) {
-		if (insert_reserved) {
-			struct btrfs_key ins;
-
-			ins.objectid = node->bytenr;
-			ins.offset = node->num_bytes;
-			ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-			/* record the full extent allocation */
-			ret = __btrfs_alloc_reserved_extent(trans, root,
-					node->parent, ref->root,
-					ref->generation, ref->owner_objectid,
-					&ins, node->ref_mod);
-			update_reserved_extents(root, node->bytenr,
-						node->num_bytes, 0);
-		} else {
-			/* just add one backref */
-			ret = add_extent_ref(trans, root, node->bytenr,
-				     node->num_bytes,
-				     node->parent, ref->root, ref->generation,
-				     ref->owner_objectid, node->ref_mod);
-		}
-		BUG_ON(ret);
-	} else if (ref->action == BTRFS_DROP_DELAYED_REF) {
-		WARN_ON(insert_reserved);
-		ret = drop_delayed_ref(trans, root, node);
-	}
-	return 0;
+	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		ret = run_delayed_tree_ref(trans, root, node, extent_op,
+					   insert_reserved);
+	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		 node->type == BTRFS_SHARED_DATA_REF_KEY)
+		ret = run_delayed_data_ref(trans, root, node, extent_op,
+					   insert_reserved);
+	else
+		BUG();
+	return ret;
 }
 
 static noinline struct btrfs_delayed_ref_node *
@@ -919,7 +1763,7 @@
 				rb_node);
 		if (ref->bytenr != head->node.bytenr)
 			break;
-		if (btrfs_delayed_node_to_ref(ref)->action == action)
+		if (ref->action == action)
 			return ref;
 		node = rb_prev(node);
 	}
@@ -937,6 +1781,7 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
+	struct btrfs_delayed_extent_op *extent_op;
 	int ret;
 	int count = 0;
 	int must_insert_reserved = 0;
@@ -975,6 +1820,9 @@
 		must_insert_reserved = locked_ref->must_insert_reserved;
 		locked_ref->must_insert_reserved = 0;
 
+		extent_op = locked_ref->extent_op;
+		locked_ref->extent_op = NULL;
+
 		/*
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
@@ -986,6 +1834,25 @@
 			 * so that any accounting fixes can happen
 			 */
 			ref = &locked_ref->node;
+
+			if (extent_op && must_insert_reserved) {
+				kfree(extent_op);
+				extent_op = NULL;
+			}
+
+			if (extent_op) {
+				spin_unlock(&delayed_refs->lock);
+
+				ret = run_delayed_extent_op(trans, root,
+							    ref, extent_op);
+				BUG_ON(ret);
+				kfree(extent_op);
+
+				cond_resched();
+				spin_lock(&delayed_refs->lock);
+				continue;
+			}
+
 			list_del_init(&locked_ref->cluster);
 			locked_ref = NULL;
 		}
@@ -993,14 +1860,17 @@
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
+
 		spin_unlock(&delayed_refs->lock);
 
-		ret = run_one_delayed_ref(trans, root, ref,
+		ret = run_one_delayed_ref(trans, root, ref, extent_op,
 					  must_insert_reserved);
 		BUG_ON(ret);
-		btrfs_put_delayed_ref(ref);
 
+		btrfs_put_delayed_ref(ref);
+		kfree(extent_op);
 		count++;
+
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
 	}
@@ -1095,25 +1965,112 @@
 	return 0;
 }
 
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 objectid, u64 bytenr)
+int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 flags,
+				int is_data)
+{
+	struct btrfs_delayed_extent_op *extent_op;
+	int ret;
+
+	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+	if (!extent_op)
+		return -ENOMEM;
+
+	extent_op->flags_to_set = flags;
+	extent_op->update_flags = 1;
+	extent_op->update_key = 0;
+	extent_op->is_data = is_data ? 1 : 0;
+
+	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+	if (ret)
+		kfree(extent_op);
+	return ret;
+}
+
+static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      u64 objectid, u64 offset, u64 bytenr)
+{
+	struct btrfs_delayed_ref_head *head;
+	struct btrfs_delayed_ref_node *ref;
+	struct btrfs_delayed_data_ref *data_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct rb_node *node;
+	int ret = 0;
+
+	ret = -ENOENT;
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	head = btrfs_find_delayed_ref_head(trans, bytenr);
+	if (!head)
+		goto out;
+
+	if (!mutex_trylock(&head->mutex)) {
+		atomic_inc(&head->node.refs);
+		spin_unlock(&delayed_refs->lock);
+
+		btrfs_release_path(root->fs_info->extent_root, path);
+
+		mutex_lock(&head->mutex);
+		mutex_unlock(&head->mutex);
+		btrfs_put_delayed_ref(&head->node);
+		return -EAGAIN;
+	}
+
+	node = rb_prev(&head->node.rb_node);
+	if (!node)
+		goto out_unlock;
+
+	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+
+	if (ref->bytenr != bytenr)
+		goto out_unlock;
+
+	ret = 1;
+	if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
+		goto out_unlock;
+
+	data_ref = btrfs_delayed_node_to_data_ref(ref);
+
+	node = rb_prev(node);
+	if (node) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		if (ref->bytenr == bytenr)
+			goto out_unlock;
+	}
+
+	if (data_ref->root != root->root_key.objectid ||
+	    data_ref->objectid != objectid || data_ref->offset != offset)
+		goto out_unlock;
+
+	ret = 0;
+out_unlock:
+	mutex_unlock(&head->mutex);
+out:
+	spin_unlock(&delayed_refs->lock);
+	return ret;
+}
+
+static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					u64 objectid, u64 offset, u64 bytenr)
 {
 	struct btrfs_root *extent_root = root->fs_info->extent_root;
-	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref_item;
+	struct btrfs_extent_data_ref *ref;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_item *ei;
 	struct btrfs_key key;
-	struct btrfs_key found_key;
-	u64 ref_root;
-	u64 last_snapshot;
-	u32 nritems;
+	u32 item_size;
 	int ret;
 
 	key.objectid = bytenr;
 	key.offset = (u64)-1;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
 
-	path = btrfs_alloc_path();
 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -1125,55 +2082,83 @@
 
 	path->slots[0]--;
 	leaf = path->nodes[0];
-	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
-	if (found_key.objectid != bytenr ||
-	    found_key.type != BTRFS_EXTENT_ITEM_KEY)
+	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
 		goto out;
 
-	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
-	while (1) {
-		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
-		if (path->slots[0] >= nritems) {
-			ret = btrfs_next_leaf(extent_root, path);
-			if (ret < 0)
-				goto out;
-			if (ret == 0)
-				continue;
-			break;
-		}
-		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.objectid != bytenr)
-			break;
-
-		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
-			path->slots[0]++;
-			continue;
-		}
-
-		ref_item = btrfs_item_ptr(leaf, path->slots[0],
-					  struct btrfs_extent_ref);
-		ref_root = btrfs_ref_root(leaf, ref_item);
-		if ((ref_root != root->root_key.objectid &&
-		     ref_root != BTRFS_TREE_LOG_OBJECTID) ||
-		     objectid != btrfs_ref_objectid(leaf, ref_item)) {
-			ret = 1;
-			goto out;
-		}
-		if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
-			ret = 1;
-			goto out;
-		}
-
-		path->slots[0]++;
+	ret = 1;
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		goto out;
 	}
+#endif
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+
+	if (item_size != sizeof(*ei) +
+	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
+		goto out;
+
+	if (btrfs_extent_generation(leaf, ei) <=
+	    btrfs_root_last_snapshot(&root->root_item))
+		goto out;
+
+	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	if (btrfs_extent_inline_ref_type(leaf, iref) !=
+	    BTRFS_EXTENT_DATA_REF_KEY)
+		goto out;
+
+	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
+	if (btrfs_extent_refs(leaf, ei) !=
+	    btrfs_extent_data_ref_count(leaf, ref) ||
+	    btrfs_extent_data_ref_root(leaf, ref) !=
+	    root->root_key.objectid ||
+	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
+	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
+		goto out;
+
 	ret = 0;
 out:
+	return ret;
+}
+
+int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 objectid, u64 offset, u64 bytenr)
+{
+	struct btrfs_path *path;
+	int ret;
+	int ret2;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOENT;
+
+	do {
+		ret = check_committed_ref(trans, root, path, objectid,
+					  offset, bytenr);
+		if (ret && ret != -ENOENT)
+			goto out;
+
+		ret2 = check_delayed_ref(trans, root, path, objectid,
+					 offset, bytenr);
+	} while (ret2 == -EAGAIN);
+
+	if (ret2 && ret2 != -ENOENT) {
+		ret = ret2;
+		goto out;
+	}
+
+	if (ret != -ENOENT || ret2 != -ENOENT)
+		ret = 0;
+out:
 	btrfs_free_path(path);
 	return ret;
 }
 
+#if 0
 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		    struct extent_buffer *buf, u32 nr_extents)
 {
@@ -1291,62 +2276,44 @@
 		return 1;
 	return 0;
 }
+#endif
 
-
-noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
+static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
-			   struct extent_buffer *orig_buf,
-			   struct extent_buffer *buf, u32 *nr_extents)
+			   struct extent_buffer *buf,
+			   int full_backref, int inc)
 {
 	u64 bytenr;
+	u64 num_bytes;
+	u64 parent;
 	u64 ref_root;
-	u64 orig_root;
-	u64 ref_generation;
-	u64 orig_generation;
-	struct refsort *sorted;
 	u32 nritems;
-	u32 nr_file_extents = 0;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
 	int i;
 	int level;
 	int ret = 0;
-	int faili = 0;
-	int refi = 0;
-	int slot;
 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
-			    u64, u64, u64, u64, u64, u64, u64, u64, u64);
+			    u64, u64, u64, u64, u64, u64);
 
 	ref_root = btrfs_header_owner(buf);
-	ref_generation = btrfs_header_generation(buf);
-	orig_root = btrfs_header_owner(orig_buf);
-	orig_generation = btrfs_header_generation(orig_buf);
-
 	nritems = btrfs_header_nritems(buf);
 	level = btrfs_header_level(buf);
 
-	sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
-	BUG_ON(!sorted);
+	if (!root->ref_cows && level == 0)
+		return 0;
 
-	if (root->ref_cows) {
-		process_func = __btrfs_inc_extent_ref;
-	} else {
-		if (level == 0 &&
-		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
-			goto out;
-		if (level != 0 &&
-		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
-			goto out;
-		process_func = __btrfs_update_extent_ref;
-	}
+	if (inc)
+		process_func = btrfs_inc_extent_ref;
+	else
+		process_func = btrfs_free_extent;
 
-	/*
-	 * we make two passes through the items.  In the first pass we
-	 * only record the byte number and slot.  Then we sort based on
-	 * byte number and do the actual work based on the sorted results
-	 */
+	if (full_backref)
+		parent = buf->start;
+	else
+		parent = 0;
+
 	for (i = 0; i < nritems; i++) {
-		cond_resched();
 		if (level == 0) {
 			btrfs_item_key_to_cpu(buf, &key, i);
 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
@@ -1360,151 +2327,38 @@
 			if (bytenr == 0)
 				continue;
 
-			nr_file_extents++;
-			sorted[refi].bytenr = bytenr;
-			sorted[refi].slot = i;
-			refi++;
+			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
+			key.offset -= btrfs_file_extent_offset(buf, fi);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, key.objectid,
+					   key.offset);
+			if (ret)
+				goto fail;
 		} else {
 			bytenr = btrfs_node_blockptr(buf, i);
-			sorted[refi].bytenr = bytenr;
-			sorted[refi].slot = i;
-			refi++;
-		}
-	}
-	/*
-	 * if refi == 0, we didn't actually put anything into the sorted
-	 * array and we're done
-	 */
-	if (refi == 0)
-		goto out;
-
-	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-	for (i = 0; i < refi; i++) {
-		cond_resched();
-		slot = sorted[i].slot;
-		bytenr = sorted[i].bytenr;
-
-		if (level == 0) {
-			btrfs_item_key_to_cpu(buf, &key, slot);
-			fi = btrfs_item_ptr(buf, slot,
-					    struct btrfs_file_extent_item);
-
-			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (bytenr == 0)
-				continue;
-
-			ret = process_func(trans, root, bytenr,
-				   btrfs_file_extent_disk_num_bytes(buf, fi),
-				   orig_buf->start, buf->start,
-				   orig_root, ref_root,
-				   orig_generation, ref_generation,
-				   key.objectid);
-
-			if (ret) {
-				faili = slot;
-				WARN_ON(1);
+			num_bytes = btrfs_level_size(root, level - 1);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, level - 1, 0);
+			if (ret)
 				goto fail;
-			}
-		} else {
-			ret = process_func(trans, root, bytenr, buf->len,
-					   orig_buf->start, buf->start,
-					   orig_root, ref_root,
-					   orig_generation, ref_generation,
-					   level - 1);
-			if (ret) {
-				faili = slot;
-				WARN_ON(1);
-				goto fail;
-			}
 		}
 	}
-out:
-	kfree(sorted);
-	if (nr_extents) {
-		if (level == 0)
-			*nr_extents = nr_file_extents;
-		else
-			*nr_extents = nritems;
-	}
 	return 0;
 fail:
-	kfree(sorted);
-	WARN_ON(1);
+	BUG();
 	return ret;
 }
 
-int btrfs_update_ref(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root, struct extent_buffer *orig_buf,
-		     struct extent_buffer *buf, int start_slot, int nr)
-
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref)
 {
-	u64 bytenr;
-	u64 ref_root;
-	u64 orig_root;
-	u64 ref_generation;
-	u64 orig_generation;
-	struct btrfs_key key;
-	struct btrfs_file_extent_item *fi;
-	int i;
-	int ret;
-	int slot;
-	int level;
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+}
 
-	BUG_ON(start_slot < 0);
-	BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
-
-	ref_root = btrfs_header_owner(buf);
-	ref_generation = btrfs_header_generation(buf);
-	orig_root = btrfs_header_owner(orig_buf);
-	orig_generation = btrfs_header_generation(orig_buf);
-	level = btrfs_header_level(buf);
-
-	if (!root->ref_cows) {
-		if (level == 0 &&
-		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
-			return 0;
-		if (level != 0 &&
-		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
-			return 0;
-	}
-
-	for (i = 0, slot = start_slot; i < nr; i++, slot++) {
-		cond_resched();
-		if (level == 0) {
-			btrfs_item_key_to_cpu(buf, &key, slot);
-			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-				continue;
-			fi = btrfs_item_ptr(buf, slot,
-					    struct btrfs_file_extent_item);
-			if (btrfs_file_extent_type(buf, fi) ==
-			    BTRFS_FILE_EXTENT_INLINE)
-				continue;
-			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (bytenr == 0)
-				continue;
-			ret = __btrfs_update_extent_ref(trans, root, bytenr,
-				    btrfs_file_extent_disk_num_bytes(buf, fi),
-				    orig_buf->start, buf->start,
-				    orig_root, ref_root, orig_generation,
-				    ref_generation, key.objectid);
-			if (ret)
-				goto fail;
-		} else {
-			bytenr = btrfs_node_blockptr(buf, slot);
-			ret = __btrfs_update_extent_ref(trans, root, bytenr,
-					    buf->len, orig_buf->start,
-					    buf->start, orig_root, ref_root,
-					    orig_generation, ref_generation,
-					    level - 1);
-			if (ret)
-				goto fail;
-		}
-	}
-	return 0;
-fail:
-	WARN_ON(1);
-	return -1;
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref)
+{
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2007,6 +2861,24 @@
 	u64 old_val;
 	u64 byte_in_group;
 
+	/* block accounting for super block */
+	spin_lock(&info->delalloc_lock);
+	old_val = btrfs_super_bytes_used(&info->super_copy);
+	if (alloc)
+		old_val += num_bytes;
+	else
+		old_val -= num_bytes;
+	btrfs_set_super_bytes_used(&info->super_copy, old_val);
+
+	/* block accounting for root item */
+	old_val = btrfs_root_used(&root->root_item);
+	if (alloc)
+		old_val += num_bytes;
+	else
+		old_val -= num_bytes;
+	btrfs_set_root_used(&root->root_item, old_val);
+	spin_unlock(&info->delalloc_lock);
+
 	while (total) {
 		cache = btrfs_lookup_block_group(info, bytenr);
 		if (!cache)
@@ -2216,8 +3088,6 @@
 		u64 header_owner = btrfs_header_owner(buf);
 		u64 header_transid = btrfs_header_generation(buf);
 		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
-		    header_owner != BTRFS_TREE_RELOC_OBJECTID &&
-		    header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
 		    header_transid == trans->transid &&
 		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 			*must_clean = buf;
@@ -2235,63 +3105,77 @@
 	return 0;
 }
 
-/*
- * remove an extent from the root, returns 0 on success
- */
-static int __free_extent(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 ref_generation,
-			 u64 owner_objectid, int pin, int mark_free,
-			 int refs_to_drop)
+
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 parent,
+				u64 root_objectid, u64 owner_objectid,
+				u64 owner_offset, int refs_to_drop,
+				struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_path *path;
 	struct btrfs_key key;
+	struct btrfs_path *path;
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_root *extent_root = info->extent_root;
 	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
 	int ret;
+	int is_data;
 	int extent_slot = 0;
 	int found_extent = 0;
 	int num_to_del = 1;
-	struct btrfs_extent_item *ei;
-	u32 refs;
+	u32 item_size;
+	u64 refs;
 
-	key.objectid = bytenr;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	key.offset = num_bytes;
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
 	path->reada = 1;
 	path->leave_spinning = 1;
-	ret = lookup_extent_backref(trans, extent_root, path,
-				    bytenr, parent, root_objectid,
-				    ref_generation, owner_objectid, 1);
+
+	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
+	BUG_ON(!is_data && refs_to_drop != 1);
+
+	ret = lookup_extent_backref(trans, extent_root, path, &iref,
+				    bytenr, num_bytes, parent,
+				    root_objectid, owner_objectid,
+				    owner_offset);
 	if (ret == 0) {
-		struct btrfs_key found_key;
 		extent_slot = path->slots[0];
-		while (extent_slot > 0) {
-			extent_slot--;
-			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+		while (extent_slot >= 0) {
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
 					      extent_slot);
-			if (found_key.objectid != bytenr)
+			if (key.objectid != bytenr)
 				break;
-			if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
-			    found_key.offset == num_bytes) {
+			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    key.offset == num_bytes) {
 				found_extent = 1;
 				break;
 			}
 			if (path->slots[0] - extent_slot > 5)
 				break;
+			extent_slot--;
 		}
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
+		if (found_extent && item_size < sizeof(*ei))
+			found_extent = 0;
+#endif
 		if (!found_extent) {
+			BUG_ON(iref);
 			ret = remove_extent_backref(trans, extent_root, path,
-						    refs_to_drop);
+						    NULL, refs_to_drop,
+						    is_data);
 			BUG_ON(ret);
 			btrfs_release_path(extent_root, path);
 			path->leave_spinning = 1;
+
+			key.objectid = bytenr;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = num_bytes;
+
 			ret = btrfs_search_slot(trans, extent_root,
 						&key, path, -1, 1);
 			if (ret) {
@@ -2307,82 +3191,98 @@
 		btrfs_print_leaf(extent_root, path->nodes[0]);
 		WARN_ON(1);
 		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
-		       "parent %llu root %llu gen %llu owner %llu\n",
+		       "parent %llu root %llu  owner %llu offset %llu\n",
 		       (unsigned long long)bytenr,
 		       (unsigned long long)parent,
 		       (unsigned long long)root_objectid,
-		       (unsigned long long)ref_generation,
-		       (unsigned long long)owner_objectid);
+		       (unsigned long long)owner_objectid,
+		       (unsigned long long)owner_offset);
 	}
 
 	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, extent_slot);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		BUG_ON(found_extent || extent_slot != path->slots[0]);
+		ret = convert_extent_item_v0(trans, extent_root, path,
+					     owner_objectid, 0);
+		BUG_ON(ret < 0);
+
+		btrfs_release_path(extent_root, path);
+		path->leave_spinning = 1;
+
+		key.objectid = bytenr;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = num_bytes;
+
+		ret = btrfs_search_slot(trans, extent_root, &key, path,
+					-1, 1);
+		if (ret) {
+			printk(KERN_ERR "umm, got %d back from search"
+			       ", was looking for %llu\n", ret,
+			       (unsigned long long)bytenr);
+			btrfs_print_leaf(extent_root, path->nodes[0]);
+		}
+		BUG_ON(ret);
+		extent_slot = path->slots[0];
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, extent_slot);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
 	ei = btrfs_item_ptr(leaf, extent_slot,
 			    struct btrfs_extent_item);
-	refs = btrfs_extent_refs(leaf, ei);
-
-	/*
-	 * we're not allowed to delete the extent item if there
-	 * are other delayed ref updates pending
-	 */
-
-	BUG_ON(refs < refs_to_drop);
-	refs -= refs_to_drop;
-	btrfs_set_extent_refs(leaf, ei, refs);
-	btrfs_mark_buffer_dirty(leaf);
-
-	if (refs == 0 && found_extent &&
-	    path->slots[0] == extent_slot + 1) {
-		struct btrfs_extent_ref *ref;
-		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
-		/* if the back ref and the extent are next to each other
-		 * they get deleted below in one shot
-		 */
-		path->slots[0] = extent_slot;
-		num_to_del = 2;
-	} else if (found_extent) {
-		/* otherwise delete the extent back ref */
-		ret = remove_extent_backref(trans, extent_root, path,
-					    refs_to_drop);
-		BUG_ON(ret);
-		/* if refs are 0, we need to setup the path for deletion */
-		if (refs == 0) {
-			btrfs_release_path(extent_root, path);
-			path->leave_spinning = 1;
-			ret = btrfs_search_slot(trans, extent_root, &key, path,
-						-1, 1);
-			BUG_ON(ret);
-		}
+	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+		struct btrfs_tree_block_info *bi;
+		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
 	}
 
-	if (refs == 0) {
-		u64 super_used;
-		u64 root_used;
+	refs = btrfs_extent_refs(leaf, ei);
+	BUG_ON(refs < refs_to_drop);
+	refs -= refs_to_drop;
+
+	if (refs > 0) {
+		if (extent_op)
+			__run_delayed_extent_op(extent_op, leaf, ei);
+		/*
+		 * In the case of inline back ref, reference count will
+		 * be updated by remove_extent_backref
+		 */
+		if (iref) {
+			BUG_ON(!found_extent);
+		} else {
+			btrfs_set_extent_refs(leaf, ei, refs);
+			btrfs_mark_buffer_dirty(leaf);
+		}
+		if (found_extent) {
+			ret = remove_extent_backref(trans, extent_root, path,
+						    iref, refs_to_drop,
+						    is_data);
+			BUG_ON(ret);
+		}
+	} else {
+		int mark_free = 0;
 		struct extent_buffer *must_clean = NULL;
 
-		if (pin) {
-			ret = pin_down_bytes(trans, root, path,
-				bytenr, num_bytes,
-				owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
-				&must_clean);
-			if (ret > 0)
-				mark_free = 1;
-			BUG_ON(ret < 0);
+		if (found_extent) {
+			BUG_ON(is_data && refs_to_drop !=
+			       extent_data_ref_count(root, path, iref));
+			if (iref) {
+				BUG_ON(path->slots[0] != extent_slot);
+			} else {
+				BUG_ON(path->slots[0] != extent_slot + 1);
+				path->slots[0] = extent_slot;
+				num_to_del = 2;
+			}
 		}
 
-		/* block accounting for super block */
-		spin_lock(&info->delalloc_lock);
-		super_used = btrfs_super_bytes_used(&info->super_copy);
-		btrfs_set_super_bytes_used(&info->super_copy,
-					   super_used - num_bytes);
-
-		/* block accounting for root item */
-		root_used = btrfs_root_used(&root->root_item);
-		btrfs_set_root_used(&root->root_item,
-					   root_used - num_bytes);
-		spin_unlock(&info->delalloc_lock);
-
+		ret = pin_down_bytes(trans, root, path, bytenr,
+				     num_bytes, is_data, &must_clean);
+		if (ret > 0)
+			mark_free = 1;
+		BUG_ON(ret < 0);
 		/*
 		 * it is going to be very rare for someone to be waiting
 		 * on the block we're freeing.  del_items might need to
@@ -2403,7 +3303,7 @@
 			free_extent_buffer(must_clean);
 		}
 
-		if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+		if (is_data) {
 			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
 			BUG_ON(ret);
 		} else {
@@ -2421,34 +3321,6 @@
 }
 
 /*
- * remove an extent from the root, returns 0 on success
- */
-static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					u64 bytenr, u64 num_bytes, u64 parent,
-					u64 root_objectid, u64 ref_generation,
-					u64 owner_objectid, int pin,
-					int refs_to_drop)
-{
-	WARN_ON(num_bytes < root->sectorsize);
-
-	/*
-	 * if metadata always pin
-	 * if data pin when any transaction has committed this
-	 */
-	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
-	    ref_generation != trans->transid)
-		pin = 1;
-
-	if (ref_generation != trans->transid)
-		pin = 1;
-
-	return __free_extent(trans, root, bytenr, num_bytes, parent,
-			    root_objectid, ref_generation,
-			    owner_objectid, pin, pin == 0, refs_to_drop);
-}
-
-/*
  * when we free an extent, it is possible (and likely) that we free the last
  * delayed ref for that extent as well.  This searches the delayed ref tree for
  * a given extent, and if there are no other delayed refs to be processed, it
@@ -2479,6 +3351,13 @@
 	if (ref->bytenr == bytenr)
 		goto out;
 
+	if (head->extent_op) {
+		if (!head->must_insert_reserved)
+			goto out;
+		kfree(head->extent_op);
+		head->extent_op = NULL;
+	}
+
 	/*
 	 * waiting for the lock here would deadlock.  If someone else has it
 	 * locked they are already in the process of dropping it anyway
@@ -2507,7 +3386,8 @@
 	spin_unlock(&delayed_refs->lock);
 
 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
-				  &head->node, head->must_insert_reserved);
+				  &head->node, head->extent_op,
+				  head->must_insert_reserved);
 	BUG_ON(ret);
 	btrfs_put_delayed_ref(&head->node);
 	return 0;
@@ -2519,32 +3399,32 @@
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent,
-		      u64 root_objectid, u64 ref_generation,
-		      u64 owner_objectid, int pin)
+		      u64 root_objectid, u64 owner, u64 offset)
 {
 	int ret;
 
 	/*
 	 * tree log blocks never actually go into the extent allocation
 	 * tree, just update pinning info and exit early.
-	 *
-	 * data extents referenced by the tree log do need to have
-	 * their reference counts bumped.
 	 */
-	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
+		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
 		/* unlocks the pinned mutex */
 		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 		update_reserved_extents(root, bytenr, num_bytes, 0);
 		ret = 0;
-	} else {
-		ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
-				       root_objectid, ref_generation,
-				       owner_objectid,
-				       BTRFS_DROP_DELAYED_REF, 1);
+	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, (int)owner,
+					BTRFS_DROP_DELAYED_REF, NULL);
 		BUG_ON(ret);
 		ret = check_ref_cleanup(trans, root, bytenr);
 		BUG_ON(ret);
+	} else {
+		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, owner,
+					offset, BTRFS_DROP_DELAYED_REF, NULL);
+		BUG_ON(ret);
 	}
 	return ret;
 }
@@ -2622,7 +3502,18 @@
 						       search_start);
 		if (block_group && block_group_bits(block_group, data)) {
 			down_read(&space_info->groups_sem);
-			goto have_block_group;
+			if (list_empty(&block_group->list) ||
+			    block_group->ro) {
+				/*
+				 * someone is removing this block group,
+				 * we can't jump into the have_block_group
+				 * target because our list pointers are not
+				 * valid
+				 */
+				btrfs_put_block_group(block_group);
+				up_read(&space_info->groups_sem);
+			} else
+				goto have_block_group;
 		} else if (block_group) {
 			btrfs_put_block_group(block_group);
 		}
@@ -2656,6 +3547,13 @@
 			 * people trying to start a new cluster
 			 */
 			spin_lock(&last_ptr->refill_lock);
+			if (last_ptr->block_group &&
+			    (last_ptr->block_group->ro ||
+			    !block_group_bits(last_ptr->block_group, data))) {
+				offset = 0;
+				goto refill_cluster;
+			}
+
 			offset = btrfs_alloc_from_cluster(block_group, last_ptr,
 						 num_bytes, search_start);
 			if (offset) {
@@ -2681,10 +3579,17 @@
 
 				last_ptr_loop = 1;
 				search_start = block_group->key.objectid;
+				/*
+				 * we know this block group is properly
+				 * in the list because
+				 * btrfs_remove_block_group, drops the
+				 * cluster before it removes the block
+				 * group from the list
+				 */
 				goto have_block_group;
 			}
 			spin_unlock(&last_ptr->lock);
-
+refill_cluster:
 			/*
 			 * this cluster didn't work out, free it and
 			 * start over
@@ -2694,7 +3599,7 @@
 			last_ptr_loop = 0;
 
 			/* allocate a cluster in this block group */
-			ret = btrfs_find_space_cluster(trans,
+			ret = btrfs_find_space_cluster(trans, root,
 					       block_group, last_ptr,
 					       offset, num_bytes,
 					       empty_cluster + empty_size);
@@ -2944,99 +3849,147 @@
 	return ret;
 }
 
-static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-					 struct btrfs_root *root, u64 parent,
-					 u64 root_objectid, u64 ref_generation,
-					 u64 owner, struct btrfs_key *ins,
-					 int ref_mod)
+static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      u64 parent, u64 root_objectid,
+				      u64 flags, u64 owner, u64 offset,
+				      struct btrfs_key *ins, int ref_mod)
 {
 	int ret;
-	u64 super_used;
-	u64 root_used;
-	u64 num_bytes = ins->offset;
-	u32 sizes[2];
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_root *extent_root = info->extent_root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_extent_item *extent_item;
-	struct btrfs_extent_ref *ref;
+	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_path *path;
-	struct btrfs_key keys[2];
+	struct extent_buffer *leaf;
+	int type;
+	u32 size;
 
-	if (parent == 0)
-		parent = ins->objectid;
+	if (parent > 0)
+		type = BTRFS_SHARED_DATA_REF_KEY;
+	else
+		type = BTRFS_EXTENT_DATA_REF_KEY;
 
-	/* block accounting for super block */
-	spin_lock(&info->delalloc_lock);
-	super_used = btrfs_super_bytes_used(&info->super_copy);
-	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
-
-	/* block accounting for root item */
-	root_used = btrfs_root_used(&root->root_item);
-	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
-	spin_unlock(&info->delalloc_lock);
-
-	memcpy(&keys[0], ins, sizeof(*ins));
-	keys[1].objectid = ins->objectid;
-	keys[1].type = BTRFS_EXTENT_REF_KEY;
-	keys[1].offset = parent;
-	sizes[0] = sizeof(*extent_item);
-	sizes[1] = sizeof(*ref);
+	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
 	path->leave_spinning = 1;
-	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
-				       sizes, 2);
+	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+				      ins, size);
 	BUG_ON(ret);
 
-	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
 				     struct btrfs_extent_item);
-	btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
-	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
-			     struct btrfs_extent_ref);
+	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
+	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
+	btrfs_set_extent_flags(leaf, extent_item,
+			       flags | BTRFS_EXTENT_FLAG_DATA);
 
-	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
-	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
-	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
-	btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
+	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+	btrfs_set_extent_inline_ref_type(leaf, iref, type);
+	if (parent > 0) {
+		struct btrfs_shared_data_ref *ref;
+		ref = (struct btrfs_shared_data_ref *)(iref + 1);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
+	} else {
+		struct btrfs_extent_data_ref *ref;
+		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
+		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
+		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
+		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
+	}
 
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-
-	trans->alloc_exclude_start = 0;
-	trans->alloc_exclude_nr = 0;
 	btrfs_free_path(path);
 
-	if (ret)
-		goto out;
-
-	ret = update_block_group(trans, root, ins->objectid,
-				 ins->offset, 1, 0);
+	ret = update_block_group(trans, root, ins->objectid, ins->offset,
+				 1, 0);
 	if (ret) {
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
 		       (unsigned long long)ins->offset);
 		BUG();
 	}
-out:
 	return ret;
 }
 
-int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins)
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 parent, u64 root_objectid,
+				     u64 flags, struct btrfs_disk_key *key,
+				     int level, struct btrfs_key *ins)
+{
+	int ret;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_extent_item *extent_item;
+	struct btrfs_tree_block_info *block_info;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	path->leave_spinning = 1;
+	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+				      ins, size);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, extent_item, 1);
+	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
+	btrfs_set_extent_flags(leaf, extent_item,
+			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
+	block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+
+	btrfs_set_tree_block_key(leaf, block_info, key);
+	btrfs_set_tree_block_level(leaf, block_info, level);
+
+	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+	if (parent > 0) {
+		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+		btrfs_set_extent_inline_ref_type(leaf, iref,
+						 BTRFS_SHARED_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else {
+		btrfs_set_extent_inline_ref_type(leaf, iref,
+						 BTRFS_TREE_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	}
+
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	ret = update_block_group(trans, root, ins->objectid, ins->offset,
+				 1, 0);
+	if (ret) {
+		printk(KERN_ERR "btrfs update block group failed for %llu "
+		       "%llu\n", (unsigned long long)ins->objectid,
+		       (unsigned long long)ins->offset);
+		BUG();
+	}
+	return ret;
+}
+
+int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 root_objectid, u64 owner,
+				     u64 offset, struct btrfs_key *ins)
 {
 	int ret;
 
-	if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
-		return 0;
+	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
-	ret = btrfs_add_delayed_ref(trans, ins->objectid,
-				    ins->offset, parent, root_objectid,
-				    ref_generation, owner,
-				    BTRFS_ADD_DELAYED_EXTENT, 0);
-	BUG_ON(ret);
+	ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
+					 0, root_objectid, owner, offset,
+					 BTRFS_ADD_DELAYED_EXTENT, NULL);
 	return ret;
 }
 
@@ -3045,10 +3998,10 @@
  * an extent has been allocated and makes sure to clear the free
  * space cache bits as well
  */
-int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins)
+int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   u64 root_objectid, u64 owner, u64 offset,
+				   struct btrfs_key *ins)
 {
 	int ret;
 	struct btrfs_block_group_cache *block_group;
@@ -3062,8 +4015,8 @@
 				      ins->offset);
 	BUG_ON(ret);
 	btrfs_put_block_group(block_group);
-	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
-					    ref_generation, owner, ins, 1);
+	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
+					 0, owner, offset, ins, 1);
 	return ret;
 }
 
@@ -3074,26 +4027,48 @@
  *
  * returns 0 if everything worked, non-zero otherwise.
  */
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       u64 num_bytes, u64 parent, u64 min_alloc_size,
-		       u64 root_objectid, u64 ref_generation,
-		       u64 owner_objectid, u64 empty_size, u64 hint_byte,
-		       u64 search_end, struct btrfs_key *ins, u64 data)
+static int alloc_tree_block(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 num_bytes, u64 parent, u64 root_objectid,
+			    struct btrfs_disk_key *key, int level,
+			    u64 empty_size, u64 hint_byte, u64 search_end,
+			    struct btrfs_key *ins)
 {
 	int ret;
-	ret = __btrfs_reserve_extent(trans, root, num_bytes,
-				     min_alloc_size, empty_size, hint_byte,
-				     search_end, ins, data);
+	u64 flags = 0;
+
+	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+				     empty_size, hint_byte, search_end,
+				     ins, 0);
 	BUG_ON(ret);
+
+	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (parent == 0)
+			parent = ins->objectid;
+		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	} else
+		BUG_ON(parent > 0);
+
+	update_reserved_extents(root, ins->objectid, ins->offset, 1);
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-		ret = btrfs_add_delayed_ref(trans, ins->objectid,
-					    ins->offset, parent, root_objectid,
-					    ref_generation, owner_objectid,
-					    BTRFS_ADD_DELAYED_EXTENT, 0);
+		struct btrfs_delayed_extent_op *extent_op;
+		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		BUG_ON(!extent_op);
+		if (key)
+			memcpy(&extent_op->key, key, sizeof(extent_op->key));
+		else
+			memset(&extent_op->key, 0, sizeof(extent_op->key));
+		extent_op->flags_to_set = flags;
+		extent_op->update_key = 1;
+		extent_op->update_flags = 1;
+		extent_op->is_data = 0;
+
+		ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
+					ins->offset, parent, root_objectid,
+					level, BTRFS_ADD_DELAYED_EXTENT,
+					extent_op);
 		BUG_ON(ret);
 	}
-	update_reserved_extents(root, ins->objectid, ins->offset, 1);
 	return ret;
 }
 
@@ -3132,21 +4107,17 @@
  * returns the tree buffer or NULL.
  */
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
-					     struct btrfs_root *root,
-					     u32 blocksize, u64 parent,
-					     u64 root_objectid,
-					     u64 ref_generation,
-					     int level,
-					     u64 hint,
-					     u64 empty_size)
+					struct btrfs_root *root, u32 blocksize,
+					u64 parent, u64 root_objectid,
+					struct btrfs_disk_key *key, int level,
+					u64 hint, u64 empty_size)
 {
 	struct btrfs_key ins;
 	int ret;
 	struct extent_buffer *buf;
 
-	ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
-				 root_objectid, ref_generation, level,
-				 empty_size, hint, (u64)-1, &ins, 0);
+	ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
+			       key, level, empty_size, hint, (u64)-1, &ins);
 	if (ret) {
 		BUG_ON(ret > 0);
 		return ERR_PTR(ret);
@@ -3160,32 +4131,19 @@
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf)
 {
-	u64 leaf_owner;
-	u64 leaf_generation;
-	struct refsort *sorted;
+	u64 disk_bytenr;
+	u64 num_bytes;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
+	u32 nritems;
 	int i;
-	int nritems;
 	int ret;
-	int refi = 0;
-	int slot;
 
 	BUG_ON(!btrfs_is_leaf(leaf));
 	nritems = btrfs_header_nritems(leaf);
-	leaf_owner = btrfs_header_owner(leaf);
-	leaf_generation = btrfs_header_generation(leaf);
 
-	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
-	/* we do this loop twice.  The first time we build a list
-	 * of the extents we have a reference on, then we sort the list
-	 * by bytenr.  The second time around we actually do the
-	 * extent freeing.
-	 */
 	for (i = 0; i < nritems; i++) {
-		u64 disk_bytenr;
 		cond_resched();
-
 		btrfs_item_key_to_cpu(leaf, &key, i);
 
 		/* only extents have references, skip everything else */
@@ -3205,45 +4163,16 @@
 		if (disk_bytenr == 0)
 			continue;
 
-		sorted[refi].bytenr = disk_bytenr;
-		sorted[refi].slot = i;
-		refi++;
-	}
-
-	if (refi == 0)
-		goto out;
-
-	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-	for (i = 0; i < refi; i++) {
-		u64 disk_bytenr;
-
-		disk_bytenr = sorted[i].bytenr;
-		slot = sorted[i].slot;
-
-		cond_resched();
-
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-			continue;
-
-		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
-
-		ret = btrfs_free_extent(trans, root, disk_bytenr,
-				btrfs_file_extent_disk_num_bytes(leaf, fi),
-				leaf->start, leaf_owner, leaf_generation,
-				key.objectid, 0);
+		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
+					leaf->start, 0, key.objectid, 0);
 		BUG_ON(ret);
-
-		atomic_inc(&root->fs_info->throttle_gen);
-		wake_up(&root->fs_info->transaction_throttle);
-		cond_resched();
 	}
-out:
-	kfree(sorted);
 	return 0;
 }
 
+#if 0
+
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
 					struct btrfs_leaf_ref *ref)
@@ -3286,13 +4215,14 @@
 	return 0;
 }
 
+
 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root, u64 start,
 				     u64 len, u32 *refs)
 {
 	int ret;
 
-	ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
+	ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
 	BUG_ON(ret);
 
 #if 0 /* some debugging code in case we see problems here */
@@ -3327,6 +4257,7 @@
 	return ret;
 }
 
+
 /*
  * this is used while deleting old snapshots, and it drops the refs
  * on a whole subtree starting from a level 1 node.
@@ -3620,32 +4551,36 @@
 	cond_resched();
 	return 0;
 }
+#endif
 
 /*
  * helper function for drop_subtree, this function is similar to
  * walk_down_tree. The main difference is that it checks reference
  * counts while tree blocks are locked.
  */
-static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
-				      struct btrfs_root *root,
-				      struct btrfs_path *path, int *level)
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path, int *level)
 {
 	struct extent_buffer *next;
 	struct extent_buffer *cur;
 	struct extent_buffer *parent;
 	u64 bytenr;
 	u64 ptr_gen;
+	u64 refs;
+	u64 flags;
 	u32 blocksize;
-	u32 refs;
 	int ret;
 
 	cur = path->nodes[*level];
-	ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
-				      &refs);
+	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
+				       &refs, &flags);
 	BUG_ON(ret);
 	if (refs > 1)
 		goto out;
 
+	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
 	while (*level >= 0) {
 		cur = path->nodes[*level];
 		if (*level == 0) {
@@ -3667,16 +4602,15 @@
 		btrfs_tree_lock(next);
 		btrfs_set_lock_blocking(next);
 
-		ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
-					      &refs);
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
 		BUG_ON(ret);
 		if (refs > 1) {
 			parent = path->nodes[*level];
 			ret = btrfs_free_extent(trans, root, bytenr,
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					*level - 1, 1);
+						blocksize, parent->start,
+						btrfs_header_owner(parent),
+						*level - 1, 0);
 			BUG_ON(ret);
 			path->slots[*level]++;
 			btrfs_tree_unlock(next);
@@ -3684,6 +4618,8 @@
 			continue;
 		}
 
+		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
 		*level = btrfs_header_level(next);
 		path->nodes[*level] = next;
 		path->slots[*level] = 0;
@@ -3691,13 +4627,15 @@
 		cond_resched();
 	}
 out:
-	parent = path->nodes[*level + 1];
+	if (path->nodes[*level] == root->node)
+		parent = path->nodes[*level];
+	else
+		parent = path->nodes[*level + 1];
 	bytenr = path->nodes[*level]->start;
 	blocksize = path->nodes[*level]->len;
 
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize,
-			parent->start, btrfs_header_owner(parent),
-			btrfs_header_generation(parent), *level, 1);
+	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
+				btrfs_header_owner(parent), *level, 0);
 	BUG_ON(ret);
 
 	if (path->locks[*level]) {
@@ -3721,8 +4659,6 @@
 				 struct btrfs_path *path,
 				 int *level, int max_level)
 {
-	u64 root_owner;
-	u64 root_gen;
 	struct btrfs_root_item *root_item = &root->root_item;
 	int i;
 	int slot;
@@ -3730,24 +4666,22 @@
 
 	for (i = *level; i < max_level && path->nodes[i]; i++) {
 		slot = path->slots[i];
-		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
-			struct extent_buffer *node;
-			struct btrfs_disk_key disk_key;
-
+		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
 			/*
 			 * there is more work to do in this level.
 			 * Update the drop_progress marker to reflect
 			 * the work we've done so far, and then bump
 			 * the slot number
 			 */
-			node = path->nodes[i];
 			path->slots[i]++;
-			*level = i;
 			WARN_ON(*level == 0);
-			btrfs_node_key(node, &disk_key, path->slots[i]);
-			memcpy(&root_item->drop_progress,
-			       &disk_key, sizeof(disk_key));
-			root_item->drop_level = i;
+			if (max_level == BTRFS_MAX_LEVEL) {
+				btrfs_node_key(path->nodes[i],
+					       &root_item->drop_progress,
+					       path->slots[i]);
+				root_item->drop_level = i;
+			}
+			*level = i;
 			return 0;
 		} else {
 			struct extent_buffer *parent;
@@ -3761,22 +4695,20 @@
 			else
 				parent = path->nodes[*level + 1];
 
-			root_owner = btrfs_header_owner(parent);
-			root_gen = btrfs_header_generation(parent);
-
-			clean_tree_block(trans, root, path->nodes[*level]);
+			clean_tree_block(trans, root, path->nodes[i]);
 			ret = btrfs_free_extent(trans, root,
-						path->nodes[*level]->start,
-						path->nodes[*level]->len,
-						parent->start, root_owner,
-						root_gen, *level, 1);
+						path->nodes[i]->start,
+						path->nodes[i]->len,
+						parent->start,
+						btrfs_header_owner(parent),
+						*level, 0);
 			BUG_ON(ret);
 			if (path->locks[*level]) {
-				btrfs_tree_unlock(path->nodes[*level]);
-				path->locks[*level] = 0;
+				btrfs_tree_unlock(path->nodes[i]);
+				path->locks[i] = 0;
 			}
-			free_extent_buffer(path->nodes[*level]);
-			path->nodes[*level] = NULL;
+			free_extent_buffer(path->nodes[i]);
+			path->nodes[i] = NULL;
 			*level = i + 1;
 		}
 	}
@@ -3795,21 +4727,18 @@
 	int wret;
 	int level;
 	struct btrfs_path *path;
-	int i;
-	int orig_level;
 	int update_count;
 	struct btrfs_root_item *root_item = &root->root_item;
 
-	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
 	level = btrfs_header_level(root->node);
-	orig_level = level;
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
-		path->nodes[level] = root->node;
-		extent_buffer_get(root->node);
+		path->nodes[level] = btrfs_lock_root_node(root);
+		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
+		path->locks[level] = 1;
 	} else {
 		struct btrfs_key key;
 		struct btrfs_disk_key found_key;
@@ -3831,12 +4760,7 @@
 		 * unlock our path, this is safe because only this
 		 * function is allowed to delete this snapshot
 		 */
-		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-			if (path->nodes[i] && path->locks[i]) {
-				path->locks[i] = 0;
-				btrfs_tree_unlock(path->nodes[i]);
-			}
-		}
+		btrfs_unlock_up_safe(path, 0);
 	}
 	while (1) {
 		unsigned long update;
@@ -3857,8 +4781,6 @@
 			ret = -EAGAIN;
 			break;
 		}
-		atomic_inc(&root->fs_info->throttle_gen);
-		wake_up(&root->fs_info->transaction_throttle);
 		for (update_count = 0; update_count < 16; update_count++) {
 			update = trans->delayed_ref_updates;
 			trans->delayed_ref_updates = 0;
@@ -3868,12 +4790,6 @@
 				break;
 		}
 	}
-	for (i = 0; i <= orig_level; i++) {
-		if (path->nodes[i]) {
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-		}
-	}
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -3906,7 +4822,7 @@
 	path->slots[level] = 0;
 
 	while (1) {
-		wret = walk_down_subtree(trans, root, path, &level);
+		wret = walk_down_tree(trans, root, path, &level);
 		if (wret < 0)
 			ret = wret;
 		if (wret != 0)
@@ -3923,6 +4839,7 @@
 	return ret;
 }
 
+#if 0
 static unsigned long calc_ra(unsigned long start, unsigned long last,
 			     unsigned long nr)
 {
@@ -5404,6 +6321,7 @@
 	kfree(ref_path);
 	return ret;
 }
+#endif
 
 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 {
@@ -5452,7 +6370,8 @@
 	u64 calc;
 
 	spin_lock(&shrink_block_group->lock);
-	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
+	if (btrfs_block_group_used(&shrink_block_group->item) +
+	    shrink_block_group->reserved > 0) {
 		spin_unlock(&shrink_block_group->lock);
 
 		trans = btrfs_start_transaction(root, 1);
@@ -5477,6 +6396,17 @@
 	return 0;
 }
 
+
+int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
+					 struct btrfs_block_group_cache *group)
+
+{
+	__alloc_chunk_for_shrink(root, group, 1);
+	set_block_group_readonly(group);
+	return 0;
+}
+
+#if 0
 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 u64 objectid, u64 size)
@@ -5756,6 +6686,7 @@
 	btrfs_free_path(path);
 	return ret;
 }
+#endif
 
 static int find_first_block_group(struct btrfs_root *root,
 		struct btrfs_path *path, struct btrfs_key *key)
@@ -5968,6 +6899,7 @@
 {
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_free_cluster *cluster;
 	struct btrfs_key key;
 	int ret;
 
@@ -5979,6 +6911,21 @@
 
 	memcpy(&key, &block_group->key, sizeof(key));
 
+	/* make sure this block group isn't part of an allocation cluster */
+	cluster = &root->fs_info->data_alloc_cluster;
+	spin_lock(&cluster->refill_lock);
+	btrfs_return_cluster_to_free_space(block_group, cluster);
+	spin_unlock(&cluster->refill_lock);
+
+	/*
+	 * make sure this block group isn't part of a metadata
+	 * allocation cluster
+	 */
+	cluster = &root->fs_info->meta_alloc_cluster;
+	spin_lock(&cluster->refill_lock);
+	btrfs_return_cluster_to_free_space(block_group, cluster);
+	spin_unlock(&cluster->refill_lock);
+
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
@@ -5988,7 +6935,11 @@
 	spin_unlock(&root->fs_info->block_group_cache_lock);
 	btrfs_remove_free_space_cache(block_group);
 	down_write(&block_group->space_info->groups_sem);
-	list_del(&block_group->list);
+	/*
+	 * we must use list_del_init so people can check to see if they
+	 * are still on the list after taking the semaphore
+	 */
+	list_del_init(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);
 
 	spin_lock(&block_group->space_info->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe9eb99..6826018 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -476,6 +476,7 @@
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
 	struct rb_node *node;
+	u64 last_end;
 	int err;
 	int set = 0;
 
@@ -498,6 +499,7 @@
 	if (state->start > end)
 		goto out;
 	WARN_ON(state->end < start);
+	last_end = state->end;
 
 	/*
 	 *     | ---- desired range ---- |
@@ -524,9 +526,11 @@
 		if (err)
 			goto out;
 		if (state->end <= end) {
-			start = state->end + 1;
 			set |= clear_state_bit(tree, state, bits,
 					wake, delete);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
 		} else {
 			start = state->start;
 		}
@@ -552,8 +556,10 @@
 		goto out;
 	}
 
-	start = state->end + 1;
 	set |= clear_state_bit(tree, state, bits, wake, delete);
+	if (last_end == (u64)-1)
+		goto out;
+	start = last_end + 1;
 	goto search_again;
 
 out:
@@ -707,8 +713,10 @@
 			goto out;
 		}
 		set_state_bits(tree, state, bits);
-		start = state->end + 1;
 		merge_state(tree, state);
+		if (last_end == (u64)-1)
+			goto out;
+		start = last_end + 1;
 		goto search_again;
 	}
 
@@ -742,8 +750,10 @@
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, bits);
-			start = state->end + 1;
 			merge_state(tree, state);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
 		} else {
 			start = state->start;
 		}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1d51dc3..126477e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -291,16 +291,12 @@
 {
 	u64 extent_end = 0;
 	u64 search_start = start;
-	u64 leaf_start;
 	u64 ram_bytes = 0;
-	u64 orig_parent = 0;
 	u64 disk_bytenr = 0;
 	u64 orig_locked_end = locked_end;
 	u8 compression;
 	u8 encryption;
 	u16 other_encoding = 0;
-	u64 root_gen;
-	u64 root_owner;
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *extent;
 	struct btrfs_path *path;
@@ -340,9 +336,6 @@
 		bookend = 0;
 		found_extent = 0;
 		found_inline = 0;
-		leaf_start = 0;
-		root_gen = 0;
-		root_owner = 0;
 		compression = 0;
 		encryption = 0;
 		extent = NULL;
@@ -417,9 +410,6 @@
 		if (found_extent) {
 			read_extent_buffer(leaf, &old, (unsigned long)extent,
 					   sizeof(old));
-			root_gen = btrfs_header_generation(leaf);
-			root_owner = btrfs_header_owner(leaf);
-			leaf_start = leaf->start;
 		}
 
 		if (end < extent_end && end >= key.offset) {
@@ -443,14 +433,14 @@
 				}
 				locked_end = extent_end;
 			}
-			orig_parent = path->nodes[0]->start;
 			disk_bytenr = le64_to_cpu(old.disk_bytenr);
 			if (disk_bytenr != 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
 					   disk_bytenr,
-					   le64_to_cpu(old.disk_num_bytes),
-					   orig_parent, root->root_key.objectid,
-					   trans->transid, inode->i_ino);
+					   le64_to_cpu(old.disk_num_bytes), 0,
+					   root->root_key.objectid,
+					   key.objectid, key.offset -
+					   le64_to_cpu(old.offset));
 				BUG_ON(ret);
 			}
 		}
@@ -568,17 +558,6 @@
 			btrfs_mark_buffer_dirty(path->nodes[0]);
 			btrfs_set_lock_blocking(path->nodes[0]);
 
-			if (disk_bytenr != 0) {
-				ret = btrfs_update_extent_ref(trans, root,
-						disk_bytenr,
-						le64_to_cpu(old.disk_num_bytes),
-						orig_parent,
-						leaf->start,
-						root->root_key.objectid,
-						trans->transid, ins.objectid);
-
-				BUG_ON(ret);
-			}
 			path->leave_spinning = 0;
 			btrfs_release_path(root, path);
 			if (disk_bytenr != 0)
@@ -594,8 +573,9 @@
 				ret = btrfs_free_extent(trans, root,
 						old_disk_bytenr,
 						le64_to_cpu(old.disk_num_bytes),
-						leaf_start, root_owner,
-						root_gen, key.objectid, 0);
+						0, root->root_key.objectid,
+						key.objectid, key.offset -
+						le64_to_cpu(old.offset));
 				BUG_ON(ret);
 				*hint_byte = old_disk_bytenr;
 			}
@@ -664,12 +644,11 @@
 	u64 bytenr;
 	u64 num_bytes;
 	u64 extent_end;
-	u64 extent_offset;
+	u64 orig_offset;
 	u64 other_start;
 	u64 other_end;
 	u64 split = start;
 	u64 locked_end = end;
-	u64 orig_parent;
 	int extent_type;
 	int split_end = 1;
 	int ret;
@@ -703,7 +682,7 @@
 
 	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
-	extent_offset = btrfs_file_extent_offset(leaf, fi);
+	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
 
 	if (key.offset == start)
 		split = end;
@@ -711,8 +690,6 @@
 	if (key.offset == start && extent_end == end) {
 		int del_nr = 0;
 		int del_slot = 0;
-		u64 leaf_owner = btrfs_header_owner(leaf);
-		u64 leaf_gen = btrfs_header_generation(leaf);
 		other_start = end;
 		other_end = 0;
 		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
@@ -721,8 +698,8 @@
 			del_slot = path->slots[0] + 1;
 			del_nr++;
 			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-						leaf->start, leaf_owner,
-						leaf_gen, inode->i_ino, 0);
+						0, root->root_key.objectid,
+						inode->i_ino, orig_offset);
 			BUG_ON(ret);
 		}
 		other_start = 0;
@@ -733,8 +710,8 @@
 			del_slot = path->slots[0];
 			del_nr++;
 			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-						leaf->start, leaf_owner,
-						leaf_gen, inode->i_ino, 0);
+						0, root->root_key.objectid,
+						inode->i_ino, orig_offset);
 			BUG_ON(ret);
 		}
 		split_end = 0;
@@ -768,13 +745,12 @@
 			locked_end = extent_end;
 		}
 		btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
-		extent_offset += split - key.offset;
 	} else  {
 		BUG_ON(key.offset != start);
-		btrfs_set_file_extent_offset(leaf, fi, extent_offset +
-					     split - key.offset);
-		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
 		key.offset = split;
+		btrfs_set_file_extent_offset(leaf, fi, key.offset -
+					     orig_offset);
+		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
 		btrfs_set_item_key_safe(trans, root, path, &key);
 		extent_end = split;
 	}
@@ -793,7 +769,8 @@
 					    struct btrfs_file_extent_item);
 			key.offset = split;
 			btrfs_set_item_key_safe(trans, root, path, &key);
-			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+			btrfs_set_file_extent_offset(leaf, fi, key.offset -
+						     orig_offset);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							other_end - split);
 			goto done;
@@ -815,10 +792,9 @@
 
 	btrfs_mark_buffer_dirty(leaf);
 
-	orig_parent = leaf->start;
-	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
-				   orig_parent, root->root_key.objectid,
-				   trans->transid, inode->i_ino);
+	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+				   root->root_key.objectid,
+				   inode->i_ino, orig_offset);
 	BUG_ON(ret);
 	btrfs_release_path(root, path);
 
@@ -833,20 +809,12 @@
 	btrfs_set_file_extent_type(leaf, fi, extent_type);
 	btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
 	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
-	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+	btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
 	btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
 	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
 	btrfs_set_file_extent_compression(leaf, fi, 0);
 	btrfs_set_file_extent_encryption(leaf, fi, 0);
 	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
-
-	if (orig_parent != leaf->start) {
-		ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
-					      orig_parent, leaf->start,
-					      root->root_key.objectid,
-					      trans->transid, inode->i_ino);
-		BUG_ON(ret);
-	}
 done:
 	btrfs_mark_buffer_dirty(leaf);
 
@@ -1189,6 +1157,8 @@
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 	root->log_batch++;
 
+	if (datasync && !(inode->i_state & I_DIRTY_PAGES))
+		goto out;
 	/*
 	 * ok we haven't committed the transaction yet, lets do a commit
 	 */
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0bc9365..4538e48 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -579,6 +579,7 @@
  * it returns -enospc
  */
 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_free_cluster *cluster,
 			     u64 offset, u64 bytes, u64 empty_size)
@@ -595,7 +596,9 @@
 	int ret;
 
 	/* for metadata, allow allocates with more holes */
-	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+	if (btrfs_test_opt(root, SSD_SPREAD)) {
+		min_bytes = bytes + empty_size;
+	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
 		/*
 		 * we want to do larger allocations when we are
 		 * flushing out the delayed refs, it helps prevent
@@ -645,14 +648,15 @@
 		 * we haven't filled the empty size and the window is
 		 * very large.  reset and try again
 		 */
-		if (next->offset - window_start > (bytes + empty_size) * 2) {
+		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
+		    next->offset - window_start > (bytes + empty_size) * 2) {
 			entry = next;
 			window_start = entry->offset;
 			window_free = entry->bytes;
 			last = entry;
 			max_extent = 0;
 			total_retries++;
-			if (total_retries % 256 == 0) {
+			if (total_retries % 64 == 0) {
 				if (min_bytes >= (bytes + empty_size)) {
 					ret = -ENOSPC;
 					goto out;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index ab0bdc0..266fb87 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -31,6 +31,7 @@
 			   u64 bytes);
 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_free_cluster *cluster,
 			     u64 offset, u64 bytes, u64 empty_size);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 2a020b2..db2ff97 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,9 +19,9 @@
 #ifndef __HASH__
 #define __HASH__
 
-#include "crc32c.h"
+#include <linux/crc32c.h>
 static inline u64 btrfs_name_hash(const char *name, int len)
 {
-	return btrfs_crc32c((u32)~1, name, len);
+	return crc32c((u32)~1, name, len);
 }
 #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c8b019..8612b3a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -48,7 +48,6 @@
 #include "ordered-data.h"
 #include "xattr.h"
 #include "tree-log.h"
-#include "ref-cache.h"
 #include "compression.h"
 #include "locking.h"
 
@@ -369,7 +368,7 @@
 	 * inode has not been flagged as nocompress.  This flag can
 	 * change at any time if we discover bad compression ratios.
 	 */
-	if (!btrfs_test_flag(inode, NOCOMPRESS) &&
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
 	    btrfs_test_opt(root, COMPRESS)) {
 		WARN_ON(pages);
 		pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
@@ -470,7 +469,7 @@
 		nr_pages_ret = 0;
 
 		/* flag the file so we don't compress in the future */
-		btrfs_set_flag(inode, NOCOMPRESS);
+		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 	}
 	if (will_compress) {
 		*num_added += 1;
@@ -863,7 +862,7 @@
 		async_cow->locked_page = locked_page;
 		async_cow->start = start;
 
-		if (btrfs_test_flag(inode, NOCOMPRESS))
+		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
 			cur_end = end;
 		else
 			cur_end = min(end, start + 512 * 1024 - 1);
@@ -944,6 +943,7 @@
 	u64 cow_start;
 	u64 cur_offset;
 	u64 extent_end;
+	u64 extent_offset;
 	u64 disk_bytenr;
 	u64 num_bytes;
 	int extent_type;
@@ -1005,6 +1005,7 @@
 		if (extent_type == BTRFS_FILE_EXTENT_REG ||
 		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
 			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+			extent_offset = btrfs_file_extent_offset(leaf, fi);
 			extent_end = found_key.offset +
 				btrfs_file_extent_num_bytes(leaf, fi);
 			if (extent_end <= start) {
@@ -1022,9 +1023,10 @@
 			if (btrfs_extent_readonly(root, disk_bytenr))
 				goto out_check;
 			if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
-						  disk_bytenr))
+						  found_key.offset -
+						  extent_offset, disk_bytenr))
 				goto out_check;
-			disk_bytenr += btrfs_file_extent_offset(leaf, fi);
+			disk_bytenr += extent_offset;
 			disk_bytenr += cur_offset - found_key.offset;
 			num_bytes = min(end + 1, extent_end) - cur_offset;
 			/*
@@ -1131,10 +1133,10 @@
 	int ret;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
-	if (btrfs_test_flag(inode, NODATACOW))
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 1, nr_written);
-	else if (btrfs_test_flag(inode, PREALLOC))
+	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
 	else if (!btrfs_test_opt(root, COMPRESS))
@@ -1288,7 +1290,7 @@
 	int ret = 0;
 	int skip_sum;
 
-	skip_sum = btrfs_test_flag(inode, NODATASUM);
+	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
@@ -1489,9 +1491,9 @@
 	ins.objectid = disk_bytenr;
 	ins.offset = disk_num_bytes;
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
-	ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
-					  root->root_key.objectid,
-					  trans->transid, inode->i_ino, &ins);
+	ret = btrfs_alloc_reserved_file_extent(trans, root,
+					root->root_key.objectid,
+					inode->i_ino, file_pos, &ins);
 	BUG_ON(ret);
 	btrfs_free_path(path);
 
@@ -1788,7 +1790,8 @@
 		ClearPageChecked(page);
 		goto good;
 	}
-	if (btrfs_test_flag(inode, NODATASUM))
+
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
@@ -1956,23 +1959,13 @@
 		 * crossing root thing.  we store the inode number in the
 		 * offset of the orphan item.
 		 */
-		inode = btrfs_iget_locked(root->fs_info->sb,
-					  found_key.offset, root);
-		if (!inode)
+		found_key.objectid = found_key.offset;
+		found_key.type = BTRFS_INODE_ITEM_KEY;
+		found_key.offset = 0;
+		inode = btrfs_iget(root->fs_info->sb, &found_key, root);
+		if (IS_ERR(inode))
 			break;
 
-		if (inode->i_state & I_NEW) {
-			BTRFS_I(inode)->root = root;
-
-			/* have to set the location manually */
-			BTRFS_I(inode)->location.objectid = inode->i_ino;
-			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-			BTRFS_I(inode)->location.offset = 0;
-
-			btrfs_read_locked_inode(inode);
-			unlock_new_inode(inode);
-		}
-
 		/*
 		 * add this inode to the orphan list so btrfs_orphan_del does
 		 * the proper thing when we hit it
@@ -2069,7 +2062,7 @@
 /*
  * read an inode from the btree into the in-memory inode
  */
-void btrfs_read_locked_inode(struct inode *inode)
+static void btrfs_read_locked_inode(struct inode *inode)
 {
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
@@ -2164,6 +2157,8 @@
 		init_special_inode(inode, inode->i_mode, rdev);
 		break;
 	}
+
+	btrfs_update_iflags(inode);
 	return;
 
 make_bad:
@@ -2327,7 +2322,6 @@
 	btrfs_update_inode(trans, root, dir);
 	btrfs_drop_nlink(inode);
 	ret = btrfs_update_inode(trans, root, inode);
-	dir->i_sb->s_dirt = 1;
 out:
 	return ret;
 }
@@ -2599,9 +2593,8 @@
 	struct btrfs_file_extent_item *fi;
 	u64 extent_start = 0;
 	u64 extent_num_bytes = 0;
+	u64 extent_offset = 0;
 	u64 item_end = 0;
-	u64 root_gen = 0;
-	u64 root_owner = 0;
 	int found_extent;
 	int del_item;
 	int pending_del_nr = 0;
@@ -2716,6 +2709,9 @@
 				extent_num_bytes =
 					btrfs_file_extent_disk_num_bytes(leaf,
 									 fi);
+				extent_offset = found_key.offset -
+					btrfs_file_extent_offset(leaf, fi);
+
 				/* FIXME blocksize != 4096 */
 				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
 				if (extent_start != 0) {
@@ -2723,8 +2719,6 @@
 					if (root->ref_cows)
 						inode_sub_bytes(inode, num_dec);
 				}
-				root_gen = btrfs_header_generation(leaf);
-				root_owner = btrfs_header_owner(leaf);
 			}
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			/*
@@ -2768,12 +2762,12 @@
 		} else {
 			break;
 		}
-		if (found_extent) {
+		if (found_extent && root->ref_cows) {
 			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
-						extent_num_bytes,
-						leaf->start, root_owner,
-						root_gen, inode->i_ino, 0);
+						extent_num_bytes, 0,
+						btrfs_header_owner(leaf),
+						inode->i_ino, extent_offset);
 			BUG_ON(ret);
 		}
 next:
@@ -2811,7 +2805,6 @@
 				      pending_del_nr);
 	}
 	btrfs_free_path(path);
-	inode->i_sb->s_dirt = 1;
 	return ret;
 }
 
@@ -3105,6 +3098,45 @@
 	return 0;
 }
 
+static void inode_tree_add(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_inode *entry;
+	struct rb_node **p = &root->inode_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	spin_lock(&root->inode_lock);
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct btrfs_inode, rb_node);
+
+		if (inode->i_ino < entry->vfs_inode.i_ino)
+			p = &(*p)->rb_left;
+		else if (inode->i_ino > entry->vfs_inode.i_ino)
+			p = &(*p)->rb_right;
+		else {
+			WARN_ON(!(entry->vfs_inode.i_state &
+				  (I_WILL_FREE | I_FREEING | I_CLEAR)));
+			break;
+		}
+	}
+	rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
+	rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+	spin_unlock(&root->inode_lock);
+}
+
+static void inode_tree_del(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
+		spin_lock(&root->inode_lock);
+		rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+		spin_unlock(&root->inode_lock);
+		RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+	}
+}
+
 static noinline void init_btrfs_i(struct inode *inode)
 {
 	struct btrfs_inode *bi = BTRFS_I(inode);
@@ -3130,6 +3162,7 @@
 			     inode->i_mapping, GFP_NOFS);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
+	RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
 	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->extent_mutex);
 	mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3152,26 +3185,9 @@
 		args->root == BTRFS_I(inode)->root;
 }
 
-struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
-			    struct btrfs_root *root, int wait)
-{
-	struct inode *inode;
-	struct btrfs_iget_args args;
-	args.ino = objectid;
-	args.root = root;
-
-	if (wait) {
-		inode = ilookup5(s, objectid, btrfs_find_actor,
-				 (void *)&args);
-	} else {
-		inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
-					(void *)&args);
-	}
-	return inode;
-}
-
-struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
-				struct btrfs_root *root)
+static struct inode *btrfs_iget_locked(struct super_block *s,
+				       u64 objectid,
+				       struct btrfs_root *root)
 {
 	struct inode *inode;
 	struct btrfs_iget_args args;
@@ -3188,24 +3204,21 @@
  * Returns in *is_new if the inode was read from disk
  */
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *is_new)
+			 struct btrfs_root *root)
 {
 	struct inode *inode;
 
 	inode = btrfs_iget_locked(s, location->objectid, root);
 	if (!inode)
-		return ERR_PTR(-EACCES);
+		return ERR_PTR(-ENOMEM);
 
 	if (inode->i_state & I_NEW) {
 		BTRFS_I(inode)->root = root;
 		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
 		btrfs_read_locked_inode(inode);
+
+		inode_tree_add(inode);
 		unlock_new_inode(inode);
-		if (is_new)
-			*is_new = 1;
-	} else {
-		if (is_new)
-			*is_new = 0;
 	}
 
 	return inode;
@@ -3218,7 +3231,7 @@
 	struct btrfs_root *root = bi->root;
 	struct btrfs_root *sub_root = root;
 	struct btrfs_key location;
-	int ret, new;
+	int ret;
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -3236,7 +3249,7 @@
 			return ERR_PTR(ret);
 		if (ret > 0)
 			return ERR_PTR(-ENOENT);
-		inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
+		inode = btrfs_iget(dir->i_sb, &location, sub_root);
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
 	}
@@ -3574,9 +3587,9 @@
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
 	if ((mode & S_IFREG)) {
 		if (btrfs_test_opt(root, NODATASUM))
-			btrfs_set_flag(inode, NODATASUM);
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
 		if (btrfs_test_opt(root, NODATACOW))
-			btrfs_set_flag(inode, NODATACOW);
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 	}
 
 	key[0].objectid = objectid;
@@ -3630,7 +3643,10 @@
 	location->offset = 0;
 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 
+	btrfs_inherit_iflags(inode, dir);
+
 	insert_inode_hash(inode);
+	inode_tree_add(inode);
 	return inode;
 fail:
 	if (dir)
@@ -3750,7 +3766,6 @@
 		init_special_inode(inode, inode->i_mode, rdev);
 		btrfs_update_inode(trans, root, inode);
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3815,7 +3830,6 @@
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3862,7 +3876,6 @@
 	if (err)
 		drop_inode = 1;
 
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, dir);
 	err = btrfs_update_inode(trans, root, inode);
 
@@ -3944,7 +3957,6 @@
 
 	d_instantiate(dentry, inode);
 	drop_on_err = 0;
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 
@@ -4683,6 +4695,7 @@
 			btrfs_put_ordered_extent(ordered);
 		}
 	}
+	inode_tree_del(inode);
 	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
@@ -4972,7 +4985,6 @@
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 	if (drop_inode)
@@ -5061,7 +5073,7 @@
 out:
 	if (cur_offset > start) {
 		inode->i_ctime = CURRENT_TIME;
-		btrfs_set_flag(inode, PREALLOC);
+		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 		    cur_offset > i_size_read(inode))
 			btrfs_i_size_write(inode, cur_offset);
@@ -5182,7 +5194,7 @@
 
 static int btrfs_permission(struct inode *inode, int mask)
 {
-	if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
+	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
 		return -EACCES;
 	return generic_permission(inode, mask, btrfs_check_acl);
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2624b53..eff18f5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,7 +50,177 @@
 #include "volumes.h"
 #include "locking.h"
 
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & ~FS_DIRSYNC_FL;
+	else
+		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
+}
 
+/*
+ * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ */
+static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+{
+	unsigned int iflags = 0;
+
+	if (flags & BTRFS_INODE_SYNC)
+		iflags |= FS_SYNC_FL;
+	if (flags & BTRFS_INODE_IMMUTABLE)
+		iflags |= FS_IMMUTABLE_FL;
+	if (flags & BTRFS_INODE_APPEND)
+		iflags |= FS_APPEND_FL;
+	if (flags & BTRFS_INODE_NODUMP)
+		iflags |= FS_NODUMP_FL;
+	if (flags & BTRFS_INODE_NOATIME)
+		iflags |= FS_NOATIME_FL;
+	if (flags & BTRFS_INODE_DIRSYNC)
+		iflags |= FS_DIRSYNC_FL;
+
+	return iflags;
+}
+
+/*
+ * Update inode->i_flags based on the btrfs internal flags.
+ */
+void btrfs_update_iflags(struct inode *inode)
+{
+	struct btrfs_inode *ip = BTRFS_I(inode);
+
+	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+
+	if (ip->flags & BTRFS_INODE_SYNC)
+		inode->i_flags |= S_SYNC;
+	if (ip->flags & BTRFS_INODE_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	if (ip->flags & BTRFS_INODE_APPEND)
+		inode->i_flags |= S_APPEND;
+	if (ip->flags & BTRFS_INODE_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	if (ip->flags & BTRFS_INODE_DIRSYNC)
+		inode->i_flags |= S_DIRSYNC;
+}
+
+/*
+ * Inherit flags from the parent inode.
+ *
+ * Unlike extN we don't have any flags we don't want to inherit currently.
+ */
+void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
+{
+	unsigned int flags;
+
+	if (!dir)
+		return;
+
+	flags = BTRFS_I(dir)->flags;
+
+	if (S_ISREG(inode->i_mode))
+		flags &= ~BTRFS_INODE_DIRSYNC;
+	else if (!S_ISDIR(inode->i_mode))
+		flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
+
+	BTRFS_I(inode)->flags = flags;
+	btrfs_update_iflags(inode);
+}
+
+static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
+{
+	struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
+	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		return -EFAULT;
+	return 0;
+}
+
+static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct btrfs_inode *ip = BTRFS_I(inode);
+	struct btrfs_root *root = ip->root;
+	struct btrfs_trans_handle *trans;
+	unsigned int flags, oldflags;
+	int ret;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+		      FS_NOATIME_FL | FS_NODUMP_FL | \
+		      FS_SYNC_FL | FS_DIRSYNC_FL))
+		return -EOPNOTSUPP;
+
+	if (!is_owner_or_cap(inode))
+		return -EACCES;
+
+	mutex_lock(&inode->i_mutex);
+
+	flags = btrfs_mask_flags(inode->i_mode, flags);
+	oldflags = btrfs_flags_to_ioctl(ip->flags);
+	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE)) {
+			ret = -EPERM;
+			goto out_unlock;
+		}
+	}
+
+	ret = mnt_want_write(file->f_path.mnt);
+	if (ret)
+		goto out_unlock;
+
+	if (flags & FS_SYNC_FL)
+		ip->flags |= BTRFS_INODE_SYNC;
+	else
+		ip->flags &= ~BTRFS_INODE_SYNC;
+	if (flags & FS_IMMUTABLE_FL)
+		ip->flags |= BTRFS_INODE_IMMUTABLE;
+	else
+		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
+	if (flags & FS_APPEND_FL)
+		ip->flags |= BTRFS_INODE_APPEND;
+	else
+		ip->flags &= ~BTRFS_INODE_APPEND;
+	if (flags & FS_NODUMP_FL)
+		ip->flags |= BTRFS_INODE_NODUMP;
+	else
+		ip->flags &= ~BTRFS_INODE_NODUMP;
+	if (flags & FS_NOATIME_FL)
+		ip->flags |= BTRFS_INODE_NOATIME;
+	else
+		ip->flags &= ~BTRFS_INODE_NOATIME;
+	if (flags & FS_DIRSYNC_FL)
+		ip->flags |= BTRFS_INODE_DIRSYNC;
+	else
+		ip->flags &= ~BTRFS_INODE_DIRSYNC;
+
+
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
+
+	ret = btrfs_update_inode(trans, root, inode);
+	BUG_ON(ret);
+
+	btrfs_update_iflags(inode);
+	inode->i_ctime = CURRENT_TIME;
+	btrfs_end_transaction(trans, root);
+
+	mnt_drop_write(file->f_path.mnt);
+ out_unlock:
+	mutex_unlock(&inode->i_mutex);
+	return 0;
+}
+
+static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	return put_user(inode->i_generation, arg);
+}
 
 static noinline int create_subvol(struct btrfs_root *root,
 				  struct dentry *dentry,
@@ -82,22 +252,25 @@
 	if (ret)
 		goto fail;
 
-	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-				      objectid, trans->transid, 0, 0, 0);
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+				      0, objectid, NULL, 0, 0, 0);
 	if (IS_ERR(leaf)) {
 		ret = PTR_ERR(leaf);
 		goto fail;
 	}
 
-	btrfs_set_header_nritems(leaf, 0);
-	btrfs_set_header_level(leaf, 0);
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(leaf, leaf->start);
 	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(leaf, objectid);
 
 	write_extent_buffer(leaf, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(leaf),
 			    BTRFS_FSID_SIZE);
+	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+			    BTRFS_UUID_SIZE);
 	btrfs_mark_buffer_dirty(leaf);
 
 	inode_item = &root_item.inode;
@@ -125,7 +298,7 @@
 	btrfs_set_root_dirid(&root_item, new_dirid);
 
 	key.objectid = objectid;
-	key.offset = 1;
+	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				&root_item);
@@ -911,10 +1084,10 @@
 				if (disko) {
 					inode_add_bytes(inode, datal);
 					ret = btrfs_inc_extent_ref(trans, root,
-						   disko, diskl, leaf->start,
-						   root->root_key.objectid,
-						   trans->transid,
-						   inode->i_ino);
+							disko, diskl, 0,
+							root->root_key.objectid,
+							inode->i_ino,
+							new_key.offset - datao);
 					BUG_ON(ret);
 				}
 			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
@@ -1074,6 +1247,12 @@
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return btrfs_ioctl_getflags(file, argp);
+	case FS_IOC_SETFLAGS:
+		return btrfs_ioctl_setflags(file, argp);
+	case FS_IOC_GETVERSION:
+		return btrfs_ioctl_getversion(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
 		return btrfs_ioctl_snap_create(file, argp, 0);
 	case BTRFS_IOC_SUBVOL_CREATE:
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5f8f218..6d6523d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -45,22 +45,132 @@
 	       (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
 	       (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
 }
+static void print_extent_data_ref(struct extent_buffer *eb,
+				  struct btrfs_extent_data_ref *ref)
+{
+	printk(KERN_INFO "\t\textent data backref root %llu "
+	       "objectid %llu offset %llu count %u\n",
+	       (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
+	       (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
+	       (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
+	       btrfs_extent_data_ref_count(eb, ref));
+}
+
+static void print_extent_item(struct extent_buffer *eb, int slot)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	struct btrfs_disk_key key;
+	unsigned long end;
+	unsigned long ptr;
+	int type;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u64 flags;
+	u64 offset;
+
+	if (item_size < sizeof(*ei)) {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		struct btrfs_extent_item_v0 *ei0;
+		BUG_ON(item_size != sizeof(*ei0));
+		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
+		printk(KERN_INFO "\t\textent refs %u\n",
+		       btrfs_extent_refs_v0(eb, ei0));
+		return;
+#else
+		BUG();
+#endif
+	}
+
+	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+	flags = btrfs_extent_flags(eb, ei);
+
+	printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
+	       (unsigned long long)btrfs_extent_refs(eb, ei),
+	       (unsigned long long)btrfs_extent_generation(eb, ei),
+	       (unsigned long long)flags);
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		struct btrfs_tree_block_info *info;
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		btrfs_tree_block_key(eb, info, &key);
+		printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
+		       "level %d\n",
+		       (unsigned long long)btrfs_disk_key_objectid(&key),
+		       key.type,
+		       (unsigned long long)btrfs_disk_key_offset(&key),
+		       btrfs_tree_block_level(eb, info));
+		iref = (struct btrfs_extent_inline_ref *)(info + 1);
+	} else {
+		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	}
+
+	ptr = (unsigned long)iref;
+	end = (unsigned long)ei + item_size;
+	while (ptr < end) {
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(eb, iref);
+		offset = btrfs_extent_inline_ref_offset(eb, iref);
+		switch (type) {
+		case BTRFS_TREE_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\ttree block backref "
+				"root %llu\n", (unsigned long long)offset);
+			break;
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\tshared block backref "
+				"parent %llu\n", (unsigned long long)offset);
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			print_extent_data_ref(eb, dref);
+			break;
+		case BTRFS_SHARED_DATA_REF_KEY:
+			sref = (struct btrfs_shared_data_ref *)(iref + 1);
+			printk(KERN_INFO "\t\tshared data backref "
+			       "parent %llu count %u\n",
+			       (unsigned long long)offset,
+			       btrfs_shared_data_ref_count(eb, sref));
+			break;
+		default:
+			BUG();
+		}
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+	WARN_ON(ptr > end);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
+{
+	struct btrfs_extent_ref_v0 *ref0;
+
+	ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
+	printk("\t\textent back ref root %llu gen %llu "
+		"owner %llu num_refs %lu\n",
+		(unsigned long long)btrfs_ref_root_v0(eb, ref0),
+		(unsigned long long)btrfs_ref_generation_v0(eb, ref0),
+		(unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
+		(unsigned long)btrfs_ref_count_v0(eb, ref0));
+}
+#endif
+
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 {
 	int i;
+	u32 type;
 	u32 nr = btrfs_header_nritems(l);
 	struct btrfs_item *item;
-	struct btrfs_extent_item *ei;
 	struct btrfs_root_item *ri;
 	struct btrfs_dir_item *di;
 	struct btrfs_inode_item *ii;
 	struct btrfs_block_group_item *bi;
 	struct btrfs_file_extent_item *fi;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	struct btrfs_dev_extent *dev_extent;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
-	struct btrfs_extent_ref *ref;
-	struct btrfs_dev_extent *dev_extent;
-	u32 type;
 
 	printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
 		(unsigned long long)btrfs_header_bytenr(l), nr,
@@ -100,20 +210,25 @@
 				btrfs_disk_root_refs(l, ri));
 			break;
 		case BTRFS_EXTENT_ITEM_KEY:
-			ei = btrfs_item_ptr(l, i, struct btrfs_extent_item);
-			printk(KERN_INFO "\t\textent data refs %u\n",
-				btrfs_extent_refs(l, ei));
+			print_extent_item(l, i);
 			break;
-		case BTRFS_EXTENT_REF_KEY:
-			ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
-			printk(KERN_INFO "\t\textent back ref root %llu "
-			       "gen %llu owner %llu num_refs %lu\n",
-			       (unsigned long long)btrfs_ref_root(l, ref),
-			       (unsigned long long)btrfs_ref_generation(l, ref),
-			       (unsigned long long)btrfs_ref_objectid(l, ref),
-			       (unsigned long)btrfs_ref_num_refs(l, ref));
+		case BTRFS_TREE_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\ttree block backref\n");
 			break;
-
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\tshared block backref\n");
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+			dref = btrfs_item_ptr(l, i,
+					      struct btrfs_extent_data_ref);
+			print_extent_data_ref(l, dref);
+			break;
+		case BTRFS_SHARED_DATA_REF_KEY:
+			sref = btrfs_item_ptr(l, i,
+					      struct btrfs_shared_data_ref);
+			printk(KERN_INFO "\t\tshared data backref count %u\n",
+			       btrfs_shared_data_ref_count(l, sref));
+			break;
 		case BTRFS_EXTENT_DATA_KEY:
 			fi = btrfs_item_ptr(l, i,
 					    struct btrfs_file_extent_item);
@@ -139,6 +254,12 @@
 			       (unsigned long long)
 			       btrfs_file_extent_ram_bytes(l, fi));
 			break;
+		case BTRFS_EXTENT_REF_V0_KEY:
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			print_extent_ref_v0(l, i);
+#else
+			BUG();
+#endif
 		case BTRFS_BLOCK_GROUP_ITEM_KEY:
 			bi = btrfs_item_ptr(l, i,
 					    struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
new file mode 100644
index 0000000..b23dc20
--- /dev/null
+++ b/fs/btrfs/relocation.c
@@ -0,0 +1,3711 @@
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "locking.h"
+#include "btrfs_inode.h"
+#include "async-thread.h"
+
+/*
+ * backref_node, mapping_node and tree_block start with this
+ */
+struct tree_entry {
+	struct rb_node rb_node;
+	u64 bytenr;
+};
+
+/*
+ * present a tree block in the backref cache
+ */
+struct backref_node {
+	struct rb_node rb_node;
+	u64 bytenr;
+	/* objectid tree block owner */
+	u64 owner;
+	/* list of upper level blocks reference this block */
+	struct list_head upper;
+	/* list of child blocks in the cache */
+	struct list_head lower;
+	/* NULL if this node is not tree root */
+	struct btrfs_root *root;
+	/* extent buffer got by COW the block */
+	struct extent_buffer *eb;
+	/* level of tree block */
+	unsigned int level:8;
+	/* 1 if the block is root of old snapshot */
+	unsigned int old_root:1;
+	/* 1 if no child blocks in the cache */
+	unsigned int lowest:1;
+	/* is the extent buffer locked */
+	unsigned int locked:1;
+	/* has the block been processed */
+	unsigned int processed:1;
+	/* have backrefs of this block been checked */
+	unsigned int checked:1;
+};
+
+/*
+ * present a block pointer in the backref cache
+ */
+struct backref_edge {
+	struct list_head list[2];
+	struct backref_node *node[2];
+	u64 blockptr;
+};
+
+#define LOWER	0
+#define UPPER	1
+
+struct backref_cache {
+	/* red black tree of all backref nodes in the cache */
+	struct rb_root rb_root;
+	/* list of backref nodes with no child block in the cache */
+	struct list_head pending[BTRFS_MAX_LEVEL];
+	spinlock_t lock;
+};
+
+/*
+ * map address of tree root to tree
+ */
+struct mapping_node {
+	struct rb_node rb_node;
+	u64 bytenr;
+	void *data;
+};
+
+struct mapping_tree {
+	struct rb_root rb_root;
+	spinlock_t lock;
+};
+
+/*
+ * present a tree block to process
+ */
+struct tree_block {
+	struct rb_node rb_node;
+	u64 bytenr;
+	struct btrfs_key key;
+	unsigned int level:8;
+	unsigned int key_ready:1;
+};
+
+/* inode vector */
+#define INODEVEC_SIZE 16
+
+struct inodevec {
+	struct list_head list;
+	struct inode *inode[INODEVEC_SIZE];
+	int nr;
+};
+
+struct reloc_control {
+	/* block group to relocate */
+	struct btrfs_block_group_cache *block_group;
+	/* extent tree */
+	struct btrfs_root *extent_root;
+	/* inode for moving data */
+	struct inode *data_inode;
+	struct btrfs_workers workers;
+	/* tree blocks have been processed */
+	struct extent_io_tree processed_blocks;
+	/* map start of tree root to corresponding reloc tree */
+	struct mapping_tree reloc_root_tree;
+	/* list of reloc trees */
+	struct list_head reloc_roots;
+	u64 search_start;
+	u64 extents_found;
+	u64 extents_skipped;
+	int stage;
+	int create_reloc_root;
+	unsigned int found_file_extent:1;
+	unsigned int found_old_snapshot:1;
+};
+
+/* stages of data relocation */
+#define MOVE_DATA_EXTENTS	0
+#define UPDATE_DATA_PTRS	1
+
+/*
+ * merge reloc tree to corresponding fs tree in worker threads
+ */
+struct async_merge {
+	struct btrfs_work work;
+	struct reloc_control *rc;
+	struct btrfs_root *root;
+	struct completion *done;
+	atomic_t *num_pending;
+};
+
+static void mapping_tree_init(struct mapping_tree *tree)
+{
+	tree->rb_root.rb_node = NULL;
+	spin_lock_init(&tree->lock);
+}
+
+static void backref_cache_init(struct backref_cache *cache)
+{
+	int i;
+	cache->rb_root.rb_node = NULL;
+	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+		INIT_LIST_HEAD(&cache->pending[i]);
+	spin_lock_init(&cache->lock);
+}
+
+static void backref_node_init(struct backref_node *node)
+{
+	memset(node, 0, sizeof(*node));
+	INIT_LIST_HEAD(&node->upper);
+	INIT_LIST_HEAD(&node->lower);
+	RB_CLEAR_NODE(&node->rb_node);
+}
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
+				   struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct tree_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct tree_entry, rb_node);
+
+		if (bytenr < entry->bytenr)
+			p = &(*p)->rb_left;
+		else if (bytenr > entry->bytenr)
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
+{
+	struct rb_node *n = root->rb_node;
+	struct tree_entry *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct tree_entry, rb_node);
+
+		if (bytenr < entry->bytenr)
+			n = n->rb_left;
+		else if (bytenr > entry->bytenr)
+			n = n->rb_right;
+		else
+			return n;
+	}
+	return NULL;
+}
+
+/*
+ * walk up backref nodes until reach node presents tree root
+ */
+static struct backref_node *walk_up_backref(struct backref_node *node,
+					    struct backref_edge *edges[],
+					    int *index)
+{
+	struct backref_edge *edge;
+	int idx = *index;
+
+	while (!list_empty(&node->upper)) {
+		edge = list_entry(node->upper.next,
+				  struct backref_edge, list[LOWER]);
+		edges[idx++] = edge;
+		node = edge->node[UPPER];
+	}
+	*index = idx;
+	return node;
+}
+
+/*
+ * walk down backref nodes to find start of next reference path
+ */
+static struct backref_node *walk_down_backref(struct backref_edge *edges[],
+					      int *index)
+{
+	struct backref_edge *edge;
+	struct backref_node *lower;
+	int idx = *index;
+
+	while (idx > 0) {
+		edge = edges[idx - 1];
+		lower = edge->node[LOWER];
+		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
+			idx--;
+			continue;
+		}
+		edge = list_entry(edge->list[LOWER].next,
+				  struct backref_edge, list[LOWER]);
+		edges[idx - 1] = edge;
+		*index = idx;
+		return edge->node[UPPER];
+	}
+	*index = 0;
+	return NULL;
+}
+
+static void drop_node_buffer(struct backref_node *node)
+{
+	if (node->eb) {
+		if (node->locked) {
+			btrfs_tree_unlock(node->eb);
+			node->locked = 0;
+		}
+		free_extent_buffer(node->eb);
+		node->eb = NULL;
+	}
+}
+
+static void drop_backref_node(struct backref_cache *tree,
+			      struct backref_node *node)
+{
+	BUG_ON(!node->lowest);
+	BUG_ON(!list_empty(&node->upper));
+
+	drop_node_buffer(node);
+	list_del(&node->lower);
+
+	rb_erase(&node->rb_node, &tree->rb_root);
+	kfree(node);
+}
+
+/*
+ * remove a backref node from the backref cache
+ */
+static void remove_backref_node(struct backref_cache *cache,
+				struct backref_node *node)
+{
+	struct backref_node *upper;
+	struct backref_edge *edge;
+
+	if (!node)
+		return;
+
+	BUG_ON(!node->lowest);
+	while (!list_empty(&node->upper)) {
+		edge = list_entry(node->upper.next, struct backref_edge,
+				  list[LOWER]);
+		upper = edge->node[UPPER];
+		list_del(&edge->list[LOWER]);
+		list_del(&edge->list[UPPER]);
+		kfree(edge);
+		/*
+		 * add the node to pending list if no other
+		 * child block cached.
+		 */
+		if (list_empty(&upper->lower)) {
+			list_add_tail(&upper->lower,
+				      &cache->pending[upper->level]);
+			upper->lowest = 1;
+		}
+	}
+	drop_backref_node(cache, node);
+}
+
+/*
+ * find reloc tree by address of tree root
+ */
+static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
+					  u64 bytenr)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node;
+	struct btrfs_root *root = NULL;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		root = (struct btrfs_root *)node->data;
+	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+	return root;
+}
+
+static int is_cowonly_root(u64 root_objectid)
+{
+	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
+	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
+	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
+	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
+		return 1;
+	return 0;
+}
+
+static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_objectid)
+{
+	struct btrfs_key key;
+
+	key.objectid = root_objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	if (is_cowonly_root(root_objectid))
+		key.offset = 0;
+	else
+		key.offset = (u64)-1;
+
+	return btrfs_read_fs_root_no_name(fs_info, &key);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static noinline_for_stack
+struct btrfs_root *find_tree_root(struct reloc_control *rc,
+				  struct extent_buffer *leaf,
+				  struct btrfs_extent_ref_v0 *ref0)
+{
+	struct btrfs_root *root;
+	u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
+	u64 generation = btrfs_ref_generation_v0(leaf, ref0);
+
+	BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
+
+	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
+	BUG_ON(IS_ERR(root));
+
+	if (root->ref_cows &&
+	    generation != btrfs_root_generation(&root->root_item))
+		return NULL;
+
+	return root;
+}
+#endif
+
+static noinline_for_stack
+int find_inline_backref(struct extent_buffer *leaf, int slot,
+			unsigned long *ptr, unsigned long *end)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *bi;
+	u32 item_size;
+
+	item_size = btrfs_item_size_nr(leaf, slot);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		return 1;
+	}
+#endif
+	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
+		  BTRFS_EXTENT_FLAG_TREE_BLOCK));
+
+	if (item_size <= sizeof(*ei) + sizeof(*bi)) {
+		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
+		return 1;
+	}
+
+	bi = (struct btrfs_tree_block_info *)(ei + 1);
+	*ptr = (unsigned long)(bi + 1);
+	*end = (unsigned long)ei + item_size;
+	return 0;
+}
+
+/*
+ * build backref tree for a given tree block. root of the backref tree
+ * corresponds the tree block, leaves of the backref tree correspond
+ * roots of b-trees that reference the tree block.
+ *
+ * the basic idea of this function is check backrefs of a given block
+ * to find upper level blocks that refernece the block, and then check
+ * bakcrefs of these upper level blocks recursively. the recursion stop
+ * when tree root is reached or backrefs for the block is cached.
+ *
+ * NOTE: if we find backrefs for a block are cached, we know backrefs
+ * for all upper level blocks that directly/indirectly reference the
+ * block are also cached.
+ */
+static struct backref_node *build_backref_tree(struct reloc_control *rc,
+					       struct backref_cache *cache,
+					       struct btrfs_key *node_key,
+					       int level, u64 bytenr)
+{
+	struct btrfs_path *path1;
+	struct btrfs_path *path2;
+	struct extent_buffer *eb;
+	struct btrfs_root *root;
+	struct backref_node *cur;
+	struct backref_node *upper;
+	struct backref_node *lower;
+	struct backref_node *node = NULL;
+	struct backref_node *exist = NULL;
+	struct backref_edge *edge;
+	struct rb_node *rb_node;
+	struct btrfs_key key;
+	unsigned long end;
+	unsigned long ptr;
+	LIST_HEAD(list);
+	int ret;
+	int err = 0;
+
+	path1 = btrfs_alloc_path();
+	path2 = btrfs_alloc_path();
+	if (!path1 || !path2) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	node = kmalloc(sizeof(*node), GFP_NOFS);
+	if (!node) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	backref_node_init(node);
+	node->bytenr = bytenr;
+	node->owner = 0;
+	node->level = level;
+	node->lowest = 1;
+	cur = node;
+again:
+	end = 0;
+	ptr = 0;
+	key.objectid = cur->bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	path1->search_commit_root = 1;
+	path1->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
+				0, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	BUG_ON(!ret || !path1->slots[0]);
+
+	path1->slots[0]--;
+
+	WARN_ON(cur->checked);
+	if (!list_empty(&cur->upper)) {
+		/*
+		 * the backref was added previously when processsing
+		 * backref of type BTRFS_TREE_BLOCK_REF_KEY
+		 */
+		BUG_ON(!list_is_singular(&cur->upper));
+		edge = list_entry(cur->upper.next, struct backref_edge,
+				  list[LOWER]);
+		BUG_ON(!list_empty(&edge->list[UPPER]));
+		exist = edge->node[UPPER];
+		/*
+		 * add the upper level block to pending list if we need
+		 * check its backrefs
+		 */
+		if (!exist->checked)
+			list_add_tail(&edge->list[UPPER], &list);
+	} else {
+		exist = NULL;
+	}
+
+	while (1) {
+		cond_resched();
+		eb = path1->nodes[0];
+
+		if (ptr >= end) {
+			if (path1->slots[0] >= btrfs_header_nritems(eb)) {
+				ret = btrfs_next_leaf(rc->extent_root, path1);
+				if (ret < 0) {
+					err = ret;
+					goto out;
+				}
+				if (ret > 0)
+					break;
+				eb = path1->nodes[0];
+			}
+
+			btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
+			if (key.objectid != cur->bytenr) {
+				WARN_ON(exist);
+				break;
+			}
+
+			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+				ret = find_inline_backref(eb, path1->slots[0],
+							  &ptr, &end);
+				if (ret)
+					goto next;
+			}
+		}
+
+		if (ptr < end) {
+			/* update key for inline back ref */
+			struct btrfs_extent_inline_ref *iref;
+			iref = (struct btrfs_extent_inline_ref *)ptr;
+			key.type = btrfs_extent_inline_ref_type(eb, iref);
+			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+			WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+				key.type != BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+
+		if (exist &&
+		    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
+		      exist->owner == key.offset) ||
+		     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
+		      exist->bytenr == key.offset))) {
+			exist = NULL;
+			goto next;
+		}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
+		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+			if (key.objectid == key.offset &&
+			    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+				struct btrfs_extent_ref_v0 *ref0;
+				ref0 = btrfs_item_ptr(eb, path1->slots[0],
+						struct btrfs_extent_ref_v0);
+				root = find_tree_root(rc, eb, ref0);
+				if (root)
+					cur->root = root;
+				else
+					cur->old_root = 1;
+				break;
+			}
+#else
+		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
+		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+#endif
+			if (key.objectid == key.offset) {
+				/*
+				 * only root blocks of reloc trees use
+				 * backref of this type.
+				 */
+				root = find_reloc_root(rc, cur->bytenr);
+				BUG_ON(!root);
+				cur->root = root;
+				break;
+			}
+
+			edge = kzalloc(sizeof(*edge), GFP_NOFS);
+			if (!edge) {
+				err = -ENOMEM;
+				goto out;
+			}
+			rb_node = tree_search(&cache->rb_root, key.offset);
+			if (!rb_node) {
+				upper = kmalloc(sizeof(*upper), GFP_NOFS);
+				if (!upper) {
+					kfree(edge);
+					err = -ENOMEM;
+					goto out;
+				}
+				backref_node_init(upper);
+				upper->bytenr = key.offset;
+				upper->owner = 0;
+				upper->level = cur->level + 1;
+				/*
+				 *  backrefs for the upper level block isn't
+				 *  cached, add the block to pending list
+				 */
+				list_add_tail(&edge->list[UPPER], &list);
+			} else {
+				upper = rb_entry(rb_node, struct backref_node,
+						 rb_node);
+				INIT_LIST_HEAD(&edge->list[UPPER]);
+			}
+			list_add(&edge->list[LOWER], &cur->upper);
+			edge->node[UPPER] = upper;
+			edge->node[LOWER] = cur;
+
+			goto next;
+		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
+			goto next;
+		}
+
+		/* key.type == BTRFS_TREE_BLOCK_REF_KEY */
+		root = read_fs_root(rc->extent_root->fs_info, key.offset);
+		if (IS_ERR(root)) {
+			err = PTR_ERR(root);
+			goto out;
+		}
+
+		if (btrfs_root_level(&root->root_item) == cur->level) {
+			/* tree root */
+			BUG_ON(btrfs_root_bytenr(&root->root_item) !=
+			       cur->bytenr);
+			cur->root = root;
+			break;
+		}
+
+		level = cur->level + 1;
+
+		/*
+		 * searching the tree to find upper level blocks
+		 * reference the block.
+		 */
+		path2->search_commit_root = 1;
+		path2->skip_locking = 1;
+		path2->lowest_level = level;
+		ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
+		path2->lowest_level = 0;
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		eb = path2->nodes[level];
+		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
+			cur->bytenr);
+
+		lower = cur;
+		for (; level < BTRFS_MAX_LEVEL; level++) {
+			if (!path2->nodes[level]) {
+				BUG_ON(btrfs_root_bytenr(&root->root_item) !=
+				       lower->bytenr);
+				lower->root = root;
+				break;
+			}
+
+			edge = kzalloc(sizeof(*edge), GFP_NOFS);
+			if (!edge) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			eb = path2->nodes[level];
+			rb_node = tree_search(&cache->rb_root, eb->start);
+			if (!rb_node) {
+				upper = kmalloc(sizeof(*upper), GFP_NOFS);
+				if (!upper) {
+					kfree(edge);
+					err = -ENOMEM;
+					goto out;
+				}
+				backref_node_init(upper);
+				upper->bytenr = eb->start;
+				upper->owner = btrfs_header_owner(eb);
+				upper->level = lower->level + 1;
+
+				/*
+				 * if we know the block isn't shared
+				 * we can void checking its backrefs.
+				 */
+				if (btrfs_block_can_be_shared(root, eb))
+					upper->checked = 0;
+				else
+					upper->checked = 1;
+
+				/*
+				 * add the block to pending list if we
+				 * need check its backrefs. only block
+				 * at 'cur->level + 1' is added to the
+				 * tail of pending list. this guarantees
+				 * we check backrefs from lower level
+				 * blocks to upper level blocks.
+				 */
+				if (!upper->checked &&
+				    level == cur->level + 1) {
+					list_add_tail(&edge->list[UPPER],
+						      &list);
+				} else
+					INIT_LIST_HEAD(&edge->list[UPPER]);
+			} else {
+				upper = rb_entry(rb_node, struct backref_node,
+						 rb_node);
+				BUG_ON(!upper->checked);
+				INIT_LIST_HEAD(&edge->list[UPPER]);
+			}
+			list_add_tail(&edge->list[LOWER], &lower->upper);
+			edge->node[UPPER] = upper;
+			edge->node[LOWER] = lower;
+
+			if (rb_node)
+				break;
+			lower = upper;
+			upper = NULL;
+		}
+		btrfs_release_path(root, path2);
+next:
+		if (ptr < end) {
+			ptr += btrfs_extent_inline_ref_size(key.type);
+			if (ptr >= end) {
+				WARN_ON(ptr > end);
+				ptr = 0;
+				end = 0;
+			}
+		}
+		if (ptr >= end)
+			path1->slots[0]++;
+	}
+	btrfs_release_path(rc->extent_root, path1);
+
+	cur->checked = 1;
+	WARN_ON(exist);
+
+	/* the pending list isn't empty, take the first block to process */
+	if (!list_empty(&list)) {
+		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
+		list_del_init(&edge->list[UPPER]);
+		cur = edge->node[UPPER];
+		goto again;
+	}
+
+	/*
+	 * everything goes well, connect backref nodes and insert backref nodes
+	 * into the cache.
+	 */
+	BUG_ON(!node->checked);
+	rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
+	BUG_ON(rb_node);
+
+	list_for_each_entry(edge, &node->upper, list[LOWER])
+		list_add_tail(&edge->list[UPPER], &list);
+
+	while (!list_empty(&list)) {
+		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
+		list_del_init(&edge->list[UPPER]);
+		upper = edge->node[UPPER];
+
+		if (!RB_EMPTY_NODE(&upper->rb_node)) {
+			if (upper->lowest) {
+				list_del_init(&upper->lower);
+				upper->lowest = 0;
+			}
+
+			list_add_tail(&edge->list[UPPER], &upper->lower);
+			continue;
+		}
+
+		BUG_ON(!upper->checked);
+		rb_node = tree_insert(&cache->rb_root, upper->bytenr,
+				      &upper->rb_node);
+		BUG_ON(rb_node);
+
+		list_add_tail(&edge->list[UPPER], &upper->lower);
+
+		list_for_each_entry(edge, &upper->upper, list[LOWER])
+			list_add_tail(&edge->list[UPPER], &list);
+	}
+out:
+	btrfs_free_path(path1);
+	btrfs_free_path(path2);
+	if (err) {
+		INIT_LIST_HEAD(&list);
+		upper = node;
+		while (upper) {
+			if (RB_EMPTY_NODE(&upper->rb_node)) {
+				list_splice_tail(&upper->upper, &list);
+				kfree(upper);
+			}
+
+			if (list_empty(&list))
+				break;
+
+			edge = list_entry(list.next, struct backref_edge,
+					  list[LOWER]);
+			upper = edge->node[UPPER];
+			kfree(edge);
+		}
+		return ERR_PTR(err);
+	}
+	return node;
+}
+
+/*
+ * helper to add 'address of tree root -> reloc tree' mapping
+ */
+static int __add_reloc_root(struct btrfs_root *root)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	node = kmalloc(sizeof(*node), GFP_NOFS);
+	BUG_ON(!node);
+
+	node->bytenr = root->node->start;
+	node->data = root;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+			      node->bytenr, &node->rb_node);
+	spin_unlock(&rc->reloc_root_tree.lock);
+	BUG_ON(rb_node);
+
+	list_add_tail(&root->root_list, &rc->reloc_roots);
+	return 0;
+}
+
+/*
+ * helper to update/delete the 'address of tree root -> reloc tree'
+ * mapping
+ */
+static int __update_reloc_root(struct btrfs_root *root, int del)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node = NULL;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+			      root->commit_root->start);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+
+	BUG_ON((struct btrfs_root *)node->data != root);
+
+	if (!del) {
+		spin_lock(&rc->reloc_root_tree.lock);
+		node->bytenr = root->node->start;
+		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+				      node->bytenr, &node->rb_node);
+		spin_unlock(&rc->reloc_root_tree.lock);
+		BUG_ON(rb_node);
+	} else {
+		list_del_init(&root->root_list);
+		kfree(node);
+	}
+	return 0;
+}
+
+/*
+ * create reloc tree for a given fs tree. reloc tree is just a
+ * snapshot of the fs tree with special root objectid.
+ */
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct extent_buffer *eb;
+	struct btrfs_root_item *root_item;
+	struct btrfs_key root_key;
+	int ret;
+
+	if (root->reloc_root) {
+		reloc_root = root->reloc_root;
+		reloc_root->last_trans = trans->transid;
+		return 0;
+	}
+
+	if (!root->fs_info->reloc_ctl ||
+	    !root->fs_info->reloc_ctl->create_reloc_root ||
+	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		return 0;
+
+	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
+	BUG_ON(!root_item);
+
+	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = root->root_key.objectid;
+
+	ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
+			      BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(ret);
+
+	btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
+	memcpy(root_item, &root->root_item, sizeof(*root_item));
+	btrfs_set_root_refs(root_item, 1);
+	btrfs_set_root_bytenr(root_item, eb->start);
+	btrfs_set_root_level(root_item, btrfs_header_level(eb));
+	btrfs_set_root_generation(root_item, trans->transid);
+	memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
+	root_item->drop_level = 0;
+
+	btrfs_tree_unlock(eb);
+	free_extent_buffer(eb);
+
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+				&root_key, root_item);
+	BUG_ON(ret);
+	kfree(root_item);
+
+	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
+						 &root_key);
+	BUG_ON(IS_ERR(reloc_root));
+	reloc_root->last_trans = trans->transid;
+
+	__add_reloc_root(reloc_root);
+	root->reloc_root = reloc_root;
+	return 0;
+}
+
+/*
+ * update root item of reloc tree
+ */
+int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct btrfs_root_item *root_item;
+	int del = 0;
+	int ret;
+
+	if (!root->reloc_root)
+		return 0;
+
+	reloc_root = root->reloc_root;
+	root_item = &reloc_root->root_item;
+
+	if (btrfs_root_refs(root_item) == 0) {
+		root->reloc_root = NULL;
+		del = 1;
+	}
+
+	__update_reloc_root(reloc_root, del);
+
+	if (reloc_root->commit_root != reloc_root->node) {
+		btrfs_set_root_node(root_item, reloc_root->node);
+		free_extent_buffer(reloc_root->commit_root);
+		reloc_root->commit_root = btrfs_root_node(reloc_root);
+	}
+
+	ret = btrfs_update_root(trans, root->fs_info->tree_root,
+				&reloc_root->root_key, root_item);
+	BUG_ON(ret);
+	return 0;
+}
+
+/*
+ * helper to find first cached inode with inode number >= objectid
+ * in a subvolume
+ */
+static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
+{
+	struct rb_node *node;
+	struct rb_node *prev;
+	struct btrfs_inode *entry;
+	struct inode *inode;
+
+	spin_lock(&root->inode_lock);
+again:
+	node = root->inode_tree.rb_node;
+	prev = NULL;
+	while (node) {
+		prev = node;
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+		if (objectid < entry->vfs_inode.i_ino)
+			node = node->rb_left;
+		else if (objectid > entry->vfs_inode.i_ino)
+			node = node->rb_right;
+		else
+			break;
+	}
+	if (!node) {
+		while (prev) {
+			entry = rb_entry(prev, struct btrfs_inode, rb_node);
+			if (objectid <= entry->vfs_inode.i_ino) {
+				node = prev;
+				break;
+			}
+			prev = rb_next(prev);
+		}
+	}
+	while (node) {
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+		inode = igrab(&entry->vfs_inode);
+		if (inode) {
+			spin_unlock(&root->inode_lock);
+			return inode;
+		}
+
+		objectid = entry->vfs_inode.i_ino + 1;
+		if (cond_resched_lock(&root->inode_lock))
+			goto again;
+
+		node = rb_next(node);
+	}
+	spin_unlock(&root->inode_lock);
+	return NULL;
+}
+
+static int in_block_group(u64 bytenr,
+			  struct btrfs_block_group_cache *block_group)
+{
+	if (bytenr >= block_group->key.objectid &&
+	    bytenr < block_group->key.objectid + block_group->key.offset)
+		return 1;
+	return 0;
+}
+
+/*
+ * get new location of data
+ */
+static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
+			    u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
+	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
+				       bytenr, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+
+	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
+	       btrfs_file_extent_compression(leaf, fi) ||
+	       btrfs_file_extent_encryption(leaf, fi) ||
+	       btrfs_file_extent_other_encoding(leaf, fi));
+
+	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
+		ret = 1;
+		goto out;
+	}
+
+	if (new_bytenr)
+		*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * update file extent items in the tree leaf to point to
+ * the new locations.
+ */
+static int replace_file_extents(struct btrfs_trans_handle *trans,
+				struct reloc_control *rc,
+				struct btrfs_root *root,
+				struct extent_buffer *leaf,
+				struct list_head *inode_list)
+{
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	struct inode *inode = NULL;
+	struct inodevec *ivec = NULL;
+	u64 parent;
+	u64 bytenr;
+	u64 new_bytenr;
+	u64 num_bytes;
+	u64 end;
+	u32 nritems;
+	u32 i;
+	int ret;
+	int first = 1;
+	int dirty = 0;
+
+	if (rc->stage != UPDATE_DATA_PTRS)
+		return 0;
+
+	/* reloc trees always use full backref */
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		parent = leaf->start;
+	else
+		parent = 0;
+
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		if (bytenr == 0)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group))
+			continue;
+
+		/*
+		 * if we are modifying block in fs tree, wait for readpage
+		 * to complete and drop the extent cache
+		 */
+		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+			if (!ivec || ivec->nr == INODEVEC_SIZE) {
+				ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
+				BUG_ON(!ivec);
+				ivec->nr = 0;
+				list_add_tail(&ivec->list, inode_list);
+			}
+			if (first) {
+				inode = find_next_inode(root, key.objectid);
+				if (inode)
+					ivec->inode[ivec->nr++] = inode;
+				first = 0;
+			} else if (inode && inode->i_ino < key.objectid) {
+				inode = find_next_inode(root, key.objectid);
+				if (inode)
+					ivec->inode[ivec->nr++] = inode;
+			}
+			if (inode && inode->i_ino == key.objectid) {
+				end = key.offset +
+				      btrfs_file_extent_num_bytes(leaf, fi);
+				WARN_ON(!IS_ALIGNED(key.offset,
+						    root->sectorsize));
+				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
+				end--;
+				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+						      key.offset, end,
+						      GFP_NOFS);
+				if (!ret)
+					continue;
+
+				btrfs_drop_extent_cache(inode, key.offset, end,
+							1);
+				unlock_extent(&BTRFS_I(inode)->io_tree,
+					      key.offset, end, GFP_NOFS);
+			}
+		}
+
+		ret = get_new_location(rc->data_inode, &new_bytenr,
+				       bytenr, num_bytes);
+		if (ret > 0)
+			continue;
+		BUG_ON(ret < 0);
+
+		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
+		dirty = 1;
+
+		key.offset -= btrfs_file_extent_offset(leaf, fi);
+		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
+					   num_bytes, parent,
+					   btrfs_header_owner(leaf),
+					   key.objectid, key.offset);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+					parent, btrfs_header_owner(leaf),
+					key.objectid, key.offset);
+		BUG_ON(ret);
+	}
+	if (dirty)
+		btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static noinline_for_stack
+int memcmp_node_keys(struct extent_buffer *eb, int slot,
+		     struct btrfs_path *path, int level)
+{
+	struct btrfs_disk_key key1;
+	struct btrfs_disk_key key2;
+	btrfs_node_key(eb, &key1, slot);
+	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
+	return memcmp(&key1, &key2, sizeof(key1));
+}
+
+/*
+ * try to replace tree blocks in fs tree with the new blocks
+ * in reloc tree. tree blocks haven't been modified since the
+ * reloc tree was create can be replaced.
+ *
+ * if a block was replaced, level of the block + 1 is returned.
+ * if no block got replaced, 0 is returned. if there are other
+ * errors, a negative error number is returned.
+ */
+static int replace_path(struct btrfs_trans_handle *trans,
+			struct btrfs_root *dest, struct btrfs_root *src,
+			struct btrfs_path *path, struct btrfs_key *next_key,
+			struct extent_buffer **leaf,
+			int lowest_level, int max_level)
+{
+	struct extent_buffer *eb;
+	struct extent_buffer *parent;
+	struct btrfs_key key;
+	u64 old_bytenr;
+	u64 new_bytenr;
+	u64 old_ptr_gen;
+	u64 new_ptr_gen;
+	u64 last_snapshot;
+	u32 blocksize;
+	int level;
+	int ret;
+	int slot;
+
+	BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(lowest_level > 1 && leaf);
+
+	last_snapshot = btrfs_root_last_snapshot(&src->root_item);
+
+	slot = path->slots[lowest_level];
+	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
+
+	eb = btrfs_lock_root_node(dest);
+	btrfs_set_lock_blocking(eb);
+	level = btrfs_header_level(eb);
+
+	if (level < lowest_level) {
+		btrfs_tree_unlock(eb);
+		free_extent_buffer(eb);
+		return 0;
+	}
+
+	ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
+	BUG_ON(ret);
+	btrfs_set_lock_blocking(eb);
+
+	if (next_key) {
+		next_key->objectid = (u64)-1;
+		next_key->type = (u8)-1;
+		next_key->offset = (u64)-1;
+	}
+
+	parent = eb;
+	while (1) {
+		level = btrfs_header_level(parent);
+		BUG_ON(level < lowest_level);
+
+		ret = btrfs_bin_search(parent, &key, level, &slot);
+		if (ret && slot > 0)
+			slot--;
+
+		if (next_key && slot + 1 < btrfs_header_nritems(parent))
+			btrfs_node_key_to_cpu(parent, next_key, slot + 1);
+
+		old_bytenr = btrfs_node_blockptr(parent, slot);
+		blocksize = btrfs_level_size(dest, level - 1);
+		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
+
+		if (level <= max_level) {
+			eb = path->nodes[level];
+			new_bytenr = btrfs_node_blockptr(eb,
+							path->slots[level]);
+			new_ptr_gen = btrfs_node_ptr_generation(eb,
+							path->slots[level]);
+		} else {
+			new_bytenr = 0;
+			new_ptr_gen = 0;
+		}
+
+		if (new_bytenr > 0 && new_bytenr == old_bytenr) {
+			WARN_ON(1);
+			ret = level;
+			break;
+		}
+
+		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
+		    memcmp_node_keys(parent, slot, path, level)) {
+			if (level <= lowest_level && !leaf) {
+				ret = 0;
+				break;
+			}
+
+			eb = read_tree_block(dest, old_bytenr, blocksize,
+					     old_ptr_gen);
+			btrfs_tree_lock(eb);
+			ret = btrfs_cow_block(trans, dest, eb, parent,
+					      slot, &eb);
+			BUG_ON(ret);
+			btrfs_set_lock_blocking(eb);
+
+			if (level <= lowest_level) {
+				*leaf = eb;
+				ret = 0;
+				break;
+			}
+
+			btrfs_tree_unlock(parent);
+			free_extent_buffer(parent);
+
+			parent = eb;
+			continue;
+		}
+
+		btrfs_node_key_to_cpu(path->nodes[level], &key,
+				      path->slots[level]);
+		btrfs_release_path(src, path);
+
+		path->lowest_level = level;
+		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
+		path->lowest_level = 0;
+		BUG_ON(ret);
+
+		/*
+		 * swap blocks in fs tree and reloc tree.
+		 */
+		btrfs_set_node_blockptr(parent, slot, new_bytenr);
+		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
+		btrfs_mark_buffer_dirty(parent);
+
+		btrfs_set_node_blockptr(path->nodes[level],
+					path->slots[level], old_bytenr);
+		btrfs_set_node_ptr_generation(path->nodes[level],
+					      path->slots[level], old_ptr_gen);
+		btrfs_mark_buffer_dirty(path->nodes[level]);
+
+		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
+					path->nodes[level]->start,
+					src->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
+		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
+					0, dest->root_key.objectid, level - 1,
+					0);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
+					path->nodes[level]->start,
+					src->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
+					0, dest->root_key.objectid, level - 1,
+					0);
+		BUG_ON(ret);
+
+		btrfs_unlock_up_safe(path, 0);
+
+		ret = level;
+		break;
+	}
+	btrfs_tree_unlock(parent);
+	free_extent_buffer(parent);
+	return ret;
+}
+
+/*
+ * helper to find next relocated block in reloc tree
+ */
+static noinline_for_stack
+int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
+		       int *level)
+{
+	struct extent_buffer *eb;
+	int i;
+	u64 last_snapshot;
+	u32 nritems;
+
+	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+
+	for (i = 0; i < *level; i++) {
+		free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+
+	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+		eb = path->nodes[i];
+		nritems = btrfs_header_nritems(eb);
+		while (path->slots[i] + 1 < nritems) {
+			path->slots[i]++;
+			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
+			    last_snapshot)
+				continue;
+
+			*level = i;
+			return 0;
+		}
+		free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+	return 1;
+}
+
+/*
+ * walk down reloc tree to find relocated block of lowest level
+ */
+static noinline_for_stack
+int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
+			 int *level)
+{
+	struct extent_buffer *eb = NULL;
+	int i;
+	u64 bytenr;
+	u64 ptr_gen = 0;
+	u64 last_snapshot;
+	u32 blocksize;
+	u32 nritems;
+
+	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+
+	for (i = *level; i > 0; i--) {
+		eb = path->nodes[i];
+		nritems = btrfs_header_nritems(eb);
+		while (path->slots[i] < nritems) {
+			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
+			if (ptr_gen > last_snapshot)
+				break;
+			path->slots[i]++;
+		}
+		if (path->slots[i] >= nritems) {
+			if (i == *level)
+				break;
+			*level = i + 1;
+			return 0;
+		}
+		if (i == 1) {
+			*level = i;
+			return 0;
+		}
+
+		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
+		blocksize = btrfs_level_size(root, i - 1);
+		eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		BUG_ON(btrfs_header_level(eb) != i - 1);
+		path->nodes[i - 1] = eb;
+		path->slots[i - 1] = 0;
+	}
+	return 1;
+}
+
+/*
+ * invalidate extent cache for file extents whose key in range of
+ * [min_key, max_key)
+ */
+static int invalidate_extent_cache(struct btrfs_root *root,
+				   struct btrfs_key *min_key,
+				   struct btrfs_key *max_key)
+{
+	struct inode *inode = NULL;
+	u64 objectid;
+	u64 start, end;
+
+	objectid = min_key->objectid;
+	while (1) {
+		cond_resched();
+		iput(inode);
+
+		if (objectid > max_key->objectid)
+			break;
+
+		inode = find_next_inode(root, objectid);
+		if (!inode)
+			break;
+
+		if (inode->i_ino > max_key->objectid) {
+			iput(inode);
+			break;
+		}
+
+		objectid = inode->i_ino + 1;
+		if (!S_ISREG(inode->i_mode))
+			continue;
+
+		if (unlikely(min_key->objectid == inode->i_ino)) {
+			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
+				continue;
+			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
+				start = 0;
+			else {
+				start = min_key->offset;
+				WARN_ON(!IS_ALIGNED(start, root->sectorsize));
+			}
+		} else {
+			start = 0;
+		}
+
+		if (unlikely(max_key->objectid == inode->i_ino)) {
+			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
+				continue;
+			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
+				end = (u64)-1;
+			} else {
+				if (max_key->offset == 0)
+					continue;
+				end = max_key->offset;
+				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
+				end--;
+			}
+		} else {
+			end = (u64)-1;
+		}
+
+		/* the lock_extent waits for readpage to complete */
+		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+		btrfs_drop_extent_cache(inode, start, end, 1);
+		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	}
+	return 0;
+}
+
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key)
+
+{
+	while (level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			break;
+		if (path->slots[level] + 1 <
+		    btrfs_header_nritems(path->nodes[level])) {
+			btrfs_node_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+			return 0;
+		}
+		level++;
+	}
+	return 1;
+}
+
+/*
+ * merge the relocated tree blocks in reloc tree with corresponding
+ * fs tree.
+ */
+static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
+					       struct btrfs_root *root)
+{
+	LIST_HEAD(inode_list);
+	struct btrfs_key key;
+	struct btrfs_key next_key;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *reloc_root;
+	struct btrfs_root_item *root_item;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf = NULL;
+	unsigned long nr;
+	int level;
+	int max_level;
+	int replaced = 0;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	reloc_root = root->reloc_root;
+	root_item = &reloc_root->root_item;
+
+	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		level = btrfs_root_level(root_item);
+		extent_buffer_get(reloc_root->node);
+		path->nodes[level] = reloc_root->node;
+		path->slots[level] = 0;
+	} else {
+		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+
+		level = root_item->drop_level;
+		BUG_ON(level == 0);
+		path->lowest_level = level;
+		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
+		if (ret < 0) {
+			btrfs_free_path(path);
+			return ret;
+		}
+
+		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
+				      path->slots[level]);
+		WARN_ON(memcmp(&key, &next_key, sizeof(key)));
+
+		btrfs_unlock_up_safe(path, 0);
+	}
+
+	if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
+		trans = btrfs_start_transaction(root, 1);
+
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, 0);
+		btrfs_release_path(reloc_root, path);
+
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		leaf = path->nodes[0];
+		btrfs_unlock_up_safe(path, 1);
+		ret = replace_file_extents(trans, rc, root, leaf,
+					   &inode_list);
+		if (ret < 0)
+			err = ret;
+		goto out;
+	}
+
+	memset(&next_key, 0, sizeof(next_key));
+
+	while (1) {
+		leaf = NULL;
+		replaced = 0;
+		trans = btrfs_start_transaction(root, 1);
+		max_level = level;
+
+		ret = walk_down_reloc_tree(reloc_root, path, &level);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		if (ret > 0)
+			break;
+
+		if (!find_next_key(path, level, &key) &&
+		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
+			ret = 0;
+		} else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
+			ret = replace_path(trans, root, reloc_root,
+					   path, &next_key, &leaf,
+					   level, max_level);
+		} else {
+			ret = replace_path(trans, root, reloc_root,
+					   path, &next_key, NULL,
+					   level, max_level);
+		}
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		if (ret > 0) {
+			level = ret;
+			btrfs_node_key_to_cpu(path->nodes[level], &key,
+					      path->slots[level]);
+			replaced = 1;
+		} else if (leaf) {
+			/*
+			 * no block got replaced, try replacing file extents
+			 */
+			btrfs_item_key_to_cpu(leaf, &key, 0);
+			ret = replace_file_extents(trans, rc, root, leaf,
+						   &inode_list);
+			btrfs_tree_unlock(leaf);
+			free_extent_buffer(leaf);
+			BUG_ON(ret < 0);
+		}
+
+		ret = walk_up_reloc_tree(reloc_root, path, &level);
+		if (ret > 0)
+			break;
+
+		BUG_ON(level == 0);
+		/*
+		 * save the merging progress in the drop_progress.
+		 * this is OK since root refs == 1 in this case.
+		 */
+		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
+			       path->slots[level]);
+		root_item->drop_level = level;
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, root);
+
+		btrfs_btree_balance_dirty(root, nr);
+
+		if (replaced && rc->stage == UPDATE_DATA_PTRS)
+			invalidate_extent_cache(root, &key, &next_key);
+	}
+
+	/*
+	 * handle the case only one block in the fs tree need to be
+	 * relocated and the block is tree root.
+	 */
+	leaf = btrfs_lock_root_node(root);
+	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
+	btrfs_tree_unlock(leaf);
+	free_extent_buffer(leaf);
+	if (ret < 0)
+		err = ret;
+out:
+	btrfs_free_path(path);
+
+	if (err == 0) {
+		memset(&root_item->drop_progress, 0,
+		       sizeof(root_item->drop_progress));
+		root_item->drop_level = 0;
+		btrfs_set_root_refs(root_item, 0);
+	}
+
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+
+	btrfs_btree_balance_dirty(root, nr);
+
+	/*
+	 * put inodes while we aren't holding the tree locks
+	 */
+	while (!list_empty(&inode_list)) {
+		struct inodevec *ivec;
+		ivec = list_entry(inode_list.next, struct inodevec, list);
+		list_del(&ivec->list);
+		while (ivec->nr > 0) {
+			ivec->nr--;
+			iput(ivec->inode[ivec->nr]);
+		}
+		kfree(ivec);
+	}
+
+	if (replaced && rc->stage == UPDATE_DATA_PTRS)
+		invalidate_extent_cache(root, &key, &next_key);
+
+	return err;
+}
+
+/*
+ * callback for the work threads.
+ * this function merges reloc tree with corresponding fs tree,
+ * and then drops the reloc tree.
+ */
+static void merge_func(struct btrfs_work *work)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	struct btrfs_root *reloc_root;
+	struct async_merge *async;
+
+	async = container_of(work, struct async_merge, work);
+	reloc_root = async->root;
+
+	if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+		root = read_fs_root(reloc_root->fs_info,
+				    reloc_root->root_key.offset);
+		BUG_ON(IS_ERR(root));
+		BUG_ON(root->reloc_root != reloc_root);
+
+		merge_reloc_root(async->rc, root);
+
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_update_reloc_root(trans, root);
+		btrfs_end_transaction(trans, root);
+	}
+
+	btrfs_drop_dead_root(reloc_root);
+
+	if (atomic_dec_and_test(async->num_pending))
+		complete(async->done);
+
+	kfree(async);
+}
+
+static int merge_reloc_roots(struct reloc_control *rc)
+{
+	struct async_merge *async;
+	struct btrfs_root *root;
+	struct completion done;
+	atomic_t num_pending;
+
+	init_completion(&done);
+	atomic_set(&num_pending, 1);
+
+	while (!list_empty(&rc->reloc_roots)) {
+		root = list_entry(rc->reloc_roots.next,
+				  struct btrfs_root, root_list);
+		list_del_init(&root->root_list);
+
+		async = kmalloc(sizeof(*async), GFP_NOFS);
+		BUG_ON(!async);
+		async->work.func = merge_func;
+		async->work.flags = 0;
+		async->rc = rc;
+		async->root = root;
+		async->done = &done;
+		async->num_pending = &num_pending;
+		atomic_inc(&num_pending);
+		btrfs_queue_worker(&rc->workers, &async->work);
+	}
+
+	if (!atomic_dec_and_test(&num_pending))
+		wait_for_completion(&done);
+
+	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+	return 0;
+}
+
+static void free_block_list(struct rb_root *blocks)
+{
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	while ((rb_node = rb_first(blocks))) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		rb_erase(rb_node, blocks);
+		kfree(block);
+	}
+}
+
+static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *reloc_root)
+{
+	struct btrfs_root *root;
+
+	if (reloc_root->last_trans == trans->transid)
+		return 0;
+
+	root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
+	BUG_ON(IS_ERR(root));
+	BUG_ON(root->reloc_root != reloc_root);
+
+	return btrfs_record_root_in_trans(trans, root);
+}
+
+/*
+ * select one tree from trees that references the block.
+ * for blocks in refernce counted trees, we preper reloc tree.
+ * if no reloc tree found and reloc_only is true, NULL is returned.
+ */
+static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
+					    struct backref_node *node,
+					    struct backref_edge *edges[],
+					    int *nr, int reloc_only)
+{
+	struct backref_node *next;
+	struct btrfs_root *root;
+	int index;
+	int loop = 0;
+again:
+	index = 0;
+	next = node;
+	while (1) {
+		cond_resched();
+		next = walk_up_backref(next, edges, &index);
+		root = next->root;
+		if (!root) {
+			BUG_ON(!node->old_root);
+			goto skip;
+		}
+
+		/* no other choice for non-refernce counted tree */
+		if (!root->ref_cows) {
+			BUG_ON(reloc_only);
+			break;
+		}
+
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+			record_reloc_root_in_trans(trans, root);
+			break;
+		}
+
+		if (loop) {
+			btrfs_record_root_in_trans(trans, root);
+			break;
+		}
+
+		if (reloc_only || next != node) {
+			if (!root->reloc_root)
+				btrfs_record_root_in_trans(trans, root);
+			root = root->reloc_root;
+			/*
+			 * if the reloc tree was created in current
+			 * transation, there is no node in backref tree
+			 * corresponds to the root of the reloc tree.
+			 */
+			if (btrfs_root_last_snapshot(&root->root_item) ==
+			    trans->transid - 1)
+				break;
+		}
+skip:
+		root = NULL;
+		next = walk_down_backref(edges, &index);
+		if (!next || next->level <= node->level)
+			break;
+	}
+
+	if (!root && !loop && !reloc_only) {
+		loop = 1;
+		goto again;
+	}
+
+	if (root)
+		*nr = index;
+	else
+		*nr = 0;
+
+	return root;
+}
+
+static noinline_for_stack
+struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
+				   struct backref_node *node)
+{
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	int nr;
+	return __select_one_root(trans, node, edges, &nr, 0);
+}
+
+static noinline_for_stack
+struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
+				     struct backref_node *node,
+				     struct backref_edge *edges[], int *nr)
+{
+	return __select_one_root(trans, node, edges, nr, 1);
+}
+
+static void grab_path_buffers(struct btrfs_path *path,
+			      struct backref_node *node,
+			      struct backref_edge *edges[], int nr)
+{
+	int i = 0;
+	while (1) {
+		drop_node_buffer(node);
+		node->eb = path->nodes[node->level];
+		BUG_ON(!node->eb);
+		if (path->locks[node->level])
+			node->locked = 1;
+		path->nodes[node->level] = NULL;
+		path->locks[node->level] = 0;
+
+		if (i >= nr)
+			break;
+
+		edges[i]->blockptr = node->eb->start;
+		node = edges[i]->node[UPPER];
+		i++;
+	}
+}
+
+/*
+ * relocate a block tree, and then update pointers in upper level
+ * blocks that reference the block to point to the new location.
+ *
+ * if called by link_to_upper, the block has already been relocated.
+ * in that case this function just updates pointers.
+ */
+static int do_relocation(struct btrfs_trans_handle *trans,
+			 struct backref_node *node,
+			 struct btrfs_key *key,
+			 struct btrfs_path *path, int lowest)
+{
+	struct backref_node *upper;
+	struct backref_edge *edge;
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	u32 blocksize;
+	u64 bytenr;
+	u64 generation;
+	int nr;
+	int slot;
+	int ret;
+	int err = 0;
+
+	BUG_ON(lowest && node->eb);
+
+	path->lowest_level = node->level + 1;
+	list_for_each_entry(edge, &node->upper, list[LOWER]) {
+		cond_resched();
+		if (node->eb && node->eb->start == edge->blockptr)
+			continue;
+
+		upper = edge->node[UPPER];
+		root = select_reloc_root(trans, upper, edges, &nr);
+		if (!root)
+			continue;
+
+		if (upper->eb && !upper->locked)
+			drop_node_buffer(upper);
+
+		if (!upper->eb) {
+			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			BUG_ON(ret > 0);
+
+			slot = path->slots[upper->level];
+
+			btrfs_unlock_up_safe(path, upper->level + 1);
+			grab_path_buffers(path, upper, edges, nr);
+
+			btrfs_release_path(NULL, path);
+		} else {
+			ret = btrfs_bin_search(upper->eb, key, upper->level,
+					       &slot);
+			BUG_ON(ret);
+		}
+
+		bytenr = btrfs_node_blockptr(upper->eb, slot);
+		if (!lowest) {
+			if (node->eb->start == bytenr) {
+				btrfs_tree_unlock(upper->eb);
+				upper->locked = 0;
+				continue;
+			}
+		} else {
+			BUG_ON(node->bytenr != bytenr);
+		}
+
+		blocksize = btrfs_level_size(root, node->level);
+		generation = btrfs_node_ptr_generation(upper->eb, slot);
+		eb = read_tree_block(root, bytenr, blocksize, generation);
+		btrfs_tree_lock(eb);
+		btrfs_set_lock_blocking(eb);
+
+		if (!node->eb) {
+			ret = btrfs_cow_block(trans, root, eb, upper->eb,
+					      slot, &eb);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			btrfs_set_lock_blocking(eb);
+			node->eb = eb;
+			node->locked = 1;
+		} else {
+			btrfs_set_node_blockptr(upper->eb, slot,
+						node->eb->start);
+			btrfs_set_node_ptr_generation(upper->eb, slot,
+						      trans->transid);
+			btrfs_mark_buffer_dirty(upper->eb);
+
+			ret = btrfs_inc_extent_ref(trans, root,
+						node->eb->start, blocksize,
+						upper->eb->start,
+						btrfs_header_owner(upper->eb),
+						node->level, 0);
+			BUG_ON(ret);
+
+			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
+			BUG_ON(ret);
+
+			btrfs_tree_unlock(eb);
+			free_extent_buffer(eb);
+		}
+		if (!lowest) {
+			btrfs_tree_unlock(upper->eb);
+			upper->locked = 0;
+		}
+	}
+	path->lowest_level = 0;
+	return err;
+}
+
+static int link_to_upper(struct btrfs_trans_handle *trans,
+			 struct backref_node *node,
+			 struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	if (!node->eb || list_empty(&node->upper))
+		return 0;
+
+	btrfs_node_key_to_cpu(node->eb, &key, 0);
+	return do_relocation(trans, node, &key, path, 0);
+}
+
+static int finish_pending_nodes(struct btrfs_trans_handle *trans,
+				struct backref_cache *cache,
+				struct btrfs_path *path)
+{
+	struct backref_node *node;
+	int level;
+	int ret;
+	int err = 0;
+
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		while (!list_empty(&cache->pending[level])) {
+			node = list_entry(cache->pending[level].next,
+					  struct backref_node, lower);
+			BUG_ON(node->level != level);
+
+			ret = link_to_upper(trans, node, path);
+			if (ret < 0)
+				err = ret;
+			/*
+			 * this remove the node from the pending list and
+			 * may add some other nodes to the level + 1
+			 * pending list
+			 */
+			remove_backref_node(cache, node);
+		}
+	}
+	BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
+	return err;
+}
+
+static void mark_block_processed(struct reloc_control *rc,
+				 struct backref_node *node)
+{
+	u32 blocksize;
+	if (node->level == 0 ||
+	    in_block_group(node->bytenr, rc->block_group)) {
+		blocksize = btrfs_level_size(rc->extent_root, node->level);
+		set_extent_bits(&rc->processed_blocks, node->bytenr,
+				node->bytenr + blocksize - 1, EXTENT_DIRTY,
+				GFP_NOFS);
+	}
+	node->processed = 1;
+}
+
+/*
+ * mark a block and all blocks directly/indirectly reference the block
+ * as processed.
+ */
+static void update_processed_blocks(struct reloc_control *rc,
+				    struct backref_node *node)
+{
+	struct backref_node *next = node;
+	struct backref_edge *edge;
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	int index = 0;
+
+	while (next) {
+		cond_resched();
+		while (1) {
+			if (next->processed)
+				break;
+
+			mark_block_processed(rc, next);
+
+			if (list_empty(&next->upper))
+				break;
+
+			edge = list_entry(next->upper.next,
+					  struct backref_edge, list[LOWER]);
+			edges[index++] = edge;
+			next = edge->node[UPPER];
+		}
+		next = walk_down_backref(edges, &index);
+	}
+}
+
+static int tree_block_processed(u64 bytenr, u32 blocksize,
+				struct reloc_control *rc)
+{
+	if (test_range_bit(&rc->processed_blocks, bytenr,
+			   bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+		return 1;
+	return 0;
+}
+
+/*
+ * check if there are any file extent pointers in the leaf point to
+ * data require processing
+ */
+static int check_file_extents(struct reloc_control *rc,
+			      u64 bytenr, u32 blocksize, u64 ptr_gen)
+{
+	struct btrfs_key found_key;
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int i;
+	int ret = 0;
+
+	leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
+
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		btrfs_item_key_to_cpu(leaf, &found_key, i);
+		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		if (bytenr == 0)
+			continue;
+		if (in_block_group(bytenr, rc->block_group)) {
+			ret = 1;
+			break;
+		}
+	}
+	free_extent_buffer(leaf);
+	return ret;
+}
+
+/*
+ * scan child blocks of a given block to find blocks require processing
+ */
+static int add_child_blocks(struct btrfs_trans_handle *trans,
+			    struct reloc_control *rc,
+			    struct backref_node *node,
+			    struct rb_root *blocks)
+{
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	u64 bytenr;
+	u64 ptr_gen;
+	u32 blocksize;
+	u32 nritems;
+	int i;
+	int err = 0;
+
+	nritems = btrfs_header_nritems(node->eb);
+	blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		bytenr = btrfs_node_blockptr(node->eb, i);
+		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
+		if (ptr_gen == trans->transid)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
+			continue;
+		if (tree_block_processed(bytenr, blocksize, rc))
+			continue;
+
+		readahead_tree_block(rc->extent_root,
+				     bytenr, blocksize, ptr_gen);
+	}
+
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		bytenr = btrfs_node_blockptr(node->eb, i);
+		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
+		if (ptr_gen == trans->transid)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
+			continue;
+		if (tree_block_processed(bytenr, blocksize, rc))
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    !check_file_extents(rc, bytenr, blocksize, ptr_gen))
+			continue;
+
+		block = kmalloc(sizeof(*block), GFP_NOFS);
+		if (!block) {
+			err = -ENOMEM;
+			break;
+		}
+		block->bytenr = bytenr;
+		btrfs_node_key_to_cpu(node->eb, &block->key, i);
+		block->level = node->level - 1;
+		block->key_ready = 1;
+		rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
+		BUG_ON(rb_node);
+	}
+	if (err)
+		free_block_list(blocks);
+	return err;
+}
+
+/*
+ * find adjacent blocks require processing
+ */
+static noinline_for_stack
+int add_adjacent_blocks(struct btrfs_trans_handle *trans,
+			struct reloc_control *rc,
+			struct backref_cache *cache,
+			struct rb_root *blocks, int level,
+			struct backref_node **upper)
+{
+	struct backref_node *node;
+	int ret = 0;
+
+	WARN_ON(!list_empty(&cache->pending[level]));
+
+	if (list_empty(&cache->pending[level + 1]))
+		return 1;
+
+	node = list_entry(cache->pending[level + 1].next,
+			  struct backref_node, lower);
+	if (node->eb)
+		ret = add_child_blocks(trans, rc, node, blocks);
+
+	*upper = node;
+	return ret;
+}
+
+static int get_tree_block_key(struct reloc_control *rc,
+			      struct tree_block *block)
+{
+	struct extent_buffer *eb;
+
+	BUG_ON(block->key_ready);
+	eb = read_tree_block(rc->extent_root, block->bytenr,
+			     block->key.objectid, block->key.offset);
+	WARN_ON(btrfs_header_level(eb) != block->level);
+	if (block->level == 0)
+		btrfs_item_key_to_cpu(eb, &block->key, 0);
+	else
+		btrfs_node_key_to_cpu(eb, &block->key, 0);
+	free_extent_buffer(eb);
+	block->key_ready = 1;
+	return 0;
+}
+
+static int reada_tree_block(struct reloc_control *rc,
+			    struct tree_block *block)
+{
+	BUG_ON(block->key_ready);
+	readahead_tree_block(rc->extent_root, block->bytenr,
+			     block->key.objectid, block->key.offset);
+	return 0;
+}
+
+/*
+ * helper function to relocate a tree block
+ */
+static int relocate_tree_block(struct btrfs_trans_handle *trans,
+				struct reloc_control *rc,
+				struct backref_node *node,
+				struct btrfs_key *key,
+				struct btrfs_path *path)
+{
+	struct btrfs_root *root;
+	int ret;
+
+	root = select_one_root(trans, node);
+	if (unlikely(!root)) {
+		rc->found_old_snapshot = 1;
+		update_processed_blocks(rc, node);
+		return 0;
+	}
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		ret = do_relocation(trans, node, key, path, 1);
+		if (ret < 0)
+			goto out;
+		if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
+			ret = replace_file_extents(trans, rc, root,
+						   node->eb, NULL);
+			if (ret < 0)
+				goto out;
+		}
+		drop_node_buffer(node);
+	} else if (!root->ref_cows) {
+		path->lowest_level = node->level;
+		ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+		btrfs_release_path(root, path);
+		if (ret < 0)
+			goto out;
+	} else if (root != node->root) {
+		WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
+	}
+
+	update_processed_blocks(rc, node);
+	ret = 0;
+out:
+	drop_node_buffer(node);
+	return ret;
+}
+
+/*
+ * relocate a list of blocks
+ */
+static noinline_for_stack
+int relocate_tree_blocks(struct btrfs_trans_handle *trans,
+			 struct reloc_control *rc, struct rb_root *blocks)
+{
+	struct backref_cache *cache;
+	struct backref_node *node;
+	struct btrfs_path *path;
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	int level = -1;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	cache = kmalloc(sizeof(*cache), GFP_NOFS);
+	if (!cache) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	backref_cache_init(cache);
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		if (level == -1)
+			level = block->level;
+		else
+			BUG_ON(level != block->level);
+		if (!block->key_ready)
+			reada_tree_block(rc, block);
+		rb_node = rb_next(rb_node);
+	}
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		if (!block->key_ready)
+			get_tree_block_key(rc, block);
+		rb_node = rb_next(rb_node);
+	}
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+
+		node = build_backref_tree(rc, cache, &block->key,
+					  block->level, block->bytenr);
+		if (IS_ERR(node)) {
+			err = PTR_ERR(node);
+			goto out;
+		}
+
+		ret = relocate_tree_block(trans, rc, node, &block->key,
+					  path);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		remove_backref_node(cache, node);
+		rb_node = rb_next(rb_node);
+	}
+
+	if (level > 0)
+		goto out;
+
+	free_block_list(blocks);
+
+	/*
+	 * now backrefs of some upper level tree blocks have been cached,
+	 * try relocating blocks referenced by these upper level blocks.
+	 */
+	while (1) {
+		struct backref_node *upper = NULL;
+		if (trans->transaction->in_commit ||
+		    trans->transaction->delayed_refs.flushing)
+			break;
+
+		ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
+					  &upper);
+		if (ret < 0)
+			err = ret;
+		if (ret != 0)
+			break;
+
+		rb_node = rb_first(blocks);
+		while (rb_node) {
+			block = rb_entry(rb_node, struct tree_block, rb_node);
+			if (trans->transaction->in_commit ||
+			    trans->transaction->delayed_refs.flushing)
+				goto out;
+			BUG_ON(!block->key_ready);
+			node = build_backref_tree(rc, cache, &block->key,
+						  level, block->bytenr);
+			if (IS_ERR(node)) {
+				err = PTR_ERR(node);
+				goto out;
+			}
+
+			ret = relocate_tree_block(trans, rc, node,
+						  &block->key, path);
+			if (ret < 0) {
+				err = ret;
+				goto out;
+			}
+			remove_backref_node(cache, node);
+			rb_node = rb_next(rb_node);
+		}
+		free_block_list(blocks);
+
+		if (upper) {
+			ret = link_to_upper(trans, upper, path);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			remove_backref_node(cache, upper);
+		}
+	}
+out:
+	free_block_list(blocks);
+
+	ret = finish_pending_nodes(trans, cache, path);
+	if (ret < 0)
+		err = ret;
+
+	kfree(cache);
+	btrfs_free_path(path);
+	return err;
+}
+
+static noinline_for_stack
+int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+{
+	u64 page_start;
+	u64 page_end;
+	unsigned long i;
+	unsigned long first_index;
+	unsigned long last_index;
+	unsigned int total_read = 0;
+	unsigned int total_dirty = 0;
+	struct page *page;
+	struct file_ra_state *ra;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	int ret = 0;
+
+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
+
+	mutex_lock(&inode->i_mutex);
+	first_index = start >> PAGE_CACHE_SHIFT;
+	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
+
+	/* make sure the dirty trick played by the caller work */
+	ret = invalidate_inode_pages2_range(inode->i_mapping,
+					    first_index, last_index);
+	if (ret)
+		goto out_unlock;
+
+	file_ra_state_init(ra, inode->i_mapping);
+
+	for (i = first_index ; i <= last_index; i++) {
+		if (total_read % ra->ra_pages == 0) {
+			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
+				min(last_index, ra->ra_pages + i - 1));
+		}
+		total_read++;
+again:
+		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
+			BUG_ON(1);
+		page = grab_cache_page(inode->i_mapping, i);
+		if (!page) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				ret = -EIO;
+				goto out_unlock;
+			}
+		}
+		wait_on_page_writeback(page);
+
+		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+		ordered = btrfs_lookup_ordered_extent(inode, page_start);
+		if (ordered) {
+			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			goto again;
+		}
+		set_page_extent_mapped(page);
+
+		if (i == first_index)
+			set_extent_bits(io_tree, page_start, page_end,
+					EXTENT_BOUNDARY, GFP_NOFS);
+		btrfs_set_extent_delalloc(inode, page_start, page_end);
+
+		set_page_dirty(page);
+		total_dirty++;
+
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+	kfree(ra);
+	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
+	return ret;
+}
+
+static noinline_for_stack
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
+	u64 end = start + extent_key->offset - 1;
+
+	em = alloc_extent_map(GFP_NOFS);
+	em->start = start;
+	em->len = extent_key->offset;
+	em->block_len = extent_key->offset;
+	em->block_start = extent_key->objectid;
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+	/* setup extent map to cheat btrfs_readpage */
+	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	while (1) {
+		int ret;
+		spin_lock(&em_tree->lock);
+		ret = add_extent_mapping(em_tree, em);
+		spin_unlock(&em_tree->lock);
+		if (ret != -EEXIST) {
+			free_extent_map(em);
+			break;
+		}
+		btrfs_drop_extent_cache(inode, start, end, 0);
+	}
+	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+
+	return relocate_inode_pages(inode, start, extent_key->offset);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static int get_ref_objectid_v0(struct reloc_control *rc,
+			       struct btrfs_path *path,
+			       struct btrfs_key *extent_key,
+			       u64 *ref_objectid, int *path_change)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref_v0 *ref0;
+	int ret;
+	int slot;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	while (1) {
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret < 0)
+				return ret;
+			BUG_ON(ret > 0);
+			leaf = path->nodes[0];
+			slot = path->slots[0];
+			if (path_change)
+				*path_change = 1;
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != extent_key->objectid)
+			return -ENOENT;
+
+		if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
+			slot++;
+			continue;
+		}
+		ref0 = btrfs_item_ptr(leaf, slot,
+				struct btrfs_extent_ref_v0);
+		*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
+		break;
+	}
+	return 0;
+}
+#endif
+
+/*
+ * helper to add a tree block to the list.
+ * the major work is getting the generation and level of the block
+ */
+static int add_tree_block(struct reloc_control *rc,
+			  struct btrfs_key *extent_key,
+			  struct btrfs_path *path,
+			  struct rb_root *blocks)
+{
+	struct extent_buffer *eb;
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *bi;
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	u32 item_size;
+	int level = -1;
+	int generation;
+
+	eb =  path->nodes[0];
+	item_size = btrfs_item_size_nr(eb, path->slots[0]);
+
+	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
+		ei = btrfs_item_ptr(eb, path->slots[0],
+				struct btrfs_extent_item);
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		generation = btrfs_extent_generation(eb, ei);
+		level = btrfs_tree_block_level(eb, bi);
+	} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		u64 ref_owner;
+		int ret;
+
+		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		ret = get_ref_objectid_v0(rc, path, extent_key,
+					  &ref_owner, NULL);
+		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
+		level = (int)ref_owner;
+		/* FIXME: get real generation */
+		generation = 0;
+#else
+		BUG();
+#endif
+	}
+
+	btrfs_release_path(rc->extent_root, path);
+
+	BUG_ON(level == -1);
+
+	block = kmalloc(sizeof(*block), GFP_NOFS);
+	if (!block)
+		return -ENOMEM;
+
+	block->bytenr = extent_key->objectid;
+	block->key.objectid = extent_key->offset;
+	block->key.offset = generation;
+	block->level = level;
+	block->key_ready = 0;
+
+	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
+	BUG_ON(rb_node);
+
+	return 0;
+}
+
+/*
+ * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
+ */
+static int __add_tree_block(struct reloc_control *rc,
+			    u64 bytenr, u32 blocksize,
+			    struct rb_root *blocks)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	if (tree_block_processed(bytenr, blocksize, rc))
+		return 0;
+
+	if (tree_search(blocks, bytenr))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = blocksize;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret);
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	ret = add_tree_block(rc, &key, path, blocks);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to check if the block use full backrefs for pointers in it
+ */
+static int block_use_full_backref(struct reloc_control *rc,
+				  struct extent_buffer *eb)
+{
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+	u64 flags;
+	int ret;
+
+	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
+	    btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
+		return 1;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	key.objectid = eb->start;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = eb->len;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root,
+				&key, path, 0, 0);
+	BUG_ON(ret);
+
+	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_extent_item);
+	flags = btrfs_extent_flags(path->nodes[0], ei);
+	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
+	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+		ret = 1;
+	else
+		ret = 0;
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
+ * this function scans fs tree to find blocks reference the data extent
+ */
+static int find_data_references(struct reloc_control *rc,
+				struct btrfs_key *extent_key,
+				struct extent_buffer *leaf,
+				struct btrfs_extent_data_ref *ref,
+				struct rb_root *blocks)
+{
+	struct btrfs_path *path;
+	struct tree_block *block;
+	struct btrfs_root *root;
+	struct btrfs_file_extent_item *fi;
+	struct rb_node *rb_node;
+	struct btrfs_key key;
+	u64 ref_root;
+	u64 ref_objectid;
+	u64 ref_offset;
+	u32 ref_count;
+	u32 nritems;
+	int err = 0;
+	int added = 0;
+	int counted;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ref_root = btrfs_extent_data_ref_root(leaf, ref);
+	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
+	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
+	ref_count = btrfs_extent_data_ref_count(leaf, ref);
+
+	root = read_fs_root(rc->extent_root->fs_info, ref_root);
+	if (IS_ERR(root)) {
+		err = PTR_ERR(root);
+		goto out;
+	}
+
+	key.objectid = ref_objectid;
+	key.offset = ref_offset;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	nritems = btrfs_header_nritems(leaf);
+	/*
+	 * the references in tree blocks that use full backrefs
+	 * are not counted in
+	 */
+	if (block_use_full_backref(rc, leaf))
+		counted = 0;
+	else
+		counted = 1;
+	rb_node = tree_search(blocks, leaf->start);
+	if (rb_node) {
+		if (counted)
+			added = 1;
+		else
+			path->slots[0] = nritems;
+	}
+
+	while (ref_count > 0) {
+		while (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				err = ret;
+				goto out;
+			}
+			if (ret > 0) {
+				WARN_ON(1);
+				goto out;
+			}
+
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+			added = 0;
+
+			if (block_use_full_backref(rc, leaf))
+				counted = 0;
+			else
+				counted = 1;
+			rb_node = tree_search(blocks, leaf->start);
+			if (rb_node) {
+				if (counted)
+					added = 1;
+				else
+					path->slots[0] = nritems;
+			}
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != ref_objectid ||
+		    key.type != BTRFS_EXTENT_DATA_KEY) {
+			WARN_ON(1);
+			break;
+		}
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			goto next;
+
+		if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
+		    extent_key->objectid)
+			goto next;
+
+		key.offset -= btrfs_file_extent_offset(leaf, fi);
+		if (key.offset != ref_offset)
+			goto next;
+
+		if (counted)
+			ref_count--;
+		if (added)
+			goto next;
+
+		if (!tree_block_processed(leaf->start, leaf->len, rc)) {
+			block = kmalloc(sizeof(*block), GFP_NOFS);
+			if (!block) {
+				err = -ENOMEM;
+				break;
+			}
+			block->bytenr = leaf->start;
+			btrfs_item_key_to_cpu(leaf, &block->key, 0);
+			block->level = 0;
+			block->key_ready = 1;
+			rb_node = tree_insert(blocks, block->bytenr,
+					      &block->rb_node);
+			BUG_ON(rb_node);
+		}
+		if (counted)
+			added = 1;
+		else
+			path->slots[0] = nritems;
+next:
+		path->slots[0]++;
+
+	}
+out:
+	btrfs_free_path(path);
+	return err;
+}
+
+/*
+ * hepler to find all tree blocks that reference a given data extent
+ */
+static noinline_for_stack
+int add_data_references(struct reloc_control *rc,
+			struct btrfs_key *extent_key,
+			struct btrfs_path *path,
+			struct rb_root *blocks)
+{
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_extent_inline_ref *iref;
+	unsigned long ptr;
+	unsigned long end;
+	u32 blocksize;
+	int ret;
+	int err = 0;
+
+	ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
+			       extent_key->offset);
+	BUG_ON(ret < 0);
+	if (ret > 0) {
+		/* the relocated data is fragmented */
+		rc->extents_skipped++;
+		btrfs_release_path(rc->extent_root, path);
+		return 0;
+	}
+
+	blocksize = btrfs_level_size(rc->extent_root, 0);
+
+	eb = path->nodes[0];
+	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
+		ptr = end;
+	else
+#endif
+		ptr += sizeof(struct btrfs_extent_item);
+
+	while (ptr < end) {
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		key.type = btrfs_extent_inline_ref_type(eb, iref);
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+			ret = __add_tree_block(rc, key.offset, blocksize,
+					       blocks);
+		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			ret = find_data_references(rc, extent_key,
+						   eb, dref, blocks);
+		} else {
+			BUG();
+		}
+		ptr += btrfs_extent_inline_ref_size(key.type);
+	}
+	WARN_ON(ptr > end);
+
+	while (1) {
+		cond_resched();
+		eb = path->nodes[0];
+		if (path->slots[0] >= btrfs_header_nritems(eb)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			if (ret > 0)
+				break;
+			eb = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+		if (key.objectid != extent_key->objectid)
+			break;
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
+		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+#else
+		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+#endif
+			ret = __add_tree_block(rc, key.offset, blocksize,
+					       blocks);
+		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+			dref = btrfs_item_ptr(eb, path->slots[0],
+					      struct btrfs_extent_data_ref);
+			ret = find_data_references(rc, extent_key,
+						   eb, dref, blocks);
+		} else {
+			ret = 0;
+		}
+		if (ret) {
+			err = ret;
+			break;
+		}
+		path->slots[0]++;
+	}
+	btrfs_release_path(rc->extent_root, path);
+	if (err)
+		free_block_list(blocks);
+	return err;
+}
+
+/*
+ * hepler to find next unprocessed extent
+ */
+static noinline_for_stack
+int find_next_extent(struct btrfs_trans_handle *trans,
+		     struct reloc_control *rc, struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	u64 start, end, last;
+	int ret;
+
+	last = rc->block_group->key.objectid + rc->block_group->key.offset;
+	while (1) {
+		cond_resched();
+		if (rc->search_start >= last) {
+			ret = 1;
+			break;
+		}
+
+		key.objectid = rc->search_start;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = 0;
+
+		path->search_commit_root = 1;
+		path->skip_locking = 1;
+		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
+					0, 0);
+		if (ret < 0)
+			break;
+next:
+		leaf = path->nodes[0];
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret != 0)
+				break;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid >= last) {
+			ret = 1;
+			break;
+		}
+
+		if (key.type != BTRFS_EXTENT_ITEM_KEY ||
+		    key.objectid + key.offset <= rc->search_start) {
+			path->slots[0]++;
+			goto next;
+		}
+
+		ret = find_first_extent_bit(&rc->processed_blocks,
+					    key.objectid, &start, &end,
+					    EXTENT_DIRTY);
+
+		if (ret == 0 && start <= key.objectid) {
+			btrfs_release_path(rc->extent_root, path);
+			rc->search_start = end + 1;
+		} else {
+			rc->search_start = key.objectid + key.offset;
+			return 0;
+		}
+	}
+	btrfs_release_path(rc->extent_root, path);
+	return ret;
+}
+
+static void set_reloc_control(struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	mutex_lock(&fs_info->trans_mutex);
+	fs_info->reloc_ctl = rc;
+	mutex_unlock(&fs_info->trans_mutex);
+}
+
+static void unset_reloc_control(struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	mutex_lock(&fs_info->trans_mutex);
+	fs_info->reloc_ctl = NULL;
+	mutex_unlock(&fs_info->trans_mutex);
+}
+
+static int check_extent_flags(u64 flags)
+{
+	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+		return 1;
+	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+		return 1;
+	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+		return 1;
+	return 0;
+}
+
+static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
+{
+	struct rb_root blocks = RB_ROOT;
+	struct btrfs_key key;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	unsigned long nr;
+	u64 flags;
+	u32 item_size;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	rc->search_start = rc->block_group->key.objectid;
+	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
+			  GFP_NOFS);
+
+	rc->create_reloc_root = 1;
+	set_reloc_control(rc);
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	while (1) {
+		trans = btrfs_start_transaction(rc->extent_root, 1);
+
+		ret = find_next_extent(trans, rc, path);
+		if (ret < 0)
+			err = ret;
+		if (ret != 0)
+			break;
+
+		rc->extents_found++;
+
+		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_extent_item);
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		item_size = btrfs_item_size_nr(path->nodes[0],
+					       path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			flags = btrfs_extent_flags(path->nodes[0], ei);
+			ret = check_extent_flags(flags);
+			BUG_ON(ret);
+
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			u64 ref_owner;
+			int path_change = 0;
+
+			BUG_ON(item_size !=
+			       sizeof(struct btrfs_extent_item_v0));
+			ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
+						  &path_change);
+			if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
+				flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+			else
+				flags = BTRFS_EXTENT_FLAG_DATA;
+
+			if (path_change) {
+				btrfs_release_path(rc->extent_root, path);
+
+				path->search_commit_root = 1;
+				path->skip_locking = 1;
+				ret = btrfs_search_slot(NULL, rc->extent_root,
+							&key, path, 0, 0);
+				if (ret < 0) {
+					err = ret;
+					break;
+				}
+				BUG_ON(ret > 0);
+			}
+#else
+			BUG();
+#endif
+		}
+
+		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+			ret = add_tree_block(rc, &key, path, &blocks);
+		} else if (rc->stage == UPDATE_DATA_PTRS &&
+			 (flags & BTRFS_EXTENT_FLAG_DATA)) {
+			ret = add_data_references(rc, &key, path, &blocks);
+		} else {
+			btrfs_release_path(rc->extent_root, path);
+			ret = 0;
+		}
+		if (ret < 0) {
+			err = 0;
+			break;
+		}
+
+		if (!RB_EMPTY_ROOT(&blocks)) {
+			ret = relocate_tree_blocks(trans, rc, &blocks);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+		}
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction_throttle(trans, rc->extent_root);
+		trans = NULL;
+		btrfs_btree_balance_dirty(rc->extent_root, nr);
+
+		if (rc->stage == MOVE_DATA_EXTENTS &&
+		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
+			rc->found_file_extent = 1;
+			ret = relocate_data_extent(rc->data_inode, &key);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+		}
+	}
+	btrfs_free_path(path);
+
+	if (trans) {
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, rc->extent_root);
+		btrfs_btree_balance_dirty(rc->extent_root, nr);
+	}
+
+	rc->create_reloc_root = 0;
+	smp_mb();
+
+	if (rc->extents_found > 0) {
+		trans = btrfs_start_transaction(rc->extent_root, 1);
+		btrfs_commit_transaction(trans, rc->extent_root);
+	}
+
+	merge_reloc_roots(rc);
+
+	unset_reloc_control(rc);
+
+	/* get rid of pinned extents */
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	return err;
+}
+
+static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 u64 objectid, u64 size)
+{
+	struct btrfs_path *path;
+	struct btrfs_inode_item *item;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+	if (ret)
+		goto out;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
+	btrfs_set_inode_generation(leaf, item, 1);
+	btrfs_set_inode_size(leaf, item, size);
+	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
+	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to create inode for data relocation.
+ * the inode is in data relocation tree and its link count is 0
+ */
+static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
+					struct btrfs_block_group_cache *group)
+{
+	struct inode *inode = NULL;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	unsigned long nr;
+	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
+	int err = 0;
+
+	root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
+	if (IS_ERR(root))
+		return ERR_CAST(root);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
+	if (err)
+		goto out;
+
+	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
+	BUG_ON(err);
+
+	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
+				       group->key.offset, 0, group->key.offset,
+				       0, 0, 0);
+	BUG_ON(err);
+
+	key.objectid = objectid;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(root->fs_info->sb, &key, root);
+	BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
+	BTRFS_I(inode)->index_cnt = group->key.objectid;
+
+	err = btrfs_orphan_add(trans, inode);
+out:
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+
+	btrfs_btree_balance_dirty(root, nr);
+	if (err) {
+		if (inode)
+			iput(inode);
+		inode = ERR_PTR(err);
+	}
+	return inode;
+}
+
+/*
+ * function to relocate all extents in a block group.
+ */
+int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
+{
+	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	struct reloc_control *rc;
+	int ret;
+	int err = 0;
+
+	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	if (!rc)
+		return -ENOMEM;
+
+	mapping_tree_init(&rc->reloc_root_tree);
+	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
+	INIT_LIST_HEAD(&rc->reloc_roots);
+
+	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
+	BUG_ON(!rc->block_group);
+
+	btrfs_init_workers(&rc->workers, "relocate",
+			   fs_info->thread_pool_size);
+
+	rc->extent_root = extent_root;
+	btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
+
+	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
+	if (IS_ERR(rc->data_inode)) {
+		err = PTR_ERR(rc->data_inode);
+		rc->data_inode = NULL;
+		goto out;
+	}
+
+	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
+	       (unsigned long long)rc->block_group->key.objectid,
+	       (unsigned long long)rc->block_group->flags);
+
+	btrfs_start_delalloc_inodes(fs_info->tree_root);
+	btrfs_wait_ordered_extents(fs_info->tree_root, 0);
+
+	while (1) {
+		mutex_lock(&fs_info->cleaner_mutex);
+		btrfs_clean_old_snapshots(fs_info->tree_root);
+		mutex_unlock(&fs_info->cleaner_mutex);
+
+		rc->extents_found = 0;
+		rc->extents_skipped = 0;
+
+		ret = relocate_block_group(rc);
+		if (ret < 0) {
+			err = ret;
+			break;
+		}
+
+		if (rc->extents_found == 0)
+			break;
+
+		printk(KERN_INFO "btrfs: found %llu extents\n",
+			(unsigned long long)rc->extents_found);
+
+		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
+			btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
+			invalidate_mapping_pages(rc->data_inode->i_mapping,
+						 0, -1);
+			rc->stage = UPDATE_DATA_PTRS;
+		} else if (rc->stage == UPDATE_DATA_PTRS &&
+			   rc->extents_skipped >= rc->extents_found) {
+			iput(rc->data_inode);
+			rc->data_inode = create_reloc_inode(fs_info,
+							    rc->block_group);
+			if (IS_ERR(rc->data_inode)) {
+				err = PTR_ERR(rc->data_inode);
+				rc->data_inode = NULL;
+				break;
+			}
+			rc->stage = MOVE_DATA_EXTENTS;
+			rc->found_file_extent = 0;
+		}
+	}
+
+	filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
+				 rc->block_group->key.objectid,
+				 rc->block_group->key.objectid +
+				 rc->block_group->key.offset - 1);
+
+	WARN_ON(rc->block_group->pinned > 0);
+	WARN_ON(rc->block_group->reserved > 0);
+	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
+out:
+	iput(rc->data_inode);
+	btrfs_stop_workers(&rc->workers);
+	btrfs_put_block_group(rc->block_group);
+	kfree(rc);
+	return err;
+}
+
+/*
+ * recover relocation interrupted by system crash.
+ *
+ * this function resumes merging reloc trees with corresponding fs trees.
+ * this is important for keeping the sharing of tree blocks
+ */
+int btrfs_recover_relocation(struct btrfs_root *root)
+{
+	LIST_HEAD(reloc_roots);
+	struct btrfs_key key;
+	struct btrfs_root *fs_root;
+	struct btrfs_root *reloc_root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct reloc_control *rc = NULL;
+	struct btrfs_trans_handle *trans;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
+					path, 0, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		btrfs_release_path(root->fs_info->tree_root, path);
+
+		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
+		    key.type != BTRFS_ROOT_ITEM_KEY)
+			break;
+
+		reloc_root = btrfs_read_fs_root_no_radix(root, &key);
+		if (IS_ERR(reloc_root)) {
+			err = PTR_ERR(reloc_root);
+			goto out;
+		}
+
+		list_add(&reloc_root->root_list, &reloc_roots);
+
+		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+			fs_root = read_fs_root(root->fs_info,
+					       reloc_root->root_key.offset);
+			if (IS_ERR(fs_root)) {
+				err = PTR_ERR(fs_root);
+				goto out;
+			}
+		}
+
+		if (key.offset == 0)
+			break;
+
+		key.offset--;
+	}
+	btrfs_release_path(root->fs_info->tree_root, path);
+
+	if (list_empty(&reloc_roots))
+		goto out;
+
+	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	if (!rc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	mapping_tree_init(&rc->reloc_root_tree);
+	INIT_LIST_HEAD(&rc->reloc_roots);
+	btrfs_init_workers(&rc->workers, "relocate",
+			   root->fs_info->thread_pool_size);
+	rc->extent_root = root->fs_info->extent_root;
+
+	set_reloc_control(rc);
+
+	while (!list_empty(&reloc_roots)) {
+		reloc_root = list_entry(reloc_roots.next,
+					struct btrfs_root, root_list);
+		list_del(&reloc_root->root_list);
+
+		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
+			list_add_tail(&reloc_root->root_list,
+				      &rc->reloc_roots);
+			continue;
+		}
+
+		fs_root = read_fs_root(root->fs_info,
+				       reloc_root->root_key.offset);
+		BUG_ON(IS_ERR(fs_root));
+
+		__add_reloc_root(reloc_root);
+		fs_root->reloc_root = reloc_root;
+	}
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	merge_reloc_roots(rc);
+
+	unset_reloc_control(rc);
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+out:
+	if (rc) {
+		btrfs_stop_workers(&rc->workers);
+		kfree(rc);
+	}
+	while (!list_empty(&reloc_roots)) {
+		reloc_root = list_entry(reloc_roots.next,
+					struct btrfs_root, root_list);
+		list_del(&reloc_root->root_list);
+		free_extent_buffer(reloc_root->node);
+		free_extent_buffer(reloc_root->commit_root);
+		kfree(reloc_root);
+	}
+	btrfs_free_path(path);
+
+	if (err == 0) {
+		/* cleanup orphan inode in data relocation tree */
+		fs_root = read_fs_root(root->fs_info,
+				       BTRFS_DATA_RELOC_TREE_OBJECTID);
+		if (IS_ERR(fs_root))
+			err = PTR_ERR(fs_root);
+	}
+	return err;
+}
+
+/*
+ * helper to add ordered checksum for data relocation.
+ *
+ * cloning checksum properly handles the nodatasum extents.
+ * it also saves CPU time to re-calculate the checksum.
+ */
+int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
+{
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
+	struct btrfs_ordered_extent *ordered;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	size_t offset;
+	int ret;
+	u64 disk_bytenr;
+	LIST_HEAD(list);
+
+	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
+	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
+
+	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
+	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
+				       disk_bytenr + len - 1, &list);
+
+	while (!list_empty(&list)) {
+		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
+		list_del_init(&sums->list);
+
+		sector_sum = sums->sums;
+		sums->bytenr = ordered->start;
+
+		offset = 0;
+		while (offset < sums->len) {
+			sector_sum->bytenr += ordered->start - disk_bytenr;
+			sector_sum++;
+			offset += root->sectorsize;
+		}
+
+		btrfs_add_ordered_sum(inode, ordered, sums);
+	}
+	btrfs_put_ordered_extent(ordered);
+	return 0;
+}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b48650d..0ddc6d6 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -111,6 +111,15 @@
 	return ret;
 }
 
+int btrfs_set_root_node(struct btrfs_root_item *item,
+			struct extent_buffer *node)
+{
+	btrfs_set_root_bytenr(item, node->start);
+	btrfs_set_root_level(item, btrfs_header_level(node));
+	btrfs_set_root_generation(item, btrfs_header_generation(node));
+	return 0;
+}
+
 /*
  * copy the data in 'item' into the btree
  */
@@ -164,8 +173,7 @@
  * offset lower than the latest root.  They need to be queued for deletion to
  * finish what was happening when we crashed.
  */
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
-			  struct btrfs_root *latest)
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
 {
 	struct btrfs_root *dead_root;
 	struct btrfs_item *item;
@@ -227,10 +235,7 @@
 			goto err;
 		}
 
-		if (objectid == BTRFS_TREE_RELOC_OBJECTID)
-			ret = btrfs_add_dead_reloc_root(dead_root);
-		else
-			ret = btrfs_add_dead_root(dead_root, latest);
+		ret = btrfs_add_dead_root(dead_root);
 		if (ret)
 			goto err;
 		goto again;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2ff7cd2..9f179d4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,7 +52,6 @@
 #include "export.h"
 #include "compression.h"
 
-
 static struct super_operations btrfs_super_ops;
 
 static void btrfs_put_super(struct super_block *sb)
@@ -67,8 +66,8 @@
 enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
-	Opt_ssd, Opt_thread_pool, Opt_noacl,  Opt_compress, Opt_notreelog,
-	Opt_ratio, Opt_flushoncommit, Opt_err,
+	Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
+	Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -84,6 +83,8 @@
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
 	{Opt_ssd, "ssd"},
+	{Opt_ssd_spread, "ssd_spread"},
+	{Opt_nossd, "nossd"},
 	{Opt_noacl, "noacl"},
 	{Opt_notreelog, "notreelog"},
 	{Opt_flushoncommit, "flushoncommit"},
@@ -158,7 +159,7 @@
 			 */
 			break;
 		case Opt_nodatasum:
-			printk(KERN_INFO "btrfs: setting nodatacsum\n");
+			printk(KERN_INFO "btrfs: setting nodatasum\n");
 			btrfs_set_opt(info->mount_opt, NODATASUM);
 			break;
 		case Opt_nodatacow:
@@ -174,6 +175,19 @@
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
 			break;
+		case Opt_ssd_spread:
+			printk(KERN_INFO "btrfs: use spread ssd "
+			       "allocation scheme\n");
+			btrfs_set_opt(info->mount_opt, SSD);
+			btrfs_set_opt(info->mount_opt, SSD_SPREAD);
+			break;
+		case Opt_nossd:
+			printk(KERN_INFO "btrfs: not using ssd allocation "
+			       "scheme\n");
+			btrfs_set_opt(info->mount_opt, NOSSD);
+			btrfs_clear_opt(info->mount_opt, SSD);
+			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
+			break;
 		case Opt_nobarrier:
 			printk(KERN_INFO "btrfs: turning off barriers\n");
 			btrfs_set_opt(info->mount_opt, NOBARRIER);
@@ -322,7 +336,7 @@
 	struct dentry *root_dentry;
 	struct btrfs_super_block *disk_super;
 	struct btrfs_root *tree_root;
-	struct btrfs_inode *bi;
+	struct btrfs_key key;
 	int err;
 
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -341,23 +355,15 @@
 	}
 	sb->s_fs_info = tree_root;
 	disk_super = &tree_root->fs_info->super_copy;
-	inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
-				  tree_root->fs_info->fs_root);
-	bi = BTRFS_I(inode);
-	bi->location.objectid = inode->i_ino;
-	bi->location.offset = 0;
-	bi->root = tree_root->fs_info->fs_root;
 
-	btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
-
-	if (!inode) {
-		err = -ENOMEM;
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto fail_close;
 	}
-	if (inode->i_state & I_NEW) {
-		btrfs_read_locked_inode(inode);
-		unlock_new_inode(inode);
-	}
 
 	root_dentry = d_alloc_root(inode);
 	if (!root_dentry) {
@@ -388,10 +394,6 @@
 	struct btrfs_root *root = btrfs_sb(sb);
 	int ret;
 
-	if (sb->s_flags & MS_RDONLY)
-		return 0;
-
-	sb->s_dirt = 0;
 	if (!wait) {
 		filemap_flush(root->fs_info->btree_inode->i_mapping);
 		return 0;
@@ -402,7 +404,6 @@
 
 	trans = btrfs_start_transaction(root, 1);
 	ret = btrfs_commit_transaction(trans, root);
-	sb->s_dirt = 0;
 	return ret;
 }
 
@@ -433,7 +434,11 @@
 		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
 	if (btrfs_test_opt(root, COMPRESS))
 		seq_puts(seq, ",compress");
-	if (btrfs_test_opt(root, SSD))
+	if (btrfs_test_opt(root, NOSSD))
+		seq_puts(seq, ",nossd");
+	if (btrfs_test_opt(root, SSD_SPREAD))
+		seq_puts(seq, ",ssd_spread");
+	else if (btrfs_test_opt(root, SSD))
 		seq_puts(seq, ",ssd");
 	if (btrfs_test_opt(root, NOTREELOG))
 		seq_puts(seq, ",notreelog");
@@ -444,11 +449,6 @@
 	return 0;
 }
 
-static void btrfs_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
 static int btrfs_test_super(struct super_block *s, void *data)
 {
 	struct btrfs_fs_devices *test_fs_devices = data;
@@ -584,7 +584,8 @@
 		if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
 			return -EINVAL;
 
-		ret = btrfs_cleanup_reloc_trees(root);
+		/* recover relocation */
+		ret = btrfs_recover_relocation(root);
 		WARN_ON(ret);
 
 		ret = btrfs_cleanup_fs_roots(root->fs_info);
@@ -678,7 +679,6 @@
 static struct super_operations btrfs_super_ops = {
 	.delete_inode	= btrfs_delete_inode,
 	.put_super	= btrfs_put_super,
-	.write_super	= btrfs_write_super,
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
 	.write_inode	= btrfs_write_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b1436..2e177d7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "tree-log.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
@@ -94,48 +93,40 @@
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
+static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
-	u64 running_trans_id = root->fs_info->running_transaction->transid;
-	if (root->ref_cows && root->last_trans < running_trans_id) {
+	if (root->ref_cows && root->last_trans < trans->transid) {
 		WARN_ON(root == root->fs_info->extent_root);
-		if (root->root_item.refs != 0) {
-			radix_tree_tag_set(&root->fs_info->fs_roots_radix,
-				   (unsigned long)root->root_key.objectid,
-				   BTRFS_ROOT_TRANS_TAG);
+		WARN_ON(root->root_item.refs == 0);
+		WARN_ON(root->commit_root != root->node);
 
-			dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
-			BUG_ON(!dirty);
-			dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
-			BUG_ON(!dirty->root);
-			dirty->latest_root = root;
-			INIT_LIST_HEAD(&dirty->list);
-
-			root->commit_root = btrfs_root_node(root);
-
-			memcpy(dirty->root, root, sizeof(*root));
-			spin_lock_init(&dirty->root->node_lock);
-			spin_lock_init(&dirty->root->list_lock);
-			mutex_init(&dirty->root->objectid_mutex);
-			mutex_init(&dirty->root->log_mutex);
-			INIT_LIST_HEAD(&dirty->root->dead_list);
-			dirty->root->node = root->commit_root;
-			dirty->root->commit_root = NULL;
-
-			spin_lock(&root->list_lock);
-			list_add(&dirty->root->dead_list, &root->dead_list);
-			spin_unlock(&root->list_lock);
-
-			root->dirty_root = dirty;
-		} else {
-			WARN_ON(1);
-		}
-		root->last_trans = running_trans_id;
+		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
+			   (unsigned long)root->root_key.objectid,
+			   BTRFS_ROOT_TRANS_TAG);
+		root->last_trans = trans->transid;
+		btrfs_init_reloc_root(trans, root);
 	}
 	return 0;
 }
 
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (!root->ref_cows)
+		return 0;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (root->last_trans == trans->transid) {
+		mutex_unlock(&root->fs_info->trans_mutex);
+		return 0;
+	}
+
+	record_root_in_trans(trans, root);
+	mutex_unlock(&root->fs_info->trans_mutex);
+	return 0;
+}
+
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -181,7 +172,6 @@
 	ret = join_transaction(root);
 	BUG_ON(ret);
 
-	btrfs_record_root_in_trans(root);
 	h->transid = root->fs_info->running_transaction->transid;
 	h->transaction = root->fs_info->running_transaction;
 	h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@
 	h->delayed_ref_updates = 0;
 
 	root->fs_info->running_transaction->use_count++;
+	record_root_in_trans(h, root);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	return h;
 }
@@ -233,6 +224,7 @@
 	return 0;
 }
 
+#if 0
 /*
  * rate limit against the drop_snapshot code.  This helps to slow down new
  * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@
 			goto harder;
 	}
 }
+#endif
 
 void btrfs_throttle(struct btrfs_root *root)
 {
@@ -280,7 +273,6 @@
 	if (!root->fs_info->open_ioctl_trans)
 		wait_current_trans(root);
 	mutex_unlock(&root->fs_info->trans_mutex);
-	throttle_on_drops(root);
 }
 
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@
 	memset(trans, 0, sizeof(*trans));
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
-	if (throttle)
-		throttle_on_drops(root);
-
 	return 0;
 }
 
@@ -462,12 +451,8 @@
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		if (old_root_bytenr == root->node->start)
 			break;
-		btrfs_set_root_bytenr(&root->root_item,
-				       root->node->start);
-		btrfs_set_root_level(&root->root_item,
-				     btrfs_header_level(root->node));
-		btrfs_set_root_generation(&root->root_item, trans->transid);
 
+		btrfs_set_root_node(&root->root_item, root->node);
 		ret = btrfs_update_root(trans, tree_root,
 					&root->root_key,
 					&root->root_item);
@@ -477,14 +462,16 @@
 		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 		BUG_ON(ret);
 	}
+	free_extent_buffer(root->commit_root);
+	root->commit_root = btrfs_root_node(root);
 	return 0;
 }
 
 /*
  * update all the cowonly tree roots on disk
  */
-int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root)
+static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct list_head *next;
@@ -520,118 +507,54 @@
  * a dirty root struct and adds it into the list of dead roots that need to
  * be deleted
  */
-int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
+int btrfs_add_dead_root(struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
-
-	dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
-	if (!dirty)
-		return -ENOMEM;
-	dirty->root = root;
-	dirty->latest_root = latest;
-
 	mutex_lock(&root->fs_info->trans_mutex);
-	list_add(&dirty->list, &latest->fs_info->dead_roots);
+	list_add(&root->root_list, &root->fs_info->dead_roots);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	return 0;
 }
 
 /*
- * at transaction commit time we need to schedule the old roots for
- * deletion via btrfs_drop_snapshot.  This runs through all the
- * reference counted roots that were modified in the current
- * transaction and puts them into the drop list
+ * update all the cowonly tree roots on disk
  */
-static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
-				    struct radix_tree_root *radix,
-				    struct list_head *list)
+static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
 	struct btrfs_root *gang[8];
-	struct btrfs_root *root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	int i;
 	int ret;
 	int err = 0;
-	u32 refs;
 
 	while (1) {
-		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
+		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
+						 (void **)gang, 0,
 						 ARRAY_SIZE(gang),
 						 BTRFS_ROOT_TRANS_TAG);
 		if (ret == 0)
 			break;
 		for (i = 0; i < ret; i++) {
 			root = gang[i];
-			radix_tree_tag_clear(radix,
-				     (unsigned long)root->root_key.objectid,
-				     BTRFS_ROOT_TRANS_TAG);
-
-			BUG_ON(!root->ref_tree);
-			dirty = root->dirty_root;
+			radix_tree_tag_clear(&fs_info->fs_roots_radix,
+					(unsigned long)root->root_key.objectid,
+					BTRFS_ROOT_TRANS_TAG);
 
 			btrfs_free_log(trans, root);
-			btrfs_free_reloc_root(trans, root);
+			btrfs_update_reloc_root(trans, root);
 
-			if (root->commit_root == root->node) {
-				WARN_ON(root->node->start !=
-					btrfs_root_bytenr(&root->root_item));
-
-				free_extent_buffer(root->commit_root);
-				root->commit_root = NULL;
-				root->dirty_root = NULL;
-
-				spin_lock(&root->list_lock);
-				list_del_init(&dirty->root->dead_list);
-				spin_unlock(&root->list_lock);
-
-				kfree(dirty->root);
-				kfree(dirty);
-
-				/* make sure to update the root on disk
-				 * so we get any updates to the block used
-				 * counts
-				 */
-				err = btrfs_update_root(trans,
-						root->fs_info->tree_root,
-						&root->root_key,
-						&root->root_item);
+			if (root->commit_root == root->node)
 				continue;
-			}
 
-			memset(&root->root_item.drop_progress, 0,
-			       sizeof(struct btrfs_disk_key));
-			root->root_item.drop_level = 0;
-			root->commit_root = NULL;
-			root->dirty_root = NULL;
-			root->root_key.offset = root->fs_info->generation;
-			btrfs_set_root_bytenr(&root->root_item,
-					      root->node->start);
-			btrfs_set_root_level(&root->root_item,
-					     btrfs_header_level(root->node));
-			btrfs_set_root_generation(&root->root_item,
-						  root->root_key.offset);
+			free_extent_buffer(root->commit_root);
+			root->commit_root = btrfs_root_node(root);
 
-			err = btrfs_insert_root(trans, root->fs_info->tree_root,
+			btrfs_set_root_node(&root->root_item, root->node);
+			err = btrfs_update_root(trans, fs_info->tree_root,
 						&root->root_key,
 						&root->root_item);
 			if (err)
 				break;
-
-			refs = btrfs_root_refs(&dirty->root->root_item);
-			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
-			err = btrfs_update_root(trans, root->fs_info->tree_root,
-						&dirty->root->root_key,
-						&dirty->root->root_item);
-
-			BUG_ON(err);
-			if (refs == 1) {
-				list_add(&dirty->list, list);
-			} else {
-				WARN_ON(1);
-				free_extent_buffer(dirty->root->node);
-				kfree(dirty->root);
-				kfree(dirty);
-			}
 		}
 	}
 	return err;
@@ -688,12 +611,8 @@
 				TASK_UNINTERRUPTIBLE);
 		mutex_unlock(&info->trans_mutex);
 
-		atomic_dec(&info->throttles);
-		wake_up(&info->transaction_throttle);
-
 		schedule();
 
-		atomic_inc(&info->throttles);
 		mutex_lock(&info->trans_mutex);
 		finish_wait(&info->transaction_wait, &wait);
 	}
@@ -705,111 +624,61 @@
  * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
  * all of them
  */
-static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
-				     struct list_head *list)
+int btrfs_drop_dead_root(struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	unsigned long nr;
-	u64 num_bytes;
-	u64 bytes_used;
-	u64 max_useless;
-	int ret = 0;
-	int err;
+	int ret;
 
-	while (!list_empty(list)) {
-		struct btrfs_root *root;
+	while (1) {
+		/*
+		 * we don't want to jump in and create a bunch of
+		 * delayed refs if the transaction is starting to close
+		 */
+		wait_transaction_pre_flush(tree_root->fs_info);
+		trans = btrfs_start_transaction(tree_root, 1);
 
-		dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
-		list_del_init(&dirty->list);
-
-		num_bytes = btrfs_root_used(&dirty->root->root_item);
-		root = dirty->latest_root;
-		atomic_inc(&root->fs_info->throttles);
-
-		while (1) {
-			/*
-			 * we don't want to jump in and create a bunch of
-			 * delayed refs if the transaction is starting to close
-			 */
-			wait_transaction_pre_flush(tree_root->fs_info);
-			trans = btrfs_start_transaction(tree_root, 1);
-
-			/*
-			 * we've joined a transaction, make sure it isn't
-			 * closing right now
-			 */
-			if (trans->transaction->delayed_refs.flushing) {
-				btrfs_end_transaction(trans, tree_root);
-				continue;
-			}
-
-			mutex_lock(&root->fs_info->drop_mutex);
-			ret = btrfs_drop_snapshot(trans, dirty->root);
-			if (ret != -EAGAIN)
-				break;
-			mutex_unlock(&root->fs_info->drop_mutex);
-
-			err = btrfs_update_root(trans,
-					tree_root,
-					&dirty->root->root_key,
-					&dirty->root->root_item);
-			if (err)
-				ret = err;
-			nr = trans->blocks_used;
-			ret = btrfs_end_transaction(trans, tree_root);
-			BUG_ON(ret);
-
-			btrfs_btree_balance_dirty(tree_root, nr);
-			cond_resched();
-		}
-		BUG_ON(ret);
-		atomic_dec(&root->fs_info->throttles);
-		wake_up(&root->fs_info->transaction_throttle);
-
-		num_bytes -= btrfs_root_used(&dirty->root->root_item);
-		bytes_used = btrfs_root_used(&root->root_item);
-		if (num_bytes) {
-			mutex_lock(&root->fs_info->trans_mutex);
-			btrfs_record_root_in_trans(root);
-			mutex_unlock(&root->fs_info->trans_mutex);
-			btrfs_set_root_used(&root->root_item,
-					    bytes_used - num_bytes);
+		/*
+		 * we've joined a transaction, make sure it isn't
+		 * closing right now
+		 */
+		if (trans->transaction->delayed_refs.flushing) {
+			btrfs_end_transaction(trans, tree_root);
+			continue;
 		}
 
-		ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
-		if (ret) {
-			BUG();
+		ret = btrfs_drop_snapshot(trans, root);
+		if (ret != -EAGAIN)
 			break;
-		}
-		mutex_unlock(&root->fs_info->drop_mutex);
 
-		spin_lock(&root->list_lock);
-		list_del_init(&dirty->root->dead_list);
-		if (!list_empty(&root->dead_list)) {
-			struct btrfs_root *oldest;
-			oldest = list_entry(root->dead_list.prev,
-					    struct btrfs_root, dead_list);
-			max_useless = oldest->root_key.offset - 1;
-		} else {
-			max_useless = root->root_key.offset - 1;
-		}
-		spin_unlock(&root->list_lock);
+		ret = btrfs_update_root(trans, tree_root,
+					&root->root_key,
+					&root->root_item);
+		if (ret)
+			break;
 
 		nr = trans->blocks_used;
 		ret = btrfs_end_transaction(trans, tree_root);
 		BUG_ON(ret);
 
-		ret = btrfs_remove_leaf_refs(root, max_useless, 0);
-		BUG_ON(ret);
-
-		free_extent_buffer(dirty->root->node);
-		kfree(dirty->root);
-		kfree(dirty);
-
 		btrfs_btree_balance_dirty(tree_root, nr);
 		cond_resched();
 	}
+	BUG_ON(ret);
+
+	ret = btrfs_del_root(trans, tree_root, &root->root_key);
+	BUG_ON(ret);
+
+	nr = trans->blocks_used;
+	ret = btrfs_end_transaction(trans, tree_root);
+	BUG_ON(ret);
+
+	free_extent_buffer(root->node);
+	free_extent_buffer(root->commit_root);
+	kfree(root);
+
+	btrfs_btree_balance_dirty(tree_root, nr);
 	return ret;
 }
 
@@ -839,24 +708,23 @@
 	if (ret)
 		goto fail;
 
-	btrfs_record_root_in_trans(root);
+	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 
 	key.objectid = objectid;
-	key.offset = trans->transid;
+	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 
 	old = btrfs_lock_root_node(root);
 	btrfs_cow_block(trans, root, old, NULL, 0, &old);
+	btrfs_set_lock_blocking(old);
 
 	btrfs_copy_root(trans, root, old, &tmp, objectid);
 	btrfs_tree_unlock(old);
 	free_extent_buffer(old);
 
-	btrfs_set_root_bytenr(new_root_item, tmp->start);
-	btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
-	btrfs_set_root_generation(new_root_item, trans->transid);
+	btrfs_set_root_node(new_root_item, tmp);
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				new_root_item);
 	btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@
 	return 0;
 }
 
+static void update_super_roots(struct btrfs_root *root)
+{
+	struct btrfs_root_item *root_item;
+	struct btrfs_super_block *super;
+
+	super = &root->fs_info->super_copy;
+
+	root_item = &root->fs_info->chunk_root->root_item;
+	super->chunk_root = root_item->bytenr;
+	super->chunk_root_generation = root_item->generation;
+	super->chunk_root_level = root_item->level;
+
+	root_item = &root->fs_info->tree_root->root_item;
+	super->root = root_item->bytenr;
+	super->generation = root_item->generation;
+	super->root_level = root_item->level;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
@@ -971,8 +857,6 @@
 	unsigned long timeout = 1;
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_transaction *prev_trans = NULL;
-	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
-	struct list_head dirty_fs_roots;
 	struct extent_io_tree *pinned_copy;
 	DEFINE_WAIT(wait);
 	int ret;
@@ -999,7 +883,6 @@
 	BUG_ON(ret);
 
 	mutex_lock(&root->fs_info->trans_mutex);
-	INIT_LIST_HEAD(&dirty_fs_roots);
 	if (cur_trans->in_commit) {
 		cur_trans->use_count++;
 		mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@
 	 * with the tree-log code.
 	 */
 	mutex_lock(&root->fs_info->tree_log_mutex);
-	/*
-	 * keep tree reloc code from adding new reloc trees
-	 */
-	mutex_lock(&root->fs_info->tree_reloc_mutex);
 
-
-	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
-			      &dirty_fs_roots);
+	ret = commit_fs_roots(trans, root);
 	BUG_ON(ret);
 
-	/* add_dirty_roots gets rid of all the tree log roots, it is now
+	/* commit_fs_roots gets rid of all the tree log roots, it is now
 	 * safe to free the root of tree log roots
 	 */
 	btrfs_free_log_root_tree(trans, root->fs_info);
 
-	ret = btrfs_commit_tree_roots(trans, root);
+	ret = commit_cowonly_roots(trans, root);
 	BUG_ON(ret);
 
 	cur_trans = root->fs_info->running_transaction;
 	spin_lock(&root->fs_info->new_trans_lock);
 	root->fs_info->running_transaction = NULL;
 	spin_unlock(&root->fs_info->new_trans_lock);
-	btrfs_set_super_generation(&root->fs_info->super_copy,
-				   cur_trans->transid);
-	btrfs_set_super_root(&root->fs_info->super_copy,
-			     root->fs_info->tree_root->node->start);
-	btrfs_set_super_root_level(&root->fs_info->super_copy,
-			   btrfs_header_level(root->fs_info->tree_root->node));
 
-	btrfs_set_super_chunk_root(&root->fs_info->super_copy,
-				   chunk_root->node->start);
-	btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
-					 btrfs_header_level(chunk_root->node));
-	btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
-				btrfs_header_generation(chunk_root->node));
+	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
+			    root->fs_info->tree_root->node);
+	free_extent_buffer(root->fs_info->tree_root->commit_root);
+	root->fs_info->tree_root->commit_root =
+				btrfs_root_node(root->fs_info->tree_root);
+
+	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
+			    root->fs_info->chunk_root->node);
+	free_extent_buffer(root->fs_info->chunk_root->commit_root);
+	root->fs_info->chunk_root->commit_root =
+				btrfs_root_node(root->fs_info->chunk_root);
+
+	update_super_roots(root);
 
 	if (!root->fs_info->log_root_recovering) {
 		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@
 
 	trans->transaction->blocked = 0;
 
-	wake_up(&root->fs_info->transaction_throttle);
 	wake_up(&root->fs_info->transaction_wait);
 
 	mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@
 	btrfs_finish_extent_commit(trans, root, pinned_copy);
 	kfree(pinned_copy);
 
-	btrfs_drop_dead_reloc_roots(root);
-	mutex_unlock(&root->fs_info->tree_reloc_mutex);
-
 	/* do the directory inserts of any pending snapshot creations */
 	finish_pending_snapshots(trans, root->fs_info);
 
@@ -1186,16 +1060,9 @@
 	put_transaction(cur_trans);
 	put_transaction(cur_trans);
 
-	list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
-	if (root->fs_info->closing)
-		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
-
 	mutex_unlock(&root->fs_info->trans_mutex);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
-
-	if (root->fs_info->closing)
-		drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
 	return ret;
 }
 
@@ -1204,16 +1071,17 @@
  */
 int btrfs_clean_old_snapshots(struct btrfs_root *root)
 {
-	struct list_head dirty_roots;
-	INIT_LIST_HEAD(&dirty_roots);
-again:
-	mutex_lock(&root->fs_info->trans_mutex);
-	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
-	mutex_unlock(&root->fs_info->trans_mutex);
+	LIST_HEAD(list);
+	struct btrfs_fs_info *fs_info = root->fs_info;
 
-	if (!list_empty(&dirty_roots)) {
-		drop_dirty_roots(root, &dirty_roots);
-		goto again;
+	mutex_lock(&fs_info->trans_mutex);
+	list_splice_init(&fs_info->dead_roots, &list);
+	mutex_unlock(&fs_info->trans_mutex);
+
+	while (!list_empty(&list)) {
+		root = list_entry(list.next, struct btrfs_root, root_list);
+		list_del_init(&root->root_list);
+		btrfs_drop_dead_root(root);
 	}
 	return 0;
 }
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94f5bde..961c3ee 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,12 +62,6 @@
 	struct list_head list;
 };
 
-struct btrfs_dirty_root {
-	struct list_head list;
-	struct btrfs_root *root;
-	struct btrfs_root *latest_root;
-};
-
 static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
 					       struct inode *inode)
 {
@@ -100,7 +94,8 @@
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root);
 
-int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
+int btrfs_add_dead_root(struct btrfs_root *root);
+int btrfs_drop_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -108,7 +103,8 @@
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
-int btrfs_record_root_in_trans(struct btrfs_root *root);
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root);
 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 					struct extent_io_tree *dirty_pages);
 #endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index db5e212..c139222 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -430,18 +430,16 @@
 static noinline struct inode *read_one_inode(struct btrfs_root *root,
 					     u64 objectid)
 {
+	struct btrfs_key key;
 	struct inode *inode;
-	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
-	if (inode->i_state & I_NEW) {
-		BTRFS_I(inode)->root = root;
-		BTRFS_I(inode)->location.objectid = objectid;
-		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-		BTRFS_I(inode)->location.offset = 0;
-		btrfs_read_locked_inode(inode);
-		unlock_new_inode(inode);
 
-	}
-	if (is_bad_inode(inode)) {
+	key.objectid = objectid;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(root->fs_info->sb, &key, root);
+	if (IS_ERR(inode)) {
+		inode = NULL;
+	} else if (is_bad_inode(inode)) {
 		iput(inode);
 		inode = NULL;
 	}
@@ -541,6 +539,7 @@
 
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+		u64 offset;
 		unsigned long dest_offset;
 		struct btrfs_key ins;
 
@@ -555,6 +554,7 @@
 		ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
 		ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
 		ins.type = BTRFS_EXTENT_ITEM_KEY;
+		offset = key->offset - btrfs_file_extent_offset(eb, item);
 
 		if (ins.objectid > 0) {
 			u64 csum_start;
@@ -569,19 +569,16 @@
 			if (ret == 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
 						ins.objectid, ins.offset,
-						path->nodes[0]->start,
-						root->root_key.objectid,
-						trans->transid, key->objectid);
+						0, root->root_key.objectid,
+						key->objectid, offset);
 			} else {
 				/*
 				 * insert the extent pointer in the extent
 				 * allocation tree
 				 */
-				ret = btrfs_alloc_logged_extent(trans, root,
-						path->nodes[0]->start,
-						root->root_key.objectid,
-						trans->transid, key->objectid,
-						&ins);
+				ret = btrfs_alloc_logged_file_extent(trans,
+						root, root->root_key.objectid,
+						key->objectid, offset, &ins);
 				BUG_ON(ret);
 			}
 			btrfs_release_path(root, path);
@@ -1706,9 +1703,6 @@
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
-				ret = btrfs_drop_leaf_ref(trans, root, next);
-				BUG_ON(ret);
-
 				WARN_ON(root_owner !=
 					BTRFS_TREE_LOG_OBJECTID);
 				ret = btrfs_free_reserved_extent(root,
@@ -1753,10 +1747,6 @@
 		btrfs_wait_tree_block_writeback(next);
 		btrfs_tree_unlock(next);
 
-		if (*level == 0) {
-			ret = btrfs_drop_leaf_ref(trans, root, next);
-			BUG_ON(ret);
-		}
 		WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
 		ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
 		BUG_ON(ret);
@@ -1811,12 +1801,6 @@
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
-				if (*level == 0) {
-					ret = btrfs_drop_leaf_ref(trans, root,
-								  next);
-					BUG_ON(ret);
-				}
-
 				WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
 				ret = btrfs_free_reserved_extent(root,
 						path->nodes[*level]->start,
@@ -1884,11 +1868,6 @@
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
 
-			if (orig_level == 0) {
-				ret = btrfs_drop_leaf_ref(trans, log,
-							  next);
-				BUG_ON(ret);
-			}
 			WARN_ON(log->root_key.objectid !=
 				BTRFS_TREE_LOG_OBJECTID);
 			ret = btrfs_free_reserved_extent(log, next->start,
@@ -2027,9 +2006,7 @@
 	ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
 	BUG_ON(ret);
 
-	btrfs_set_root_bytenr(&log->root_item, log->node->start);
-	btrfs_set_root_generation(&log->root_item, trans->transid);
-	btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
+	btrfs_set_root_node(&log->root_item, log->node);
 
 	root->log_batch = 0;
 	root->log_transid++;
@@ -2581,7 +2558,7 @@
 				       ins_keys, ins_sizes, nr);
 	BUG_ON(ret);
 
-	for (i = 0; i < nr; i++) {
+	for (i = 0; i < nr; i++, dst_path->slots[0]++) {
 		dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
 						   dst_path->slots[0]);
 
@@ -2617,36 +2594,31 @@
 			found_type = btrfs_file_extent_type(src, extent);
 			if (found_type == BTRFS_FILE_EXTENT_REG ||
 			    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
-				u64 ds = btrfs_file_extent_disk_bytenr(src,
-								   extent);
-				u64 dl = btrfs_file_extent_disk_num_bytes(src,
-								      extent);
-				u64 cs = btrfs_file_extent_offset(src, extent);
-				u64 cl = btrfs_file_extent_num_bytes(src,
-								     extent);;
+				u64 ds, dl, cs, cl;
+				ds = btrfs_file_extent_disk_bytenr(src,
+								extent);
+				/* ds == 0 is a hole */
+				if (ds == 0)
+					continue;
+
+				dl = btrfs_file_extent_disk_num_bytes(src,
+								extent);
+				cs = btrfs_file_extent_offset(src, extent);
+				cl = btrfs_file_extent_num_bytes(src,
+								extent);;
 				if (btrfs_file_extent_compression(src,
 								  extent)) {
 					cs = 0;
 					cl = dl;
 				}
-				/* ds == 0 is a hole */
-				if (ds != 0) {
-					ret = btrfs_inc_extent_ref(trans, log,
-						   ds, dl,
-						   dst_path->nodes[0]->start,
-						   BTRFS_TREE_LOG_OBJECTID,
-						   trans->transid,
-						   ins_keys[i].objectid);
-					BUG_ON(ret);
-					ret = btrfs_lookup_csums_range(
-						   log->fs_info->csum_root,
-						   ds + cs, ds + cs + cl - 1,
-						   &ordered_sums);
-					BUG_ON(ret);
-				}
+
+				ret = btrfs_lookup_csums_range(
+						log->fs_info->csum_root,
+						ds + cs, ds + cs + cl - 1,
+						&ordered_sums);
+				BUG_ON(ret);
 			}
 		}
-		dst_path->slots[0]++;
 	}
 
 	btrfs_mark_buffer_dirty(dst_path->nodes[0]);
@@ -3029,9 +3001,7 @@
 		BUG_ON(!wc.replay_dest);
 
 		wc.replay_dest->log_root = log;
-		mutex_lock(&fs_info->trans_mutex);
-		btrfs_record_root_in_trans(wc.replay_dest);
-		mutex_unlock(&fs_info->trans_mutex);
+		btrfs_record_root_in_trans(trans, wc.replay_dest);
 		ret = walk_log_tree(trans, log, &wc);
 		BUG_ON(ret);
 
@@ -3049,6 +3019,7 @@
 		key.offset = found_key.offset - 1;
 		wc.replay_dest->log_root = NULL;
 		free_extent_buffer(log->node);
+		free_extent_buffer(log->commit_root);
 		kfree(log);
 
 		if (found_key.offset == 0)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5f01dad..3ab80e9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -161,8 +161,10 @@
 	int again = 0;
 	unsigned long num_run;
 	unsigned long num_sync_run;
+	unsigned long batch_run = 0;
 	unsigned long limit;
 	unsigned long last_waited = 0;
+	int force_reg = 0;
 
 	bdi = blk_get_backing_dev_info(device->bdev);
 	fs_info = device->dev_root->fs_info;
@@ -176,19 +178,22 @@
 
 loop:
 	spin_lock(&device->io_lock);
-	num_run = 0;
 
 loop_lock:
+	num_run = 0;
 
 	/* take all the bios off the list at once and process them
 	 * later on (without the lock held).  But, remember the
 	 * tail and other pointers so the bios can be properly reinserted
 	 * into the list if we hit congestion
 	 */
-	if (device->pending_sync_bios.head)
+	if (!force_reg && device->pending_sync_bios.head) {
 		pending_bios = &device->pending_sync_bios;
-	else
+		force_reg = 1;
+	} else {
 		pending_bios = &device->pending_bios;
+		force_reg = 0;
+	}
 
 	pending = pending_bios->head;
 	tail = pending_bios->tail;
@@ -228,10 +233,14 @@
 	while (pending) {
 
 		rmb();
-		if (pending_bios != &device->pending_sync_bios &&
-		    device->pending_sync_bios.head &&
-		    num_run > 16) {
-			cond_resched();
+		/* we want to work on both lists, but do more bios on the
+		 * sync list than the regular list
+		 */
+		if ((num_run > 32 &&
+		    pending_bios != &device->pending_sync_bios &&
+		    device->pending_sync_bios.head) ||
+		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
+		    device->pending_bios.head)) {
 			spin_lock(&device->io_lock);
 			requeue_list(pending_bios, pending, tail);
 			goto loop_lock;
@@ -249,6 +258,8 @@
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
+		batch_run++;
+
 		if (bio_sync(cur))
 			num_sync_run++;
 
@@ -265,7 +276,7 @@
 		 * is now congested.  Back off and let other work structs
 		 * run instead
 		 */
-		if (pending && bdi_write_congested(bdi) && num_run > 16 &&
+		if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
 		    fs_info->fs_devices->open_devices > 1) {
 			struct io_context *ioc;
 
@@ -366,6 +377,7 @@
 		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
 		fs_devices->latest_devid = devid;
 		fs_devices->latest_trans = found_transid;
+		mutex_init(&fs_devices->device_list_mutex);
 		device = NULL;
 	} else {
 		device = __find_device(&fs_devices->devices, devid,
@@ -392,7 +404,11 @@
 			return -ENOMEM;
 		}
 		INIT_LIST_HEAD(&device->dev_alloc_list);
+
+		mutex_lock(&fs_devices->device_list_mutex);
 		list_add(&device->dev_list, &fs_devices->devices);
+		mutex_unlock(&fs_devices->device_list_mutex);
+
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
 	}
@@ -418,10 +434,12 @@
 	INIT_LIST_HEAD(&fs_devices->devices);
 	INIT_LIST_HEAD(&fs_devices->alloc_list);
 	INIT_LIST_HEAD(&fs_devices->list);
+	mutex_init(&fs_devices->device_list_mutex);
 	fs_devices->latest_devid = orig->latest_devid;
 	fs_devices->latest_trans = orig->latest_trans;
 	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
+	mutex_lock(&orig->device_list_mutex);
 	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
 		device = kzalloc(sizeof(*device), GFP_NOFS);
 		if (!device)
@@ -443,8 +461,10 @@
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
 	}
+	mutex_unlock(&orig->device_list_mutex);
 	return fs_devices;
 error:
+	mutex_unlock(&orig->device_list_mutex);
 	free_fs_devices(fs_devices);
 	return ERR_PTR(-ENOMEM);
 }
@@ -455,6 +475,7 @@
 
 	mutex_lock(&uuid_mutex);
 again:
+	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
 		if (device->in_fs_metadata)
 			continue;
@@ -474,6 +495,7 @@
 		kfree(device->name);
 		kfree(device);
 	}
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (fs_devices->seed) {
 		fs_devices = fs_devices->seed;
@@ -594,6 +616,9 @@
 		device->in_fs_metadata = 0;
 		device->mode = flags;
 
+		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+			fs_devices->rotating = 1;
+
 		fs_devices->open_devices++;
 		if (device->writeable) {
 			fs_devices->rw_devices++;
@@ -1121,12 +1146,14 @@
 
 		device = NULL;
 		devices = &root->fs_info->fs_devices->devices;
+		mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 		list_for_each_entry(tmp, devices, dev_list) {
 			if (tmp->in_fs_metadata && !tmp->bdev) {
 				device = tmp;
 				break;
 			}
 		}
+		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 		bdev = NULL;
 		bh = NULL;
 		disk_super = NULL;
@@ -1181,7 +1208,16 @@
 		goto error_brelse;
 
 	device->in_fs_metadata = 0;
+
+	/*
+	 * the device list mutex makes sure that we don't change
+	 * the device list while someone else is writing out all
+	 * the device supers.
+	 */
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_del_init(&device->dev_list);
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
 	device->fs_devices->num_devices--;
 
 	next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1275,6 +1311,7 @@
 	seed_devices->opened = 1;
 	INIT_LIST_HEAD(&seed_devices->devices);
 	INIT_LIST_HEAD(&seed_devices->alloc_list);
+	mutex_init(&seed_devices->device_list_mutex);
 	list_splice_init(&fs_devices->devices, &seed_devices->devices);
 	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
 	list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1400,6 +1437,10 @@
 	mutex_lock(&root->fs_info->volume_mutex);
 
 	devices = &root->fs_info->fs_devices->devices;
+	/*
+	 * we have the volume lock, so we don't need the extra
+	 * device list mutex while reading the list here.
+	 */
 	list_for_each_entry(device, devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
@@ -1440,6 +1481,7 @@
 	device->io_align = root->sectorsize;
 	device->sector_size = root->sectorsize;
 	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->disk_total_bytes = device->total_bytes;
 	device->dev_root = root->fs_info->dev_root;
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
@@ -1453,6 +1495,12 @@
 	}
 
 	device->fs_devices = root->fs_info->fs_devices;
+
+	/*
+	 * we don't want write_supers to jump in here with our device
+	 * half setup
+	 */
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
 	list_add(&device->dev_alloc_list,
 		 &root->fs_info->fs_devices->alloc_list);
@@ -1461,6 +1509,9 @@
 	root->fs_info->fs_devices->rw_devices++;
 	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+		root->fs_info->fs_devices->rotating = 1;
+
 	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
 				    total_bytes + device->total_bytes);
@@ -1468,6 +1519,7 @@
 	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
 	btrfs_set_super_num_devices(&root->fs_info->super_copy,
 				    total_bytes + 1);
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
 		ret = init_first_rw_device(trans, root, device);
@@ -1670,8 +1722,6 @@
 	int ret;
 	int i;
 
-	printk(KERN_INFO "btrfs relocating chunk %llu\n",
-	       (unsigned long long)chunk_offset);
 	root = root->fs_info->chunk_root;
 	extent_root = root->fs_info->extent_root;
 	em_tree = &root->fs_info->mapping_tree.map_tree;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5c3ff6d..5139a83 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -96,7 +96,12 @@
 	u64 rw_devices;
 	u64 total_rw_bytes;
 	struct block_device *latest_bdev;
-	/* all of the devices in the FS */
+
+	/* all of the devices in the FS, protected by a mutex
+	 * so we can safely walk it to write out the supers without
+	 * worrying about add/remove by the multi-device code
+	 */
+	struct mutex device_list_mutex;
 	struct list_head devices;
 
 	/* devices not currently being allocated */
@@ -107,6 +112,11 @@
 	int seeding;
 
 	int opened;
+
+	/* set when we find or add a device that doesn't have the
+	 * nonrot flag set
+	 */
+	int rotating;
 };
 
 struct btrfs_bio_stripe {
diff --git a/fs/buffer.c b/fs/buffer.c
index aed2977..a3ef091 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1085,12 +1085,12 @@
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
-	if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
+	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
 			(size < 512 || size > PAGE_SIZE))) {
 		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
 					size);
-		printk(KERN_ERR "hardsect size: %d\n",
-					bdev_hardsect_size(bdev));
+		printk(KERN_ERR "logical block size: %d\n",
+					bdev_logical_block_size(bdev));
 
 		dump_stack();
 		return NULL;
@@ -2736,6 +2736,8 @@
 		pos += blocksize;
 	}
 
+	map_bh.b_size = blocksize;
+	map_bh.b_state = 0;
 	err = get_block(inode, iblock, &map_bh, 0);
 	if (err)
 		goto unlock;
@@ -2933,6 +2935,8 @@
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
+	BUG_ON(buffer_delay(bh));
+	BUG_ON(buffer_unwritten(bh));
 
 	/*
 	 * Mask in barrier bit for a write (could be either a WRITE or a
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1e96234..431accd 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -354,7 +354,9 @@
 	/* make sure all pages pinned by operations on behalf of the netfs are
 	 * written to disc */
 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = fsync_super(cache->mnt->mnt_sb);
+	down_read(&cache->mnt->mnt_sb->s_umount);
+	ret = sync_filesystem(cache->mnt->mnt_sb);
+	up_read(&cache->mnt->mnt_sb->s_umount);
 	cachefiles_end_secure(cache, saved_cred);
 
 	if (ret == -EIO)
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 38f7122..b7c9d51 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -375,7 +375,6 @@
 		p = inode->i_cdev;
 		if (!p) {
 			inode->i_cdev = p = new;
-			inode->i_cindex = idx;
 			list_add(&inode->i_devices, &p->list);
 			new = NULL;
 		} else if (!cdev_get(p))
@@ -405,6 +404,18 @@
 	return ret;
 }
 
+int cdev_index(struct inode *inode)
+{
+	int idx;
+	struct kobject *kobj;
+
+	kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
+	if (!kobj)
+		return -1;
+	kobject_put(kobj);
+	return idx;
+}
+
 void cd_forget(struct inode *inode)
 {
 	spin_lock(&cdev_lock);
@@ -557,6 +568,7 @@
 EXPORT_SYMBOL(cdev_alloc);
 EXPORT_SYMBOL(cdev_del);
 EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_index);
 EXPORT_SYMBOL(register_chrdev);
 EXPORT_SYMBOL(unregister_chrdev);
 EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f20c406..b486898 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,12 @@
+Version 1.59
+------------
+Client uses server inode numbers (which are persistent) rather than
+client generated ones by default (mount option "serverino" turned
+on by default if server supports it).  Add forceuid and forcegid
+mount options (so that when negotiating unix extensions specifying
+which uid mounted does not immediately force the server's reported
+uids to be overridden).
+
 Version 1.58
 ------------
 Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
@@ -10,6 +19,8 @@
 session setup to distinguish multiple mounts to same server from different
 userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
 flag to be set to 2, and mount must enable krb5 to turn on extended security).
+Performance of file create to Samba improved (posix create on lookup
+removes 1 of 2 network requests sent on file create)
  
 Version 1.57
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index db208ddb..ad92921 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -262,7 +262,8 @@
 		mount.	
   domain	Set the SMB/CIFS workgroup name prepended to the
 		username during CIFS session establishment
-  uid		Set the default uid for inodes. For mounts to servers
+  forceuid	Set the default uid for inodes based on the uid
+		passed in. For mounts to servers
 		which do support the CIFS Unix extensions, such as a
 		properly configured Samba server, the server provides
 		the uid, gid and mode so this parameter should  not be
@@ -292,6 +293,12 @@
 		the client.  Note that the mount.cifs helper must be
 		at version 1.10 or higher to support specifying the uid
 		(or gid) in non-numeric form.
+  forcegid	(similar to above but for the groupid instead of uid)
+  uid		Set the default uid for inodes, and indicate to the
+		cifs kernel driver which local user mounted . If the server
+		supports the unix extensions the default uid is
+		not used to fill in the owner fields of inodes (files)
+		unless the "forceuid" parameter is specified.
   gid		Set the default gid for inodes (similar to above).
   file_mode     If CIFS Unix extensions are not supported by the server
 		this overrides the default mode for file inodes.
@@ -388,8 +395,13 @@
 		or the CIFS Unix Extensions equivalent and for those
 		this mount option will have no effect.  Exporting cifs mounts
 		under nfsd requires this mount option on the cifs mount.
+		This is now the default if server supports the 
+		required network operation.
   noserverino   Client generates inode numbers (rather than using the actual one
-		from the server) by default.
+		from the server). These inode numbers will vary after
+		unmount or reboot which can confuse some applications,
+		but not all server filesystems support unique inode
+		numbers.
   setuids       If the CIFS Unix extensions are negotiated with the server
 		the client will attempt to set the effective uid and gid of
 		the local process on newly created files, directories, and
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 83d6275..3bb11be 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -275,7 +275,7 @@
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 67bf93a..4a4581c 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <keys/user-type.h>
 #include <linux/key-type.h>
+#include <linux/inet.h>
 #include "cifsglob.h"
 #include "cifs_spnego.h"
 #include "cifs_debug.h"
@@ -73,9 +74,6 @@
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN	13
 
-/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */
-#define MAX_IPV6_ADDR_LEN	43
-
 /* strlen of "host=" */
 #define HOST_KEY_LEN		5
 
@@ -102,7 +100,7 @@
 	   host=hostname sec=mechanism uid=0xFF user=username */
 	desc_len = MAX_VER_STR_LEN +
 		   HOST_KEY_LEN + strlen(hostname) +
-		   IP_KEY_LEN + MAX_IPV6_ADDR_LEN +
+		   IP_KEY_LEN + INET6_ADDRSTRLEN +
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
 		   USER_KEY_LEN + strlen(sesInfo->userName) + 1;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 57ecdc8..1403b5d 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -552,130 +552,138 @@
 	return rc;
 }
 
-
-/* Retrieve an ACL from the server */
-static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
-				       const char *path, const __u16 *pfid)
+static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
+		__u16 fid, u32 *pacllen)
 {
-	struct cifsFileInfo *open_file = NULL;
-	bool unlock_file = false;
-	int xid;
-	int rc = -EIO;
-	__u16 fid;
-	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
 	struct cifs_ntsd *pntsd = NULL;
-
-	cFYI(1, ("get mode from ACL for %s", path));
-
-	if (inode == NULL)
-		return NULL;
+	int xid, rc;
 
 	xid = GetXid();
-	if (pfid == NULL)
-		open_file = find_readable_file(CIFS_I(inode));
-	else
-		fid = *pfid;
+	rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
+	FreeXid(xid);
 
-	sb = inode->i_sb;
-	if (sb == NULL) {
-		FreeXid(xid);
-		return NULL;
-	}
-	cifs_sb = CIFS_SB(sb);
 
-	if (open_file) {
-		unlock_file = true;
-		fid = open_file->netfid;
-	} else if (pfid == NULL) {
-		int oplock = 0;
-		/* open file */
-		rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
-				READ_CONTROL, 0, &fid, &oplock, NULL,
-				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc != 0) {
-			cERROR(1, ("Unable to open file to get ACL"));
-			FreeXid(xid);
-			return NULL;
-		}
+	cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
+	return pntsd;
+}
+
+static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
+		const char *path, u32 *pacllen)
+{
+	struct cifs_ntsd *pntsd = NULL;
+	int oplock = 0;
+	int xid, rc;
+	__u16 fid;
+
+	xid = GetXid();
+
+	rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0,
+			 &fid, &oplock, NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (rc) {
+		cERROR(1, ("Unable to open file to get ACL"));
+		goto out;
 	}
 
 	rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
 	cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
-	if (unlock_file == true) /* find_readable_file increments ref count */
-		atomic_dec(&open_file->wrtPending);
-	else if (pfid == NULL) /* if opened above we have to close the handle */
-		CIFSSMBClose(xid, cifs_sb->tcon, fid);
-	/* else handle was passed in by caller */
 
+	CIFSSMBClose(xid, cifs_sb->tcon, fid);
+ out:
 	FreeXid(xid);
 	return pntsd;
 }
 
+/* Retrieve an ACL from the server */
+static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
+				      struct inode *inode, const char *path,
+				      u32 *pacllen)
+{
+	struct cifs_ntsd *pntsd = NULL;
+	struct cifsFileInfo *open_file = NULL;
+
+	if (inode)
+		open_file = find_readable_file(CIFS_I(inode));
+	if (!open_file)
+		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
+
+	pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
+	atomic_dec(&open_file->wrtPending);
+	return pntsd;
+}
+
+static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
+		struct cifs_ntsd *pnntsd, u32 acllen)
+{
+	int xid, rc;
+
+	xid = GetXid();
+	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
+	FreeXid(xid);
+
+	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
+	return rc;
+}
+
+static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
+		struct cifs_ntsd *pnntsd, u32 acllen)
+{
+	int oplock = 0;
+	int xid, rc;
+	__u16 fid;
+
+	xid = GetXid();
+
+	rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0,
+			 &fid, &oplock, NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (rc) {
+		cERROR(1, ("Unable to open file to set ACL"));
+		goto out;
+	}
+
+	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
+	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
+
+	CIFSSMBClose(xid, cifs_sb->tcon, fid);
+ out:
+	FreeXid(xid);
+	return rc;
+}
+
 /* Set an ACL on the server */
 static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 				struct inode *inode, const char *path)
 {
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *open_file;
-	bool unlock_file = false;
-	int xid;
-	int rc = -EIO;
-	__u16 fid;
-	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
+	int rc;
 
 	cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
 
-	if (!inode)
-		return rc;
-
-	sb = inode->i_sb;
-	if (sb == NULL)
-		return rc;
-
-	cifs_sb = CIFS_SB(sb);
-	xid = GetXid();
-
 	open_file = find_readable_file(CIFS_I(inode));
-	if (open_file) {
-		unlock_file = true;
-		fid = open_file->netfid;
-	} else {
-		int oplock = 0;
-		/* open file */
-		rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
-				WRITE_DAC, 0, &fid, &oplock, NULL,
-				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc != 0) {
-			cERROR(1, ("Unable to open file to set ACL"));
-			FreeXid(xid);
-			return rc;
-		}
-	}
+	if (!open_file)
+		return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
 
-	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
-	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
-	if (unlock_file)
-		atomic_dec(&open_file->wrtPending);
-	else
-		CIFSSMBClose(xid, cifs_sb->tcon, fid);
-
-	FreeXid(xid);
-
+	rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
+	atomic_dec(&open_file->wrtPending);
 	return rc;
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid)
+void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
+		     const char *path, const __u16 *pfid)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	u32 acllen = 0;
 	int rc = 0;
 
 	cFYI(DBG2, ("converting ACL to mode for %s", path));
-	pntsd = get_cifs_acl(&acllen, inode, path, pfid);
+
+	if (pfid)
+		pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
+	else
+		pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
 	if (pntsd)
@@ -698,7 +706,7 @@
 	cFYI(DBG2, ("set ACL from mode for %s", path));
 
 	/* Get the security descriptor */
-	pntsd = get_cifs_acl(&secdesclen, inode, path, NULL);
+	pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
 
 	/* Add three ACEs for owner, group, everyone getting rid of
 	   other ACEs as chmod disables ACEs and set the security descriptor */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5e6d358..0d92114 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -146,7 +146,7 @@
 #endif
 	sb->s_blocksize = CIFS_MAX_MSGSIZE;
 	sb->s_blocksize_bits = 14;	/* default 2**14 = CIFS_MAX_MSGSIZE */
-	inode = cifs_iget(sb, ROOT_I);
+	inode = cifs_root_iget(sb, ROOT_I);
 
 	if (IS_ERR(inode)) {
 		rc = PTR_ERR(inode);
@@ -204,6 +204,9 @@
 		cFYI(1, ("Empty cifs superblock info passed to unmount"));
 		return;
 	}
+
+	lock_kernel();
+
 	rc = cifs_umount(sb, cifs_sb);
 	if (rc)
 		cERROR(1, ("cifs_umount failed with return code %d", rc));
@@ -216,7 +219,8 @@
 
 	unload_nls(cifs_sb->local_nls);
 	kfree(cifs_sb);
-	return;
+
+	unlock_kernel();
 }
 
 static int
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 051b71c..9570a0e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,7 +36,7 @@
 
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
 extern int cifs_create(struct inode *, struct dentry *, int,
 		       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -100,5 +100,5 @@
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.58"
+#define CIFS_VERSION   "1.59"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fae0839..f945232 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -90,10 +90,10 @@
 						 struct cifsTconInfo *);
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
 extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
-extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601);
+extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
-extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
-extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
+extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
+				      int offset);
 
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
 			   struct super_block *sb, int mode, int oflags,
@@ -108,8 +108,8 @@
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, int xid);
-extern void acl_to_uid_mode(struct inode *inode, const char *path,
-			    const __u16 *pfid);
+extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
+			    const char *path, const __u16 *pfid);
 extern int mode_to_acl(struct inode *inode, const char *path, __u64);
 
 extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d062602..b84c61d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -524,8 +524,8 @@
 			int val, seconds, remain, result;
 			struct timespec ts, utc;
 			utc = CURRENT_TIME;
-			ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date),
-						le16_to_cpu(rsp->SrvTime.Time));
+			ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
+					    rsp->SrvTime.Time, 0);
 			cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d",
 				(int)ts.tv_sec, (int)utc.tv_sec,
 				(int)(utc.tv_sec - ts.tv_sec)));
@@ -2427,8 +2427,7 @@
 	params = 2 /* level */  + 4 /* rsrvd */  + name_len /* incl null */ ;
 	pSMB->TotalDataCount = 0;
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	/* BB find exact max data count below from sess structure BB */
-	pSMB->MaxDataCount = cpu_to_le16(4000);
+	pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize);
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
 	pSMB->Flags = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4aa81a5..97f4311 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -35,6 +35,7 @@
 #include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
+#include <linux/inet.h>
 #include <net/ipv6.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -61,7 +62,6 @@
 	char *domainname;
 	char *UNC;
 	char *UNCip;
-	char *in6_addr;   /* ipv6 address as human readable form of in6_addr */
 	char *iocharset;  /* local code page for mapping to and from Unicode */
 	char source_rfc1001_name[16]; /* netbios name of client */
 	char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
@@ -827,14 +827,16 @@
 	vol->target_rfc1001_name[0] = 0;
 	vol->linux_uid = current_uid();  /* use current_euid() instead? */
 	vol->linux_gid = current_gid();
-	vol->dir_mode = S_IRWXUGO;
-	/* 2767 perms indicate mandatory locking support */
-	vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
+
+	/* default to only allowing write access to owner of the mount */
+	vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
 
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
 	vol->rw = true;
 	/* default is always to request posix paths. */
 	vol->posix_paths = 1;
+	/* default to using server inode numbers where available */
+	vol->server_ino = 1;
 
 	if (!options)
 		return 1;
@@ -955,10 +957,12 @@
 				}
 				strcpy(vol->password, value);
 			}
-		} else if (strnicmp(data, "ip", 2) == 0) {
+		} else if (!strnicmp(data, "ip", 2) ||
+			   !strnicmp(data, "addr", 4)) {
 			if (!value || !*value) {
 				vol->UNCip = NULL;
-			} else if (strnlen(value, 35) < 35) {
+			} else if (strnlen(value, INET6_ADDRSTRLEN) <
+							INET6_ADDRSTRLEN) {
 				vol->UNCip = value;
 			} else {
 				printk(KERN_WARNING "CIFS: ip address "
@@ -1092,17 +1096,17 @@
 				return 1;
 			}
 		} else if (strnicmp(data, "uid", 3) == 0) {
-			if (value && *value) {
+			if (value && *value)
 				vol->linux_uid =
 					simple_strtoul(value, &value, 0);
+		} else if (strnicmp(data, "forceuid", 8) == 0) {
 				vol->override_uid = 1;
-			}
 		} else if (strnicmp(data, "gid", 3) == 0) {
-			if (value && *value) {
+			if (value && *value)
 				vol->linux_gid =
 					simple_strtoul(value, &value, 0);
+		} else if (strnicmp(data, "forcegid", 8) == 0) {
 				vol->override_gid = 1;
-			}
 		} else if (strnicmp(data, "file_mode", 4) == 0) {
 			if (value && *value) {
 				vol->file_mode =
@@ -1315,16 +1319,6 @@
 			vol->direct_io = 1;
 		} else if (strnicmp(data, "forcedirectio", 13) == 0) {
 			vol->direct_io = 1;
-		} else if (strnicmp(data, "in6_addr", 8) == 0) {
-			if (!value || !*value) {
-				vol->in6_addr = NULL;
-			} else if (strnlen(value, 49) == 48) {
-				vol->in6_addr = value;
-			} else {
-				printk(KERN_WARNING "CIFS: ip v6 address not "
-						    "48 characters long\n");
-				return 1;
-			}
 		} else if (strnicmp(data, "noac", 4) == 0) {
 			printk(KERN_WARNING "CIFS: Mount option noac not "
 				"supported. Instead set "
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 302ea15..0686684 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -241,7 +241,7 @@
 	/* BB need same check in cifs_create too? */
 	/* if not oplocked, invalidate inode pages if mtime or file
 	   size changed */
-	temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
+	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
 	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
 			   (file->f_path.dentry->d_inode->i_size ==
 			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9c869a6..fad882b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -85,10 +85,10 @@
 	__u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
 	__u64 end_of_file = le64_to_cpu(info->EndOfFile);
 
-	inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime));
+	inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime);
 	inode->i_mtime =
-		cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime));
-	inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange));
+		cifs_NTtimeToUnix(info->LastModificationTime);
+	inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
 	inode->i_mode = le64_to_cpu(info->Permissions);
 
 	/*
@@ -554,14 +554,11 @@
 
 	/* Linux can not store file creation time so ignore it */
 	if (pfindData->LastAccessTime)
-		inode->i_atime = cifs_NTtimeToUnix
-			(le64_to_cpu(pfindData->LastAccessTime));
+		inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
 	else /* do not need to use current_fs_time - time not stored */
 		inode->i_atime = CURRENT_TIME;
-	inode->i_mtime =
-		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
-	inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+	inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
+	inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
 	cFYI(DBG2, ("Attributes came in as 0x%x", attr));
 	if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
 		inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
@@ -629,7 +626,7 @@
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		cFYI(1, ("Getting mode bits from ACL"));
-		acl_to_uid_mode(inode, full_path, pfid);
+		acl_to_uid_mode(cifs_sb, inode, full_path, pfid);
 	}
 #endif
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
@@ -699,7 +696,7 @@
 }
 
 /* gets root inode */
-struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 {
 	int xid;
 	struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index e2fe998..32d6baa 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -853,12 +853,12 @@
 
 #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
 
-    /*
-     * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
-     * into Unix UTC (based 1970-01-01, in seconds).
-     */
+/*
+ * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
+ * into Unix UTC (based 1970-01-01, in seconds).
+ */
 struct timespec
-cifs_NTtimeToUnix(u64 ntutc)
+cifs_NTtimeToUnix(__le64 ntutc)
 {
 	struct timespec ts;
 	/* BB what about the timezone? BB */
@@ -866,7 +866,7 @@
 	/* Subtract the NTFS time offset, then convert to 1s intervals. */
 	u64 t;
 
-	t = ntutc - NTFS_TIME_OFFSET;
+	t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
 	ts.tv_nsec = do_div(t, 10000000) * 100;
 	ts.tv_sec = t;
 	return ts;
@@ -883,16 +883,12 @@
 static int total_days_of_prev_months[] =
 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
 
-
-__le64 cnvrtDosCifsTm(__u16 date, __u16 time)
-{
-	return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time)));
-}
-
-struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
+struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
 {
 	struct timespec ts;
 	int sec, min, days, month, year;
+	u16 date = le16_to_cpu(le_date);
+	u16 time = le16_to_cpu(le_time);
 	SMB_TIME *st = (SMB_TIME *)&time;
 	SMB_DATE *sd = (SMB_DATE *)&date;
 
@@ -933,7 +929,7 @@
 		days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
 	sec += 24 * 60 * 60 * days;
 
-	ts.tv_sec = sec;
+	ts.tv_sec = sec + offset;
 
 	/* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 964e097..86d0055 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -115,17 +115,6 @@
 	return rc;
 }
 
-static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
-{
-	if ((tcon) && (tcon->ses) && (tcon->ses->server)) {
-		inode->i_ctime.tv_sec += tcon->ses->server->timeAdj;
-		inode->i_mtime.tv_sec += tcon->ses->server->timeAdj;
-		inode->i_atime.tv_sec += tcon->ses->server->timeAdj;
-	}
-	return;
-}
-
-
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 			  char *buf, unsigned int *pobject_type, int isNewInode)
 {
@@ -150,26 +139,25 @@
 		allocation_size = le64_to_cpu(pfindData->AllocationSize);
 		end_of_file = le64_to_cpu(pfindData->EndOfFile);
 		tmp_inode->i_atime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+			cifs_NTtimeToUnix(pfindData->LastAccessTime);
 		tmp_inode->i_mtime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
+			cifs_NTtimeToUnix(pfindData->LastWriteTime);
 		tmp_inode->i_ctime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+			cifs_NTtimeToUnix(pfindData->ChangeTime);
 	} else { /* legacy, OS2 and DOS style */
-/*		struct timespec ts;*/
+		int offset = cifs_sb->tcon->ses->server->timeAdj;
 		FIND_FILE_STANDARD_INFO *pfindData =
 			(FIND_FILE_STANDARD_INFO *)buf;
 
-		tmp_inode->i_mtime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastWriteDate),
-				le16_to_cpu(pfindData->LastWriteTime));
-		tmp_inode->i_atime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastAccessDate),
-				le16_to_cpu(pfindData->LastAccessTime));
-		tmp_inode->i_ctime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastWriteDate),
-				le16_to_cpu(pfindData->LastWriteTime));
-		AdjustForTZ(cifs_sb->tcon, tmp_inode);
+		tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
+						    pfindData->LastWriteTime,
+						    offset);
+		tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
+						    pfindData->LastAccessTime,
+						    offset);
+		tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
+						    pfindData->LastWriteTime,
+						    offset);
 		attr = le16_to_cpu(pfindData->Attributes);
 		allocation_size = le32_to_cpu(pfindData->AllocationSize);
 		end_of_file = le32_to_cpu(pfindData->DataSize);
@@ -331,11 +319,11 @@
 	local_size  = tmp_inode->i_size;
 
 	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+	    cifs_NTtimeToUnix(pfindData->LastAccessTime);
 	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastModificationTime));
+	    cifs_NTtimeToUnix(pfindData->LastModificationTime);
 	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastStatusChange));
+	    cifs_NTtimeToUnix(pfindData->LastStatusChange);
 
 	tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
 	/* since we set the inode type below we need to mask off type
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 6a347fb..ffd4281 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,6 +47,8 @@
 		      struct pipe_inode_info *pipe, size_t count,
 		      unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	struct coda_file_info *cfi;
 	struct file *host_file;
 
@@ -54,10 +56,11 @@
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->splice_read)
-		return -EINVAL;
+	splice_read = host_file->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
 
-	return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
+	return splice_read(host_file, ppos, pipe, count, flags);
 }
 
 static ssize_t
diff --git a/fs/compat.c b/fs/compat.c
index 681ed81..6aefb77 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -812,10 +812,8 @@
 		}
 	}
 
-	lock_kernel();
 	retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
 			flags, (void*)data_page);
-	unlock_kernel();
 
  out4:
 	free_page(data_page);
@@ -1488,7 +1486,7 @@
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1550,7 +1548,7 @@
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1573,7 +1571,7 @@
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/fs/dcache.c b/fs/dcache.c
index 75659a6..9e5cd3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1910,7 +1910,7 @@
 
 	spin_lock(&vfsmount_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, " (deleted)", 10) != 0))
 			goto Elong;
 
@@ -2035,7 +2035,7 @@
 
 	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, "//deleted", 9) != 0))
 			goto Elong;
 	if (buflen < 1)
@@ -2097,9 +2097,8 @@
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
-	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
+	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		struct path tmp = root;
 		char * cwd;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c68edb9..9b1d285 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -557,8 +557,10 @@
 	int err = register_filesystem(&devpts_fs_type);
 	if (!err) {
 		devpts_mnt = kern_mount(&devpts_fs_type);
-		if (IS_ERR(devpts_mnt))
+		if (IS_ERR(devpts_mnt)) {
 			err = PTR_ERR(devpts_mnt);
+			unregister_filesystem(&devpts_fs_type);
+		}
 	}
 	return err;
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 05763bb..8b10b87 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1127,7 +1127,7 @@
 		rw = WRITE_ODIRECT;
 
 	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
 
 	if (offset & blocksize_mask) {
 		if (bdev)
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 858fba1..c4dfa1d 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,7 +49,8 @@
 	spin_unlock(&ls->ls_recover_list_lock);
 
 	if (!found)
-		de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
+		de = kzalloc(sizeof(struct dlm_direntry) + len,
+			     ls->ls_allocation);
 	return de;
 }
 
@@ -211,7 +212,7 @@
 
 	dlm_dir_clear(ls);
 
-	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
+	last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
 	if (!last_name)
 		goto out;
 
@@ -322,7 +323,7 @@
 	if (namelen > DLM_RESNAME_MAXLEN)
 		return -EINVAL;
 
-	de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
+	de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
 	if (!de)
 		return -ENOMEM;
 
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df..d489fcc 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@
 	dlm_astd_stop();
 }
 
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
 			 uint32_t flags, int lvblen)
 {
 	struct dlm_ls *ls;
@@ -419,16 +419,14 @@
 			break;
 		}
 		ls->ls_create_count++;
-		module_put(THIS_MODULE);
-		error = 1; /* not an error, return 0 */
+		*lockspace = ls;
+		error = 1;
 		break;
 	}
 	spin_unlock(&lslist_lock);
 
-	if (error < 0)
-		goto out;
 	if (error)
-		goto ret_zero;
+		goto out;
 
 	error = -ENOMEM;
 
@@ -583,7 +581,6 @@
 	dlm_create_debug_file(ls);
 
 	log_debug(ls, "join complete");
- ret_zero:
 	*lockspace = ls;
 	return 0;
 
@@ -614,7 +611,7 @@
 	return error;
 }
 
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 		      uint32_t flags, int lvblen)
 {
 	int error = 0;
@@ -628,7 +625,9 @@
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
-	else if (!ls_count)
+	if (error > 0)
+		error = 0;
+	if (!ls_count)
 		threads_stop();
  out:
 	mutex_unlock(&ls_lock);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609108a..cdb580a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -309,6 +309,20 @@
 		lowcomms_write_space(sk);
 }
 
+int dlm_lowcomms_connect_node(int nodeid)
+{
+	struct connection *con;
+
+	if (nodeid == dlm_our_nodeid())
+		return 0;
+
+	con = nodeid2con(nodeid, GFP_NOFS);
+	if (!con)
+		return -ENOMEM;
+	lowcomms_connect_sock(con);
+	return 0;
+}
+
 /* Make a socket active */
 static int add_sock(struct socket *sock, struct connection *con)
 {
@@ -486,7 +500,7 @@
 				return;
 			}
 
-			new_con = nodeid2con(nodeid, GFP_KERNEL);
+			new_con = nodeid2con(nodeid, GFP_NOFS);
 			if (!new_con)
 				return;
 
@@ -722,7 +736,7 @@
 	 *  the same time and the connections cross on the wire.
 	 *  In this case we store the incoming one in "othercon"
 	 */
-	newcon = nodeid2con(nodeid, GFP_KERNEL);
+	newcon = nodeid2con(nodeid, GFP_NOFS);
 	if (!newcon) {
 		result = -ENOMEM;
 		goto accept_err;
@@ -732,7 +746,7 @@
 		struct connection *othercon = newcon->othercon;
 
 		if (!othercon) {
-			othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
 			if (!othercon) {
 				log_print("failed to allocate incoming socket");
 				mutex_unlock(&newcon->sock_mutex);
@@ -1421,7 +1435,7 @@
 static void stop_conn(struct connection *con)
 {
 	con->flags |= 0x0F;
-	if (con->sock)
+	if (con->sock && con->sock->sk)
 		con->sock->sk->sk_user_data = NULL;
 }
 
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a9a9618..1311e64 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,7 @@
 int dlm_lowcomms_close(int nodeid);
 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
 void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 26133f0..b128775 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include "recover.h"
 #include "rcom.h"
 #include "config.h"
+#include "lowcomms.h"
 
 static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 {
@@ -45,9 +46,9 @@
 static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 {
 	struct dlm_member *memb;
-	int w;
+	int w, error;
 
-	memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+	memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 	if (!memb)
 		return -ENOMEM;
 
@@ -57,6 +58,12 @@
 		return w;
 	}
 
+	error = dlm_lowcomms_connect_node(nodeid);
+	if (error < 0) {
+		kfree(memb);
+		return error;
+	}
+
 	memb->nodeid = nodeid;
 	memb->weight = w;
 	add_ordered_member(ls, memb);
@@ -136,7 +143,7 @@
 
 	ls->ls_total_weight = total;
 
-	array = kmalloc(sizeof(int) * total, GFP_KERNEL);
+	array = kmalloc(sizeof(int) * total, ls->ls_allocation);
 	if (!array)
 		return;
 
@@ -219,7 +226,7 @@
 			continue;
 		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
 
-		memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+		memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 		if (!memb)
 			return -ENOMEM;
 		memb->nodeid = rv->new[i];
@@ -334,7 +341,7 @@
 	int *ids = NULL, *new = NULL;
 	int error, ids_count = 0, new_count = 0;
 
-	rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
+	rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
 	if (!rv)
 		return -ENOMEM;
 
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index daa4183..7a2307c 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@
 	struct rq_entry *e;
 	int length = ms->m_header.h_length - sizeof(struct dlm_message);
 
-	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
+	e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory len %d", length);
 		return;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fa4c7e7..12d6496 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/key.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
@@ -120,9 +121,13 @@
 {
 	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
 
+	lock_kernel();
+
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d5..3f0e197 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/eventfd.h>
 #include <linux/syscalls.h>
+#include <linux/module.h>
 
 struct eventfd_ctx {
 	wait_queue_head_t wqh;
@@ -56,6 +57,7 @@
 
 	return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@
 
 	return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
diff --git a/fs/exec.c b/fs/exec.c
index 895823d..e639957 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/perf_counter.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
@@ -922,6 +923,7 @@
 	task_lock(tsk);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
+	perf_counter_comm(tsk);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -990,6 +992,13 @@
 
 	current->personality &= ~bprm->per_clear;
 
+	/*
+	 * Flush performance counters when crossing a
+	 * security domain:
+	 */
+	if (!get_dumpable(current->mm))
+		perf_counter_exit_task(current);
+
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
 
@@ -1016,7 +1025,7 @@
 	commit_creds(bprm->cred);
 	bprm->cred = NULL;
 
-	/* cred_exec_mutex must be held at least to this point to prevent
+	/* cred_guard_mutex must be held at least to this point to prevent
 	 * ptrace_attach() from altering our determination of the task's
 	 * credentials; any time after this it may be unlocked */
 
@@ -1026,7 +1035,7 @@
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold current->cred_exec_mutex to protect against
+ * - the caller must hold current->cred_guard_mutex to protect against
  *   PTRACE_ATTACH
  */
 int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1268,7 +1277,7 @@
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1331,7 +1340,7 @@
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1354,7 +1363,7 @@
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4..24667ee 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@
 
 int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
 
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
 #endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fa..77d0a29 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@
 		struct inode *inode)
 {
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
-	struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
 
 	pcol->sbi = sbi;
-	pcol->req_q = req_q;
+	pcol->req_q = osd_request_queue(sbi->s_dev);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
@@ -266,7 +265,7 @@
 		goto err;
 	}
 
-	osd_req_read(or, &obj, pcol->bio, i_start);
+	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
 
 	if (is_sync) {
 		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +521,8 @@
 
 	*pcol_copy = *pcol;
 
-	osd_req_write(or, &obj, pcol_copy->bio, i_start);
+	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
 	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
 	if (unlikely(ret)) {
 		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b249ae9..b3d2ccb 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -50,10 +50,10 @@
 
 	/* FIXME: should be include in osd_sense_info */
 	if (in_resid)
-		*in_resid = or->in.req ? or->in.req->data_len : 0;
+		*in_resid = or->in.req ? or->in.req->resid_len : 0;
 
 	if (out_resid)
-		*out_resid = or->out.req ? or->out.req->data_len : 0;
+		*out_resid = or->out.req ? or->out.req->resid_len : 0;
 
 	return ret;
 }
@@ -125,29 +125,3 @@
 
 	return -EIO;
 }
-
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_read(or, obj, bio, offset);
-	return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_write(or, obj, bio, offset);
-	return 0;
-}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f1985e..8216c5b 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -200,20 +200,21 @@
 /*
  * Write the superblock to the OSD
  */
-static void exofs_write_super(struct super_block *sb)
+static int exofs_sync_fs(struct super_block *sb, int wait)
 {
 	struct exofs_sb_info *sbi;
 	struct exofs_fscb *fscb;
 	struct osd_request *or;
 	struct osd_obj_id obj;
-	int ret;
+	int ret = -ENOMEM;
 
 	fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
 	if (!fscb) {
 		EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
-		return;
+		return -ENOMEM;
 	}
 
+	lock_super(sb);
 	lock_kernel();
 	sbi = sb->s_fs_info;
 	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -246,7 +247,17 @@
 	if (or)
 		osd_end_request(or);
 	unlock_kernel();
+	unlock_super(sb);
 	kfree(fscb);
+	return ret;
+}
+
+static void exofs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		exofs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 /*
@@ -258,6 +269,11 @@
 	int num_pend;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		exofs_write_super(sb);
+
 	/* make sure there are no pending commands */
 	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
 	     num_pend = atomic_read(&sbi->s_curr_pending)) {
@@ -271,6 +287,8 @@
 	osduld_put_device(sbi->s_dev);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 /*
@@ -484,6 +502,7 @@
 	.delete_inode   = exofs_delete_inode,
 	.put_super      = exofs_put_super,
 	.write_super    = exofs_write_super,
+	.sync_fs	= exofs_sync_fs,
 	.statfs         = exofs_statfs,
 };
 
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index e0b2b43..f42af45 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_EXT2_FS) += ext2.o
 
-ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \
+ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
 	  ioctl.o namei.o super.o symlink.o
 
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2999d72..0035004 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -720,5 +720,5 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3203042..b2bbf45 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,9 +113,6 @@
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
 
-/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *, int);
-
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, int);
 extern void ext2_free_inode (struct inode *);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 45ed071..2b9e47d 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,7 +55,7 @@
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
@@ -72,7 +72,7 @@
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };
 #endif
 
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
deleted file mode 100644
index fc66c93..0000000
--- a/fs/ext2/fsync.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  linux/fs/ext2/fsync.c
- *
- *  Copyright (C) 1993  Stephen Tweedie (sct@dcs.ed.ac.uk)
- *  from
- *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
- *                      Laboratoire MASI - Institut Blaise Pascal
- *                      Universite Pierre et Marie Curie (Paris VI)
- *  from
- *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
- * 
- *  ext2fs fsync primitive
- *
- *  Big-endian to little-endian byte-swapping/bitmaps by
- *        David S. Miller (davem@caip.rutgers.edu), 1995
- * 
- *  Removed unnecessary code duplication for little endian machines
- *  and excessive __inline__s. 
- *        Andi Kleen, 1997
- *
- * Major simplications and cleanup - we only need to do the metadata, because
- * we can depend on generic_block_fdatasync() to sync the data blocks.
- */
-
-#include "ext2.h"
-#include <linux/buffer_head.h>		/* for sync_mapping_buffers() */
-
-
-/*
- *	File may be NULL when we are called. Perhaps we shouldn't
- *	even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-	int ret;
-
-	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return ret;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return ret;
-
-	err = ext2_sync_inode(inode);
-	if (ret == 0)
-		ret = err;
-	return ret;
-}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index acf6788..29ed682 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,8 +41,6 @@
 MODULE_DESCRIPTION("Second Extended Filesystem");
 MODULE_LICENSE("GPL");
 
-static int ext2_update_inode(struct inode * inode, int do_sync);
-
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -66,7 +64,7 @@
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
 	mark_inode_dirty(inode);
-	ext2_update_inode(inode, inode_needs_sync(inode));
+	ext2_write_inode(inode, inode_needs_sync(inode));
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
@@ -1337,7 +1335,7 @@
 	return ERR_PTR(ret);
 }
 
-static int ext2_update_inode(struct inode * inode, int do_sync)
+int ext2_write_inode(struct inode *inode, int do_sync)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1442,11 +1440,6 @@
 	return err;
 }
 
-int ext2_write_inode(struct inode *inode, int wait)
-{
-	return ext2_update_inode(inode, wait);
-}
-
 int ext2_sync_inode(struct inode *inode)
 {
 	struct writeback_control wbc = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5c4afe6..4589996 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,7 @@
 			    struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext2_sync_fs(struct super_block *sb, int wait);
 
 void ext2_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
@@ -114,6 +115,11 @@
 	int i;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		ext2_write_super(sb);
+
 	ext2_xattr_put_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
@@ -135,7 +141,7 @@
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * ext2_inode_cachep;
@@ -304,6 +310,7 @@
 	.delete_inode	= ext2_delete_inode,
 	.put_super	= ext2_put_super,
 	.write_super	= ext2_write_super,
+	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
 	.clear_inode	= ext2_clear_inode,
@@ -1093,6 +1100,7 @@
 	brelse(bh);
 failed_sbi:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	return ret;
 }
@@ -1126,25 +1134,36 @@
  * set s_state to EXT2_VALID_FS after some corrections.
  */
 
-void ext2_write_super (struct super_block * sb)
+static int ext2_sync_fs(struct super_block *sb, int wait)
 {
-	struct ext2_super_block * es;
-	lock_kernel();
-	if (!(sb->s_flags & MS_RDONLY)) {
-		es = EXT2_SB(sb)->s_es;
+	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
 
-		if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
-			ext2_debug ("setting valid to 0\n");
-			es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
-			es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
-			es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
-			es->s_mtime = cpu_to_le32(get_seconds());
-			ext2_sync_super(sb, es);
-		} else
-			ext2_commit_super (sb, es);
+	lock_kernel();
+	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
+		ext2_debug("setting valid to 0\n");
+		es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
+		es->s_free_blocks_count =
+			cpu_to_le32(ext2_count_free_blocks(sb));
+		es->s_free_inodes_count =
+			cpu_to_le32(ext2_count_free_inodes(sb));
+		es->s_mtime = cpu_to_le32(get_seconds());
+		ext2_sync_super(sb, es);
+	} else {
+		ext2_commit_super(sb, es);
 	}
 	sb->s_dirt = 0;
 	unlock_kernel();
+
+	return 0;
+}
+
+
+void ext2_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ext2_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static int ext2_remount (struct super_block * sb, int * flags, char * data)
@@ -1156,6 +1175,8 @@
 	unsigned long old_sb_flags;
 	int err;
 
+	lock_kernel();
+
 	/* Store the old options */
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
@@ -1191,12 +1212,16 @@
 		sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
 		sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
 	}
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
-		    !(sbi->s_mount_state & EXT2_VALID_FS))
+		    !(sbi->s_mount_state & EXT2_VALID_FS)) {
+			unlock_kernel();
 			return 0;
+		}
 		/*
 		 * OK, we are remounting a valid rw partition rdonly, so set
 		 * the rdonly flag and then mark the partition as valid again.
@@ -1223,12 +1248,14 @@
 			sb->s_flags &= ~MS_RDONLY;
 	}
 	ext2_sync_super(sb, es);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sbi->s_mount_opt = old_opts.s_mount_opt;
 	sbi->s_resuid = old_opts.s_resuid;
 	sbi->s_resgid = old_opts.s_resgid;
 	sb->s_flags = old_sb_flags;
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 225202d..27967f9 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -649,7 +649,7 @@
 		count = overflow;
 		goto do_more;
 	}
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, err);
@@ -1708,7 +1708,6 @@
 	if (!fatal)
 		fatal = err;
 
-	sb->s_dirt = 1;
 	if (fatal)
 		goto out;
 
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index dd13d60..b399912 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -181,7 +181,7 @@
 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (!fatal)
 		fatal = err;
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, fatal);
@@ -537,7 +537,6 @@
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
 		percpu_counter_inc(&sbi->s_dirs_counter);
-	sb->s_dirt = 1;
 
 	inode->i_uid = current_fsuid();
 	if (test_opt (sb, GRPID))
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fcfa243..b0248c6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2960,7 +2960,6 @@
 				ext3_update_dynamic_rev(sb);
 				EXT3_SET_RO_COMPAT_FEATURE(sb,
 					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-				sb->s_dirt = 1;
 				handle->h_sync = 1;
 				err = ext3_journal_dirty_metadata(handle,
 						EXT3_SB(sb)->s_sbh);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 78fdf38..8a0b263 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -934,7 +934,6 @@
 			   EXT3_INODES_PER_GROUP(sb));
 
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
-	sb->s_dirt = 1;
 
 exit_journal:
 	unlock_super(sb);
@@ -1066,7 +1065,6 @@
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	sb->s_dirt = 1;
 	unlock_super(sb);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe..26aa64d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -67,7 +67,6 @@
 static int ext3_remount (struct super_block * sb, int * flags, char * data);
 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
 static int ext3_unfreeze(struct super_block *sb);
-static void ext3_write_super (struct super_block * sb);
 static int ext3_freeze(struct super_block *sb);
 
 /*
@@ -399,6 +398,8 @@
 	struct ext3_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_kernel();
+
 	ext3_xattr_put_super(sb);
 	err = journal_destroy(sbi->s_journal);
 	sbi->s_journal = NULL;
@@ -447,7 +448,8 @@
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *ext3_inode_cachep;
@@ -761,7 +763,6 @@
 	.dirty_inode	= ext3_dirty_inode,
 	.delete_inode	= ext3_delete_inode,
 	.put_super	= ext3_put_super,
-	.write_super	= ext3_write_super,
 	.sync_fs	= ext3_sync_fs,
 	.freeze_fs	= ext3_freeze,
 	.unfreeze_fs	= ext3_unfreeze,
@@ -1696,7 +1697,7 @@
 		goto failed_mount;
 	}
 
-	hblock = bdev_hardsect_size(sb->s_bdev);
+	hblock = bdev_logical_block_size(sb->s_bdev);
 	if (sb->s_blocksize != blocksize) {
 		/*
 		 * Make sure the blocksize for the filesystem is larger
@@ -1785,7 +1786,6 @@
 #else
 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
 #endif
-		sb->s_dirt = 1;
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
@@ -2021,6 +2021,7 @@
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
@@ -2119,7 +2120,7 @@
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
 			"EXT3-fs: blocksize too small for journal device.\n");
@@ -2264,7 +2265,6 @@
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
 		ext3_commit_super(sb, es, 1);
@@ -2307,7 +2307,6 @@
 	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
 
 	es->s_journal_inum = cpu_to_le32(journal_inum);
-	sb->s_dirt = 1;
 
 	/* Make sure we flush the recovery flag to disk. */
 	ext3_commit_super(sb, es, 1);
@@ -2353,7 +2352,6 @@
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
 		ext3_commit_super(sb, es, 1);
 	}
 	unlock_super(sb);
@@ -2412,29 +2410,14 @@
 		return 0;
 
 	journal = EXT3_SB(sb)->s_journal;
-	sb->s_dirt = 0;
 	ret = ext3_journal_force_commit(journal);
 	return ret;
 }
 
-/*
- * Ext3 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
- */
-static void ext3_write_super (struct super_block * sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ext3_sync_fs(struct super_block *sb, int wait)
 {
 	tid_t target;
 
-	sb->s_dirt = 0;
 	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
 		if (wait)
 			log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2450,7 +2433,6 @@
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		journal = EXT3_SB(sb)->s_journal;
@@ -2508,7 +2490,10 @@
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -2616,6 +2601,8 @@
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sb->s_flags = old_sb_flags;
@@ -2632,6 +2619,8 @@
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 83b7be8..545e37c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -463,7 +463,6 @@
 
 	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
 		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
-		sb->s_dirt = 1;
 		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	}
 }
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003..8a34710 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		   ext4_jbd2.o migrate.o mballoc.o
+		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad..e2126d7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
 #include <linux/buffer_head.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "group.h"
 #include "mballoc.h"
 
 /*
@@ -88,6 +87,7 @@
 		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
 	int bit, bit_max;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned free_blocks, group_blocks;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -123,7 +123,7 @@
 		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == sbi->s_groups_count - 1) {
+	if (block_group == ngroups - 1) {
 		/*
 		 * Even though mke2fs always initialize first and last group
 		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@
 		 */
 		group_blocks = ext4_blocks_count(sbi->s_es) -
 			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
 	} else {
 		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
 	}
@@ -205,18 +205,18 @@
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (block_group >= sbi->s_groups_count) {
+	if (block_group >= ngroups) {
 		ext4_error(sb, "ext4_get_group_desc",
 			   "block_group >= groups_count - "
 			   "block_group = %u, groups_count = %u",
-			   block_group, sbi->s_groups_count);
+			   block_group, ngroups);
 
 		return NULL;
 	}
-	smp_rmb();
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@
 	down_write(&grp->alloc_sem);
 	for (i = 0, blocks_freed = 0; i < count; i++) {
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 						bit + i, bitmap_bh->b_data)) {
 			ext4_error(sb, __func__,
 				   "bit already cleared for block %llu",
@@ -461,11 +461,11 @@
 			blocks_freed++;
 		}
 	}
-	spin_lock(sb_bgl_lock(sbi, block_group));
+	ext4_lock_group(sb, block_group);
 	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
 	ext4_free_blks_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
 
 	if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
 	ext4_group_t i;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@
 	bitmap_count = 0;
 	gdp = NULL;
 
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
@@ -700,7 +699,6 @@
 	return bitmap_count;
 #else
 	desc_count = 0;
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 0000000..50784ef
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+
+struct ext4_system_zone {
+	struct rb_node	node;
+	ext4_fsblk_t	start_blk;
+	unsigned int	count;
+};
+
+static struct kmem_cache *ext4_system_zone_cachep;
+
+int __init init_ext4_system_zone(void)
+{
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+					     SLAB_RECLAIM_ACCOUNT);
+	if (ext4_system_zone_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void exit_ext4_system_zone(void)
+{
+	kmem_cache_destroy(ext4_system_zone_cachep);
+}
+
+static inline int can_merge(struct ext4_system_zone *entry1,
+		     struct ext4_system_zone *entry2)
+{
+	if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+		return 1;
+	return 0;
+}
+
+/*
+ * Mark a range of blocks as belonging to the "system zone" --- that
+ * is, filesystem metadata blocks which should never be used by
+ * inodes.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+			   ext4_fsblk_t start_blk,
+			   unsigned int count)
+{
+	struct ext4_system_zone *new_entry = NULL, *entry;
+	struct rb_node **n = &sbi->system_blks.rb_node, *node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct ext4_system_zone, node);
+		if (start_blk < entry->start_blk)
+			n = &(*n)->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = &(*n)->rb_right;
+		else {
+			if (start_blk + count > (entry->start_blk + 
+						 entry->count))
+				entry->count = (start_blk + count - 
+						entry->start_blk);
+			new_node = *n;
+			new_entry = rb_entry(new_node, struct ext4_system_zone,
+					     node);
+			break;
+		}
+	}
+
+	if (!new_entry) {
+		new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+					     GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		new_entry->start_blk = start_blk;
+		new_entry->count = count;
+		new_node = &new_entry->node;
+
+		rb_link_node(new_node, parent, n);
+		rb_insert_color(new_node, &sbi->system_blks);
+	}
+
+	/* Can we merge to the left? */
+	node = rb_prev(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(entry, new_entry)) {
+			new_entry->start_blk = entry->start_blk;
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+
+	/* Can we merge to the right? */
+	node = rb_next(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(new_entry, entry)) {
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+	return 0;
+}
+
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+	struct rb_node *node;
+	struct ext4_system_zone *entry;
+	int first = 1;
+
+	printk(KERN_INFO "System zones: ");
+	node = rb_first(&sbi->system_blks);
+	while (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		printk("%s%llu-%llu", first ? "" : ", ",
+		       entry->start_blk, entry->start_blk + entry->count - 1);
+		first = 0;
+		node = rb_next(node);
+	}
+	printk("\n");
+}
+
+int ext4_setup_system_zone(struct super_block *sb)
+{
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp;
+	ext4_group_t i;
+	int flex_size = ext4_flex_bg_size(sbi);
+	int ret;
+
+	if (!test_opt(sb, BLOCK_VALIDITY)) {
+		if (EXT4_SB(sb)->system_blks.rb_node)
+			ext4_release_system_zone(sb);
+		return 0;
+	}
+	if (EXT4_SB(sb)->system_blks.rb_node)
+		return 0;
+
+	for (i=0; i < ngroups; i++) {
+		if (ext4_bg_has_super(sb, i) &&
+		    ((i < 5) || ((i % flex_size) == 0)))
+			add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+					sbi->s_gdb_count + 1);
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+				sbi->s_itb_per_group);
+		if (ret)
+			return ret;
+	}
+
+	if (test_opt(sb, DEBUG))
+		debug_print_tree(EXT4_SB(sb));
+	return 0;
+}
+
+/* Called when the filesystem is unmounted */
+void ext4_release_system_zone(struct super_block *sb)
+{
+	struct rb_node	*n = EXT4_SB(sb)->system_blks.rb_node;
+	struct rb_node	*parent;
+	struct ext4_system_zone	*entry;
+
+	while (n) {
+		/* Do the node's children first */
+		if (n->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
+		 */
+		parent = rb_parent(n);
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		kmem_cache_free(ext4_system_zone_cachep, entry);
+		if (!parent)
+			EXT4_SB(sb)->system_blks.rb_node = NULL;
+		else if (parent->rb_left == n)
+			parent->rb_left = NULL;
+		else if (parent->rb_right == n)
+			parent->rb_right = NULL;
+		n = parent;
+	}
+	EXT4_SB(sb)->system_blks.rb_node = NULL;
+}
+
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with filesystem metadata blocks.
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+			  unsigned int count)
+{
+	struct ext4_system_zone *entry;
+	struct rb_node *n = sbi->system_blks.rb_node;
+
+	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (start_blk + count > ext4_blocks_count(sbi->s_es)))
+		return 0;
+	while (n) {
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		if (start_blk + count - 1 < entry->start_blk)
+			n = n->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = n->rb_right;
+		else
+			return 0;
+	}
+	return 1;
+}
+
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b647899..9dc93168 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
-						0, 0, 0);
+		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef..cc7d5ed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
 #include <linux/magic.h>
 #include <linux/jbd2.h>
 #include <linux/quota.h>
-#include "ext4_i.h"
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
 
 /*
  * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+
+/* data type for block group number */
+typedef unsigned int ext4_group_t;
+
+
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE		1
 /* blocks already reserved */
@@ -179,9 +199,6 @@
 #define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */
 
-#ifdef __KERNEL__
-#include "ext4_sb.h"
-#endif
 /*
  * Macro-instructions used to manage group descriptors
  */
@@ -297,10 +314,23 @@
 };
 
 /*
- * Following is used by preallocation code to tell get_blocks() that we
- * want uninitialzed extents.
+ * Flags used by ext4_get_blocks()
  */
-#define EXT4_CREATE_UNINITIALIZED_EXT		2
+	/* Allocate any needed blocks and/or convert an unitialized
+	   extent to be an initialized ext4 */
+#define EXT4_GET_BLOCKS_CREATE			0x0001
+	/* Request the creation of an unitialized extent */
+#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+						 EXT4_GET_BLOCKS_CREATE)
+	/* Caller is from the delayed allocation writeout path,
+	   so set the magic i_delalloc_reserve_flag after taking the 
+	   inode allocation semaphore for */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
+	/* Call ext4_da_update_reserve_space() after successfully 
+	   allocating the blocks */
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
+
 
 /*
  * ioctl commands
@@ -516,6 +546,110 @@
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
 /*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+	ext4_fsblk_t	ec_start;
+	ext4_lblk_t	ec_block;
+	__u32		ec_len; /* must be 32bit to return holes */
+	__u32		ec_type;
+};
+
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+	__le32	i_data[15];	/* unconverted */
+	__u32	i_flags;
+	ext4_fsblk_t	i_file_acl;
+	__u32	i_dtime;
+
+	/*
+	 * i_block_group is the number of the block group which contains
+	 * this file's inode.  Constant across the lifetime of the inode,
+	 * it is ued for making block allocation decisions - we try to
+	 * place a file's data blocks near its inode block, and new inodes
+	 * near to their parent directory's inode.
+	 */
+	ext4_group_t	i_block_group;
+	__u32	i_state;		/* Dynamic state flags for ext4 */
+
+	ext4_lblk_t		i_dir_start_lookup;
+#ifdef CONFIG_EXT4_FS_XATTR
+	/*
+	 * Extended attributes can be read independently of the main file
+	 * data. Taking i_mutex even when reading would cause contention
+	 * between readers of EAs and writers of regular file data, so
+	 * instead we synchronize on xattr_sem when reading or changing
+	 * EAs.
+	 */
+	struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
+	struct list_head i_orphan;	/* unlinked but open inodes */
+
+	/*
+	 * i_disksize keeps track of what the inode size is ON DISK, not
+	 * in memory.  During truncate, i_size is set to the new size by
+	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
+	 * set i_disksize to 0 until the truncate is actually under way.
+	 *
+	 * The intent is that i_disksize always represents the blocks which
+	 * are used by this file.  This allows recovery to restart truncate
+	 * on orphans if we crash during truncate.  We actually write i_disksize
+	 * into the on-disk inode when writing inodes out, instead of i_size.
+	 *
+	 * The only time when i_disksize and i_size may be different is when
+	 * a truncate is in progress.  The only things which change i_disksize
+	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+	 */
+	loff_t	i_disksize;
+
+	/*
+	 * i_data_sem is for serialising ext4_truncate() against
+	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
+	 * data tree are chopped off during truncate. We can't do that in
+	 * ext4 because whenever we perform intermediate commits during
+	 * truncate, the inode and all the metadata blocks *must* be in a
+	 * consistent state which allows truncation of the orphans to restart
+	 * during recovery.  Hence we must fix the get_block-vs-truncate race
+	 * by other means, so we have i_data_sem.
+	 */
+	struct rw_semaphore i_data_sem;
+	struct inode vfs_inode;
+	struct jbd2_inode jinode;
+
+	struct ext4_ext_cache i_cached_extent;
+	/*
+	 * File creation time. Its function is same as that of
+	 * struct timespec i_{a,c,m}time in the generic inode.
+	 */
+	struct timespec i_crtime;
+
+	/* mballoc */
+	struct list_head i_prealloc_list;
+	spinlock_t i_prealloc_lock;
+
+	/* ialloc */
+	ext4_group_t	i_last_alloc_group;
+
+	/* allocation reservation info for delalloc */
+	unsigned int i_reserved_data_blocks;
+	unsigned int i_reserved_meta_blocks;
+	unsigned int i_allocated_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
+
+	/* on-disk additional length */
+	__u16 i_extra_isize;
+
+	spinlock_t i_block_reservation_lock;
+};
+
+/*
  * File system states
  */
 #define	EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
@@ -560,6 +694,7 @@
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
+#define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@
 };
 
 #ifdef __KERNEL__
+/*
+ * fourth extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
+	unsigned long s_inodes_per_block;/* Number of inodes per block */
+	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_inodes_per_group;/* Number of inodes in a group */
+	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
+	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
+	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
+	ext4_group_t s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
+	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
+	struct buffer_head * s_sbh;	/* Buffer containing the super block */
+	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
+	struct buffer_head **s_group_desc;
+	unsigned long  s_mount_opt;
+	ext4_fsblk_t s_sb_block;
+	uid_t s_resuid;
+	gid_t s_resgid;
+	unsigned short s_mount_state;
+	unsigned short s_pad;
+	int s_addr_per_block_bits;
+	int s_desc_per_block_bits;
+	int s_inode_size;
+	int s_first_ino;
+	unsigned int s_inode_readahead_blks;
+	spinlock_t s_next_gen_lock;
+	u32 s_next_generation;
+	u32 s_hash_seed[4];
+	int s_def_hash_version;
+	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
+	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeinodes_counter;
+	struct percpu_counter s_dirs_counter;
+	struct percpu_counter s_dirtyblocks_counter;
+	struct blockgroup_lock *s_blockgroup_lock;
+	struct proc_dir_entry *s_proc;
+	struct kobject s_kobj;
+	struct completion s_kobj_unregister;
+
+	/* Journaling */
+	struct inode *s_journal_inode;
+	struct journal_s *s_journal;
+	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
+	unsigned long s_commit_interval;
+	u32 s_max_batch_time;
+	u32 s_min_batch_time;
+	struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
+	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+	int s_jquota_fmt;			/* Format of quota to use */
+#endif
+	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+	struct rb_root system_blks;
+
+#ifdef EXTENTS_STATS
+	/* ext4 extents stats */
+	unsigned long s_ext_min;
+	unsigned long s_ext_max;
+	unsigned long s_depth_max;
+	spinlock_t s_ext_stats_lock;
+	unsigned long s_ext_blocks;
+	unsigned long s_ext_extents;
+#endif
+
+	/* for buddy allocator */
+	struct ext4_group_info ***s_group_info;
+	struct inode *s_buddy_cache;
+	long s_blocks_reserved;
+	spinlock_t s_reserve_lock;
+	spinlock_t s_md_lock;
+	tid_t s_last_transaction;
+	unsigned short *s_mb_offsets;
+	unsigned int *s_mb_maxs;
+
+	/* tunables */
+	unsigned long s_stripe;
+	unsigned int s_mb_stream_request;
+	unsigned int s_mb_max_to_scan;
+	unsigned int s_mb_min_to_scan;
+	unsigned int s_mb_stats;
+	unsigned int s_mb_order2_reqs;
+	unsigned int s_mb_group_prealloc;
+	/* where last allocation was done - for stream allocation */
+	unsigned long s_mb_last_group;
+	unsigned long s_mb_last_start;
+
+	/* history to debug policy */
+	struct ext4_mb_history *s_mb_history;
+	int s_mb_history_cur;
+	int s_mb_history_max;
+	int s_mb_history_num;
+	spinlock_t s_mb_history_lock;
+	int s_mb_history_filter;
+
+	/* stats for buddy allocator */
+	spinlock_t s_mb_pa_lock;
+	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
+	atomic_t s_bal_success;	/* we found long enough chunks */
+	atomic_t s_bal_allocated;	/* in blocks */
+	atomic_t s_bal_ex_scanned;	/* total extents scanned */
+	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_breaks;	/* too long searches */
+	atomic_t s_bal_2orders;	/* 2^order hits */
+	spinlock_t s_bal_lock;
+	unsigned long s_mb_buddies_generated;
+	unsigned long long s_mb_generation_time;
+	atomic_t s_mb_lost_chunks;
+	atomic_t s_mb_preallocated;
+	atomic_t s_mb_discarded;
+
+	/* locality groups */
+	struct ext4_locality_group *s_locality_groups;
+
+	/* for write statistics */
+	unsigned long s_sectors_written_start;
+	u64 s_kbytes_written;
+
+	unsigned int s_log_groups_per_flex;
+	struct flex_groups *s_flex_groups;
+};
+
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -704,7 +970,6 @@
 		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
 }
 
-
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@
 						    ext4_group_t block_group,
 						    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+				      ext4_group_t block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)			\
+		ext4_init_block_bitmap(sb, NULL, group, desc)
 
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_warning(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
 				const char *, const char *, ...)
 	__attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@
 				struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
 				   struct ext4_group_desc *bg, __u32 count);
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+				   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+				       struct ext4_group_desc *gdp);
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
@@ -1228,6 +1512,18 @@
 	 return grp_info[indexv][indexh];
 }
 
+/*
+ * Reading s_groups_count requires using smp_rmb() afterwards.  See
+ * the locking protocol documented in the comments of ext4_group_add()
+ * in resize.c
+ */
+static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
+{
+	ext4_group_t	ngroups = EXT4_SB(sb)->s_groups_count;
+
+	smp_rmb();
+	return ngroups;
+}
 
 static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
 					     ext4_group_t block_group)
@@ -1283,33 +1579,25 @@
 };
 
 #define EXT4_GROUP_INFO_NEED_INIT_BIT	0
-#define EXT4_GROUP_INFO_LOCKED_BIT	1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
+static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
+					      ext4_group_t group)
+{
+	return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
+}
+
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+	spin_lock(ext4_group_lock_ptr(sb, group));
 }
 
 static inline void ext4_unlock_group(struct super_block *sb,
 					ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline int ext4_is_group_locked(struct super_block *sb,
-					ext4_group_t group)
-{
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-						&(grinfo->bb_state));
+	spin_unlock(ext4_group_lock_ptr(sb, group));
 }
 
 /*
@@ -1326,11 +1614,21 @@
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
+extern struct dentry *ext4_get_parent(struct dentry *child);
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
+/* block_validity */
+extern void ext4_release_system_zone(struct super_block *sb);
+extern int ext4_setup_system_zone(struct super_block *sb);
+extern int __init init_ext4_system_zone(void);
+extern void exit_ext4_system_zone(void);
+extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
+				 ext4_fsblk_t start_blk,
+				 unsigned int count);
+
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@
 				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int max_blocks,
-			       struct buffer_head *bh_result,
-			       int create, int extend_disksize);
+			       struct buffer_head *bh_result, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-			sector_t block, unsigned int max_blocks,
-			struct buffer_head *bh, int create,
-			int extend_disksize, int flag);
+extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
+			   sector_t block, unsigned int max_blocks,
+			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187..0000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  ext4_i.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_i.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_I
-#define _EXT4_I
-
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-
-/*
- * storage for cached extent
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
-};
-
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
-	__le32	i_data[15];	/* unconverted */
-	__u32	i_flags;
-	ext4_fsblk_t	i_file_acl;
-	__u32	i_dtime;
-
-	/*
-	 * i_block_group is the number of the block group which contains
-	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
-	 * place a file's data blocks near its inode block, and new inodes
-	 * near to their parent directory's inode.
-	 */
-	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
-
-	ext4_lblk_t		i_dir_start_lookup;
-#ifdef CONFIG_EXT4_FS_XATTR
-	/*
-	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_mutex even when reading would cause contention
-	 * between readers of EAs and writers of regular file data, so
-	 * instead we synchronize on xattr_sem when reading or changing
-	 * EAs.
-	 */
-	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
-	struct list_head i_orphan;	/* unlinked but open inodes */
-
-	/*
-	 * i_disksize keeps track of what the inode size is ON DISK, not
-	 * in memory.  During truncate, i_size is set to the new size by
-	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
-	 * set i_disksize to 0 until the truncate is actually under way.
-	 *
-	 * The intent is that i_disksize always represents the blocks which
-	 * are used by this file.  This allows recovery to restart truncate
-	 * on orphans if we crash during truncate.  We actually write i_disksize
-	 * into the on-disk inode when writing inodes out, instead of i_size.
-	 *
-	 * The only time when i_disksize and i_size may be different is when
-	 * a truncate is in progress.  The only things which change i_disksize
-	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
-	 */
-	loff_t	i_disksize;
-
-	/*
-	 * i_data_sem is for serialising ext4_truncate() against
-	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
-	 * data tree are chopped off during truncate. We can't do that in
-	 * ext4 because whenever we perform intermediate commits during
-	 * truncate, the inode and all the metadata blocks *must* be in a
-	 * consistent state which allows truncation of the orphans to restart
-	 * during recovery.  Hence we must fix the get_block-vs-truncate race
-	 * by other means, so we have i_data_sem.
-	 */
-	struct rw_semaphore i_data_sem;
-	struct inode vfs_inode;
-	struct jbd2_inode jinode;
-
-	struct ext4_ext_cache i_cached_extent;
-	/*
-	 * File creation time. Its function is same as that of
-	 * struct timespec i_{a,c,m}time in the generic inode.
-	 */
-	struct timespec i_crtime;
-
-	/* mballoc */
-	struct list_head i_prealloc_list;
-	spinlock_t i_prealloc_lock;
-
-	/* ialloc */
-	ext4_group_t	i_last_alloc_group;
-
-	/* allocation reservation info for delalloc */
-	unsigned int i_reserved_data_blocks;
-	unsigned int i_reserved_meta_blocks;
-	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-
-	/* on-disk additional length */
-	__u16 i_extra_isize;
-
-	spinlock_t i_block_reservation_lock;
-};
-
-#endif	/* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fe..0000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  ext4_sb.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_sb.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_SB
-#define _EXT4_SB
-
-#ifdef __KERNEL__
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#endif
-#include <linux/rbtree.h>
-
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
-	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
-	unsigned long s_inodes_per_block;/* Number of inodes per block */
-	unsigned long s_blocks_per_group;/* Number of blocks in a group */
-	unsigned long s_inodes_per_group;/* Number of inodes in a group */
-	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
-	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
-	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
-	ext4_group_t s_groups_count;	/* Number of groups in the fs */
-	unsigned long s_overhead_last;  /* Last calculated overhead */
-	unsigned long s_blocks_last;    /* Last seen block count */
-	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
-	struct buffer_head * s_sbh;	/* Buffer containing the super block */
-	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
-	struct buffer_head **s_group_desc;
-	unsigned long  s_mount_opt;
-	ext4_fsblk_t s_sb_block;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned short s_mount_state;
-	unsigned short s_pad;
-	int s_addr_per_block_bits;
-	int s_desc_per_block_bits;
-	int s_inode_size;
-	int s_first_ino;
-	unsigned int s_inode_readahead_blks;
-	spinlock_t s_next_gen_lock;
-	u32 s_next_generation;
-	u32 s_hash_seed[4];
-	int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
-	struct percpu_counter s_freeinodes_counter;
-	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
-	struct blockgroup_lock *s_blockgroup_lock;
-	struct proc_dir_entry *s_proc;
-	struct kobject s_kobj;
-	struct completion s_kobj_unregister;
-
-	/* Journaling */
-	struct inode *s_journal_inode;
-	struct journal_s *s_journal;
-	struct list_head s_orphan;
-	unsigned long s_commit_interval;
-	u32 s_max_batch_time;
-	u32 s_min_batch_time;
-	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
-#ifdef CONFIG_QUOTA
-	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
-	int s_jquota_fmt;			/* Format of quota to use */
-#endif
-	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-
-#ifdef EXTENTS_STATS
-	/* ext4 extents stats */
-	unsigned long s_ext_min;
-	unsigned long s_ext_max;
-	unsigned long s_depth_max;
-	spinlock_t s_ext_stats_lock;
-	unsigned long s_ext_blocks;
-	unsigned long s_ext_extents;
-#endif
-
-	/* for buddy allocator */
-	struct ext4_group_info ***s_group_info;
-	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
-	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
-	unsigned short *s_mb_offsets;
-	unsigned int *s_mb_maxs;
-
-	/* tunables */
-	unsigned long s_stripe;
-	unsigned int s_mb_stream_request;
-	unsigned int s_mb_max_to_scan;
-	unsigned int s_mb_min_to_scan;
-	unsigned int s_mb_stats;
-	unsigned int s_mb_order2_reqs;
-	unsigned int s_mb_group_prealloc;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
-
-	/* history to debug policy */
-	struct ext4_mb_history *s_mb_history;
-	int s_mb_history_cur;
-	int s_mb_history_max;
-	int s_mb_history_num;
-	spinlock_t s_mb_history_lock;
-	int s_mb_history_filter;
-
-	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
-	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
-	atomic_t s_bal_success;	/* we found long enough chunks */
-	atomic_t s_bal_allocated;	/* in blocks */
-	atomic_t s_bal_ex_scanned;	/* total extents scanned */
-	atomic_t s_bal_goals;	/* goal hits */
-	atomic_t s_bal_breaks;	/* too long searches */
-	atomic_t s_bal_2orders;	/* 2^order hits */
-	spinlock_t s_bal_lock;
-	unsigned long s_mb_buddies_generated;
-	unsigned long long s_mb_generation_time;
-	atomic_t s_mb_lost_chunks;
-	atomic_t s_mb_preallocated;
-	atomic_t s_mb_discarded;
-
-	/* locality groups */
-	struct ext4_locality_group *s_locality_groups;
-
-	/* for write statistics */
-	unsigned long s_sectors_written_start;
-	u64 s_kbytes_written;
-
-	unsigned int s_log_groups_per_flex;
-	struct flex_groups *s_flex_groups;
-};
-
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
-
-#endif	/* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb..2593f74 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@
 
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-	ext4_fsblk_t block = ext_pblock(ext), valid_block;
+	ext4_fsblk_t block = ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     ((block + len) > ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
 
 static int ext4_valid_extent_idx(struct inode *inode,
 				struct ext4_extent_idx *ext_idx)
 {
-	ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	ext4_fsblk_t block = idx_pblock(ext_idx);
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     (block >= ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
 }
 
 static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	if (ext4_ext_is_uninitialized(ex))
-		uninitialized = 1;
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
+
+		if (ext4_ext_is_uninitialized(ex))
+			uninitialized = 1;
+		else
+			uninitialized = 0;
+
 		ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
 		path[depth].p_ext = ex;
 
@@ -2784,7 +2774,7 @@
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
 			unsigned int max_blocks, struct buffer_head *bh_result,
-			int create, int extend_disksize)
+			int flags)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
-	loff_t disksize;
 
 	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@
 	cache_type = ext4_ext_in_cache(inode, iblock, &newex);
 	if (cache_type) {
 		if (cache_type == EXT4_EXT_CACHE_GAP) {
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
 				 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@
 							EXT4_EXT_CACHE_EXTENT);
 				goto out;
 			}
-			if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+			if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
 				goto out;
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+				if (allocated > max_blocks)
+					allocated = max_blocks;
 				/*
 				 * We have blocks reserved already.  We
 				 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@
 				 * the buffer head will be unmapped so that
 				 * a read from the block returns 0s.
 				 */
-				if (allocated > max_blocks)
-					allocated = max_blocks;
 				set_buffer_unwritten(bh_result);
 				bh_result->b_bdev = inode->i_sb->s_bdev;
 				bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
 	 */
-	if (!create) {
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 		/*
 		 * put just found gap into cache to speed up
 		 * subsequent requests
@@ -2932,10 +2921,10 @@
 	 * EXT_UNINIT_MAX_LEN.
 	 */
 	if (max_blocks > EXT_INIT_MAX_LEN &&
-	    create != EXT4_CREATE_UNINITIALIZED_EXT)
+	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_INIT_MAX_LEN;
 	else if (max_blocks > EXT_UNINIT_MAX_LEN &&
-		 create == EXT4_CREATE_UNINITIALIZED_EXT)
+		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(ar.len);
-	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
 		ext4_ext_mark_uninitialized(&newex);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err) {
@@ -2983,18 +2972,10 @@
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	if (extend_disksize) {
-		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > EXT4_I(inode)->i_disksize)
-			EXT4_I(inode)->i_disksize = disksize;
-	}
-
 	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
-	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
 		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
 						EXT4_EXT_CACHE_EXTENT);
 out:
@@ -3150,9 +3131,10 @@
 			ret = PTR_ERR(handle);
 			break;
 		}
-		ret = ext4_get_blocks_wrap(handle, inode, block,
-					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+		map_bh.b_state = 0;
+		ret = ext4_get_blocks(handle, inode, block,
+				      max_blocks, &map_bh,
+				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@
 		       void *data)
 {
 	struct fiemap_extent_info *fieinfo = data;
-	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
@@ -3242,9 +3224,16 @@
 	 *
 	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
 	 */
-	if (logical + length - 1 == EXT_MAX_BLOCK ||
-	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
+	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+		loff_t size = i_size_read(inode);
+		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+
 		flags |= FIEMAP_EXTENT_LAST;
+		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+		    logical+length > size)
+			length = (size - logical + bs - 1) & ~(bs-1);
+	}
 
 	error = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
@@ -3318,10 +3307,10 @@
 		 * Walk the extent tree gathering extent information.
 		 * ext4_ext_fiemap_cb will push extents back to user.
 		 */
-		down_write(&EXT4_I(inode)->i_data_sem);
+		down_read(&EXT4_I(inode)->i_data_sem);
 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
 					  ext4_ext_fiemap_cb, fieinfo);
-		up_write(&EXT4_I(inode)->i_data_sem);
+		up_read(&EXT4_I(inode)->i_data_sem);
 	}
 
 	return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d..0000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  linux/fs/ext4/group.h
- *
- * Copyright (C) 2007 Cluster File Systems, Inc
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#ifndef _LINUX_EXT4_GROUP_H
-#define _LINUX_EXT4_GROUP_H
-
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-				   struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
-				       struct ext4_group_desc *gdp);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a0..3743bd8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	spin_lock(sb_bgl_lock(sbi, block_group));
-	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
+					bit, bitmap_bh->b_data);
 	if (!cleared)
 		ext4_error(sb, "ext4_free_inode",
 			   "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@
 		if (fatal) goto error_return;
 
 		if (gdp) {
-			spin_lock(sb_bgl_lock(sbi, block_group));
+			ext4_lock_group(sb, block_group);
 			count = ext4_free_inodes_count(sb, gdp) + 1;
 			ext4_free_inodes_set(sb, gdp, count);
 			if (is_directory) {
@@ -277,7 +275,7 @@
 			}
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
-			spin_unlock(sb_bgl_lock(sbi, block_group));
+			ext4_unlock_group(sb, block_group);
 			percpu_counter_inc(&sbi->s_freeinodes_counter);
 			if (is_directory)
 				percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@
 static int find_group_dir(struct super_block *sb, struct inode *parent,
 				ext4_group_t *best_group)
 {
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned int freei, avefreei;
 	struct ext4_group_desc *desc, *best_desc = NULL;
 	ext4_group_t group;
@@ -349,11 +347,10 @@
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *desc;
-	struct buffer_head *bh;
 	struct flex_groups *flex_group = sbi->s_flex_groups;
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	int flex_size = ext4_flex_bg_size(sbi);
 	ext4_group_t best_flex = parent_fbg_group;
 	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@
 	ext4_group_t n_fbg_groups;
 	ext4_group_t i;
 
-	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+	n_fbg_groups = (ngroups + flex_size - 1) >>
 		sbi->s_log_groups_per_flex;
 
 find_close_to_parent:
@@ -404,7 +401,7 @@
 found_flexbg:
 	for (i = best_flex * flex_size; i < ngroups &&
 		     i < (best_flex + 1) * flex_size; i++) {
-		desc = ext4_get_group_desc(sb, i, &bh);
+		desc = ext4_get_group_desc(sb, i, NULL);
 		if (ext4_free_inodes_count(sb, desc)) {
 			*best_group = i;
 			goto out;
@@ -478,20 +475,21 @@
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
 	ext4_fsblk_t freeb, avefreeb;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
 	ext4_grpblk_t min_blocks;
-	ext4_group_t i, grp, g;
+	ext4_group_t i, grp, g, ngroups;
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
 
+	ngroups = real_ngroups;
 	if (flex_size > 1) {
-		ngroups = (ngroups + flex_size - 1) >>
+		ngroups = (real_ngroups + flex_size - 1) >>
 			sbi->s_log_groups_per_flex;
 		parent_group >>= sbi->s_log_groups_per_flex;
 	}
@@ -543,7 +541,7 @@
 		 */
 		grp *= flex_size;
 		for (i = 0; i < flex_size; i++) {
-			if (grp+i >= sbi->s_groups_count)
+			if (grp+i >= real_ngroups)
 				break;
 			desc = ext4_get_group_desc(sb, grp+i, NULL);
 			if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@
 	}
 
 fallback:
-	ngroups = sbi->s_groups_count;
+	ngroups = real_ngroups;
 	avefreei = freei / ngroups;
 fallback_retry:
 	parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@
 			    ext4_group_t *group, int mode)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
-	ext4_group_t i, last;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
 
 	/*
@@ -708,10 +705,10 @@
 
 /*
  * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
+ * is uninit we need to take the groups's ext4_group_lock
  * and clear the uninit flag. The inode bitmap update
  * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * after holding ext4_group_lock so that ext4_read_inode_bitmap
  * doesn't race with the ext4_claim_inode
  */
 static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
-	spin_lock(sb_bgl_lock(sbi, group));
+	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
 		retval = 1;
@@ -731,7 +728,7 @@
 	ino++;
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			   "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
-	spin_unlock(sb_bgl_lock(sbi, group));
+	ext4_unlock_group(sb, group);
 	return retval;
 }
 
@@ -799,11 +796,10 @@
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
 	struct buffer_head *group_desc_bh;
-	ext4_group_t group = 0;
+	ext4_group_t ngroups, group = 0;
 	unsigned long ino = 0;
 	struct inode *inode;
 	struct ext4_group_desc *gdp = NULL;
-	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
 	int ret2, err = 0;
@@ -818,15 +814,14 @@
 		return ERR_PTR(-EPERM);
 
 	sb = dir->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
 		   dir->i_ino, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-
 	sbi = EXT4_SB(sb);
-	es = sbi->s_es;
 
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@
 		 * group descriptor metadata has not yet been updated.
 		 * So we just go onto the next blockgroup.
 		 */
-		if (++group == sbi->s_groups_count)
+		if (++group == ngroups)
 			group = 0;
 	}
 	err = -ENOSPC;
@@ -938,7 +933,7 @@
 		}
 
 		free = 0;
-		spin_lock(sb_bgl_lock(sbi, group));
+		ext4_lock_group(sb, group);
 		/* recheck and clear flag under lock if we still need to */
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 
 		/* Don't need to dirty bitmap block if we didn't change it */
 		if (free) {
@@ -1158,7 +1153,7 @@
 {
 	unsigned long desc_count;
 	struct ext4_group_desc *gdp;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1190,7 +1185,7 @@
 	return desc_count;
 #else
 	desc_count = 0;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1205,9 +1200,9 @@
 unsigned long ext4_count_dirs(struct super_block * sb)
 {
 	unsigned long count = 0;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd5..875db94 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@
 }
 
 static int __ext4_check_blockref(const char *function, struct inode *inode,
-				 __le32 *p, unsigned int max) {
-
-	unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
+				 __le32 *p, unsigned int max)
+{
 	__le32 *bref = p;
+	unsigned int blk;
+
 	while (bref < p+max) {
-		if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
+		blk = le32_to_cpu(*bref++);
+		if (blk && 
+		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 
+						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
-				   "block reference %u >= max (%u) "
-				   "in inode #%lu, offset=%d",
-				   le32_to_cpu(*bref), maxblocks,
-				   inode->i_ino, (int)(bref-p));
+				   "invalid block reference %u "
+				   "in inode #%lu", blk, inode->i_ino);
  			return -EIO;
  		}
-		bref++;
  	}
  	return 0;
 }
@@ -892,6 +893,10 @@
 }
 
 /*
+ * The ext4_ind_get_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_get_blocks().
+ *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
  * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@
  * return = 0, if plain lookup failed.
  * return < 0, error case.
  *
- *
- * Need to be called with
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ * The ext4_ind_get_blocks() function should be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
  */
-static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 				  ext4_lblk_t iblock, unsigned int maxblocks,
 				  struct buffer_head *bh_result,
-				  int create, int extend_disksize)
+				  int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
-	loff_t disksize;
-
 
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
-	J_ASSERT(handle != NULL || create == 0);
+	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 					&blocks_to_boundary);
 
@@ -963,7 +966,7 @@
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
 		goto cleanup;
 
 	/*
@@ -997,19 +1000,7 @@
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
-	/*
-	 * i_disksize growing is protected by i_data_sem.  Don't forget to
-	 * protect it if you're about to implement concurrent
-	 * ext4_get_block() -bzzz
-	*/
-	if (!err && extend_disksize) {
-		disksize = ((loff_t) iblock + count) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > ei->i_disksize)
-			ei->i_disksize = disksize;
-	}
-	if (err)
+	else 
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@
 		ext4_discard_preallocations(inode);
 }
 
+static int check_block_validity(struct inode *inode, sector_t logical,
+				sector_t phys, int len)
+{
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
+		ext4_error(inode->i_sb, "check_block_validity",
+			   "inode #%lu logical block %llu mapped to %llu "
+			   "(size %d)", inode->i_ino,
+			   (unsigned long long) logical,
+			   (unsigned long long) phys, len);
+		WARN_ON(1);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
- * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * The ext4_get_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@
  * mapped.
  *
  * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
  * based files
  *
  * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@
  *
  * It returns the error in case of allocation failure.
  */
-int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-			unsigned int max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize, int flag)
+int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
+		    unsigned int max_blocks, struct buffer_head *bh,
+		    int flags)
 {
 	int retval;
 
@@ -1152,21 +1158,28 @@
 	clear_buffer_unwritten(bh);
 
 	/*
-	 * Try to see if we can get  the block without requesting
-	 * for new file system block.
+	 * Try to see if we can get the block without requesting a new
+	 * file system block.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, 0, 0);
+				bh, 0);
 	} else {
-		retval = ext4_get_blocks_handle(handle,
-				inode, block, max_blocks, bh, 0, 0);
+		retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
+					     bh, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
+
 	/* If it is only a block(s) look up */
-	if (!create)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
 		return retval;
 
 	/*
@@ -1205,7 +1218,7 @@
 	 * let the underlying get_block() function know to
 	 * avoid double accounting
 	 */
-	if (flag)
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@
 	 */
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, create, extend_disksize);
+					      bh, flags);
 	} else {
-		retval = ext4_get_blocks_handle(handle, inode, block,
-				max_blocks, bh, create, extend_disksize);
+		retval = ext4_ind_get_blocks(handle, inode, block,
+					     max_blocks, bh, flags);
 
 		if (retval > 0 && buffer_new(bh)) {
 			/*
@@ -1229,18 +1242,23 @@
 		}
 	}
 
-	if (flag) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
-		/*
-		 * Update reserved blocks/metadata blocks
-		 * after successful block allocation
-		 * which were deferred till now
-		 */
-		if ((retval > 0) && buffer_delay(bh))
-			ext4_da_update_reserve_space(inode, retval);
-	}
+
+	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
+		ext4_da_update_reserve_space(inode, retval);
 
 	up_write((&EXT4_I(inode)->i_data_sem));
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
 	return retval;
 }
 
@@ -1268,8 +1286,8 @@
 		started = 1;
 	}
 
-	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0, 0);
+	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1288,17 +1306,19 @@
 {
 	struct buffer_head dummy;
 	int fatal = 0, err;
+	int flags = 0;
 
 	J_ASSERT(handle != NULL || create == 0);
 
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1, 0);
+	if (create)
+		flags |= EXT4_GET_BLOCKS_CREATE;
+	err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
 	/*
-	 * ext4_get_blocks_handle() returns number of blocks
-	 * mapped. 0 in case of a HOLE.
+	 * ext4_get_blocks() returns number of blocks mapped. 0 in
+	 * case of a HOLE.
 	 */
 	if (err > 0) {
 		if (err > 1)
@@ -1439,7 +1459,7 @@
 				struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
-	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
@@ -1450,6 +1470,11 @@
 		   "dev %s ino %lu pos %llu len %u flags %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, flags);
+	/*
+	 * Reserve one block more for addition to orphan list in case
+	 * we allocate blocks but write fails for some reason
+	 */
+	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
  	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
@@ -1483,15 +1508,30 @@
 
 	if (ret) {
 		unlock_page(page);
-		ext4_journal_stop(handle);
 		page_cache_release(page);
 		/*
 		 * block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
+		 *
+		 * Add inode to orphan list in case we crash before
+		 * truncate finishes
 		 */
 		if (pos + len > inode->i_size)
+			ext4_orphan_add(handle, inode);
+
+		ext4_journal_stop(handle);
+		if (pos + len > inode->i_size) {
 			vmtruncate(inode, inode->i_size);
+			/* 
+			 * If vmtruncate failed early the inode might
+			 * still be on the orphan list; we need to
+			 * make sure the inode is removed from the
+			 * orphan list in that case.
+			 */
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+		}
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@
 	return ext4_handle_dirty_metadata(handle, NULL, bh);
 }
 
+static int ext4_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	int i_size_changed = 0;
+	struct inode *inode = mapping->host;
+	handle_t *handle = ext4_journal_current_handle();
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = 1;
+	}
+
+	if (pos + copied >  EXT4_I(inode)->i_disksize) {
+		/* We need to mark inode dirty even if
+		 * new_i_size is less that inode->i_size
+		 * bu greater than i_disksize.(hint delalloc)
+		 */
+		ext4_update_i_disksize(inode, (pos + copied));
+		i_size_changed = 1;
+	}
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		ext4_mark_inode_dirty(handle, inode);
+
+	return copied;
+}
+
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
-		loff_t new_i_size;
-
-		new_i_size = pos + copied;
-		if (new_i_size > EXT4_I(inode)->i_disksize) {
-			ext4_update_i_disksize(inode, new_i_size);
-			/* We need to mark inode dirty even if
-			 * new_i_size is less that inode->i_size
-			 * bu greater than i_disksize.(hint delalloc)
-			 */
-			ext4_mark_inode_dirty(handle, inode);
-		}
-
-		ret2 = generic_write_end(file, mapping, pos, len, copied,
+		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
+		if (pos + len > inode->i_size)
+			/* if we have allocated more blocks and copied
+			 * less. We will have blocks allocated outside
+			 * inode->i_size. So truncate them
+			 */
+			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
 	}
@@ -1554,6 +1634,18 @@
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+
 	return ret ? ret : copied;
 }
 
@@ -1565,25 +1657,21 @@
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
-	loff_t new_i_size;
 
 	trace_mark(ext4_writeback_write_end,
 		   "dev %s ino %lu pos %llu len %u copied %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, copied);
-	new_i_size = pos + copied;
-	if (new_i_size > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, new_i_size);
-		/* We need to mark inode dirty even if
-		 * new_i_size is less that inode->i_size
-		 * bu greater than i_disksize.(hint delalloc)
-		 */
-		ext4_mark_inode_dirty(handle, inode);
-	}
-
-	ret2 = generic_write_end(file, mapping, pos, len, copied,
+	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	if (ret2 < 0)
 		ret = ret2;
 
@@ -1591,6 +1679,17 @@
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
 	return ret ? ret : copied;
 }
 
@@ -1635,10 +1734,27 @@
 	}
 
 	unlock_page(page);
+	page_cache_release(page);
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	page_cache_release(page);
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
 
 	return ret ? ret : copied;
 }
@@ -1852,7 +1968,7 @@
  * @logical - first logical block to start assignment with
  *
  * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay
+ * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
 static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 				 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@
 			do {
 				if (cur_logical >= logical + blocks)
 					break;
-				if (buffer_delay(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_delay(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
-				} else if (buffer_unwritten(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_unwritten(bh);
-					set_buffer_mapped(bh);
-					set_buffer_new(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
+
+				if (buffer_delay(bh) ||
+						buffer_unwritten(bh)) {
+
+					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
+
+					if (buffer_delay(bh)) {
+						clear_buffer_delay(bh);
+						bh->b_blocknr = pblock;
+					} else {
+						/*
+						 * unwritten already should have
+						 * blocknr assigned. Verify that
+						 */
+						clear_buffer_unwritten(bh);
+						BUG_ON(bh->b_blocknr != pblock);
+					}
+
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
@@ -1990,51 +2114,6 @@
 	return;
 }
 
-#define		EXT4_DELALLOC_RSVED	1
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
-{
-	int ret;
-	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	loff_t disksize = EXT4_I(inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
-	if (ret <= 0)
-		return ret;
-
-	bh_result->b_size = (ret << inode->i_blkbits);
-
-	if (ext4_should_order_data(inode)) {
-		int retval;
-		retval = ext4_jbd2_file_inode(handle, inode);
-		if (retval)
-			/*
-			 * Failed to add inode for ordered mode. Don't
-			 * update file size
-			 */
-			return retval;
-	}
-
-	/*
-	 * Update on-disk size along with block allocation we don't
-	 * use 'extend_disksize' as size may change within already
-	 * allocated block -bzzz
-	 */
-	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-	if (disksize > i_size_read(inode))
-		disksize = i_size_read(inode);
-	if (disksize > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, disksize);
-		ret = ext4_mark_inode_dirty(handle, inode);
-		return ret;
-	}
-	return 0;
-}
-
 /*
  * mpage_da_map_blocks - go through given space
  *
@@ -2045,29 +2124,57 @@
  */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
-	int err = 0;
+	int err, blks, get_blocks_flags;
 	struct buffer_head new;
-	sector_t next;
+	sector_t next = mpd->b_blocknr;
+	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
+	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
+	handle_t *handle = NULL;
 
 	/*
 	 * We consider only non-mapped and non-allocated blocks
 	 */
 	if ((mpd->b_state  & (1 << BH_Mapped)) &&
-	    !(mpd->b_state & (1 << BH_Delay)))
-		return 0;
-	new.b_state = mpd->b_state;
-	new.b_blocknr = 0;
-	new.b_size = mpd->b_size;
-	next = mpd->b_blocknr;
-	/*
-	 * If we didn't accumulate anything
-	 * to write simply return
-	 */
-	if (!new.b_size)
+		!(mpd->b_state & (1 << BH_Delay)) &&
+		!(mpd->b_state & (1 << BH_Unwritten)))
 		return 0;
 
-	err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
-	if (err) {
+	/*
+	 * If we didn't accumulate anything to write simply return
+	 */
+	if (!mpd->b_size)
+		return 0;
+
+	handle = ext4_journal_current_handle();
+	BUG_ON(!handle);
+
+	/*
+	 * Call ext4_get_blocks() to allocate any delayed allocation
+	 * blocks, or to convert an uninitialized extent to be
+	 * initialized (in the case where we have written into
+	 * one or more preallocated blocks).
+	 *
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
+	 * indicate that we are on the delayed allocation path.  This
+	 * affects functions in many different parts of the allocation
+	 * call path.  This flag exists primarily because we don't
+	 * want to change *many* call functions, so ext4_get_blocks()
+	 * will set the magic i_delalloc_reserved_flag once the
+	 * inode's allocation semaphore is taken.
+	 *
+	 * If the blocks in questions were delalloc blocks, set
+	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
+	 * variables are updated after the blocks have been allocated.
+	 */
+	new.b_state = 0;
+	get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
+			    EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	if (mpd->b_state & (1 << BH_Delay))
+		get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
+			       &new, get_blocks_flags);
+	if (blks < 0) {
+		err = blks;
 		/*
 		 * If get block returns with error we simply
 		 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@
 		if (err == -ENOSPC) {
 			ext4_print_free_blocks(mpd->inode);
 		}
-		/* invlaidate all the pages */
+		/* invalidate all the pages */
 		ext4_da_block_invalidatepages(mpd, next,
 				mpd->b_size >> mpd->inode->i_blkbits);
 		return err;
 	}
-	BUG_ON(new.b_size == 0);
+	BUG_ON(blks == 0);
+
+	new.b_size = (blks << mpd->inode->i_blkbits);
 
 	if (buffer_new(&new))
 		__unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@
 	    (mpd->b_state & (1 << BH_Unwritten)))
 		mpage_put_bnr_to_bhs(mpd, next, &new);
 
+	if (ext4_should_order_data(mpd->inode)) {
+		err = ext4_jbd2_file_inode(handle, mpd->inode);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Update on-disk size along with block allocation.
+	 */
+	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
+	if (disksize > i_size_read(mpd->inode))
+		disksize = i_size_read(mpd->inode);
+	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
+		ext4_update_i_disksize(mpd->inode, disksize);
+		return ext4_mark_inode_dirty(handle, mpd->inode);
+	}
+
 	return 0;
 }
 
@@ -2192,6 +2318,17 @@
 	return;
 }
 
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+	/*
+	 * unmapped buffer is possible for holes.
+	 * delay buffer is possible with delayed allocation.
+	 * We also need to consider unwritten buffer as unmapped.
+	 */
+	return (!buffer_mapped(bh) || buffer_delay(bh) ||
+				buffer_unwritten(bh)) && buffer_dirty(bh);
+}
+
 /*
  * __mpage_da_writepage - finds extent of pages and blocks
  *
@@ -2276,8 +2413,7 @@
 			 * Otherwise we won't make progress
 			 * with the page in ext4_da_writepage
 			 */
-			if (buffer_dirty(bh) &&
-			    (!buffer_mapped(bh) || buffer_delay(bh))) {
+			if (ext4_bh_unmapped_or_delay(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
 						       bh->b_size,
 						       bh->b_state);
@@ -2303,8 +2439,16 @@
 }
 
 /*
- * this is a special callback for ->write_begin() only
- * it's intention is to return mapped block or reserve space
+ * This is a special get_blocks_t callback which is used by
+ * ext4_da_write_begin().  It will either return mapped block or
+ * reserve space for a single block.
+ *
+ * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
+ * We also have b_blocknr = -1 and b_bdev initialized properly
+ *
+ * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
+ * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
+ * initialized properly.
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 				  struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
 	if ((ret == 0) && !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -2340,40 +2484,53 @@
 		set_buffer_delay(bh_result);
 	} else if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
-		/*
-		 * With sub-block writes into unwritten extents
-		 * we also need to mark the buffer as new so that
-		 * the unwritten parts of the buffer gets correctly zeroed.
-		 */
-		if (buffer_unwritten(bh_result))
+		if (buffer_unwritten(bh_result)) {
+			/* A delayed write to unwritten bh should
+			 * be marked new and mapped.  Mapped ensures
+			 * that we don't do get_block multiple times
+			 * when we write to the same offset and new
+			 * ensures that we do proper zero out for
+			 * partial write.
+			 */
 			set_buffer_new(bh_result);
+			set_buffer_mapped(bh_result);
+		}
 		ret = 0;
 	}
 
 	return ret;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
-{
-	/*
-	 * unmapped buffer is possible for holes.
-	 * delay buffer is possible with delayed allocation
-	 */
-	return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
-}
-
-static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+/*
+ * This function is used as a standard get_block_t calback function
+ * when there is no desire to allocate any blocks.  It is used as a
+ * callback function for block_prepare_write(), nobh_writepage(), and
+ * block_write_full_page().  These functions should only try to map a
+ * single block at a time.
+ *
+ * Since this function doesn't do block allocations even if the caller
+ * requests it by passing in create=1, it is critically important that
+ * any caller checks to make sure that any buffer heads are returned
+ * by this function are either all already mapped or marked for
+ * delayed allocation before calling nobh_writepage() or
+ * block_write_full_page().  Otherwise, b_blocknr could be left
+ * unitialized, and the page write functions will be taken by
+ * surprise.
+ */
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	int ret = 0;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
+	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
 	/*
 	 * we don't want to do block allocation in writepage
 	 * so call get_block_wrap with create = 0
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
-				   bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
+	BUG_ON(create && ret == 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -2382,10 +2539,11 @@
 }
 
 /*
- * get called vi ext4_da_writepages after taking page lock (have journal handle)
- * get called via journal_submit_inode_data_buffers (no journal handle)
- * get called via shrink_page_list via pdflush (no journal handle)
- * or grab_page_cache when doing write_begin (have journal handle)
+ * This function can get called via...
+ *   - ext4_da_writepages after taking page lock (have journal handle)
+ *   - journal_submit_inode_data_buffers (no journal handle)
+ *   - shrink_page_list via pdflush (no journal handle)
+ *   - grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
 				struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@
 		 * do block allocation here.
 		 */
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-						ext4_normal_get_block_write);
+					  noalloc_get_block_write);
 		if (!ret) {
 			page_bufs = page_buffers(page);
 			/* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		ret = block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		ret = block_write_full_page(page, noalloc_get_block_write,
+					    wbc);
 
 	return ret;
 }
@@ -2777,7 +2934,7 @@
 	*pagep = page;
 
 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-							ext4_da_get_block_prep);
+				ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@
 	for (i = 0; i < idx; i++)
 		bh = bh->b_this_page;
 
-	if (!buffer_mapped(bh) || (buffer_delay(bh)))
+	if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
 		return 0;
 	return 1;
 }
@@ -3085,12 +3242,10 @@
 	struct inode *inode = page->mapping->host;
 
 	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page,
-					ext4_normal_get_block_write, wbc);
+		return nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 }
 
 static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@
 	int err;
 
 	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-					ext4_normal_get_block_write);
+				  noalloc_get_block_write);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -3227,9 +3382,8 @@
 		 * really know unless we go poke around in the buffer_heads.
 		 * But block_write_full_page will do the right thing.
 		 */
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 	}
 no_write:
 	redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@
 	if (!ext4_can_truncate(inode))
 		return;
 
-	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+	if (ei->i_disksize && inode->i_size == 0 &&
+	    !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
 
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@
 	return ext4_force_commit(inode->i_sb);
 }
 
-int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
-{
-	int err = 0;
-
-	mark_buffer_dirty(bh);
-	if (inode && inode_needs_sync(inode)) {
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh)) {
-			ext4_error(inode->i_sb, __func__,
-				   "IO error syncing inode, "
-				   "inode=%lu, block=%llu",
-				   inode->i_ino,
-				   (unsigned long long)bh->b_blocknr);
-			err = -EIO;
-		}
-	}
-	return err;
-}
-
 /*
  * ext4_setattr()
  *
@@ -4930,7 +5066,8 @@
  */
 int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-	int groups, gdpblocks;
+	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
+	int gdpblocks;
 	int idxblocks;
 	int ret = 0;
 
@@ -4957,8 +5094,8 @@
 		groups += nrblocks;
 
 	gdpblocks = groups;
-	if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
-		groups = EXT4_SB(inode->i_sb)->s_groups_count;
+	if (groups > ngroups)
+		groups = ngroups;
 	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
 		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
 
@@ -4998,7 +5135,7 @@
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677..ed8482e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_set_bit_atomic(lock, bit, addr);
-}
-
 static inline void mb_clear_bit(int bit, void *addr)
 {
 	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_clear_bit_atomic(lock, bit, addr);
-}
-
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
 		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
 {
+	ext4_group_t ngroups;
 	int blocksize;
 	int blocks_per_page;
 	int groups_per_page;
@@ -757,6 +746,7 @@
 
 	inode = page->mapping->host;
 	sb = inode->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	blocksize = 1 << inode->i_blkbits;
 	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
 
@@ -780,7 +770,7 @@
 	for (i = 0; i < groups_per_page; i++) {
 		struct ext4_group_desc *desc;
 
-		if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+		if (first_group + i >= ngroups)
 			break;
 
 		err = -EIO;
@@ -801,17 +791,17 @@
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_lock_group(sb, first_group + i);
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
 			set_bitmap_uptodate(bh[i]);
 			set_buffer_uptodate(bh[i]);
-			spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+			ext4_unlock_group(sb, first_group + i);
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_unlock_group(sb, first_group + i);
 		if (buffer_uptodate(bh[i])) {
 			/*
 			 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@
 		struct ext4_group_info *grinfo;
 
 		group = (first_block + i) >> 1;
-		if (group >= EXT4_SB(sb)->s_groups_count)
+		if (group >= ngroups)
 			break;
 
 		/*
@@ -1078,7 +1068,7 @@
 	return 0;
 }
 
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1091,15 +1081,12 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_clear_bit_atomic(lock, cur, bm);
-		else
-			mb_clear_bit(cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1112,10 +1099,7 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_set_bit_atomic(lock, cur, bm);
-		else
-			mb_set_bit(cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
 }
@@ -1131,7 +1115,7 @@
 	struct super_block *sb = e4b->bd_sb;
 
 	BUG_ON(first + count > (sb->s_blocksize << 3));
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
@@ -1212,7 +1196,7 @@
 	int ord;
 	void *buddy;
 
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	BUG_ON(ex == NULL);
 
 	buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@
 
 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
 	BUG_ON(e4b->bd_group != ex->fe_group);
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_mark_used_double(e4b, start, len);
 
@@ -1330,8 +1314,7 @@
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
-			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -1726,7 +1709,6 @@
 	unsigned free, fragments;
 	unsigned i, bits;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-	struct ext4_group_desc *desc;
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
 	BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@
 	switch (cr) {
 	case 0:
 		BUG_ON(ac->ac_2order == 0);
-		/* If this group is uninitialized, skip it initially */
-		desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
-		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-			return 0;
 
 		/* Avoid using the first bg of a flexgroup for data files */
 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@
 	int block, pnum;
 	int blocks_per_page;
 	int groups_per_page;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t first_group;
 	struct ext4_group_info *grp;
 
@@ -1807,7 +1786,7 @@
 	/* read all groups the page covers into the cache */
 	for (i = 0; i < groups_per_page; i++) {
 
-		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+		if ((first_group + i) >= ngroups)
 			break;
 		grp = ext4_get_group_info(sb, first_group + i);
 		/* take all groups write allocation
@@ -1945,8 +1924,7 @@
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-	ext4_group_t group;
-	ext4_group_t i;
+	ext4_group_t ngroups, group, i;
 	int cr;
 	int err = 0;
 	int bsbits;
@@ -1957,6 +1935,7 @@
 
 	sb = ac->ac_sb;
 	sbi = EXT4_SB(sb);
+	ngroups = ext4_get_groups_count(sb);
 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
 
 	/* first, try the goal */
@@ -2017,11 +1996,11 @@
 		 */
 		group = ac->ac_g_ex.fe_group;
 
-		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+		for (i = 0; i < ngroups; group++, i++) {
 			struct ext4_group_info *grp;
 			struct ext4_group_desc *desc;
 
-			if (group == EXT4_SB(sb)->s_groups_count)
+			if (group == ngroups)
 				group = 0;
 
 			/* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@
 
 			ac->ac_groups_scanned++;
 			desc = ext4_get_group_desc(sb, group, NULL);
-			if (cr == 0 || (desc->bg_flags &
-					cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
-					ac->ac_2order != 0))
+			if (cr == 0)
 				ext4_mb_simple_scan_group(ac, &e4b);
 			else if (cr == 1 &&
 					ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
-
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
 }
@@ -2328,11 +2303,10 @@
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
 	++*pos;
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@
 
 	if (sbi->s_proc != NULL) {
 		remove_proc_entry("mb_groups", sbi->s_proc);
-		remove_proc_entry("mb_history", sbi->s_proc);
+		if (sbi->s_mb_history_max)
+			remove_proc_entry("mb_history", sbi->s_proc);
 	}
 	kfree(sbi->s_mb_history);
 }
@@ -2431,17 +2406,17 @@
 	int i;
 
 	if (sbi->s_proc != NULL) {
-		proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
-				 &ext4_mb_seq_history_fops, sb);
+		if (sbi->s_mb_history_max)
+			proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+					 &ext4_mb_seq_history_fops, sb);
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 				 &ext4_mb_seq_groups_fops, sb);
 	}
 
-	sbi->s_mb_history_max = 1000;
 	sbi->s_mb_history_cur = 0;
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-	sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+	sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
 	/* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2451,7 +2426,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
 
-	if (unlikely(sbi->s_mb_history == NULL))
+	if (sbi->s_mb_history == NULL)
 		return;
 
 	if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@
 
 static int ext4_mb_init_backend(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int metalen;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@
 	struct ext4_group_desc *desc;
 
 	/* This is the number of blocks used by GDT */
-	num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
 				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
 
 	/*
@@ -2644,7 +2620,7 @@
 	for (i = 0; i < num_meta_group_infos; i++) {
 		if ((i + 1) == num_meta_group_infos)
 			metalen = sizeof(*meta_group_info) *
-				(sbi->s_groups_count -
+				(ngroups -
 					(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
 		meta_group_info = kmalloc(metalen, GFP_KERNEL);
 		if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@
 		sbi->s_group_info[i] = meta_group_info;
 	}
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
 			printk(KERN_ERR
@@ -2761,7 +2737,7 @@
 	return 0;
 }
 
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* need to called with the ext4 group lock held */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@
 
 int ext4_mb_release(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int num_meta_group_infos;
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_group_info) {
-		for (i = 0; i < sbi->s_groups_count; i++) {
+		for (i = 0; i < ngroups; i++) {
 			grinfo = ext4_get_group_info(sb, i);
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@
 			ext4_unlock_group(sb, i);
 			kfree(grinfo);
 		}
-		num_meta_group_infos = (sbi->s_groups_count +
+		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@
 		+ le32_to_cpu(es->s_first_data_block);
 
 	len = ac->ac_b_ex.fe_len;
-	if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
-	    in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
-	    in_range(block, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(block + len - 1, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group)) {
+	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, __func__,
-			   "Allocating block %llu in system zone of %d group\n",
-			   block, ac->ac_b_ex.fe_group);
+			   "Allocating blocks %llu-%llu which overlap "
+			   "fs metadata\n", block, block+len);
 		/* File system mounted not to panic on error
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
 		 */
-		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
-				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-				ac->ac_b_ex.fe_len);
+		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			    ac->ac_b_ex.fe_len);
+		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
 			err = -EAGAIN;
 		goto out_err;
 	}
+
+	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -3014,9 +2989,7 @@
 		}
 	}
 #endif
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	mb_set_bits(NULL, bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@
 	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
 	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+
+	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@
  * the function goes through all block freed in the group
  * but not yet committed and marks them used in in-core bitmap.
  * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with the ext4 group lock held
  */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group)
@@ -3473,9 +3447,7 @@
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-				bitmap, entry->start_blk,
-				entry->count);
+		mb_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3484,7 +3456,7 @@
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock held
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3516,8 +3488,7 @@
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -4121,7 +4092,7 @@
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
-	ext4_group_t i;
+	ext4_group_t ngroups, i;
 
 	printk(KERN_ERR "EXT4-fs: Can't allocate:"
 			" Allocation context details:\n");
@@ -4145,7 +4116,8 @@
 	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
 		ac->ac_found);
 	printk(KERN_ERR "EXT4-fs: groups: \n");
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	ngroups = ext4_get_groups_count(sb);
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@
 
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	int ret;
 	int freed = 0;
 
 	trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
 		   sb->s_id, needed);
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+	for (i = 0; i < ngroups && needed > 0; i++) {
 		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
 		freed += ret;
 		needed -= ret;
@@ -4859,29 +4831,25 @@
 		new_entry->group  = block_group;
 		new_entry->count = count;
 		new_entry->t_tid = handle->h_transaction->t_tid;
+
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
-		ext4_unlock_group(sb, block_group);
 	} else {
-		ext4_lock_group(sb, block_group);
 		/* need to update group_info->bb_free and bitmap
 		 * with group lock held. generate_buddy look at
 		 * them with group lock_held
 		 */
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		ext4_lock_group(sb, block_group);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		ext4_unlock_group(sb, block_group);
 	}
 
-	spin_lock(sb_bgl_lock(sbi, block_group));
 	ret = ext4_free_blks_count(sb, gdp) + count;
 	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd..75e34f6 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
-#include "group.h"
 
 /*
  * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1..07eb664 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -750,7 +749,7 @@
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->offs = ((char *) de - base)>>2;
 			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
@@ -1148,7 +1147,8 @@
 	unsigned rec_len = 0;
 
 	while (count--) {
-		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+						(from + (map->offs<<2));
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -2006,9 +2006,13 @@
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-	 * here (on lock_super()), so race with ext4_link() which might bump
+	 * here (on s_orphan_lock), so race with ext4_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
 	ext4_std_error(inode->i_sb, err);
 	return err;
 }
@@ -2066,11 +2070,9 @@
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@
 out_err:
 	ext4_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
@@ -2533,6 +2535,7 @@
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext4_permission,
+	.fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff..0000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext4/namei.h
- *
- * Copyright (C) 2005 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
-*/
-
-extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd..27eb289 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 
 #include "ext4_jbd2.h"
-#include "group.h"
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -302,7 +301,7 @@
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -643,11 +642,12 @@
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -809,7 +809,7 @@
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@
         /*
          * OK, now we've set up the new group.  Time to make it active.
          *
-         * Current kernels don't lock all allocations via lock_super(),
+         * We do not lock all allocations via s_resize_lock
          * so we have to be safe wrt. concurrent accesses the group
          * data.  So we need to be careful to set all of the relevant
          * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -948,7 +948,7 @@
 	sb->s_dirt = 1;
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -986,7 +986,7 @@
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = ext4_blocks_count(es);
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
@@ -1056,11 +1056,11 @@
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
 	if (o_blocks_count != ext4_blocks_count(es)) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1070,14 +1070,14 @@
 						 EXT4_SB(sb)->s_sbh))) {
 		ext4_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e..012c425 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/vmalloc.h>
 #include <linux/jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -45,16 +46,20 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "namei.h"
-#include "group.h"
+
+static int default_mb_history_length = 1000;
+
+module_param_named(default_mb_history_length, default_mb_history_length,
+		   int, 0644);
+MODULE_PARM_DESC(default_mb_history_length,
+		 "Default number of entries saved for mb_history");
 
 struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync);
+static int ext4_commit_super(struct super_block *sb, int sync);
 static void ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
 static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@
 {
 	return le32_to_cpu(bg->bg_block_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@
 {
 	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@
 {
 	return le32_to_cpu(bg->bg_inode_table_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
 __u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@
 {
 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 }
 
 __u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@
 {
 	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 }
 
 __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@
 {
 	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 }
 
 __u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@
 {
 	return le16_to_cpu(bg->bg_itable_unused_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 }
 
 void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@
 	journal = EXT4_SB(sb)->s_journal;
 	if (journal) {
 		if (is_journal_aborted(journal)) {
-			ext4_abort(sb, __func__,
-				   "Detected aborted journal");
+			ext4_abort(sb, __func__, "Detected aborted journal");
 			return ERR_PTR(-EROFS);
 		}
 		return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@
 			jbd2_journal_abort(journal, -EIO);
 	}
 	if (test_opt(sb, ERRORS_RO)) {
-		printk(KERN_CRIT "Remounting filesystem read-only\n");
+		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 		sb->s_flags |= MS_RDONLY;
 	}
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, ERRORS_PANIC))
 		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
@@ -395,8 +399,6 @@
 {
 	va_list args;
 
-	printk(KERN_CRIT "ext4_abort called.\n");
-
 	va_start(args, fmt);
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 	vprintk(fmt, args);
@@ -409,7 +411,7 @@
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	printk(KERN_CRIT "Remounting filesystem read-only\n");
+	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@
 		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
+void ext4_msg (struct super_block * sb, const char *prefix,
+		   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+}
+
 void ext4_warning(struct super_block *sb, const char *function,
 		  const char *fmt, ...)
 {
@@ -431,7 +445,7 @@
 }
 
 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
-				const char *function, const char *fmt, ...)
+			   const char *function, const char *fmt, ...)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -447,7 +461,7 @@
 	if (test_opt(sb, ERRORS_CONT)) {
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 0);
+		ext4_commit_super(sb, 0);
 		return;
 	}
 	ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@
 	return;
 }
 
-
 void ext4_update_dynamic_rev(struct super_block *sb)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@
 /*
  * Open the external journal device
  */
-static struct block_device *ext4_blkdev_get(dev_t dev)
+static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 {
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@
 	return bdev;
 
 fail:
-	printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
+	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 			__bdevname(dev, b), PTR_ERR(bdev));
 	return NULL;
 }
@@ -543,8 +556,8 @@
 {
 	struct list_head *l;
 
-	printk(KERN_ERR "sb orphan head is %d\n",
-	       le32_to_cpu(sbi->s_es->s_last_orphan));
+	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
+		 le32_to_cpu(sbi->s_es->s_last_orphan));
 
 	printk(KERN_ERR "sb_info orphan list:\n");
 	list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,12 @@
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_super(sb);
+	lock_kernel();
+	if (sb->s_dirt)
+		ext4_commit_super(sb, 1);
+
+	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
 	ext4_xattr_put_super(sb);
@@ -576,7 +595,7 @@
 	if (!(sb->s_flags & MS_RDONLY)) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 	if (sbi->s_proc) {
 		remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +605,10 @@
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	kfree(sbi->s_flex_groups);
+	if (is_vmalloc_addr(sbi->s_flex_groups))
+		vfree(sbi->s_flex_groups);
+	else
+		kfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -625,11 +647,8 @@
 	unlock_super(sb);
 	kobject_put(&sbi->s_kobj);
 	wait_for_completion(&sbi->s_kobj_unregister);
-	lock_super(sb);
-	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
 }
 
 static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +663,7 @@
 	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
+
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	ei->i_acl = EXT4_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +684,16 @@
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
+
 	return &ei->vfs_inode;
 }
 
 static void ext4_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
-		printk("EXT4 Inode %p: orphan list check failed!\n",
-			EXT4_I(inode));
+		ext4_msg(inode->i_sb, KERN_ERR,
+			 "Inode %lu (%p): orphan list check failed!",
+			 inode->i_ino, EXT4_I(inode));
 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 				EXT4_I(inode), sizeof(struct ext4_inode_info),
 				true);
@@ -870,12 +892,12 @@
 		seq_puts(seq, ",noauto_da_alloc");
 
 	ext4_show_quota_options(seq, sb);
+
 	return 0;
 }
 
-
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
-		u64 ino, u32 generation)
+					u64 ino, u32 generation)
 {
 	struct inode *inode;
 
@@ -904,14 +926,14 @@
 }
 
 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
 }
 
 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
@@ -923,7 +945,8 @@
  * which would prevent try_to_free_buffers() from freeing them, we must use
  * jbd2 layer's try_to_free_buffers() function to release them.
  */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+				 gfp_t wait)
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
@@ -992,7 +1015,6 @@
 	.dirty_inode	= ext4_dirty_inode,
 	.delete_inode	= ext4_delete_inode,
 	.put_super	= ext4_put_super,
-	.write_super	= ext4_write_super,
 	.sync_fs	= ext4_sync_fs,
 	.freeze_fs	= ext4_freeze,
 	.unfreeze_fs	= ext4_unfreeze,
@@ -1007,6 +1029,25 @@
 	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
 
+static const struct super_operations ext4_nojournal_sops = {
+	.alloc_inode	= ext4_alloc_inode,
+	.destroy_inode	= ext4_destroy_inode,
+	.write_inode	= ext4_write_inode,
+	.dirty_inode	= ext4_dirty_inode,
+	.delete_inode	= ext4_delete_inode,
+	.write_super	= ext4_write_super,
+	.put_super	= ext4_put_super,
+	.statfs		= ext4_statfs,
+	.remount_fs	= ext4_remount,
+	.clear_inode	= ext4_clear_inode,
+	.show_options	= ext4_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= ext4_quota_read,
+	.quota_write	= ext4_quota_write,
+#endif
+	.bdev_try_to_free_page = bdev_try_to_free_page,
+};
+
 static const struct export_operations ext4_export_ops = {
 	.fh_to_dentry = ext4_fh_to_dentry,
 	.fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1064,13 @@
 	Opt_journal_update, Opt_journal_dev,
 	Opt_journal_checksum, Opt_journal_async_commit,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-	Opt_data_err_abort, Opt_data_err_ignore,
+	Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
 	Opt_usrquota, Opt_grpquota, Opt_i_version,
 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+	Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio
 };
 
@@ -1069,6 +1111,7 @@
 	{Opt_data_writeback, "data=writeback"},
 	{Opt_data_err_abort, "data_err=abort"},
 	{Opt_data_err_ignore, "data_err=ignore"},
+	{Opt_mb_history_length, "mb_history_length=%u"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1130,8 @@
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_block_validity, "block_validity"},
+	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
 	{Opt_journal_ioprio, "journal_ioprio=%u"},
 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1147,9 @@
 
 	if (!options || strncmp(options, "sb=", 3) != 0)
 		return 1;	/* Default location */
+
 	options += 3;
-	/*todo: use simple_strtoll with >32bit ext4 */
+	/* TODO: use simple_strtoll with >32bit ext4 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
 		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1159,7 @@
 	if (*options == ',')
 		options++;
 	*data = (void *) options;
+
 	return sb_block;
 }
 
@@ -1206,8 +1253,7 @@
 #else
 		case Opt_user_xattr:
 		case Opt_nouser_xattr:
-			printk(KERN_ERR "EXT4 (no)user_xattr options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
 			break;
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1266,7 @@
 #else
 		case Opt_acl:
 		case Opt_noacl:
-			printk(KERN_ERR "EXT4 (no)acl options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
 			break;
 #endif
 		case Opt_journal_update:
@@ -1231,16 +1276,16 @@
 			   user to specify an existing inode to be the
 			   journal file. */
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					 "Cannot specify journal on remount");
 				return 0;
 			}
 			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"Cannot specify journal on remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option))
@@ -1294,9 +1339,8 @@
 			if (is_remount) {
 				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
 						!= data_opt) {
-					printk(KERN_ERR
-						"EXT4-fs: cannot change data "
-						"mode on remount\n");
+					ext4_msg(sb, KERN_ERR,
+						"Cannot change data mode on remount");
 					return 0;
 				}
 			} else {
@@ -1310,6 +1354,13 @@
 		case Opt_data_err_ignore:
 			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
 			break;
+		case Opt_mb_history_length:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			sbi->s_mb_history_max = option;
+			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
 			qtype = USRQUOTA;
@@ -1319,31 +1370,31 @@
 set_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR
-				       "EXT4-fs: Cannot change journaled "
-				       "quota options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR,
+				       "Cannot change journaled "
+				       "quota options when quota turned on");
 				return 0;
 			}
 			qname = match_strdup(&args[0]);
 			if (!qname) {
-				printk(KERN_ERR
-					"EXT4-fs: not enough memory for "
-					"storing quotafile name.\n");
+				ext4_msg(sb, KERN_ERR,
+					"Not enough memory for "
+					"storing quotafile name");
 				return 0;
 			}
 			if (sbi->s_qf_names[qtype] &&
 			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				printk(KERN_ERR
-					"EXT4-fs: %s quota file already "
-					"specified.\n", QTYPE2NAME(qtype));
+				ext4_msg(sb, KERN_ERR,
+					"%s quota file already "
+					"specified", QTYPE2NAME(qtype));
 				kfree(qname);
 				return 0;
 			}
 			sbi->s_qf_names[qtype] = qname;
 			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				printk(KERN_ERR
-					"EXT4-fs: quotafile must be on "
-					"filesystem root.\n");
+				ext4_msg(sb, KERN_ERR,
+					"quotafile must be on "
+					"filesystem root");
 				kfree(sbi->s_qf_names[qtype]);
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
@@ -1358,9 +1409,9 @@
 clear_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			/*
@@ -1377,9 +1428,9 @@
 set_qf_format:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1446,8 @@
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_loaded(sb)) {
-				printk(KERN_ERR "EXT4-fs: Cannot change quota "
-					"options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR, "Cannot change quota "
+					"options when quota turned on");
 				return 0;
 			}
 			clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1458,8 @@
 		case Opt_quota:
 		case Opt_usrquota:
 		case Opt_grpquota:
-			printk(KERN_ERR
-				"EXT4-fs: quota options not supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"quota options not supported");
 			break;
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -1416,9 +1467,8 @@
 		case Opt_offgrpjquota:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
-			printk(KERN_ERR
-				"EXT4-fs: journaled quota options not "
-				"supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"journaled quota options not supported");
 			break;
 		case Opt_noquota:
 			break;
@@ -1443,8 +1493,9 @@
 			break;
 		case Opt_resize:
 			if (!is_remount) {
-				printk("EXT4-fs: resize option only available "
-					"for remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"resize option only available "
+					"for remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1525,21 @@
 		case Opt_delalloc:
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_block_validity:
+			set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
+		case Opt_noblock_validity:
+			clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
 		case Opt_inode_readahead_blks:
 			if (match_int(&args[0], &option))
 				return 0;
 			if (option < 0 || option > (1 << 30))
 				return 0;
-			if (option & (option - 1)) {
-				printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
-				       " must be a power of 2\n");
+			if (!is_power_of_2(option)) {
+				ext4_msg(sb, KERN_ERR,
+					 "EXT4-fs: inode_readahead_blks"
+					 " must be a power of 2");
 				return 0;
 			}
 			sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1566,9 @@
 				set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 			break;
 		default:
-			printk(KERN_ERR
-			       "EXT4-fs: Unrecognized mount option \"%s\" "
-			       "or missing value\n", p);
+			ext4_msg(sb, KERN_ERR,
+			       "Unrecognized mount option \"%s\" "
+			       "or missing value", p);
 			return 0;
 		}
 	}
@@ -1528,21 +1586,21 @@
 				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
 		    (sbi->s_qf_names[GRPQUOTA] &&
 				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
-			printk(KERN_ERR "EXT4-fs: old and new quota "
-					"format mixing.\n");
+			ext4_msg(sb, KERN_ERR, "old and new quota "
+					"format mixing");
 			return 0;
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
-					"not specified.\n");
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
+					"not specified");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
 					"specified with no journaling "
-					"enabled.\n");
+					"enabled");
 			return 0;
 		}
 	}
@@ -1557,32 +1615,32 @@
 	int res = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
-		       "forcing read-only mode\n");
+		ext4_msg(sb, KERN_ERR, "revision level too high, "
+			 "forcing read-only mode");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
-		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
+			 "running e2fsck is recommended");
 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: mounting fs with errors, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: mounting fs with errors, "
+			 "running e2fsck is recommended");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: maximal mount count reached, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: maximal mount count reached, "
+			 "running e2fsck is recommended");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: checktime reached, "
-		       "running e2fsck is recommended\n");
-	if (!sbi->s_journal) 
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: checktime reached, "
+			 "running e2fsck is recommended");
+	if (!sbi->s_journal)
 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1650,7 @@
 	if (sbi->s_journal)
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1661,11 @@
 			sbi->s_mount_opt);
 
 	if (EXT4_SB(sb)->s_journal) {
-		printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
-		       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+		ext4_msg(sb, KERN_INFO, "%s journal on %s",
+		       EXT4_SB(sb)->s_journal->j_inode ? "internal" :
 		       "external", EXT4_SB(sb)->s_journal->j_devname);
 	} else {
-		printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+		ext4_msg(sb, KERN_INFO, "no journal");
 	}
 	return res;
 }
@@ -1616,10 +1674,10 @@
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = NULL;
-	struct buffer_head *bh;
 	ext4_group_t flex_group_count;
 	ext4_group_t flex_group;
 	int groups_per_flex = 0;
+	size_t size;
 	int i;
 
 	if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1692,21 @@
 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-	sbi->s_flex_groups = kzalloc(flex_group_count *
-				     sizeof(struct flex_groups), GFP_KERNEL);
+	size = flex_group_count * sizeof(struct flex_groups);
+	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory for "
-				"%u flex groups\n", flex_group_count);
+		sbi->s_flex_groups = vmalloc(size);
+		if (sbi->s_flex_groups)
+			memset(sbi->s_flex_groups, 0, size);
+	}
+	if (sbi->s_flex_groups == NULL) {
+		ext4_msg(sb, KERN_ERR, "not enough memory for "
+				"%u flex groups", flex_group_count);
 		goto failed;
 	}
 
 	for (i = 0; i < sbi->s_groups_count; i++) {
-		gdp = ext4_get_group_desc(sb, i, &bh);
+		gdp = ext4_get_group_desc(sb, i, NULL);
 
 		flex_group = ext4_flex_group(sbi, i);
 		atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1787,44 @@
 
 		block_bitmap = ext4_block_bitmap(sb, gdp);
 		if (block_bitmap < first_block || block_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
-			       "(block %llu)!\n", i, block_bitmap);
+			       "(block %llu)!", i, block_bitmap);
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
-			       "(block %llu)!\n", i, inode_bitmap);
+			       "(block %llu)!", i, inode_bitmap);
 			return 0;
 		}
 		inode_table = ext4_inode_table(sb, gdp);
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode table for group %u not in group "
-			       "(block %llu)!\n", i, inode_table);
+			       "(block %llu)!", i, inode_table);
 			return 0;
 		}
-		spin_lock(sb_bgl_lock(sbi, i));
+		ext4_lock_group(sb, i);
 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-			       "Checksum for group %u failed (%u!=%u)\n",
-			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
-			       gdp)), le16_to_cpu(gdp->bg_checksum));
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Checksum for group %u failed (%u!=%u)",
+				 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+				     gdp)), le16_to_cpu(gdp->bg_checksum));
 			if (!(sb->s_flags & MS_RDONLY)) {
-				spin_unlock(sb_bgl_lock(sbi, i));
+				ext4_unlock_group(sb, i);
 				return 0;
 			}
 		}
-		spin_unlock(sb_bgl_lock(sbi, i));
+		ext4_unlock_group(sb, i);
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 	}
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
 
@@ -1796,8 +1859,8 @@
 	}
 
 	if (bdev_read_only(sb->s_bdev)) {
-		printk(KERN_ERR "EXT4-fs: write access "
-			"unavailable, skipping orphan cleanup.\n");
+		ext4_msg(sb, KERN_ERR, "write access "
+			"unavailable, skipping orphan cleanup");
 		return;
 	}
 
@@ -1811,8 +1874,7 @@
 	}
 
 	if (s_flags & MS_RDONLY) {
-		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
 		sb->s_flags &= ~MS_RDONLY;
 	}
 #ifdef CONFIG_QUOTA
@@ -1823,9 +1885,9 @@
 		if (EXT4_SB(sb)->s_qf_names[i]) {
 			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
-				printk(KERN_ERR
-					"EXT4-fs: Cannot turn on journaled "
-					"quota: error %d\n", ret);
+				ext4_msg(sb, KERN_ERR,
+					"Cannot turn on journaled "
+					"quota: error %d", ret);
 		}
 	}
 #endif
@@ -1842,16 +1904,16 @@
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 		vfs_dq_init(inode);
 		if (inode->i_nlink) {
-			printk(KERN_DEBUG
-				"%s: truncating inode %lu to %lld bytes\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: truncating inode %lu to %lld bytes",
 				__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
 			ext4_truncate(inode);
 			nr_truncates++;
 		} else {
-			printk(KERN_DEBUG
-				"%s: deleting unreferenced inode %lu\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: deleting unreferenced inode %lu",
 				__func__, inode->i_ino);
 			jbd_debug(2, "deleting unreferenced inode %lu\n",
 				  inode->i_ino);
@@ -1863,11 +1925,11 @@
 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
 
 	if (nr_orphans)
-		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
-		       sb->s_id, PLURAL(nr_orphans));
+		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+		       PLURAL(nr_orphans));
 	if (nr_truncates)
-		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
-		       sb->s_id, PLURAL(nr_truncates));
+		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+		       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
 	for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1939,7 @@
 #endif
 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 }
+
 /*
  * Maximal extent format file size.
  * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1990,19 @@
 	loff_t res = EXT4_NDIR_BLOCKS;
 	int meta_blocks;
 	loff_t upper_limit;
-	/* This is calculated to be the largest file size for a
-	 * dense, bitmapped file such that the total number of
-	 * sectors in the file, including data and all indirect blocks,
-	 * does not exceed 2^48 -1
-	 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
-	 * total number of  512 bytes blocks of the file
+	/* This is calculated to be the largest file size for a dense, block
+	 * mapped file such that the file's total number of 512-byte sectors,
+	 * including data and all indirect blocks, does not exceed (2^48 - 1).
+	 *
+	 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
+	 * number of 512-byte sectors of the file.
 	 */
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LBD is not enabled
-		 * implies the inode i_block represent total blocks in
-		 * 512 bytes 32 == size of vfs inode i_blocks * 8
+		 * !has_huge_files or CONFIG_LBD not enabled implies that
+		 * the inode i_block field represents total file blocks in
+		 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
 		 */
 		upper_limit = (1LL << 32) - 1;
 
@@ -1981,7 +2044,7 @@
 }
 
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-				ext4_fsblk_t logical_sb_block, int nr)
+				   ext4_fsblk_t logical_sb_block, int nr)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2058,7 @@
 	bg = sbi->s_desc_per_block * nr;
 	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;
+
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
@@ -2091,8 +2155,7 @@
 	if (parse_strtoul(buf, 0x40000000, &t))
 		return -EINVAL;
 
-	/* inode_readahead_blks must be a power of 2 */
-	if (t & (t-1))
+	if (!is_power_of_2(t))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2163,7 @@
 }
 
 static ssize_t sbi_ui_show(struct ext4_attr *a,
-				struct ext4_sb_info *sbi, char *buf)
+			   struct ext4_sb_info *sbi, char *buf)
 {
 	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
 
@@ -2205,7 +2268,6 @@
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				__releases(kernel_lock)
 				__acquires(kernel_lock)
-
 {
 	struct buffer_head *bh;
 	struct ext4_super_block *es = NULL;
@@ -2256,7 +2318,7 @@
 
 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
 	if (!blocksize) {
-		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
+		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
 		goto out_fail;
 	}
 
@@ -2272,7 +2334,7 @@
 	}
 
 	if (!(bh = sb_bread(sb, logical_sb_block))) {
-		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
+		ext4_msg(sb, KERN_ERR, "unable to read superblock");
 		goto out_fail;
 	}
 	/*
@@ -2321,6 +2383,7 @@
 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+	sbi->s_mb_history_max = default_mb_history_length;
 
 	set_opt(sbi->s_mount_opt, BARRIER);
 
@@ -2330,7 +2393,6 @@
 	 */
 	set_opt(sbi->s_mount_opt, DELALLOC);
 
-
 	if (!parse_options((char *) data, sb, &journal_devnum,
 			   &journal_ioprio, NULL, 0))
 		goto failed_mount;
@@ -2342,9 +2404,9 @@
 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: feature flags set on rev 0 fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+		       "feature flags set on rev 0 fs, "
+		       "running e2fsck is recommended");
 
 	/*
 	 * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2415,18 @@
 	 */
 	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
 	if (features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
 			~EXT4_FEATURE_INCOMPAT_SUPP));
 		goto failed_mount;
 	}
 	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
 	if (!(sb->s_flags & MS_RDONLY) && features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount RDWR because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
 			~EXT4_FEATURE_RO_COMPAT_SUPP));
 		goto failed_mount;
@@ -2376,9 +2440,9 @@
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
-			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
+			ext4_msg(sb, KERN_ERR, "Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LBD.\n", sb->s_id);
+					"without CONFIG_LBD");
 			goto failed_mount;
 		}
 	}
@@ -2386,17 +2450,15 @@
 
 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
-		printk(KERN_ERR
-		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
-		       blocksize, sb->s_id);
+		ext4_msg(sb, KERN_ERR,
+		       "Unsupported filesystem blocksize %d", blocksize);
 		goto failed_mount;
 	}
 
 	if (sb->s_blocksize != blocksize) {
-
 		/* Validate the filesystem blocksize */
 		if (!sb_set_blocksize(sb, blocksize)) {
-			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
+			ext4_msg(sb, KERN_ERR, "bad block size %d",
 					blocksize);
 			goto failed_mount;
 		}
@@ -2406,15 +2468,15 @@
 		offset = do_div(logical_sb_block, blocksize);
 		bh = sb_bread(sb, logical_sb_block);
 		if (!bh) {
-			printk(KERN_ERR
-			       "EXT4-fs: Can't read superblock on 2nd try.\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Can't read superblock on 2nd try");
 			goto failed_mount;
 		}
 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-			printk(KERN_ERR
-			       "EXT4-fs: Magic mismatch, very weird !\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Magic mismatch, very weird!");
 			goto failed_mount;
 		}
 	}
@@ -2432,30 +2494,33 @@
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported inode size: %d\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported inode size: %d",
 			       sbi->s_inode_size);
 			goto failed_mount;
 		}
 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
 	}
+
 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
 		    !is_power_of_2(sbi->s_desc_size)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported descriptor size %lu\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported descriptor size %lu",
 			       sbi->s_desc_size);
 			goto failed_mount;
 		}
 	} else
 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
+
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
 	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
 		goto cantfind_ext4;
+
 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
 	if (sbi->s_inodes_per_block == 0)
 		goto cantfind_ext4;
@@ -2466,6 +2531,7 @@
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
 	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2549,24 @@
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #blocks per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#blocks per group too big: %lu",
 		       sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #inodes per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#inodes per group too big: %lu",
 		       sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
 	if (ext4_blocks_count(es) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
-			" too large to mount safely\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "filesystem"
+			" too large to mount safely");
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
-					"enabled\n");
+			ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
 		goto failed_mount;
 	}
 
@@ -2511,21 +2576,21 @@
 	/* check blocks count against device size */
 	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-		printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
-		       "exceeds size of device (%llu blocks)\n",
+		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
+		       "exceeds size of device (%llu blocks)",
 		       ext4_blocks_count(es), blocks_count);
 		goto failed_mount;
 	}
 
-        /*
-         * It makes no sense for the first data block to be beyond the end
-         * of the filesystem.
-         */
-        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-                printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
-		       "block %u is beyond end of filesystem (%llu)\n",
-		       le32_to_cpu(es->s_first_data_block),
-		       ext4_blocks_count(es));
+	/*
+	 * It makes no sense for the first data block to be beyond the end
+	 * of the filesystem.
+	 */
+	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+                ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
+			 "block %u is beyond end of filesystem (%llu)",
+			 le32_to_cpu(es->s_first_data_block),
+			 ext4_blocks_count(es));
 		goto failed_mount;
 	}
 	blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2598,9 @@
 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
-		printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+		ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
 		       "(block count %llu, first data block %u, "
-		       "blocks per group %lu)\n", sbi->s_groups_count,
+		       "blocks per group %lu)", sbi->s_groups_count,
 		       ext4_blocks_count(es),
 		       le32_to_cpu(es->s_first_data_block),
 		       EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2612,7 @@
 	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		ext4_msg(sb, KERN_ERR, "not enough memory");
 		goto failed_mount;
 	}
 
@@ -2562,21 +2627,21 @@
 		block = descriptor_loc(sb, logical_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk(KERN_ERR "EXT4-fs: "
-			       "can't read group descriptor %d\n", i);
+			ext4_msg(sb, KERN_ERR,
+			       "can't read group descriptor %d", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
 	if (!ext4_check_descriptors(sb)) {
-		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
+		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		goto failed_mount2;
 	}
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
 		if (!ext4_fill_flex_info(sb)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unable to initialize "
-			       "flex_bg meta info!\n");
+			ext4_msg(sb, KERN_ERR,
+			       "unable to initialize "
+			       "flex_bg meta info!");
 			goto failed_mount2;
 		}
 
@@ -2598,7 +2663,7 @@
 		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
 	}
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount3;
 	}
 
@@ -2607,7 +2672,11 @@
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	sb->s_op = &ext4_sops;
+	if (!test_opt(sb, NOLOAD) &&
+	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+		sb->s_op = &ext4_sops;
+	else
+		sb->s_op = &ext4_nojournal_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -2615,6 +2684,8 @@
 	sb->dq_op = &ext4_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
@@ -2632,13 +2703,13 @@
 			goto failed_mount3;
 		if (!(sb->s_flags & MS_RDONLY) &&
 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
-			printk(KERN_CRIT "EXT4-fs error (device %s): "
+			ext4_msg(sb, KERN_CRIT, "error: "
 			       "ext4_fill_super: Journal transaction "
-			       "%u is corrupt\n", sb->s_id,
+			       "%u is corrupt",
 			       EXT4_SB(sb)->s_journal->j_failed_commit);
 			if (test_opt(sb, ERRORS_RO)) {
-				printk(KERN_CRIT
-				       "Mounting filesystem read-only\n");
+				ext4_msg(sb, KERN_CRIT,
+				       "Mounting filesystem read-only");
 				sb->s_flags |= MS_RDONLY;
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2717,14 @@
 			if (test_opt(sb, ERRORS_PANIC)) {
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-				ext4_commit_super(sb, es, 1);
+				ext4_commit_super(sb, 1);
 				goto failed_mount4;
 			}
 		}
 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
-		printk(KERN_ERR "EXT4-fs: required journal recovery "
-		       "suppressed and not mounted read-only\n");
+		ext4_msg(sb, KERN_ERR, "required journal recovery "
+		       "suppressed and not mounted read-only");
 		goto failed_mount4;
 	} else {
 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2737,7 @@
 	if (ext4_blocks_count(es) > 0xffffffffULL &&
 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
-		printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
+		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
 		goto failed_mount4;
 	}
 
@@ -2704,8 +2775,8 @@
 	case EXT4_MOUNT_WRITEBACK_DATA:
 		if (!jbd2_journal_check_available_features
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
-			printk(KERN_ERR "EXT4-fs: Journal does not support "
-			       "requested data journaling mode\n");
+			ext4_msg(sb, KERN_ERR, "Journal does not support "
+			       "requested data journaling mode");
 			goto failed_mount4;
 		}
 	default:
@@ -2717,8 +2788,8 @@
 
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
-			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
-				"its supported only with writeback mode\n");
+			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
+				"its supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
 	}
@@ -2729,18 +2800,18 @@
 
 	root = ext4_iget(sb, EXT4_ROOT_INO);
 	if (IS_ERR(root)) {
-		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
+		ext4_msg(sb, KERN_ERR, "get root inode failed");
 		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		iput(root);
-		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
+		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
-		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
+		ext4_msg(sb, KERN_ERR, "get root dentry failed");
 		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
@@ -2769,22 +2840,29 @@
 							sbi->s_inode_size) {
 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
 						       EXT4_GOOD_OLD_INODE_SIZE;
-		printk(KERN_INFO "EXT4-fs: required extra inode space not"
-			"available.\n");
+		ext4_msg(sb, KERN_INFO, "required extra inode space not"
+			 "available");
 	}
 
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-				"requested data journaling mode\n");
+		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
+			 "requested data journaling mode");
 		clear_opt(sbi->s_mount_opt, DELALLOC);
 	} else if (test_opt(sb, DELALLOC))
-		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+		ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
+
+	err = ext4_setup_system_zone(sb);
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "failed to initialize system "
+			 "zone (%d)\n", err);
+		goto failed_mount4;
+	}
 
 	ext4_ext_init(sb);
 	err = ext4_mb_init(sb, needs_recovery);
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-		       err);
+		ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
+			 err);
 		goto failed_mount4;
 	}
 
@@ -2798,19 +2876,11 @@
 		goto failed_mount4;
 	};
 
-	/*
-	 * akpm: core read_super() calls in here with the superblock locked.
-	 * That deadlocks, because orphan cleanup needs to lock the superblock
-	 * in numerous places.  Here we just pop the lock - it's relatively
-	 * harmless, because we are now ready to accept write_super() requests,
-	 * and aviro says that's the only reason for hanging onto the
-	 * superblock lock.
-	 */
 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery) {
-		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
+		ext4_msg(sb, KERN_INFO, "recovery complete");
 		ext4_mark_recovery_complete(sb, es);
 	}
 	if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2893,30 @@
 	} else
 		descr = "out journal";
 
-	printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
-	       sb->s_id, descr);
+	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
 
 	lock_kernel();
 	return 0;
 
 cantfind_ext4:
 	if (!silent)
-		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
 failed_mount4:
-	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+	ext4_msg(sb, KERN_ERR, "mount failed");
+	ext4_release_system_zone(sb);
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	if (sbi->s_flex_groups) {
+		if (is_vmalloc_addr(sbi->s_flex_groups))
+			vfree(sbi->s_flex_groups);
+		else
+			kfree(sbi->s_flex_groups);
+	}
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2937,7 @@
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
@@ -2906,27 +2982,27 @@
 
 	journal_inode = ext4_iget(sb, journal_inum);
 	if (IS_ERR(journal_inode)) {
-		printk(KERN_ERR "EXT4-fs: no journal found.\n");
+		ext4_msg(sb, KERN_ERR, "no journal found");
 		return NULL;
 	}
 	if (!journal_inode->i_nlink) {
 		make_bad_inode(journal_inode);
 		iput(journal_inode);
-		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
+		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
 		return NULL;
 	}
 
 	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
 		  journal_inode, journal_inode->i_size);
 	if (!S_ISREG(journal_inode->i_mode)) {
-		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
+		ext4_msg(sb, KERN_ERR, "invalid journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
 
 	journal = jbd2_journal_init_inode(journal_inode);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
+		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
@@ -2950,22 +3026,22 @@
 
 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
 
-	bdev = ext4_blkdev_get(j_dev);
+	bdev = ext4_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
 		return NULL;
 
 	if (bd_claim(bdev, sb)) {
-		printk(KERN_ERR
-			"EXT4-fs: failed to claim external journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"failed to claim external journal device");
 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 		return NULL;
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
-		printk(KERN_ERR
-			"EXT4-fs: blocksize too small for journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"blocksize too small for journal device");
 		goto out_bdev;
 	}
 
@@ -2973,8 +3049,8 @@
 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
 	set_blocksize(bdev, blocksize);
 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
-		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
-		       "external journal\n");
+		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
+		       "external journal");
 		goto out_bdev;
 	}
 
@@ -2982,14 +3058,14 @@
 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-		printk(KERN_ERR "EXT4-fs: external journal has "
-					"bad superblock\n");
+		ext4_msg(sb, KERN_ERR, "external journal has "
+					"bad superblock");
 		brelse(bh);
 		goto out_bdev;
 	}
 
 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
+		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
 		brelse(bh);
 		goto out_bdev;
 	}
@@ -3001,25 +3077,26 @@
 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
+		ext4_msg(sb, KERN_ERR, "failed to create device journal");
 		goto out_bdev;
 	}
 	journal->j_private = sb;
 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
-		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
+		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
 		goto out_journal;
 	}
 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-		printk(KERN_ERR "EXT4-fs: External journal has more than one "
-					"user (unsupported) - %d\n",
+		ext4_msg(sb, KERN_ERR, "External journal has more than one "
+					"user (unsupported) - %d",
 			be32_to_cpu(journal->j_superblock->s_nr_users));
 		goto out_journal;
 	}
 	EXT4_SB(sb)->journal_bdev = bdev;
 	ext4_init_journal_params(sb, journal);
 	return journal;
+
 out_journal:
 	jbd2_journal_destroy(journal);
 out_bdev:
@@ -3041,8 +3118,8 @@
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
-			"numbers have changed\n");
+		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
+			"numbers have changed");
 		journal_dev = new_decode_dev(journal_devnum);
 	} else
 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3131,23 @@
 	 * crash?  For recovery, we need to check in advance whether we
 	 * can get read-write access to the device.
 	 */
-
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_INFO "EXT4-fs: INFO: recovery "
-					"required on readonly filesystem.\n");
+			ext4_msg(sb, KERN_INFO, "INFO: recovery "
+					"required on readonly filesystem");
 			if (really_read_only) {
-				printk(KERN_ERR "EXT4-fs: write access "
-					"unavailable, cannot proceed.\n");
+				ext4_msg(sb, KERN_ERR, "write access "
+					"unavailable, cannot proceed");
 				return -EROFS;
 			}
-			printk(KERN_INFO "EXT4-fs: write access will "
-			       "be enabled during recovery.\n");
+			ext4_msg(sb, KERN_INFO, "write access will "
+			       "be enabled during recovery");
 		}
 	}
 
 	if (journal_inum && journal_dev) {
-		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
-		       "and inode journals!\n");
+		ext4_msg(sb, KERN_ERR, "filesystem has both journal "
+		       "and inode journals!");
 		return -EINVAL;
 	}
 
@@ -3084,14 +3160,14 @@
 	}
 
 	if (journal->j_flags & JBD2_BARRIER)
-		printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers enabled");
 	else
-		printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers disabled");
 
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
 		err = jbd2_journal_update_format(journal);
 		if (err)  {
-			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
+			ext4_msg(sb, KERN_ERR, "error updating journal");
 			jbd2_journal_destroy(journal);
 			return err;
 		}
@@ -3103,7 +3179,7 @@
 		err = jbd2_journal_load(journal);
 
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
+		ext4_msg(sb, KERN_ERR, "error loading journal");
 		jbd2_journal_destroy(journal);
 		return err;
 	}
@@ -3114,18 +3190,17 @@
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 
 	return 0;
 }
 
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync)
+static int ext4_commit_super(struct super_block *sb, int sync)
 {
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 	int error = 0;
 
@@ -3140,8 +3215,8 @@
 		 * be remapped.  Nothing we can do but to retry the
 		 * write and hope for the best.
 		 */
-		printk(KERN_ERR "EXT4-fs: previous I/O error to "
-		       "superblock detected for %s.\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "previous I/O error to "
+		       "superblock detected");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
@@ -3154,7 +3229,7 @@
 					&EXT4_SB(sb)->s_freeblocks_counter));
 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
 					&EXT4_SB(sb)->s_freeinodes_counter));
-
+	sb->s_dirt = 0;
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
 	if (sync) {
@@ -3164,8 +3239,8 @@
 
 		error = buffer_write_io_error(sbh);
 		if (error) {
-			printk(KERN_ERR "EXT4-fs: I/O error while writing "
-			       "superblock for %s.\n", sb->s_id);
+			ext4_msg(sb, KERN_ERR, "I/O error while writing "
+			       "superblock");
 			clear_buffer_write_io_error(sbh);
 			set_buffer_uptodate(sbh);
 		}
@@ -3173,7 +3248,6 @@
 	return error;
 }
 
-
 /*
  * Have we just finished recovery?  If so, and if we are mounting (or
  * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3266,11 @@
 	if (jbd2_journal_flush(journal) < 0)
 		goto out;
 
-	lock_super(sb);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
-	unlock_super(sb);
 
 out:
 	jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3309,7 @@
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 		jbd2_journal_clear_err(journal);
 	}
@@ -3257,29 +3328,17 @@
 		return 0;
 
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		sb->s_dirt = 0;
+	if (journal)
 		ret = ext4_journal_force_commit(journal);
-	}
 
 	return ret;
 }
 
-/*
- * Ext4 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
- */
 static void ext4_write_super(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal) {
-		if (mutex_trylock(&sb->s_lock) != 0)
-			BUG();
-		sb->s_dirt = 0;
-	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-	}
+	lock_super(sb);
+	ext4_commit_super(sb, 1);
+	unlock_super(sb);
 }
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3347,9 @@
 	tid_t target;
 
 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
-	sb->s_dirt = 0;
-	if (EXT4_SB(sb)->s_journal) {
-		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
-					      &target)) {
-			if (wait)
-				jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
-						     target);
-		}
-	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
+	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
+		if (wait)
+			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
 	}
 	return ret;
 }
@@ -3310,34 +3362,32 @@
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		journal = EXT4_SB(sb)->s_journal;
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
 
-		if (journal) {
-			/* Now we set up the journal barrier. */
-			jbd2_journal_lock_updates(journal);
+	journal = EXT4_SB(sb)->s_journal;
 
-			/*
-			 * We don't want to clear needs_recovery flag when we
-			 * failed to flush the journal.
-			 */
-			error = jbd2_journal_flush(journal);
-			if (error < 0)
-				goto out;
-		}
+	/* Now we set up the journal barrier. */
+	jbd2_journal_lock_updates(journal);
 
-		/* Journal blocked and flushed, clear needs_recovery flag. */
-		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-		if (error)
-			goto out;
+	/*
+	 * Don't clear the needs_recovery flag if we failed to flush
+	 * the journal.
+	 */
+	error = jbd2_journal_flush(journal);
+	if (error < 0) {
+	out:
+		jbd2_journal_unlock_updates(journal);
+		return error;
 	}
+
+	/* Journal blocked and flushed, clear needs_recovery flag. */
+	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	error = ext4_commit_super(sb, 1);
+	if (error)
+		goto out;
 	return 0;
-out:
-	jbd2_journal_unlock_updates(journal);
-	return error;
 }
 
 /*
@@ -3346,14 +3396,15 @@
  */
 static int ext4_unfreeze(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
-		lock_super(sb);
-		/* Reser the needs_recovery flag before the fs is unlocked. */
-		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-		unlock_super(sb);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
-	}
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	lock_super(sb);
+	/* Reset the needs_recovery flag before the fs is unlocked. */
+	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	ext4_commit_super(sb, 1);
+	unlock_super(sb);
+	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	return 0;
 }
 
@@ -3371,7 +3422,10 @@
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -3432,22 +3486,15 @@
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			/*
-			 * We have to unlock super so that we can wait for
-			 * transactions.
-			 */
-			if (sbi->s_journal) {
-				unlock_super(sb);
+			if (sbi->s_journal)
 				ext4_mark_recovery_complete(sb, es);
-				lock_super(sb);
-			}
 		} else {
 			int ret;
 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
 					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "couldn't "
 				       "remount RDWR because of unsupported "
-				       "optional features (%x).\n", sb->s_id,
+				       "optional features (%x)",
 				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
 					~EXT4_FEATURE_RO_COMPAT_SUPP));
 				err = -EROFS;
@@ -3456,17 +3503,15 @@
 
 			/*
 			 * Make sure the group descriptor checksums
-			 * are sane.  If they aren't, refuse to
-			 * remount r/w.
+			 * are sane.  If they aren't, refuse to remount r/w.
 			 */
 			for (g = 0; g < sbi->s_groups_count; g++) {
 				struct ext4_group_desc *gdp =
 					ext4_get_group_desc(sb, g, NULL);
 
 				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
-					printk(KERN_ERR
-	       "EXT4-fs: ext4_remount: "
-		"Checksum for group %u failed (%u!=%u)\n",
+					ext4_msg(sb, KERN_ERR,
+	       "ext4_remount: Checksum for group %u failed (%u!=%u)",
 		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
 					       le16_to_cpu(gdp->bg_checksum));
 					err = -EINVAL;
@@ -3480,11 +3525,10 @@
 			 * require a full umount/remount for now.
 			 */
 			if (es->s_last_orphan) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "Couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
-				       "umount/remount instead.\n",
-				       sb->s_id);
+				       "umount/remount instead");
 				err = -EINVAL;
 				goto restore_opts;
 			}
@@ -3504,8 +3548,9 @@
 				sb->s_flags &= ~MS_RDONLY;
 		}
 	}
+	ext4_setup_system_zone(sb);
 	if (sbi->s_journal == NULL)
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
@@ -3514,7 +3559,10 @@
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return 0;
+
 restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3532,6 +3580,8 @@
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 
@@ -3545,9 +3595,8 @@
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
-		ext4_group_t ngroups = sbi->s_groups_count, i;
+		ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 		ext4_fsblk_t overhead = 0;
-		smp_rmb();
 
 		/*
 		 * Compute the overhead (FS structures).  This is constant
@@ -3599,11 +3648,12 @@
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+
 	return 0;
 }
 
-/* Helper function for writing quotas on sync - we need to start transaction before quota file
- * is locked for write. Otherwise the are possible deadlocks:
+/* Helper function for writing quotas on sync - we need to start transaction
+ * before quota file is locked for write. Otherwise the are possible deadlocks:
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
  *   jbd2_journal_start()                  write_dquot()
@@ -3627,7 +3677,7 @@
 
 	inode = dquot_to_inode(dquot);
 	handle = ext4_journal_start(inode,
-					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -3643,7 +3693,7 @@
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -3659,7 +3709,7 @@
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle)) {
 		/* Release dquot anyway to avoid endless cycle in dqput() */
 		dquot_release(dquot);
@@ -3707,7 +3757,7 @@
 static int ext4_quota_on_mount(struct super_block *sb, int type)
 {
 	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
-			EXT4_SB(sb)->s_jquota_fmt, type);
+				  EXT4_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
@@ -3738,9 +3788,9 @@
 	if (EXT4_SB(sb)->s_qf_names[type]) {
 		/* Quotafile not in fs root? */
 		if (path.dentry->d_parent != sb->s_root)
-			printk(KERN_WARNING
-				"EXT4-fs: Quota file not on filesystem root. "
-				"Journaled quota will not work.\n");
+			ext4_msg(sb, KERN_WARNING,
+				"Quota file not on filesystem root. "
+				"Journaled quota will not work");
 	}
 
 	/*
@@ -3823,8 +3873,8 @@
 	handle_t *handle = journal_current_handle();
 
 	if (EXT4_SB(sb)->s_journal && !handle) {
-		printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
-			" cancelled because transaction is not started.\n",
+		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because transaction is not started",
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
@@ -3878,10 +3928,10 @@
 
 #endif
 
-static int ext4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3943,14 @@
 };
 
 #ifdef CONFIG_EXT4DEV_COMPAT
-static int ext4dev_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
+			  const char *dev_name, void *data,struct vfsmount *mnt)
 {
-	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
-	       "to mount using ext4\n");
-	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
-	       "will go away by 2.6.31\n");
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
+	       "to mount using ext4\n", dev_name);
+	printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
+	       "will go away by 2.6.31\n", dev_name);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3967,16 @@
 {
 	int err;
 
+	err = init_ext4_system_zone();
+	if (err)
+		return err;
 	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
 	if (!ext4_kset)
-		return -ENOMEM;
+		goto out4;
 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
 	err = init_ext4_mballoc();
 	if (err)
-		return err;
+		goto out3;
 
 	err = init_ext4_xattr();
 	if (err)
@@ -3948,6 +4001,11 @@
 	exit_ext4_xattr();
 out2:
 	exit_ext4_mballoc();
+out3:
+	remove_proc_entry("fs/ext4", NULL);
+	kset_unregister(ext4_kset);
+out4:
+	exit_ext4_system_zone();
 	return err;
 }
 
@@ -3962,6 +4020,7 @@
 	exit_ext4_mballoc();
 	remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
+	exit_ext4_system_zone();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3a7f603..f350029 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -840,7 +840,7 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= fat_compat_dir_ioctl,
 #endif
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 };
 
 static int fat_get_short_entry(struct inode *dir, loff_t *pos,
@@ -967,7 +967,7 @@
 			de++;
 			nr_slots--;
 		}
-		mark_buffer_dirty(bh);
+		mark_buffer_dirty_inode(bh, dir);
 		if (IS_DIRSYNC(dir))
 			err = sync_dirty_buffer(bh);
 		brelse(bh);
@@ -1001,7 +1001,7 @@
 		de--;
 		nr_slots--;
 	}
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	if (IS_DIRSYNC(dir))
 		err = sync_dirty_buffer(bh);
 	brelse(bh);
@@ -1051,7 +1051,7 @@
 		}
 		memset(bhs[n]->b_data, 0, sb->s_blocksize);
 		set_buffer_uptodate(bhs[n]);
-		mark_buffer_dirty(bhs[n]);
+		mark_buffer_dirty_inode(bhs[n], dir);
 
 		n++;
 		blknr++;
@@ -1131,7 +1131,7 @@
 	de[0].size = de[1].size = 0;
 	memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
 	set_buffer_uptodate(bhs[0]);
-	mark_buffer_dirty(bhs[0]);
+	mark_buffer_dirty_inode(bhs[0], dir);
 
 	err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
 	if (err)
@@ -1193,7 +1193,7 @@
 			slots += copy;
 			size -= copy;
 			set_buffer_uptodate(bhs[n]);
-			mark_buffer_dirty(bhs[n]);
+			mark_buffer_dirty_inode(bhs[n], dir);
 			if (!size)
 				break;
 			n++;
@@ -1293,7 +1293,7 @@
 		for (i = 0; i < long_bhs; i++) {
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			offset = 0;
 			slots += copy;
 			size -= copy;
@@ -1304,7 +1304,7 @@
 			/* Fill the short name slot. */
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			if (IS_DIRSYNC(dir))
 				err = sync_dirty_buffer(bhs[i]);
 		}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index ea440d6..e4d8852 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -74,6 +74,7 @@
 
 	int fatent_shift;
 	struct fatent_operations *fatent_ops;
+	struct inode *fat_inode;
 
 	spinlock_t inode_hash_lock;
 	struct hlist_head inode_hashtable[FAT_HASH_SIZE];
@@ -251,6 +252,7 @@
 	} u;
 	int nr_bhs;
 	struct buffer_head *bhs[2];
+	struct inode *fat_inode;
 };
 
 static inline void fatent_init(struct fat_entry *fatent)
@@ -259,6 +261,7 @@
 	fatent->entry = 0;
 	fatent->u.ent32_p = NULL;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
@@ -275,6 +278,7 @@
 		brelse(fatent->bhs[i]);
 	fatent->nr_bhs = 0;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 extern void fat_ent_access_init(struct super_block *sb);
@@ -296,6 +300,8 @@
 extern void fat_truncate(struct inode *inode);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
+extern int fat_file_fsync(struct file *file, struct dentry *dentry,
+			  int datasync);
 
 /* fat/inode.c */
 extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index da6eea4..618f530 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -73,6 +73,8 @@
 	struct buffer_head **bhs = fatent->bhs;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
+
 	bhs[0] = sb_bread(sb, blocknr);
 	if (!bhs[0])
 		goto err;
@@ -103,6 +105,7 @@
 	struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
 	fatent->bhs[0] = sb_bread(sb, blocknr);
 	if (!fatent->bhs[0]) {
 		printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
@@ -167,9 +170,9 @@
 	}
 	spin_unlock(&fat12_entry_lock);
 
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 	if (fatent->nr_bhs == 2)
-		mark_buffer_dirty(fatent->bhs[1]);
+		mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode);
 }
 
 static void fat16_ent_put(struct fat_entry *fatent, int new)
@@ -178,7 +181,7 @@
 		new = EOF_FAT16;
 
 	*fatent->u.ent16_p = cpu_to_le16(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static void fat32_ent_put(struct fat_entry *fatent, int new)
@@ -189,7 +192,7 @@
 	WARN_ON(new & 0xf0000000);
 	new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
 	*fatent->u.ent32_p = cpu_to_le32(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static int fat12_ent_next(struct fat_entry *fatent)
@@ -381,7 +384,7 @@
 			}
 			memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
 			set_buffer_uptodate(c_bh);
-			mark_buffer_dirty(c_bh);
+			mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
 			if (sb->s_flags & MS_SYNCHRONOUS)
 				err = sync_dirty_buffer(c_bh);
 			brelse(c_bh);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 0a7f4a9..e955a56 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -133,6 +133,18 @@
 	return 0;
 }
 
+int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int res, err;
+
+	res = simple_fsync(filp, dentry, datasync);
+	err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+
+	return res ? res : err;
+}
+
+
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -142,7 +154,7 @@
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.ioctl		= fat_generic_ioctl,
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 296785a..51a5ecf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -441,16 +441,35 @@
 
 static void fat_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY))
 		fat_clusters_flush(sb);
+	unlock_super(sb);
+}
+
+static int fat_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	fat_clusters_flush(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
 }
 
 static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		fat_write_super(sb);
+
+	iput(sbi->fat_inode);
+
 	if (sbi->nls_disk) {
 		unload_nls(sbi->nls_disk);
 		sbi->nls_disk = NULL;
@@ -467,6 +486,8 @@
 
 	sb->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *fat_inode_cachep;
@@ -632,6 +653,7 @@
 	.delete_inode	= fat_delete_inode,
 	.put_super	= fat_put_super,
 	.write_super	= fat_write_super,
+	.sync_fs	= fat_sync_fs,
 	.statfs		= fat_statfs,
 	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
@@ -1174,7 +1196,7 @@
 int fat_fill_super(struct super_block *sb, void *data, int silent,
 		   const struct inode_operations *fs_dir_inode_ops, int isvfat)
 {
-	struct inode *root_inode = NULL;
+	struct inode *root_inode = NULL, *fat_inode = NULL;
 	struct buffer_head *bh;
 	struct fat_boot_sector *b;
 	struct msdos_sb_info *sbi;
@@ -1414,6 +1436,11 @@
 	}
 
 	error = -ENOMEM;
+	fat_inode = new_inode(sb);
+	if (!fat_inode)
+		goto out_fail;
+	MSDOS_I(fat_inode)->i_pos = 0;
+	sbi->fat_inode = fat_inode;
 	root_inode = new_inode(sb);
 	if (!root_inode)
 		goto out_fail;
@@ -1439,6 +1466,8 @@
 		       " on dev %s.\n", sb->s_id);
 
 out_fail:
+	if (fat_inode)
+		iput(fat_inode);
 	if (root_inode)
 		iput(root_inode);
 	if (sbi->nls_io)
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index da3f361..20f5228 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -544,7 +544,7 @@
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -586,7 +586,7 @@
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a0e00e3..b50ecbe 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -965,7 +965,7 @@
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -1009,7 +1009,7 @@
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:
diff --git a/fs/file_table.c b/fs/file_table.c
index 54018fe..334ce39 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -214,7 +214,7 @@
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
 		file_take_write(file);
-		error = mnt_want_write(mnt);
+		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
 	return error;
@@ -399,6 +399,44 @@
 	return 0;
 }
 
+/**
+ *	mark_files_ro - mark all files read-only
+ *	@sb: superblock in question
+ *
+ *	All files are marked read-only.  We don't care about pending
+ *	delete files so this should be used in 'force' mode only.
+ */
+void mark_files_ro(struct super_block *sb)
+{
+	struct file *f;
+
+retry:
+	file_list_lock();
+	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+		struct vfsmount *mnt;
+		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+		       continue;
+		if (!file_count(f))
+			continue;
+		if (!(f->f_mode & FMODE_WRITE))
+			continue;
+		f->f_mode &= ~FMODE_WRITE;
+		if (file_check_writeable(f) != 0)
+			continue;
+		file_release_write(f);
+		mnt = mntget(f->f_path.mnt);
+		file_list_unlock();
+		/*
+		 * This can sleep, so we can't hold
+		 * the file_list_lock() spinlock.
+		 */
+		mnt_drop_write(mnt);
+		mntput(mnt);
+		goto retry;
+	}
+	file_list_unlock();
+}
+
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1dacda8..cdbd165 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -80,12 +80,16 @@
 {
 	struct vxfs_sb_info	*infp = VXFS_SBI(sbp);
 
+	lock_kernel();
+
 	vxfs_put_fake_inode(infp->vsi_fship);
 	vxfs_put_fake_inode(infp->vsi_ilist);
 	vxfs_put_fake_inode(infp->vsi_stilist);
 
 	brelse(infp->vsi_bp);
 	kfree(infp);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff..40308e9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -64,6 +64,28 @@
 	clear_bit(BDI_pdflush, &bdi->state);
 }
 
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+		struct dentry *dentry;
+		const char *name = "?";
+
+		dentry = d_find_alias(inode);
+		if (dentry) {
+			spin_lock(&dentry->d_lock);
+			name = (const char *) dentry->d_name.name;
+		}
+		printk(KERN_DEBUG
+		       "%s(%d): dirtied inode %lu (%s) on %s\n",
+		       current->comm, task_pid_nr(current), inode->i_ino,
+		       name, inode->i_sb->s_id);
+		if (dentry) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+		}
+	}
+}
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -114,23 +136,8 @@
 	if ((inode->i_state & flags) == flags)
 		return;
 
-	if (unlikely(block_dump)) {
-		struct dentry *dentry = NULL;
-		const char *name = "?";
-
-		if (!list_empty(&inode->i_dentry)) {
-			dentry = list_entry(inode->i_dentry.next,
-					    struct dentry, d_alias);
-			if (dentry && dentry->d_name.name)
-				name = (const char *) dentry->d_name.name;
-		}
-
-		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
-			printk(KERN_DEBUG
-			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, task_pid_nr(current), inode->i_ino,
-			       name, inode->i_sb->s_id);
-	}
+	if (unlikely(block_dump))
+		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & flags) != flags) {
@@ -289,7 +296,6 @@
 	int ret;
 
 	BUG_ON(inode->i_state & I_SYNC);
-	WARN_ON(inode->i_state & I_NEW);
 
 	/* Set I_SYNC, reset I_DIRTY */
 	dirty = inode->i_state & I_DIRTY;
@@ -314,7 +320,6 @@
 	}
 
 	spin_lock(&inode_lock);
-	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (!(inode->i_state & I_DIRTY) &&
@@ -679,55 +684,6 @@
 }
 
 /**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
-		}
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-	__sync_inodes(0);
-
-	if (wait)
-		__sync_inodes(1);
-}
-
-/**
  * write_inode_now	-	write an inode to disk
  * @inode: inode to write to disk
  * @sync: whether the write should be synchronous or not
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 7243706..e95eeb4 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 0000000..de792dc
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009  SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009  Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems.  On initialization /dev/cuse is
+ * created.  By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device.  After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn	: contains fuse_conn and serves as bonding structure
+ * channel	: file handle connected to the userland CUSE server
+ * cdev		: the implemented character device
+ * dev		: generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies.  When channel is
+ * closed, everything begins to destruct.  The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN	64
+
+struct cuse_conn {
+	struct list_head	list;	/* linked on cuse_conntbl */
+	struct fuse_conn	fc;	/* fuse connection */
+	struct cdev		*cdev;	/* associated character device */
+	struct device		*dev;	/* device representing @cdev */
+
+	/* init parameters, set once during initialization */
+	bool			unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock);		/* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+	return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+	return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file.  All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	loff_t pos = 0;
+
+	return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	loff_t pos = 0;
+	/*
+	 * No locking or generic_write_checks(), the server is
+	 * responsible for locking and sanity checks.
+	 */
+	return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+	dev_t devt = inode->i_cdev->dev;
+	struct cuse_conn *cc = NULL, *pos;
+	int rc;
+
+	/* look up and get the connection */
+	spin_lock(&cuse_lock);
+	list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+		if (pos->dev->devt == devt) {
+			fuse_conn_get(&pos->fc);
+			cc = pos;
+			break;
+		}
+	spin_unlock(&cuse_lock);
+
+	/* dead? */
+	if (!cc)
+		return -ENODEV;
+
+	/*
+	 * Generic permission check is already done against the chrdev
+	 * file, proceed to open.
+	 */
+	rc = fuse_do_open(&cc->fc, 0, file, 0);
+	if (rc)
+		fuse_conn_put(&cc->fc);
+	return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+
+	fuse_sync_release(ff, file->f_flags);
+	fuse_conn_put(fc);
+
+	return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = 0;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = FUSE_IOCTL_COMPAT;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+	.owner			= THIS_MODULE,
+	.read			= cuse_read,
+	.write			= cuse_write,
+	.open			= cuse_open,
+	.release		= cuse_release,
+	.unlocked_ioctl		= cuse_file_ioctl,
+	.compat_ioctl		= cuse_file_compat_ioctl,
+	.poll			= fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+	const char		*name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1.  This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value.  Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'.  *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+	char *p = *pp;
+	char *key, *val;
+
+	while (p < end && *p == '\0')
+		p++;
+	if (p == end)
+		return 0;
+
+	if (end[-1] != '\0') {
+		printk(KERN_ERR "CUSE: info not properly terminated\n");
+		return -EINVAL;
+	}
+
+	key = val = p;
+	p += strlen(p);
+
+	if (valp) {
+		strsep(&val, "=");
+		if (!val)
+			val = key + strlen(key);
+		key = strstrip(key);
+		val = strstrip(val);
+	} else
+		key = strstrip(key);
+
+	if (!strlen(key)) {
+		printk(KERN_ERR "CUSE: zero length info key specified\n");
+		return -EINVAL;
+	}
+
+	*pp = p;
+	*keyp = key;
+	if (valp)
+		*valp = val;
+
+	return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo.  String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+	char *end = p + len;
+	char *key, *val;
+	int rc;
+
+	while (true) {
+		rc = cuse_parse_one(&p, end, &key, &val);
+		if (rc < 0)
+			return rc;
+		if (!rc)
+			break;
+		if (strcmp(key, "DEVNAME") == 0)
+			devinfo->name = val;
+		else
+			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+			       key);
+	}
+
+	if (!devinfo->name || !strlen(devinfo->name)) {
+		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it.  Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	struct cuse_init_out *arg = &req->misc.cuse_init_out;
+	struct page *page = req->pages[0];
+	struct cuse_devinfo devinfo = { };
+	struct device *dev;
+	struct cdev *cdev;
+	dev_t devt;
+	int rc;
+
+	if (req->out.h.error ||
+	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+		goto err;
+	}
+
+	fc->minor = arg->minor;
+	fc->max_read = max_t(unsigned, arg->max_read, 4096);
+	fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+	/* parse init reply */
+	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+				&devinfo);
+	if (rc)
+		goto err;
+
+	/* determine and reserve devt */
+	devt = MKDEV(arg->dev_major, arg->dev_minor);
+	if (!MAJOR(devt))
+		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+	else
+		rc = register_chrdev_region(devt, 1, devinfo.name);
+	if (rc) {
+		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+		goto err;
+	}
+
+	/* devt determined, create device */
+	rc = -ENOMEM;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto err_region;
+
+	device_initialize(dev);
+	dev_set_uevent_suppress(dev, 1);
+	dev->class = cuse_class;
+	dev->devt = devt;
+	dev->release = cuse_gendev_release;
+	dev_set_drvdata(dev, cc);
+	dev_set_name(dev, "%s", devinfo.name);
+
+	rc = device_add(dev);
+	if (rc)
+		goto err_device;
+
+	/* register cdev */
+	rc = -ENOMEM;
+	cdev = cdev_alloc();
+	if (!cdev)
+		goto err_device;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &cuse_frontend_fops;
+
+	rc = cdev_add(cdev, devt, 1);
+	if (rc)
+		goto err_cdev;
+
+	cc->dev = dev;
+	cc->cdev = cdev;
+
+	/* make the device available */
+	spin_lock(&cuse_lock);
+	list_add(&cc->list, cuse_conntbl_head(devt));
+	spin_unlock(&cuse_lock);
+
+	/* announce device availability */
+	dev_set_uevent_suppress(dev, 0);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+	__free_page(page);
+	return;
+
+err_cdev:
+	cdev_del(cdev);
+err_device:
+	put_device(dev);
+err_region:
+	unregister_chrdev_region(devt, 1);
+err:
+	fc->conn_error = 1;
+	goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+	int rc;
+	struct fuse_req *req;
+	struct page *page;
+	struct fuse_conn *fc = &cc->fc;
+	struct cuse_init_in *arg;
+
+	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		rc = PTR_ERR(req);
+		goto err;
+	}
+
+	rc = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto err_put_req;
+
+	arg = &req->misc.cuse_init_in;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+	req->in.h.opcode = CUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct cuse_init_in);
+	req->in.args[0].value = arg;
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(struct cuse_init_out);
+	req->out.args[0].value = &req->misc.cuse_init_out;
+	req->out.args[1].size = CUSE_INIT_INFO_MAX;
+	req->out.argvar = 1;
+	req->out.argpages = 1;
+	req->pages[0] = page;
+	req->num_pages = 1;
+	req->end = cuse_process_init_reply;
+	fuse_request_send_background(fc, req);
+
+	return 0;
+
+err_put_req:
+	fuse_put_request(fc, req);
+err:
+	return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initilaization request kernel sends.  This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init.  The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc;
+	int rc;
+
+	/* set up cuse_conn */
+	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+	if (!cc)
+		return -ENOMEM;
+
+	fuse_conn_init(&cc->fc);
+
+	INIT_LIST_HEAD(&cc->list);
+	cc->fc.release = cuse_fc_release;
+
+	cc->fc.connected = 1;
+	cc->fc.blocked = 0;
+	rc = cuse_send_init(cc);
+	if (rc) {
+		fuse_conn_put(&cc->fc);
+		return rc;
+	}
+	file->private_data = &cc->fc;	/* channel owns base reference to cc */
+
+	return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc = fc_to_cc(file->private_data);
+	int rc;
+
+	/* remove from the conntbl, no more access from this point on */
+	spin_lock(&cuse_lock);
+	list_del_init(&cc->list);
+	spin_unlock(&cuse_lock);
+
+	/* remove device */
+	if (cc->dev)
+		device_unregister(cc->dev);
+	if (cc->cdev) {
+		unregister_chrdev_region(cc->cdev->dev, 1);
+		cdev_del(cc->cdev);
+	}
+
+	/* kill connection and shutdown channel */
+	fuse_conn_kill(&cc->fc);
+	rc = fuse_dev_release(inode, file);	/* puts the base reference */
+
+	return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initializatiion
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	fuse_abort_conn(&cc->fc);
+	return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+	__ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+	__ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+	{ }
+};
+
+static struct miscdevice cuse_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "cuse",
+	.fops		= &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+	int i, rc;
+
+	/* init conntbl */
+	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+		INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+	/* inherit and extend fuse_dev_operations */
+	cuse_channel_fops		= fuse_dev_operations;
+	cuse_channel_fops.owner		= THIS_MODULE;
+	cuse_channel_fops.open		= cuse_channel_open;
+	cuse_channel_fops.release	= cuse_channel_release;
+
+	cuse_class = class_create(THIS_MODULE, "cuse");
+	if (IS_ERR(cuse_class))
+		return PTR_ERR(cuse_class);
+
+	cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+	rc = misc_register(&cuse_miscdev);
+	if (rc) {
+		class_destroy(cuse_class);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+	misc_deregister(&cuse_miscdev);
+	class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba76b68..8fed2ed 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@
 		fuse_request_init(req);
 	return req;
 }
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
 struct fuse_req *fuse_request_alloc_nofs(void)
 {
@@ -124,6 +125,7 @@
 	atomic_dec(&fc->num_waiting);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(fuse_get_req);
 
 /*
  * Return request in fuse_file->reserved_req.  However that may
@@ -208,6 +210,7 @@
 			fuse_request_free(req);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_put_request);
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 {
@@ -282,7 +285,7 @@
 			wake_up_all(&fc->blocked_waitq);
 		}
 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
-		    fc->connected) {
+		    fc->connected && fc->bdi_initialized) {
 			clear_bdi_congested(&fc->bdi, READ);
 			clear_bdi_congested(&fc->bdi, WRITE);
 		}
@@ -400,6 +403,7 @@
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send);
 
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
@@ -408,7 +412,8 @@
 	fc->num_background++;
 	if (fc->num_background == FUSE_MAX_BACKGROUND)
 		fc->blocked = 1;
-	if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
+	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+	    fc->bdi_initialized) {
 		set_bdi_congested(&fc->bdi, READ);
 		set_bdi_congested(&fc->bdi, WRITE);
 	}
@@ -439,6 +444,7 @@
 	req->isreply = 1;
 	fuse_request_send_nowait(fc, req);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
 /*
  * Called under fc->lock
@@ -1105,8 +1111,9 @@
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
-static int fuse_dev_release(struct inode *inode, struct file *file)
+int fuse_dev_release(struct inode *inode, struct file *file)
 {
 	struct fuse_conn *fc = fuse_get_conn(file);
 	if (fc) {
@@ -1120,6 +1127,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(fuse_dev_release);
 
 static int fuse_dev_fasync(int fd, struct file *file, int on)
 {
@@ -1142,6 +1150,7 @@
 	.release	= fuse_dev_release,
 	.fasync		= fuse_dev_fasync,
 };
+EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
 static struct miscdevice fuse_miscdevice = {
 	.minor = FUSE_MINOR,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8b8eebc..b3089a0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -362,19 +362,6 @@
 }
 
 /*
- * Synchronous release for the case when something goes wrong in CREATE_OPEN
- */
-static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
-			      u64 nodeid, int flags)
-{
-	fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
-	ff->reserved_req->force = 1;
-	fuse_request_send(fc, ff->reserved_req);
-	fuse_put_request(fc, ff->reserved_req);
-	kfree(ff);
-}
-
-/*
  * Atomic create+open operation
  *
  * If the filesystem doesn't support this, then fall back to separate
@@ -445,12 +432,14 @@
 		goto out_free_ff;
 
 	fuse_put_request(fc, req);
+	ff->fh = outopen.fh;
+	ff->nodeid = outentry.nodeid;
+	ff->open_flags = outopen.open_flags;
 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
-		ff->fh = outopen.fh;
-		fuse_sync_release(fc, ff, outentry.nodeid, flags);
+		fuse_sync_release(ff, flags);
 		fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
 		return -ENOMEM;
 	}
@@ -460,11 +449,11 @@
 	fuse_invalidate_attr(dir);
 	file = lookup_instantiate_filp(nd, entry, generic_file_open);
 	if (IS_ERR(file)) {
-		ff->fh = outopen.fh;
-		fuse_sync_release(fc, ff, outentry.nodeid, flags);
+		fuse_sync_release(ff, flags);
 		return PTR_ERR(file);
 	}
-	fuse_finish_open(inode, file, ff, &outopen);
+	file->private_data = fuse_file_get(ff);
+	fuse_finish_open(inode, file);
 	return 0;
 
  out_free_ff:
@@ -1035,7 +1024,7 @@
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+	fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
 	fuse_request_send(fc, req);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
@@ -1101,12 +1090,14 @@
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, 1);
+	return fuse_open_common(inode, file, true);
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
 {
-	return fuse_release_common(inode, file, 1);
+	fuse_release_common(file, FUSE_RELEASEDIR);
+
+	return 0;
 }
 
 static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06f30e9..fce6ce6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,13 +12,13 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/module.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
-static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
-			  struct fuse_open_out *outargp)
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+			  int opcode, struct fuse_open_out *outargp)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_open_in inarg;
 	struct fuse_req *req;
 	int err;
@@ -31,8 +31,8 @@
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
-	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.opcode = opcode;
+	req->in.h.nodeid = nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -49,22 +49,27 @@
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
 	struct fuse_file *ff;
+
 	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
-	if (ff) {
-		ff->reserved_req = fuse_request_alloc();
-		if (!ff->reserved_req) {
-			kfree(ff);
-			return NULL;
-		} else {
-			INIT_LIST_HEAD(&ff->write_entry);
-			atomic_set(&ff->count, 0);
-			spin_lock(&fc->lock);
-			ff->kh = ++fc->khctr;
-			spin_unlock(&fc->lock);
-		}
-		RB_CLEAR_NODE(&ff->polled_node);
-		init_waitqueue_head(&ff->poll_wait);
+	if (unlikely(!ff))
+		return NULL;
+
+	ff->fc = fc;
+	ff->reserved_req = fuse_request_alloc();
+	if (unlikely(!ff->reserved_req)) {
+		kfree(ff);
+		return NULL;
 	}
+
+	INIT_LIST_HEAD(&ff->write_entry);
+	atomic_set(&ff->count, 0);
+	RB_CLEAR_NODE(&ff->polled_node);
+	init_waitqueue_head(&ff->poll_wait);
+
+	spin_lock(&fc->lock);
+	ff->kh = ++fc->khctr;
+	spin_unlock(&fc->lock);
+
 	return ff;
 }
 
@@ -74,7 +79,7 @@
 	kfree(ff);
 }
 
-static struct fuse_file *fuse_file_get(struct fuse_file *ff)
+struct fuse_file *fuse_file_get(struct fuse_file *ff)
 {
 	atomic_inc(&ff->count);
 	return ff;
@@ -82,41 +87,66 @@
 
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	dput(req->misc.release.dentry);
-	mntput(req->misc.release.vfsmount);
+	path_put(&req->misc.release.path);
 }
 
 static void fuse_file_put(struct fuse_file *ff)
 {
 	if (atomic_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
-		struct inode *inode = req->misc.release.dentry->d_inode;
-		struct fuse_conn *fc = get_fuse_conn(inode);
+
 		req->end = fuse_release_end;
-		fuse_request_send_background(fc, req);
+		fuse_request_send_background(ff->fc, req);
 		kfree(ff);
 	}
 }
 
-void fuse_finish_open(struct inode *inode, struct file *file,
-		      struct fuse_file *ff, struct fuse_open_out *outarg)
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+		 bool isdir)
 {
-	if (outarg->open_flags & FOPEN_DIRECT_IO)
-		file->f_op = &fuse_direct_io_file_operations;
-	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
-		invalidate_inode_pages2(inode->i_mapping);
-	if (outarg->open_flags & FOPEN_NONSEEKABLE)
-		nonseekable_open(inode, file);
-	ff->fh = outarg->fh;
-	file->private_data = fuse_file_get(ff);
-}
-
-int fuse_open_common(struct inode *inode, struct file *file, int isdir)
-{
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_open_out outarg;
 	struct fuse_file *ff;
 	int err;
+	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+
+	ff = fuse_file_alloc(fc);
+	if (!ff)
+		return -ENOMEM;
+
+	err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+	if (err) {
+		fuse_file_free(ff);
+		return err;
+	}
+
+	if (isdir)
+		outarg.open_flags &= ~FOPEN_DIRECT_IO;
+
+	ff->fh = outarg.fh;
+	ff->nodeid = nodeid;
+	ff->open_flags = outarg.open_flags;
+	file->private_data = fuse_file_get(ff);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_do_open);
+
+void fuse_finish_open(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+
+	if (ff->open_flags & FOPEN_DIRECT_IO)
+		file->f_op = &fuse_direct_io_file_operations;
+	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+		invalidate_inode_pages2(inode->i_mapping);
+	if (ff->open_flags & FOPEN_NONSEEKABLE)
+		nonseekable_open(inode, file);
+}
+
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err;
 
 	/* VFS checks this, but only _after_ ->open() */
 	if (file->f_flags & O_DIRECT)
@@ -126,79 +156,86 @@
 	if (err)
 		return err;
 
-	ff = fuse_file_alloc(fc);
-	if (!ff)
-		return -ENOMEM;
-
-	err = fuse_send_open(inode, file, isdir, &outarg);
+	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 	if (err)
-		fuse_file_free(ff);
-	else {
-		if (isdir)
-			outarg.open_flags &= ~FOPEN_DIRECT_IO;
-		fuse_finish_open(inode, file, ff, &outarg);
-	}
+		return err;
 
-	return err;
+	fuse_finish_open(inode, file);
+
+	return 0;
 }
 
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
+static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
 {
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_req *req = ff->reserved_req;
 	struct fuse_release_in *inarg = &req->misc.release.in;
 
+	spin_lock(&fc->lock);
+	list_del(&ff->write_entry);
+	if (!RB_EMPTY_NODE(&ff->polled_node))
+		rb_erase(&ff->polled_node, &fc->polled_files);
+	spin_unlock(&fc->lock);
+
+	wake_up_interruptible_sync(&ff->poll_wait);
+
 	inarg->fh = ff->fh;
 	inarg->flags = flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = nodeid;
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_release_in);
 	req->in.args[0].value = inarg;
 }
 
-int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+void fuse_release_common(struct file *file, int opcode)
 {
-	struct fuse_file *ff = file->private_data;
-	if (ff) {
-		struct fuse_conn *fc = get_fuse_conn(inode);
-		struct fuse_req *req = ff->reserved_req;
+	struct fuse_file *ff;
+	struct fuse_req *req;
 
-		fuse_release_fill(ff, get_node_id(inode), file->f_flags,
-				  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+	ff = file->private_data;
+	if (unlikely(!ff))
+		return;
 
-		/* Hold vfsmount and dentry until release is finished */
-		req->misc.release.vfsmount = mntget(file->f_path.mnt);
-		req->misc.release.dentry = dget(file->f_path.dentry);
+	req = ff->reserved_req;
+	fuse_prepare_release(ff, file->f_flags, opcode);
 
-		spin_lock(&fc->lock);
-		list_del(&ff->write_entry);
-		if (!RB_EMPTY_NODE(&ff->polled_node))
-			rb_erase(&ff->polled_node, &fc->polled_files);
-		spin_unlock(&fc->lock);
+	/* Hold vfsmount and dentry until release is finished */
+	path_get(&file->f_path);
+	req->misc.release.path = file->f_path;
 
-		wake_up_interruptible_sync(&ff->poll_wait);
-		/*
-		 * Normally this will send the RELEASE request,
-		 * however if some asynchronous READ or WRITE requests
-		 * are outstanding, the sending will be delayed
-		 */
-		fuse_file_put(ff);
-	}
-
-	/* Return value is ignored by VFS */
-	return 0;
+	/*
+	 * Normally this will send the RELEASE request, however if
+	 * some asynchronous READ or WRITE requests are outstanding,
+	 * the sending will be delayed.
+	 */
+	fuse_file_put(ff);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, 0);
+	return fuse_open_common(inode, file, false);
 }
 
 static int fuse_release(struct inode *inode, struct file *file)
 {
-	return fuse_release_common(inode, file, 0);
+	fuse_release_common(file, FUSE_RELEASE);
+
+	/* return value is ignored by VFS */
+	return 0;
 }
 
+void fuse_sync_release(struct fuse_file *ff, int flags)
+{
+	WARN_ON(atomic_read(&ff->count) > 1);
+	fuse_prepare_release(ff, flags, FUSE_RELEASE);
+	ff->reserved_req->force = 1;
+	fuse_request_send(ff->fc, ff->reserved_req);
+	fuse_put_request(ff->fc, ff->reserved_req);
+	kfree(ff);
+}
+EXPORT_SYMBOL_GPL(fuse_sync_release);
+
 /*
  * Scramble the ID space with XTEA, so that the value of the files_struct
  * pointer is not exposed to userspace.
@@ -371,8 +408,8 @@
 	return fuse_fsync_common(file, de, datasync, 0);
 }
 
-void fuse_read_fill(struct fuse_req *req, struct file *file,
-		    struct inode *inode, loff_t pos, size_t count, int opcode)
+void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+		    size_t count, int opcode)
 {
 	struct fuse_read_in *inarg = &req->misc.read.in;
 	struct fuse_file *ff = file->private_data;
@@ -382,7 +419,7 @@
 	inarg->size = count;
 	inarg->flags = file->f_flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_read_in);
 	req->in.args[0].value = inarg;
@@ -392,12 +429,12 @@
 }
 
 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
-			     struct inode *inode, loff_t pos, size_t count,
-			     fl_owner_t owner)
+			     loff_t pos, size_t count, fl_owner_t owner)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 
-	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, pos, count, FUSE_READ);
 	if (owner != NULL) {
 		struct fuse_read_in *inarg = &req->misc.read.in;
 
@@ -455,7 +492,7 @@
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	num_read = fuse_send_read(req, file, inode, pos, count, NULL);
+	num_read = fuse_send_read(req, file, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 
@@ -504,19 +541,18 @@
 		fuse_file_put(req->ff);
 }
 
-static void fuse_send_readpages(struct fuse_req *req, struct file *file,
-				struct inode *inode)
+static void fuse_send_readpages(struct fuse_req *req, struct file *file)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
 
 	req->out.argpages = 1;
 	req->out.page_zeroing = 1;
-	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, pos, count, FUSE_READ);
 	req->misc.read.attr_ver = fuse_get_attr_version(fc);
 	if (fc->async_read) {
-		struct fuse_file *ff = file->private_data;
 		req->ff = fuse_file_get(ff);
 		req->end = fuse_readpages_end;
 		fuse_request_send_background(fc, req);
@@ -546,7 +582,7 @@
 	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
 	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
 	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-		fuse_send_readpages(req, data->file, inode);
+		fuse_send_readpages(req, data->file);
 		data->req = req = fuse_get_req(fc);
 		if (IS_ERR(req)) {
 			unlock_page(page);
@@ -580,7 +616,7 @@
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
 		if (data.req->num_pages)
-			fuse_send_readpages(data.req, file, inode);
+			fuse_send_readpages(data.req, file);
 		else
 			fuse_put_request(fc, data.req);
 	}
@@ -607,24 +643,19 @@
 	return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }
 
-static void fuse_write_fill(struct fuse_req *req, struct file *file,
-			    struct fuse_file *ff, struct inode *inode,
-			    loff_t pos, size_t count, int writepage)
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+			    loff_t pos, size_t count)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_write_in *inarg = &req->misc.write.in;
 	struct fuse_write_out *outarg = &req->misc.write.out;
 
-	memset(inarg, 0, sizeof(struct fuse_write_in));
 	inarg->fh = ff->fh;
 	inarg->offset = pos;
 	inarg->size = count;
-	inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
-	inarg->flags = file ? file->f_flags : 0;
 	req->in.h.opcode = FUSE_WRITE;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 2;
-	if (fc->minor < 9)
+	if (ff->fc->minor < 9)
 		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
 	else
 		req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -636,13 +667,15 @@
 }
 
 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
-			      struct inode *inode, loff_t pos, size_t count,
-			      fl_owner_t owner)
+			      loff_t pos, size_t count, fl_owner_t owner)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
-	fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_write_in *inarg = &req->misc.write.in;
+
+	fuse_write_fill(req, ff, pos, count);
+	inarg->flags = file->f_flags;
 	if (owner != NULL) {
-		struct fuse_write_in *inarg = &req->misc.write.in;
 		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
 		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
 	}
@@ -700,7 +733,7 @@
 	req->num_pages = 1;
 	req->pages[0] = page;
 	req->page_offset = offset;
-	nres = fuse_send_write(req, file, inode, pos, count, NULL);
+	nres = fuse_send_write(req, file, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err && !nres)
@@ -741,7 +774,7 @@
 	for (i = 0; i < req->num_pages; i++)
 		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
 
-	res = fuse_send_write(req, file, inode, pos, count, NULL);
+	res = fuse_send_write(req, file, pos, count, NULL);
 
 	offset = req->page_offset;
 	count = res;
@@ -979,25 +1012,23 @@
 	return 0;
 }
 
-static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
-			      size_t count, loff_t *ppos, int write)
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+		       size_t count, loff_t *ppos, int write)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
 	ssize_t res = 0;
 	struct fuse_req *req;
 
-	if (is_bad_inode(inode))
-		return -EIO;
-
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	while (count) {
 		size_t nres;
+		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
 		int err = fuse_get_user_pages(req, buf, &nbytes, write);
 		if (err) {
@@ -1006,11 +1037,10 @@
 		}
 
 		if (write)
-			nres = fuse_send_write(req, file, inode, pos, nbytes,
-					       current->files);
+			nres = fuse_send_write(req, file, pos, nbytes, owner);
 		else
-			nres = fuse_send_read(req, file, inode, pos, nbytes,
-					      current->files);
+			nres = fuse_send_read(req, file, pos, nbytes, owner);
+
 		fuse_release_user_pages(req, !write);
 		if (req->out.h.error) {
 			if (!res)
@@ -1034,20 +1064,27 @@
 		}
 	}
 	fuse_put_request(fc, req);
-	if (res > 0) {
-		if (write)
-			fuse_write_update_size(inode, pos);
+	if (res > 0)
 		*ppos = pos;
-	}
-	fuse_invalidate_attr(inode);
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(fuse_direct_io);
 
 static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 				     size_t count, loff_t *ppos)
 {
-	return fuse_direct_io(file, buf, count, ppos, 0);
+	ssize_t res;
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	res = fuse_direct_io(file, buf, count, ppos, 0);
+
+	fuse_invalidate_attr(inode);
+
+	return res;
 }
 
 static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1055,12 +1092,22 @@
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t res;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	/* Don't allow parallel writes to the same file */
 	mutex_lock(&inode->i_mutex);
 	res = generic_write_checks(file, ppos, &count, 0);
-	if (!res)
+	if (!res) {
 		res = fuse_direct_io(file, buf, count, ppos, 1);
+		if (res > 0)
+			fuse_write_update_size(inode, *ppos);
+	}
 	mutex_unlock(&inode->i_mutex);
+
+	fuse_invalidate_attr(inode);
+
 	return res;
 }
 
@@ -1177,9 +1224,10 @@
 	req->ff = fuse_file_get(ff);
 	spin_unlock(&fc->lock);
 
-	fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
+	fuse_write_fill(req, ff, page_offset(page), 0);
 
 	copy_highpage(tmp_page, page);
+	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
 	req->in.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = tmp_page;
@@ -1603,12 +1651,11 @@
  * limits ioctl data transfers to well-formed ioctls and is the forced
  * behavior for all FUSE servers.
  */
-static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
-			       unsigned long arg, unsigned int flags)
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+		   unsigned int flags)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_ioctl_in inarg = {
 		.fh = ff->fh,
 		.cmd = cmd,
@@ -1627,13 +1674,6 @@
 	/* assume all the iovs returned by client always fits in a page */
 	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
-	if (!fuse_allow_task(fc, current))
-		return -EACCES;
-
-	err = -EIO;
-	if (is_bad_inode(inode))
-		goto out;
-
 	err = -ENOMEM;
 	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
 	iov_page = alloc_page(GFP_KERNEL);
@@ -1694,7 +1734,7 @@
 
 	/* okay, let's send it to the client */
 	req->in.h.opcode = FUSE_IOCTL;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -1777,17 +1817,33 @@
 
 	return err ? err : outarg.result;
 }
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
+
+static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
+				   unsigned long arg, unsigned int flags)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
 
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_do_ioctl(file, cmd, arg, 0);
+	return fuse_file_ioctl_common(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*
@@ -1841,11 +1897,10 @@
 	spin_unlock(&fc->lock);
 }
 
-static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+unsigned fuse_file_poll(struct file *file, poll_table *wait)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
 	struct fuse_poll_out outarg;
 	struct fuse_req *req;
@@ -1870,7 +1925,7 @@
 		return PTR_ERR(req);
 
 	req->in.h.opcode = FUSE_POLL;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -1889,6 +1944,7 @@
 	}
 	return POLLERR;
 }
+EXPORT_SYMBOL_GPL(fuse_file_poll);
 
 /*
  * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fc5aed..aaf2f9f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -97,8 +97,13 @@
 	struct list_head writepages;
 };
 
+struct fuse_conn;
+
 /** FUSE specific file data */
 struct fuse_file {
+	/** Fuse connection for this file */
+	struct fuse_conn *fc;
+
 	/** Request reserved for flush and release */
 	struct fuse_req *reserved_req;
 
@@ -108,9 +113,15 @@
 	/** File handle used by userspace */
 	u64 fh;
 
+	/** Node id of this file */
+	u64 nodeid;
+
 	/** Refcount */
 	atomic_t count;
 
+	/** FOPEN_* flags returned by open */
+	u32 open_flags;
+
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
 
@@ -185,8 +196,6 @@
 	FUSE_REQ_FINISHED
 };
 
-struct fuse_conn;
-
 /**
  * A request to the client
  */
@@ -248,11 +257,12 @@
 		struct fuse_forget_in forget_in;
 		struct {
 			struct fuse_release_in in;
-			struct vfsmount *vfsmount;
-			struct dentry *dentry;
+			struct path path;
 		} release;
 		struct fuse_init_in init_in;
 		struct fuse_init_out init_out;
+		struct cuse_init_in cuse_init_in;
+		struct cuse_init_out cuse_init_out;
 		struct {
 			struct fuse_read_in in;
 			u64 attr_ver;
@@ -386,6 +396,9 @@
 	/** Filesystem supports NFS exporting.  Only set in INIT */
 	unsigned export_support:1;
 
+	/** Set if bdi is valid */
+	unsigned bdi_initialized:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -515,25 +528,24 @@
  * Initialize READ or READDIR request
  */
 void fuse_read_fill(struct fuse_req *req, struct file *file,
-		    struct inode *inode, loff_t pos, size_t count, int opcode);
+		    loff_t pos, size_t count, int opcode);
 
 /**
  * Send OPEN or OPENDIR request
  */
-int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
 
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_get(struct fuse_file *ff);
 void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file,
-		      struct fuse_file *ff, struct fuse_open_out *outarg);
+void fuse_finish_open(struct inode *inode, struct file *file);
 
-/** Fill in ff->reserved_req with a RELEASE request */
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
+void fuse_sync_release(struct fuse_file *ff, int flags);
 
 /**
  * Send RELEASE or RELEASEDIR request
  */
-int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+void fuse_release_common(struct file *file, int opcode);
 
 /**
  * Send FSYNC or FSYNCDIR request
@@ -652,10 +664,12 @@
  */
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
+void fuse_conn_kill(struct fuse_conn *fc);
+
 /**
  * Initialize fuse_conn
  */
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+void fuse_conn_init(struct fuse_conn *fc);
 
 /**
  * Release reference to fuse_conn
@@ -694,4 +708,13 @@
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
 
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+		 bool isdir);
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+		       size_t count, loff_t *ppos, int write);
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+		   unsigned int flags);
+unsigned fuse_file_poll(struct file *file, poll_table *wait);
+int fuse_dev_release(struct inode *inode, struct file *file);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 91f7c85f..f0df55a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -277,11 +277,14 @@
 	}
 }
 
-static void fuse_put_super(struct super_block *sb)
+static void fuse_bdi_destroy(struct fuse_conn *fc)
 {
-	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	if (fc->bdi_initialized)
+		bdi_destroy(&fc->bdi);
+}
 
-	fuse_send_destroy(fc);
+void fuse_conn_kill(struct fuse_conn *fc)
+{
 	spin_lock(&fc->lock);
 	fc->connected = 0;
 	fc->blocked = 0;
@@ -295,7 +298,16 @@
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);
 	mutex_unlock(&fuse_mutex);
-	bdi_destroy(&fc->bdi);
+	fuse_bdi_destroy(fc);
+}
+EXPORT_SYMBOL_GPL(fuse_conn_kill);
+
+static void fuse_put_super(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	fuse_send_destroy(fc);
+	fuse_conn_kill(fc);
 	fuse_conn_put(fc);
 }
 
@@ -466,10 +478,8 @@
 	return 0;
 }
 
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
+void fuse_conn_init(struct fuse_conn *fc)
 {
-	int err;
-
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
@@ -484,49 +494,12 @@
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
 	atomic_set(&fc->num_waiting, 0);
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	fc->bdi.unplug_io_fn = default_unplug_io_fn;
-	/* fuse does it's own writeback accounting */
-	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 	fc->khctr = 0;
 	fc->polled_files = RB_ROOT;
-	fc->dev = sb->s_dev;
-	err = bdi_init(&fc->bdi);
-	if (err)
-		goto error_mutex_destroy;
-	if (sb->s_bdev) {
-		err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
-				   MAJOR(fc->dev), MINOR(fc->dev));
-	} else {
-		err = bdi_register_dev(&fc->bdi, fc->dev);
-	}
-	if (err)
-		goto error_bdi_destroy;
-	/*
-	 * For a single fuse filesystem use max 1% of dirty +
-	 * writeback threshold.
-	 *
-	 * This gives about 1M of write buffer for memory maps on a
-	 * machine with 1G and 10% dirty_ratio, which should be more
-	 * than enough.
-	 *
-	 * Privileged users can raise it by writing to
-	 *
-	 *    /sys/class/bdi/<bdi>/max_ratio
-	 */
-	bdi_set_max_ratio(&fc->bdi, 1);
 	fc->reqctr = 0;
 	fc->blocked = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
-
-	return 0;
-
- error_bdi_destroy:
-	bdi_destroy(&fc->bdi);
- error_mutex_destroy:
-	mutex_destroy(&fc->inst_mutex);
-	return err;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -539,12 +512,14 @@
 		fc->release(fc);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_conn_put);
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 {
 	atomic_inc(&fc->count);
 	return fc;
 }
+EXPORT_SYMBOL_GPL(fuse_conn_get);
 
 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 {
@@ -797,6 +772,48 @@
 	kfree(fc);
 }
 
+static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
+{
+	int err;
+
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	/* fuse does it's own writeback accounting */
+	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+
+	err = bdi_init(&fc->bdi);
+	if (err)
+		return err;
+
+	fc->bdi_initialized = 1;
+
+	if (sb->s_bdev) {
+		err =  bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+				    MAJOR(fc->dev), MINOR(fc->dev));
+	} else {
+		err = bdi_register_dev(&fc->bdi, fc->dev);
+	}
+
+	if (err)
+		return err;
+
+	/*
+	 * For a single fuse filesystem use max 1% of dirty +
+	 * writeback threshold.
+	 *
+	 * This gives about 1M of write buffer for memory maps on a
+	 * machine with 1G and 10% dirty_ratio, which should be more
+	 * than enough.
+	 *
+	 * Privileged users can raise it by writing to
+	 *
+	 *    /sys/class/bdi/<bdi>/max_ratio
+	 */
+	bdi_set_max_ratio(&fc->bdi, 1);
+
+	return 0;
+}
+
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct fuse_conn *fc;
@@ -843,11 +860,12 @@
 	if (!fc)
 		goto err_fput;
 
-	err = fuse_conn_init(fc, sb);
-	if (err) {
-		kfree(fc);
-		goto err_fput;
-	}
+	fuse_conn_init(fc);
+
+	fc->dev = sb->s_dev;
+	err = fuse_bdi_init(fc, sb);
+	if (err)
+		goto err_put_conn;
 
 	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
@@ -911,7 +929,7 @@
  err_put_root:
 	dput(root_dentry);
  err_put_conn:
-	bdi_destroy(&fc->bdi);
+	fuse_bdi_destroy(fc);
 	fuse_conn_put(fc);
  err_fput:
 	fput(file);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 3a981b7..cad957c 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -7,6 +7,7 @@
 	select IP_SCTP if DLM_SCTP
 	select FS_POSIX_ACL
 	select CRC32
+	select SLOW_WORK
 	help
 	  A cluster filesystem.
 
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index a851ea4..3da2f1f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,8 +1,9 @@
+EXTRA_CFLAGS := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o main.o meta_io.o \
-	mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
-	ops_fstype.o ops_inode.o ops_super.o quota.o \
+	aops.o dentry.o export.o file.o \
+	ops_fstype.o ops_inode.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/aops.c
similarity index 98%
rename from fs/gfs2/ops_address.c
rename to fs/gfs2/aops.c
index a6dde17..03ebb43 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/aops.c
@@ -28,7 +28,6 @@
 #include "inode.h"
 #include "log.h"
 #include "meta_io.h"
-#include "ops_address.h"
 #include "quota.h"
 #include "trans.h"
 #include "rgrp.h"
@@ -781,10 +780,12 @@
 	unlock_page(page);
 	page_cache_release(page);
 
-	if (inode->i_size < to) {
-		i_size_write(inode, to);
-		ip->i_disksize = inode->i_size;
-		di->di_size = cpu_to_be64(inode->i_size);
+	if (copied) {
+		if (inode->i_size < to) {
+			i_size_write(inode, to);
+			ip->i_disksize = inode->i_size;
+		}
+		gfs2_dinode_out(ip, di);
 		mark_inode_dirty(inode);
 	}
 
@@ -824,7 +825,6 @@
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh;
 	struct gfs2_alloc *al = ip->i_alloc;
-	struct gfs2_dinode *di;
 	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
@@ -847,11 +847,10 @@
 		gfs2_page_add_databufs(ip, page, from, to);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) {
-		di = (struct gfs2_dinode *)dibh->b_data;
-		ip->i_disksize = inode->i_size;
-		di->di_size = cpu_to_be64(inode->i_size);
+	if (ret > 0) {
+		if (inode->i_size > ip->i_disksize)
+			ip->i_disksize = inode->i_size;
+		gfs2_dinode_out(ip, dibh->b_data);
 		mark_inode_dirty(inode);
 	}
 
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3a5d3f8..6d47379 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,7 +25,7 @@
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
-#include "ops_address.h"
+#include "trace_gfs2.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  * block is 512, so __u16 is fine for that. It saves stack space to
@@ -136,7 +136,9 @@
 		   and write it out to disk */
 
 		unsigned int n = 1;
-		block = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &block, &n);
+		if (error)
+			goto out_brelse;
 		if (isdir) {
 			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 			error = gfs2_dir_get_new_buffer(ip, block, &bh);
@@ -476,8 +478,11 @@
 	blks = dblks + iblks;
 	i = sheight;
 	do {
+		int error;
 		n = blks - alloced;
-		bn = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &bn, &n);
+		if (error)
+			return error;
 		alloced += n;
 		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 			gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -585,6 +590,7 @@
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 	if (gfs2_is_dir(ip)) {
 		bsize = sdp->sd_jbsize;
 		arr = sdp->sd_jheightsize;
@@ -619,6 +625,7 @@
 	ret = 0;
 out:
 	release_metapath(&mp);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 	bmap_unlock(ip, create);
 	return ret;
 
@@ -1008,7 +1015,7 @@
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
 	zero_user(page, offset, length);
-
+	mark_buffer_dirty(bh);
 unlock:
 	unlock_page(page);
 	page_cache_release(page);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/dentry.c
similarity index 100%
rename from fs/gfs2/ops_dentry.c
rename to fs/gfs2/dentry.c
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index aef4d0c..297d7e5 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -803,13 +803,20 @@
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int n = 1;
-	u64 bn = gfs2_alloc_block(ip, &n);
-	struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
+	u64 bn;
+	int error;
+	struct buffer_head *bh;
 	struct gfs2_leaf *leaf;
 	struct gfs2_dirent *dent;
 	struct qstr name = { .name = "", .len = 0, .hash = 0 };
+
+	error = gfs2_alloc_block(ip, &bn, &n);
+	if (error)
+		return NULL;
+	bh = gfs2_meta_new(ip->i_gl, bn);
 	if (!bh)
 		return NULL;
+
 	gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 899763a..07ea952 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -582,8 +582,11 @@
 	struct gfs2_ea_header *ea;
 	unsigned int n = 1;
 	u64 block;
+	int error;
 
-	block = gfs2_alloc_block(ip, &n);
+	error = gfs2_alloc_block(ip, &block, &n);
+	if (error)
+		return error;
 	gfs2_trans_add_unrevoke(sdp, block, 1);
 	*bhp = gfs2_meta_new(ip->i_gl, block);
 	gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
@@ -617,6 +620,7 @@
 		    struct gfs2_ea_request *er)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	int error;
 
 	ea->ea_data_len = cpu_to_be32(er->er_data_len);
 	ea->ea_name_len = er->er_name_len;
@@ -642,7 +646,9 @@
 			int mh_size = sizeof(struct gfs2_meta_header);
 			unsigned int n = 1;
 
-			block = gfs2_alloc_block(ip, &n);
+			error = gfs2_alloc_block(ip, &block, &n);
+			if (error)
+				return error;
 			gfs2_trans_add_unrevoke(sdp, block, 1);
 			bh = gfs2_meta_new(ip->i_gl, block);
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
@@ -963,7 +969,9 @@
 	} else {
 		u64 blk;
 		unsigned int n = 1;
-		blk = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &blk, &n);
+		if (error)
+			return error;
 		gfs2_trans_add_unrevoke(sdp, blk, 1);
 		indbh = gfs2_meta_new(ip->i_gl, blk);
 		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/export.c
similarity index 100%
rename from fs/gfs2/ops_export.c
rename to fs/gfs2/export.c
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/file.c
similarity index 96%
rename from fs/gfs2/ops_file.c
rename to fs/gfs2/file.c
index 5d82e91..73318a3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/file.c
@@ -39,7 +39,6 @@
 #include "trans.h"
 #include "util.h"
 #include "eaops.h"
-#include "ops_address.h"
 
 /**
  * gfs2_llseek - seek to a location in a file
@@ -425,33 +424,36 @@
 	.page_mkwrite = gfs2_page_mkwrite,
 };
 
-
 /**
  * gfs2_mmap -
  * @file: The file to map
  * @vma: The VMA which described the mapping
  *
- * Returns: 0 or error code
+ * There is no need to get a lock here unless we should be updating
+ * atime. We ignore any locking errors since the only consequence is
+ * a missed atime update (which will just be deferred until later).
+ *
+ * Returns: 0
  */
 
 static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-	struct gfs2_holder i_gh;
-	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		return error;
+	if (!(file->f_flags & O_NOATIME)) {
+		struct gfs2_holder i_gh;
+		int error;
+
+		gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+		error = gfs2_glock_nq(&i_gh);
+		file_accessed(file);
+		if (error == 0)
+			gfs2_glock_dq_uninit(&i_gh);
 	}
-
 	vma->vm_ops = &gfs2_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 
-	gfs2_glock_dq_uninit(&i_gh);
-
-	return error;
+	return 0;
 }
 
 /**
@@ -692,12 +694,10 @@
 
 static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
-	if (__mandatory_lock(&ip->i_inode))
-		return -ENOLCK;
+	if (fl->fl_type & LOCK_MAND)
+		return -EOPNOTSUPP;
 
 	if (fl->fl_type == F_UNLCK) {
 		do_unflock(file, fl);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ff49810..297421c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
 #include "super.h"
 #include "util.h"
 #include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
 
 struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
@@ -155,7 +157,7 @@
 
 	if (aspace)
 		gfs2_aspace_put(aspace);
-
+	trace_gfs2_glock_put(gl);
 	sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
 }
 
@@ -317,14 +319,17 @@
 						return 2;
 					gh->gh_error = ret;
 					list_del_init(&gh->gh_list);
+					trace_gfs2_glock_queue(gh, 0);
 					gfs2_holder_wake(gh);
 					goto restart;
 				}
 				set_bit(HIF_HOLDER, &gh->gh_iflags);
+				trace_gfs2_promote(gh, 1);
 				gfs2_holder_wake(gh);
 				goto restart;
 			}
 			set_bit(HIF_HOLDER, &gh->gh_iflags);
+			trace_gfs2_promote(gh, 0);
 			gfs2_holder_wake(gh);
 			continue;
 		}
@@ -354,6 +359,7 @@
 		else
 			continue;
 		list_del_init(&gh->gh_list);
+		trace_gfs2_glock_queue(gh, 0);
 		gfs2_holder_wake(gh);
 	}
 }
@@ -422,6 +428,7 @@
 	int rv;
 
 	spin_lock(&gl->gl_spin);
+	trace_gfs2_glock_state_change(gl, state);
 	state_change(gl, state);
 	gh = find_first_waiter(gl);
 
@@ -796,22 +803,37 @@
 	gh->gh_ip = 0;
 }
 
-static int just_schedule(void *word)
+/**
+ * gfs2_glock_holder_wait
+ * @word: unused
+ *
+ * This function and gfs2_glock_demote_wait both show up in the WCHAN
+ * field. Thus I've separated these otherwise identical functions in
+ * order to be more informative to the user.
+ */
+
+static int gfs2_glock_holder_wait(void *word)
 {
         schedule();
         return 0;
 }
 
+static int gfs2_glock_demote_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 static void wait_on_holder(struct gfs2_holder *gh)
 {
 	might_sleep();
-	wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
 }
 
 static void wait_on_demote(struct gfs2_glock *gl)
 {
 	might_sleep();
-	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
 }
 
 /**
@@ -836,6 +858,7 @@
 			gl->gl_demote_state != state) {
 		gl->gl_demote_state = LM_ST_UNLOCKED;
 	}
+	trace_gfs2_demote_rq(gl);
 }
 
 /**
@@ -921,6 +944,7 @@
 			goto do_cancel;
 		return;
 	}
+	trace_gfs2_glock_queue(gh, 1);
 	list_add_tail(&gh->gh_list, insert_pt);
 do_cancel:
 	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1017,6 +1041,7 @@
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
+	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
 		return;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 70f87f4..d5e4ab1 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -310,24 +310,6 @@
 }
 
 /**
- * rgrp_go_dump - print out an rgrp
- * @seq: The iterator
- * @gl: The glock in question
- *
- */
-
-static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
-{
-	const struct gfs2_rgrpd *rgd = gl->gl_object;
-	if (rgd == NULL)
-		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
-		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
-	return 0;
-}
-
-/**
  * trans_go_sync - promote/demote the transaction glock
  * @gl: the glock
  * @state: the requested state
@@ -410,7 +392,7 @@
 	.go_demote_ok = rgrp_go_demote_ok,
 	.go_lock = rgrp_go_lock,
 	.go_unlock = rgrp_go_unlock,
-	.go_dump = rgrp_go_dump,
+	.go_dump = gfs2_rgrp_dump,
 	.go_type = LM_TYPE_RGRP,
 	.go_min_hold_time = HZ / 5,
 };
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 399d1b9..225347f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,7 @@
 
 #include <linux/fs.h>
 #include <linux/workqueue.h>
+#include <linux/slow-work.h>
 #include <linux/dlm.h>
 #include <linux/buffer_head.h>
 
@@ -63,9 +64,12 @@
 	const struct gfs2_log_operations *le_ops;
 };
 
+#define GBF_FULL 1
+
 struct gfs2_bitmap {
 	struct buffer_head *bi_bh;
 	char *bi_clone;
+	unsigned long bi_flags;
 	u32 bi_offset;
 	u32 bi_start;
 	u32 bi_len;
@@ -90,10 +94,11 @@
 	struct gfs2_sbd *rd_sbd;
 	unsigned int rd_bh_count;
 	u32 rd_last_alloc;
-	unsigned char rd_flags;
-#define GFS2_RDF_CHECK        0x01      /* Need to check for unlinked inodes */
-#define GFS2_RDF_NOALLOC      0x02      /* rg prohibits allocation */
-#define GFS2_RDF_UPTODATE     0x04      /* rg is up to date */
+	u32 rd_flags;
+#define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
+#define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
+#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
+#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
 };
 
 enum gfs2_state_bits {
@@ -376,11 +381,11 @@
 struct gfs2_jdesc {
 	struct list_head jd_list;
 	struct list_head extent_list;
-
+	struct slow_work jd_work;
 	struct inode *jd_inode;
+	unsigned long jd_flags;
+#define JDF_RECOVERY 1
 	unsigned int jd_jid;
-	int jd_dirty;
-
 	unsigned int jd_blocks;
 };
 
@@ -390,9 +395,6 @@
 	s64 sc_dinodes;
 };
 
-#define GFS2_GLOCKD_DEFAULT	1
-#define GFS2_GLOCKD_MAX		16
-
 #define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
 #define GFS2_QUOTA_OFF		0
 #define GFS2_QUOTA_ACCOUNT	1
@@ -418,6 +420,7 @@
 	unsigned int ar_data:2;			/* ordered/writeback */
 	unsigned int ar_meta:1;			/* mount metafs */
 	unsigned int ar_discard:1;		/* discard requests */
+	int ar_commit;				/* Commit interval */
 };
 
 struct gfs2_tune {
@@ -426,7 +429,6 @@
 	unsigned int gt_incore_log_blocks;
 	unsigned int gt_log_flush_secs;
 
-	unsigned int gt_recoverd_secs;
 	unsigned int gt_logd_secs;
 
 	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
@@ -447,6 +449,7 @@
 	SDF_JOURNAL_LIVE	= 1,
 	SDF_SHUTDOWN		= 2,
 	SDF_NOBARRIERS		= 3,
+	SDF_NORECOVERY		= 4,
 };
 
 #define GFS2_FSNAME_LEN		256
@@ -493,7 +496,6 @@
 	unsigned long ls_flags;
 	dlm_lockspace_t *ls_dlm;
 
-	int ls_recover_jid;
 	int ls_recover_jid_done;
 	int ls_recover_jid_status;
 };
@@ -582,7 +584,6 @@
 
 	/* Daemon stuff */
 
-	struct task_struct *sd_recoverd_process;
 	struct task_struct *sd_logd_process;
 	struct task_struct *sd_quotad_process;
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5a31d42..2f94bd7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -30,7 +30,6 @@
 #include "inode.h"
 #include "log.h"
 #include "meta_io.h"
-#include "ops_address.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
@@ -1047,154 +1046,7 @@
 	return ERR_PTR(error);
 }
 
-/**
- * gfs2_rmdiri - Remove a directory
- * @dip: The parent directory of the directory to be removed
- * @name: The name of the directory to be removed
- * @ip: The GFS2 inode of the directory to be removed
- *
- * Assumes Glocks on dip and ip are held
- *
- * Returns: errno
- */
-
-int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		struct gfs2_inode *ip)
-{
-	struct qstr dotname;
-	int error;
-
-	if (ip->i_entries != 2) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-
-	error = gfs2_dir_del(dip, name);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(dip, -1);
-	if (error)
-		return error;
-
-	gfs2_str2qstr(&dotname, ".");
-	error = gfs2_dir_del(ip, &dotname);
-	if (error)
-		return error;
-
-	gfs2_str2qstr(&dotname, "..");
-	error = gfs2_dir_del(ip, &dotname);
-	if (error)
-		return error;
-
-	/* It looks odd, but it really should be done twice */
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	return error;
-}
-
-/*
- * gfs2_unlink_ok - check to see that a inode is still in a directory
- * @dip: the directory
- * @name: the name of the file
- * @ip: the inode
- *
- * Assumes that the lock on (at least) @dip is held.
- *
- * Returns: 0 if the parent/child relationship is correct, errno if it isn't
- */
-
-int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   const struct gfs2_inode *ip)
-{
-	int error;
-
-	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
-		return -EPERM;
-
-	if ((dip->i_inode.i_mode & S_ISVTX) &&
-	    dip->i_inode.i_uid != current_fsuid() &&
-	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
-		return -EPERM;
-
-	if (IS_APPEND(&dip->i_inode))
-		return -EPERM;
-
-	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
-	if (error)
-		return error;
-
-	error = gfs2_dir_check(&dip->i_inode, name, ip);
-	if (error)
-		return error;
-
-	return 0;
-}
-
-/**
- * gfs2_readlinki - return the contents of a symlink
- * @ip: the symlink's inode
- * @buf: a pointer to the buffer to be filled
- * @len: a pointer to the length of @buf
- *
- * If @buf is too small, a piece of memory is kmalloc()ed and needs
- * to be freed by the caller.
- *
- * Returns: errno
- */
-
-int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
-{
-	struct gfs2_holder i_gh;
-	struct buffer_head *dibh;
-	unsigned int x;
-	int error;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		return error;
-	}
-
-	if (!ip->i_disksize) {
-		gfs2_consist_inode(ip);
-		error = -EIO;
-		goto out;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		goto out;
-
-	x = ip->i_disksize + 1;
-	if (x > *len) {
-		*buf = kmalloc(x, GFP_NOFS);
-		if (!*buf) {
-			error = -ENOMEM;
-			goto out_brelse;
-		}
-	}
-
-	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
-	*len = x;
-
-out_brelse:
-	brelse(dibh);
-out:
-	gfs2_glock_dq_uninit(&i_gh);
-	return error;
-}
-
-static int
-__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
 	struct buffer_head *dibh;
 	int error;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index c30be2b..c341aaf 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -11,8 +11,16 @@
 #define __INODE_DOT_H__
 
 #include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mm.h>
 #include "util.h"
 
+extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
+extern int gfs2_internal_read(struct gfs2_inode *ip,
+			      struct file_ra_state *ra_state,
+			      char *buf, loff_t *pos, unsigned size);
+extern void gfs2_set_aops(struct inode *inode);
+
 static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
 	return !ip->i_height;
@@ -73,30 +81,26 @@
 }
 
 
-void gfs2_set_iop(struct inode *inode);
-struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-				u64 no_addr, u64 no_formal_ino,
-				int skip_freeing);
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+extern void gfs2_set_iop(struct inode *inode);
+extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+				       u64 no_addr, u64 no_formal_ino,
+				       int skip_freeing);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
-int gfs2_inode_refresh(struct gfs2_inode *ip);
+extern int gfs2_inode_refresh(struct gfs2_inode *ip);
 
-int gfs2_dinode_dealloc(struct gfs2_inode *inode);
-int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
-struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-			   int is_root);
-struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode, dev_t dev);
-int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		struct gfs2_inode *ip);
-int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   const struct gfs2_inode *ip);
-int gfs2_permission(struct inode *inode, int mask);
-int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
-struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-void gfs2_dinode_print(const struct gfs2_inode *ip);
+extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
+extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
+extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
+				  int is_root);
+extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
+				  const struct qstr *name,
+				  unsigned int mode, dev_t dev);
+extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
+extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+extern void gfs2_dinode_print(const struct gfs2_inode *ip);
 
 extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 98918a7..13c6237 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
 #include "meta_io.h"
 #include "util.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define PULL 1
 
@@ -120,7 +121,7 @@
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
-				submit_bh(WRITE, bh);
+				submit_bh(WRITE_SYNC_PLUG, bh);
 			} else {
 				unlock_buffer(bh);
 				brelse(bh);
@@ -313,6 +314,7 @@
 		gfs2_log_lock(sdp);
 	}
 	atomic_sub(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, -blks);
 	gfs2_log_unlock(sdp);
 	mutex_unlock(&sdp->sd_log_reserve_mutex);
 
@@ -333,6 +335,7 @@
 
 	gfs2_log_lock(sdp);
 	atomic_add(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, blks);
 	gfs2_assert_withdraw(sdp,
 			     atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@
 
 	gfs2_log_lock(sdp);
 	atomic_add(dist, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, dist);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
 
@@ -604,7 +608,7 @@
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
@@ -664,7 +668,7 @@
 		lock_buffer(bh);
 		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
 			bh->b_end_io = end_buffer_write_sync;
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 		} else {
 			unlock_buffer(bh);
 			brelse(bh);
@@ -715,6 +719,7 @@
 		up_write(&sdp->sd_log_flush_lock);
 		return;
 	}
+	trace_gfs2_log_flush(sdp, 1);
 
 	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@
 	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
 		gfs2_log_lock(sdp);
 		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+		trace_gfs2_log_blocks(sdp, -1);
 		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
 	}
@@ -763,8 +769,7 @@
 		ai = NULL;
 	}
 	gfs2_log_unlock(sdp);
-
-	sdp->sd_vfs->s_dirt = 0;
+	trace_gfs2_log_flush(sdp, 0);
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(ai);
@@ -788,6 +793,7 @@
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
 	unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
 	atomic_add(unused, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, unused);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
 			     sdp->sd_jdesc->jd_blocks);
 	sdp->sd_log_blks_reserved = reserved;
@@ -823,7 +829,6 @@
 	log_refund(sdp, tr);
 	buf_lo_incore_commit(sdp, tr);
 
-	sdp->sd_vfs->s_dirt = 1;
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 80e4f5f..9969ff0 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,6 +13,8 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/bio.h>
+#include <linux/fs.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -25,6 +27,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 /**
  * gfs2_pin - Pin a buffer in memory
@@ -51,6 +54,7 @@
 	if (bd->bd_ail)
 		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
 	get_bh(bh);
+	trace_gfs2_pin(bd, 1);
 }
 
 /**
@@ -87,6 +91,7 @@
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+	trace_gfs2_pin(bd, 0);
 	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
 }
@@ -189,7 +194,7 @@
 		}
 
 		gfs2_log_unlock(sdp);
-		submit_bh(WRITE, bh);
+		submit_bh(WRITE_SYNC_PLUG, bh);
 		gfs2_log_lock(sdp);
 
 		n = 0;
@@ -199,7 +204,7 @@
 			gfs2_log_unlock(sdp);
 			lock_buffer(bd2->bd_bh);
 			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 			gfs2_log_lock(sdp);
 			if (++n >= num)
 				break;
@@ -341,7 +346,7 @@
 		sdp->sd_log_num_revoke--;
 
 		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 
 			bh = gfs2_log_get_buf(sdp);
 			mh = (struct gfs2_meta_header *)bh->b_data;
@@ -358,7 +363,7 @@
 	}
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 
-	submit_bh(WRITE, bh);
+	submit_bh(WRITE_SYNC_PLUG, bh);
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -560,7 +565,7 @@
 	ptr = bh_log_ptr(bh);
 	
 	get_bh(bh);
-	submit_bh(WRITE, bh);
+	submit_bh(WRITE_SYNC_PLUG, bh);
 	gfs2_log_lock(sdp);
 	while(!list_empty(list)) {
 		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -586,7 +591,7 @@
 		} else {
 			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
 		}
-		submit_bh(WRITE, bh1);
+		submit_bh(WRITE_SYNC_PLUG, bh1);
 		gfs2_log_lock(sdp);
 		ptr += 2;
 	}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a6892ed..eacd78a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/gfs2_ondisk.h>
 #include <asm/atomic.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -113,12 +114,18 @@
 	if (error)
 		goto fail_unregister;
 
+	error = slow_work_register_user();
+	if (error)
+		goto fail_slow;
+
 	gfs2_register_debugfs();
 
 	printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
+fail_slow:
+	unregister_filesystem(&gfs2meta_fs_type);
 fail_unregister:
 	unregister_filesystem(&gfs2_fs_type);
 fail:
@@ -156,6 +163,7 @@
 	gfs2_unregister_debugfs();
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
+	slow_work_unregister_user();
 
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 8d6f132..cb8d7a9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,19 +31,66 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
-#include "ops_address.h"
 
-static int aspace_get_block(struct inode *inode, sector_t lblock,
-			    struct buffer_head *bh_result, int create)
+static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
 {
-	gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
-	return -EOPNOTSUPP;
-}
+	int err;
+	struct buffer_head *bh, *head;
+	int nr_underway = 0;
+	int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
+			WRITE_SYNC_PLUG : WRITE));
 
-static int gfs2_aspace_writepage(struct page *page,
-				 struct writeback_control *wbc)
-{
-	return block_write_full_page(page, aspace_get_block, wbc);
+	BUG_ON(!PageLocked(page));
+	BUG_ON(!page_has_buffers(page));
+
+	head = page_buffers(page);
+	bh = head;
+
+	do {
+		if (!buffer_mapped(bh))
+			continue;
+		/*
+		 * If it's a fully non-blocking write attempt and we cannot
+		 * lock the buffer then redirty the page.  Note that this can
+		 * potentially cause a busy-wait loop from pdflush and kswapd
+		 * activity, but those code paths have their own higher-level
+		 * throttling.
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+			lock_buffer(bh);
+		} else if (!trylock_buffer(bh)) {
+			redirty_page_for_writepage(wbc, page);
+			continue;
+		}
+		if (test_clear_buffer_dirty(bh)) {
+			mark_buffer_async_write(bh);
+		} else {
+			unlock_buffer(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+
+	/*
+	 * The page and its buffers are protected by PageWriteback(), so we can
+	 * drop the bh refcounts early.
+	 */
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			submit_bh(write_op, bh);
+			nr_underway++;
+		}
+		bh = next;
+	} while (bh != head);
+	unlock_page(page);
+
+	err = 0;
+	if (nr_underway == 0)
+		end_page_writeback(page);
+
+	return err;
 }
 
 static const struct address_space_operations aspace_aops = {
@@ -201,16 +248,32 @@
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		   struct buffer_head **bhp)
 {
-	*bhp = gfs2_getbuf(gl, blkno, CREATE);
-	if (!buffer_uptodate(*bhp)) {
-		ll_rw_block(READ_META, 1, bhp);
-		if (flags & DIO_WAIT) {
-			int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
-			if (error) {
-				brelse(*bhp);
-				return error;
-			}
-		}
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct buffer_head *bh;
+
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		return -EIO;
+
+	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
+
+	lock_buffer(bh);
+	if (buffer_uptodate(bh)) {
+		unlock_buffer(bh);
+		return 0;
+	}
+	bh->b_end_io = end_buffer_read_sync;
+	get_bh(bh);
+	submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+	if (!(flags & DIO_WAIT))
+		return 0;
+
+	wait_on_buffer(bh);
+	if (unlikely(!buffer_uptodate(bh))) {
+		struct gfs2_trans *tr = current->journal_info;
+		if (tr && tr->tr_touched)
+			gfs2_io_error_bh(sdp, bh);
+		brelse(bh);
+		return -EIO;
 	}
 
 	return 0;
@@ -404,7 +467,7 @@
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_META, 1, &first_bh);
+		ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
 
 	dblock++;
 	extlen--;
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
deleted file mode 100644
index f7e8527..0000000
--- a/fs/gfs2/mount.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/parser.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "super.h"
-#include "sys.h"
-#include "util.h"
-
-enum {
-	Opt_lockproto,
-	Opt_locktable,
-	Opt_hostdata,
-	Opt_spectator,
-	Opt_ignore_local_fs,
-	Opt_localflocks,
-	Opt_localcaching,
-	Opt_debug,
-	Opt_nodebug,
-	Opt_upgrade,
-	Opt_acl,
-	Opt_noacl,
-	Opt_quota_off,
-	Opt_quota_account,
-	Opt_quota_on,
-	Opt_quota,
-	Opt_noquota,
-	Opt_suiddir,
-	Opt_nosuiddir,
-	Opt_data_writeback,
-	Opt_data_ordered,
-	Opt_meta,
-	Opt_discard,
-	Opt_nodiscard,
-	Opt_err,
-};
-
-static const match_table_t tokens = {
-	{Opt_lockproto, "lockproto=%s"},
-	{Opt_locktable, "locktable=%s"},
-	{Opt_hostdata, "hostdata=%s"},
-	{Opt_spectator, "spectator"},
-	{Opt_ignore_local_fs, "ignore_local_fs"},
-	{Opt_localflocks, "localflocks"},
-	{Opt_localcaching, "localcaching"},
-	{Opt_debug, "debug"},
-	{Opt_nodebug, "nodebug"},
-	{Opt_upgrade, "upgrade"},
-	{Opt_acl, "acl"},
-	{Opt_noacl, "noacl"},
-	{Opt_quota_off, "quota=off"},
-	{Opt_quota_account, "quota=account"},
-	{Opt_quota_on, "quota=on"},
-	{Opt_quota, "quota"},
-	{Opt_noquota, "noquota"},
-	{Opt_suiddir, "suiddir"},
-	{Opt_nosuiddir, "nosuiddir"},
-	{Opt_data_writeback, "data=writeback"},
-	{Opt_data_ordered, "data=ordered"},
-	{Opt_meta, "meta"},
-	{Opt_discard, "discard"},
-	{Opt_nodiscard, "nodiscard"},
-	{Opt_err, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @sdp:
- * @data:
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
-{
-	char *o;
-	int token;
-	substring_t tmp[MAX_OPT_ARGS];
-
-	/* Split the options into tokens with the "," character and
-	   process them */
-
-	while (1) {
-		o = strsep(&options, ",");
-		if (o == NULL)
-			break;
-		if (*o == '\0')
-			continue;
-
-		token = match_token(o, tokens, tmp);
-		switch (token) {
-		case Opt_lockproto:
-			match_strlcpy(args->ar_lockproto, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_locktable:
-			match_strlcpy(args->ar_locktable, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_hostdata:
-			match_strlcpy(args->ar_hostdata, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_spectator:
-			args->ar_spectator = 1;
-			break;
-		case Opt_ignore_local_fs:
-			args->ar_ignore_local_fs = 1;
-			break;
-		case Opt_localflocks:
-			args->ar_localflocks = 1;
-			break;
-		case Opt_localcaching:
-			args->ar_localcaching = 1;
-			break;
-		case Opt_debug:
-			args->ar_debug = 1;
-			break;
-		case Opt_nodebug:
-			args->ar_debug = 0;
-			break;
-		case Opt_upgrade:
-			args->ar_upgrade = 1;
-			break;
-		case Opt_acl:
-			args->ar_posix_acl = 1;
-			break;
-		case Opt_noacl:
-			args->ar_posix_acl = 0;
-			break;
-		case Opt_quota_off:
-		case Opt_noquota:
-			args->ar_quota = GFS2_QUOTA_OFF;
-			break;
-		case Opt_quota_account:
-			args->ar_quota = GFS2_QUOTA_ACCOUNT;
-			break;
-		case Opt_quota_on:
-		case Opt_quota:
-			args->ar_quota = GFS2_QUOTA_ON;
-			break;
-		case Opt_suiddir:
-			args->ar_suiddir = 1;
-			break;
-		case Opt_nosuiddir:
-			args->ar_suiddir = 0;
-			break;
-		case Opt_data_writeback:
-			args->ar_data = GFS2_DATA_WRITEBACK;
-			break;
-		case Opt_data_ordered:
-			args->ar_data = GFS2_DATA_ORDERED;
-			break;
-		case Opt_meta:
-			args->ar_meta = 1;
-			break;
-		case Opt_discard:
-			args->ar_discard = 1;
-			break;
-		case Opt_nodiscard:
-			args->ar_discard = 0;
-			break;
-		case Opt_err:
-		default:
-			fs_info(sdp, "invalid mount option: %s\n", o);
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
deleted file mode 100644
index 5da2128..0000000
--- a/fs/gfs2/ops_address.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_ADDRESS_DOT_H__
-#define __OPS_ADDRESS_DOT_H__
-
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/mm.h>
-
-extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
-extern int gfs2_internal_read(struct gfs2_inode *ip,
-			      struct file_ra_state *ra_state,
-			      char *buf, loff_t *pos, unsigned size);
-extern void gfs2_set_aops(struct inode *inode);
-
-#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ff9473..7bc3c45 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,6 +17,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -32,6 +33,7 @@
 #include "log.h"
 #include "quota.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define DO 0
 #define UNDO 1
@@ -55,8 +57,6 @@
 	spin_lock_init(&gt->gt_spin);
 
 	gt->gt_incore_log_blocks = 1024;
-	gt->gt_log_flush_secs = 60;
-	gt->gt_recoverd_secs = 60;
 	gt->gt_logd_secs = 1;
 	gt->gt_quota_simul_sync = 64;
 	gt->gt_quota_warn_period = 10;
@@ -526,11 +526,11 @@
 	}
 
 	/* Set up the buffer cache and SB for real */
-	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
+	if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
 		ret = -EINVAL;
 		fs_err(sdp, "FS block size (%u) is too small for device "
 		       "block size (%u)\n",
-		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
+		       sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev));
 		goto out;
 	}
 	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
@@ -676,6 +676,7 @@
 			break;
 
 		INIT_LIST_HEAD(&jd->extent_list);
+		slow_work_init(&jd->jd_work, &gfs2_recover_ops);
 		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
 		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
 			if (!jd->jd_inode)
@@ -701,14 +702,13 @@
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
 	struct gfs2_holder ji_gh;
-	struct task_struct *p;
 	struct gfs2_inode *ip;
 	int jindex = 1;
 	int error = 0;
 
 	if (undo) {
 		jindex = 0;
-		goto fail_recoverd;
+		goto fail_jinode_gh;
 	}
 
 	sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
@@ -776,6 +776,7 @@
 		/* Map the extents for this journal's blocks */
 		map_journal_extents(sdp);
 	}
+	trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
 
 	if (sdp->sd_lockstruct.ls_first) {
 		unsigned int x;
@@ -801,18 +802,8 @@
 	gfs2_glock_dq_uninit(&ji_gh);
 	jindex = 0;
 
-	p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
-	error = IS_ERR(p);
-	if (error) {
-		fs_err(sdp, "can't start recoverd thread: %d\n", error);
-		goto fail_jinode_gh;
-	}
-	sdp->sd_recoverd_process = p;
-
 	return 0;
 
-fail_recoverd:
-	kthread_stop(sdp->sd_recoverd_process);
 fail_jinode_gh:
 	if (!sdp->sd_args.ar_spectator)
 		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
@@ -1165,6 +1156,7 @@
 
 	sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
 	sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
+	sdp->sd_args.ar_commit = 60;
 
 	error = gfs2_mount_args(sdp, &sdp->sd_args, data);
 	if (error) {
@@ -1172,8 +1164,10 @@
 		goto fail;
 	}
 
-	if (sdp->sd_args.ar_spectator)
+	if (sdp->sd_args.ar_spectator) {
                 sb->s_flags |= MS_RDONLY;
+		set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+	}
 	if (sdp->sd_args.ar_posix_acl)
 		sb->s_flags |= MS_POSIXACL;
 
@@ -1191,6 +1185,8 @@
                                GFS2_BASIC_BLOCK_SHIFT;
 	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
 
+	sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit;
+
 	error = init_names(sdp, silent);
 	if (error)
 		goto fail;
@@ -1279,9 +1275,22 @@
 	return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
 }
 
-static struct super_block *get_gfs2_sb(const char *dev_name)
+static int test_meta_super(struct super_block *s, void *ptr)
 {
-	struct super_block *sb;
+	struct block_device *bdev = ptr;
+	return (bdev == s->s_bdev);
+}
+
+static int set_meta_super(struct super_block *s, void *ptr)
+{
+	return -EINVAL;
+}
+
+static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	struct super_block *s;
+	struct gfs2_sbd *sdp;
 	struct path path;
 	int error;
 
@@ -1289,30 +1298,17 @@
 	if (error) {
 		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
 		       dev_name, error);
-		return NULL;
+		return error;
 	}
-	sb = path.dentry->d_inode->i_sb;
-	if (sb && (sb->s_type == &gfs2_fs_type))
-		atomic_inc(&sb->s_active);
-	else
-		sb = NULL;
+	s = sget(&gfs2_fs_type, test_meta_super, set_meta_super,
+		 path.dentry->d_inode->i_sb->s_bdev);
 	path_put(&path);
-	return sb;
-}
-
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
-			    const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	struct super_block *sb = NULL;
-	struct gfs2_sbd *sdp;
-
-	sb = get_gfs2_sb(dev_name);
-	if (!sb) {
+	if (IS_ERR(s)) {
 		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
-		return -ENOENT;
+		return PTR_ERR(s);
 	}
-	sdp = sb->s_fs_info;
-	mnt->mnt_sb = sb;
+	sdp = s->s_fs_info;
+	mnt->mnt_sb = s;
 	mnt->mnt_root = dget(sdp->sd_master_dir);
 	return 0;
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1c70fa5..f8bd20b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -262,6 +262,44 @@
 	return error;
 }
 
+/*
+ * gfs2_unlink_ok - check to see that a inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+			  const struct gfs2_inode *ip)
+{
+	int error;
+
+	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
+		return -EPERM;
+
+	if ((dip->i_inode.i_mode & S_ISVTX) &&
+	    dip->i_inode.i_uid != current_fsuid() &&
+	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (IS_APPEND(&dip->i_inode))
+		return -EPERM;
+
+	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+	if (error)
+		return error;
+
+	error = gfs2_dir_check(&dip->i_inode, name, ip);
+	if (error)
+		return error;
+
+	return 0;
+}
+
 /**
  * gfs2_unlink - Unlink a file
  * @dir: The inode of the directory containing the file to unlink
@@ -473,6 +511,59 @@
 }
 
 /**
+ * gfs2_rmdiri - Remove a directory
+ * @dip: The parent directory of the directory to be removed
+ * @name: The name of the directory to be removed
+ * @ip: The GFS2 inode of the directory to be removed
+ *
+ * Assumes Glocks on dip and ip are held
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
+		       struct gfs2_inode *ip)
+{
+	struct qstr dotname;
+	int error;
+
+	if (ip->i_entries != 2) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(ip);
+		return -EIO;
+	}
+
+	error = gfs2_dir_del(dip, name);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(dip, -1);
+	if (error)
+		return error;
+
+	gfs2_str2qstr(&dotname, ".");
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	gfs2_str2qstr(&dotname, "..");
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	/* It looks odd, but it really should be done twice */
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	return error;
+}
+
+/**
  * gfs2_rmdir - Remove a directory
  * @dir: The parent directory of the directory to be removed
  * @dentry: The dentry of the directory to remove
@@ -885,6 +976,61 @@
 }
 
 /**
+ * gfs2_readlinki - return the contents of a symlink
+ * @ip: the symlink's inode
+ * @buf: a pointer to the buffer to be filled
+ * @len: a pointer to the length of @buf
+ *
+ * If @buf is too small, a piece of memory is kmalloc()ed and needs
+ * to be freed by the caller.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
+{
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int x;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		return error;
+	}
+
+	if (!ip->i_disksize) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	x = ip->i_disksize + 1;
+	if (x > *len) {
+		*buf = kmalloc(x, GFP_NOFS);
+		if (!*buf) {
+			error = -ENOMEM;
+			goto out_brelse;
+		}
+	}
+
+	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
+	*len = x;
+
+out_brelse:
+	brelse(dibh);
+out:
+	gfs2_glock_dq_uninit(&i_gh);
+	return error;
+}
+
+/**
  * gfs2_readlink - Read the value of a symlink
  * @dentry: the symlink
  * @buf: the buffer to read the symlink data into
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
deleted file mode 100644
index 4580195..0000000
--- a/fs/gfs2/ops_super.c
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/statfs.h>
-#include <linux/seq_file.h>
-#include <linux/mount.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-#include <linux/time.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "glock.h"
-#include "inode.h"
-#include "log.h"
-#include "quota.h"
-#include "recovery.h"
-#include "rgrp.h"
-#include "super.h"
-#include "sys.h"
-#include "util.h"
-#include "trans.h"
-#include "dir.h"
-#include "eattr.h"
-#include "bmap.h"
-#include "meta_io.h"
-
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-/**
- * gfs2_write_inode - Make sure the inode is stable on the disk
- * @inode: The inode
- * @sync: synchronous write flag
- *
- * Returns: errno
- */
-
-static int gfs2_write_inode(struct inode *inode, int sync)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_holder gh;
-	struct buffer_head *bh;
-	struct timespec atime;
-	struct gfs2_dinode *di;
-	int ret = 0;
-
-	/* Check this is a "normal" inode, etc */
-	if (!test_bit(GIF_USER, &ip->i_flags) ||
-	    (current->flags & PF_MEMALLOC))
-		return 0;
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (ret)
-		goto do_flush;
-	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
-	if (ret)
-		goto do_unlock;
-	ret = gfs2_meta_inode_buffer(ip, &bh);
-	if (ret == 0) {
-		di = (struct gfs2_dinode *)bh->b_data;
-		atime.tv_sec = be64_to_cpu(di->di_atime);
-		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
-		if (timespec_compare(&inode->i_atime, &atime) > 0) {
-			gfs2_trans_add_bh(ip->i_gl, bh, 1);
-			gfs2_dinode_out(ip, bh->b_data);
-		}
-		brelse(bh);
-	}
-	gfs2_trans_end(sdp);
-do_unlock:
-	gfs2_glock_dq_uninit(&gh);
-do_flush:
-	if (sync != 0)
-		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
-	return ret;
-}
-
-/**
- * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
- * @sdp: the filesystem
- *
- * Returns: errno
- */
-
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
-{
-	struct gfs2_holder t_gh;
-	int error;
-
-	gfs2_quota_sync(sdp);
-	gfs2_statfs_sync(sdp);
-
-	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &t_gh);
-	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return error;
-
-	gfs2_meta_syncfs(sdp);
-	gfs2_log_shutdown(sdp);
-
-	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
-	if (t_gh.gh_gl)
-		gfs2_glock_dq_uninit(&t_gh);
-
-	gfs2_quota_cleanup(sdp);
-
-	return error;
-}
-
-/**
- * gfs2_put_super - Unmount the filesystem
- * @sb: The VFS superblock
- *
- */
-
-static void gfs2_put_super(struct super_block *sb)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
-
-	/*  Unfreeze the filesystem, if we need to  */
-
-	mutex_lock(&sdp->sd_freeze_lock);
-	if (sdp->sd_freeze_count)
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	mutex_unlock(&sdp->sd_freeze_lock);
-
-	kthread_stop(sdp->sd_quotad_process);
-	kthread_stop(sdp->sd_logd_process);
-	kthread_stop(sdp->sd_recoverd_process);
-
-	if (!(sb->s_flags & MS_RDONLY)) {
-		error = gfs2_make_fs_ro(sdp);
-		if (error)
-			gfs2_io_error(sdp);
-	}
-	/*  At this point, we're through modifying the disk  */
-
-	/*  Release stuff  */
-
-	iput(sdp->sd_jindex);
-	iput(sdp->sd_inum_inode);
-	iput(sdp->sd_statfs_inode);
-	iput(sdp->sd_rindex);
-	iput(sdp->sd_quota_inode);
-
-	gfs2_glock_put(sdp->sd_rename_gl);
-	gfs2_glock_put(sdp->sd_trans_gl);
-
-	if (!sdp->sd_args.ar_spectator) {
-		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-		iput(sdp->sd_ir_inode);
-		iput(sdp->sd_sc_inode);
-		iput(sdp->sd_qc_inode);
-	}
-
-	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
-	gfs2_clear_rgrpd(sdp);
-	gfs2_jindex_free(sdp);
-	/*  Take apart glock structures and buffer lists  */
-	gfs2_gl_hash_clear(sdp);
-	/*  Unmount the locking protocol  */
-	gfs2_lm_unmount(sdp);
-
-	/*  At this point, we're through participating in the lockspace  */
-	gfs2_sys_fs_del(sdp);
-}
-
-/**
- * gfs2_write_super
- * @sb: the superblock
- *
- */
-
-static void gfs2_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
-/**
- * gfs2_sync_fs - sync the filesystem
- * @sb: the superblock
- *
- * Flushes the log to disk.
- */
-
-static int gfs2_sync_fs(struct super_block *sb, int wait)
-{
-	sb->s_dirt = 0;
-	if (wait && sb->s_fs_info)
-		gfs2_log_flush(sb->s_fs_info, NULL);
-	return 0;
-}
-
-/**
- * gfs2_freeze - prevent further writes to the filesystem
- * @sb: the VFS structure for the filesystem
- *
- */
-
-static int gfs2_freeze(struct super_block *sb)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
-
-	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return -EINVAL;
-
-	for (;;) {
-		error = gfs2_freeze_fs(sdp);
-		if (!error)
-			break;
-
-		switch (error) {
-		case -EBUSY:
-			fs_err(sdp, "waiting for recovery before freeze\n");
-			break;
-
-		default:
-			fs_err(sdp, "error freezing FS: %d\n", error);
-			break;
-		}
-
-		fs_err(sdp, "retrying...\n");
-		msleep(1000);
-	}
-	return 0;
-}
-
-/**
- * gfs2_unfreeze - reallow writes to the filesystem
- * @sb: the VFS structure for the filesystem
- *
- */
-
-static int gfs2_unfreeze(struct super_block *sb)
-{
-	gfs2_unfreeze_fs(sb->s_fs_info);
-	return 0;
-}
-
-/**
- * statfs_fill - fill in the sg for a given RG
- * @rgd: the RG
- * @sc: the sc structure
- *
- * Returns: 0 on success, -ESTALE if the LVB is invalid
- */
-
-static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
-			    struct gfs2_statfs_change_host *sc)
-{
-	gfs2_rgrp_verify(rgd);
-	sc->sc_total += rgd->rd_data;
-	sc->sc_free += rgd->rd_free;
-	sc->sc_dinodes += rgd->rd_dinodes;
-	return 0;
-}
-
-/**
- * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
- * @sdp: the filesystem
- * @sc: the sc info that will be returned
- *
- * Any error (other than a signal) will cause this routine to fall back
- * to the synchronous version.
- *
- * FIXME: This really shouldn't busy wait like this.
- *
- * Returns: errno
- */
-
-static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_holder ri_gh;
-	struct gfs2_rgrpd *rgd_next;
-	struct gfs2_holder *gha, *gh;
-	unsigned int slots = 64;
-	unsigned int x;
-	int done;
-	int error = 0, err;
-
-	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
-	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
-	if (!gha)
-		return -ENOMEM;
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		goto out;
-
-	rgd_next = gfs2_rgrpd_get_first(sdp);
-
-	for (;;) {
-		done = 1;
-
-		for (x = 0; x < slots; x++) {
-			gh = gha + x;
-
-			if (gh->gh_gl && gfs2_glock_poll(gh)) {
-				err = gfs2_glock_wait(gh);
-				if (err) {
-					gfs2_holder_uninit(gh);
-					error = err;
-				} else {
-					if (!error)
-						error = statfs_slow_fill(
-							gh->gh_gl->gl_object, sc);
-					gfs2_glock_dq_uninit(gh);
-				}
-			}
-
-			if (gh->gh_gl)
-				done = 0;
-			else if (rgd_next && !error) {
-				error = gfs2_glock_nq_init(rgd_next->rd_gl,
-							   LM_ST_SHARED,
-							   GL_ASYNC,
-							   gh);
-				rgd_next = gfs2_rgrpd_get_next(rgd_next);
-				done = 0;
-			}
-
-			if (signal_pending(current))
-				error = -ERESTARTSYS;
-		}
-
-		if (done)
-			break;
-
-		yield();
-	}
-
-	gfs2_glock_dq_uninit(&ri_gh);
-
-out:
-	kfree(gha);
-	return error;
-}
-
-/**
- * gfs2_statfs_i - Do a statfs
- * @sdp: the filesystem
- * @sg: the sg structure
- *
- * Returns: errno
- */
-
-static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
-
-	spin_lock(&sdp->sd_statfs_spin);
-
-	*sc = *m_sc;
-	sc->sc_total += l_sc->sc_total;
-	sc->sc_free += l_sc->sc_free;
-	sc->sc_dinodes += l_sc->sc_dinodes;
-
-	spin_unlock(&sdp->sd_statfs_spin);
-
-	if (sc->sc_free < 0)
-		sc->sc_free = 0;
-	if (sc->sc_free > sc->sc_total)
-		sc->sc_free = sc->sc_total;
-	if (sc->sc_dinodes < 0)
-		sc->sc_dinodes = 0;
-
-	return 0;
-}
-
-/**
- * gfs2_statfs - Gather and return stats about the filesystem
- * @sb: The superblock
- * @statfsbuf: The buffer
- *
- * Returns: 0 on success or error code
- */
-
-static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
-	struct super_block *sb = dentry->d_inode->i_sb;
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_statfs_change_host sc;
-	int error;
-
-	if (gfs2_tune_get(sdp, gt_statfs_slow))
-		error = gfs2_statfs_slow(sdp, &sc);
-	else
-		error = gfs2_statfs_i(sdp, &sc);
-
-	if (error)
-		return error;
-
-	buf->f_type = GFS2_MAGIC;
-	buf->f_bsize = sdp->sd_sb.sb_bsize;
-	buf->f_blocks = sc.sc_total;
-	buf->f_bfree = sc.sc_free;
-	buf->f_bavail = sc.sc_free;
-	buf->f_files = sc.sc_dinodes + sc.sc_free;
-	buf->f_ffree = sc.sc_free;
-	buf->f_namelen = GFS2_FNAMESIZE;
-
-	return 0;
-}
-
-/**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb:  the filesystem
- * @flags:  the remount flags
- * @data:  extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_args args = sdp->sd_args; /* Default to current settings */
-	int error;
-
-	error = gfs2_mount_args(sdp, &args, data);
-	if (error)
-		return error;
-
-	/* Not allowed to change locking details */
-	if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
-	    strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
-	    strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
-		return -EINVAL;
-
-	/* Some flags must not be changed */
-	if (args_neq(&args, &sdp->sd_args, spectator) ||
-	    args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
-	    args_neq(&args, &sdp->sd_args, localflocks) ||
-	    args_neq(&args, &sdp->sd_args, localcaching) ||
-	    args_neq(&args, &sdp->sd_args, meta))
-		return -EINVAL;
-
-	if (sdp->sd_args.ar_spectator)
-		*flags |= MS_RDONLY;
-
-	if ((sb->s_flags ^ *flags) & MS_RDONLY) {
-		if (*flags & MS_RDONLY)
-			error = gfs2_make_fs_ro(sdp);
-		else
-			error = gfs2_make_fs_rw(sdp);
-		if (error)
-			return error;
-	}
-
-	sdp->sd_args = args;
-	if (sdp->sd_args.ar_posix_acl)
-		sb->s_flags |= MS_POSIXACL;
-	else
-		sb->s_flags &= ~MS_POSIXACL;
-	return 0;
-}
-
-/**
- * gfs2_drop_inode - Drop an inode (test for remote unlink)
- * @inode: The inode to drop
- *
- * If we've received a callback on an iopen lock then its because a
- * remote node tried to deallocate the inode but failed due to this node
- * still having the inode open. Here we mark the link count zero
- * since we know that it must have reached zero if the GLF_DEMOTE flag
- * is set on the iopen glock. If we didn't do a disk read since the
- * remote node removed the final link then we might otherwise miss
- * this event. This check ensures that this node will deallocate the
- * inode's blocks, or alternatively pass the baton on to another
- * node for later deallocation.
- */
-
-static void gfs2_drop_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) {
-		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
-		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
-			clear_nlink(inode);
-	}
-	generic_drop_inode(inode);
-}
-
-/**
- * gfs2_clear_inode - Deallocate an inode when VFS is done with it
- * @inode: The VFS inode
- *
- */
-
-static void gfs2_clear_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	/* This tells us its a "real" inode and not one which only
-	 * serves to contain an address space (see rgrp.c, meta_io.c)
-	 * which therefore doesn't have its own glocks.
-	 */
-	if (test_bit(GIF_USER, &ip->i_flags)) {
-		ip->i_gl->gl_object = NULL;
-		gfs2_glock_put(ip->i_gl);
-		ip->i_gl = NULL;
-		if (ip->i_iopen_gh.gh_gl) {
-			ip->i_iopen_gh.gh_gl->gl_object = NULL;
-			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-		}
-	}
-}
-
-static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
-{
-	do {
-		if (d1 == d2)
-			return 1;
-		d1 = d1->d_parent;
-	} while (!IS_ROOT(d1));
-	return 0;
-}
-
-/**
- * gfs2_show_options - Show mount options for /proc/mounts
- * @s: seq_file structure
- * @mnt: vfsmount
- *
- * Returns: 0 on success or error code
- */
-
-static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
-{
-	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
-	struct gfs2_args *args = &sdp->sd_args;
-
-	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
-		seq_printf(s, ",meta");
-	if (args->ar_lockproto[0])
-		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
-	if (args->ar_locktable[0])
-		seq_printf(s, ",locktable=%s", args->ar_locktable);
-	if (args->ar_hostdata[0])
-		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
-	if (args->ar_spectator)
-		seq_printf(s, ",spectator");
-	if (args->ar_ignore_local_fs)
-		seq_printf(s, ",ignore_local_fs");
-	if (args->ar_localflocks)
-		seq_printf(s, ",localflocks");
-	if (args->ar_localcaching)
-		seq_printf(s, ",localcaching");
-	if (args->ar_debug)
-		seq_printf(s, ",debug");
-	if (args->ar_upgrade)
-		seq_printf(s, ",upgrade");
-	if (args->ar_posix_acl)
-		seq_printf(s, ",acl");
-	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
-		char *state;
-		switch (args->ar_quota) {
-		case GFS2_QUOTA_OFF:
-			state = "off";
-			break;
-		case GFS2_QUOTA_ACCOUNT:
-			state = "account";
-			break;
-		case GFS2_QUOTA_ON:
-			state = "on";
-			break;
-		default:
-			state = "unknown";
-			break;
-		}
-		seq_printf(s, ",quota=%s", state);
-	}
-	if (args->ar_suiddir)
-		seq_printf(s, ",suiddir");
-	if (args->ar_data != GFS2_DATA_DEFAULT) {
-		char *state;
-		switch (args->ar_data) {
-		case GFS2_DATA_WRITEBACK:
-			state = "writeback";
-			break;
-		case GFS2_DATA_ORDERED:
-			state = "ordered";
-			break;
-		default:
-			state = "unknown";
-			break;
-		}
-		seq_printf(s, ",data=%s", state);
-	}
-	if (args->ar_discard)
-		seq_printf(s, ",discard");
-
-	return 0;
-}
-
-/*
- * We have to (at the moment) hold the inodes main lock to cover
- * the gap between unlocking the shared lock on the iopen lock and
- * taking the exclusive lock. I'd rather do a shared -> exclusive
- * conversion on the iopen lock, but we can change that later. This
- * is safe, just less efficient.
- */
-
-static void gfs2_delete_inode(struct inode *inode)
-{
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int error;
-
-	if (!test_bit(GIF_USER, &ip->i_flags))
-		goto out;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (unlikely(error)) {
-		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-		goto out;
-	}
-
-	gfs2_glock_dq_wait(&ip->i_iopen_gh);
-	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
-	error = gfs2_glock_nq(&ip->i_iopen_gh);
-	if (error)
-		goto out_truncate;
-
-	if (S_ISDIR(inode->i_mode) &&
-	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
-		error = gfs2_dir_exhash_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	if (ip->i_eattr) {
-		error = gfs2_ea_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	if (!gfs2_is_stuffed(ip)) {
-		error = gfs2_file_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	error = gfs2_dinode_dealloc(ip);
-	if (error)
-		goto out_unlock;
-
-out_truncate:
-	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
-	if (error)
-		goto out_unlock;
-	/* Needs to be done before glock release & also in a transaction */
-	truncate_inode_pages(&inode->i_data, 0);
-	gfs2_trans_end(sdp);
-
-out_unlock:
-	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
-		gfs2_glock_dq(&ip->i_iopen_gh);
-	gfs2_holder_uninit(&ip->i_iopen_gh);
-	gfs2_glock_dq_uninit(&gh);
-	if (error && error != GLR_TRYFAILED)
-		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
-out:
-	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
-}
-
-static struct inode *gfs2_alloc_inode(struct super_block *sb)
-{
-	struct gfs2_inode *ip;
-
-	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
-	if (ip) {
-		ip->i_flags = 0;
-		ip->i_gl = NULL;
-	}
-	return &ip->i_inode;
-}
-
-static void gfs2_destroy_inode(struct inode *inode)
-{
-	kmem_cache_free(gfs2_inode_cachep, inode);
-}
-
-const struct super_operations gfs2_super_ops = {
-	.alloc_inode		= gfs2_alloc_inode,
-	.destroy_inode		= gfs2_destroy_inode,
-	.write_inode		= gfs2_write_inode,
-	.delete_inode		= gfs2_delete_inode,
-	.put_super		= gfs2_put_super,
-	.write_super		= gfs2_write_super,
-	.sync_fs		= gfs2_sync_fs,
-	.freeze_fs 		= gfs2_freeze,
-	.unfreeze_fs		= gfs2_unfreeze,
-	.statfs			= gfs2_statfs,
-	.remount_fs		= gfs2_remount_fs,
-	.clear_inode		= gfs2_clear_inode,
-	.drop_inode		= gfs2_drop_inode,
-	.show_options		= gfs2_show_options,
-};
-
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 152e6c4..2e9b932 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -60,7 +60,6 @@
 #include "super.h"
 #include "trans.h"
 #include "inode.h"
-#include "ops_address.h"
 #include "util.h"
 
 #define QUOTA_USER 1
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 247e8f7..59d2695 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,8 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -441,18 +440,25 @@
         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
 
-/**
- * gfs2_recover_journal - recover a given journal
- * @jd: the struct gfs2_jdesc describing the journal
- *
- * Acquire the journal's lock, check to see if the journal is clean, and
- * do recovery if necessary.
- *
- * Returns: errno
- */
-
-int gfs2_recover_journal(struct gfs2_jdesc *jd)
+static int gfs2_recover_get_ref(struct slow_work *work)
 {
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+	if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
+		return -EBUSY;
+	return 0;
+}
+
+static void gfs2_recover_put_ref(struct slow_work *work)
+{
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+	clear_bit(JDF_RECOVERY, &jd->jd_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
+}
+
+static void gfs2_recover_work(struct slow_work *work)
+{
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct gfs2_log_header_host head;
@@ -569,7 +575,7 @@
 		gfs2_glock_dq_uninit(&j_gh);
 
 	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
-	return 0;
+	return;
 
 fail_gunlock_tr:
 	gfs2_glock_dq_uninit(&t_gh);
@@ -584,70 +590,28 @@
 
 fail:
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
-	return error;
 }
 
-static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
+struct slow_work_ops gfs2_recover_ops = {
+	.get_ref = gfs2_recover_get_ref,
+	.put_ref = gfs2_recover_put_ref,
+	.execute = gfs2_recover_work,
+};
+
+
+static int gfs2_recovery_wait(void *word)
 {
-	struct gfs2_jdesc *jd;
-	int found = 0;
-
-	spin_lock(&sdp->sd_jindex_spin);
-
-	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (jd->jd_dirty) {
-			jd->jd_dirty = 0;
-			found = 1;
-			break;
-		}
-	}
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	if (!found)
-		jd = NULL;
-
-	return jd;
+	schedule();
+	return 0;
 }
 
-/**
- * gfs2_check_journals - Recover any dirty journals
- * @sdp: the filesystem
- *
- */
-
-static void gfs2_check_journals(struct gfs2_sbd *sdp)
+int gfs2_recover_journal(struct gfs2_jdesc *jd)
 {
-	struct gfs2_jdesc *jd;
-
-	for (;;) {
-		jd = gfs2_jdesc_find_dirty(sdp);
-		if (!jd)
-			break;
-
-		if (jd != sdp->sd_jdesc)
-			gfs2_recover_journal(jd);
-	}
-}
-
-/**
- * gfs2_recoverd - Recover dead machine's journals
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_recoverd(void *data)
-{
-	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-
-	while (!kthread_should_stop()) {
-		gfs2_check_journals(sdp);
-		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
-		schedule_timeout_interruptible(t);
-	}
-
+	int rv;
+	rv = slow_work_enqueue(&jd->jd_work);
+	if (rv)
+		return rv;
+	wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
 	return 0;
 }
 
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index a8218ea..1616ac2 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -28,7 +28,7 @@
 extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 		    struct gfs2_log_header_host *head);
 extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
-extern int gfs2_recoverd(void *data);
+extern struct slow_work_ops gfs2_recover_ops;
 
 #endif /* __RECOVERY_DOT_H__ */
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5650382..daa4ae3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,7 +29,7 @@
 #include "util.h"
 #include "log.h"
 #include "inode.h"
-#include "ops_address.h"
+#include "trace_gfs2.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
@@ -442,6 +442,7 @@
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
 
+		bi->bi_flags = 0;
 		/* small rgrp; bitmap stored completely in header block */
 		if (length == 1) {
 			bytes = bytes_left;
@@ -580,7 +581,6 @@
 
 	rgd->rd_gl->gl_object = rgd;
 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
-	rgd->rd_flags |= GFS2_RDF_CHECK;
 	return error;
 }
 
@@ -701,10 +701,9 @@
 	u32 rg_flags;
 
 	rg_flags = be32_to_cpu(str->rg_flags);
-	if (rg_flags & GFS2_RGF_NOALLOC)
-		rgd->rd_flags |= GFS2_RDF_NOALLOC;
-	else
-		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
+	rg_flags &= ~GFS2_RDF_MASK;
+	rgd->rd_flags &= GFS2_RDF_MASK;
+	rgd->rd_flags |= rg_flags;
 	rgd->rd_free = be32_to_cpu(str->rg_free);
 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
@@ -713,11 +712,8 @@
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
-	u32 rg_flags = 0;
 
-	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
-		rg_flags |= GFS2_RGF_NOALLOC;
-	str->rg_flags = cpu_to_be32(rg_flags);
+	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
 	str->rg_free = cpu_to_be32(rgd->rd_free);
 	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
 	str->__pad = cpu_to_be32(0);
@@ -775,8 +771,10 @@
 	}
 
 	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
+		for (x = 0; x < length; x++)
+			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
-		rgd->rd_flags |= GFS2_RDF_UPTODATE;
+		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 	}
 
 	spin_lock(&sdp->sd_rindex_spin);
@@ -845,7 +843,7 @@
 	struct super_block *sb = sdp->sd_vfs;
 	struct block_device *bdev = sb->s_bdev;
 	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
-					   bdev_hardsect_size(sb->s_bdev);
+					   bdev_logical_block_size(sb->s_bdev);
 	u64 blk;
 	sector_t start = 0;
 	sector_t nr_sects = 0;
@@ -903,6 +901,7 @@
 			continue;
 		if (sdp->sd_args.ar_discard)
 			gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
+		clear_bit(GBF_FULL, &bi->bi_flags);
 		memcpy(bi->bi_clone + bi->bi_offset,
 		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
 	}
@@ -942,7 +941,7 @@
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	int ret = 0;
 
-	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
+	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 		return 0;
 
 	spin_lock(&sdp->sd_rindex_spin);
@@ -1315,30 +1314,37 @@
 {
 	struct gfs2_bitmap *bi = NULL;
 	const u32 length = rgd->rd_length;
-	u32 blk = 0;
+	u32 blk = BFITNOENT;
 	unsigned int buf, x;
 	const unsigned int elen = *n;
-	const u8 *buffer;
+	const u8 *buffer = NULL;
 
 	*n = 0;
 	/* Find bitmap block that contains bits for goal block */
 	for (buf = 0; buf < length; buf++) {
 		bi = rgd->rd_bits + buf;
-		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-			break;
+		/* Convert scope of "goal" from rgrp-wide to within found bit block */
+		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+			goal -= bi->bi_start * GFS2_NBBY;
+			goto do_search;
+		}
 	}
+	buf = 0;
+	goal = 0;
 
-	gfs2_assert(rgd->rd_sbd, buf < length);
-
-	/* Convert scope of "goal" from rgrp-wide to within found bit block */
-	goal -= bi->bi_start * GFS2_NBBY;
-
+do_search:
 	/* Search (up to entire) bitmap in this rgrp for allocatable block.
 	   "x <= length", instead of "x < length", because we typically start
 	   the search in the middle of a bit block, but if we can't find an
 	   allocatable block anywhere else, we want to be able wrap around and
 	   search in the first part of our first-searched bit block.  */
 	for (x = 0; x <= length; x++) {
+		bi = rgd->rd_bits + buf;
+
+		if (test_bit(GBF_FULL, &bi->bi_flags) &&
+		    (old_state == GFS2_BLKST_FREE))
+			goto skip;
+
 		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
 		   bitmaps, so we must search the originals for that. */
 		buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1349,33 +1355,39 @@
 		if (blk != BFITNOENT)
 			break;
 
+		if ((goal == 0) && (old_state == GFS2_BLKST_FREE))
+			set_bit(GBF_FULL, &bi->bi_flags);
+
 		/* Try next bitmap block (wrap back to rgrp header if at end) */
-		buf = (buf + 1) % length;
-		bi = rgd->rd_bits + buf;
+skip:
+		buf++;
+		buf %= length;
 		goal = 0;
 	}
 
-	if (blk != BFITNOENT && old_state != new_state) {
-		*n = 1;
-		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-			    bi->bi_len, blk, new_state);
-		goal = blk;
-		while (*n < elen) {
-			goal++;
-			if (goal >= (bi->bi_len * GFS2_NBBY))
-				break;
-			if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
-			    GFS2_BLKST_FREE)
-				break;
-			gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone,
-				    bi->bi_offset, bi->bi_len, goal,
-				    new_state);
-			(*n)++;
-		}
-	}
+	if (blk == BFITNOENT)
+		return blk;
+	*n = 1;
+	if (old_state == new_state)
+		goto out;
 
-	return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
+	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+	gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
+		    bi->bi_len, blk, new_state);
+	goal = blk;
+	while (*n < elen) {
+		goal++;
+		if (goal >= (bi->bi_len * GFS2_NBBY))
+			break;
+		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
+		    GFS2_BLKST_FREE)
+			break;
+		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
+			    bi->bi_len, goal, new_state);
+		(*n)++;
+	}
+out:
+	return (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
@@ -1435,13 +1447,33 @@
 }
 
 /**
- * gfs2_alloc_block - Allocate a block
- * @ip: the inode to allocate the block for
+ * gfs2_rgrp_dump - print out an rgrp
+ * @seq: The iterator
+ * @gl: The glock in question
  *
- * Returns: the allocated block
  */
 
-u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
+int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+{
+	const struct gfs2_rgrpd *rgd = gl->gl_object;
+	if (rgd == NULL)
+		return 0;
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+	return 0;
+}
+
+/**
+ * gfs2_alloc_block - Allocate one or more blocks
+ * @ip: the inode to allocate the block for
+ * @bn: Used to return the starting block number
+ * @n: requested number of blocks/extent length (value/result)
+ *
+ * Returns: 0 or error
+ */
+
+int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
@@ -1457,7 +1489,10 @@
 		goal = rgd->rd_last_alloc;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
-	BUG_ON(blk == BFITNOENT);
+
+	/* Since all blocks are reserved in advance, this shouldn't happen */
+	if (blk == BFITNOENT)
+		goto rgrp_error;
 
 	rgd->rd_last_alloc = blk;
 	block = rgd->rd_data0 + blk;
@@ -1469,7 +1504,9 @@
 		di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
 		brelse(dibh);
 	}
-	gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
+	if (rgd->rd_free < *n)
+		goto rgrp_error;
+
 	rgd->rd_free -= *n;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1483,8 +1520,17 @@
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone -= *n;
 	spin_unlock(&sdp->sd_rindex_spin);
+	trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
+	*bn = block;
+	return 0;
 
-	return block;
+rgrp_error:
+	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
+	        (unsigned long long)rgd->rd_addr);
+	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
+	gfs2_rgrp_dump(NULL, rgd->rd_gl);
+	rgd->rd_flags |= GFS2_RDF_ERROR;
+	return -EIO;
 }
 
 /**
@@ -1526,7 +1572,7 @@
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone--;
 	spin_unlock(&sdp->sd_rindex_spin);
-
+	trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
 	return block;
 }
 
@@ -1546,7 +1592,7 @@
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1574,7 +1620,7 @@
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1597,6 +1643,7 @@
 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
 		return;
+	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 	gfs2_trans_add_rg(rgd);
@@ -1628,6 +1675,7 @@
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
+	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3181c7e..1e76ff0 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -14,22 +14,22 @@
 struct gfs2_sbd;
 struct gfs2_holder;
 
-void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
 
 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
 
-void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
-int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
+extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
+extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
 
-int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
-void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
-void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
+extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
 
-void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
 
-struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 static inline void gfs2_alloc_put(struct gfs2_inode *ip)
 {
 	BUG_ON(ip->i_alloc == NULL);
@@ -37,22 +37,22 @@
 	ip->i_alloc = NULL;
 }
 
-int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
-			 char *file, unsigned int line);
+extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file,
+				  unsigned int line);
 #define gfs2_inplace_reserve(ip) \
 gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
 
-void gfs2_inplace_release(struct gfs2_inode *ip);
+extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
-unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
+extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
 
-u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n);
-u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
+extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
+extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
 
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
-void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
-void gfs2_unlink_di(struct inode *inode);
+extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
+extern void gfs2_unlink_di(struct inode *inode);
 
 struct gfs2_rgrp_list {
 	unsigned int rl_rgrps;
@@ -61,10 +61,11 @@
 	struct gfs2_holder *rl_ghs;
 };
 
-void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
-		    u64 block);
-void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
-void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
-u64 gfs2_ri_total(struct gfs2_sbd *sdp);
+extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+			   u64 block);
+extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
+extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
+extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
 
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 601913e..0a68013 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -7,14 +7,20 @@
  * of the GNU General Public License version 2.
  */
 
+#include <linux/bio.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
-#include <linux/crc32.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
-#include <linux/bio.h>
+#include <linux/crc32.h>
+#include <linux/time.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -31,6 +37,183 @@
 #include "super.h"
 #include "trans.h"
 #include "util.h"
+#include "sys.h"
+#include "eattr.h"
+
+#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
+
+enum {
+	Opt_lockproto,
+	Opt_locktable,
+	Opt_hostdata,
+	Opt_spectator,
+	Opt_ignore_local_fs,
+	Opt_localflocks,
+	Opt_localcaching,
+	Opt_debug,
+	Opt_nodebug,
+	Opt_upgrade,
+	Opt_acl,
+	Opt_noacl,
+	Opt_quota_off,
+	Opt_quota_account,
+	Opt_quota_on,
+	Opt_quota,
+	Opt_noquota,
+	Opt_suiddir,
+	Opt_nosuiddir,
+	Opt_data_writeback,
+	Opt_data_ordered,
+	Opt_meta,
+	Opt_discard,
+	Opt_nodiscard,
+	Opt_commit,
+	Opt_error,
+};
+
+static const match_table_t tokens = {
+	{Opt_lockproto, "lockproto=%s"},
+	{Opt_locktable, "locktable=%s"},
+	{Opt_hostdata, "hostdata=%s"},
+	{Opt_spectator, "spectator"},
+	{Opt_ignore_local_fs, "ignore_local_fs"},
+	{Opt_localflocks, "localflocks"},
+	{Opt_localcaching, "localcaching"},
+	{Opt_debug, "debug"},
+	{Opt_nodebug, "nodebug"},
+	{Opt_upgrade, "upgrade"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+	{Opt_quota_off, "quota=off"},
+	{Opt_quota_account, "quota=account"},
+	{Opt_quota_on, "quota=on"},
+	{Opt_quota, "quota"},
+	{Opt_noquota, "noquota"},
+	{Opt_suiddir, "suiddir"},
+	{Opt_nosuiddir, "nosuiddir"},
+	{Opt_data_writeback, "data=writeback"},
+	{Opt_data_ordered, "data=ordered"},
+	{Opt_meta, "meta"},
+	{Opt_discard, "discard"},
+	{Opt_nodiscard, "nodiscard"},
+	{Opt_commit, "commit=%d"},
+	{Opt_error, NULL}
+};
+
+/**
+ * gfs2_mount_args - Parse mount options
+ * @sdp:
+ * @data:
+ *
+ * Return: errno
+ */
+
+int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
+{
+	char *o;
+	int token;
+	substring_t tmp[MAX_OPT_ARGS];
+	int rv;
+
+	/* Split the options into tokens with the "," character and
+	   process them */
+
+	while (1) {
+		o = strsep(&options, ",");
+		if (o == NULL)
+			break;
+		if (*o == '\0')
+			continue;
+
+		token = match_token(o, tokens, tmp);
+		switch (token) {
+		case Opt_lockproto:
+			match_strlcpy(args->ar_lockproto, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_locktable:
+			match_strlcpy(args->ar_locktable, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_hostdata:
+			match_strlcpy(args->ar_hostdata, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_spectator:
+			args->ar_spectator = 1;
+			break;
+		case Opt_ignore_local_fs:
+			args->ar_ignore_local_fs = 1;
+			break;
+		case Opt_localflocks:
+			args->ar_localflocks = 1;
+			break;
+		case Opt_localcaching:
+			args->ar_localcaching = 1;
+			break;
+		case Opt_debug:
+			args->ar_debug = 1;
+			break;
+		case Opt_nodebug:
+			args->ar_debug = 0;
+			break;
+		case Opt_upgrade:
+			args->ar_upgrade = 1;
+			break;
+		case Opt_acl:
+			args->ar_posix_acl = 1;
+			break;
+		case Opt_noacl:
+			args->ar_posix_acl = 0;
+			break;
+		case Opt_quota_off:
+		case Opt_noquota:
+			args->ar_quota = GFS2_QUOTA_OFF;
+			break;
+		case Opt_quota_account:
+			args->ar_quota = GFS2_QUOTA_ACCOUNT;
+			break;
+		case Opt_quota_on:
+		case Opt_quota:
+			args->ar_quota = GFS2_QUOTA_ON;
+			break;
+		case Opt_suiddir:
+			args->ar_suiddir = 1;
+			break;
+		case Opt_nosuiddir:
+			args->ar_suiddir = 0;
+			break;
+		case Opt_data_writeback:
+			args->ar_data = GFS2_DATA_WRITEBACK;
+			break;
+		case Opt_data_ordered:
+			args->ar_data = GFS2_DATA_ORDERED;
+			break;
+		case Opt_meta:
+			args->ar_meta = 1;
+			break;
+		case Opt_discard:
+			args->ar_discard = 1;
+			break;
+		case Opt_nodiscard:
+			args->ar_discard = 0;
+			break;
+		case Opt_commit:
+			rv = match_int(&tmp[0], &args->ar_commit);
+			if (rv || args->ar_commit <= 0) {
+				fs_info(sdp, "commit mount option requires a positive numeric argument\n");
+				return rv ? rv : -EINVAL;
+			}
+			break;
+		case Opt_error:
+		default:
+			fs_info(sdp, "invalid mount option: %s\n", o);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
 
 /**
  * gfs2_jindex_free - Clear all the journal index information
@@ -436,3 +619,706 @@
 	mutex_unlock(&sdp->sd_freeze_lock);
 }
 
+
+/**
+ * gfs2_write_inode - Make sure the inode is stable on the disk
+ * @inode: The inode
+ * @sync: synchronous write flag
+ *
+ * Returns: errno
+ */
+
+static int gfs2_write_inode(struct inode *inode, int sync)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_holder gh;
+	struct buffer_head *bh;
+	struct timespec atime;
+	struct gfs2_dinode *di;
+	int ret = 0;
+
+	/* Check this is a "normal" inode, etc */
+	if (!test_bit(GIF_USER, &ip->i_flags) ||
+	    (current->flags & PF_MEMALLOC))
+		return 0;
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (ret)
+		goto do_flush;
+	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (ret)
+		goto do_unlock;
+	ret = gfs2_meta_inode_buffer(ip, &bh);
+	if (ret == 0) {
+		di = (struct gfs2_dinode *)bh->b_data;
+		atime.tv_sec = be64_to_cpu(di->di_atime);
+		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+		if (timespec_compare(&inode->i_atime, &atime) > 0) {
+			gfs2_trans_add_bh(ip->i_gl, bh, 1);
+			gfs2_dinode_out(ip, bh->b_data);
+		}
+		brelse(bh);
+	}
+	gfs2_trans_end(sdp);
+do_unlock:
+	gfs2_glock_dq_uninit(&gh);
+do_flush:
+	if (sync != 0)
+		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+	return ret;
+}
+
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+	struct gfs2_holder t_gh;
+	int error;
+
+	gfs2_quota_sync(sdp);
+	gfs2_statfs_sync(sdp);
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+				   &t_gh);
+	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return error;
+
+	gfs2_meta_syncfs(sdp);
+	gfs2_log_shutdown(sdp);
+
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	if (t_gh.gh_gl)
+		gfs2_glock_dq_uninit(&t_gh);
+
+	gfs2_quota_cleanup(sdp);
+
+	return error;
+}
+
+static int gfs2_umount_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+/**
+ * gfs2_put_super - Unmount the filesystem
+ * @sb: The VFS superblock
+ *
+ */
+
+static void gfs2_put_super(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	int error;
+	struct gfs2_jdesc *jd;
+
+	/*  Unfreeze the filesystem, if we need to  */
+
+	mutex_lock(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+	mutex_unlock(&sdp->sd_freeze_lock);
+
+	/* No more recovery requests */
+	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+	smp_mb();
+
+	/* Wait on outstanding recovery */
+restart:
+	spin_lock(&sdp->sd_jindex_spin);
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
+			continue;
+		spin_unlock(&sdp->sd_jindex_spin);
+		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
+			    gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	kthread_stop(sdp->sd_quotad_process);
+	kthread_stop(sdp->sd_logd_process);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		error = gfs2_make_fs_ro(sdp);
+		if (error)
+			gfs2_io_error(sdp);
+	}
+	/*  At this point, we're through modifying the disk  */
+
+	/*  Release stuff  */
+
+	iput(sdp->sd_jindex);
+	iput(sdp->sd_inum_inode);
+	iput(sdp->sd_statfs_inode);
+	iput(sdp->sd_rindex);
+	iput(sdp->sd_quota_inode);
+
+	gfs2_glock_put(sdp->sd_rename_gl);
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+	if (!sdp->sd_args.ar_spectator) {
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+		iput(sdp->sd_ir_inode);
+		iput(sdp->sd_sc_inode);
+		iput(sdp->sd_qc_inode);
+	}
+
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+	gfs2_clear_rgrpd(sdp);
+	gfs2_jindex_free(sdp);
+	/*  Take apart glock structures and buffer lists  */
+	gfs2_gl_hash_clear(sdp);
+	/*  Unmount the locking protocol  */
+	gfs2_lm_unmount(sdp);
+
+	/*  At this point, we're through participating in the lockspace  */
+	gfs2_sys_fs_del(sdp);
+}
+
+/**
+ * gfs2_sync_fs - sync the filesystem
+ * @sb: the superblock
+ *
+ * Flushes the log to disk.
+ */
+
+static int gfs2_sync_fs(struct super_block *sb, int wait)
+{
+	if (wait && sb->s_fs_info)
+		gfs2_log_flush(sb->s_fs_info, NULL);
+	return 0;
+}
+
+/**
+ * gfs2_freeze - prevent further writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static int gfs2_freeze(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	int error;
+
+	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return -EINVAL;
+
+	for (;;) {
+		error = gfs2_freeze_fs(sdp);
+		if (!error)
+			break;
+
+		switch (error) {
+		case -EBUSY:
+			fs_err(sdp, "waiting for recovery before freeze\n");
+			break;
+
+		default:
+			fs_err(sdp, "error freezing FS: %d\n", error);
+			break;
+		}
+
+		fs_err(sdp, "retrying...\n");
+		msleep(1000);
+	}
+	return 0;
+}
+
+/**
+ * gfs2_unfreeze - reallow writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static int gfs2_unfreeze(struct super_block *sb)
+{
+	gfs2_unfreeze_fs(sb->s_fs_info);
+	return 0;
+}
+
+/**
+ * statfs_fill - fill in the sg for a given RG
+ * @rgd: the RG
+ * @sc: the sc structure
+ *
+ * Returns: 0 on success, -ESTALE if the LVB is invalid
+ */
+
+static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
+			    struct gfs2_statfs_change_host *sc)
+{
+	gfs2_rgrp_verify(rgd);
+	sc->sc_total += rgd->rd_data;
+	sc->sc_free += rgd->rd_free;
+	sc->sc_dinodes += rgd->rd_dinodes;
+	return 0;
+}
+
+/**
+ * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
+ * @sdp: the filesystem
+ * @sc: the sc info that will be returned
+ *
+ * Any error (other than a signal) will cause this routine to fall back
+ * to the synchronous version.
+ *
+ * FIXME: This really shouldn't busy wait like this.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_holder ri_gh;
+	struct gfs2_rgrpd *rgd_next;
+	struct gfs2_holder *gha, *gh;
+	unsigned int slots = 64;
+	unsigned int x;
+	int done;
+	int error = 0, err;
+
+	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
+	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!gha)
+		return -ENOMEM;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto out;
+
+	rgd_next = gfs2_rgrpd_get_first(sdp);
+
+	for (;;) {
+		done = 1;
+
+		for (x = 0; x < slots; x++) {
+			gh = gha + x;
+
+			if (gh->gh_gl && gfs2_glock_poll(gh)) {
+				err = gfs2_glock_wait(gh);
+				if (err) {
+					gfs2_holder_uninit(gh);
+					error = err;
+				} else {
+					if (!error)
+						error = statfs_slow_fill(
+							gh->gh_gl->gl_object, sc);
+					gfs2_glock_dq_uninit(gh);
+				}
+			}
+
+			if (gh->gh_gl)
+				done = 0;
+			else if (rgd_next && !error) {
+				error = gfs2_glock_nq_init(rgd_next->rd_gl,
+							   LM_ST_SHARED,
+							   GL_ASYNC,
+							   gh);
+				rgd_next = gfs2_rgrpd_get_next(rgd_next);
+				done = 0;
+			}
+
+			if (signal_pending(current))
+				error = -ERESTARTSYS;
+		}
+
+		if (done)
+			break;
+
+		yield();
+	}
+
+	gfs2_glock_dq_uninit(&ri_gh);
+
+out:
+	kfree(gha);
+	return error;
+}
+
+/**
+ * gfs2_statfs_i - Do a statfs
+ * @sdp: the filesystem
+ * @sg: the sg structure
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+
+	spin_lock(&sdp->sd_statfs_spin);
+
+	*sc = *m_sc;
+	sc->sc_total += l_sc->sc_total;
+	sc->sc_free += l_sc->sc_free;
+	sc->sc_dinodes += l_sc->sc_dinodes;
+
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	if (sc->sc_free < 0)
+		sc->sc_free = 0;
+	if (sc->sc_free > sc->sc_total)
+		sc->sc_free = sc->sc_total;
+	if (sc->sc_dinodes < 0)
+		sc->sc_dinodes = 0;
+
+	return 0;
+}
+
+/**
+ * gfs2_statfs - Gather and return stats about the filesystem
+ * @sb: The superblock
+ * @statfsbuf: The buffer
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_inode->i_sb;
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct gfs2_statfs_change_host sc;
+	int error;
+
+	if (gfs2_tune_get(sdp, gt_statfs_slow))
+		error = gfs2_statfs_slow(sdp, &sc);
+	else
+		error = gfs2_statfs_i(sdp, &sc);
+
+	if (error)
+		return error;
+
+	buf->f_type = GFS2_MAGIC;
+	buf->f_bsize = sdp->sd_sb.sb_bsize;
+	buf->f_blocks = sc.sc_total;
+	buf->f_bfree = sc.sc_free;
+	buf->f_bavail = sc.sc_free;
+	buf->f_files = sc.sc_dinodes + sc.sc_free;
+	buf->f_ffree = sc.sc_free;
+	buf->f_namelen = GFS2_FNAMESIZE;
+
+	return 0;
+}
+
+/**
+ * gfs2_remount_fs - called when the FS is remounted
+ * @sb:  the filesystem
+ * @flags:  the remount flags
+ * @data:  extra data passed in (not used right now)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct gfs2_args args = sdp->sd_args; /* Default to current settings */
+	struct gfs2_tune *gt = &sdp->sd_tune;
+	int error;
+
+	spin_lock(&gt->gt_spin);
+	args.ar_commit = gt->gt_log_flush_secs;
+	spin_unlock(&gt->gt_spin);
+	error = gfs2_mount_args(sdp, &args, data);
+	if (error)
+		return error;
+
+	/* Not allowed to change locking details */
+	if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
+	    strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
+	    strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
+		return -EINVAL;
+
+	/* Some flags must not be changed */
+	if (args_neq(&args, &sdp->sd_args, spectator) ||
+	    args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
+	    args_neq(&args, &sdp->sd_args, localflocks) ||
+	    args_neq(&args, &sdp->sd_args, localcaching) ||
+	    args_neq(&args, &sdp->sd_args, meta))
+		return -EINVAL;
+
+	if (sdp->sd_args.ar_spectator)
+		*flags |= MS_RDONLY;
+
+	if ((sb->s_flags ^ *flags) & MS_RDONLY) {
+		if (*flags & MS_RDONLY)
+			error = gfs2_make_fs_ro(sdp);
+		else
+			error = gfs2_make_fs_rw(sdp);
+		if (error)
+			return error;
+	}
+
+	sdp->sd_args = args;
+	if (sdp->sd_args.ar_posix_acl)
+		sb->s_flags |= MS_POSIXACL;
+	else
+		sb->s_flags &= ~MS_POSIXACL;
+	spin_lock(&gt->gt_spin);
+	gt->gt_log_flush_secs = args.ar_commit;
+	spin_unlock(&gt->gt_spin);
+
+	return 0;
+}
+
+/**
+ * gfs2_drop_inode - Drop an inode (test for remote unlink)
+ * @inode: The inode to drop
+ *
+ * If we've received a callback on an iopen lock then its because a
+ * remote node tried to deallocate the inode but failed due to this node
+ * still having the inode open. Here we mark the link count zero
+ * since we know that it must have reached zero if the GLF_DEMOTE flag
+ * is set on the iopen glock. If we didn't do a disk read since the
+ * remote node removed the final link then we might otherwise miss
+ * this event. This check ensures that this node will deallocate the
+ * inode's blocks, or alternatively pass the baton on to another
+ * node for later deallocation.
+ */
+
+static void gfs2_drop_inode(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) {
+		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
+			clear_nlink(inode);
+	}
+	generic_drop_inode(inode);
+}
+
+/**
+ * gfs2_clear_inode - Deallocate an inode when VFS is done with it
+ * @inode: The VFS inode
+ *
+ */
+
+static void gfs2_clear_inode(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	/* This tells us its a "real" inode and not one which only
+	 * serves to contain an address space (see rgrp.c, meta_io.c)
+	 * which therefore doesn't have its own glocks.
+	 */
+	if (test_bit(GIF_USER, &ip->i_flags)) {
+		ip->i_gl->gl_object = NULL;
+		gfs2_glock_put(ip->i_gl);
+		ip->i_gl = NULL;
+		if (ip->i_iopen_gh.gh_gl) {
+			ip->i_iopen_gh.gh_gl->gl_object = NULL;
+			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		}
+	}
+}
+
+static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
+{
+	do {
+		if (d1 == d2)
+			return 1;
+		d1 = d1->d_parent;
+	} while (!IS_ROOT(d1));
+	return 0;
+}
+
+/**
+ * gfs2_show_options - Show mount options for /proc/mounts
+ * @s: seq_file structure
+ * @mnt: vfsmount
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+{
+	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
+	struct gfs2_args *args = &sdp->sd_args;
+	int lfsecs;
+
+	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+		seq_printf(s, ",meta");
+	if (args->ar_lockproto[0])
+		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+	if (args->ar_locktable[0])
+		seq_printf(s, ",locktable=%s", args->ar_locktable);
+	if (args->ar_hostdata[0])
+		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+	if (args->ar_spectator)
+		seq_printf(s, ",spectator");
+	if (args->ar_ignore_local_fs)
+		seq_printf(s, ",ignore_local_fs");
+	if (args->ar_localflocks)
+		seq_printf(s, ",localflocks");
+	if (args->ar_localcaching)
+		seq_printf(s, ",localcaching");
+	if (args->ar_debug)
+		seq_printf(s, ",debug");
+	if (args->ar_upgrade)
+		seq_printf(s, ",upgrade");
+	if (args->ar_posix_acl)
+		seq_printf(s, ",acl");
+	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
+		char *state;
+		switch (args->ar_quota) {
+		case GFS2_QUOTA_OFF:
+			state = "off";
+			break;
+		case GFS2_QUOTA_ACCOUNT:
+			state = "account";
+			break;
+		case GFS2_QUOTA_ON:
+			state = "on";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",quota=%s", state);
+	}
+	if (args->ar_suiddir)
+		seq_printf(s, ",suiddir");
+	if (args->ar_data != GFS2_DATA_DEFAULT) {
+		char *state;
+		switch (args->ar_data) {
+		case GFS2_DATA_WRITEBACK:
+			state = "writeback";
+			break;
+		case GFS2_DATA_ORDERED:
+			state = "ordered";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",data=%s", state);
+	}
+	if (args->ar_discard)
+		seq_printf(s, ",discard");
+	lfsecs = sdp->sd_tune.gt_log_flush_secs;
+	if (lfsecs != 60)
+		seq_printf(s, ",commit=%d", lfsecs);
+	return 0;
+}
+
+/*
+ * We have to (at the moment) hold the inodes main lock to cover
+ * the gap between unlocking the shared lock on the iopen lock and
+ * taking the exclusive lock. I'd rather do a shared -> exclusive
+ * conversion on the iopen lock, but we can change that later. This
+ * is safe, just less efficient.
+ */
+
+static void gfs2_delete_inode(struct inode *inode)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int error;
+
+	if (!test_bit(GIF_USER, &ip->i_flags))
+		goto out;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (unlikely(error)) {
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		goto out;
+	}
+
+	gfs2_glock_dq_wait(&ip->i_iopen_gh);
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
+	error = gfs2_glock_nq(&ip->i_iopen_gh);
+	if (error)
+		goto out_truncate;
+
+	if (S_ISDIR(inode->i_mode) &&
+	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
+		error = gfs2_dir_exhash_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	if (ip->i_eattr) {
+		error = gfs2_ea_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	if (!gfs2_is_stuffed(ip)) {
+		error = gfs2_file_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	error = gfs2_dinode_dealloc(ip);
+	if (error)
+		goto out_unlock;
+
+out_truncate:
+	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
+	if (error)
+		goto out_unlock;
+	/* Needs to be done before glock release & also in a transaction */
+	truncate_inode_pages(&inode->i_data, 0);
+	gfs2_trans_end(sdp);
+
+out_unlock:
+	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+		gfs2_glock_dq(&ip->i_iopen_gh);
+	gfs2_holder_uninit(&ip->i_iopen_gh);
+	gfs2_glock_dq_uninit(&gh);
+	if (error && error != GLR_TRYFAILED && error != -EROFS)
+		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
+out:
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+}
+
+static struct inode *gfs2_alloc_inode(struct super_block *sb)
+{
+	struct gfs2_inode *ip;
+
+	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
+	if (ip) {
+		ip->i_flags = 0;
+		ip->i_gl = NULL;
+	}
+	return &ip->i_inode;
+}
+
+static void gfs2_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(gfs2_inode_cachep, inode);
+}
+
+const struct super_operations gfs2_super_ops = {
+	.alloc_inode		= gfs2_alloc_inode,
+	.destroy_inode		= gfs2_destroy_inode,
+	.write_inode		= gfs2_write_inode,
+	.delete_inode		= gfs2_delete_inode,
+	.put_super		= gfs2_put_super,
+	.sync_fs		= gfs2_sync_fs,
+	.freeze_fs 		= gfs2_freeze,
+	.unfreeze_fs		= gfs2_unfreeze,
+	.statfs			= gfs2_statfs,
+	.remount_fs		= gfs2_remount_fs,
+	.clear_inode		= gfs2_clear_inode,
+	.drop_inode		= gfs2_drop_inode,
+	.show_options		= gfs2_show_options,
+};
+
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7655f502..23419dc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -26,6 +26,36 @@
 #include "util.h"
 #include "glops.h"
 
+struct gfs2_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gfs2_sbd *, char *);
+	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
+};
+
+static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->show ? a->show(sdp, buf) : 0;
+}
+
+static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->store ? a->store(sdp, buf, len) : len;
+}
+
+static struct sysfs_ops gfs2_attr_ops = {
+	.show  = gfs2_attr_show,
+	.store = gfs2_attr_store,
+};
+
+
+static struct kset *gfs2_kset;
+
 static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%u:%u\n",
@@ -212,11 +242,6 @@
 	return len;
 }
 
-struct gfs2_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
-};
 
 #define GFS2_ATTR(name, mode, show, store) \
 static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
@@ -246,58 +271,11 @@
 	NULL,
 };
 
-static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
-			      char *buf)
-{
-	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
-	return a->show ? a->show(sdp, buf) : 0;
-}
-
-static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
-			       const char *buf, size_t len)
-{
-	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
-	return a->store ? a->store(sdp, buf, len) : len;
-}
-
-static struct sysfs_ops gfs2_attr_ops = {
-	.show  = gfs2_attr_show,
-	.store = gfs2_attr_store,
-};
-
 static struct kobj_type gfs2_ktype = {
 	.default_attrs = gfs2_attrs,
 	.sysfs_ops     = &gfs2_attr_ops,
 };
 
-static struct kset *gfs2_kset;
-
-/*
- * display struct lm_lockstruct fields
- */
-
-struct lockstruct_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
-#define LOCKSTRUCT_ATTR(name, fmt)                                          \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
-{                                                                           \
-	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
-}                                                                           \
-static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
-
-LOCKSTRUCT_ATTR(jid,      "%u\n");
-LOCKSTRUCT_ATTR(first,    "%u\n");
-
-static struct attribute *lockstruct_attrs[] = {
-	&lockstruct_attr_jid.attr,
-	&lockstruct_attr_first.attr,
-	NULL,
-};
 
 /*
  * lock_module. Originally from lock_dlm
@@ -359,34 +337,33 @@
 	return sprintf(buf, "%d\n", ls->ls_first_done);
 }
 
-static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 {
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	return sprintf(buf, "%d\n", ls->ls_recover_jid);
-}
-
-static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
-{
+	unsigned jid;
 	struct gfs2_jdesc *jd;
+	int rv;
 
+	rv = sscanf(buf, "%u", &jid);
+	if (rv != 1)
+		return -EINVAL;
+
+	rv = -ESHUTDOWN;
 	spin_lock(&sdp->sd_jindex_spin);
+	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
+		goto out;
+	rv = -EBUSY;
+	if (sdp->sd_jdesc->jd_jid == jid)
+		goto out;
+	rv = -ENOENT;
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
 		if (jd->jd_jid != jid)
 			continue;
-		jd->jd_dirty = 1;
+		rv = slow_work_enqueue(&jd->jd_work);
 		break;
 	}
+out:
 	spin_unlock(&sdp->sd_jindex_spin);
-}
-
-static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
-{
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
-	gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
-	if (sdp->sd_recoverd_process)
-		wake_up_process(sdp->sd_recoverd_process);
-	return len;
+	return rv ? rv : len;
 }
 
 static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
@@ -401,31 +378,31 @@
 	return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
 }
 
-struct gdlm_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *sdp, char *);
-	ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
-};
+static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
+{
+	return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
+}
 
 #define GDLM_ATTR(_name,_mode,_show,_store) \
-static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-GDLM_ATTR(proto_name,     0444, proto_name_show,     NULL);
-GDLM_ATTR(block,          0644, block_show,          block_store);
-GDLM_ATTR(withdraw,       0644, withdraw_show,       withdraw_store);
-GDLM_ATTR(id,             0444, lkid_show,           NULL);
-GDLM_ATTR(first,          0444, lkfirst_show,        NULL);
-GDLM_ATTR(first_done,     0444, first_done_show,     NULL);
-GDLM_ATTR(recover,        0644, recover_show,        recover_store);
-GDLM_ATTR(recover_done,   0444, recover_done_show,   NULL);
-GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+GDLM_ATTR(proto_name,     0444, proto_name_show,	NULL);
+GDLM_ATTR(block,          0644, block_show,		block_store);
+GDLM_ATTR(withdraw,       0644, withdraw_show,		withdraw_store);
+GDLM_ATTR(id,             0444, lkid_show,		NULL);
+GDLM_ATTR(jid,		  0444, jid_show,		NULL);
+GDLM_ATTR(first,          0444, lkfirst_show,		NULL);
+GDLM_ATTR(first_done,     0444, first_done_show,	NULL);
+GDLM_ATTR(recover,        0200, NULL,			recover_store);
+GDLM_ATTR(recover_done,   0444, recover_done_show,	NULL);
+GDLM_ATTR(recover_status, 0444, recover_status_show,	NULL);
 
 static struct attribute *lock_module_attrs[] = {
 	&gdlm_attr_proto_name.attr,
 	&gdlm_attr_block.attr,
 	&gdlm_attr_withdraw.attr,
 	&gdlm_attr_id.attr,
-	&lockstruct_attr_jid.attr,
+	&gdlm_attr_jid.attr,
 	&gdlm_attr_first.attr,
 	&gdlm_attr_first_done.attr,
 	&gdlm_attr_recover.attr,
@@ -435,53 +412,6 @@
 };
 
 /*
- * display struct gfs2_args fields
- */
-
-struct args_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
-#define ARGS_ATTR(name, fmt)                                                \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
-{                                                                           \
-	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name);       \
-}                                                                           \
-static struct args_attr args_attr_##name = __ATTR_RO(name)
-
-ARGS_ATTR(lockproto,       "%s\n");
-ARGS_ATTR(locktable,       "%s\n");
-ARGS_ATTR(hostdata,        "%s\n");
-ARGS_ATTR(spectator,       "%d\n");
-ARGS_ATTR(ignore_local_fs, "%d\n");
-ARGS_ATTR(localcaching,    "%d\n");
-ARGS_ATTR(localflocks,     "%d\n");
-ARGS_ATTR(debug,           "%d\n");
-ARGS_ATTR(upgrade,         "%d\n");
-ARGS_ATTR(posix_acl,       "%d\n");
-ARGS_ATTR(quota,           "%u\n");
-ARGS_ATTR(suiddir,         "%d\n");
-ARGS_ATTR(data,            "%d\n");
-
-static struct attribute *args_attrs[] = {
-	&args_attr_lockproto.attr,
-	&args_attr_locktable.attr,
-	&args_attr_hostdata.attr,
-	&args_attr_spectator.attr,
-	&args_attr_ignore_local_fs.attr,
-	&args_attr_localcaching.attr,
-	&args_attr_localflocks.attr,
-	&args_attr_debug.attr,
-	&args_attr_upgrade.attr,
-	&args_attr_posix_acl.attr,
-	&args_attr_quota.attr,
-	&args_attr_suiddir.attr,
-	&args_attr_data.attr,
-	NULL,
-};
-
-/*
  * get and set struct gfs2_tune fields
  */
 
@@ -531,14 +461,8 @@
 	return len;
 }
 
-struct tune_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
-};
-
 #define TUNE_ATTR_3(name, show, store)                                        \
-static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
+static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store)
 
 #define TUNE_ATTR_2(name, store)                                              \
 static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                   \
@@ -554,15 +478,6 @@
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-#define TUNE_ATTR_DAEMON(name, process)                                       \
-static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
-{                                                                             \
-	ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len);      \
-	wake_up_process(sdp->sd_##process);                                   \
-	return r;                                                             \
-}                                                                             \
-TUNE_ATTR_2(name, name##_store)
-
 TUNE_ATTR(incore_log_blocks, 0);
 TUNE_ATTR(log_flush_secs, 0);
 TUNE_ATTR(quota_warn_period, 0);
@@ -574,8 +489,6 @@
 TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(stall_secs, 1);
 TUNE_ATTR(statfs_quantum, 1);
-TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
-TUNE_ATTR_DAEMON(logd_secs, logd_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
@@ -589,23 +502,11 @@
 	&tune_attr_quota_simul_sync.attr,
 	&tune_attr_stall_secs.attr,
 	&tune_attr_statfs_quantum.attr,
-	&tune_attr_recoverd_secs.attr,
-	&tune_attr_logd_secs.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
 	NULL,
 };
 
-static struct attribute_group lockstruct_group = {
-	.name = "lockstruct",
-	.attrs = lockstruct_attrs,
-};
-
-static struct attribute_group args_group = {
-	.name = "args",
-	.attrs = args_attrs,
-};
-
 static struct attribute_group tune_group = {
 	.name = "tune",
 	.attrs = tune_attrs,
@@ -626,17 +527,9 @@
 	if (error)
 		goto fail;
 
-	error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
-	if (error)
-		goto fail_reg;
-
-	error = sysfs_create_group(&sdp->sd_kobj, &args_group);
-	if (error)
-		goto fail_lockstruct;
-
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
-		goto fail_args;
+		goto fail_reg;
 
 	error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
 	if (error)
@@ -647,10 +540,6 @@
 
 fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
-fail_args:
-	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-fail_lockstruct:
-	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 fail_reg:
 	kobject_put(&sdp->sd_kobj);
 fail:
@@ -661,8 +550,6 @@
 void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
 {
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
-	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 	sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
 	kobject_put(&sdp->sd_kobj);
 }
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 0000000..98d6ef1
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
+#define glock_trace_name(x) __print_symbolic(x,		\
+			    dlm_state_name(IV),		\
+			    dlm_state_name(NL),		\
+			    dlm_state_name(CR),		\
+			    dlm_state_name(CW),		\
+			    dlm_state_name(PR),		\
+			    dlm_state_name(PW),		\
+			    dlm_state_name(EX))
+
+#define block_state_name(x) __print_symbolic(x,			\
+			    { GFS2_BLKST_FREE, "free" },	\
+			    { GFS2_BLKST_USED, "used" },	\
+			    { GFS2_BLKST_DINODE, "dinode" },	\
+			    { GFS2_BLKST_UNLINKED, "unlinked" })
+
+#define show_glock_flags(flags) __print_flags(flags, "",	\
+	{(1UL << GLF_LOCK),			"l" },		\
+	{(1UL << GLF_DEMOTE),			"D" },		\
+	{(1UL << GLF_PENDING_DEMOTE),		"d" },		\
+	{(1UL << GLF_DEMOTE_IN_PROGRESS),	"p" },		\
+	{(1UL << GLF_DIRTY),			"y" },		\
+	{(1UL << GLF_LFLUSH),			"f" },		\
+	{(1UL << GLF_INVALIDATE_IN_PROGRESS),	"i" },		\
+	{(1UL << GLF_REPLY_PENDING),		"r" },		\
+	{(1UL << GLF_INITIAL),			"I" },		\
+	{(1UL << GLF_FROZEN),			"F" })
+
+#ifndef NUMPTY
+#define NUMPTY
+static inline u8 glock_trace_state(unsigned int state)
+{
+	switch(state) {
+	case LM_ST_SHARED:
+		return DLM_LOCK_PR;
+	case LM_ST_DEFERRED:
+		return DLM_LOCK_CW;
+	case LM_ST_EXCLUSIVE:
+		return DLM_LOCK_EX;
+	}
+	return DLM_LOCK_NL;
+}
+#endif
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/* General glock state change (DLM lock request completes) */
+TRACE_EVENT(gfs2_glock_state_change,
+
+	TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+	TP_ARGS(gl, new_state),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	new_state		)
+		__field(	u8,	dmt_state		)
+		__field(	u8,	tgt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->new_state	= glock_trace_state(new_state);
+		__entry->tgt_state	= glock_trace_state(gl->gl_target);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		 (unsigned long long)__entry->glnum,
+		  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(__entry->new_state),
+		  glock_trace_name(__entry->tgt_state),
+		  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+);
+
+/* State change -> unlocked, glock is being deallocated */
+TRACE_EVENT(gfs2_glock_put,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->gltype, (unsigned long long)__entry->glnum,
+                  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(DLM_LOCK_IV),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Callback (local or remote) requesting lock demotion */
+TRACE_EVENT(gfs2_demote_rq,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	dmt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+                  glock_trace_name(__entry->cur_state),
+                  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Promotion/grant of a glock */
+TRACE_EVENT(gfs2_promote,
+
+	TP_PROTO(const struct gfs2_holder *gh, int first),
+
+	TP_ARGS(gh, first),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	first			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->first	= first;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu promote %s %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->first ? "first": "other",
+		  glock_trace_name(__entry->state))
+);
+
+/* Queue/dequeue a lock request */
+TRACE_EVENT(gfs2_glock_queue,
+
+	TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+	TP_ARGS(gh, queue),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	queue			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->queue	= queue;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu %squeue %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->queue ? "" : "de",
+		  glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/* Pin/unpin a block in the log */
+TRACE_EVENT(gfs2_pin,
+
+	TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+	TP_ARGS(bd, pin),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	pin			)
+		__field(	u32,	len			)
+		__field(	sector_t,	block		)
+		__field(	u64,	ino			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->pin		= pin;
+		__entry->len		= bd->bd_bh->b_size;
+		__entry->block		= bd->bd_bh->b_blocknr;
+		__entry->ino		= bd->bd_gl->gl_name.ln_number;
+	),
+
+	TP_printk("%u,%u log %s %llu/%lu inode %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->pin ? "pin" : "unpin",
+		  (unsigned long long)__entry->block,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->ino)
+);
+
+/* Flushing the log */
+TRACE_EVENT(gfs2_log_flush,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+	TP_ARGS(sdp, start),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	start			)
+		__field(	u64,	log_seq			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = sdp->sd_vfs->s_dev;
+		__entry->start		= start;
+		__entry->log_seq	= sdp->sd_log_sequence;
+	),
+
+	TP_printk("%u,%u log flush %s %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->start ? "start" : "end",
+		  (unsigned long long)__entry->log_seq)
+);
+
+/* Reserving/releasing blocks in the log */
+TRACE_EVENT(gfs2_log_blocks,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+	TP_ARGS(sdp, blocks),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	blocks			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sdp->sd_vfs->s_dev;
+		__entry->blocks		= blocks;
+	),
+
+	TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of disard generation vs. blocks allocated
+ */
+
+/* Map an extent of blocks, possibly a new allocation */
+TRACE_EVENT(gfs2_bmap,
+
+	TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+		sector_t lblock, int create, int errno),
+
+	TP_ARGS(ip, bh, lblock, create, errno),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	sector_t, lblock		)
+		__field(	sector_t, pblock		)
+		__field(	u64,	inum			)
+		__field(	unsigned long, state		)
+		__field(	u32,	len			)
+		__field(	int,	create			)
+		__field(	int,	errno			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->lblock		= lblock;
+		__entry->pblock		= buffer_mapped(bh) ?  bh->b_blocknr : 0;
+		__entry->inum		= ip->i_no_addr;
+		__entry->state		= bh->b_state;
+		__entry->len		= bh->b_size;
+		__entry->create		= create;
+		__entry->errno		= errno;
+	),
+
+	TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->lblock,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->pblock,
+		  __entry->state, __entry->create ? "create " : "nocreate",
+		  __entry->errno)
+);
+
+/* Keep track of blocks as they are allocated/freed */
+TRACE_EVENT(gfs2_block_alloc,
+
+	TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+		u8 block_state),
+
+	TP_ARGS(ip, block, len, block_state),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	start			)
+		__field(	u64,	inum			)
+		__field(	u32,	len			)
+		__field(	u8,	block_state		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->start		= block;
+		__entry->inum		= ip->i_no_addr;
+		__entry->len		= len;
+		__entry->block_state	= block_state;
+	),
+
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->start,
+		  (unsigned long)__entry->len,
+		  block_state_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 053752d..4ef0e9f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -33,6 +33,9 @@
 	BUG_ON(current->journal_info);
 	BUG_ON(blocks == 0 && revokes == 0);
 
+	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+		return -EROFS;
+
 	tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
 	if (!tr)
 		return -ENOMEM;
@@ -54,12 +57,6 @@
 	if (error)
 		goto fail_holder_uninit;
 
-	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-		tr->tr_t_gh.gh_flags |= GL_NOCACHE;
-		error = -EROFS;
-		goto fail_gunlock;
-	}
-
 	error = gfs2_log_reserve(sdp, tr->tr_reserved);
 	if (error)
 		goto fail_gunlock;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index a36bb74..6f833dc 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -49,11 +49,23 @@
  */
 static void hfs_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		return;
+
 	/* sync everything to the buffers */
+	if (!(sb->s_flags & MS_RDONLY))
+		hfs_mdb_commit(sb);
+	unlock_super(sb);
+}
+
+static int hfs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
 	hfs_mdb_commit(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
 }
 
 /*
@@ -65,9 +77,15 @@
  */
 static void hfs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
+	if (sb->s_dirt)
+		hfs_write_super(sb);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
+
+	unlock_kernel();
 }
 
 /*
@@ -164,6 +182,7 @@
 	.clear_inode	= hfs_clear_inode,
 	.put_super	= hfs_put_super,
 	.write_super	= hfs_write_super,
+	.sync_fs	= hfs_sync_fs,
 	.statfs		= hfs_statfs,
 	.remount_fs     = hfs_remount,
 	.show_options	= hfs_show_options,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index f2a6402..9fc3af0c 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -152,15 +152,14 @@
 	}
 }
 
-static void hfsplus_write_super(struct super_block *sb)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 	dprint(DBG_SUPER, "hfsplus_write_super\n");
+
+	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		/* warn? */
-		return;
 
 	vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
 	vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
@@ -192,6 +191,16 @@
 		}
 		HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
 	}
+	unlock_super(sb);
+	return 0;
+}
+
+static void hfsplus_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		hfsplus_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static void hfsplus_put_super(struct super_block *sb)
@@ -199,6 +208,11 @@
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 	if (!sb->s_fs_info)
 		return;
+
+	lock_kernel();
+
+	if (sb->s_dirt)
+		hfsplus_write_super(sb);
 	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
 		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
@@ -218,6 +232,8 @@
 		unload_nls(HFSPLUS_SB(sb).nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -279,6 +295,7 @@
 	.clear_inode	= hfsplus_clear_inode,
 	.put_super	= hfsplus_put_super,
 	.write_super	= hfsplus_write_super,
+	.sync_fs	= hfsplus_sync_fs,
 	.statfs		= hfsplus_statfs,
 	.remount_fs	= hfsplus_remount,
 	.show_options	= hfsplus_show_options,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index fc77965..f2feaa0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -13,6 +13,7 @@
 #include <linux/statfs.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
@@ -99,11 +100,16 @@
 static void hpfs_put_super(struct super_block *s)
 {
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
+
+	lock_kernel();
+
 	kfree(sbi->sb_cp_table);
 	kfree(sbi->sb_bmp_dir);
 	unmark_dirty(s);
 	s->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -393,6 +399,8 @@
 	
 	*flags |= MS_NOATIME;
 	
+	lock_kernel();
+	lock_super(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
 	lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
@@ -425,9 +433,13 @@
 
 	replace_mount_options(s, new_opts);
 
+	unlock_super(s);
+	unlock_kernel();
 	return 0;
 
 out_err:
+	unlock_super(s);
+	unlock_kernel();
 	kfree(new_opts);
 	return -EINVAL;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c1462d4..941c842 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,6 +30,7 @@
 #include <linux/dnotify.h>
 #include <linux/statfs.h>
 #include <linux/security.h>
+#include <linux/ima.h>
 
 #include <asm/uaccess.h>
 
@@ -986,6 +987,7 @@
 			&hugetlbfs_file_operations);
 	if (!file)
 		goto out_dentry; /* inode is already attached */
+	ima_counts_get(file);
 
 	return file;
 
diff --git a/fs/inode.c b/fs/inode.c
index 0571983..a88baeb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
 
@@ -189,6 +190,10 @@
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
 
+#ifdef CONFIG_FSNOTIFY
+	inode->i_fsnotify_mask = 0;
+#endif
+
 	return inode;
 
 out_free_security:
@@ -219,7 +224,9 @@
 void destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
+	ima_inode_free(inode);
 	security_inode_free(inode);
+	fsnotify_inode_delete(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -251,6 +258,9 @@
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+#ifdef CONFIG_FSNOTIFY
+	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+#endif
 }
 EXPORT_SYMBOL(inode_init_once);
 
@@ -397,6 +407,7 @@
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
+	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
 
@@ -1053,13 +1064,22 @@
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
 		spin_lock(&inode_lock);
-		old = find_inode_fast(sb, head, ino);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_ino != ino)
+				continue;
+			if (old->i_sb != sb)
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;
@@ -1081,14 +1101,24 @@
 {
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
+
 		spin_lock(&inode_lock);
-		old = find_inode(sb, head, test, data);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_sb != sb)
+				continue;
+			if (!test(old, data))
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;
@@ -1392,7 +1422,7 @@
 	if (IS_NOCMTIME(inode))
 		return;
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		return;
 
diff --git a/fs/internal.h b/fs/internal.h
index b4dac4f..d55ef56 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,6 +25,8 @@
 	return sb == blockdev_superblock;
 }
 
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
 #else
 static inline void bdev_cache_init(void)
 {
@@ -34,6 +36,11 @@
 {
 	return 0;
 }
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -66,3 +73,13 @@
  * fs_struct.c
  */
 extern void chroot_fs_refs(struct path *, struct path *);
+
+/*
+ * file_table.c
+ */
+extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+extern int do_remount_sb(struct super_block *, int, void *, int);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 82d9c42..286f38d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -414,10 +414,6 @@
 	switch (cmd) {
 	case FIBMAP:
 		return ioctl_fibmap(filp, p);
-	case FS_IOC_FIEMAP:
-		return ioctl_fiemap(filp, arg);
-	case FIGETBSZ:
-		return put_user(inode->i_sb->s_blocksize, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
@@ -557,6 +553,16 @@
 		error = ioctl_fsthaw(filp);
 		break;
 
+	case FS_IOC_FIEMAP:
+		return ioctl_fiemap(filp, arg);
+
+	case FIGETBSZ:
+	{
+		struct inode *inode = filp->f_path.dentry->d_inode;
+		int __user *p = (int __user *)arg;
+		return put_user(inode->i_sb->s_blocksize, p);
+	}
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b4cbe96..068b34b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -42,11 +42,16 @@
 static void isofs_put_super(struct super_block *sb)
 {
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
+
 #ifdef CONFIG_JOLIET
+	lock_kernel();
+
 	if (sbi->s_nls_iocharset) {
 		unload_nls(sbi->s_nls_iocharset);
 		sbi->s_nls_iocharset = NULL;
 	}
+
+	unlock_kernel();
 #endif
 
 	kfree(sbi);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 06560c5..618e21c 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -241,7 +241,7 @@
 			spin_lock(&journal->j_list_lock);
 		}
 		/* Someone already cleaned up the buffer? */
-		if (!buffer_jbd(bh)
+		if (!buffer_jbd(bh) || bh2jh(bh) != jh
 			|| jh->b_transaction != commit_transaction
 			|| jh->b_jlist != BJ_SyncData) {
 			jbd_unlock_bh_state(bh);
@@ -478,7 +478,9 @@
 			spin_lock(&journal->j_list_lock);
 			continue;
 		}
-		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+		if (buffer_jbd(bh) && bh2jh(bh) == jh &&
+		    jh->b_transaction == commit_transaction &&
+		    jh->b_jlist == BJ_Locked) {
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			journal_remove_journal_head(bh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5814410..62be7d2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1781,7 +1781,7 @@
  * Journal abort has very specific semantics, which we describe
  * for journal abort.
  *
- * Two internal function, which provide abort to te jbd layer
+ * Two internal functions, which provide abort to the jbd layer
  * itself are here.
  */
 
@@ -1879,7 +1879,7 @@
  * int jbd2_journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
- * This is the errno numbet set with jbd2_journal_abort(), the last
+ * This is the errno number set with jbd2_journal_abort(), the last
  * time the journal was mounted - if the journal was stopped
  * without calling abort this will be 0.
  *
@@ -1903,7 +1903,7 @@
  * int jbd2_journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@
  * void jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 void jbd2_journal_ack_err(journal_t *journal)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 249305d..3451a81 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vfs.h>
 #include <linux/crc32.h>
+#include <linux/smp_lock.h>
 #include "nodelist.h"
 
 static int jffs2_flash_setup(struct jffs2_sb_info *c);
@@ -387,6 +388,7 @@
 	   This also catches the case where it was stopped and this
 	   is just a remount to restart it.
 	   Flush the writebuffer, if neccecary, else we loose it */
+	lock_kernel();
 	if (!(sb->s_flags & MS_RDONLY)) {
 		jffs2_stop_garbage_collect_thread(c);
 		mutex_lock(&c->alloc_sem);
@@ -399,24 +401,10 @@
 
 	*flags |= MS_NOATIME;
 
+	unlock_kernel();
 	return 0;
 }
 
-void jffs2_write_super (struct super_block *sb)
-{
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
-	sb->s_dirt = 0;
-
-	if (sb->s_flags & MS_RDONLY)
-		return;
-
-	D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
-	jffs2_garbage_collect_trigger(c);
-	jffs2_erase_pending_blocks(c, 0);
-	jffs2_flush_wbuf_gc(c, 0);
-}
-
-
 /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
    fill in the raw_inode while you're at it. */
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 5e194a5..2228380 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -181,7 +181,6 @@
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
 int jffs2_statfs (struct dentry *, struct kstatfs *);
-void jffs2_write_super (struct super_block *);
 int jffs2_remount_fs (struct super_block *, int *, char *);
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
 void jffs2_gc_release_inode(struct jffs2_sb_info *c,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4c4e18c..07a22ca 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -53,10 +53,29 @@
 	inode_init_once(&f->vfs_inode);
 }
 
+static void jffs2_write_super(struct super_block *sb)
+{
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+
+	lock_super(sb);
+	sb->s_dirt = 0;
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
+		jffs2_garbage_collect_trigger(c);
+		jffs2_erase_pending_blocks(c, 0);
+		jffs2_flush_wbuf_gc(c, 0);
+	}
+
+	unlock_super(sb);
+}
+
 static int jffs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
 
+	jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
@@ -174,6 +193,11 @@
 
 	D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
@@ -192,6 +216,8 @@
 	if (c->mtd->sync)
 		c->mtd->sync(c->mtd);
 
+	unlock_kernel();
+
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 3460572..0fc3040 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2571,6 +2571,7 @@
 
 			txAbort(tid, 0);
 			txEnd(tid);
+			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
 
 			/* release the inode map lock */
 			IWRITE_UNLOCK(ipimap);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 6f21adf..09b1b6e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -32,6 +32,7 @@
 #include <linux/crc32.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -183,6 +184,9 @@
 	int rc;
 
 	jfs_info("In jfs_put_super");
+
+	lock_kernel();
+
 	rc = jfs_umount(sb);
 	if (rc)
 		jfs_err("jfs_umount failed with return code %d", rc);
@@ -195,6 +199,8 @@
 	sbi->direct_inode = NULL;
 
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 enum {
@@ -370,19 +376,24 @@
 	s64 newLVSize = 0;
 	int rc = 0;
 	int flag = JFS_SBI(sb)->flag;
+	int ret;
 
 	if (!parse_options(data, sb, &newLVSize, &flag)) {
 		return -EINVAL;
 	}
+	lock_kernel();
 	if (newLVSize) {
 		if (sb->s_flags & MS_RDONLY) {
 			printk(KERN_ERR
 		  "JFS: resize requires volume to be mounted read-write\n");
+			unlock_kernel();
 			return -EROFS;
 		}
 		rc = jfs_extendfs(sb, newLVSize, 0);
-		if (rc)
+		if (rc) {
+			unlock_kernel();
 			return rc;
+		}
 	}
 
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
@@ -393,23 +404,31 @@
 		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
 
 		JFS_SBI(sb)->flag = flag;
-		return jfs_mount_rw(sb, 1);
+		ret = jfs_mount_rw(sb, 1);
+		unlock_kernel();
+		return ret;
 	}
 	if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
 		rc = jfs_umount_rw(sb);
 		JFS_SBI(sb)->flag = flag;
+		unlock_kernel();
 		return rc;
 	}
 	if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
 		if (!(sb->s_flags & MS_RDONLY)) {
 			rc = jfs_umount_rw(sb);
-			if (rc)
+			if (rc) {
+				unlock_kernel();
 				return rc;
+			}
 			JFS_SBI(sb)->flag = flag;
-			return jfs_mount_rw(sb, 1);
+			ret = jfs_mount_rw(sb, 1);
+			unlock_kernel();
+			return ret;
 		}
 	JFS_SBI(sb)->flag = flag;
 
+	unlock_kernel();
 	return 0;
 }
 
@@ -720,8 +739,10 @@
 		blk++;
 	}
 out:
-	if (len == towrite)
+	if (len == towrite) {
+		mutex_unlock(&inode->i_mutex);
 		return err;
+	}
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
 	inode->i_version++;
diff --git a/fs/libfs.c b/fs/libfs.c
index 80046dd..ddfa899 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -9,6 +9,8 @@
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
 
 #include <asm/uaccess.h>
 
@@ -807,6 +809,29 @@
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
+int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0, /* metadata-only; caller takes care of data */
+	};
+	struct inode *inode = dentry->d_inode;
+	int err;
+	int ret;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY))
+		return ret;
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode(inode, &wbc);
+	if (ret == 0)
+		ret = err;
+	return ret;
+}
+EXPORT_SYMBOL(simple_fsync);
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d4946c4..e5f2064 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -22,7 +22,7 @@
 const struct file_operations minix_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= minix_readdir,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 };
 
 static inline void dir_put_page(struct page *page)
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 17765f6..3eec3e6 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -6,15 +6,12 @@
  *  minix regular file handling primitives
  */
 
-#include <linux/buffer_head.h>		/* for fsync_inode_buffers() */
 #include "minix.h"
 
 /*
  * We have mostly NULLs here: the current defaults are OK for
  * the minix filesystem.
  */
-int minix_sync_file(struct file *, struct dentry *, int);
-
 const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -22,7 +19,7 @@
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -30,18 +27,3 @@
 	.truncate	= minix_truncate,
 	.getattr	= minix_getattr,
 };
-
-int minix_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	
-	err |= minix_sync_inode(inode);
-	return err ? -EIO : 0;
-}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index daad3c2..f91a236 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -35,6 +35,8 @@
 	int i;
 	struct minix_sb_info *sbi = minix_sb(sb);
 
+	lock_kernel();
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		if (sbi->s_version != MINIX_V3)	 /* s_state is now out from V3 sb */
 			sbi->s_ms->s_state = sbi->s_mount_state;
@@ -49,7 +51,7 @@
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * minix_inode_cachep;
@@ -554,38 +556,25 @@
 	return bh;
 }
 
-static struct buffer_head *minix_update_inode(struct inode *inode)
-{
-	if (INODE_VERSION(inode) == MINIX_V1)
-		return V1_minix_update_inode(inode);
-	else
-		return V2_minix_update_inode(inode);
-}
-
-static int minix_write_inode(struct inode * inode, int wait)
-{
-	brelse(minix_update_inode(inode));
-	return 0;
-}
-
-int minix_sync_inode(struct inode * inode)
+static int minix_write_inode(struct inode *inode, int wait)
 {
 	int err = 0;
 	struct buffer_head *bh;
 
-	bh = minix_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
+	if (INODE_VERSION(inode) == MINIX_V1)
+		bh = V1_minix_update_inode(inode);
+	else
+		bh = V2_minix_update_inode(inode);
+	if (!bh)
+		return -EIO;
+	if (wait && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
 			printk("IO error syncing minix inode [%s:%08lx]\n",
 				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
+			err = -EIO;
 		}
 	}
-	else if (!bh)
-		err = -1;
 	brelse (bh);
 	return err;
 }
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e6a0b19..cb7fdd1 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -57,7 +57,6 @@
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
 extern void minix_truncate(struct inode *);
-extern int minix_sync_inode(struct inode *);
 extern void minix_set_inode(struct inode *, dev_t);
 extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
 extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
@@ -72,7 +71,6 @@
 extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
 extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
 extern ino_t minix_inode_by_name(struct dentry*);
-extern int minix_sync_file(struct file *, struct dentry *, int);
 
 extern const struct inode_operations minix_file_inode_operations;
 extern const struct inode_operations minix_dir_inode_operations;
diff --git a/fs/mpage.c b/fs/mpage.c
index 680ba60..42381bd 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,7 +379,8 @@
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
@@ -412,7 +413,8 @@
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
 			&map_bh, &first_logical_block, get_block);
 	if (bio)
diff --git a/fs/namei.c b/fs/namei.c
index 967c3db..527119a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -552,6 +552,17 @@
 	return result;
 }
 
+static __always_inline void set_root(struct nameidata *nd)
+{
+	if (!nd->root.mnt) {
+		struct fs_struct *fs = current->fs;
+		read_lock(&fs->lock);
+		nd->root = fs->root;
+		path_get(&nd->root);
+		read_unlock(&fs->lock);
+	}
+}
+
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
 	int res = 0;
@@ -560,14 +571,10 @@
 		goto fail;
 
 	if (*link == '/') {
-		struct fs_struct *fs = current->fs;
-
+		set_root(nd);
 		path_put(&nd->path);
-
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	}
 
 	res = link_path_walk(link, nd);
@@ -668,23 +675,23 @@
 	return err;
 }
 
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+int follow_up(struct path *path)
 {
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 	spin_lock(&vfsmount_lock);
-	parent=(*mnt)->mnt_parent;
-	if (parent == *mnt) {
+	parent = path->mnt->mnt_parent;
+	if (parent == path->mnt) {
 		spin_unlock(&vfsmount_lock);
 		return 0;
 	}
 	mntget(parent);
-	mountpoint=dget((*mnt)->mnt_mountpoint);
+	mountpoint = dget(path->mnt->mnt_mountpoint);
 	spin_unlock(&vfsmount_lock);
-	dput(*dentry);
-	*dentry = mountpoint;
-	mntput(*mnt);
-	*mnt = parent;
+	dput(path->dentry);
+	path->dentry = mountpoint;
+	mntput(path->mnt);
+	path->mnt = parent;
 	return 1;
 }
 
@@ -695,7 +702,7 @@
 {
 	int res = 0;
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -708,32 +715,32 @@
 	return res;
 }
 
-static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static void follow_mount(struct path *path)
 {
-	while (d_mountpoint(*dentry)) {
-		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+	while (d_mountpoint(path->dentry)) {
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 	}
 }
 
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(*mnt, *dentry);
+	mounted = lookup_mnt(path);
 	if (mounted) {
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 		return 1;
 	}
 	return 0;
@@ -741,19 +748,16 @@
 
 static __always_inline void follow_dotdot(struct nameidata *nd)
 {
-	struct fs_struct *fs = current->fs;
+	set_root(nd);
 
 	while(1) {
 		struct vfsmount *parent;
 		struct dentry *old = nd->path.dentry;
 
-                read_lock(&fs->lock);
-		if (nd->path.dentry == fs->root.dentry &&
-		    nd->path.mnt == fs->root.mnt) {
-                        read_unlock(&fs->lock);
+		if (nd->path.dentry == nd->root.dentry &&
+		    nd->path.mnt == nd->root.mnt) {
 			break;
 		}
-                read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			nd->path.dentry = dget(nd->path.dentry->d_parent);
@@ -775,7 +779,7 @@
 		mntput(nd->path.mnt);
 		nd->path.mnt = parent;
 	}
-	follow_mount(&nd->path.mnt, &nd->path.dentry);
+	follow_mount(&nd->path);
 }
 
 /*
@@ -853,7 +857,8 @@
 			err = inode_permission(nd->path.dentry->d_inode,
 					       MAY_EXEC);
 		if (!err)
-			err = ima_path_check(&nd->path, MAY_EXEC);
+			err = ima_path_check(&nd->path, MAY_EXEC,
+				             IMA_COUNT_UPDATE);
  		if (err)
 			break;
 
@@ -1016,25 +1021,23 @@
 	return link_path_walk(name, nd);
 }
 
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
-				unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
 {
 	int retval = 0;
 	int fput_needed;
 	struct file *file;
-	struct fs_struct *fs = current->fs;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	if (*name=='/') {
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		set_root(nd);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	} else if (dfd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
 		read_lock(&fs->lock);
 		nd->path = fs->pwd;
 		path_get(&fs->pwd);
@@ -1062,17 +1065,29 @@
 
 		fput_light(file, fput_needed);
 	}
-
-	retval = path_walk(name, nd);
-	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
-				nd->path.dentry->d_inode))
-		audit_inode(name, nd->path.dentry);
-out_fail:
-	return retval;
+	return 0;
 
 fput_fail:
 	fput_light(file, fput_needed);
-	goto out_fail;
+out_fail:
+	return retval;
+}
+
+/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+static int do_path_lookup(int dfd, const char *name,
+				unsigned int flags, struct nameidata *nd)
+{
+	int retval = path_init(dfd, name, flags, nd);
+	if (!retval)
+		retval = path_walk(name, nd);
+	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
+				nd->path.dentry->d_inode))
+		audit_inode(name, nd->path.dentry);
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
+	return retval;
 }
 
 int path_lookup(const char *name, unsigned int flags,
@@ -1112,14 +1127,18 @@
 	nd->path.dentry = dentry;
 	nd->path.mnt = mnt;
 	path_get(&nd->path);
+	nd->root = nd->path;
+	path_get(&nd->root);
 
 	retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
 
-	return retval;
+	path_put(&nd->root);
+	nd->root.mnt = NULL;
 
+	return retval;
 }
 
 /**
@@ -1515,7 +1534,8 @@
 		return error;
 
 	error = ima_path_check(path,
-			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+			       IMA_COUNT_UPDATE);
 	if (error)
 		return error;
 	/*
@@ -1674,9 +1694,14 @@
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
 	if (error)
 		return ERR_PTR(error);
+	error = path_walk(pathname, &nd);
+	if (error)
+		return ERR_PTR(error);
+	if (unlikely(!audit_dummy_context()))
+		audit_inode(pathname, nd.path.dentry);
 
 	/*
 	 * We have the parent and last component. First of all, check
@@ -1804,6 +1829,8 @@
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
 exit_parent:
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	path_put(&nd.path);
 	return ERR_PTR(error);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 134d494..2dd333b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -131,10 +131,20 @@
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
-		atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+		mnt->mnt_writers = alloc_percpu(int);
+		if (!mnt->mnt_writers)
+			goto out_free_devname;
+#else
+		mnt->mnt_writers = 0;
+#endif
 	}
 	return mnt;
 
+#ifdef CONFIG_SMP
+out_free_devname:
+	kfree(mnt->mnt_devname);
+#endif
 out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
@@ -171,65 +181,38 @@
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
-struct mnt_writer {
-	/*
-	 * If holding multiple instances of this lock, they
-	 * must be ordered by cpu number.
-	 */
-	spinlock_t lock;
-	struct lock_class_key lock_class; /* compiles out with !lockdep */
-	unsigned long count;
-	struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
-
-static int __init init_mnt_writers(void)
+static inline void inc_mnt_writers(struct vfsmount *mnt)
 {
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
-		lockdep_set_class(&writer->lock, &writer->lock_class);
-		writer->count = 0;
-	}
-	return 0;
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+	mnt->mnt_writers++;
+#endif
 }
-fs_initcall(init_mnt_writers);
 
-static void unlock_mnt_writers(void)
+static inline void dec_mnt_writers(struct vfsmount *mnt)
 {
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+	mnt->mnt_writers--;
+#endif
+}
+
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
 	int cpu;
-	struct mnt_writer *cpu_writer;
 
 	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
 	}
-}
 
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
-	if (!cpu_writer->mnt)
-		return;
-	/*
-	 * This is in case anyone ever leaves an invalid,
-	 * old ->mnt and a count of 0.
-	 */
-	if (!cpu_writer->count)
-		return;
-	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
-	cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
-					  struct vfsmount *mnt)
-{
-	if (cpu_writer->mnt == mnt)
-		return;
-	__clear_mnt_count(cpu_writer);
-	cpu_writer->mnt = mnt;
+	return count;
+#else
+	return mnt->mnt_writers;
+#endif
 }
 
 /*
@@ -253,74 +236,73 @@
 int mnt_want_write(struct vfsmount *mnt)
 {
 	int ret = 0;
-	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	/*
+	 * The store to inc_mnt_writers must be visible before we pass
+	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set MNT_WRITE_HOLD.
+	 */
+	smp_mb();
+	while (mnt->mnt_flags & MNT_WRITE_HOLD)
+		cpu_relax();
+	/*
+	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+	 * be set to match its requirements. So we must not load that until
+	 * MNT_WRITE_HOLD is cleared.
+	 */
+	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		dec_mnt_writers(mnt);
 		ret = -EROFS;
 		goto out;
 	}
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
-static void lock_mnt_writers(void)
-{
-	int cpu;
-	struct mnt_writer *cpu_writer;
-
-	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		__clear_mnt_count(cpu_writer);
-		cpu_writer->mnt = NULL;
-	}
-}
-
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count.  Make sure it
- * does not get too far out of whack.
+/**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
  */
-static void handle_write_count_underflow(struct vfsmount *mnt)
+int mnt_clone_write(struct vfsmount *mnt)
 {
-	if (atomic_read(&mnt->__mnt_writers) >=
-	    MNT_WRITER_UNDERFLOW_LIMIT)
-		return;
-	/*
-	 * It isn't necessary to hold all of the locks
-	 * at the same time, but doing it this way makes
-	 * us share a lot more code.
-	 */
-	lock_mnt_writers();
-	/*
-	 * vfsmount_lock is for mnt_flags.
-	 */
-	spin_lock(&vfsmount_lock);
-	/*
-	 * If coalescing the per-cpu writer counts did not
-	 * get us back to a positive writer count, we have
-	 * a bug.
-	 */
-	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
-	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
-				"count: %d\n",
-			mnt, atomic_read(&mnt->__mnt_writers));
-		/* use the flag to keep the dmesg spam down */
-		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
-	}
-	spin_unlock(&vfsmount_lock);
-	unlock_mnt_writers();
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
 }
+EXPORT_SYMBOL_GPL(mnt_clone_write);
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
 
 /**
  * mnt_drop_write - give up write access to a mount
@@ -332,37 +314,9 @@
  */
 void mnt_drop_write(struct vfsmount *mnt)
 {
-	int must_check_underflow = 0;
-	struct mnt_writer *cpu_writer;
-
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
-
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	if (cpu_writer->count > 0) {
-		cpu_writer->count--;
-	} else {
-		must_check_underflow = 1;
-		atomic_dec(&mnt->__mnt_writers);
-	}
-
-	spin_unlock(&cpu_writer->lock);
-	/*
-	 * Logically, we could call this each time,
-	 * but the __mnt_writers cacheline tends to
-	 * be cold, and makes this expensive.
-	 */
-	if (must_check_underflow)
-		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
+	preempt_disable();
+	dec_mnt_writers(mnt);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -370,24 +324,41 @@
 {
 	int ret = 0;
 
-	lock_mnt_writers();
-	/*
-	 * With all the locks held, this value is stable
-	 */
-	if (atomic_read(&mnt->__mnt_writers) > 0) {
-		ret = -EBUSY;
-		goto out;
-	}
-	/*
-	 * nobody can do a successful mnt_want_write() with all
-	 * of the counts in MNT_DENIED_WRITE and the locks held.
-	 */
 	spin_lock(&vfsmount_lock);
-	if (!ret)
+	mnt->mnt_flags |= MNT_WRITE_HOLD;
+	/*
+	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * should be visible before we do.
+	 */
+	smp_mb();
+
+	/*
+	 * With writers on hold, if this value is zero, then there are
+	 * definitely no active writers (although held writers may subsequently
+	 * increment the count, they'll have to wait, and decrement it after
+	 * seeing MNT_READONLY).
+	 *
+	 * It is OK to have counter incremented on one CPU and decremented on
+	 * another: the sum will add up correctly. The danger would be when we
+	 * sum up each counter, if we read a counter before it is incremented,
+	 * but then read another CPU's count which it has been subsequently
+	 * decremented from -- we would see more decrements than we should.
+	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * mnt_want_write first increments count, then smp_mb, then spins on
+	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * we're counting up here.
+	 */
+	if (count_mnt_writers(mnt) > 0)
+		ret = -EBUSY;
+	else
 		mnt->mnt_flags |= MNT_READONLY;
+	/*
+	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * that become unheld will see MNT_READONLY.
+	 */
+	smp_wmb();
+	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
 	spin_unlock(&vfsmount_lock);
-out:
-	unlock_mnt_writers();
 	return ret;
 }
 
@@ -410,6 +381,9 @@
 {
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_writers);
+#endif
 	kmem_cache_free(mnt_cache, mnt);
 }
 
@@ -442,11 +416,11 @@
  * lookup_mnt increments the ref count before returning
  * the vfsmount struct.
  */
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
 	spin_lock(&vfsmount_lock);
-	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
 	spin_unlock(&vfsmount_lock);
 	return child_mnt;
@@ -604,38 +578,18 @@
 
 static inline void __mntput(struct vfsmount *mnt)
 {
-	int cpu;
 	struct super_block *sb = mnt->mnt_sb;
 	/*
-	 * We don't have to hold all of the locks at the
-	 * same time here because we know that we're the
-	 * last reference to mnt and that no new writers
-	 * can come in.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		if (cpu_writer->mnt != mnt) {
-			spin_unlock(&cpu_writer->lock);
-			continue;
-		}
-		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
-		cpu_writer->count = 0;
-		/*
-		 * Might as well do this so that no one
-		 * ever sees the pointer and expects
-		 * it to be valid.
-		 */
-		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
-	}
-	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
 	 * happens, the filesystem was probably unable
 	 * to make r/w->r/o transitions.
 	 */
-	WARN_ON(atomic_read(&mnt->__mnt_writers));
+	/*
+	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+	 * provides barriers, so count_mnt_writers() below is safe.  AV
+	 */
+	WARN_ON(count_mnt_writers(mnt));
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
@@ -1106,11 +1060,8 @@
 		 * we just try to remount it readonly.
 		 */
 		down_write(&sb->s_umount);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			lock_kernel();
+		if (!(sb->s_flags & MS_RDONLY))
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-			unlock_kernel();
-		}
 		up_write(&sb->s_umount);
 		return retval;
 	}
@@ -1253,11 +1204,11 @@
 	return NULL;
 }
 
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
 {
 	struct vfsmount *tree;
 	down_write(&namespace_sem);
-	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
 	return tree;
 }
@@ -1430,7 +1381,7 @@
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1601,7 +1552,7 @@
 
 	down_write(&namespace_sem);
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1612,7 +1563,7 @@
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+	if (d_unlinked(path->dentry))
 		goto out1;
 
 	err = -EINVAL;
@@ -1676,7 +1627,9 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	lock_kernel();
 	mnt = do_kern_mount(type, flags, name, data);
+	unlock_kernel();
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -1695,10 +1648,10 @@
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
-	if (!check_mnt(path->mnt))
+	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
@@ -2092,10 +2045,8 @@
 	if (retval < 0)
 		goto out3;
 
-	lock_kernel();
 	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
 			  flags, (void *)data_page);
-	unlock_kernel();
 	free_page(data_page);
 
 out3:
@@ -2175,9 +2126,9 @@
 	error = -ENOENT;
 	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+	if (d_unlinked(new.dentry))
 		goto out2;
-	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+	if (d_unlinked(old.dentry))
 		goto out2;
 	error = -EBUSY;
 	if (new.mnt == root.mnt ||
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d642f0e..b99ce20 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -736,6 +736,8 @@
 {
 	struct ncp_server *server = NCP_SBP(sb);
 
+	lock_kernel();
+
 	ncp_lock_server(server);
 	ncp_disconnect(server);
 	ncp_unlock_server(server);
@@ -769,6 +771,8 @@
 	vfree(server->packet);
 	sb->s_fs_info = NULL;
 	kfree(server);
+
+	unlock_kernel();
 }
 
 static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 64a288e..f01caec 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -154,7 +154,7 @@
 	goto out;
 out_follow:
 	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	       follow_down(&nd->path))
 		;
 	err = 0;
 	goto out;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d2d6778..26127b6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1813,6 +1813,7 @@
 	if (data == NULL)
 		return -ENOMEM;
 
+	lock_kernel();
 	/* fill out struct with values from existing mount */
 	data->flags = nfss->flags;
 	data->rsize = nfss->rsize;
@@ -1837,6 +1838,7 @@
 	error = nfs_compare_remount_data(nfss, data);
 out:
 	kfree(data);
+	unlock_kernel();
 	return error;
 }
 
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5839b22..8b1f8ef 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -847,9 +847,8 @@
 	return exp_find_key(clp, FSID_NUM, fsidv, NULL);
 }
 
-static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
-				   struct dentry *dentry,
-				   struct cache_req *reqp)
+static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+				     struct cache_req *reqp)
 {
 	struct svc_export *exp, key;
 	int err;
@@ -858,8 +857,7 @@
 		return ERR_PTR(-ENOENT);
 
 	key.ex_client = clp;
-	key.ex_path.mnt = mnt;
-	key.ex_path.dentry = dentry;
+	key.ex_path = *path;
 
 	exp = svc_export_lookup(&key);
 	if (exp == NULL)
@@ -873,24 +871,19 @@
 /*
  * Find the export entry for a given dentry.
  */
-static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
-				     struct dentry *dentry,
-				     struct cache_req *reqp)
+static struct svc_export *exp_parent(svc_client *clp, struct path *path)
 {
-	svc_export *exp;
+	struct dentry *saved = dget(path->dentry);
+	svc_export *exp = exp_get_by_name(clp, path, NULL);
 
-	dget(dentry);
-	exp = exp_get_by_name(clp, mnt, dentry, reqp);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
-
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = exp_get_by_name(clp, mnt, dentry, reqp);
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = exp_get_by_name(clp, path, NULL);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
@@ -1018,7 +1011,7 @@
 		goto out_put_clp;
 	err = -EINVAL;
 
-	exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(clp, &path, NULL);
 
 	memset(&new, 0, sizeof(new));
 
@@ -1135,7 +1128,7 @@
 		goto out_domain;
 
 	err = -EINVAL;
-	exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(dom, &path, NULL);
 	path_put(&path);
 	if (IS_ERR(exp))
 		goto out_domain;
@@ -1177,7 +1170,7 @@
 	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
 		 name, path.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
-	exp = exp_parent(clp, path.mnt, path.dentry, NULL);
+	exp = exp_parent(clp, &path);
 	if (IS_ERR(exp)) {
 		err = PTR_ERR(exp);
 		goto out;
@@ -1207,7 +1200,7 @@
 	if (IS_ERR(ek))
 		return ERR_CAST(ek);
 
-	exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
+	exp = exp_get_by_name(clp, &ek->ek_path, reqp);
 	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (IS_ERR(exp))
@@ -1247,8 +1240,7 @@
  * use exp_get_by_name() or exp_find().
  */
 struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
 
@@ -1256,8 +1248,7 @@
 		goto gss;
 
 	/* First try the auth_unix client: */
-	exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
-						&rqstp->rq_chandle);
+	exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
 	if (PTR_ERR(exp) == -ENOENT)
 		goto gss;
 	if (IS_ERR(exp))
@@ -1269,8 +1260,7 @@
 	/* Otherwise, try falling back on gss client */
 	if (rqstp->rq_gssclient == NULL)
 		return exp;
-	gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
-						&rqstp->rq_chandle);
+	gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
 	if (PTR_ERR(gssexp) == -ENOENT)
 		return exp;
 	if (!IS_ERR(exp))
@@ -1309,23 +1299,19 @@
 }
 
 struct svc_export *
-rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 {
-	struct svc_export *exp;
+	struct dentry *saved = dget(path->dentry);
+	struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
 
-	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
-
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = rqst_exp_get_by_name(rqstp, path);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b660435..99f8357 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -55,6 +55,7 @@
 #include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
+#include <linux/ima.h>
 
 #include <asm/uaccess.h>
 
@@ -100,36 +101,35 @@
 {
 	struct svc_export *exp = *expp, *exp2 = NULL;
 	struct dentry *dentry = *dpp;
-	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-	struct dentry *mounts = dget(dentry);
+	struct path path = {.mnt = mntget(exp->ex_path.mnt),
+			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+	while (d_mountpoint(path.dentry) && follow_down(&path))
+		;
 
-	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
 		if (PTR_ERR(exp2) != -ENOENT)
 			err = PTR_ERR(exp2);
-		dput(mounts);
-		mntput(mnt);
+		path_put(&path);
 		goto out;
 	}
 	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
-		 * This is subtle: dentry is *not* under mnt at this point.
-		 * The only reason we are safe is that original mnt is pinned
-		 * down by exp, so we should dput before putting exp.
+		 * This is subtle: path.dentry is *not* on path.mnt
+		 * at this point.  The only reason we are safe is that
+		 * original mnt is pinned down by exp, so we should
+		 * put path *before* putting exp
 		 */
-		dput(dentry);
-		*dpp = mounts;
-		exp_put(exp);
+		*dpp = path.dentry;
+		path.dentry = dentry;
 		*expp = exp2;
-	} else {
-		exp_put(exp2);
-		dput(mounts);
+		exp2 = exp;
 	}
-	mntput(mnt);
+	path_put(&path);
+	exp_put(exp2);
 out:
 	return err;
 }
@@ -168,28 +168,29 @@
 			/* checking mountpoint crossing is very different when stepping up */
 			struct svc_export *exp2 = NULL;
 			struct dentry *dp;
-			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-			dentry = dget(dparent);
-			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
-				;
-			dp = dget_parent(dentry);
-			dput(dentry);
-			dentry = dp;
+			struct path path = {.mnt = mntget(exp->ex_path.mnt),
+					    .dentry = dget(dparent)};
 
-			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+			while (path.dentry == path.mnt->mnt_root &&
+			       follow_up(&path))
+				;
+			dp = dget_parent(path.dentry);
+			dput(path.dentry);
+			path.dentry = dp;
+
+			exp2 = rqst_exp_parent(rqstp, &path);
 			if (PTR_ERR(exp2) == -ENOENT) {
-				dput(dentry);
 				dentry = dget(dparent);
 			} else if (IS_ERR(exp2)) {
 				host_err = PTR_ERR(exp2);
-				dput(dentry);
-				mntput(mnt);
+				path_put(&path);
 				goto out_nfserr;
 			} else {
+				dentry = dget(path.dentry);
 				exp_put(exp);
 				exp = exp2;
 			}
-			mntput(mnt);
+			path_put(&path);
 		}
 	} else {
 		fh_lock(fhp);
@@ -735,6 +736,8 @@
 			    flags, cred);
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
+	else
+		ima_counts_get(*filp);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
@@ -2024,6 +2027,7 @@
 					struct dentry *dentry, int acc)
 {
 	struct inode	*inode = dentry->d_inode;
+	struct path	path;
 	int		err;
 
 	if (acc == NFSD_MAY_NOP)
@@ -2096,7 +2100,17 @@
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
 	    acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
 		err = inode_permission(inode, MAY_EXEC);
+	if (err)
+		goto nfsd_out;
 
+	/* Do integrity (permission) checking now, but defer incrementing
+	 * IMA counts to the actual file open.
+	 */
+	path.mnt = exp->ex_path.mnt;
+	path.dentry = dentry;
+	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
+			     IMA_COUNT_LEAVE);
+nfsd_out:
 	return err? nfserrno(err) : 0;
 }
 
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 300f1cd..cadd36b 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -864,11 +864,11 @@
 	case NILFS_CHECKPOINT:
 		/*
 		 * Check for protecting existing snapshot mounts:
-		 * bd_mount_sem is used to make this operation atomic and
+		 * ns_mount_mutex is used to make this operation atomic and
 		 * exclusive with a new mount job.  Though it doesn't cover
 		 * umount, it's enough for the purpose.
 		 */
-		down(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_lock(&nilfs->ns_mount_mutex);
 		if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
 			/* Current implementation does not have to protect
 			   plain read-only mounts since they are exclusive
@@ -877,7 +877,7 @@
 			ret = -EBUSY;
 		} else
 			ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
-		up(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_unlock(&nilfs->ns_mount_mutex);
 		return ret;
 	case NILFS_SNAPSHOT:
 		return nilfs_cpfile_set_snapshot(cpfile, cno);
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index adccd4f..0776ccc 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -60,6 +60,7 @@
 	struct super_block *s_super;	/* reverse pointer to super_block */
 	struct the_nilfs *s_nilfs;
 	struct list_head s_list;	/* list head for nilfs->ns_supers */
+	atomic_t s_count;		/* reference count */
 
 	/* Segment constructor */
 	struct list_head s_dirty_files;	/* dirty files list */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6989b03..1777a34 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -65,9 +65,8 @@
 		   "(NILFS)");
 MODULE_LICENSE("GPL");
 
+static void nilfs_write_super(struct super_block *sb);
 static int nilfs_remount(struct super_block *sb, int *flags, char *data);
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags);
 
 /**
  * nilfs_error() - report failure condition on a filesystem
@@ -315,6 +314,11 @@
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 	struct the_nilfs *nilfs = sbi->s_nilfs;
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		nilfs_write_super(sb);
+
 	nilfs_detach_segment_constructor(sbi);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
@@ -323,12 +327,18 @@
 		nilfs_commit_super(sbi, 1);
 		up_write(&nilfs->ns_sem);
 	}
+	down_write(&nilfs->ns_super_sem);
+	if (nilfs->ns_current == sbi)
+		nilfs->ns_current = NULL;
+	up_write(&nilfs->ns_super_sem);
 
 	nilfs_detach_checkpoint(sbi);
 	put_nilfs(sbi->s_nilfs);
 	sbi->s_super = NULL;
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
+
+	unlock_kernel();
 }
 
 /**
@@ -383,6 +393,8 @@
 {
 	int err = 0;
 
+	nilfs_write_super(sb);
+
 	/* This function is called when super block should be written back */
 	if (wait)
 		err = nilfs_construct_segment(sb);
@@ -396,9 +408,9 @@
 	struct buffer_head *bh_cp;
 	int err;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_add(&sbi->s_list, &nilfs->ns_supers);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	sbi->s_ifile = nilfs_mdt_new(
 		nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
@@ -436,9 +448,9 @@
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	return err;
 }
@@ -450,9 +462,9 @@
 	nilfs_mdt_clear(sbi->s_ifile);
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 }
 
 static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
@@ -752,7 +764,7 @@
  * @silent: silent mode flag
  * @nilfs: the_nilfs struct
  *
- * This function is called exclusively by bd_mount_mutex.
+ * This function is called exclusively by nilfs->ns_mount_mutex.
  * So, the recovery process is protected from other simultaneous mounts.
  */
 static int
@@ -773,6 +785,7 @@
 	get_nilfs(nilfs);
 	sbi->s_nilfs = nilfs;
 	sbi->s_super = sb;
+	atomic_set(&sbi->s_count, 1);
 
 	err = init_nilfs(nilfs, sbi, (char *)data);
 	if (err)
@@ -870,6 +883,11 @@
 		goto failed_root;
 	}
 
+	down_write(&nilfs->ns_super_sem);
+	if (!nilfs_test_opt(sbi, SNAPSHOT))
+		nilfs->ns_current = sbi;
+	up_write(&nilfs->ns_super_sem);
+
 	return 0;
 
  failed_root:
@@ -885,7 +903,7 @@
  failed_sbi:
 	put_nilfs(nilfs);
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
 	return err;
 }
 
@@ -898,6 +916,9 @@
 	struct nilfs_mount_options old_opts;
 	int err;
 
+	lock_kernel();
+
+	down_write(&nilfs->ns_super_sem);
 	old_sb_flags = sb->s_flags;
 	old_opts.mount_opt = sbi->s_mount_opt;
 	old_opts.snapshot_cno = sbi->s_snapshot_cno;
@@ -945,14 +966,12 @@
 		 * store the current valid flag.  (It may have been changed
 		 * by fsck since we originally mounted the partition.)
 		 */
-		down(&sb->s_bdev->bd_mount_sem);
-		/* Check existing RW-mount */
-		if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
+		if (nilfs->ns_current && nilfs->ns_current != sbi) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
-			       "remount because a RW-mount exists.\n",
+			       "remount because an RW-mount exists.\n",
 			       sb->s_id);
 			err = -EBUSY;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
 		if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
@@ -960,7 +979,7 @@
 			       "the latest one.\n",
 			       sb->s_id);
 			err = -EINVAL;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
 		sb->s_flags &= ~MS_RDONLY;
 		nilfs_clear_opt(sbi, SNAPSHOT);
@@ -968,28 +987,31 @@
 
 		err = nilfs_attach_segment_constructor(sbi);
 		if (err)
-			goto rw_remount_failed;
+			goto restore_opts;
 
 		down_write(&nilfs->ns_sem);
 		nilfs_setup_super(sbi);
 		up_write(&nilfs->ns_sem);
 
-		up(&sb->s_bdev->bd_mount_sem);
+		nilfs->ns_current = sbi;
 	}
  out:
+	up_write(&nilfs->ns_super_sem);
+	unlock_kernel();
 	return 0;
 
- rw_remount_failed:
-	up(&sb->s_bdev->bd_mount_sem);
  restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.mount_opt;
 	sbi->s_snapshot_cno = old_opts.snapshot_cno;
+	up_write(&nilfs->ns_super_sem);
+	unlock_kernel();
 	return err;
 }
 
 struct nilfs_super_data {
 	struct block_device *bdev;
+	struct nilfs_sb_info *sbi;
 	__u64 cno;
 	int flags;
 };
@@ -1048,33 +1070,7 @@
 {
 	struct nilfs_super_data *sd = data;
 
-	return s->s_bdev == sd->bdev;
-}
-
-static int nilfs_test_bdev_super2(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-
-	if (!((s->s_flags | sd->flags) & MS_RDONLY))
-		return 1; /* Reuse an old R/W-mode super_block */
-
-	if (s->s_flags & sd->flags & MS_RDONLY) {
-		if (down_read_trylock(&s->s_umount)) {
-			ret = s->s_root &&
-				(sd->cno == NILFS_SB(s)->s_snapshot_cno);
-			up_read(&s->s_umount);
-			/*
-			 * This path is locked with sb_lock by sget().
-			 * So, drop_super() causes deadlock.
-			 */
-			return ret;
-		}
-	}
-	return 0;
+	return sd->sbi && s->s_fs_info == (void *)sd->sbi;
 }
 
 static int
@@ -1082,8 +1078,8 @@
 	     const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	struct nilfs_super_data sd;
-	struct super_block *s, *s2;
-	struct the_nilfs *nilfs = NULL;
+	struct super_block *s;
+	struct the_nilfs *nilfs;
 	int err, need_to_close = 1;
 
 	sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
@@ -1095,7 +1091,6 @@
 	 * much more information than normal filesystems to identify mount
 	 * instance.  For snapshot mounts, not only a mount type (ro-mount
 	 * or rw-mount) but also a checkpoint number is required.
-	 * The results are passed in sget() using nilfs_super_data.
 	 */
 	sd.cno = 0;
 	sd.flags = flags;
@@ -1104,64 +1099,59 @@
 		goto failed;
 	}
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	down(&sd.bdev->bd_mount_sem);
-	if (!sd.cno &&
-	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
-		err = (err < 0) ? : -EBUSY;
-		goto failed_unlock;
+	nilfs = find_or_create_nilfs(sd.bdev);
+	if (!nilfs) {
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	mutex_lock(&nilfs->ns_mount_mutex);
+
+	if (!sd.cno) {
+		/*
+		 * Check if an exclusive mount exists or not.
+		 * Snapshot mounts coexist with a current mount
+		 * (i.e. rw-mount or ro-mount), whereas rw-mount and
+		 * ro-mount are mutually exclusive.
+		 */
+		down_read(&nilfs->ns_super_sem);
+		if (nilfs->ns_current &&
+		    ((nilfs->ns_current->s_super->s_flags ^ flags)
+		     & MS_RDONLY)) {
+			up_read(&nilfs->ns_super_sem);
+			err = -EBUSY;
+			goto failed_unlock;
+		}
+		up_read(&nilfs->ns_super_sem);
 	}
 
 	/*
-	 * Phase-1: search any existent instance and get the_nilfs
+	 * Find existing nilfs_sb_info struct
+	 */
+	sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
+
+	if (!sd.cno)
+		/* trying to get the latest checkpoint.  */
+		sd.cno = nilfs_last_cno(nilfs);
+
+	/*
+	 * Get super block instance holding the nilfs_sb_info struct.
+	 * A new instance is allocated if no existing mount is present or
+	 * existing instance has been unmounted.
 	 */
 	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
-	if (IS_ERR(s))
-		goto error_s;
+	if (sd.sbi)
+		nilfs_put_sbinfo(sd.sbi);
 
-	if (!s->s_root) {
-		err = -ENOMEM;
-		nilfs = alloc_nilfs(sd.bdev);
-		if (!nilfs)
-			goto cancel_new;
-	} else {
-		struct nilfs_sb_info *sbi = NILFS_SB(s);
-
-		/*
-		 * s_umount protects super_block from unmount process;
-		 * It covers pointers of nilfs_sb_info and the_nilfs.
-		 */
-		nilfs = sbi->s_nilfs;
-		get_nilfs(nilfs);
-		up_write(&s->s_umount);
-
-		/*
-		 * Phase-2: search specified snapshot or R/W mode super_block
-		 */
-		if (!sd.cno)
-			/* trying to get the latest checkpoint.  */
-			sd.cno = nilfs_last_cno(nilfs);
-
-		s2 = sget(fs_type, nilfs_test_bdev_super2,
-			  nilfs_set_bdev_super, &sd);
-		deactivate_super(s);
-		/*
-		 * Although deactivate_super() invokes close_bdev_exclusive() at
-		 * kill_block_super().  Here, s is an existent mount; we need
-		 * one more close_bdev_exclusive() call.
-		 */
-		s = s2;
-		if (IS_ERR(s))
-			goto error_s;
+	if (IS_ERR(s)) {
+		err = PTR_ERR(s);
+		goto failed_unlock;
 	}
 
 	if (!s->s_root) {
 		char b[BDEVNAME_SIZE];
 
+		/* New superblock instance created */
 		s->s_flags = flags;
 		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(sd.bdev));
@@ -1172,26 +1162,18 @@
 
 		s->s_flags |= MS_ACTIVE;
 		need_to_close = 0;
-	} else if (!(s->s_flags & MS_RDONLY)) {
-		err = -EBUSY;
 	}
 
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
 	put_nilfs(nilfs);
 	if (need_to_close)
 		close_bdev_exclusive(sd.bdev, flags);
 	simple_set_mnt(mnt, s);
 	return 0;
 
- error_s:
-	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
-	close_bdev_exclusive(sd.bdev, flags);
-	return PTR_ERR(s);
-
  failed_unlock:
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
+	put_nilfs(nilfs);
  failed:
 	close_bdev_exclusive(sd.bdev, flags);
 
@@ -1199,70 +1181,18 @@
 
  cancel_new:
 	/* Abandoning the newly allocated superblock */
-	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
+	mutex_unlock(&nilfs->ns_mount_mutex);
+	put_nilfs(nilfs);
 	up_write(&s->s_umount);
 	deactivate_super(s);
 	/*
 	 * deactivate_super() invokes close_bdev_exclusive().
 	 * We must finish all post-cleaning before this call;
-	 * put_nilfs() and unlocking bd_mount_sem need the block device.
+	 * put_nilfs() needs the block device.
 	 */
 	return err;
 }
 
-static int nilfs_test_bdev_super3(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-	if (down_read_trylock(&s->s_umount)) {
-		ret = (s->s_flags & MS_RDONLY) && s->s_root &&
-			nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
-		up_read(&s->s_umount);
-		if (ret)
-			return 0; /* ignore snapshot mounts */
-	}
-	return !((sd->flags ^ s->s_flags) & MS_RDONLY);
-}
-
-static int __false_bdev_super(struct super_block *s, void *data)
-{
-#if 0 /* XXX: workaround for lock debug. This is not good idea */
-	up_write(&s->s_umount);
-#endif
-	return -EFAULT;
-}
-
-/**
- * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
- * fs_type: filesystem type
- * bdev: block device
- * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
- * res: pointer to an integer to store result
- *
- * This function must be called within a section protected by bd_mount_mutex.
- */
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags)
-{
-	struct super_block *s;
-	struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };
-
-	s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
-	if (IS_ERR(s)) {
-		if (PTR_ERR(s) != -EFAULT)
-			return PTR_ERR(s);
-		return 0;  /* Not found */
-	}
-	up_write(&s->s_umount);
-	deactivate_super(s);
-	return 1;  /* Found */
-}
-
 struct file_system_type nilfs_fs_type = {
 	.owner    = THIS_MODULE,
 	.name     = "nilfs2",
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 7f65b3b..e4e5c78 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -35,6 +35,10 @@
 #include "seglist.h"
 #include "segbuf.h"
 
+
+static LIST_HEAD(nilfs_objects);
+static DEFINE_SPINLOCK(nilfs_lock);
+
 void nilfs_set_last_segment(struct the_nilfs *nilfs,
 			    sector_t start_blocknr, u64 seq, __u64 cno)
 {
@@ -55,7 +59,7 @@
  * Return Value: On success, pointer to the_nilfs is returned.
  * On error, NULL is returned.
  */
-struct the_nilfs *alloc_nilfs(struct block_device *bdev)
+static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 {
 	struct the_nilfs *nilfs;
 
@@ -68,7 +72,10 @@
 	atomic_set(&nilfs->ns_writer_refcount, -1);
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
+	init_rwsem(&nilfs->ns_super_sem);
+	mutex_init(&nilfs->ns_mount_mutex);
 	mutex_init(&nilfs->ns_writer_mutex);
+	INIT_LIST_HEAD(&nilfs->ns_list);
 	INIT_LIST_HEAD(&nilfs->ns_supers);
 	spin_lock_init(&nilfs->ns_last_segment_lock);
 	nilfs->ns_gc_inodes_h = NULL;
@@ -78,6 +85,45 @@
 }
 
 /**
+ * find_or_create_nilfs - find or create nilfs object
+ * @bdev: block device to which the_nilfs is related
+ *
+ * find_nilfs() looks up an existent nilfs object created on the
+ * device and gets the reference count of the object.  If no nilfs object
+ * is found on the device, a new nilfs object is allocated.
+ *
+ * Return Value: On success, pointer to the nilfs object is returned.
+ * On error, NULL is returned.
+ */
+struct the_nilfs *find_or_create_nilfs(struct block_device *bdev)
+{
+	struct the_nilfs *nilfs, *new = NULL;
+
+ retry:
+	spin_lock(&nilfs_lock);
+	list_for_each_entry(nilfs, &nilfs_objects, ns_list) {
+		if (nilfs->ns_bdev == bdev) {
+			get_nilfs(nilfs);
+			spin_unlock(&nilfs_lock);
+			if (new)
+				put_nilfs(new);
+			return nilfs; /* existing object */
+		}
+	}
+	if (new) {
+		list_add_tail(&new->ns_list, &nilfs_objects);
+		spin_unlock(&nilfs_lock);
+		return new; /* new object */
+	}
+	spin_unlock(&nilfs_lock);
+
+	new = alloc_nilfs(bdev);
+	if (new)
+		goto retry;
+	return NULL; /* insufficient memory */
+}
+
+/**
  * put_nilfs - release a reference to the_nilfs
  * @nilfs: the_nilfs structure to be released
  *
@@ -86,13 +132,20 @@
  */
 void put_nilfs(struct the_nilfs *nilfs)
 {
-	if (!atomic_dec_and_test(&nilfs->ns_count))
+	spin_lock(&nilfs_lock);
+	if (!atomic_dec_and_test(&nilfs->ns_count)) {
+		spin_unlock(&nilfs_lock);
 		return;
+	}
+	list_del_init(&nilfs->ns_list);
+	spin_unlock(&nilfs_lock);
+
 	/*
-	 * Increment of ns_count never occur below because the caller
+	 * Increment of ns_count never occurs below because the caller
 	 * of get_nilfs() holds at least one reference to the_nilfs.
 	 * Thus its exclusion control is not required here.
 	 */
+
 	might_sleep();
 	if (nilfs_loaded(nilfs)) {
 		nilfs_mdt_clear(nilfs->ns_sufile);
@@ -515,7 +568,7 @@
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
 	if (sb->s_blocksize != blocksize) {
-		int hw_blocksize = bdev_hardsect_size(sb->s_bdev);
+		int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
 
 		if (blocksize < hw_blocksize) {
 			printk(KERN_ERR
@@ -613,13 +666,63 @@
 	return ret;
 }
 
+/**
+ * nilfs_find_sbinfo - find existing nilfs_sb_info structure
+ * @nilfs: nilfs object
+ * @rw_mount: mount type (non-zero value for read/write mount)
+ * @cno: checkpoint number (zero for read-only mount)
+ *
+ * nilfs_find_sbinfo() returns the nilfs_sb_info structure which
+ * @rw_mount and @cno (in case of snapshots) matched.  If no instance
+ * was found, NULL is returned.  Although the super block instance can
+ * be unmounted after this function returns, the nilfs_sb_info struct
+ * is kept on memory until nilfs_put_sbinfo() is called.
+ */
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
+					int rw_mount, __u64 cno)
+{
+	struct nilfs_sb_info *sbi;
+
+	down_read(&nilfs->ns_super_sem);
+	/*
+	 * The SNAPSHOT flag and sb->s_flags are supposed to be
+	 * protected with nilfs->ns_super_sem.
+	 */
+	sbi = nilfs->ns_current;
+	if (rw_mount) {
+		if (sbi && !(sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read/write mount */
+		else
+			goto out;
+	} else if (cno == 0) {
+		if (sbi && (sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read-only mount */
+		else
+			goto out;
+	}
+
+	list_for_each_entry(sbi, &nilfs->ns_supers, s_list) {
+		if (nilfs_test_opt(sbi, SNAPSHOT) &&
+		    sbi->s_snapshot_cno == cno)
+			goto found; /* snapshot mount */
+	}
+ out:
+	up_read(&nilfs->ns_super_sem);
+	return NULL;
+
+ found:
+	atomic_inc(&sbi->s_count);
+	up_read(&nilfs->ns_super_sem);
+	return sbi;
+}
+
 int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 				int snapshot_mount)
 {
 	struct nilfs_sb_info *sbi;
 	int ret = 0;
 
-	down_read(&nilfs->ns_sem);
+	down_read(&nilfs->ns_super_sem);
 	if (cno == 0 || cno > nilfs->ns_cno)
 		goto out_unlock;
 
@@ -636,6 +739,6 @@
 		ret++;
 
  out_unlock:
-	up_read(&nilfs->ns_sem);
+	up_read(&nilfs->ns_super_sem);
 	return ret;
 }
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 30fe587..e8adbff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -43,12 +43,16 @@
  * struct the_nilfs - struct to supervise multiple nilfs mount points
  * @ns_flags: flags
  * @ns_count: reference count
+ * @ns_list: list head for nilfs_list
  * @ns_bdev: block device
  * @ns_bdi: backing dev info
  * @ns_writer: back pointer to writable nilfs_sb_info
  * @ns_sem: semaphore for shared states
+ * @ns_super_sem: semaphore for global operations across super block instances
+ * @ns_mount_mutex: mutex protecting mount process of nilfs
  * @ns_writer_mutex: mutex protecting ns_writer attach/detach
  * @ns_writer_refcount: number of referrers on ns_writer
+ * @ns_current: back pointer to current mount
  * @ns_sbh: buffer heads of on-disk super blocks
  * @ns_sbp: pointers to super block data
  * @ns_sbwtime: previous write time of super blocks
@@ -88,15 +92,24 @@
 struct the_nilfs {
 	unsigned long		ns_flags;
 	atomic_t		ns_count;
+	struct list_head	ns_list;
 
 	struct block_device    *ns_bdev;
 	struct backing_dev_info *ns_bdi;
 	struct nilfs_sb_info   *ns_writer;
 	struct rw_semaphore	ns_sem;
+	struct rw_semaphore	ns_super_sem;
+	struct mutex		ns_mount_mutex;
 	struct mutex		ns_writer_mutex;
 	atomic_t		ns_writer_refcount;
 
 	/*
+	 * components protected by ns_super_sem
+	 */
+	struct nilfs_sb_info   *ns_current;
+	struct list_head	ns_supers;
+
+	/*
 	 * used for
 	 * - loading the latest checkpoint exclusively.
 	 * - allocating a new full segment.
@@ -108,7 +121,6 @@
 	time_t			ns_sbwtime[2];
 	unsigned		ns_sbsize;
 	unsigned		ns_mount_state;
-	struct list_head	ns_supers;
 
 	/*
 	 * Following fields are dedicated to a writable FS-instance.
@@ -191,11 +203,12 @@
 #define NILFS_ALTSB_FREQ	60  /* spare superblock */
 
 void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
-struct the_nilfs *alloc_nilfs(struct block_device *);
+struct the_nilfs *find_or_create_nilfs(struct block_device *);
 void put_nilfs(struct the_nilfs *);
 int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
 int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
 int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
 int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
 int nilfs_near_disk_full(struct the_nilfs *);
 void nilfs_fall_back_super_block(struct the_nilfs *);
@@ -238,6 +251,12 @@
 	mutex_unlock(&nilfs->ns_writer_mutex);
 }
 
+static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
+{
+	if (!atomic_dec_and_test(&sbi->s_count))
+		kfree(sbi);
+}
+
 static inline void
 nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
 			sector_t *seg_start, sector_t *seg_end)
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 50914d7..31dac7e 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,2 +1,15 @@
+config FSNOTIFY
+	bool "Filesystem notification backend"
+	default y
+	---help---
+	   fsnotify is a backend for filesystem notification.  fsnotify does
+	   not provide any userspace interface but does provide the basis
+	   needed for other notification schemes such as dnotify, inotify,
+	   and fanotify.
+
+	   Say Y here to enable fsnotify suport.
+
+	   If unsure, say Y.
+
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 5a95b60..0922cc8 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,2 +1,4 @@
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
+
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index 26adf5d..904ff8d 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,5 +1,6 @@
 config DNOTIFY
 	bool "Dnotify support"
+	depends on FSNOTIFY
 	default y
 	help
 	  Dnotify is a directory-based per-fd file change notification system
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b0aa2cd..828a889 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2000,2001,2002 Stephen Rothwell
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * dnotify was largly rewritten to use the new fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -21,170 +24,417 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
 
 int dir_notify_enable __read_mostly = 1;
 
-static struct kmem_cache *dn_cache __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct fsnotify_group *dnotify_group __read_mostly;
+static DEFINE_MUTEX(dnotify_mark_mutex);
 
-static void redo_inode_mask(struct inode *inode)
-{
-	unsigned long new_mask;
+/*
+ * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * is being watched by dnotify.  If multiple userspace applications are watching
+ * the same directory with dnotify their information is chained in dn
+ */
+struct dnotify_mark_entry {
+	struct fsnotify_mark_entry fsn_entry;
 	struct dnotify_struct *dn;
+};
 
+/*
+ * When a process starts or stops watching an inode the set of events which
+ * dnotify cares about for that inode may change.  This function runs the
+ * list of everything receiving dnotify events about this directory and calculates
+ * the set of all those events.  After it updates what dnotify is interested in
+ * it calls the fsnotify function so it can update the set of all events relevant
+ * to this inode.
+ */
+static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
+{
+	__u32 new_mask, old_mask;
+	struct dnotify_struct *dn;
+	struct dnotify_mark_entry *dnentry  = container_of(entry,
+							   struct dnotify_mark_entry,
+							   fsn_entry);
+
+	assert_spin_locked(&entry->lock);
+
+	old_mask = entry->mask;
 	new_mask = 0;
-	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
-		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
-	inode->i_dnotify_mask = new_mask;
-}
+	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
+	entry->mask = new_mask;
 
-void dnotify_flush(struct file *filp, fl_owner_t id)
-{
-	struct dnotify_struct *dn;
-	struct dnotify_struct **prev;
-	struct inode *inode;
-
-	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
+	if (old_mask == new_mask)
 		return;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
-			*prev = dn->dn_next;
-			redo_inode_mask(inode);
-			kmem_cache_free(dn_cache, dn);
-			break;
-		}
-		prev = &dn->dn_next;
-	}
-	spin_unlock(&inode->i_lock);
+
+	if (entry->inode)
+		fsnotify_recalc_inode_mask(entry->inode);
 }
 
-int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+/*
+ * Mains fsnotify call where events are delivered to dnotify.
+ * Find the dnotify mark on the relevant inode, run the list of dnotify structs
+ * on that mark and determine which of them has expressed interest in receiving
+ * events of this type.  When found send the correct process and signal and
+ * destroy the dnotify struct if it was not registered to receive multiple
+ * events.
+ */
+static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_event *event)
 {
+	struct fsnotify_mark_entry *entry = NULL;
+	struct dnotify_mark_entry *dnentry;
+	struct inode *to_tell;
 	struct dnotify_struct *dn;
-	struct dnotify_struct *odn;
 	struct dnotify_struct **prev;
-	struct inode *inode;
-	fl_owner_t id = current->files;
-	struct file *f;
-	int error = 0;
+	struct fown_struct *fown;
 
-	if ((arg & ~DN_MULTISHOT) == 0) {
-		dnotify_flush(filp, id);
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+
+	/* unlikely since we alreay passed dnotify_should_send_event() */
+	if (unlikely(!entry))
 		return 0;
-	}
-	if (!dir_notify_enable)
-		return -EINVAL;
-	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
-	if (dn == NULL)
-		return -ENOMEM;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((odn = *prev) != NULL) {
-		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-			odn->dn_fd = fd;
-			odn->dn_mask |= arg;
-			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-			goto out_free;
-		}
-		prev = &odn->dn_next;
-	}
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
 
-	rcu_read_lock();
-	f = fcheck(fd);
-	rcu_read_unlock();
-	/* we'd lost the race with close(), sod off silently */
-	/* note that inode->i_lock prevents reordering problems
-	 * between accesses to descriptor table and ->i_dnotify */
-	if (f != filp)
-		goto out_free;
-
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error)
-		goto out_free;
-
-	dn->dn_mask = arg;
-	dn->dn_fd = fd;
-	dn->dn_filp = filp;
-	dn->dn_owner = id;
-	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-	dn->dn_next = inode->i_dnotify;
-	inode->i_dnotify = dn;
-	spin_unlock(&inode->i_lock);
-	return 0;
-
-out_free:
-	spin_unlock(&inode->i_lock);
-	kmem_cache_free(dn_cache, dn);
-	return error;
-}
-
-void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	struct dnotify_struct *	dn;
-	struct dnotify_struct **prev;
-	struct fown_struct *	fown;
-	int			changed = 0;
-
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
 	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event) == 0) {
+		if ((dn->dn_mask & event->mask) == 0) {
 			prev = &dn->dn_next;
 			continue;
 		}
 		fown = &dn->dn_filp->f_owner;
 		send_sigio(fown, dn->dn_fd, POLL_MSG);
-		if (dn->dn_mask & DN_MULTISHOT)
+		if (dn->dn_mask & FS_DN_MULTISHOT)
 			prev = &dn->dn_next;
 		else {
 			*prev = dn->dn_next;
-			changed = 1;
-			kmem_cache_free(dn_cache, dn);
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
 		}
 	}
-	if (changed)
-		redo_inode_mask(inode);
-	spin_unlock(&inode->i_lock);
-}
 
-EXPORT_SYMBOL(__inode_dir_notify);
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);
+
+	return 0;
+}
 
 /*
- * This is hopelessly wrong, but unfixable without API changes.  At
- * least it doesn't oops the kernel...
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
+ * Given an inode and mask determine if dnotify would be interested in sending
+ * userspace notification for that pair.
  */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
+static bool dnotify_should_send_event(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask)
 {
-	struct dentry *parent;
+	struct fsnotify_mark_entry *entry;
+	bool send;
 
+	/* !dir_notify_enable should never get here, don't waste time checking
 	if (!dir_notify_enable)
+		return 0; */
+
+	/* not a dir, dnotify doesn't care */
+	if (!S_ISDIR(inode->i_mode))
+		return false;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+
+	/* no mark means no dnotify watch */
+	if (!entry)
+		return false;
+
+	mask = (mask & ~FS_EVENT_ON_CHILD);
+	send = (mask & entry->mask);
+
+	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
+
+	return send;
+}
+
+static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct dnotify_mark_entry *dnentry = container_of(entry,
+							  struct dnotify_mark_entry,
+							  fsn_entry);
+
+	BUG_ON(dnentry->dn);
+
+	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+}
+
+static struct fsnotify_ops dnotify_fsnotify_ops = {
+	.handle_event = dnotify_handle_event,
+	.should_send_event = dnotify_should_send_event,
+	.free_group_priv = NULL,
+	.freeing_mark = NULL,
+	.free_event_priv = NULL,
+};
+
+/*
+ * Called every time a file is closed.  Looks first for a dnotify mark on the
+ * inode.  If one is found run all of the ->dn entries attached to that
+ * mark for one relevant to this process closing the file and remove that
+ * dnotify_struct.  If that was the last dnotify_struct also remove the
+ * fsnotify_mark_entry.
+ */
+void dnotify_flush(struct file *filp, fl_owner_t id)
+{
+	struct fsnotify_mark_entry *entry;
+	struct dnotify_mark_entry *dnentry;
+	struct dnotify_struct *dn;
+	struct dnotify_struct **prev;
+	struct inode *inode;
+
+	inode = filp->f_path.dentry->d_inode;
+	if (!S_ISDIR(inode->i_mode))
 		return;
 
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		__inode_dir_notify(parent->d_inode, event);
-		dput(parent);
-	} else {
-		spin_unlock(&dentry->d_lock);
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	mutex_lock(&dnotify_mark_mutex);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
+			*prev = dn->dn_next;
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
+			break;
+		}
+		prev = &dn->dn_next;
 	}
+
+	spin_unlock(&entry->lock);
+
+	/* nothing else could have found us thanks to the dnotify_mark_mutex */
+	if (dnentry->dn == NULL)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+
+	fsnotify_put_mark(entry);
 }
-EXPORT_SYMBOL_GPL(dnotify_parent);
+
+/* this conversion is done only at watch creation */
+static __u32 convert_arg(unsigned long arg)
+{
+	__u32 new_mask = FS_EVENT_ON_CHILD;
+
+	if (arg & DN_MULTISHOT)
+		new_mask |= FS_DN_MULTISHOT;
+	if (arg & DN_DELETE)
+		new_mask |= (FS_DELETE | FS_MOVED_FROM);
+	if (arg & DN_MODIFY)
+		new_mask |= FS_MODIFY;
+	if (arg & DN_ACCESS)
+		new_mask |= FS_ACCESS;
+	if (arg & DN_ATTRIB)
+		new_mask |= FS_ATTRIB;
+	if (arg & DN_RENAME)
+		new_mask |= FS_DN_RENAME;
+	if (arg & DN_CREATE)
+		new_mask |= (FS_CREATE | FS_MOVED_TO);
+
+	return new_mask;
+}
+
+/*
+ * If multiple processes watch the same inode with dnotify there is only one
+ * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * onto that mark.  This function either attaches the new dnotify_struct onto
+ * that list, or it |= the mask onto an existing dnofiy_struct.
+ */
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
+{
+	struct dnotify_struct *odn;
+
+	odn = dnentry->dn;
+	while (odn != NULL) {
+		/* adding more events to existing dnofiy_struct? */
+		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+			odn->dn_fd = fd;
+			odn->dn_mask |= mask;
+			return -EEXIST;
+		}
+		odn = odn->dn_next;
+	}
+
+	dn->dn_mask = mask;
+	dn->dn_fd = fd;
+	dn->dn_filp = filp;
+	dn->dn_owner = id;
+	dn->dn_next = dnentry->dn;
+	dnentry->dn = dn;
+
+	return 0;
+}
+
+/*
+ * When a process calls fcntl to attach a dnotify watch to a directory it ends
+ * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
+ * attached to the fsnotify_mark.
+ */
+int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+{
+	struct dnotify_mark_entry *new_dnentry, *dnentry;
+	struct fsnotify_mark_entry *new_entry, *entry;
+	struct dnotify_struct *dn;
+	struct inode *inode;
+	fl_owner_t id = current->files;
+	struct file *f;
+	int destroy = 0, error = 0;
+	__u32 mask;
+
+	/* we use these to tell if we need to kfree */
+	new_entry = NULL;
+	dn = NULL;
+
+	if (!dir_notify_enable) {
+		error = -EINVAL;
+		goto out_err;
+	}
+
+	/* a 0 mask means we are explicitly removing the watch */
+	if ((arg & ~DN_MULTISHOT) == 0) {
+		dnotify_flush(filp, id);
+		error = 0;
+		goto out_err;
+	}
+
+	/* dnotify only works on directories */
+	inode = filp->f_path.dentry->d_inode;
+	if (!S_ISDIR(inode->i_mode)) {
+		error = -ENOTDIR;
+		goto out_err;
+	}
+
+	/* expect most fcntl to add new rather than augment old */
+	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
+	if (!dn) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+
+	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
+	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
+	if (!new_dnentry) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+
+	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
+	mask = convert_arg(arg);
+
+	/* set up the new_entry and new_dnentry */
+	new_entry = &new_dnentry->fsn_entry;
+	fsnotify_init_mark(new_entry, dnotify_free_mark);
+	new_entry->mask = mask;
+	new_dnentry->dn = NULL;
+
+	/* this is needed to prevent the fcntl/close race described below */
+	mutex_lock(&dnotify_mark_mutex);
+
+	/* add the new_entry or find an old one. */
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	spin_unlock(&inode->i_lock);
+	if (entry) {
+		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+		spin_lock(&entry->lock);
+	} else {
+		fsnotify_add_mark(new_entry, dnotify_group, inode);
+		spin_lock(&new_entry->lock);
+		entry = new_entry;
+		dnentry = new_dnentry;
+		/* we used new_entry, so don't free it */
+		new_entry = NULL;
+	}
+
+	rcu_read_lock();
+	f = fcheck(fd);
+	rcu_read_unlock();
+
+	/* if (f != filp) means that we lost a race and another task/thread
+	 * actually closed the fd we are still playing with before we grabbed
+	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+	 * only time we clean up the mark entries we need to get our mark off
+	 * the list. */
+	if (f != filp) {
+		/* if we added ourselves, shoot ourselves, it's possible that
+		 * the flush actually did shoot this entry.  That's fine too
+		 * since multiple calls to destroy_mark is perfectly safe, if
+		 * we found a dnentry already attached to the inode, just sod
+		 * off silently as the flush at close time dealt with it.
+		 */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
+	}
+
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	if (error) {
+		/* if we added, we must shoot */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
+	}
+
+	error = attach_dn(dn, dnentry, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dnentry, so don't free it */
+	if (!error)
+		dn = NULL;
+	/* -EEXIST means that we didn't add this new dn and used an old one.
+	 * that isn't an error (and the unused dn should be freed) */
+	else if (error == -EEXIST)
+		error = 0;
+
+	dnotify_recalc_inode_mask(entry);
+out:
+	spin_unlock(&entry->lock);
+
+	if (destroy)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+	fsnotify_put_mark(entry);
+out_err:
+	if (new_entry)
+		fsnotify_put_mark(new_entry);
+	if (dn)
+		kmem_cache_free(dnotify_struct_cache, dn);
+	return error;
+}
 
 static int __init dnotify_init(void)
 {
-	dn_cache = kmem_cache_create("dnotify_cache",
-		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
+	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+
+	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
+					      0, &dnotify_fsnotify_ops);
+	if (IS_ERR(dnotify_group))
+		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
 }
 
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
new file mode 100644
index 0000000..ec2f7bd
--- /dev/null
+++ b/fs/notify/fsnotify.c
@@ -0,0 +1,186 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/srcu.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+/*
+ * Clear all of the marks on an inode when it is being evicted from core
+ */
+void __fsnotify_inode_delete(struct inode *inode)
+{
+	fsnotify_clear_marks_by_inode(inode);
+}
+EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
+
+/*
+ * Given an inode, first check if we care what happens to our children.  Inotify
+ * and dnotify both tell their parents about events.  If we care about any event
+ * on a child we run all of our children and set a dentry flag saying that the
+ * parent cares.  Thus when an event happens on a child it can quickly tell if
+ * if there is a need to find a parent and send the event to the parent.
+ */
+void __fsnotify_update_child_dentry_flags(struct inode *inode)
+{
+	struct dentry *alias;
+	int watched;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* determine if the children should tell inode about their events */
+	watched = fsnotify_inode_watches_children(inode);
+
+	spin_lock(&dcache_lock);
+	/* run all of the dentries associated with this inode.  Since this is a
+	 * directory, there damn well better only be one item on this list */
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		/* run all of the children of the original inode and fix their
+		 * d_flags to indicate parental interest (their parent is the
+		 * original inode) */
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode)
+				continue;
+
+			spin_lock(&child->d_lock);
+			if (watched)
+				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+			else
+				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/* Notify this dentry's parent about a child's events. */
+void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	struct dentry *parent;
+	struct inode *p_inode;
+	bool send = false;
+	bool should_update_children = false;
+
+	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	p_inode = parent->d_inode;
+
+	if (fsnotify_inode_watches_children(p_inode)) {
+		if (p_inode->i_fsnotify_mask & mask) {
+			dget(parent);
+			send = true;
+		}
+	} else {
+		/*
+		 * The parent doesn't care about events on it's children but
+		 * at least one child thought it did.  We need to run all the
+		 * children and update their d_flags to let them know p_inode
+		 * doesn't care about them any more.
+		 */
+		dget(parent);
+		should_update_children = true;
+	}
+
+	spin_unlock(&dentry->d_lock);
+
+	if (send) {
+		/* we are notifying a parent so come up with the new mask which
+		 * specifies these are events which came from a child. */
+		mask |= FS_EVENT_ON_CHILD;
+
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+			 dentry->d_name.name, 0);
+		dput(parent);
+	}
+
+	if (unlikely(should_update_children)) {
+		__fsnotify_update_child_dentry_flags(p_inode);
+		dput(parent);
+	}
+}
+EXPORT_SYMBOL_GPL(__fsnotify_parent);
+
+/*
+ * This is the main call to fsnotify.  The VFS calls into hook specific functions
+ * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
+ * out to all of the registered fsnotify_group.  Those groups can then use the
+ * notification event in whatever means they feel necessary.
+ */
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *event = NULL;
+	int idx;
+	/* global tests shouldn't care about events on child only the specific event */
+	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+
+	if (list_empty(&fsnotify_groups))
+		return;
+
+	if (!(test_mask & fsnotify_mask))
+		return;
+
+	if (!(test_mask & to_tell->i_fsnotify_mask))
+		return;
+	/*
+	 * SRCU!!  the groups list is very very much read only and the path is
+	 * very hot.  The VAST majority of events are not going to need to do
+	 * anything other than walk the list so it's crazy to pre-allocate.
+	 */
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
+		if (test_mask & group->mask) {
+			if (!group->ops->should_send_event(group, to_tell, mask))
+				continue;
+			if (!event) {
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie);
+				/* shit, we OOM'd and now we can't tell, maybe
+				 * someday someone else will want to do something
+				 * here */
+				if (!event)
+					break;
+			}
+			group->ops->handle_event(group, event);
+		}
+	}
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	/*
+	 * fsnotify_create_event() took a reference so the event can't be cleaned
+	 * up while we are still trying to add it to lists, drop that one.
+	 */
+	if (event)
+		fsnotify_put_event(event);
+}
+EXPORT_SYMBOL_GPL(fsnotify);
+
+static __init int fsnotify_init(void)
+{
+	return init_srcu_struct(&fsnotify_grp_srcu);
+}
+subsys_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
new file mode 100644
index 0000000..4dc2408
--- /dev/null
+++ b/fs/notify/fsnotify.h
@@ -0,0 +1,34 @@
+#ifndef __FS_NOTIFY_FSNOTIFY_H_
+#define __FS_NOTIFY_FSNOTIFY_H_
+
+#include <linux/list.h>
+#include <linux/fsnotify.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+
+/* protects reads of fsnotify_groups */
+extern struct srcu_struct fsnotify_grp_srcu;
+/* all groups which receive fsnotify events */
+extern struct list_head fsnotify_groups;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
+extern __u32 fsnotify_mask;
+
+/* destroy all events sitting in this groups notification queue */
+extern void fsnotify_flush_notify(struct fsnotify_group *group);
+
+/* final kfree of a group */
+extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+
+/* run the list of all marks associated with inode and flag them to be freed */
+extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/*
+ * update the dentry->d_flags of all of inode's children to indicate if inode cares
+ * about events that happen to its children.
+ */
+extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+
+/* allocate and destroy and event holder to attach events to notification/access queues */
+extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+
+#endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
new file mode 100644
index 0000000..0e16771
--- /dev/null
+++ b/fs/notify/group.c
@@ -0,0 +1,254 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/srcu.h>
+#include <linux/rculist.h>
+#include <linux/wait.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+#include <asm/atomic.h>
+
+/* protects writes to fsnotify_groups and fsnotify_mask */
+static DEFINE_MUTEX(fsnotify_grp_mutex);
+/* protects reads while running the fsnotify_groups list */
+struct srcu_struct fsnotify_grp_srcu;
+/* all groups registered to receive filesystem notifications */
+LIST_HEAD(fsnotify_groups);
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_mask;
+
+/*
+ * When a new group registers or changes it's set of interesting events
+ * this function updates the fsnotify_mask to contain all interesting events
+ */
+void fsnotify_recalc_global_mask(void)
+{
+	struct fsnotify_group *group;
+	__u32 mask = 0;
+	int idx;
+
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
+		mask |= group->mask;
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	fsnotify_mask = mask;
+}
+
+/*
+ * Update the group->mask by running all of the marks associated with this
+ * group and finding the bitwise | of all of the mark->mask.  If we change
+ * the group->mask we need to update the global mask of events interesting
+ * to the system.
+ */
+void fsnotify_recalc_group_mask(struct fsnotify_group *group)
+{
+	__u32 mask = 0;
+	__u32 old_mask = group->mask;
+	struct fsnotify_mark_entry *entry;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(entry, &group->mark_entries, g_list)
+		mask |= entry->mask;
+	spin_unlock(&group->mark_lock);
+
+	group->mask = mask;
+
+	if (old_mask != mask)
+		fsnotify_recalc_global_mask();
+}
+
+/*
+ * Take a reference to a group so things found under the fsnotify_grp_mutex
+ * can't get freed under us
+ */
+static void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
+/*
+ * Final freeing of a group
+ */
+void fsnotify_final_destroy_group(struct fsnotify_group *group)
+{
+	/* clear the notification queue of all events */
+	fsnotify_flush_notify(group);
+
+	if (group->ops->free_group_priv)
+		group->ops->free_group_priv(group);
+
+	kfree(group);
+}
+
+/*
+ * Trying to get rid of a group.  We need to first get rid of any outstanding
+ * allocations and then free the group.  Remember that fsnotify_clear_marks_by_group
+ * could miss marks that are being freed by inode and those marks could still
+ * hold a reference to this group (via group->num_marks)  If we get into that
+ * situtation, the fsnotify_final_destroy_group will get called when that final
+ * mark is freed.
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	/* clear all inode mark entries for this group */
+	fsnotify_clear_marks_by_group(group);
+
+	/* past the point of no return, matches the initial value of 1 */
+	if (atomic_dec_and_test(&group->num_marks))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Remove this group from the global list of groups that will get events
+ * this can be done even if there are still references and things still using
+ * this group.  This just stops the group from getting new events.
+ */
+static void __fsnotify_evict_group(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	if (group->on_group_list)
+		list_del_rcu(&group->group_list);
+	group->on_group_list = 0;
+}
+
+/*
+ * Called when a group is no longer interested in getting events.  This can be
+ * used if a group is misbehaving or if for some reason a group should no longer
+ * get any filesystem events.
+ */
+void fsnotify_evict_group(struct fsnotify_group *group)
+{
+	mutex_lock(&fsnotify_grp_mutex);
+	__fsnotify_evict_group(group);
+	mutex_unlock(&fsnotify_grp_mutex);
+}
+
+/*
+ * Drop a reference to a group.  Free it if it's through.
+ */
+void fsnotify_put_group(struct fsnotify_group *group)
+{
+	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
+		return;
+
+	/*
+	 * OK, now we know that there's no other users *and* we hold mutex,
+	 * so no new references will appear
+	 */
+	__fsnotify_evict_group(group);
+
+	/*
+	 * now it's off the list, so the only thing we might care about is
+	 * srcu access....
+	 */
+	mutex_unlock(&fsnotify_grp_mutex);
+	synchronize_srcu(&fsnotify_grp_srcu);
+
+	/* and now it is really dead. _Nothing_ could be seeing it */
+	fsnotify_recalc_global_mask();
+	fsnotify_destroy_group(group);
+}
+
+/*
+ * Simply run the fsnotify_groups list and find a group which matches
+ * the given parameters.  If a group is found we take a reference to that
+ * group.
+ */
+static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
+						  const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group_iter;
+	struct fsnotify_group *group = NULL;
+
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
+		if (group_iter->group_num == group_num) {
+			if ((group_iter->mask == mask) &&
+			    (group_iter->ops == ops)) {
+				fsnotify_get_group(group_iter);
+				group = group_iter;
+			} else
+				group = ERR_PTR(-EEXIST);
+		}
+	}
+	return group;
+}
+
+/*
+ * Either finds an existing group which matches the group_num, mask, and ops or
+ * creates a new group and adds it to the global group list.  In either case we
+ * take a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
+					     const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group, *tgroup;
+
+	/* very low use, simpler locking if we just always alloc */
+	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&group->refcnt, 1);
+
+	group->on_group_list = 0;
+	group->group_num = group_num;
+	group->mask = mask;
+
+	mutex_init(&group->notification_mutex);
+	INIT_LIST_HEAD(&group->notification_list);
+	init_waitqueue_head(&group->notification_waitq);
+	group->q_len = 0;
+	group->max_events = UINT_MAX;
+
+	spin_lock_init(&group->mark_lock);
+	atomic_set(&group->num_marks, 0);
+	INIT_LIST_HEAD(&group->mark_entries);
+
+	group->ops = ops;
+
+	mutex_lock(&fsnotify_grp_mutex);
+	tgroup = fsnotify_find_group(group_num, mask, ops);
+	if (tgroup) {
+		/* group already exists */
+		mutex_unlock(&fsnotify_grp_mutex);
+		/* destroy the new one we made */
+		fsnotify_put_group(group);
+		return tgroup;
+	}
+
+	/* group not found, add a new one */
+	list_add_rcu(&group->group_list, &fsnotify_groups);
+	group->on_group_list = 1;
+	/* being on the fsnotify_groups list holds one num_marks */
+	atomic_inc(&group->num_marks);
+
+	mutex_unlock(&fsnotify_grp_mutex);
+
+	if (mask)
+		fsnotify_recalc_global_mask();
+
+	return group;
+}
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
new file mode 100644
index 0000000..c8a07c6
--- /dev/null
+++ b/fs/notify/inode_mark.c
@@ -0,0 +1,426 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking/lifetime/and refcnting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriete locks, can take a reference
+ * and the object itself is guarenteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * entry->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the mark_entries list anchored inside a given group
+ * and each entry is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
+ * given inode and each entry is hooked via the i_list. (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache. (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up. (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark form the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using i_free_list;  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
+ * we see the group and inode are not NULL we take those locks.  Now holding all
+ * 3 locks we can completely remove the mark from other tasks finding it in the
+ * future.  Remember, 10 things might already be referencing this mark, but they
+ * better be holding a ref.  We drop our reference we took before we unhooked it
+ * from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
+{
+	atomic_inc(&entry->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->refcnt))
+		entry->free_mark(entry);
+}
+
+/*
+ * Recalculate the mask of events relevant to a given inode locked.
+ */
+static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
+		new_mask |= entry->mask;
+	inode->i_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this inode.
+ */
+void fsnotify_recalc_inode_mask(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	fsnotify_recalc_inode_mask_locked(inode);
+	spin_unlock(&inode->i_lock);
+
+	__fsnotify_update_child_dentry_flags(inode);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the entry->lock
+ */
+void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+{
+	struct fsnotify_group *group;
+	struct inode *inode;
+
+	spin_lock(&entry->lock);
+
+	group = entry->group;
+	inode = entry->inode;
+
+	BUG_ON(group && !inode);
+	BUG_ON(!group && inode);
+
+	/* if !group something else already marked this to die */
+	if (!group) {
+		spin_unlock(&entry->lock);
+		return;
+	}
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	hlist_del_init(&entry->i_list);
+	entry->inode = NULL;
+
+	list_del_init(&entry->g_list);
+	entry->group = NULL;
+
+	fsnotify_put_mark(entry); /* for i_list and g_list */
+
+	/*
+	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * hold the inode->i_lock, so this is the perfect time to update the
+	 * inode->i_fsnotify_mask
+	 */
+	fsnotify_recalc_inode_mask_locked(inode);
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this entry
+	 * is being freed.
+	 */
+	if (group->ops->freeing_mark)
+		group->ops->freeing_mark(entry, group);
+
+	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the entry->lock.
+	 *
+	 * The next time an event arrive to this inode from one of it's children
+	 * __fsnotify_parent will see that the inode doesn't care about it's
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+
+	iput(inode);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * this mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	struct fsnotify_mark_entry *lentry, *entry;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
+		list_add(&entry->free_g_list, &free_list);
+		list_del_init(&entry->g_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * Given an inode, destroy all of the marks associated with that inode.
+ */
+void fsnotify_clear_marks_by_inode(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry, *lentry;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
+		list_add(&entry->free_i_list, &free_list);
+		hlist_del_init(&entry->i_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&inode->i_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
+						     struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
+		if (entry->group == group) {
+			fsnotify_get_mark(entry);
+			return entry;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
+			void (*free_mark)(struct fsnotify_mark_entry *entry))
+
+{
+	spin_lock_init(&entry->lock);
+	atomic_set(&entry->refcnt, 1);
+	INIT_HLIST_NODE(&entry->i_list);
+	entry->group = NULL;
+	entry->mask = 0;
+	entry->inode = NULL;
+	entry->free_mark = free_mark;
+}
+
+/*
+ * Attach an initialized mark entry to a given group and inode.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group and for which inodes.
+ */
+int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
+		      struct fsnotify_group *group, struct inode *inode)
+{
+	struct fsnotify_mark_entry *lentry;
+	int ret = 0;
+
+	inode = igrab(inode);
+	if (unlikely(!inode))
+		return -EINVAL;
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * entry->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&entry->lock);
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	entry->group = group;
+	entry->inode = inode;
+
+	lentry = fsnotify_find_mark_entry(group, inode);
+	if (!lentry) {
+		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
+		list_add(&entry->g_list, &group->mark_entries);
+
+		fsnotify_get_mark(entry); /* for i_list and g_list */
+
+		atomic_inc(&group->num_marks);
+
+		fsnotify_recalc_inode_mask_locked(inode);
+	}
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	if (lentry) {
+		ret = -EEXIST;
+		iput(inode);
+		fsnotify_put_mark(lentry);
+	} else {
+		__fsnotify_update_child_dentry_flags(inode);
+	}
+
+	return ret;
+}
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
+ * @list: list of inodes being unmounted (sb->s_inodes)
+ *
+ * Called with inode_lock held, protecting the unmounting super block's list
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * We temporarily drop inode_lock, however, and CAN block.
+ */
+void fsnotify_unmount_inodes(struct list_head *list)
+{
+	struct inode *inode, *next_i, *need_iput = NULL;
+
+	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+		struct inode *need_iput_tmp;
+
+		/*
+		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * I_WILL_FREE, or I_NEW which is fine because by that point
+		 * the inode cannot have any associated watches.
+		 */
+		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+			continue;
+
+		/*
+		 * If i_count is zero, the inode cannot have any watches and
+		 * doing an __iget/iput with MS_ACTIVE clear would actually
+		 * evict all inodes with zero i_count from icache which is
+		 * unnecessarily violent and may in fact be illegal to do.
+		 */
+		if (!atomic_read(&inode->i_count))
+			continue;
+
+		need_iput_tmp = need_iput;
+		need_iput = NULL;
+
+		/* In case fsnotify_inode_delete() drops a reference. */
+		if (inode != need_iput_tmp)
+			__iget(inode);
+		else
+			need_iput_tmp = NULL;
+
+		/* In case the dropping of a reference would nuke next_i. */
+		if ((&next_i->i_sb_list != list) &&
+		    atomic_read(&next_i->i_count) &&
+		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+			__iget(next_i);
+			need_iput = next_i;
+		}
+
+		/*
+		 * We can safely drop inode_lock here because we hold
+		 * references on both inode and next_i.  Also no new inodes
+		 * will be added since the umount has begun.  Finally,
+		 * iprune_mutex keeps shrink_icache_memory() away.
+		 */
+		spin_unlock(&inode_lock);
+
+		if (need_iput_tmp)
+			iput(need_iput_tmp);
+
+		/* for each watch, send FS_UNMOUNT and then remove it */
+		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+		fsnotify_inode_delete(inode);
+
+		iput(inode);
+
+		spin_lock(&inode_lock);
+	}
+}
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 4467928..5356884 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,26 +1,30 @@
 config INOTIFY
 	bool "Inotify file change notification support"
-	default y
+	default n
 	---help---
-	  Say Y here to enable inotify support.  Inotify is a file change
-	  notification system and a replacement for dnotify.  Inotify fixes
-	  numerous shortcomings in dnotify and introduces several new features
-	  including multiple file events, one-shot support, and unmount
-	  notification.
+	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
+	  file change notification system.  It is a replacement for dnotify.
+	  This option only provides the legacy inotify in kernel API.  There
+	  are no in tree kernel users of this interface since it is deprecated.
+	  You only need this if you are loading an out of tree kernel module
+	  that uses inotify.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
-	depends on INOTIFY
+	depends on FSNOTIFY
 	default y
 	---help---
 	  Say Y here to enable inotify support for userspace, including the
 	  associated system calls.  Inotify allows monitoring of both files and
 	  directories via a single open fd.  Events are read from the file
 	  descriptor, which is also select()- and poll()-able.
+	  Inotify fixes numerous shortcomings in dnotify and introduces several
+	  new features including multiple file events, one-shot support, and
+	  unmount notification.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index e290f3b..9438281 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INOTIFY)		+= inotify.o
-obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 220c13f..40b1cf9 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 
 static atomic_t inotify_cookie;
 
@@ -905,6 +906,25 @@
  */
 static int __init inotify_setup(void)
 {
+	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
+	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
+	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+
+	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
 	atomic_set(&inotify_cookie, 0);
 
 	return 0;
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
new file mode 100644
index 0000000..ea2605a
--- /dev/null
+++ b/fs/notify/inotify/inotify.h
@@ -0,0 +1,21 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/slab.h> /* struct kmem_cache */
+
+extern struct kmem_cache *event_priv_cachep;
+
+struct inotify_event_private_data {
+	struct fsnotify_event_private_data fsnotify_event_priv_data;
+	int wd;
+};
+
+struct inotify_inode_mark_entry {
+	/* fsnotify_mark_entry MUST be the first thing */
+	struct fsnotify_mark_entry fsn_entry;
+	int wd;
+};
+
+extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+
+extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
new file mode 100644
index 0000000..7ef75b8
--- /dev/null
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -0,0 +1,138 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/path.h> /* struct path */
+#include <linux/slab.h> /* kmem_* */
+#include <linux/types.h>
+
+#include "inotify.h"
+
+static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry;
+	struct inotify_inode_mark_entry *ientry;
+	struct inode *to_tell;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	int wd, ret;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+	/* race with watch removal?  We already passes should_send */
+	if (unlikely(!entry))
+		return 0;
+	ientry = container_of(entry, struct inotify_inode_mark_entry,
+			      fsn_entry);
+	wd = ientry->wd;
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		return -ENOMEM;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = wd;
+
+	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
+	/* EEXIST is not an error */
+	if (ret == -EEXIST)
+		ret = 0;
+
+	/* did event_priv get attached? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+	/*
+	 * If we hold the entry until after the event is on the queue
+	 * IN_IGNORED won't be able to pass this event in the queue
+	 */
+	fsnotify_put_mark(entry);
+
+	return ret;
+}
+
+static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	inotify_destroy_mark_entry(entry, group);
+}
+
+static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return false;
+
+	mask = (mask & ~FS_EVENT_ON_CHILD);
+	send = (entry->mask & mask);
+
+	/* find took a reference */
+	fsnotify_put_mark(entry);
+
+	return send;
+}
+
+static int idr_callback(int id, void *p, void *data)
+{
+	BUG();
+	return 0;
+}
+
+static void inotify_free_group_priv(struct fsnotify_group *group)
+{
+	/* ideally the idr is empty and we won't hit the BUG in teh callback */
+	idr_for_each(&group->inotify_data.idr, idr_callback, NULL);
+	idr_remove_all(&group->inotify_data.idr);
+	idr_destroy(&group->inotify_data.idr);
+}
+
+void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+{
+	struct inotify_event_private_data *event_priv;
+
+
+	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
+				  fsnotify_event_priv_data);
+
+	kmem_cache_free(event_priv_cachep, event_priv);
+}
+
+const struct fsnotify_ops inotify_fsnotify_ops = {
+	.handle_event = inotify_handle_event,
+	.should_send_event = inotify_should_send_event,
+	.free_group_priv = inotify_free_group_priv,
+	.free_event_priv = inotify_free_event_priv,
+	.freeing_mark = inotify_freeing_mark,
+};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1634319..982a412 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -8,6 +8,9 @@
  * Copyright (C) 2005 John McCutchan
  * Copyright 2006 Hewlett-Packard Development Company, L.P.
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -19,94 +22,48 @@
  * General Public License for more details.
  */
 
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/list.h>
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h> /* module_init */
 #include <linux/inotify.h>
+#include <linux/kernel.h> /* roundup() */
+#include <linux/magic.h> /* superblock magic number */
+#include <linux/mount.h> /* mntget */
+#include <linux/namei.h> /* LOOKUP_FOLLOW */
+#include <linux/path.h> /* struct path */
+#include <linux/sched.h> /* struct user */
+#include <linux/slab.h> /* struct kmem_cache */
 #include <linux/syscalls.h>
-#include <linux/magic.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
+
+#include "inotify.h"
 
 #include <asm/ioctls.h>
 
-static struct kmem_cache *watch_cachep __read_mostly;
-static struct kmem_cache *event_cachep __read_mostly;
-
 static struct vfsmount *inotify_mnt __read_mostly;
 
+/* this just sits here and wastes global memory.  used to just pad userspace messages with zeros */
+static struct inotify_event nul_inotify_event;
+
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
-static int inotify_max_user_watches __read_mostly;
 static int inotify_max_queued_events __read_mostly;
+int inotify_max_user_watches __read_mostly;
+
+static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *event_priv_cachep __read_mostly;
+static struct fsnotify_event *inotify_ignored_event;
 
 /*
- * Lock ordering:
- *
- * inotify_dev->up_mutex (ensures we don't re-add the same watch)
- * 	inode->inotify_mutex (protects inode's watch list)
- * 		inotify_handle->mutex (protects inotify_handle's watch list)
- * 			inotify_dev->ev_mutex (protects device's event queue)
+ * When inotify registers a new group it increments this and uses that
+ * value as an offset to set the fsnotify group "name" and priority.
  */
-
-/*
- * Lifetimes of the main data structures:
- *
- * inotify_device: Lifetime is managed by reference count, from
- * sys_inotify_init() until release.  Additional references can bump the count
- * via get_inotify_dev() and drop the count via put_inotify_dev().
- *
- * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
- * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
- * first event, or to inotify_destroy().
- */
-
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	struct fasync_struct    *fa;            /* async notification */
-	atomic_t		count;		/* reference count */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
+static atomic_t inotify_grp_num;
 
 #ifdef CONFIG_SYSCTL
 
@@ -149,280 +106,36 @@
 };
 #endif /* CONFIG_SYSCTL */
 
-static inline void get_inotify_dev(struct inotify_device *dev)
+static inline __u32 inotify_arg_to_mask(u32 arg)
 {
-	atomic_inc(&dev->count);
+	__u32 mask;
+
+	/* everything should accept their own ignored and cares about children */
+	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
+
+	/* mask off the flags used to open the fd */
+	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
+
+	return mask;
 }
 
-static inline void put_inotify_dev(struct inotify_device *dev)
+static inline u32 inotify_mask_to_arg(__u32 mask)
 {
-	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
-		kfree(dev);
-	}
+	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
+		       IN_Q_OVERFLOW);
 }
 
-/*
- * free_inotify_user_watch - cleans up the watch and its references
- */
-static void free_inotify_user_watch(struct inotify_watch *w)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	atomic_dec(&dev->user->inotify_watches);
-	put_inotify_dev(dev);
-	kmem_cache_free(watch_cachep, watch);
-}
-
-/*
- * kernel_event - create a new kernel event with the given parameters
- *
- * This function can sleep.
- */
-static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
-						  const char *name)
-{
-	struct inotify_kernel_event *kevent;
-
-	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
-	if (unlikely(!kevent))
-		return NULL;
-
-	/* we hand this out to user-space, so zero it just in case */
-	memset(&kevent->event, 0, sizeof(struct inotify_event));
-
-	kevent->event.wd = wd;
-	kevent->event.mask = mask;
-	kevent->event.cookie = cookie;
-
-	INIT_LIST_HEAD(&kevent->list);
-
-	if (name) {
-		size_t len, rem, event_size = sizeof(struct inotify_event);
-
-		/*
-		 * We need to pad the filename so as to properly align an
-		 * array of inotify_event structures.  Because the structure is
-		 * small and the common case is a small filename, we just round
-		 * up to the next multiple of the structure's sizeof.  This is
-		 * simple and safe for all architectures.
-		 */
-		len = strlen(name) + 1;
-		rem = event_size - len;
-		if (len > event_size) {
-			rem = event_size - (len % event_size);
-			if (len % event_size == 0)
-				rem = 0;
-		}
-
-		kevent->name = kmalloc(len + rem, GFP_NOFS);
-		if (unlikely(!kevent->name)) {
-			kmem_cache_free(event_cachep, kevent);
-			return NULL;
-		}
-		memcpy(kevent->name, name, len);
-		if (rem)
-			memset(kevent->name + len, 0, rem);
-		kevent->event.len = len + rem;
-	} else {
-		kevent->event.len = 0;
-		kevent->name = NULL;
-	}
-
-	return kevent;
-}
-
-/*
- * inotify_dev_get_event - return the next event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_event(struct inotify_device *dev)
-{
-	return list_entry(dev->events.next, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_get_last_event - return the last event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_last_event(struct inotify_device *dev)
-{
-	if (list_empty(&dev->events))
-		return NULL;
-	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_queue_event - event handler registered with core inotify, adds
- * a new event to the given device
- *
- * Can sleep (calls kernel_event()).
- */
-static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
-				    u32 cookie, const char *name,
-				    struct inode *ignored)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-	struct inotify_kernel_event *kevent, *last;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	mutex_lock(&dev->ev_mutex);
-
-	/* we can safely put the watch as we don't reference it while
-	 * generating the event
-	 */
-	if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
-		put_inotify_watch(w); /* final put */
-
-	/* coalescing: drop this event if it is a dupe of the previous */
-	last = inotify_dev_get_last_event(dev);
-	if (last && last->event.mask == mask && last->event.wd == wd &&
-			last->event.cookie == cookie) {
-		const char *lastname = last->name;
-
-		if (!name && !lastname)
-			goto out;
-		if (name && lastname && !strcmp(lastname, name))
-			goto out;
-	}
-
-	/* the queue overflowed and we already sent the Q_OVERFLOW event */
-	if (unlikely(dev->event_count > dev->max_events))
-		goto out;
-
-	/* if the queue overflows, we need to notify user space */
-	if (unlikely(dev->event_count == dev->max_events))
-		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
-	else
-		kevent = kernel_event(wd, mask, cookie, name);
-
-	if (unlikely(!kevent))
-		goto out;
-
-	/* queue the event and wake up anyone waiting */
-	dev->event_count++;
-	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
-	list_add_tail(&kevent->list, &dev->events);
-	wake_up_interruptible(&dev->wq);
-	kill_fasync(&dev->fa, SIGIO, POLL_IN);
-
-out:
-	mutex_unlock(&dev->ev_mutex);
-}
-
-/*
- * remove_kevent - cleans up the given kevent
- *
- * Caller must hold dev->ev_mutex.
- */
-static void remove_kevent(struct inotify_device *dev,
-			  struct inotify_kernel_event *kevent)
-{
-	list_del(&kevent->list);
-
-	dev->event_count--;
-	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
-}
-
-/*
- * free_kevent - frees the given kevent.
- */
-static void free_kevent(struct inotify_kernel_event *kevent)
-{
-	kfree(kevent->name);
-	kmem_cache_free(event_cachep, kevent);
-}
-
-/*
- * inotify_dev_event_dequeue - destroy an event on the given device
- *
- * Caller must hold dev->ev_mutex.
- */
-static void inotify_dev_event_dequeue(struct inotify_device *dev)
-{
-	if (!list_empty(&dev->events)) {
-		struct inotify_kernel_event *kevent;
-		kevent = inotify_dev_get_event(dev);
-		remove_kevent(dev, kevent);
-		free_kevent(kevent);
-	}
-}
-
-/*
- * find_inode - resolve a user-given path to a specific inode
- */
-static int find_inode(const char __user *dirname, struct path *path,
-		      unsigned flags)
-{
-	int error;
-
-	error = user_path_at(AT_FDCWD, dirname, flags, path);
-	if (error)
-		return error;
-	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
-	if (error)
-		path_put(path);
-	return error;
-}
-
-/*
- * create_watch - creates a watch on the given device.
- *
- * Callers must hold dev->up_mutex.
- */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
-{
-	struct inotify_user_watch *watch;
-	int ret;
-
-	if (atomic_read(&dev->user->inotify_watches) >=
-			inotify_max_user_watches)
-		return -ENOSPC;
-
-	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
-	if (unlikely(!watch))
-		return -ENOMEM;
-
-	/* save a reference to device and bump the count to make it official */
-	get_inotify_dev(dev);
-	watch->dev = dev;
-
-	atomic_inc(&dev->user->inotify_watches);
-
-	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
-	if (ret < 0)
-		free_inotify_user_watch(&watch->wdata);
-
-	return ret;
-}
-
-/* Device Interface */
-
+/* intofiy userspace file descriptor functions */
 static unsigned int inotify_poll(struct file *file, poll_table *wait)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 	int ret = 0;
 
-	poll_wait(file, &dev->wq, wait);
-	mutex_lock(&dev->ev_mutex);
-	if (!list_empty(&dev->events))
+	poll_wait(file, &group->notification_waitq, wait);
+	mutex_lock(&group->notification_mutex);
+	if (!fsnotify_notify_queue_is_empty(group))
 		ret = POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->ev_mutex);
+	mutex_unlock(&group->notification_mutex);
 
 	return ret;
 }
@@ -432,26 +145,29 @@
  * enough to fit in "count". Return an error pointer if
  * not large enough.
  *
- * Called with the device ev_mutex held.
+ * Called with the group->notification_mutex held.
  */
-static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
-						  size_t count)
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+					    size_t count)
 {
 	size_t event_size = sizeof(struct inotify_event);
-	struct inotify_kernel_event *kevent;
+	struct fsnotify_event *event;
 
-	if (list_empty(&dev->events))
+	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
-	kevent = inotify_dev_get_event(dev);
-	if (kevent->name)
-		event_size += kevent->event.len;
+	event = fsnotify_peek_notify_event(group);
+
+	event_size += roundup(event->name_len, event_size);
 
 	if (event_size > count)
 		return ERR_PTR(-EINVAL);
 
-	remove_kevent(dev, kevent);
-	return kevent;
+	/* held the notification_mutex the whole time, so this is the
+	 * same event we peeked above */
+	fsnotify_remove_notify_event(group);
+
+	return event;
 }
 
 /*
@@ -460,51 +176,90 @@
  * We already checked that the event size is smaller than the
  * buffer we had in "get_one_event()" above.
  */
-static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent,
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fsnotify_event *event,
 				  char __user *buf)
 {
+	struct inotify_event inotify_event;
+	struct fsnotify_event_private_data *fsn_priv;
+	struct inotify_event_private_data *priv;
 	size_t event_size = sizeof(struct inotify_event);
+	size_t name_len;
 
-	if (copy_to_user(buf, &kevent->event, event_size))
+	/* we get the inotify watch descriptor from the event private data */
+	spin_lock(&event->lock);
+	fsn_priv = fsnotify_remove_priv_from_event(group, event);
+	spin_unlock(&event->lock);
+
+	if (!fsn_priv)
+		inotify_event.wd = -1;
+	else {
+		priv = container_of(fsn_priv, struct inotify_event_private_data,
+				    fsnotify_event_priv_data);
+		inotify_event.wd = priv->wd;
+		inotify_free_event_priv(fsn_priv);
+	}
+
+	/* round up event->name_len so it is a multiple of event_size */
+	name_len = roundup(event->name_len, event_size);
+	inotify_event.len = name_len;
+
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
+	inotify_event.cookie = event->sync_cookie;
+
+	/* send the main event */
+	if (copy_to_user(buf, &inotify_event, event_size))
 		return -EFAULT;
 
-	if (kevent->name) {
-		buf += event_size;
+	buf += event_size;
 
-		if (copy_to_user(buf, kevent->name, kevent->event.len))
+	/*
+	 * fsnotify only stores the pathname, so here we have to send the pathname
+	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
+	 * with zeros.  I get my zeros from the nul_inotify_event.
+	 */
+	if (name_len) {
+		unsigned int len_to_zero = name_len - event->name_len;
+		/* copy the path name */
+		if (copy_to_user(buf, event->file_name, event->name_len))
 			return -EFAULT;
+		buf += event->name_len;
 
-		event_size += kevent->event.len;
+		/* fill userspace with 0's from nul_inotify_event */
+		if (copy_to_user(buf, &nul_inotify_event, len_to_zero))
+			return -EFAULT;
+		buf += len_to_zero;
+		event_size += name_len;
 	}
+
 	return event_size;
 }
 
 static ssize_t inotify_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *pos)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event *kevent;
 	char __user *start;
 	int ret;
 	DEFINE_WAIT(wait);
 
 	start = buf;
-	dev = file->private_data;
+	group = file->private_data;
 
 	while (1) {
-		struct inotify_kernel_event *kevent;
+		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
 
-		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
-
-		mutex_lock(&dev->ev_mutex);
-		kevent = get_one_event(dev, count);
-		mutex_unlock(&dev->ev_mutex);
+		mutex_lock(&group->notification_mutex);
+		kevent = get_one_event(group, count);
+		mutex_unlock(&group->notification_mutex);
 
 		if (kevent) {
 			ret = PTR_ERR(kevent);
 			if (IS_ERR(kevent))
 				break;
-			ret = copy_event_to_user(kevent, buf);
-			free_kevent(kevent);
+			ret = copy_event_to_user(group, kevent, buf);
+			fsnotify_put_event(kevent);
 			if (ret < 0)
 				break;
 			buf += ret;
@@ -525,7 +280,7 @@
 		schedule();
 	}
 
-	finish_wait(&dev->wq, &wait);
+	finish_wait(&group->notification_waitq, &wait);
 	if (start != buf && ret != -EFAULT)
 		ret = buf - start;
 	return ret;
@@ -533,25 +288,19 @@
 
 static int inotify_fasync(int fd, struct file *file, int on)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 
-	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
+	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
 }
 
 static int inotify_release(struct inode *ignored, struct file *file)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 
-	inotify_destroy(dev->ih);
+	fsnotify_clear_marks_by_group(group);
 
-	/* destroy all of the events on this device */
-	mutex_lock(&dev->ev_mutex);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	mutex_unlock(&dev->ev_mutex);
-
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
+	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
+	fsnotify_put_group(group);
 
 	return 0;
 }
@@ -559,16 +308,27 @@
 static long inotify_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event_holder *holder;
+	struct fsnotify_event *event;
 	void __user *p;
 	int ret = -ENOTTY;
+	size_t send_len = 0;
 
-	dev = file->private_data;
+	group = file->private_data;
 	p = (void __user *) arg;
 
 	switch (cmd) {
 	case FIONREAD:
-		ret = put_user(dev->queue_size, (int __user *) p);
+		mutex_lock(&group->notification_mutex);
+		list_for_each_entry(holder, &group->notification_list, event_list) {
+			event = holder->event;
+			send_len += sizeof(struct inotify_event);
+			send_len += roundup(event->name_len,
+					     sizeof(struct inotify_event));
+		}
+		mutex_unlock(&group->notification_mutex);
+		ret = put_user(send_len, (int __user *) p);
 		break;
 	}
 
@@ -576,23 +336,233 @@
 }
 
 static const struct file_operations inotify_fops = {
-	.poll           = inotify_poll,
-	.read           = inotify_read,
-	.fasync         = inotify_fasync,
-	.release        = inotify_release,
-	.unlocked_ioctl = inotify_ioctl,
+	.poll		= inotify_poll,
+	.read		= inotify_read,
+	.fasync		= inotify_fasync,
+	.release	= inotify_release,
+	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
 };
 
-static const struct inotify_operations inotify_user_ops = {
-	.handle_event	= inotify_dev_queue_event,
-	.destroy_watch	= free_inotify_user_watch,
-};
 
+/*
+ * find_inode - resolve a user-given path to a specific inode
+ */
+static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+{
+	int error;
+
+	error = user_path_at(AT_FDCWD, dirname, flags, path);
+	if (error)
+		return error;
+	/* you can only watch an inode if you have read permissions on it */
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error)
+		path_put(path);
+	return error;
+}
+
+/*
+ * When, for whatever reason, inotify is done with a mark (or what used to be a
+ * watch) we need to remove that watch from the idr and we need to send IN_IGNORED
+ * for the given wd.
+ *
+ * There is a bit of recursion here.  The loop looks like:
+ * 	inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
+ *	inotify_freeing_mark -> inotify_destory_mark_entry -> restart
+ * But the loop is broken in 2 places.  fsnotify_destroy_mark_by_entry sets
+ * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
+ * test below will not call back to fsnotify again.  But even if that test wasn't
+ * there this would still be safe since fsnotify_destroy_mark_by_entry() is
+ * safe from recursion.
+ */
+void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	struct inotify_inode_mark_entry *ientry;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	struct fsnotify_group *egroup;
+	struct idr *idr;
+
+	spin_lock(&entry->lock);
+	egroup = entry->group;
+
+	/* if egroup we aren't really done and something might still send events
+	 * for this inode, on the callback we'll send the IN_IGNORED */
+	if (egroup) {
+		spin_unlock(&entry->lock);
+		fsnotify_destroy_mark_by_entry(entry);
+		return;
+	}
+	spin_unlock(&entry->lock);
+
+	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		goto skip_send_ignore;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = ientry->wd;
+
+	fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv);
+
+	/* did the private data get added? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+skip_send_ignore:
+
+	/* remove this entry from the idr */
+	spin_lock(&group->inotify_data.idr_lock);
+	idr = &group->inotify_data.idr;
+	idr_remove(idr, ientry->wd);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	/* removed from idr, drop that reference */
+	fsnotify_put_mark(entry);
+}
+
+/* ding dong the mark is dead */
+static void inotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
+
+	kmem_cache_free(inotify_inode_mark_cachep, ientry);
+}
+
+static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct inotify_inode_mark_entry *ientry;
+	int ret = 0;
+	int add = (arg & IN_MASK_ADD);
+	__u32 mask;
+	__u32 old_mask, new_mask;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask = inotify_arg_to_mask(arg);
+	if (unlikely(!mask))
+		return -EINVAL;
+
+	ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+	if (unlikely(!ientry))
+		return -ENOMEM;
+	/* we set the mask at the end after attaching it */
+	fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark);
+	ientry->wd = 0;
+
+find_entry:
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (entry) {
+		kmem_cache_free(inotify_inode_mark_cachep, ientry);
+		ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	} else {
+		if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) {
+			ret = -ENOSPC;
+			goto out_err;
+		}
+
+		ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode);
+		if (ret == -EEXIST)
+			goto find_entry;
+		else if (ret)
+			goto out_err;
+
+		entry = &ientry->fsn_entry;
+retry:
+		ret = -ENOMEM;
+		if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
+			goto out_err;
+
+		spin_lock(&group->inotify_data.idr_lock);
+		/* if entry is added to the idr we keep the reference obtained
+		 * through fsnotify_mark_add.  remember to drop this reference
+		 * when entry is removed from idr */
+		ret = idr_get_new_above(&group->inotify_data.idr, entry,
+					++group->inotify_data.last_wd,
+					&ientry->wd);
+		spin_unlock(&group->inotify_data.idr_lock);
+		if (ret) {
+			if (ret == -EAGAIN)
+				goto retry;
+			goto out_err;
+		}
+		atomic_inc(&group->inotify_data.user->inotify_watches);
+	}
+
+	spin_lock(&entry->lock);
+
+	old_mask = entry->mask;
+	if (add) {
+		entry->mask |= mask;
+		new_mask = entry->mask;
+	} else {
+		entry->mask = mask;
+		new_mask = entry->mask;
+	}
+
+	spin_unlock(&entry->lock);
+
+	if (old_mask != new_mask) {
+		/* more bits in old than in new? */
+		int dropped = (old_mask & ~new_mask);
+		/* more bits in this entry than the inode's mask? */
+		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+		/* more bits in this entry than the group? */
+		int do_group = (new_mask & ~group->mask);
+
+		/* update the inode with this new entry */
+		if (dropped || do_inode)
+			fsnotify_recalc_inode_mask(inode);
+
+		/* update the group mask with the new mask */
+		if (dropped || do_group)
+			fsnotify_recalc_group_mask(group);
+	}
+
+	return ientry->wd;
+
+out_err:
+	/* see this isn't supposed to happen, just kill the watch */
+	if (entry) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+	return ret;
+}
+
+static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
+{
+	struct fsnotify_group *group;
+	unsigned int grp_num;
+
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
+	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
+	if (IS_ERR(group))
+		return group;
+
+	group->max_events = max_events;
+
+	spin_lock_init(&group->inotify_data.idr_lock);
+	idr_init(&group->inotify_data.idr);
+	group->inotify_data.last_wd = 0;
+	group->inotify_data.user = user;
+	group->inotify_data.fa = NULL;
+
+	return group;
+}
+
+
+/* inotify syscalls */
 SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
-	struct inotify_device *dev;
-	struct inotify_handle *ih;
+	struct fsnotify_group *group;
 	struct user_struct *user;
 	struct file *filp;
 	int fd, ret;
@@ -621,45 +591,27 @@
 		goto out_free_uid;
 	}
 
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
-	if (unlikely(!dev)) {
-		ret = -ENOMEM;
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	group = inotify_new_group(user, inotify_max_queued_events);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
 		goto out_free_uid;
 	}
 
-	ih = inotify_init(&inotify_user_ops);
-	if (IS_ERR(ih)) {
-		ret = PTR_ERR(ih);
-		goto out_free_dev;
-	}
-	dev->ih = ih;
-	dev->fa = NULL;
-
 	filp->f_op = &inotify_fops;
 	filp->f_path.mnt = mntget(inotify_mnt);
 	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
 	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = dev;
+	filp->private_data = group;
 
-	INIT_LIST_HEAD(&dev->events);
-	init_waitqueue_head(&dev->wq);
-	mutex_init(&dev->ev_mutex);
-	mutex_init(&dev->up_mutex);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
 	atomic_inc(&user->inotify_devs);
+
 	fd_install(fd, filp);
 
 	return fd;
-out_free_dev:
-	kfree(dev);
+
 out_free_uid:
 	free_uid(user);
 	put_filp(filp);
@@ -676,8 +628,8 @@
 SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 		u32, mask)
 {
+	struct fsnotify_group *group;
 	struct inode *inode;
-	struct inotify_device *dev;
 	struct path path;
 	struct file *filp;
 	int ret, fput_needed;
@@ -698,20 +650,20 @@
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = find_inode(pathname, &path, flags);
-	if (unlikely(ret))
+	ret = inotify_find_inode(pathname, &path, flags);
+	if (ret)
 		goto fput_and_out;
 
-	/* inode held in place by reference to path; dev by fget on fd */
+	/* inode held in place by reference to path; group by fget on fd */
 	inode = path.dentry->d_inode;
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	mutex_lock(&dev->up_mutex);
-	ret = inotify_find_update_watch(dev->ih, inode, mask);
-	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
-	mutex_unlock(&dev->up_mutex);
+	/* create/update an inode mark */
+	ret = inotify_update_watch(group, inode, mask);
+	if (unlikely(ret))
+		goto path_put_and_out;
 
+path_put_and_out:
 	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);
@@ -720,9 +672,10 @@
 
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
+	struct fsnotify_group *group;
+	struct fsnotify_mark_entry *entry;
 	struct file *filp;
-	struct inotify_device *dev;
-	int ret, fput_needed;
+	int ret = 0, fput_needed;
 
 	filp = fget_light(fd, &fput_needed);
 	if (unlikely(!filp))
@@ -734,10 +687,20 @@
 		goto out;
 	}
 
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	/* we free our watch data when we get IN_IGNORED */
-	ret = inotify_rm_wd(dev->ih, wd);
+	spin_lock(&group->inotify_data.idr_lock);
+	entry = idr_find(&group->inotify_data.idr, wd);
+	if (unlikely(!entry)) {
+		spin_unlock(&group->inotify_data.idr_lock);
+		ret = -EINVAL;
+		goto out;
+	}
+	fsnotify_get_mark(entry);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	inotify_destroy_mark_entry(entry, group);
+	fsnotify_put_mark(entry);
 
 out:
 	fput_light(filp, fput_needed);
@@ -753,9 +716,9 @@
 }
 
 static struct file_system_type inotify_fs_type = {
-    .name           = "inotifyfs",
-    .get_sb         = inotify_get_sb,
-    .kill_sb        = kill_anon_super,
+    .name	= "inotifyfs",
+    .get_sb	= inotify_get_sb,
+    .kill_sb	= kill_anon_super,
 };
 
 /*
@@ -775,18 +738,16 @@
 	if (IS_ERR(inotify_mnt))
 		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
 
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
+	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
+	inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
+	if (!inotify_ignored_event)
+		panic("unable to allocate the inotify ignored event\n");
+
 	inotify_max_queued_events = 16384;
 	inotify_max_user_instances = 128;
 	inotify_max_user_watches = 8192;
 
-	watch_cachep = kmem_cache_create("inotify_watch_cache",
-					 sizeof(struct inotify_user_watch),
-					 0, SLAB_PANIC, NULL);
-	event_cachep = kmem_cache_create("inotify_event_cache",
-					 sizeof(struct inotify_kernel_event),
-					 0, SLAB_PANIC, NULL);
-
 	return 0;
 }
-
 module_init(inotify_user_setup);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
new file mode 100644
index 0000000..959b73e
--- /dev/null
+++ b/fs/notify/notification.c
@@ -0,0 +1,411 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Basic idea behind the notification queue: An fsnotify group (like inotify)
+ * sends the userspace notification about events asyncronously some time after
+ * the event happened.  When inotify gets an event it will need to add that
+ * event to the group notify queue.  Since a single event might need to be on
+ * multiple group's notification queues we can't add the event directly to each
+ * queue and instead add a small "event_holder" to each queue.  This event_holder
+ * has a pointer back to the original event.  Since the majority of events are
+ * going to end up on one, and only one, notification queue we embed one
+ * event_holder into each event.  This means we have a single allocation instead
+ * of always needing two.  If the embedded event_holder is already in use by
+ * another group a new event_holder (from fsnotify_event_holder_cachep) will be
+ * allocated and used.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+static struct kmem_cache *fsnotify_event_cachep;
+static struct kmem_cache *fsnotify_event_holder_cachep;
+/*
+ * This is a magic event we send when the q is too full.  Since it doesn't
+ * hold real event information we just keep one system wide and use it any time
+ * it is needed.  It's refcnt is set 1 at kernel init time and will never
+ * get set to 0 so it will never get 'freed'
+ */
+static struct fsnotify_event q_overflow_event;
+static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
+
+/**
+ * fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
+ * Called from fsnotify_move, which is inlined into filesystem modules.
+ */
+u32 fsnotify_get_cookie(void)
+{
+	return atomic_inc_return(&fsnotify_sync_cookie);
+}
+EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
+
+/* return true if the notify queue is empty, false otherwise */
+bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+	return list_empty(&group->notification_list) ? true : false;
+}
+
+void fsnotify_get_event(struct fsnotify_event *event)
+{
+	atomic_inc(&event->refcnt);
+}
+
+void fsnotify_put_event(struct fsnotify_event *event)
+{
+	if (!event)
+		return;
+
+	if (atomic_dec_and_test(&event->refcnt)) {
+		if (event->data_type == FSNOTIFY_EVENT_PATH)
+			path_put(&event->path);
+
+		BUG_ON(!list_empty(&event->private_data_list));
+
+		kfree(event->file_name);
+		kmem_cache_free(fsnotify_event_cachep, event);
+	}
+}
+
+struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
+{
+	return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
+}
+
+void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
+{
+	kmem_cache_free(fsnotify_event_holder_cachep, holder);
+}
+
+/*
+ * Find the private data that the group previously attached to this event when
+ * the group added the event to the notification queue (fsnotify_add_notify_event)
+ */
+struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_event_private_data *lpriv;
+	struct fsnotify_event_private_data *priv = NULL;
+
+	assert_spin_locked(&event->lock);
+
+	list_for_each_entry(lpriv, &event->private_data_list, event_list) {
+		if (lpriv->group == group) {
+			priv = lpriv;
+			list_del(&priv->event_list);
+			break;
+		}
+	}
+	return priv;
+}
+
+/*
+ * Check if 2 events contain the same information.  We do not compare private data
+ * but at this moment that isn't a problem for any know fsnotify listeners.
+ */
+static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_INODE):
+			if (old->inode == new->inode)
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_PATH):
+			if ((old->path.mnt == new->path.mnt) &&
+			    (old->path.dentry == new->path.dentry))
+				return true;
+		case (FSNOTIFY_EVENT_NONE):
+			return true;
+		};
+	}
+	return false;
+}
+
+/*
+ * Add an event to the group notification queue.  The group can later pull this
+ * event off the queue to deal with.  If the event is successfully added to the
+ * group's notification queue, a reference is taken on event.
+ */
+int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+			      struct fsnotify_event_private_data *priv)
+{
+	struct fsnotify_event_holder *holder = NULL;
+	struct list_head *list = &group->notification_list;
+	struct fsnotify_event_holder *last_holder;
+	struct fsnotify_event *last_event;
+
+	/* easy to tell if priv was attached to the event */
+	INIT_LIST_HEAD(&priv->event_list);
+
+	/*
+	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
+	 * Check if we expect to be able to use that holder.  If not alloc a new
+	 * holder.
+	 * For the overflow event it's possible that something will use the in
+	 * event holder before we get the lock so we may need to jump back and
+	 * alloc a new holder, this can't happen for most events...
+	 */
+	if (!list_empty(&event->holder.event_list)) {
+alloc_holder:
+		holder = fsnotify_alloc_event_holder();
+		if (!holder)
+			return -ENOMEM;
+	}
+
+	mutex_lock(&group->notification_mutex);
+
+	if (group->q_len >= group->max_events) {
+		event = &q_overflow_event;
+		/* sorry, no private data on the overflow event */
+		priv = NULL;
+	}
+
+	spin_lock(&event->lock);
+
+	if (list_empty(&event->holder.event_list)) {
+		if (unlikely(holder))
+			fsnotify_destroy_event_holder(holder);
+		holder = &event->holder;
+	} else if (unlikely(!holder)) {
+		/* between the time we checked above and got the lock the in
+		 * event holder was used, go back and get a new one */
+		spin_unlock(&event->lock);
+		mutex_unlock(&group->notification_mutex);
+		goto alloc_holder;
+	}
+
+	if (!list_empty(list)) {
+		last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
+		last_event = last_holder->event;
+		if (event_compare(last_event, event)) {
+			spin_unlock(&event->lock);
+			mutex_unlock(&group->notification_mutex);
+			if (holder != &event->holder)
+				fsnotify_destroy_event_holder(holder);
+			return -EEXIST;
+		}
+	}
+
+	group->q_len++;
+	holder->event = event;
+
+	fsnotify_get_event(event);
+	list_add_tail(&holder->event_list, list);
+	if (priv)
+		list_add_tail(&priv->event_list, &event->private_data_list);
+	spin_unlock(&event->lock);
+	mutex_unlock(&group->notification_mutex);
+
+	wake_up(&group->notification_waitq);
+	return 0;
+}
+
+/*
+ * Remove and return the first event from the notification list.  There is a
+ * reference held on this event since it was on the list.  It is the responsibility
+ * of the caller to drop this reference.
+ */
+struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
+
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+
+	event = holder->event;
+
+	spin_lock(&event->lock);
+	holder->event = NULL;
+	list_del_init(&holder->event_list);
+	spin_unlock(&event->lock);
+
+	/* event == holder means we are referenced through the in event holder */
+	if (holder != &event->holder)
+		fsnotify_destroy_event_holder(holder);
+
+	group->q_len--;
+
+	return event;
+}
+
+/*
+ * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ */
+struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
+
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+	event = holder->event;
+
+	return event;
+}
+
+/*
+ * Called when a group is being torn down to clean up any outstanding
+ * event notifications.
+ */
+void fsnotify_flush_notify(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+	struct fsnotify_event_private_data *priv;
+
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		event = fsnotify_remove_notify_event(group);
+		/* if they don't implement free_event_priv they better not have attached any */
+		if (group->ops->free_event_priv) {
+			spin_lock(&event->lock);
+			priv = fsnotify_remove_priv_from_event(group, event);
+			spin_unlock(&event->lock);
+			if (priv)
+				group->ops->free_event_priv(priv);
+		}
+		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+	}
+	mutex_unlock(&group->notification_mutex);
+}
+
+static void initialize_event(struct fsnotify_event *event)
+{
+	event->holder.event = NULL;
+	INIT_LIST_HEAD(&event->holder.event_list);
+	atomic_set(&event->refcnt, 1);
+
+	spin_lock_init(&event->lock);
+
+	event->path.dentry = NULL;
+	event->path.mnt = NULL;
+	event->inode = NULL;
+	event->data_type = FSNOTIFY_EVENT_NONE;
+
+	INIT_LIST_HEAD(&event->private_data_list);
+
+	event->to_tell = NULL;
+
+	event->file_name = NULL;
+	event->name_len = 0;
+
+	event->sync_cookie = 0;
+}
+
+/*
+ * fsnotify_create_event - Allocate a new event which will be sent to each
+ * group's handle_event function if the group was interested in this
+ * particular event.
+ *
+ * @to_tell the inode which is supposed to receive the event (sometimes a
+ *	parent of the inode to which the event happened.
+ * @mask what actually happened.
+ * @data pointer to the object which was actually affected
+ * @data_type flag indication if the data is a file, path, inode, nothing...
+ * @name the filename, if available
+ */
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
+					     int data_type, const char *name, u32 cookie)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	initialize_event(event);
+
+	if (name) {
+		event->file_name = kstrdup(name, GFP_KERNEL);
+		if (!event->file_name) {
+			kmem_cache_free(fsnotify_event_cachep, event);
+			return NULL;
+		}
+		event->name_len = strlen(event->file_name);
+	}
+
+	event->sync_cookie = cookie;
+	event->to_tell = to_tell;
+
+	switch (data_type) {
+	case FSNOTIFY_EVENT_FILE: {
+		struct file *file = data;
+		struct path *path = &file->f_path;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_PATH: {
+		struct path *path = data;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_INODE:
+		event->inode = data;
+		event->data_type = FSNOTIFY_EVENT_INODE;
+		break;
+	case FSNOTIFY_EVENT_NONE:
+		event->inode = NULL;
+		event->path.dentry = NULL;
+		event->path.mnt = NULL;
+		break;
+	default:
+		BUG();
+	}
+
+	event->mask = mask;
+
+	return event;
+}
+
+__init int fsnotify_notification_init(void)
+{
+	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
+	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
+
+	initialize_event(&q_overflow_event);
+	q_overflow_event.mask = FS_Q_OVERFLOW;
+
+	return 0;
+}
+subsys_initcall(fsnotify_notification_init);
+
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index f76951d..abaaa1c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
-#include <linux/blkdev.h>	/* For bdev_hardsect_size(). */
+#include <linux/blkdev.h>	/* For bdev_logical_block_size(). */
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
@@ -443,6 +443,8 @@
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering with remount options string: %s", opt);
+
+	lock_kernel();
 #ifndef NTFS_RW
 	/* For read-only compiled driver, enforce read-only flag. */
 	*flags |= MS_RDONLY;
@@ -466,15 +468,18 @@
 		if (NVolErrors(vol)) {
 			ntfs_error(sb, "Volume has errors and is read-only%s",
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_IS_DIRTY) {
 			ntfs_error(sb, "Volume is dirty and read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
 			ntfs_error(sb, "Volume has been modified by chkdsk "
 					"and is read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -482,11 +487,13 @@
 					"(0x%x) and is read-only%s",
 					(unsigned)le16_to_cpu(vol->vol_flags),
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
 			ntfs_error(sb, "Failed to set dirty bit in volume "
 					"information flags%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 #if 0
@@ -506,18 +513,21 @@
 			ntfs_error(sb, "Failed to empty journal $LogFile%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_mark_quotas_out_of_date(vol)) {
 			ntfs_error(sb, "Failed to mark quotas out of date%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_stamp_usnjrnl(vol)) {
 			ntfs_error(sb, "Failed to stamp transation log "
 					"($UsnJrnl)%s", es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
@@ -533,8 +543,11 @@
 
 	// TODO: Deal with *flags.
 
-	if (!parse_options(vol, opt))
+	if (!parse_options(vol, opt)) {
+		unlock_kernel();
 		return -EINVAL;
+	}
+	unlock_kernel();
 	ntfs_debug("Done.");
 	return 0;
 }
@@ -2246,6 +2259,9 @@
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering.");
+
+	lock_kernel();
+
 #ifdef NTFS_RW
 	/*
 	 * Commit all inodes while they are still open in case some of them
@@ -2373,39 +2389,12 @@
 		vol->mftmirr_ino = NULL;
 	}
 	/*
-	 * If any dirty inodes are left, throw away all mft data page cache
-	 * pages to allow a clean umount.  This should never happen any more
-	 * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
-	 * the underlying mft records are written out and cleaned.  If it does,
-	 * happen anyway, we want to know...
+	 * We should have no dirty inodes left, due to
+	 * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+	 * the underlying mft records are written out and cleaned.
 	 */
 	ntfs_commit_inode(vol->mft_ino);
 	write_inode_now(vol->mft_ino, 1);
-	if (sb_has_dirty_inodes(sb)) {
-		const char *s1, *s2;
-
-		mutex_lock(&vol->mft_ino->i_mutex);
-		truncate_inode_pages(vol->mft_ino->i_mapping, 0);
-		mutex_unlock(&vol->mft_ino->i_mutex);
-		write_inode_now(vol->mft_ino, 1);
-		if (sb_has_dirty_inodes(sb)) {
-			static const char *_s1 = "inodes";
-			static const char *_s2 = "";
-			s1 = _s1;
-			s2 = _s2;
-		} else {
-			static const char *_s1 = "mft pages";
-			static const char *_s2 = "They have been thrown "
-					"away.  ";
-			s1 = _s1;
-			s2 = _s2;
-		}
-		ntfs_error(sb, "Dirty %s found at umount time.  %sYou should "
-				"run chkdsk.  Please email "
-				"linux-ntfs-dev@lists.sourceforge.net and say "
-				"that you saw this message.  Thank you.", s1,
-				s2);
-	}
 #endif /* NTFS_RW */
 
 	iput(vol->mft_ino);
@@ -2444,7 +2433,8 @@
 	}
 	sb->s_fs_info = NULL;
 	kfree(vol);
-	return;
+
+	unlock_kernel();
 }
 
 /**
@@ -2785,13 +2775,13 @@
 		goto err_out_now;
 
 	/* We support sector sizes up to the PAGE_CACHE_SIZE. */
-	if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+	if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
 		if (!silent)
 			ntfs_error(sb, "Device has unsupported sector size "
 					"(%i).  The maximum supported sector "
 					"size on this architecture is %lu "
 					"bytes.",
-					bdev_hardsect_size(sb->s_bdev),
+					bdev_logical_block_size(sb->s_bdev),
 					PAGE_CACHE_SIZE);
 		goto err_out_now;
 	}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4f85ece..09cc25d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1371,7 +1371,7 @@
 
 	bdevname(reg->hr_bdev, reg->hr_dev_name);
 
-	sectsize = bdev_hardsect_size(reg->hr_bdev);
+	sectsize = bdev_logical_block_size(reg->hr_bdev);
 	if (sectsize != reg->hr_block_bytes) {
 		mlog(ML_ERROR,
 		     "blocksize %u incorrect for device, expected %d",
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 79ff8d9..201b40a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,6 +42,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
+#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
@@ -126,7 +127,6 @@
 			    struct buffer_head **bh,
 			    int block,
 			    int sect_size);
-static void ocfs2_write_super(struct super_block *sb);
 static struct inode *ocfs2_alloc_inode(struct super_block *sb);
 static void ocfs2_destroy_inode(struct inode *inode);
 static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
@@ -141,7 +141,6 @@
 	.clear_inode	= ocfs2_clear_inode,
 	.delete_inode	= ocfs2_delete_inode,
 	.sync_fs	= ocfs2_sync_fs,
-	.write_super	= ocfs2_write_super,
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
 	.show_options   = ocfs2_show_options,
@@ -365,24 +364,12 @@
 	.llseek =	generic_file_llseek,
 };
 
-/*
- * write_super and sync_fs ripped right out of ext3.
- */
-static void ocfs2_write_super(struct super_block *sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ocfs2_sync_fs(struct super_block *sb, int wait)
 {
 	int status;
 	tid_t target;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	sb->s_dirt = 0;
-
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
@@ -595,6 +582,8 @@
 	struct mount_options parsed_options;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
+	lock_kernel();
+
 	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
 		ret = -EINVAL;
 		goto out;
@@ -698,6 +687,7 @@
 			ocfs2_set_journal_params(osb);
 	}
 out:
+	unlock_kernel();
 	return ret;
 }
 
@@ -713,7 +703,7 @@
 	*bh = NULL;
 
 	/* may be > 512 */
-	*sector_size = bdev_hardsect_size(sb->s_bdev);
+	*sector_size = bdev_logical_block_size(sb->s_bdev);
 	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
 		mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
 		     *sector_size, OCFS2_MAX_BLOCKSIZE);
@@ -1550,9 +1540,13 @@
 {
 	mlog_entry("(0x%p)\n", sb);
 
+	lock_kernel();
+
 	ocfs2_sync_blockdev(sb);
 	ocfs2_dismount_volume(sb, 0);
 
+	unlock_kernel();
+
 	mlog_exit_void();
 }
 
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 834b233..d17e774 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -11,21 +11,6 @@
 #include <linux/mpage.h>
 #include "omfs.h"
 
-static int omfs_sync_file(struct file *file, struct dentry *dentry,
-		int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	err |= omfs_sync_inode(inode);
-	return err ? -EIO : 0;
-}
-
 static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
 {
 	return (sbi->s_sys_blocksize - offset -
@@ -344,7 +329,7 @@
 	.aio_read = generic_file_aio_read,
 	.aio_write = generic_file_aio_write,
 	.mmap = generic_file_mmap,
-	.fsync = omfs_sync_file,
+	.fsync = simple_fsync,
 	.splice_read = generic_file_splice_read,
 };
 
diff --git a/fs/open.c b/fs/open.c
index bdfbf03..7200e23 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -612,7 +612,7 @@
 
 	audit_inode(NULL, dentry);
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@
 	if (!file)
 		goto out;
 
-	error = mnt_want_write(file->f_path.mnt);
+	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
 	dentry = file->f_path.dentry;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 99e33ef..1a9c787 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -219,6 +219,13 @@
 	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
 }
 
+ssize_t part_alignment_offset_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
+}
+
 ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
@@ -272,6 +279,7 @@
 static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
@@ -282,6 +290,7 @@
 	&dev_attr_partition.attr,
 	&dev_attr_start.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
@@ -383,6 +392,7 @@
 	pdev = part_to_dev(p);
 
 	p->start_sect = start;
+	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
@@ -546,27 +556,49 @@
 
 	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
-		sector_t size = state->parts[p].size;
-		sector_t from = state->parts[p].from;
+		sector_t size, from;
+try_scan:
+		size = state->parts[p].size;
 		if (!size)
 			continue;
+
+		from = state->parts[p].from;
 		if (from >= get_capacity(disk)) {
 			printk(KERN_WARNING
 			       "%s: p%d ignored, start %llu is behind the end of the disk\n",
 			       disk->disk_name, p, (unsigned long long) from);
 			continue;
 		}
+
 		if (from + size > get_capacity(disk)) {
-			/*
-			 * we can not ignore partitions of broken tables
-			 * created by for example camera firmware, but we
-			 * limit them to the end of the disk to avoid
-			 * creating invalid block devices
-			 */
+			struct block_device_operations *bdops = disk->fops;
+			unsigned long long capacity;
+
 			printk(KERN_WARNING
-			       "%s: p%d size %llu limited to end of disk\n",
+			       "%s: p%d size %llu exceeds device capacity, ",
 			       disk->disk_name, p, (unsigned long long) size);
-			size = get_capacity(disk) - from;
+
+			if (bdops->set_capacity &&
+			    (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
+				printk(KERN_CONT "enabling native capacity\n");
+				capacity = bdops->set_capacity(disk, ~0ULL);
+				disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+				if (capacity > get_capacity(disk)) {
+					set_capacity(disk, capacity);
+					check_disk_size_change(disk, bdev);
+					bdev->bd_invalidated = 0;
+				}
+				goto try_scan;
+			} else {
+				/*
+				 * we can not ignore partitions of broken tables
+				 * created by for example camera firmware, but
+				 * we limit them to the end of the disk to avoid
+				 * creating invalid block devices
+				 */
+				printk(KERN_CONT "limited to end of disk\n");
+				size = get_capacity(disk) - from;
+			}
 		}
 		part = add_partition(disk, p, from, size,
 				     state->parts[p].flags);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 4629768..fc71aab 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -76,7 +76,7 @@
 	Sector sect;
 
 	res = 0;
-	blocksize = bdev_hardsect_size(bdev);
+	blocksize = bdev_logical_block_size(bdev);
 	if (blocksize <= 0)
 		goto out_exit;
 	i_size = i_size_read(bdev->bd_inode);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 7965118..0028d2e 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -110,7 +110,7 @@
 	Sector sect;
 	unsigned char *data;
 	u32 this_sector, this_size;
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	int loopct = 0;		/* number of links followed
 				   without finding a data partition */
 	int i;
@@ -415,7 +415,7 @@
  
 int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	Sector sect;
 	unsigned char *data;
 	struct partition *p;
diff --git a/fs/pipe.c b/fs/pipe.c
index 13414ec..f7dd21a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -302,6 +302,20 @@
 	return 0;
 }
 
+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ *
+ * Description:
+ *	This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+}
+
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3326bbf..1539e63 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2128,9 +2128,15 @@
 	if (copy_from_user(page, buf, count))
 		goto out_free;
 
+	/* Guard against adverse ptrace interaction */
+	length = mutex_lock_interruptible(&task->cred_guard_mutex);
+	if (length < 0)
+		goto out_free;
+
 	length = security_setprocattr(task,
 				      (char*)file->f_path.dentry->d_name.name,
 				      (void*)page, count);
+	mutex_unlock(&task->cred_guard_mutex);
 out_free:
 	free_page((unsigned long) page);
 out:
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f6db961..753ca37 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -92,3 +92,28 @@
 	struct list_head lh;
 };
 void pde_users_dec(struct proc_dir_entry *pde);
+
+extern spinlock_t proc_subdir_lock;
+
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+void task_mem(struct seq_file *, struct mm_struct *);
+
+struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+void de_put(struct proc_dir_entry *de);
+
+extern struct vfsmount *proc_mnt;
+int proc_fill_super(struct super_block *);
+struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+
+/*
+ * These are generic /proc routines that use the internal
+ * "struct proc_dir_entry" tree to traverse the filesystem.
+ *
+ * The /proc root directory has extended versions to take care
+ * of the /proc/<pid> subdirectories.
+ */
+int proc_readdir(struct file *, void *, filldir_t);
+struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 9bca39c..1afa4dd 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -12,20 +12,14 @@
 
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
-	int a, b, c;
-	unsigned long seq;
+	unsigned long avnrun[3];
 
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		a = avenrun[0] + (FIXED_1/200);
-		b = avenrun[1] + (FIXED_1/200);
-		c = avenrun[2] + (FIXED_1/200);
-	} while (read_seqretry(&xtime_lock, seq));
+	get_avenrun(avnrun, FIXED_1/200, 0);
 
-	seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
-		LOAD_INT(a), LOAD_FRAC(a),
-		LOAD_INT(b), LOAD_FRAC(b),
-		LOAD_INT(c), LOAD_FRAC(c),
+	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
 		nr_running(), nr_threads,
 		task_active_pid_ns(current)->last_pid);
 	return 0;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de2bba5..fc6c302 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 #ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index 502d7fe..e4d408c 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_QNX4FS_FS) += qnx4.o
 
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o
+qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 8425cf6..e1cd061 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -13,14 +13,9 @@
  * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bitops.h>
+#include "qnx4.h"
 
 #if 0
 int qnx4_new_block(struct super_block *sb)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ea9ffef..003c68f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -11,14 +11,9 @@
  * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
  */
 
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
-
+#include "qnx4.h"
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
@@ -84,7 +79,7 @@
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 867f42b..09b170a 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -12,8 +12,7 @@
  * 27-06-1998 by Frank Denis : file overwriting.
  */
 
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
+#include "qnx4.h"
 
 /*
  * We have mostly NULL's here: the current defaults are ok for
@@ -29,7 +28,7 @@
 #ifdef CONFIG_QNX4FS_RW
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
-	.fsync		= qnx4_sync_file,
+	.fsync		= simple_fsync,
 #endif
 };
 
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c
deleted file mode 100644
index aa3b195..0000000
--- a/fs/qnx4/fsync.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/* 
- * QNX4 file system, Linux implementation.
- * 
- * Version : 0.1
- * 
- * Using parts of the xiafs filesystem.
- * 
- * History :
- * 
- * 24-03-1998 by Richard Frowijn : first release.
- */
-
-#include <linux/errno.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
-#include <linux/buffer_head.h>
-
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-
-#include <asm/system.h>
-
-/*
- * The functions for qnx4 fs file synchronization.
- */
-
-#ifdef CONFIG_QNX4FS_RW
-
-static int sync_block(struct inode *inode, unsigned short *block, int wait)
-{
-	struct buffer_head *bh;
-	unsigned short tmp;
-
-	if (!*block)
-		return 0;
-	tmp = *block;
-	bh = sb_find_get_block(inode->i_sb, *block);
-	if (!bh)
-		return 0;
-	if (*block != tmp) {
-		brelse(bh);
-		return 1;
-	}
-	if (wait && buffer_req(bh) && !buffer_uptodate(bh)) {
-		brelse(bh);
-		return -1;
-	}
-	if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) {
-		brelse(bh);
-		return 0;
-	}
-	ll_rw_block(WRITE, 1, &bh);
-	atomic_dec(&bh->b_count);
-	return 0;
-}
-
-#ifdef WTF
-static int sync_iblock(struct inode *inode, unsigned short *iblock,
-		       struct buffer_head **bh, int wait)
-{
-	int rc;
-	unsigned short tmp;
-
-	*bh = NULL;
-	tmp = *iblock;
-	if (!tmp)
-		return 0;
-	rc = sync_block(inode, iblock, wait);
-	if (rc)
-		return rc;
-	*bh = sb_bread(inode->i_sb, tmp);
-	if (tmp != *iblock) {
-		brelse(*bh);
-		*bh = NULL;
-		return 1;
-	}
-	if (!*bh)
-		return -1;
-	return 0;
-}
-#endif
-
-static int sync_direct(struct inode *inode, int wait)
-{
-	int i;
-	int rc, err = 0;
-
-	for (i = 0; i < 7; i++) {
-		rc = sync_block(inode,
-				(unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	return err;
-}
-
-#ifdef WTF
-static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait)
-{
-	int i;
-	struct buffer_head *ind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, iblock, &ind_bh, wait);
-	if (rc || !ind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_block(inode,
-				((unsigned short *) ind_bh->b_data) + i,
-				wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(ind_bh);
-	return err;
-}
-
-static int sync_dindirect(struct inode *inode, unsigned short *diblock,
-			  int wait)
-{
-	int i;
-	struct buffer_head *dind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, diblock, &dind_bh, wait);
-	if (rc || !dind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_indirect(inode,
-				((unsigned short *) dind_bh->b_data) + i,
-				   wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(dind_bh);
-	return err;
-}
-#endif
-
-int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused)
-{
-        struct inode *inode = dentry->d_inode;
-	int wait, err = 0;
-        
-        (void) file;
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return -EINVAL;
-
-	lock_kernel();
-	for (wait = 0; wait <= 1; wait++) {
-		err |= sync_direct(inode, wait);
-	}
-	err |= qnx4_sync_inode(inode);
-	unlock_kernel();
-	return (err < 0) ? -EIO : 0;
-}
-
-#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fe1f0f3..681df5f 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -13,19 +13,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/highuid.h>
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/vfs.h>
-#include <asm/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include "qnx4.h"
 
 #define QNX4_VERSION  4
 #define QNX4_BMNAME   ".bitmap"
@@ -34,31 +30,6 @@
 
 #ifdef CONFIG_QNX4FS_RW
 
-int qnx4_sync_inode(struct inode *inode)
-{
-	int err = 0;
-# if 0
-	struct buffer_head *bh;
-
-   	bh = qnx4_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
-			printk ("IO error syncing qnx4 inode [%s:%08lx]\n",
-				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
-		}
-	        brelse (bh);
-	} else if (!bh) {
-		err = -1;
-	}
-# endif
-
-	return err;
-}
-
 static void qnx4_delete_inode(struct inode *inode)
 {
 	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
@@ -70,15 +41,7 @@
 	unlock_kernel();
 }
 
-static void qnx4_write_super(struct super_block *sb)
-{
-	lock_kernel();
-	QNX4DEBUG(("qnx4: write_super\n"));
-	sb->s_dirt = 0;
-	unlock_kernel();
-}
-
-static int qnx4_write_inode(struct inode *inode, int unused)
+static int qnx4_write_inode(struct inode *inode, int do_sync)
 {
 	struct qnx4_inode_entry *raw_inode;
 	int block, ino;
@@ -115,6 +78,16 @@
 	raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
 	raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
 	mark_buffer_dirty(bh);
+	if (do_sync) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
+			printk("qnx4: IO error syncing inode [%s:%08x]\n",
+					inode->i_sb->s_id, ino);
+			brelse(bh);
+			unlock_kernel();
+			return -EIO;
+		}
+	}
 	brelse(bh);
 	unlock_kernel();
 	return 0;
@@ -138,7 +111,6 @@
 #ifdef CONFIG_QNX4FS_RW
 	.write_inode	= qnx4_write_inode,
 	.delete_inode	= qnx4_delete_inode,
-	.write_super	= qnx4_write_super,
 #endif
 };
 
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 775eed3..5972ed2 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -12,16 +12,9 @@
  * 04-07-1998 by Frank Denis : first step for rmdir/unlink.
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include "qnx4.h"
 
 
 /*
@@ -187,7 +180,7 @@
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	clear_nlink(inode);
 	mark_inode_dirty(inode);
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -232,7 +225,7 @@
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
new file mode 100644
index 0000000..9efc089
--- /dev/null
+++ b/fs/qnx4/qnx4.h
@@ -0,0 +1,57 @@
+#include <linux/fs.h>
+#include <linux/qnx4_fs.h>
+
+#define QNX4_DEBUG 0
+
+#if QNX4_DEBUG
+#define QNX4DEBUG(X) printk X
+#else
+#define QNX4DEBUG(X) (void) 0
+#endif
+
+struct qnx4_sb_info {
+	struct buffer_head	*sb_buf;	/* superblock buffer */
+	struct qnx4_super_block	*sb;		/* our superblock */
+	unsigned int		Version;	/* may be useful */
+	struct qnx4_inode_entry	*BitMap;	/* useful */
+};
+
+struct qnx4_inode_info {
+	struct qnx4_inode_entry raw;
+	loff_t mmu_private;
+	struct inode vfs_inode;
+};
+
+extern struct inode *qnx4_iget(struct super_block *, unsigned long);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
+extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
+
+extern struct buffer_head *qnx4_bread(struct inode *, int, int);
+
+extern const struct inode_operations qnx4_file_inode_operations;
+extern const struct inode_operations qnx4_dir_inode_operations;
+extern const struct file_operations qnx4_file_operations;
+extern const struct file_operations qnx4_dir_operations;
+extern int qnx4_is_free(struct super_block *sb, long block);
+extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
+extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
+extern void qnx4_truncate(struct inode *inode);
+extern void qnx4_free_inode(struct inode *inode);
+extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
+extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
+
+static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
+{
+	return container_of(inode, struct qnx4_inode_info, vfs_inode);
+}
+
+static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
+{
+	return &qnx4_i(inode)->raw;
+}
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
index 6437c1c..d94d9ee 100644
--- a/fs/qnx4/truncate.c
+++ b/fs/qnx4/truncate.c
@@ -10,12 +10,8 @@
  * 30-06-1998 by Frank DENIS : ugly filler.
  */
 
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include "qnx4.h"
 
 #ifdef CONFIG_QNX4FS_RW
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b7f5a46..95c5b42 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -159,10 +159,14 @@
 	return error;
 }
 
-static void quota_sync_sb(struct super_block *sb, int type)
+#ifdef CONFIG_QUOTA
+void sync_quota_sb(struct super_block *sb, int type)
 {
 	int cnt;
 
+	if (!sb->s_qcop->quota_sync)
+		return;
+
 	sb->s_qcop->quota_sync(sb, type);
 
 	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
@@ -191,17 +195,13 @@
 	}
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 }
+#endif
 
-void sync_dquots(struct super_block *sb, int type)
+static void sync_dquots(int type)
 {
+	struct super_block *sb;
 	int cnt;
 
-	if (sb) {
-		if (sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
-		return;
-	}
-
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
@@ -222,8 +222,8 @@
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
+		if (sb->s_root)
+			sync_quota_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
@@ -301,7 +301,10 @@
 			return sb->s_qcop->set_dqblk(sb, type, id, &idq);
 		}
 		case Q_SYNC:
-			sync_dquots(sb, type);
+			if (sb)
+				sync_quota_sb(sb, type);
+			else
+				sync_dquots(type);
 			return 0;
 
 		case Q_XQUOTAON:
diff --git a/fs/read_write.c b/fs/read_write.c
index 9d1e76b..6c8c55d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -805,12 +805,6 @@
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
-	retval = -EINVAL;
-	in_inode = in_file->f_path.dentry->d_inode;
-	if (!in_inode)
-		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->splice_read)
-		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
 		ppos = &in_file->f_pos;
@@ -834,6 +828,7 @@
 	retval = -EINVAL;
 	if (!out_file->f_op || !out_file->f_op->sendpage)
 		goto fput_out;
+	in_inode = in_file->f_path.dentry->d_inode;
 	out_inode = out_file->f_path.dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval < 0)
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 45ee3d3..6d2668f 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -44,13 +44,11 @@
 static inline bool is_privroot_deh(struct dentry *dir,
 				   struct reiserfs_de_head *deh)
 {
-	int ret = 0;
-#ifdef CONFIG_REISERFS_FS_XATTR
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	ret = (dir == dir->d_parent && privroot->d_inode &&
-	       deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
-#endif
-	return ret;
+	if (reiserfs_expose_privroot(dir->d_sb))
+		return 0;
+	return (dir == dir->d_parent && privroot->d_inode &&
+	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
 
 int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3567fb9..2969773 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
+#include <linux/smp_lock.h>
 
 struct file_system_type reiserfs_fs_type;
 
@@ -64,18 +65,15 @@
 
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
-	if (!(s->s_flags & MS_RDONLY)) {
-		struct reiserfs_transaction_handle th;
-		reiserfs_write_lock(s);
-		if (!journal_begin(&th, s, 1))
-			if (!journal_end_sync(&th, s, 1))
-				reiserfs_flush_old_commits(s);
-		s->s_dirt = 0;	/* Even if it's not true.
-				 * We'll loop forever in sync_supers otherwise */
-		reiserfs_write_unlock(s);
-	} else {
-		s->s_dirt = 0;
-	}
+	struct reiserfs_transaction_handle th;
+
+	reiserfs_write_lock(s);
+	if (!journal_begin(&th, s, 1))
+		if (!journal_end_sync(&th, s, 1))
+			reiserfs_flush_old_commits(s);
+	s->s_dirt = 0;	/* Even if it's not true.
+			 * We'll loop forever in sync_supers otherwise */
+	reiserfs_write_unlock(s);
 	return 0;
 }
 
@@ -468,6 +466,11 @@
 	struct reiserfs_transaction_handle th;
 	th.t_trans_id = 0;
 
+	lock_kernel();
+
+	if (s->s_dirt)
+		reiserfs_write_super(s);
+
 	/* change file system state to current state if it was mounted with read-write permissions */
 	if (!(s->s_flags & MS_RDONLY)) {
 		if (!journal_begin(&th, s, 10)) {
@@ -500,7 +503,7 @@
 	kfree(s->s_fs_info);
 	s->s_fs_info = NULL;
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache *reiserfs_inode_cachep;
@@ -898,6 +901,7 @@
 		{"conv",.setmask = 1 << REISERFS_CONVERT},
 		{"attrs",.setmask = 1 << REISERFS_ATTRS},
 		{"noattrs",.clrmask = 1 << REISERFS_ATTRS},
+		{"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT},
 #ifdef CONFIG_REISERFS_FS_XATTR
 		{"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
 		{"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
@@ -1193,6 +1197,7 @@
 	memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
 #endif
 
+	lock_kernel();
 	rs = SB_DISK_SUPER_BLOCK(s);
 
 	if (!reiserfs_parse_options
@@ -1315,10 +1320,12 @@
 
 out_ok:
 	replace_mount_options(s, new_opts);
+	unlock_kernel();
 	return 0;
 
 out_err:
 	kfree(new_opts);
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8e7deb0..f3d47d8 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -981,7 +981,8 @@
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		s->s_root->d_op = &xattr_lookup_poison_ops;
+		if (!reiserfs_expose_privroot(s))
+			s->s_root->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fc27fbfc..1402d2d 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -474,6 +474,8 @@
 {
 	struct smb_sb_info *server = SMB_SB(sb);
 
+	lock_kernel();
+
 	smb_lock_server(server);
 	server->state = CONN_INVALID;
 	smbiod_unregister_server(server);
@@ -489,6 +491,8 @@
 	smb_unlock_server(server);
 	put_pid(server->conn_pid);
 	kfree(server);
+
+	unlock_kernel();
 }
 
 static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
diff --git a/fs/splice.c b/fs/splice.c
index 666953d5..73766d2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -507,9 +507,131 @@
 
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_splice_read);
 
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = generic_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+			    loff_t pos)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_write(file, (const char __user *)buf, count, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	unsigned int nr_pages;
+	unsigned int nr_freed;
+	size_t offset;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct iovec vec[PIPE_BUFFERS];
+	pgoff_t index;
+	ssize_t res;
+	size_t this_len;
+	int error;
+	int i;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &default_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+		struct page *page;
+
+		page = alloc_page(GFP_USER);
+		error = -ENOMEM;
+		if (!page)
+			goto err;
+
+		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		vec[i].iov_base = (void __user *) page_address(page);
+		vec[i].iov_len = this_len;
+		pages[i] = page;
+		spd.nr_pages++;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+	if (res < 0) {
+		error = res;
+		goto err;
+	}
+
+	error = 0;
+	if (!res)
+		goto err;
+
+	nr_freed = 0;
+	for (i = 0; i < spd.nr_pages; i++) {
+		this_len = min_t(size_t, vec[i].iov_len, res);
+		partial[i].offset = 0;
+		partial[i].len = this_len;
+		if (!this_len) {
+			__free_page(pages[i]);
+			pages[i] = NULL;
+			nr_freed++;
+		}
+		res -= this_len;
+	}
+	spd.nr_pages -= nr_freed;
+
+	res = splice_to_pipe(pipe, &spd);
+	if (res > 0)
+		*ppos += res;
+
+	return res;
+
+err:
+	for (i = 0; i < spd.nr_pages; i++)
+		__free_page(pages[i]);
+
+	return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -881,6 +1003,36 @@
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			  struct splice_desc *sd)
+{
+	int ret;
+	void *data;
+
+	ret = buf->ops->confirm(pipe, buf);
+	if (ret)
+		return ret;
+
+	data = buf->ops->map(pipe, buf, 0);
+	ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+	buf->ops->unmap(pipe, buf, data);
+
+	return ret;
+}
+
+static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
+					 struct file *out, loff_t *ppos,
+					 size_t len, unsigned int flags)
+{
+	ssize_t ret;
+
+	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+	if (ret > 0)
+		*ppos += ret;
+
+	return ret;
+}
+
 /**
  * generic_splice_sendpage - splice data from a pipe to a socket
  * @pipe:	pipe to splice from
@@ -908,11 +1060,10 @@
 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 			   loff_t *ppos, size_t len, unsigned int flags)
 {
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
+				loff_t *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!out->f_op || !out->f_op->splice_write))
-		return -EINVAL;
-
 	if (unlikely(!(out->f_mode & FMODE_WRITE)))
 		return -EBADF;
 
@@ -923,7 +1074,11 @@
 	if (unlikely(ret < 0))
 		return ret;
 
-	return out->f_op->splice_write(pipe, out, ppos, len, flags);
+	splice_write = out->f_op->splice_write;
+	if (!splice_write)
+		splice_write = default_file_splice_write;
+
+	return splice_write(pipe, out, ppos, len, flags);
 }
 
 /*
@@ -933,11 +1088,10 @@
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!in->f_op || !in->f_op->splice_read))
-		return -EINVAL;
-
 	if (unlikely(!(in->f_mode & FMODE_READ)))
 		return -EBADF;
 
@@ -945,7 +1099,11 @@
 	if (unlikely(ret < 0))
 		return ret;
 
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	splice_read = in->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
+
+	return splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
@@ -1112,6 +1270,9 @@
 	return ret;
 }
 
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+			       struct pipe_inode_info *opipe,
+			       size_t len, unsigned int flags);
 /*
  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
  * location, so checking ->i_pipe is not enough to verify that this is a
@@ -1132,12 +1293,32 @@
 		      struct file *out, loff_t __user *off_out,
 		      size_t len, unsigned int flags)
 {
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *ipipe;
+	struct pipe_inode_info *opipe;
 	loff_t offset, *off;
 	long ret;
 
-	pipe = pipe_info(in->f_path.dentry->d_inode);
-	if (pipe) {
+	ipipe = pipe_info(in->f_path.dentry->d_inode);
+	opipe = pipe_info(out->f_path.dentry->d_inode);
+
+	if (ipipe && opipe) {
+		if (off_in || off_out)
+			return -ESPIPE;
+
+		if (!(in->f_mode & FMODE_READ))
+			return -EBADF;
+
+		if (!(out->f_mode & FMODE_WRITE))
+			return -EBADF;
+
+		/* Splicing to self would be fun, but... */
+		if (ipipe == opipe)
+			return -EINVAL;
+
+		return splice_pipe_to_pipe(ipipe, opipe, len, flags);
+	}
+
+	if (ipipe) {
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1149,7 +1330,7 @@
 		} else
 			off = &out->f_pos;
 
-		ret = do_splice_from(pipe, out, off, len, flags);
+		ret = do_splice_from(ipipe, out, off, len, flags);
 
 		if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
 			ret = -EFAULT;
@@ -1157,8 +1338,7 @@
 		return ret;
 	}
 
-	pipe = pipe_info(out->f_path.dentry->d_inode);
-	if (pipe) {
+	if (opipe) {
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1170,7 +1350,7 @@
 		} else
 			off = &in->f_pos;
 
-		ret = do_splice_to(in, off, pipe, len, flags);
+		ret = do_splice_to(in, off, opipe, len, flags);
 
 		if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
 			ret = -EFAULT;
@@ -1511,7 +1691,7 @@
  * Make sure there's data to read. Wait for input if we can, otherwise
  * return an appropriate error.
  */
-static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 {
 	int ret;
 
@@ -1549,7 +1729,7 @@
  * Make sure there's writeable room. Wait for room if we can, otherwise
  * return an appropriate error.
  */
-static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 {
 	int ret;
 
@@ -1587,6 +1767,124 @@
 }
 
 /*
+ * Splice contents of ipipe to opipe.
+ */
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+			       struct pipe_inode_info *opipe,
+			       size_t len, unsigned int flags)
+{
+	struct pipe_buffer *ibuf, *obuf;
+	int ret = 0, nbuf;
+	bool input_wakeup = false;
+
+
+retry:
+	ret = ipipe_prep(ipipe, flags);
+	if (ret)
+		return ret;
+
+	ret = opipe_prep(opipe, flags);
+	if (ret)
+		return ret;
+
+	/*
+	 * Potential ABBA deadlock, work around it by ordering lock
+	 * grabbing by pipe info address. Otherwise two different processes
+	 * could deadlock (one doing tee from A -> B, the other from B -> A).
+	 */
+	pipe_double_lock(ipipe, opipe);
+
+	do {
+		if (!opipe->readers) {
+			send_sig(SIGPIPE, current, 0);
+			if (!ret)
+				ret = -EPIPE;
+			break;
+		}
+
+		if (!ipipe->nrbufs && !ipipe->writers)
+			break;
+
+		/*
+		 * Cannot make any progress, because either the input
+		 * pipe is empty or the output pipe is full.
+		 */
+		if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+			/* Already processed some buffers, break */
+			if (ret)
+				break;
+
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+
+			/*
+			 * We raced with another reader/writer and haven't
+			 * managed to process any buffers.  A zero return
+			 * value means EOF, so retry instead.
+			 */
+			pipe_unlock(ipipe);
+			pipe_unlock(opipe);
+			goto retry;
+		}
+
+		ibuf = ipipe->bufs + ipipe->curbuf;
+		nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+		obuf = opipe->bufs + nbuf;
+
+		if (len >= ibuf->len) {
+			/*
+			 * Simply move the whole buffer from ipipe to opipe
+			 */
+			*obuf = *ibuf;
+			ibuf->ops = NULL;
+			opipe->nrbufs++;
+			ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+			ipipe->nrbufs--;
+			input_wakeup = true;
+		} else {
+			/*
+			 * Get a reference to this pipe buffer,
+			 * so we can copy the contents over.
+			 */
+			ibuf->ops->get(ipipe, ibuf);
+			*obuf = *ibuf;
+
+			/*
+			 * Don't inherit the gift flag, we need to
+			 * prevent multiple steals of this page.
+			 */
+			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
+			obuf->len = len;
+			opipe->nrbufs++;
+			ibuf->offset += obuf->len;
+			ibuf->len -= obuf->len;
+		}
+		ret += obuf->len;
+		len -= obuf->len;
+	} while (len);
+
+	pipe_unlock(ipipe);
+	pipe_unlock(opipe);
+
+	/*
+	 * If we put data in the output pipe, wakeup any potential readers.
+	 */
+	if (ret > 0) {
+		smp_mb();
+		if (waitqueue_active(&opipe->wait))
+			wake_up_interruptible(&opipe->wait);
+		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+	}
+	if (input_wakeup)
+		wakeup_pipe_writers(ipipe);
+
+	return ret;
+}
+
+/*
  * Link contents of ipipe to opipe.
  */
 static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1690,9 +1988,9 @@
 		 * Keep going, unless we encounter an error. The ipipe/opipe
 		 * ordering doesn't really matter.
 		 */
-		ret = link_ipipe_prep(ipipe, flags);
+		ret = ipipe_prep(ipipe, flags);
 		if (!ret) {
-			ret = link_opipe_prep(opipe, flags);
+			ret = opipe_prep(opipe, flags);
 			if (!ret)
 				ret = link_pipe(ipipe, opipe, len, flags);
 		}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 0adc624..3b52770 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -338,6 +338,8 @@
 
 static void squashfs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	if (sb->s_fs_info) {
 		struct squashfs_sb_info *sbi = sb->s_fs_info;
 		squashfs_cache_delete(sbi->block_cache);
@@ -350,6 +352,8 @@
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
+
+	unlock_kernel();
 }
 
 
diff --git a/fs/super.c b/fs/super.c
index 1943fdf..83b47416 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -28,7 +28,6 @@
 #include <linux/blkdev.h>
 #include <linux/quotaops.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>		/* for fsync_super() */
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -38,7 +37,6 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
-#include <linux/async.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -72,7 +70,6 @@
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
-		INIT_LIST_HEAD(&s->s_async_list);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -285,38 +282,6 @@
 EXPORT_SYMBOL(lock_super);
 EXPORT_SYMBOL(unlock_super);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.  Requires a second blkdev
- * flush by the caller to complete the operation.
- */
-void __fsync_super(struct super_block *sb)
-{
-	sync_inodes_sb(sb, 0);
-	vfs_dq_sync(sb);
-	lock_super(sb);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
-	sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	__fsync_super(sb);
-	return sync_blockdev(sb->s_bdev);
-}
-EXPORT_SYMBOL_GPL(fsync_super);
-
 /**
  *	generic_shutdown_super	-	common helper for ->kill_sb()
  *	@sb: superblock to kill
@@ -338,21 +303,13 @@
 
 	if (sb->s_root) {
 		shrink_dcache_for_umount(sb);
-		fsync_super(sb);
-		lock_super(sb);
+		sync_filesystem(sb);
+		get_fs_excl();
 		sb->s_flags &= ~MS_ACTIVE;
 
-		/*
-		 * wait for asynchronous fs operations to finish before going further
-		 */
-		async_synchronize_full_domain(&sb->s_async_list);
-
 		/* bad name - it should be evict_inodes() */
 		invalidate_inodes(sb);
-		lock_kernel();
 
-		if (sop->write_super && sb->s_dirt)
-			sop->write_super(sb);
 		if (sop->put_super)
 			sop->put_super(sb);
 
@@ -362,9 +319,7 @@
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
 		}
-
-		unlock_kernel();
-		unlock_super(sb);
+		put_fs_excl();
 	}
 	spin_lock(&sb_lock);
 	/* should be initialized for __put_super_and_need_restart() */
@@ -441,16 +396,14 @@
 
 EXPORT_SYMBOL(drop_super);
 
-static inline void write_super(struct super_block *sb)
-{
-	lock_super(sb);
-	if (sb->s_root && sb->s_dirt)
-		if (sb->s_op->write_super)
-			sb->s_op->write_super(sb);
-	unlock_super(sb);
-}
-
-/*
+/**
+ * sync_supers - helper for periodic superblock writeback
+ *
+ * Call the write_super method if present on all dirty superblocks in
+ * the system.  This is for the periodic writeback used by most older
+ * filesystems.  For data integrity superblock writeback use
+ * sync_filesystems() instead.
+ *
  * Note: check the dirty flag before waiting, so we don't
  * hold up the sync while mounting a device. (The newly
  * mounted device won't need syncing.)
@@ -462,12 +415,15 @@
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_dirt) {
+		if (sb->s_op->write_super && sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
+
 			down_read(&sb->s_umount);
-			write_super(sb);
+			if (sb->s_root && sb->s_dirt)
+				sb->s_op->write_super(sb);
 			up_read(&sb->s_umount);
+
 			spin_lock(&sb_lock);
 			if (__put_super_and_need_restart(sb))
 				goto restart;
@@ -476,60 +432,6 @@
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Call the ->sync_fs super_op against all filesystems which are r/w and
- * which implement it.
- *
- * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync_fs
- * is used only here.  We set it against all filesystems and then clear it as
- * we sync them.  So redirtied filesystems are skipped.
- *
- * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync_fs
- * flags again, which will cause process A to resync everything.  Fix that with
- * a local mutex.
- *
- * (Fabian) Avoid sync_fs with clean fs & wait mode 0
- */
-void sync_filesystems(int wait)
-{
-	struct super_block *sb;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);		/* Could be down_interruptible */
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_op->sync_fs)
-			continue;
-		if (sb->s_flags & MS_RDONLY)
-			continue;
-		sb->s_need_sync_fs = 1;
-	}
-
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_need_sync_fs)
-			continue;
-		sb->s_need_sync_fs = 0;
-		if (sb->s_flags & MS_RDONLY)
-			continue;	/* hm.  Was remounted r/o meanwhile */
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		async_synchronize_full_domain(&sb->s_async_list);
-		if (sb->s_root && (wait || sb->s_dirt))
-			sb->s_op->sync_fs(sb, wait);
-		up_read(&sb->s_umount);
-		/* restart only when sb is no longer on the list */
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-	mutex_unlock(&mutex);
-}
-
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
@@ -616,45 +518,6 @@
 }
 
 /**
- *	mark_files_ro - mark all files read-only
- *	@sb: superblock in question
- *
- *	All files are marked read-only.  We don't care about pending
- *	delete files so this should be used in 'force' mode only.
- */
-
-static void mark_files_ro(struct super_block *sb)
-{
-	struct file *f;
-
-retry:
-	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-		struct vfsmount *mnt;
-		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
-		       continue;
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		f->f_mode &= ~FMODE_WRITE;
-		if (file_check_writeable(f) != 0)
-			continue;
-		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
-	}
-	file_list_unlock();
-}
-
-/**
  *	do_remount_sb - asks filesystem to change mount options.
  *	@sb:	superblock in question
  *	@flags:	numeric part of options
@@ -675,27 +538,31 @@
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
-	fsync_super(sb);
+	sync_filesystem(sb);
 
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
 	if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
 		if (force)
 			mark_files_ro(sb);
-		else if (!fs_may_remount_ro(sb))
+		else if (!fs_may_remount_ro(sb)) {
+			unlock_kernel();
 			return -EBUSY;
+		}
 		retval = vfs_dq_off(sb, 1);
-		if (retval < 0 && retval != -ENOSYS)
+		if (retval < 0 && retval != -ENOSYS) {
+			unlock_kernel();
 			return -EBUSY;
+		}
 	}
 	remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
 
 	if (sb->s_op->remount_fs) {
-		lock_super(sb);
 		retval = sb->s_op->remount_fs(sb, &flags, data);
-		unlock_super(sb);
-		if (retval)
+		if (retval) {
+			unlock_kernel();
 			return retval;
+		}
 	}
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 	if (remount_rw)
@@ -711,18 +578,17 @@
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		sb->s_count++;
 		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
+		down_write(&sb->s_umount);
 		if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
 			/*
 			 * ->remount_fs needs lock_kernel().
 			 *
 			 * What lock protects sb->s_flags??
 			 */
-			lock_kernel();
 			do_remount_sb(sb, MS_RDONLY, NULL, 1);
-			unlock_kernel();
 		}
-		drop_super(sb);
+		up_write(&sb->s_umount);
+		put_super(sb);
 		spin_lock(&sb_lock);
 	}
 	spin_unlock(&sb_lock);
diff --git a/fs/sync.c b/fs/sync.c
index 7abc65f..dd20002 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -13,38 +13,123 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
 /*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
  */
-static void do_sync(unsigned long wait)
+static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	wakeup_pdflush(0);
-	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
-	vfs_dq_sync(NULL);
-	sync_supers();		/* Write the superblocks */
-	sync_filesystems(0);	/* Start syncing the filesystems */
-	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
+	/* Avoid doing twice syncing and cache pruning for quota sync */
 	if (!wait)
-		printk("Emergency Sync complete\n");
-	if (unlikely(laptop_mode))
-		laptop_sync_completion();
+		writeout_quota_sb(sb, -1);
+	else
+		sync_quota_sb(sb, -1);
+	sync_inodes_sb(sb, wait);
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, wait);
+	return __sync_blockdev(sb->s_bdev, wait);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int sync_filesystem(struct super_block *sb)
+{
+	int ret;
+
+	/*
+	 * We need to be protected against the filesystem going from
+	 * r/o to r/w or vice versa.
+	 */
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	/*
+	 * No point in syncing out anything if the filesystem is read-only.
+	 */
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	ret = __sync_filesystem(sb, 0);
+	if (ret < 0)
+		return ret;
+	return __sync_filesystem(sb, 1);
+}
+EXPORT_SYMBOL_GPL(sync_filesystem);
+
+/*
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
+ *
+ * This operation is careful to avoid the livelock which could easily happen
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
+ * is used only here.  We set it against all filesystems and then clear it as
+ * we sync them.  So redirtied filesystems are skipped.
+ *
+ * But if process A is currently running sync_filesystems and then process B
+ * calls sync_filesystems as well, process B will set all the s_need_sync
+ * flags again, which will cause process A to resync everything.  Fix that with
+ * a local mutex.
+ */
+static void sync_filesystems(int wait)
+{
+	struct super_block *sb;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);		/* Could be down_interruptible */
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list)
+		sb->s_need_sync = 1;
+
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb->s_need_sync)
+			continue;
+		sb->s_need_sync = 0;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+
+		down_read(&sb->s_umount);
+		if (!(sb->s_flags & MS_RDONLY) && sb->s_root)
+			__sync_filesystem(sb, wait);
+		up_read(&sb->s_umount);
+
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	mutex_unlock(&mutex);
 }
 
 SYSCALL_DEFINE0(sync)
 {
-	do_sync(1);
+	sync_filesystems(0);
+	sync_filesystems(1);
+	if (unlikely(laptop_mode))
+		laptop_sync_completion();
 	return 0;
 }
 
 static void do_sync_work(struct work_struct *work)
 {
-	do_sync(0);
+	/*
+	 * Sync twice to reduce the possibility we skipped some inodes / pages
+	 * because they were temporarily locked
+	 */
+	sync_filesystems(0);
+	sync_filesystems(0);
+	printk("Emergency Sync complete\n");
 	kfree(work);
 }
 
@@ -75,10 +160,8 @@
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
-	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
-	unlock_super(sb);
 
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 56f6552..c779807 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -24,7 +24,7 @@
 const struct file_operations sysv_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= sysv_readdir,
-	.fsync		= sysv_sync_file,
+	.fsync		= simple_fsync,
 };
 
 static inline void dir_put_page(struct page *page)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 589be21..96340c0 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -26,7 +26,7 @@
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= sysv_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -34,18 +34,3 @@
 	.truncate	= sysv_truncate,
 	.getattr	= sysv_getattr,
 };
-
-int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	
-	err |= sysv_sync_inode(inode);
-	return err ? -EIO : 0;
-}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index da20b48..4799234 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -31,15 +31,13 @@
 #include <asm/byteorder.h>
 #include "sysv.h"
 
-/* This is only called on sync() and umount(), when s_dirt=1. */
-static void sysv_write_super(struct super_block *sb)
+static int sysv_sync_fs(struct super_block *sb, int wait)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 	unsigned long time = get_seconds(), old_time;
 
+	lock_super(sb);
 	lock_kernel();
-	if (sb->s_flags & MS_RDONLY)
-		goto clean;
 
 	/*
 	 * If we are going to write out the super block,
@@ -53,18 +51,30 @@
 		*sbi->s_sb_time = cpu_to_fs32(sbi, time);
 		mark_buffer_dirty(sbi->s_bh2);
 	}
-clean:
-	sb->s_dirt = 0;
+
 	unlock_kernel();
+	unlock_super(sb);
+
+	return 0;
+}
+
+static void sysv_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		sysv_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
+	lock_super(sb);
 	if (sbi->s_forced_ro)
 		*flags |= MS_RDONLY;
 	if (!(*flags & MS_RDONLY))
 		sb->s_dirt = 1;
+	unlock_super(sb);
 	return 0;
 }
 
@@ -72,6 +82,11 @@
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		sysv_write_super(sb);
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		/* XXX ext2 also updates the state here */
 		mark_buffer_dirty(sbi->s_bh1);
@@ -84,6 +99,8 @@
 		brelse(sbi->s_bh2);
 
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -236,7 +253,7 @@
 	return ERR_PTR(-EIO);
 }
 
-static struct buffer_head * sysv_update_inode(struct inode * inode)
+int sysv_write_inode(struct inode *inode, int wait)
 {
 	struct super_block * sb = inode->i_sb;
 	struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -244,19 +261,21 @@
 	struct sysv_inode * raw_inode;
 	struct sysv_inode_info * si;
 	unsigned int ino, block;
+	int err = 0;
 
 	ino = inode->i_ino;
 	if (!ino || ino > sbi->s_ninodes) {
 		printk("Bad inode number on dev %s: %d is out of range\n",
 		       inode->i_sb->s_id, ino);
-		return NULL;
+		return -EIO;
 	}
 	raw_inode = sysv_raw_inode(sb, ino, &bh);
 	if (!raw_inode) {
 		printk("unable to read i-node block\n");
-		return NULL;
+		return -EIO;
 	}
 
+	lock_kernel();
 	raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
 	raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
 	raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
@@ -272,38 +291,23 @@
 	for (block = 0; block < 10+1+1+1; block++)
 		write3byte(sbi, (u8 *)&si->i_data[block],
 			&raw_inode->i_data[3*block]);
-	mark_buffer_dirty(bh);
-	return bh;
-}
-
-int sysv_write_inode(struct inode * inode, int wait)
-{
-	struct buffer_head *bh;
-	lock_kernel();
-	bh = sysv_update_inode(inode);
-	brelse(bh);
 	unlock_kernel();
+	mark_buffer_dirty(bh);
+	if (wait) {
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                        printk ("IO error syncing sysv inode [%s:%08x]\n",
+                                sb->s_id, ino);
+                        err = -EIO;
+                }
+        }
+	brelse(bh);
 	return 0;
 }
 
-int sysv_sync_inode(struct inode * inode)
+int sysv_sync_inode(struct inode *inode)
 {
-        int err = 0;
-        struct buffer_head *bh;
-
-        bh = sysv_update_inode(inode);
-        if (bh && buffer_dirty(bh)) {
-                sync_dirty_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh)) {
-                        printk ("IO error syncing sysv inode [%s:%08lx]\n",
-                                inode->i_sb->s_id, inode->i_ino);
-                        err = -1;
-                }
-        }
-        else if (!bh)
-                err = -1;
-        brelse (bh);
-        return err;
+	return sysv_write_inode(inode, 1);
 }
 
 static void sysv_delete_inode(struct inode *inode)
@@ -347,6 +351,7 @@
 	.delete_inode	= sysv_delete_inode,
 	.put_super	= sysv_put_super,
 	.write_super	= sysv_write_super,
+	.sync_fs	= sysv_sync_fs,
 	.remount_fs	= sysv_remount,
 	.statfs		= sysv_statfs,
 };
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 5784a31..53786eb 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -144,7 +144,6 @@
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
 extern int sysv_write_inode(struct inode *, int);
 extern int sysv_sync_inode(struct inode *);
-extern int sysv_sync_file(struct file *, struct dentry *, int);
 extern void sysv_set_inode(struct inode *, dev_t);
 extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int sysv_init_icache(void);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e9f7a75..3589eab 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,6 +36,7 @@
 #include <linux/mount.h>
 #include <linux/math64.h>
 #include <linux/writeback.h>
+#include <linux/smp_lock.h>
 #include "ubifs.h"
 
 /*
@@ -447,9 +448,6 @@
 	if (!wait)
 		return 0;
 
-	if (sb->s_flags & MS_RDONLY)
-		return 0;
-
 	/*
 	 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
 	 * pages, so synchronize them first, then commit the journal. Strictly
@@ -1687,6 +1685,9 @@
 
 	ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
 		  c->vi.vol_id);
+
+	lock_kernel();
+
 	/*
 	 * The following asserts are only valid if there has not been a failure
 	 * of the media. For example, there will be dirty inodes if we failed
@@ -1753,6 +1754,8 @@
 	ubi_close_volume(c->ubi);
 	mutex_unlock(&c->umount_mutex);
 	kfree(c);
+
+	unlock_kernel();
 }
 
 static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1768,17 +1771,22 @@
 		return err;
 	}
 
+	lock_kernel();
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
 		if (c->ro_media) {
 			ubifs_msg("cannot re-mount due to prior errors");
+			unlock_kernel();
 			return -EROFS;
 		}
 		err = ubifs_remount_rw(c);
-		if (err)
+		if (err) {
+			unlock_kernel();
 			return err;
+		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
 		if (c->ro_media) {
 			ubifs_msg("cannot re-mount due to prior errors");
+			unlock_kernel();
 			return -EROFS;
 		}
 		ubifs_remount_ro(c);
@@ -1793,6 +1801,7 @@
 	}
 
 	ubifs_assert(c->lst.taken_empty_lebs > 0);
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/udf/Makefile b/fs/udf/Makefile
index 0d4503f..eb880f6 100644
--- a/fs/udf/Makefile
+++ b/fs/udf/Makefile
@@ -5,5 +5,5 @@
 obj-$(CONFIG_UDF_FS) += udf.o
 
 udf-objs     := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \
-		partition.o super.o truncate.o symlink.o fsync.o \
+		partition.o super.o truncate.o symlink.o \
 		directory.o misc.o udftime.o unicode.o
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 2efd4d5..61d9a76 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -210,5 +210,5 @@
 	.read			= generic_read_dir,
 	.readdir		= udf_readdir,
 	.ioctl			= udf_ioctl,
-	.fsync			= udf_fsync_file,
+	.fsync			= simple_fsync,
 };
diff --git a/fs/udf/file.c b/fs/udf/file.c
index eb91f3b..7464305 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -209,7 +209,7 @@
 	.write			= do_sync_write,
 	.aio_write		= udf_file_aio_write,
 	.release		= udf_release_file,
-	.fsync			= udf_fsync_file,
+	.fsync			= simple_fsync,
 	.splice_read		= generic_file_splice_read,
 	.llseek			= generic_file_llseek,
 };
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
deleted file mode 100644
index b2c472b..0000000
--- a/fs/udf/fsync.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * fsync.c
- *
- * PURPOSE
- *  Fsync handling routines for the OSTA-UDF(tm) filesystem.
- *
- * COPYRIGHT
- *  This file is distributed under the terms of the GNU General Public
- *  License (GPL). Copies of the GPL can be obtained from:
- *      ftp://prep.ai.mit.edu/pub/gnu/GPL
- *  Each contributing author retains all rights to their own work.
- *
- *  (C) 1999-2001 Ben Fennema
- *  (C) 1999-2000 Stelias Computing Inc
- *
- * HISTORY
- *
- *  05/22/99 blf  Created.
- */
-
-#include "udfdecl.h"
-
-#include <linux/fs.h>
-
-static int udf_fsync_inode(struct inode *, int);
-
-/*
- *	File may be NULL when we are called. Perhaps we shouldn't
- *	even pass file to fsync ?
- */
-
-int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-
-	return udf_fsync_inode(inode, datasync);
-}
-
-static int udf_fsync_inode(struct inode *inode, int datasync)
-{
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-
-	err |= udf_sync_inode(inode);
-
-	return err ? -EIO : 0;
-}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 72348cc..6832135 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -568,6 +568,7 @@
 	if (!udf_parse_options(options, &uopt, true))
 		return -EINVAL;
 
+	lock_kernel();
 	sbi->s_flags = uopt.flags;
 	sbi->s_uid   = uopt.uid;
 	sbi->s_gid   = uopt.gid;
@@ -581,13 +582,16 @@
 			*flags |= MS_RDONLY;
 	}
 
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY)
 		udf_close_lvid(sb);
 	else
 		udf_open_lvid(sb);
 
+	unlock_kernel();
 	return 0;
 }
 
@@ -1915,7 +1919,7 @@
 	if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) {
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 	} else {
-		uopt.blocksize = bdev_hardsect_size(sb->s_bdev);
+		uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 		if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
 			if (!silent)
@@ -2062,6 +2066,9 @@
 	struct udf_sb_info *sbi;
 
 	sbi = UDF_SB(sb);
+
+	lock_kernel();
+
 	if (sbi->s_vat_inode)
 		iput(sbi->s_vat_inode);
 	if (sbi->s_partitions)
@@ -2077,6 +2084,8 @@
 	kfree(sbi->s_partmaps);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int udf_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index cac51b7..8d46f42 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -223,9 +223,6 @@
 extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
 			 uint32_t, int *);
 
-/* fsync.c */
-extern int udf_fsync_file(struct file *, struct dentry *, int);
-
 /* directory.c */
 extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
 						struct udf_fileident_bh *,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6321b79..6f671f1 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@
 const struct file_operations ufs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ufs_readdir,
-	.fsync		= ufs_sync_file,
+	.fsync		= simple_fsync,
 	.llseek		= generic_file_llseek,
 };
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 2bd3a16..73655c6 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,31 +24,10 @@
  */
 
 #include <linux/fs.h>
-#include <linux/buffer_head.h>	/* for sync_mapping_buffers() */
 
 #include "ufs_fs.h"
 #include "ufs.h"
 
-
-int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-	int ret;
-
-	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return ret;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return ret;
-
-	err = ufs_sync_inode(inode);
-	if (ret == 0)
-		ret = err;
-	return ret;
-}
-
-
 /*
  * We have mostly NULL's here: the current defaults are ok for
  * the ufs filesystem.
@@ -62,6 +41,6 @@
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
-	.fsync		= ufs_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6035929..5faed79 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -263,6 +263,7 @@
 	struct ufs_super_block_first * usb1;
 	va_list args;
 	
+	lock_kernel();
 	uspi = UFS_SB(sb)->s_uspi;
 	usb1 = ubh_get_usb_first(uspi);
 	
@@ -594,6 +595,9 @@
 
 	
 	UFSD("ENTER\n");
+
+	lock_kernel();
+
 	ufs_put_cstotal(sb);
 	size = uspi->s_cssize;
 	blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -621,6 +625,9 @@
 		brelse (sbi->s_ucg[i]);
 	kfree (sbi->s_ucg);
 	kfree (base);
+
+	unlock_kernel();
+
 	UFSD("EXIT\n");
 }
 
@@ -1118,32 +1125,45 @@
 	return -ENOMEM;
 }
 
-static void ufs_write_super(struct super_block *sb)
+static int ufs_sync_fs(struct super_block *sb, int wait)
 {
 	struct ufs_sb_private_info * uspi;
 	struct ufs_super_block_first * usb1;
 	struct ufs_super_block_third * usb3;
 	unsigned flags;
 
+	lock_super(sb);
 	lock_kernel();
+
 	UFSD("ENTER\n");
+
 	flags = UFS_SB(sb)->s_flags;
 	uspi = UFS_SB(sb)->s_uspi;
 	usb1 = ubh_get_usb_first(uspi);
 	usb3 = ubh_get_usb_third(uspi);
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		usb1->fs_time = cpu_to_fs32(sb, get_seconds());
-		if ((flags & UFS_ST_MASK) == UFS_ST_SUN 
-		  || (flags & UFS_ST_MASK) == UFS_ST_SUNOS
-		  || (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
-			ufs_set_fs_state(sb, usb1, usb3,
-					UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
-		ufs_put_cstotal(sb);
-	}
+	usb1->fs_time = cpu_to_fs32(sb, get_seconds());
+	if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
+		ufs_set_fs_state(sb, usb1, usb3,
+				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
+	ufs_put_cstotal(sb);
 	sb->s_dirt = 0;
+
 	UFSD("EXIT\n");
 	unlock_kernel();
+	unlock_super(sb);
+
+	return 0;
+}
+
+static void ufs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ufs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static void ufs_put_super(struct super_block *sb)
@@ -1152,6 +1172,9 @@
 		
 	UFSD("ENTER\n");
 
+	if (sb->s_dirt)
+		ufs_write_super(sb);
+
 	if (!(sb->s_flags & MS_RDONLY))
 		ufs_put_super_internal(sb);
 	
@@ -1171,7 +1194,9 @@
 	struct ufs_super_block_third * usb3;
 	unsigned new_mount_opt, ufstype;
 	unsigned flags;
-	
+
+	lock_kernel();
+	lock_super(sb);
 	uspi = UFS_SB(sb)->s_uspi;
 	flags = UFS_SB(sb)->s_flags;
 	usb1 = ubh_get_usb_first(uspi);
@@ -1184,17 +1209,24 @@
 	ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
 	new_mount_opt = 0;
 	ufs_set_opt (new_mount_opt, ONERROR_LOCK);
-	if (!ufs_parse_options (data, &new_mount_opt))
+	if (!ufs_parse_options (data, &new_mount_opt)) {
+		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
+	}
 	if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
 		new_mount_opt |= ufstype;
 	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
 		printk("ufstype can't be changed during remount\n");
+		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
 	}
 
 	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		UFS_SB(sb)->s_mount_opt = new_mount_opt;
+		unlock_super(sb);
+		unlock_kernel();
 		return 0;
 	}
 	
@@ -1219,6 +1251,8 @@
 #ifndef CONFIG_UFS_FS_WRITE
 		printk("ufs was compiled with read-only support, "
 		"can't be mounted as read-write\n");
+		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
 #else
 		if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 
@@ -1227,16 +1261,22 @@
 		    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
 		    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
 			printk("this ufstype is read-only supported\n");
+			unlock_super(sb);
+			unlock_kernel();
 			return -EINVAL;
 		}
 		if (!ufs_read_cylinder_structures(sb)) {
 			printk("failed during remounting\n");
+			unlock_super(sb);
+			unlock_kernel();
 			return -EPERM;
 		}
 		sb->s_flags &= ~MS_RDONLY;
 #endif
 	}
 	UFS_SB(sb)->s_mount_opt = new_mount_opt;
+	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 }
 
@@ -1352,6 +1392,7 @@
 	.delete_inode	= ufs_delete_inode,
 	.put_super	= ufs_put_super,
 	.write_super	= ufs_write_super,
+	.sync_fs	= ufs_sync_fs,
 	.statfs		= ufs_statfs,
 	.remount_fs	= ufs_remount,
 	.show_options   = ufs_show_options,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index d0c4acd..644e77e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -99,7 +99,6 @@
 extern const struct inode_operations ufs_file_inode_operations;
 extern const struct file_operations ufs_file_operations;
 extern const struct address_space_operations ufs_aops;
-extern int ufs_sync_file(struct file *, struct dentry *, int);
 
 /* ialloc.c */
 extern void ufs_free_inode (struct inode *inode);
diff --git a/fs/xattr.c b/fs/xattr.c
index d51b8f9..1c3d0af 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,7 +297,7 @@
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
 		mnt_drop_write(f->f_path.mnt);
@@ -524,7 +524,7 @@
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = removexattr(dentry, name);
 		mnt_drop_write(f->f_path.mnt);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e28800a..1418b91 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1501,7 +1501,7 @@
 	struct block_device	*bdev)
 {
 	return xfs_setsize_buftarg_flags(btp,
-			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+			PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
 }
 
 int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bb68526..08d6bd9 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1104,15 +1104,6 @@
 	kfree(mp);
 }
 
-STATIC void
-xfs_fs_write_super(
-	struct super_block	*sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		xfs_sync_fsdata(XFS_M(sb), 0);
-	sb->s_dirt = 0;
-}
-
 STATIC int
 xfs_fs_sync_super(
 	struct super_block	*sb,
@@ -1137,7 +1128,6 @@
 		error = xfs_quiesce_data(mp);
 	else
 		error = xfs_sync_fsdata(mp, 0);
-	sb->s_dirt = 0;
 
 	if (unlikely(laptop_mode)) {
 		int	prev_sync_seq = mp->m_sync_seq;
@@ -1443,7 +1433,6 @@
 
 	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
 
-	sb->s_dirt = 1;
 	sb->s_magic = XFS_SB_MAGIC;
 	sb->s_blocksize = mp->m_sb.sb_blocksize;
 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1533,7 +1522,6 @@
 	.write_inode		= xfs_fs_write_inode,
 	.clear_inode		= xfs_fs_clear_inode,
 	.put_super		= xfs_fs_put_super,
-	.write_super		= xfs_fs_write_super,
 	.sync_fs		= xfs_fs_sync_super,
 	.freeze_fs		= xfs_fs_freeze,
 	.statfs			= xfs_fs_statfs,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8570b82..bcc39d3 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -628,8 +628,6 @@
 		xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount),
 				  offsetof(xfs_dsb_t, sb_frextents) +
 				  sizeof(sbp->sb_frextents) - 1);
-
-	tp->t_mountp->m_super->s_dirt = 1;
 }
 
 /*
diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3c..fe36acc 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@
 header-y += rdma/
 header-y += video/
 header-y += drm/
+header-y += xen/
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 3673a13..81d3be4 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -134,7 +134,7 @@
 #define atomic_long_cmpxchg(l, old, new) \
 	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(l), (new)))
+	(atomic64_xchg((atomic64_t *)(v), (new)))
 
 #else  /*  BITS_PER_LONG == 64  */
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca..e410f60 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@
 #endif
 
 /*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
+ * A facility to provide batching of the reload of page tables and
+ * other process state with the actual context switch code for
+ * paravirtualized guests.  By convention, only one of the batched
+ * update (lazy) modes (CPU, MMU) should be active at any given time,
+ * entry should never be nested, and entry and exits should always be
+ * paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.  In this case, the exit (end of the context switch) is
+ * in architecture-specific code, and so doesn't need a generic
+ * definition.
  */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
-#define arch_flush_lazy_cpu_mode()	do {} while (0)
+#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
+#define arch_start_context_switch(prev)	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bc..f1736ca 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,7 +63,7 @@
 #define BRANCH_PROFILE()
 #endif
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
 			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3c1924c..7300fb8 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -471,6 +471,9 @@
 	u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
 	uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
 
+	/* requested DPMS state */
+	int dpms;
+
 	void *helper_private;
 
 	uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index ec073d82..6769ff6 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -99,6 +99,8 @@
 				     struct drm_framebuffer *old_fb);
 extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
 
+extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode);
+
 extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
 					  struct drm_mode_fb_cmd *mode_cmd);
 
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f0eaa3..b3afd22 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -135,6 +135,7 @@
 header-y += ppdev.h
 header-y += prctl.h
 header-y += qnxtypes.h
+header-y += qnx4_fs.h
 header-y += radeonfb.h
 header-y += raw.h
 header-y += resource.h
@@ -308,7 +309,6 @@
 unifdef-y += ppp_defs.h
 unifdef-y += ppp-comp.h
 unifdef-y += ptrace.h
-unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
 unifdef-y += irqnr.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88be890..51b4b0a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -119,7 +119,7 @@
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-int acpi_register_gsi (u32 gsi, int triggering, int polarity);
+int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
 #ifdef CONFIG_X86_IO_APIC
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 48ee32a..64a982e 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -159,6 +159,7 @@
 #define UART01x_FR_MODEM_ANY	(UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
 
 #ifndef __ASSEMBLY__
+struct amba_device; /* in uncompress this is included but amba/bus.h is not */
 struct amba_pl010_data {
 	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
 };
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7b214fd..12737be 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -218,12 +218,12 @@
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
 #define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
-static inline unsigned int bio_cur_sectors(struct bio *bio)
+static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
 	if (bio->bi_vcnt)
-		return bio_iovec(bio)->bv_len >> 9;
+		return bio_iovec(bio)->bv_len;
 	else /* dataless requests such as discard */
-		return bio->bi_size >> 9;
+		return bio->bi_size;
 }
 
 static inline void *bio_data(struct bio *bio)
@@ -279,7 +279,7 @@
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask)
+	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 #define BIO_SEG_BOUNDARY(q, b1, b2) \
 	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
 
@@ -506,7 +506,7 @@
 }
 
 /*
- * BIO list managment for use by remapping drivers (e.g. DM or MD).
+ * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
  * A bio_list anchors a singly-linked list of bios chained through the bi_next
  * member of the bio.  The bio_list also caches the last list member to allow
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4f71f1..0b1a6ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -166,19 +166,9 @@
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
-	/* Maintain bio traversal state for part by part I/O submission.
-	 * hard_* are block layer internals, no driver should touch them!
-	 */
-
-	sector_t sector;		/* next sector to submit */
-	sector_t hard_sector;		/* next sector to complete */
-	unsigned long nr_sectors;	/* no. of sectors left to submit */
-	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
-	/* no. of sectors left to submit in the current segment */
-	unsigned int current_nr_sectors;
-
-	/* no. of sectors left to complete in the current segment */
-	unsigned int hard_cur_sectors;
+	/* the following two fields are internal, NEVER access directly */
+	sector_t __sector;		/* sector cursor */
+	unsigned int __data_len;	/* total data len */
 
 	struct bio *bio;
 	struct bio *biotail;
@@ -211,8 +201,8 @@
 
 	unsigned short ioprio;
 
-	void *special;
-	char *buffer;
+	void *special;		/* opaque pointer available for LLD use */
+	char *buffer;		/* kaddr of the current segment if available */
 
 	int tag;
 	int errors;
@@ -226,10 +216,9 @@
 	unsigned char __cmd[BLK_MAX_CDB];
 	unsigned char *cmd;
 
-	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
-	void *data;
+	unsigned int resid_len;	/* residual count */
 	void *sense;
 
 	unsigned long deadline;
@@ -318,6 +307,26 @@
 	struct kobject kobj;
 };
 
+struct queue_limits {
+	unsigned long		bounce_pfn;
+	unsigned long		seg_boundary_mask;
+
+	unsigned int		max_hw_sectors;
+	unsigned int		max_sectors;
+	unsigned int		max_segment_size;
+	unsigned int		physical_block_size;
+	unsigned int		alignment_offset;
+	unsigned int		io_min;
+	unsigned int		io_opt;
+
+	unsigned short		logical_block_size;
+	unsigned short		max_hw_segments;
+	unsigned short		max_phys_segments;
+
+	unsigned char		misaligned;
+	unsigned char		no_cluster;
+};
+
 struct request_queue
 {
 	/*
@@ -369,7 +378,6 @@
 	/*
 	 * queue needs bounce pages for pages above this limit
 	 */
-	unsigned long		bounce_pfn;
 	gfp_t			bounce_gfp;
 
 	/*
@@ -398,14 +406,6 @@
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
 
-	unsigned int		max_sectors;
-	unsigned int		max_hw_sectors;
-	unsigned short		max_phys_segments;
-	unsigned short		max_hw_segments;
-	unsigned short		hardsect_size;
-	unsigned int		max_segment_size;
-
-	unsigned long		seg_boundary_mask;
 	void			*dma_drain_buffer;
 	unsigned int		dma_drain_size;
 	unsigned int		dma_pad_mask;
@@ -415,12 +415,14 @@
 	struct list_head	tag_busy_list;
 
 	unsigned int		nr_sorted;
-	unsigned int		in_flight;
+	unsigned int		in_flight[2];
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct queue_limits	limits;
+
 	/*
 	 * sg stuff
 	 */
@@ -522,6 +524,11 @@
 	__clear_bit(flag, &q->queue_flags);
 }
 
+static inline int queue_in_flight(struct request_queue *q)
+{
+	return q->in_flight[0] + q->in_flight[1];
+}
+
 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 {
 	WARN_ON_ONCE(!queue_is_locked(q));
@@ -752,10 +759,17 @@
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_make_request(struct request_queue *, struct bio *,
+					gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
+extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+			     struct bio_set *bs, gfp_t gfp_mask,
+			     int (*bio_ctr)(struct bio *, struct bio *, void *),
+			     void *data);
+extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
@@ -768,12 +782,6 @@
 			 struct scsi_ioctl_command __user *);
 
 /*
- * Temporary export, until SCSI gets fixed up.
- */
-extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
-			     struct bio *bio);
-
-/*
  * A queue has just exitted congestion.  Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
  * put back.
@@ -798,7 +806,6 @@
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
-extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
@@ -831,41 +838,73 @@
 		blk_run_backing_dev(mapping->backing_dev_info, NULL);
 }
 
-extern void blkdev_dequeue_request(struct request *req);
+/*
+ * blk_rq_pos()		: the current sector
+ * blk_rq_bytes()	: bytes left in the entire request
+ * blk_rq_cur_bytes()	: bytes left in the current segment
+ * blk_rq_sectors()	: sectors left in the entire request
+ * blk_rq_cur_sectors()	: sectors left in the current segment
+ */
+static inline sector_t blk_rq_pos(const struct request *rq)
+{
+	return rq->__sector;
+}
+
+static inline unsigned int blk_rq_bytes(const struct request *rq)
+{
+	return rq->__data_len;
+}
+
+static inline int blk_rq_cur_bytes(const struct request *rq)
+{
+	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
+}
+
+static inline unsigned int blk_rq_sectors(const struct request *rq)
+{
+	return blk_rq_bytes(rq) >> 9;
+}
+
+static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
+{
+	return blk_rq_cur_bytes(rq) >> 9;
+}
 
 /*
- * blk_end_request() and friends.
- * __blk_end_request() and end_request() must be called with
- * the request queue spinlock acquired.
+ * Request issue related functions.
+ */
+extern struct request *blk_peek_request(struct request_queue *q);
+extern void blk_start_request(struct request *rq);
+extern struct request *blk_fetch_request(struct request_queue *q);
+
+/*
+ * Request completion related functions.
+ *
+ * blk_update_request() completes given number of bytes and updates
+ * the request without completing it.
+ *
+ * blk_end_request() and friends.  __blk_end_request() must be called
+ * with the request queue spinlock acquired.
  *
  * Several drivers define their own end_request and call
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern int blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int __blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int blk_end_bidi_request(struct request *rq, int error,
-				unsigned int nr_bytes, unsigned int bidi_bytes);
-extern void end_request(struct request *, int);
-extern int blk_end_request_callback(struct request *rq, int error,
-				unsigned int nr_bytes,
-				int (drv_callback)(struct request *));
+extern bool blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);
+extern bool blk_end_request(struct request *rq, int error,
+			    unsigned int nr_bytes);
+extern void blk_end_request_all(struct request *rq, int error);
+extern bool blk_end_request_cur(struct request *rq, int error);
+extern bool __blk_end_request(struct request *rq, int error,
+			      unsigned int nr_bytes);
+extern void __blk_end_request_all(struct request *rq, int error);
+extern bool __blk_end_request_cur(struct request *rq, int error);
+
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
-extern void blk_update_request(struct request *rq, int error,
-			       unsigned int nr_bytes);
-
-/*
- * blk_end_request() takes bytes instead of sectors as a complete size.
- * blk_rq_bytes() returns bytes left to complete in the entire request.
- * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
- */
-extern unsigned int blk_rq_bytes(struct request *rq);
-extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
 /*
  * Access functions for manipulating queue properties
@@ -877,10 +916,20 @@
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
+extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_alignment_offset(struct request_queue *q,
+				       unsigned int alignment);
+extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
+extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+			    sector_t offset);
+extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
@@ -967,19 +1016,87 @@
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline int queue_hardsect_size(struct request_queue *q)
+static inline unsigned long queue_bounce_pfn(struct request_queue *q)
+{
+	return q->limits.bounce_pfn;
+}
+
+static inline unsigned long queue_segment_boundary(struct request_queue *q)
+{
+	return q->limits.seg_boundary_mask;
+}
+
+static inline unsigned int queue_max_sectors(struct request_queue *q)
+{
+	return q->limits.max_sectors;
+}
+
+static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
+{
+	return q->limits.max_hw_sectors;
+}
+
+static inline unsigned short queue_max_hw_segments(struct request_queue *q)
+{
+	return q->limits.max_hw_segments;
+}
+
+static inline unsigned short queue_max_phys_segments(struct request_queue *q)
+{
+	return q->limits.max_phys_segments;
+}
+
+static inline unsigned int queue_max_segment_size(struct request_queue *q)
+{
+	return q->limits.max_segment_size;
+}
+
+static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
 
-	if (q && q->hardsect_size)
-		retval = q->hardsect_size;
+	if (q && q->limits.logical_block_size)
+		retval = q->limits.logical_block_size;
 
 	return retval;
 }
 
-static inline int bdev_hardsect_size(struct block_device *bdev)
+static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 {
-	return queue_hardsect_size(bdev_get_queue(bdev));
+	return queue_logical_block_size(bdev_get_queue(bdev));
+}
+
+static inline unsigned int queue_physical_block_size(struct request_queue *q)
+{
+	return q->limits.physical_block_size;
+}
+
+static inline unsigned int queue_io_min(struct request_queue *q)
+{
+	return q->limits.io_min;
+}
+
+static inline unsigned int queue_io_opt(struct request_queue *q)
+{
+	return q->limits.io_opt;
+}
+
+static inline int queue_alignment_offset(struct request_queue *q)
+{
+	if (q && q->limits.misaligned)
+		return -1;
+
+	if (q && q->limits.alignment_offset)
+		return q->limits.alignment_offset;
+
+	return 0;
+}
+
+static inline int queue_sector_alignment_offset(struct request_queue *q,
+						sector_t sector)
+{
+	return ((sector << 9) - q->limits.alignment_offset)
+		& (q->limits.io_min - 1);
 }
 
 static inline int queue_dma_alignment(struct request_queue *q)
@@ -1109,6 +1226,8 @@
 	int (*direct_access) (struct block_device *, sector_t,
 						void **, unsigned long *);
 	int (*media_changed) (struct gendisk *);
+	unsigned long long (*set_capacity) (struct gendisk *,
+						unsigned long long);
 	int (*revalidate_disk) (struct gendisk *);
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	struct module *owner;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889..7e4350e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@
  * The remap event
  */
 struct blk_io_trace_remap {
-	__be32 device;
 	__be32 device_from;
-	__be64 sector;
+	__be32 device_to;
+	__be64 sector_from;
 };
 
 enum {
@@ -165,8 +165,9 @@
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q,
-	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern int do_blk_trace_setup(struct request_queue *q, char *name,
+			      dev_t dev, struct block_device *bdev,
+			      struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
@@ -193,22 +194,42 @@
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
 				void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+			   struct block_device *bdev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
-#define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
-#define blk_trace_shutdown(q)			do { } while (0)
-#define do_blk_trace_setup(q, name, dev, buts)	(-ENOTTY)
-#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
-#define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
-#define blk_trace_startstop(q, start)		(-ENOTTY)
-#define blk_trace_remove(q)			(-ENOTTY)
-#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
+# define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
+# define blk_trace_shutdown(q)				do { } while (0)
+# define do_blk_trace_setup(q, name, dev, bdev, buts)	(-ENOTTY)
+# define blk_add_driver_data(q, rq, data, len)		do {} while (0)
+# define blk_trace_setup(q, name, dev, bdev, arg)	(-ENOTTY)
+# define blk_trace_startstop(q, start)			(-ENOTTY)
+# define blk_trace_remove(q)				(-ENOTTY)
+# define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
+static inline int blk_trace_init_sysfs(struct device *dev)
+{
+	return 0;
+}
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+	return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+}
+
+extern void blk_dump_cmd(char *buf, struct request *rq);
+extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
+
+#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index fb45919..f389e31 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -28,6 +28,8 @@
 
 void cdev_del(struct cdev *);
 
+int cdev_index(struct inode *inode);
+
 void cd_forget(struct inode *);
 
 extern struct backing_dev_info directly_mappable_cdev_bdi;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 5a40d14..c56457c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -288,7 +288,15 @@
  */
 static inline int clocksource_enable(struct clocksource *cs)
 {
-	return cs->enable ? cs->enable(cs) : 0;
+	int ret = 0;
+
+	if (cs->enable)
+		ret = cs->enable(cs);
+
+	/* save mult_orig on enable */
+	cs->mult_orig = cs->mult;
+
+	return ret;
 }
 
 /**
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21..af931ee 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -222,6 +222,8 @@
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
+long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+				  struct compat_siginfo __user *uinfo);
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
 					struct compat_timeval *rhs)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 37bcb50..04fb513 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -261,6 +261,11 @@
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9f31538..c5ac87c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1022,6 +1022,8 @@
 
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
 void free_cpumask_var(cpumask_var_t mask);
 void free_bootmem_cpumask_var(cpumask_var_t mask);
@@ -1040,6 +1042,19 @@
 	return true;
 }
 
+static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	cpumask_clear(*mask);
+	return true;
+}
+
+static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+					  int node)
+{
+	cpumask_clear(*mask);
+	return true;
+}
+
 static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 }
diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h
index 3be4e5a2..6fc2bed 100644
--- a/include/linux/cramfs_fs.h
+++ b/include/linux/cramfs_fs.h
@@ -2,9 +2,8 @@
 #define __CRAMFS_H
 
 #include <linux/types.h>
+#include <linux/magic.h>
 
-#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
-#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define CRAMFS_SIGNATURE	"Compressed ROMFS"
 
 /*
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 788850b..1fbdea4 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -142,19 +142,6 @@
 
 
 #ifndef DP_WINDOW_SIZE
-/* #include "cyclomz.h" */
-/****************** ****************** *******************/
-/*
- *	The data types defined below are used in all ZFIRM interface
- *	data structures. They accomodate differences between HW
- *	architectures and compilers.
- */
-
-typedef __u64  ucdouble;		/* 64 bits, unsigned */
-typedef __u32  uclong;			/* 32 bits, unsigned */
-typedef __u16  ucshort;		/* 16 bits, unsigned */
-typedef __u8   ucchar;			/* 8 bits, unsigned */
-
 /*
  *	Memory Window Sizes
  */
@@ -507,16 +494,20 @@
 
 /* Per card data structure */
 struct cyclades_card {
-    void __iomem *base_addr;
-    void __iomem *ctl_addr;
-    int irq;
-    unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
-    unsigned int first_line;	/* minor number of first channel on card */
-    unsigned int nports;	/* Number of ports in the card */
-    int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
-    int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
-    spinlock_t card_lock;
-    struct cyclades_port *ports;
+	void __iomem *base_addr;
+	union {
+		void __iomem *p9050;
+		struct RUNTIME_9060 __iomem *p9060;
+	} ctl_addr;
+	int irq;
+	unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
+	unsigned int first_line;	/* minor number of first channel on card */
+	unsigned int nports;	/* Number of ports in the card */
+	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
+	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	u32 hw_ver;
+	spinlock_t card_lock;
+	struct cyclades_port *ports;
 };
 
 /***************************************
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1515636..30b93b2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,10 +180,12 @@
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
 
-#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
+#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched by inotify */
 
 #define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
 
+#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
@@ -351,6 +353,11 @@
 	return (dentry->d_flags & DCACHE_UNHASHED);
 }
 
+static inline int d_unlinked(struct dentry *dentry)
+{
+	return d_unhashed(dentry) && !IS_ROOT(dentry);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
@@ -368,7 +375,7 @@
 	return dentry->d_mounted;
 }
 
-extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct vfsmount *lookup_mnt(struct path *);
 extern struct dentry *lookup_create(struct nameidata *nd, int is_dir);
 
 extern int sysctl_vfs_cache_pressure;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ded2d7c..49c2362 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -149,7 +149,7 @@
 	unsigned max_hw_sectors;
 	unsigned max_sectors;
 	unsigned max_segment_size;
-	unsigned short hardsect_size;
+	unsigned short logical_block_size;
 	unsigned short max_hw_segments;
 	unsigned short max_phys_segments;
 	unsigned char no_cluster; /* inverted so that 0 is default */
diff --git a/include/linux/device.h b/include/linux/device.h
index 5d5c197..a4a7b10 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -62,8 +62,6 @@
 	void (*shutdown)(struct device *dev);
 
 	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*suspend_late)(struct device *dev, pm_message_t state);
-	int (*resume_early)(struct device *dev);
 	int (*resume)(struct device *dev);
 
 	struct dev_pm_ops *pm;
@@ -291,9 +289,6 @@
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
 	void (*release)(struct device *dev);
 
-	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*resume)(struct device *dev);
-
 	struct dev_pm_ops *pm;
 };
 
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index b9cd386..0b3518c 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -81,8 +81,8 @@
  * the cluster, the calling node joins it.
  */
 
-int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace,
-		      uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, int namelen,
+		      dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
 
 /*
  * dlm_release_lockspace
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb..171ad8a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@
 
 extern void dma_debug_init(u32 num_entries);
 
+extern int dma_debug_resize_entries(u32 num_entries);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       size_t offset, size_t size,
 			       int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@
 {
 }
 
+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+	return 0;
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction, dma_addr_t dma_addr,
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e397dc3..10ff5c4 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -108,6 +108,7 @@
 };
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
+extern int intr_remapping_supported(void);
 extern int enable_intr_remapping(int);
 extern void disable_intr_remapping(void);
 extern int reenable_intr_remapping(int);
@@ -157,6 +158,8 @@
 }
 #define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
+#define disable_intr_remapping()	(0)
+#define reenable_intr_remapping(mode)	(0)
 #define intr_remapping_enabled		(0)
 #endif
 
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index 102a902..ecc0628 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -10,7 +10,7 @@
 
 struct dnotify_struct {
 	struct dnotify_struct *	dn_next;
-	unsigned long		dn_mask;
+	__u32			dn_mask;
 	int			dn_fd;
 	struct file *		dn_filp;
 	fl_owner_t		dn_owner;
@@ -21,23 +21,18 @@
 
 #ifdef CONFIG_DNOTIFY
 
-extern void __inode_dir_notify(struct inode *, unsigned long);
+#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\
+			    FS_MODIFY | FS_MODIFY_CHILD |\
+			    FS_ACCESS | FS_ACCESS_CHILD |\
+			    FS_ATTRIB | FS_ATTRIB_CHILD |\
+			    FS_CREATE | FS_DN_RENAME |\
+			    FS_MOVED_FROM | FS_MOVED_TO)
+
 extern void dnotify_flush(struct file *, fl_owner_t);
 extern int fcntl_dirnotify(int, struct file *, unsigned long);
-extern void dnotify_parent(struct dentry *, unsigned long);
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	if (inode->i_dnotify_mask & (event))
-		__inode_dir_notify(inode, event);
-}
 
 #else
 
-static inline void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 static inline void dnotify_flush(struct file *filp, fl_owner_t id)
 {
 }
@@ -47,14 +42,6 @@
 	return -EINVAL;
 }
 
-static inline void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-}
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 #endif /* CONFIG_DNOTIFY */
 
 #endif /* __KERNEL __ */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c59b769..1cb3372e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -103,10 +103,8 @@
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
-extern void elv_dequeue_request(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern int elv_queue_empty(struct request_queue *);
-extern struct request *elv_next_request(struct request_queue *q);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
@@ -171,7 +169,7 @@
 	ELV_MQUEUE_MUST,
 };
 
-#define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3b534e5..ede84fa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,8 +729,8 @@
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
-	unsigned int		i_blkbits;
 	blkcnt_t		i_blocks;
+	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
@@ -751,13 +751,12 @@
 		struct block_device	*i_bdev;
 		struct cdev		*i_cdev;
 	};
-	int			i_cindex;
 
 	__u32			i_generation;
 
-#ifdef CONFIG_DNOTIFY
-	unsigned long		i_dnotify_mask; /* Directory notify events */
-	struct dnotify_struct	*i_dnotify; /* for directory notifications */
+#ifdef CONFIG_FSNOTIFY
+	__u32			i_fsnotify_mask; /* all events this inode cares about */
+	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
 #endif
 
 #ifdef CONFIG_INOTIFY
@@ -1321,7 +1320,7 @@
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_need_sync_fs;
+	int			s_need_sync;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
 	void                    *s_security;
@@ -1372,11 +1371,6 @@
 	 * generic_show_options()
 	 */
 	char *s_options;
-
-	/*
-	 * storage for asynchronous operations
-	 */
-	struct list_head s_async_list;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
@@ -1800,7 +1794,7 @@
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
+extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
@@ -1947,8 +1941,6 @@
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int fsync_super(struct super_block *);
-extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@ -1964,6 +1956,7 @@
 	return 0;
 }
 #endif
+extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
@@ -2082,12 +2075,8 @@
 
 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
-extern void sync_filesystems(int wait);
-extern void __fsync_super(struct super_block *sb);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
-extern int do_remount_sb(struct super_block *sb, int flags,
-			 void *data, int force);
 #ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
 #endif
@@ -2205,6 +2194,8 @@
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
@@ -2354,6 +2345,8 @@
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
 
+extern int simple_fsync(struct file *, struct dentry *, int);
+
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
 				struct page *, struct page *);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 00fbd5b..936f9aa 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -13,6 +13,7 @@
 
 #include <linux/dnotify.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 #include <linux/audit.h>
 
 /*
@@ -22,19 +23,45 @@
 static inline void fsnotify_d_instantiate(struct dentry *entry,
 						struct inode *inode)
 {
+	__fsnotify_d_instantiate(entry, inode);
+
 	inotify_d_instantiate(entry, inode);
 }
 
+/* Notify this dentry's parent about a child's events. */
+static inline void fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	__fsnotify_parent(dentry, mask);
+
+	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+}
+
 /*
  * fsnotify_d_move - entry has been moved
  * Called with dcache_lock and entry->d_lock held.
  */
 static inline void fsnotify_d_move(struct dentry *entry)
 {
+	/*
+	 * On move we need to update entry->d_flags to indicate if the new parent
+	 * cares about events from this entry.
+	 */
+	__fsnotify_update_dcache_flags(entry);
+
 	inotify_d_move(entry);
 }
 
 /*
+ * fsnotify_link_count - inode's link count changed
+ */
+static inline void fsnotify_link_count(struct inode *inode)
+{
+	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+}
+
+/*
  * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
  */
 static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
@@ -42,42 +69,62 @@
 				 int isdir, struct inode *target, struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
-	u32 cookie = inotify_get_cookie();
+	u32 in_cookie = inotify_get_cookie();
+	u32 fs_cookie = fsnotify_get_cookie();
+	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
+	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
 
 	if (old_dir == new_dir)
-		inode_dir_notify(old_dir, DN_RENAME);
-	else {
-		inode_dir_notify(old_dir, DN_DELETE);
-		inode_dir_notify(new_dir, DN_CREATE);
+		old_dir_mask |= FS_DN_RENAME;
+
+	if (isdir) {
+		isdir = IN_ISDIR;
+		old_dir_mask |= FS_IN_ISDIR;
+		new_dir_mask |= FS_IN_ISDIR;
 	}
 
-	if (isdir)
-		isdir = IN_ISDIR;
-	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
+	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
 				  source);
-	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
+	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
 				  source);
 
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie);
+
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
 		inotify_inode_is_dead(target);
+
+		/* this is really a link_count change not a removal */
+		fsnotify_link_count(target);
 	}
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
 
 /*
+ * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed
+ */
+static inline void fsnotify_inode_delete(struct inode *inode)
+{
+	__fsnotify_inode_delete(inode);
+}
+
+/*
  * fsnotify_nameremove - a filename was removed from a directory
  */
 static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 {
+	__u32 mask = FS_DELETE;
+
 	if (isdir)
-		isdir = IN_ISDIR;
-	dnotify_parent(dentry, DN_DELETE);
-	inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
+		mask |= FS_IN_ISDIR;
+
+	fsnotify_parent(dentry, mask);
 }
 
 /*
@@ -87,14 +134,9 @@
 {
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
-}
 
-/*
- * fsnotify_link_count - inode's link count changed
- */
-static inline void fsnotify_link_count(struct inode *inode)
-{
-	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+	__fsnotify_inode_delete(inode);
 }
 
 /*
@@ -102,10 +144,11 @@
  */
 static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
-	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -115,11 +158,12 @@
  */
 static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
 {
-	inode_dir_notify(dir, DN_CREATE);
 	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
 				  inode);
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
 }
 
 /*
@@ -127,10 +171,13 @@
  */
 static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
-	inode_dir_notify(inode, DN_CREATE);
-	inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, 
-				  dentry->d_name.name, dentry->d_inode);
+	__u32 mask = (FS_CREATE | FS_IN_ISDIR);
+	struct inode *d_inode = dentry->d_inode;
+
+	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -139,14 +186,15 @@
 static inline void fsnotify_access(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ACCESS;
+	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_ACCESS);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify_parent(dentry, mask);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -155,14 +203,15 @@
 static inline void fsnotify_modify(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_MODIFY;
+	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_MODIFY);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify_parent(dentry, mask);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -171,13 +220,15 @@
 static inline void fsnotify_open(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_OPEN;
+	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify_parent(dentry, mask);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -187,15 +238,16 @@
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	const char *name = dentry->d_name.name;
 	fmode_t mode = file->f_mode;
-	u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE;
+	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify_parent(dentry, mask);
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
 }
 
 /*
@@ -204,13 +256,15 @@
 static inline void fsnotify_xattr(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ATTRIB;
+	__u32 mask = FS_ATTRIB;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify_parent(dentry, mask);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -220,50 +274,37 @@
 static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 {
 	struct inode *inode = dentry->d_inode;
-	int dn_mask = 0;
-	u32 in_mask = 0;
+	__u32 mask = 0;
 
-	if (ia_valid & ATTR_UID) {
-		in_mask |= IN_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_GID) {
-		in_mask |= IN_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_SIZE) {
-		in_mask |= IN_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
+	if (ia_valid & ATTR_UID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_GID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_SIZE)
+		mask |= FS_MODIFY;
+
 	/* both times implies a utime(s) call */
 	if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
-	{
-		in_mask |= IN_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	} else if (ia_valid & ATTR_ATIME) {
-		in_mask |= IN_ACCESS;
-		dn_mask |= DN_ACCESS;
-	} else if (ia_valid & ATTR_MTIME) {
-		in_mask |= IN_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
-	if (ia_valid & ATTR_MODE) {
-		in_mask |= IN_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
+		mask |= FS_ATTRIB;
+	else if (ia_valid & ATTR_ATIME)
+		mask |= FS_ACCESS;
+	else if (ia_valid & ATTR_MTIME)
+		mask |= FS_MODIFY;
 
-	if (dn_mask)
-		dnotify_parent(dentry, dn_mask);
-	if (in_mask) {
+	if (ia_valid & ATTR_MODE)
+		mask |= FS_ATTRIB;
+
+	if (mask) {
 		if (S_ISDIR(inode->i_mode))
-			in_mask |= IN_ISDIR;
-		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
-		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
-						  dentry->d_name.name);
+			mask |= FS_IN_ISDIR;
+		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+		fsnotify_parent(dentry, mask);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 }
 
-#ifdef CONFIG_INOTIFY	/* inotify helpers */
+#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY)	/* notify helpers */
 
 /*
  * fsnotify_oldname_init - save off the old filename before we change it
@@ -281,7 +322,7 @@
 	kfree(old_name);
 }
 
-#else	/* CONFIG_INOTIFY */
+#else	/* CONFIG_INOTIFY || CONFIG_FSNOTIFY */
 
 static inline const char *fsnotify_oldname_init(const char *name)
 {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
new file mode 100644
index 0000000..44848aa
--- /dev/null
+++ b/include/linux/fsnotify_backend.h
@@ -0,0 +1,387 @@
+/*
+ * Filesystem access notification for Linux
+ *
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ */
+
+#ifndef __LINUX_FSNOTIFY_BACKEND_H
+#define __LINUX_FSNOTIFY_BACKEND_H
+
+#ifdef __KERNEL__
+
+#include <linux/idr.h> /* inotify uses this */
+#include <linux/fs.h> /* struct inode */
+#include <linux/list.h>
+#include <linux/path.h> /* struct path */
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+
+/*
+ * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
+ * convert between them.  dnotify only needs conversion at watch creation
+ * so no perf loss there.  fanotify isn't defined yet, so it can use the
+ * wholes if it needs more events.
+ */
+#define FS_ACCESS		0x00000001	/* File was accessed */
+#define FS_MODIFY		0x00000002	/* File was modified */
+#define FS_ATTRIB		0x00000004	/* Metadata changed */
+#define FS_CLOSE_WRITE		0x00000008	/* Writtable file was closed */
+#define FS_CLOSE_NOWRITE	0x00000010	/* Unwrittable file closed */
+#define FS_OPEN			0x00000020	/* File was opened */
+#define FS_MOVED_FROM		0x00000040	/* File was moved from X */
+#define FS_MOVED_TO		0x00000080	/* File was moved to Y */
+#define FS_CREATE		0x00000100	/* Subfile was created */
+#define FS_DELETE		0x00000200	/* Subfile was deleted */
+#define FS_DELETE_SELF		0x00000400	/* Self was deleted */
+#define FS_MOVE_SELF		0x00000800	/* Self was moved */
+
+#define FS_UNMOUNT		0x00002000	/* inode on umount fs */
+#define FS_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
+#define FS_IN_IGNORED		0x00008000	/* last inotify event here */
+
+#define FS_IN_ISDIR		0x40000000	/* event occurred against dir */
+#define FS_IN_ONESHOT		0x80000000	/* only send event once */
+
+#define FS_DN_RENAME		0x10000000	/* file renamed */
+#define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
+
+/* This inode cares about things that happen to its children.  Always set for
+ * dnotify and inotify. */
+#define FS_EVENT_ON_CHILD	0x08000000
+
+/* This is a list of all events that may get sent to a parernt based on fs event
+ * happening to inodes inside that directory */
+#define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
+				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
+				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
+				   FS_DELETE)
+
+/* listeners that hard code group numbers near the top */
+#define DNOTIFY_GROUP_NUM	UINT_MAX
+#define INOTIFY_GROUP_NUM	(DNOTIFY_GROUP_NUM-1)
+
+struct fsnotify_group;
+struct fsnotify_event;
+struct fsnotify_mark_entry;
+struct fsnotify_event_private_data;
+
+/*
+ * Each group much define these ops.  The fsnotify infrastructure will call
+ * these operations for each relevant group.
+ *
+ * should_send_event - given a group, inode, and mask this function determines
+ *		if the group is interested in this event.
+ * handle_event - main call for a group to handle an fs event
+ * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+ * freeing-mark - this means that a mark has been flagged to die when everything
+ *		finishes using it.  The function is supplied with what must be a
+ *		valid group and inode to use to clean up.
+ */
+struct fsnotify_ops {
+	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask);
+	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
+	void (*free_group_priv)(struct fsnotify_group *group);
+	void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+	void (*free_event_priv)(struct fsnotify_event_private_data *priv);
+};
+
+/*
+ * A group is a "thing" that wants to receive notification about filesystem
+ * events.  The mask holds the subset of event types this group cares about.
+ * refcnt on a group is up to the implementor and at any moment if it goes 0
+ * everything will be cleaned up.
+ */
+struct fsnotify_group {
+	/*
+	 * global list of all groups receiving events from fsnotify.
+	 * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex
+	 * or fsnotify_grp_srcu depending on write vs read.
+	 */
+	struct list_head group_list;
+
+	/*
+	 * Defines all of the event types in which this group is interested.
+	 * This mask is a bitwise OR of the FS_* events from above.  Each time
+	 * this mask changes for a group (if it changes) the correct functions
+	 * must be called to update the global structures which indicate global
+	 * interest in event types.
+	 */
+	__u32 mask;
+
+	/*
+	 * How the refcnt is used is up to each group.  When the refcnt hits 0
+	 * fsnotify will clean up all of the resources associated with this group.
+	 * As an example, the dnotify group will always have a refcnt=1 and that
+	 * will never change.  Inotify, on the other hand, has a group per
+	 * inotify_init() and the refcnt will hit 0 only when that fd has been
+	 * closed.
+	 */
+	atomic_t refcnt;		/* things with interest in this group */
+	unsigned int group_num;		/* simply prevents accidental group collision */
+
+	const struct fsnotify_ops *ops;	/* how this group handles things */
+
+	/* needed to send notification to userspace */
+	struct mutex notification_mutex;	/* protect the notification_list */
+	struct list_head notification_list;	/* list of event_holder this group needs to send to userspace */
+	wait_queue_head_t notification_waitq;	/* read() on the notification file blocks on this waitq */
+	unsigned int q_len;			/* events on the queue */
+	unsigned int max_events;		/* maximum events allowed on the list */
+
+	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
+	spinlock_t mark_lock;		/* protect mark_entries list */
+	atomic_t num_marks;		/* 1 for each mark entry and 1 for not being
+					 * past the point of no return when freeing
+					 * a group */
+	struct list_head mark_entries;	/* all inode mark entries for this group */
+
+	/* prevents double list_del of group_list.  protected by global fsnotify_grp_mutex */
+	bool on_group_list;
+
+	/* groups can define private fields here or use the void *private */
+	union {
+		void *private;
+#ifdef CONFIG_INOTIFY_USER
+		struct inotify_group_private_data {
+			spinlock_t	idr_lock;
+			struct idr      idr;
+			u32             last_wd;
+			struct fasync_struct    *fa;    /* async notification */
+			struct user_struct      *user;
+		} inotify_data;
+#endif
+	};
+};
+
+/*
+ * A single event can be queued in multiple group->notification_lists.
+ *
+ * each group->notification_list will point to an event_holder which in turns points
+ * to the actual event that needs to be sent to userspace.
+ *
+ * Seemed cheaper to create a refcnt'd event and a small holder for every group
+ * than create a different event for every group
+ *
+ */
+struct fsnotify_event_holder {
+	struct fsnotify_event *event;
+	struct list_head event_list;
+};
+
+/*
+ * Inotify needs to tack data onto an event.  This struct lets us later find the
+ * correct private data of the correct group.
+ */
+struct fsnotify_event_private_data {
+	struct fsnotify_group *group;
+	struct list_head event_list;
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group.  If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+	/*
+	 * If we create an event we are also likely going to need a holder
+	 * to link to a group.  So embed one holder in the event.  Means only
+	 * one allocation for the common case where we only have one group
+	 */
+	struct fsnotify_event_holder holder;
+	spinlock_t lock;	/* protection for the associated event_holder and private_list */
+	/* to_tell may ONLY be dereferenced during handle_event(). */
+	struct inode *to_tell;	/* either the inode the event happened to or its parent */
+	/*
+	 * depending on the event type we should have either a path or inode
+	 * We hold a reference on path, but NOT on inode.  Since we have the ref on
+	 * the path, it may be dereferenced at any point during this object's
+	 * lifetime.  That reference is dropped when this object's refcnt hits
+	 * 0.  If this event contains an inode instead of a path, the inode may
+	 * ONLY be used during handle_event().
+	 */
+	union {
+		struct path path;
+		struct inode *inode;
+	};
+/* when calling fsnotify tell it if the data is a path or inode */
+#define FSNOTIFY_EVENT_NONE	0
+#define FSNOTIFY_EVENT_PATH	1
+#define FSNOTIFY_EVENT_INODE	2
+#define FSNOTIFY_EVENT_FILE	3
+	int data_type;		/* which of the above union we have */
+	atomic_t refcnt;	/* how many groups still are using/need to send this event */
+	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
+
+	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
+	char *file_name;
+	size_t name_len;
+
+	struct list_head private_data_list;	/* groups can store private data here */
+};
+
+/*
+ * a mark is simply an entry attached to an in core inode which allows an
+ * fsnotify listener to indicate they are either no longer interested in events
+ * of a type matching mask or only interested in those events.
+ *
+ * these are flushed when an inode is evicted from core and may be flushed
+ * when the inode is modified (as seen by fsnotify_access).  Some fsnotify users
+ * (such as dnotify) will flush these when the open fd is closed and not at
+ * inode eviction or modification.
+ */
+struct fsnotify_mark_entry {
+	__u32 mask;			/* mask this mark entry is for */
+	/* we hold ref for each i_list and g_list.  also one ref for each 'thing'
+	 * in kernel that found and may be using this mark. */
+	atomic_t refcnt;		/* active things looking at this mark */
+	struct inode *inode;		/* inode this entry is associated with */
+	struct fsnotify_group *group;	/* group this mark entry is for */
+	struct hlist_node i_list;	/* list of mark_entries by inode->i_fsnotify_mark_entries */
+	struct list_head g_list;	/* list of mark_entries by group->i_fsnotify_mark_entries */
+	spinlock_t lock;		/* protect group, inode, and killme */
+	struct list_head free_i_list;	/* tmp list used when freeing this mark */
+	struct list_head free_g_list;	/* tmp list used when freeing this mark */
+	void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */
+};
+
+#ifdef CONFIG_FSNOTIFY
+
+/* called from the vfs helpers */
+
+/* main fsnotify call to send events */
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+		     const char *name, u32 cookie);
+extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
+extern void __fsnotify_inode_delete(struct inode *inode);
+extern u32 fsnotify_get_cookie(void);
+
+static inline int fsnotify_inode_watches_children(struct inode *inode)
+{
+	/* FS_EVENT_ON_CHILD is set if the inode may care */
+	if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
+		return 0;
+	/* this inode might care about child events, does it care about the
+	 * specific set of events that can happen on a child? */
+	return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
+}
+
+/*
+ * Update the dentry with a flag indicating the interest of its parent to receive
+ * filesystem events when those events happens to this dentry->d_inode.
+ */
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	assert_spin_locked(&dcache_lock);
+	assert_spin_locked(&dentry->d_lock);
+
+	parent = dentry->d_parent;
+	if (fsnotify_inode_watches_children(parent->d_inode))
+		dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+	else
+		dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+}
+
+/*
+ * fsnotify_d_instantiate - instantiate a dentry for inode
+ * Called with dcache_lock held.
+ */
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{
+	if (!inode)
+		return;
+
+	assert_spin_locked(&dcache_lock);
+
+	spin_lock(&dentry->d_lock);
+	__fsnotify_update_dcache_flags(dentry);
+	spin_unlock(&dentry->d_lock);
+}
+
+/* called from fsnotify listeners, such as fanotify or dnotify */
+
+/* must call when a group changes its ->mask */
+extern void fsnotify_recalc_global_mask(void);
+/* get a reference to an existing or create a new group */
+extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
+						    __u32 mask,
+						    const struct fsnotify_ops *ops);
+/* run all marks associated with this group and update group->mask */
+extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
+/* drop reference on a group from fsnotify_obtain_group */
+extern void fsnotify_put_group(struct fsnotify_group *group);
+
+/* take a reference to an event */
+extern void fsnotify_get_event(struct fsnotify_event *event);
+extern void fsnotify_put_event(struct fsnotify_event *event);
+/* find private data previously attached to an event and unlink it */
+extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
+									   struct fsnotify_event *event);
+
+/* attach the event to the group notification queue */
+extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+				     struct fsnotify_event_private_data *priv);
+/* true if the group notification queue is empty */
+extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
+/* return, but do not dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+/* return AND dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+
+/* functions used to manipulate the marks attached to inodes */
+
+/* run all marks associated with an inode and update inode->i_fsnotify_mask */
+extern void fsnotify_recalc_inode_mask(struct inode *inode);
+extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry));
+/* find (and take a reference) to a mark associated with group and inode */
+extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
+/* attach the mark to both the group and the inode */
+extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode);
+/* given a mark, flag it to be freed when all references are dropped */
+extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
+/* run all the marks in a group, and flag them to be freed */
+extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
+extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_unmount_inodes(struct list_head *list);
+
+/* put here because inotify does some weird stuff when destroying watches */
+extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+						    void *data, int data_is, const char *name,
+						    u32 cookie);
+
+#else
+
+static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+			    const char *name, u32 cookie)
+{}
+
+static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{}
+
+static inline void __fsnotify_inode_delete(struct inode *inode)
+{}
+
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{}
+
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{}
+
+static inline u32 fsnotify_get_cookie(void)
+{
+	return 0;
+}
+
+static inline void fsnotify_unmount_inodes(struct list_head *list)
+{}
+
+#endif	/* CONFIG_FSNOTIFY */
+
+#endif	/* __KERNEL __ */
+
+#endif	/* __LINUX_FSNOTIFY_BACKEND_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8a0c2f2..39b95c5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@
 
 extern int skip_trace(unsigned long ip);
 
-extern void ftrace_release(void *start, unsigned long size);
-
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
@@ -325,13 +323,8 @@
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
-			       unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
-		   unsigned long *start, unsigned long *end) { }
 #endif
 
 /*
@@ -368,6 +361,7 @@
 	unsigned long ret;
 	unsigned long func;
 	unsigned long long calltime;
+	unsigned long long subtime;
 };
 
 /*
@@ -379,8 +373,6 @@
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
-extern void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
 
 /*
  * Sometimes we don't want to trace a function with the function
@@ -496,8 +488,15 @@
 
 extern int ftrace_dump_on_oops;
 
+#ifdef CONFIG_PREEMPT
+#define INIT_TRACE_RECURSION		.trace_recursion = 0,
+#endif
+
 #endif /* CONFIG_TRACING */
 
+#ifndef INIT_TRACE_RECURSION
+#define INIT_TRACE_RECURSION
+#endif
 
 #ifdef CONFIG_HW_BRANCH_TRACER
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 0000000..5c093ff
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,172 @@
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/trace_seq.h>
+#include <linux/ring_buffer.h>
+#include <linux/percpu.h>
+
+struct trace_array;
+struct tracer;
+struct dentry;
+
+DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
+struct trace_print_flags {
+	unsigned long		mask;
+	const char		*name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+				   unsigned long flags,
+				   const struct trace_print_flags *flag_array);
+
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				     const struct trace_print_flags *symbol_array);
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned short		type;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+	int			tgid;
+};
+
+#define FTRACE_MAX_EVENT						\
+	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+	struct trace_array	*tr;
+	struct tracer		*trace;
+	void			*private;
+	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	unsigned long		iter_flags;
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq	seq;
+	struct trace_entry	*ent;
+	int			cpu;
+	u64			ts;
+
+	loff_t			pos;
+	long			idx;
+
+	cpumask_var_t		started;
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+					      int flags);
+struct trace_event {
+	struct hlist_node	node;
+	struct list_head	list;
+	int			type;
+	trace_print_func	trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
+};
+
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(int type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+struct ftrace_event_call {
+	struct list_head	list;
+	char			*name;
+	char			*system;
+	struct dentry		*dir;
+	struct trace_event	*event;
+	int			enabled;
+	int			(*regfunc)(void);
+	void			(*unregfunc)(void);
+	int			id;
+	int			(*raw_init)(void);
+	int			(*show_format)(struct trace_seq *s);
+	int			(*define_fields)(void);
+	struct list_head	fields;
+	int			filter_active;
+	void			*filter;
+	void			*mod;
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_t	profile_count;
+	int		(*profile_enable)(struct ftrace_event_call *);
+	void		(*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+#define MAX_FILTER_PRED		32
+#define MAX_FILTER_STR_VAL	128
+
+extern int init_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+					void *rec,
+					struct ring_buffer_event *event);
+
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size, int is_signed);
+
+#define is_signed_type(type)	(((type)(-1)) < 0)
+
+int trace_set_clr_event(const char *system, const char *event, int set);
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
+#define __common_field(type, item, is_signed)				\
+	ret = trace_define_field(event_call, #type, "common_" #item,	\
+				 offsetof(typeof(field.ent), item),	\
+				 sizeof(field.ent.item), is_signed);	\
+	if (ret)							\
+		return ret;
+
+#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 162e5de..d41ed59 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -121,6 +121,13 @@
 #define FUSE_BIG_WRITES		(1 << 5)
 
 /**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL	(1 << 0)
+
+/**
  * Release flags
  */
 #define FUSE_RELEASE_FLUSH	(1 << 0)
@@ -210,6 +217,9 @@
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
+
+	/* CUSE specific operations */
+	CUSE_INIT          = 4096,
 };
 
 enum fuse_notify_code {
@@ -401,6 +411,27 @@
 	__u32	max_write;
 };
 
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+};
+
+struct cuse_init_out {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+	__u32	max_read;
+	__u32	max_write;
+	__u32	dev_major;		/* chardev major */
+	__u32	dev_minor;		/* chardev minor */
+	__u32	spare[10];
+};
+
 struct fuse_interrupt_in {
 	__u64	unique;
 };
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 3bf5bb5..34956c8 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -23,6 +23,8 @@
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
+#define FUTEX_WAIT_REQUEUE_PI	11
+#define FUTEX_CMP_REQUEUE_PI	12
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -38,6 +40,10 @@
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a1a28ca..7cbd38d 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -90,6 +90,7 @@
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
+	sector_t alignment_offset;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
@@ -113,6 +114,7 @@
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
+#define GENHD_FL_NATIVE_CAPACITY		128
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0bbc15f..3760e7c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -85,6 +85,9 @@
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_NOMEMALLOC)
 
+/* Control slab gfp mask during early boot */
+#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)
+
 /* Control allocation constraints */
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9fed365..a6c6a2f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -175,7 +178,7 @@
 /*
  * Structure to hold all information about the location of this port
  */
-typedef struct hw_regs_s {
+struct ide_hw {
 	union {
 		struct ide_io_ports	io_ports;
 		unsigned long		io_ports_array[IDE_NR_PORTS];
@@ -183,12 +186,11 @@
 
 	int		irq;			/* our irq number */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
-	hwif_chipset_t  chipset;
 	struct device	*dev, *parent;
 	unsigned long	config;
-} hw_regs_t;
+};
 
-static inline void ide_std_init_ports(hw_regs_t *hw,
+static inline void ide_std_init_ports(struct ide_hw *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
 {
@@ -215,21 +217,12 @@
 
 /*
  * Special Driver Flags
- *
- * set_geometry	: respecify drive geometry
- * recalibrate	: seek to cyl 0
- * set_multmode	: set multmode count
- * reserved	: unused
  */
-typedef union {
-	unsigned all			: 8;
-	struct {
-		unsigned set_geometry	: 1;
-		unsigned recalibrate	: 1;
-		unsigned set_multmode	: 1;
-		unsigned reserved	: 5;
-	} b;
-} special_t;
+enum {
+	IDE_SFLAG_SET_GEOMETRY		= (1 << 0),
+	IDE_SFLAG_RECALIBRATE		= (1 << 1),
+	IDE_SFLAG_SET_MULTMODE		= (1 << 2),
+};
 
 /*
  * Status returned from various ide_ functions
@@ -324,7 +317,6 @@
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
@@ -360,11 +352,7 @@
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -377,10 +365,6 @@
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -397,6 +381,7 @@
 struct ide_disk_ops {
 	int		(*check)(struct ide_drive_s *, const char *);
 	int		(*get_capacity)(struct ide_drive_s *);
+	u64		(*set_capacity)(struct ide_drive_s *, u64);
 	void		(*setup)(struct ide_drive_s *);
 	void		(*flush)(struct ide_drive_s *);
 	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
@@ -474,6 +459,8 @@
 	IDE_DFLAG_NICE1			= (1 << 5),
 	/* device is physically present */
 	IDE_DFLAG_PRESENT		= (1 << 6),
+	/* disable Host Protected Area */
+	IDE_DFLAG_NOHPA			= (1 << 7),
 	/* id read from device (synthetic if not set) */
 	IDE_DFLAG_ID_READ		= (1 << 8),
 	IDE_DFLAG_NOPROBE		= (1 << 9),
@@ -512,6 +499,7 @@
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 29),
 	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
+	IDE_DFLAG_NIEN_QUIRK		= (1 << 31),
 };
 
 struct ide_drive_s {
@@ -536,14 +524,13 @@
 	unsigned long sleep;		/* sleep until this time */
 	unsigned long timeout;		/* max time to wait for irq */
 
-	special_t	special;	/* special action flags */
+	u8	special_flags;		/* special action flags */
 
 	u8	select;			/* basic drive/head select reg value */
 	u8	retry_pio;		/* retrying dma capable host in pio */
 	u8	waiting_for_dma;	/* dma currently in progress */
 	u8	dma;			/* atapi dma flag */
 
-        u8	quirk_list;	/* considered quirky, set for a specific host */
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
@@ -568,8 +555,7 @@
 	unsigned int	drive_data;	/* used by set_pio_mode/dev_select() */
 	unsigned int	failures;	/* current failure count */
 	unsigned int	max_failures;	/* maximum allowed failure count */
-	u64		probed_capacity;/* initial reported media capacity (ide-cd only currently) */
-
+	u64		probed_capacity;/* initial/native media capacity */
 	u64		capacity64;	/* total number of sectors */
 
 	int		lun;		/* logical unit */
@@ -593,16 +579,16 @@
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1174,7 +1160,10 @@
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
+
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
@@ -1225,7 +1214,7 @@
 }
 
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *,
-			 hw_regs_t *, hw_regs_t **);
+			 struct ide_hw *, struct ide_hw **);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
@@ -1464,16 +1453,18 @@
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
+void ide_check_nien_quirk_list(ide_drive_t *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
+struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **,
+				unsigned int);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
-		      hw_regs_t **);
-int ide_host_add(const struct ide_port_info *, hw_regs_t **,
+		      struct ide_hw **);
+int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index cfe4fe1..60e8934d 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -79,6 +79,7 @@
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
+#define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e2aa45..b1b827d 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,14 +13,17 @@
 #include <linux/fs.h>
 struct linux_binprm;
 
+#define IMA_COUNT_UPDATE 1
+#define IMA_COUNT_LEAVE 0
+
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct path *path, int mask, int update_counts);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
-extern void ima_shm_check(struct file *file);
+extern void ima_counts_get(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -38,7 +41,7 @@
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	return 0;
 }
@@ -53,7 +56,7 @@
 	return 0;
 }
 
-static inline void ima_shm_check(struct file *file)
+static inline void ima_counts_get(struct file *file)
 {
 	return;
 }
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d..28b1f30 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,6 +108,15 @@
 
 extern struct cred init_cred;
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)					\
+	.perf_counter_mutex = 						\
+		 __MUTEX_INITIALIZER(tsk.perf_counter_mutex),		\
+	.perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -145,8 +154,8 @@
 	.group_leader	= &tsk,						\
 	.real_cred	= &init_cred,					\
 	.cred		= &init_cred,					\
-	.cred_exec_mutex =						\
-		 __MUTEX_INITIALIZER(tsk.cred_exec_mutex),		\
+	.cred_guard_mutex =						\
+		 __MUTEX_INITIALIZER(tsk.cred_guard_mutex),		\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
@@ -171,9 +180,11 @@
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
+	INIT_PERF_COUNTERS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
+	INIT_TRACE_RECURSION						\
 }
 
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 91bb76f..c41e812 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -183,6 +183,7 @@
 extern void enable_irq(unsigned int irq);
 
 /* The following three functions are for the core kernel use only. */
+#ifdef CONFIG_GENERIC_HARDIRQS
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
 #ifdef CONFIG_PM_SLEEP
@@ -190,6 +191,11 @@
 #else
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
+#else
+static inline void suspend_device_irqs(void) { };
+static inline void resume_device_irqs(void) { };
+static inline int check_wakeup_irqs(void) { return 0; }
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
@@ -566,6 +572,6 @@
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int arch_init_chip_data(struct irq_desc *desc, int node);
 
 #endif
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 08b987b..dd05434 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -64,7 +64,7 @@
  * and kmalloc'ed. These could be shared between processes.
  */
 struct io_context {
-	atomic_t refcount;
+	atomic_long_t refcount;
 	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
@@ -91,8 +91,8 @@
 	 * if ref count is zero, don't allow sharing (ioc is going away, it's
 	 * a race).
 	 */
-	if (ioc && atomic_inc_not_zero(&ioc->refcount)) {
-		atomic_inc(&ioc->nr_tasks);
+	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
+		atomic_long_inc(&ioc->refcount);
 		return ioc;
 	}
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b7cbeed..1e50c34f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -117,7 +117,7 @@
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq,
+	int		(*set_affinity)(unsigned int irq,
 					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
@@ -187,7 +187,7 @@
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
-	unsigned int		cpu;
+	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
@@ -201,26 +201,23 @@
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
-					struct irq_desc *desc, int cpu);
+					struct irq_desc *desc, int node);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-#else /* CONFIG_SPARSE_IRQ */
-extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
-#endif /* CONFIG_SPARSE_IRQ */
-
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
-
-static inline struct irq_desc *
-irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
-{
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	return irq_to_desc(irq);
-#else
-	return desc;
 #endif
+
+#ifdef CONFIG_NUMA_IRQ_DESC
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
+#else
+static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
+{
+	return desc;
 }
+#endif
+
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
 /*
  * Migration helpers for obsolete names, they will go away:
@@ -386,7 +383,7 @@
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
-extern unsigned int create_irq_nr(unsigned int irq_want);
+extern unsigned int create_irq_nr(unsigned int irq_want, int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
@@ -424,47 +421,44 @@
 
 #ifdef CONFIG_SMP
 /**
- * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:	pointer to irq_desc struct
  * @cpu:	cpu which will be handling the cpumasks
  * @boot:	true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
- * Side effect: affinity has all bits set, pending_mask has all bits clear.
  */
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
-								bool boot)
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
+							bool boot)
 {
-	int node;
+	gfp_t gfp = GFP_ATOMIC;
 
-	if (boot) {
-		alloc_bootmem_cpumask_var(&desc->affinity);
-		cpumask_setall(desc->affinity);
+	if (boot)
+		gfp = GFP_NOWAIT;
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-		alloc_bootmem_cpumask_var(&desc->pending_mask);
-		cpumask_clear(desc->pending_mask);
-#endif
-		return true;
-	}
-
-	node = cpu_to_node(cpu);
-
-	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var_node(&desc->affinity, gfp, node))
 		return false;
-	cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
+	if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
 		free_cpumask_var(desc->affinity);
 		return false;
 	}
-	cpumask_clear(desc->pending_mask);
+#endif
 #endif
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+	cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_clear(desc->pending_mask);
+#endif
+}
+
 /**
  * init_copy_desc_masks - copy cpumasks for irq_desc
  * @old_desc:	pointer to old irq_desc struct
@@ -478,7 +472,7 @@
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
-#ifdef CONFIG_CPUMASKS_OFFSTACK
+#ifdef CONFIG_CPUMASK_OFFSTACK
 	cpumask_copy(new_desc->affinity, old_desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -499,12 +493,16 @@
 
 #else /* !CONFIG_SMP */
 
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
 								bool boot)
 {
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+}
+
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 0c8b89f..a77c600 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,7 +81,12 @@
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
 extern void account_steal_time(cputime_t);
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
new file mode 100644
index 0000000..7796aed
--- /dev/null
+++ b/include/linux/kmemleak.h
@@ -0,0 +1,96 @@
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void);
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+			   gfp_t gfp);
+extern void kmemleak_free(const void *ptr);
+extern void kmemleak_padding(const void *ptr, unsigned long offset,
+			     size_t size);
+extern void kmemleak_not_leak(const void *ptr);
+extern void kmemleak_ignore(const void *ptr);
+extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
+			       size_t length, gfp_t gfp);
+extern void kmemleak_no_scan(const void *ptr);
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+	*ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+				  gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, unsigned long offset,
+				      size_t length, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+
+#endif	/* CONFIG_DEBUG_KMEMLEAK */
+
+#endif	/* __KMEMLEAK_H */
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 0000000..b616d39
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <trace/events/kmem.h>
+
+#ifdef CONFIG_KMEMTRACE
+extern void kmemtrace_init(void);
+#else
+static inline void kmemtrace_init(void)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8cc1379..3db5d8d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -119,7 +119,7 @@
 			__u32 error_code;
 		} ex;
 		/* KVM_EXIT_IO */
-		struct kvm_io {
+		struct {
 #define KVM_EXIT_IO_IN  0
 #define KVM_EXIT_IO_OUT 1
 			__u8 direction;
@@ -224,10 +224,10 @@
 /* for KVM_GET_DIRTY_LOG */
 struct kvm_dirty_log {
 	__u32 slot;
-	__u32 padding;
+	__u32 padding1;
 	union {
 		void __user *dirty_bitmap; /* one bit per page */
-		__u64 padding;
+		__u64 padding2;
 	};
 };
 
@@ -409,6 +409,10 @@
 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
 #endif
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX 28
+#endif
+#define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -482,11 +486,18 @@
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
 				   struct kvm_assigned_pci_dev)
 #define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
+#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR \
+			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY \
+			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 
 /*
  * ioctls for vcpu fds
@@ -577,6 +588,8 @@
 #define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
 #define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 struct kvm_assigned_pci_dev {
 	__u32 assigned_dev_id;
 	__u32 busnr;
@@ -587,6 +600,17 @@
 	};
 };
 
+#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
+
+#define KVM_DEV_IRQ_HOST_MASK	 0x00ff
+#define KVM_DEV_IRQ_GUEST_MASK   0xff00
+
 struct kvm_assigned_irq {
 	__u32 assigned_dev_id;
 	__u32 host_irq;
@@ -602,9 +626,19 @@
 	};
 };
 
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
+struct kvm_assigned_msix_nr {
+	__u32 assigned_dev_id;
+	__u16 entry_nr;
+	__u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV		512
+struct kvm_assigned_msix_entry {
+	__u32 assigned_dev_id;
+	__u32 gsi;
+	__u16 entry; /* The index of entry in the MSI-X table */
+	__u16 padding[3];
+};
 
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 894a56e..aacc544 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
+#define KVM_REQ_KICK               9
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -72,7 +73,6 @@
 	struct mutex mutex;
 	int   cpu;
 	struct kvm_run *run;
-	int guest_mode;
 	unsigned long requests;
 	unsigned long guest_debug;
 	int fpu_active;
@@ -298,6 +298,7 @@
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
@@ -319,6 +320,13 @@
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+#define KVM_ASSIGNED_MSIX_PENDING		0x1
+struct kvm_guest_msix_entry {
+	u32 vector;
+	u16 entry;
+	u16 flags;
+};
+
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct work_struct interrupt_work;
@@ -326,18 +334,18 @@
 	int assigned_dev_id;
 	int host_busnr;
 	int host_devfn;
+	unsigned int entries_nr;
 	int host_irq;
 	bool host_irq_disabled;
+	struct msix_entry *host_msix_entries;
 	int guest_irq;
-#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
-#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
-#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
-#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+	struct kvm_guest_msix_entry *guest_msix_entries;
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
+	spinlock_t assigned_dev_lock;
 };
 
 struct kvm_irq_mask_notifier {
@@ -360,6 +368,9 @@
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
+/* For vcpu->arch.iommu_flags */
+#define KVM_IOMMU_CACHE_COHERENCY	0x1
+
 #ifdef CONFIG_IOMMU_API
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2b8318c..fb46efb 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,4 +40,31 @@
 
 typedef hfn_t pfn_t;
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index 175e63f..7bc1440 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -30,6 +30,10 @@
 	/* Wallclock time set by the Host. */
 	struct timespec time;
 
+	/* Interrupt pending set by the Host.  The Guest should do a hypercall
+	 * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
+	int irq_pending;
+
 	/* Async hypercall ring.  Instead of directly making hypercalls, we can
 	 * place them in here for processing the next time the Host wants.
 	 * This batching can be quite efficient. */
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index a53407a..bfefbdf 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -57,7 +57,8 @@
 	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
-	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+	LHREQ_BREAK, /* No longer used */
+	LHREQ_EVENTFD, /* + address, fd. */
 };
 
 /* The alignment to use between consumer and producer parts of vring.
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 4072544..66c194e 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -56,8 +56,7 @@
 	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct bio 		*lo_bio;
-	struct bio		*lo_biotail;
+	struct bio_list		lo_bio_list;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
new file mode 100644
index 0000000..e461b2c
--- /dev/null
+++ b/include/linux/lsm_audit.h
@@ -0,0 +1,111 @@
+/*
+ * Common LSM logging functions
+ * Heavily borrowed from selinux/avc.h
+ *
+ * Author : Etienne BASSET  <etienne.basset@ensta.org>
+ *
+ * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil>
+ * All BUGS to : Etienne BASSET  <etienne.basset@ensta.org>
+ */
+#ifndef _LSM_COMMON_LOGGING_
+#define _LSM_COMMON_LOGGING_
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/audit.h>
+#include <linux/in6.h>
+#include <linux/path.h>
+#include <linux/key.h>
+#include <linux/skbuff.h>
+#include <asm/system.h>
+
+
+/* Auxiliary data to use in generating the audit record. */
+struct common_audit_data {
+	char    type;
+#define LSM_AUDIT_DATA_FS      1
+#define LSM_AUDIT_DATA_NET     2
+#define LSM_AUDIT_DATA_CAP     3
+#define LSM_AUDIT_DATA_IPC     4
+#define LSM_AUDIT_DATA_TASK    5
+#define LSM_AUDIT_DATA_KEY     6
+	struct task_struct *tsk;
+	union 	{
+		struct {
+			struct path path;
+			struct inode *inode;
+		} fs;
+		struct {
+			int netif;
+			struct sock *sk;
+			u16 family;
+			__be16 dport;
+			__be16 sport;
+			union {
+				struct {
+					__be32 daddr;
+					__be32 saddr;
+				} v4;
+				struct {
+					struct in6_addr daddr;
+					struct in6_addr saddr;
+				} v6;
+			} fam;
+		} net;
+		int cap;
+		int ipc_id;
+		struct task_struct *tsk;
+#ifdef CONFIG_KEYS
+		struct {
+			key_serial_t key;
+			char *key_desc;
+		} key_struct;
+#endif
+	} u;
+	const char *function;
+	/* this union contains LSM specific data */
+	union {
+		/* SMACK data */
+		struct smack_audit_data {
+			char *subject;
+			char *object;
+			char *request;
+			int result;
+		} smack_audit_data;
+		/* SELinux data */
+		struct {
+			u32 ssid;
+			u32 tsid;
+			u16 tclass;
+			u32 requested;
+			u32 audited;
+			struct av_decision *avd;
+			int result;
+		} selinux_audit_data;
+	} lsm_priv;
+	/* these callback will be implemented by a specific LSM */
+	void (*lsm_pre_audit)(struct audit_buffer *, void *);
+	void (*lsm_post_audit)(struct audit_buffer *, void *);
+};
+
+#define v4info fam.v4
+#define v6info fam.v6
+
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+/* Initialize an LSM audit data structure. */
+#define COMMON_AUDIT_DATA_INIT(_d, _t) \
+	{ memset((_d), 0, sizeof(struct common_audit_data)); \
+	 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+
+void common_lsm_audit(struct common_audit_data *a);
+
+#endif
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 5b4e28b..1923327 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -6,9 +6,12 @@
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC	0x0187
 #define CODA_SUPER_MAGIC	0x73757245
+#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
+#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define DEBUGFS_MAGIC          0x64626720
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
+#define SELINUX_MAGIC		0xf97cff8c
 #define TMPFS_MAGIC		0x01021994
 #define SQUASHFS_MAGIC		0x73717368
 #define EFS_SUPER_MAGIC		0x414A53
diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h
deleted file mode 100644
index 1f76b1e..0000000
--- a/include/linux/mg_disk.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  include/linux/mg_disk.c
- *
- *  Support for the mGine m[g]flash IO mode.
- *  Based on legacy hd.c
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#ifndef __MG_DISK_H__
-#define __MG_DISK_H__
-
-#include <linux/blkdev.h>
-#include <linux/ata.h>
-
-/* name for block device */
-#define MG_DISK_NAME "mgd"
-/* name for platform device */
-#define MG_DEV_NAME "mg_disk"
-
-#define MG_DISK_MAJ 0
-#define MG_DISK_MAX_PART 16
-#define MG_SECTOR_SIZE 512
-#define MG_MAX_SECTS 256
-
-/* Register offsets */
-#define MG_BUFF_OFFSET			0x8000
-#define MG_STORAGE_BUFFER_SIZE		0x200
-#define MG_REG_OFFSET			0xC000
-#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
-#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
-#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
-#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
-#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
-#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
-#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
-#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
-#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
-#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
-#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
-
-/* "Drive Select/Head Register" bit values */
-#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
-#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
-
-
-/* "Device Control Register" bit values */
-#define MG_REG_CTRL_INTR_ENABLE			0x0
-#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
-#define MG_REG_CTRL_RESET			(0x1<<2)
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
-#define MG_REG_CTRL_DPD_DISABLE			0x0
-#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
-
-/* Status register bit */
-/* error bit in status register */
-#define MG_REG_STATUS_BIT_ERROR			0x01
-/* corrected error in status register */
-#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
-/* data request bit in status register */
-#define MG_REG_STATUS_BIT_DATA_REQ		0x08
-/* DSC - Drive Seek Complete */
-#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
-/* DWF - Drive Write Fault */
-#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
-#define MG_REG_STATUS_BIT_READY			0x40
-#define MG_REG_STATUS_BIT_BUSY			0x80
-
-/* handy status */
-#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
-#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
-				(MG_REG_STATUS_BIT_BUSY | \
-				 MG_REG_STATUS_BIT_WRITE_FAULT | \
-				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
-
-/* Error register */
-#define MG_REG_ERR_AMNF		0x01
-#define MG_REG_ERR_ABRT		0x04
-#define MG_REG_ERR_IDNF		0x10
-#define MG_REG_ERR_UNC		0x40
-#define MG_REG_ERR_BBK		0x80
-
-/* error code for others */
-#define MG_ERR_NONE		0
-#define MG_ERR_TIMEOUT		0x100
-#define MG_ERR_INIT_STAT	0x101
-#define MG_ERR_TRANSLATION	0x102
-#define MG_ERR_CTRL_RST		0x103
-#define MG_ERR_INV_STAT		0x104
-#define MG_ERR_RSTOUT		0x105
-
-#define MG_MAX_ERRORS	6	/* Max read/write errors */
-
-/* command */
-#define MG_CMD_RD 0x20
-#define MG_CMD_WR 0x30
-#define MG_CMD_SLEEP 0x99
-#define MG_CMD_WAKEUP 0xC3
-#define MG_CMD_ID 0xEC
-#define MG_CMD_WR_CONF 0x3C
-#define MG_CMD_RD_CONF 0x40
-
-/* operation mode */
-#define MG_OP_CASCADE (1 << 0)
-#define MG_OP_CASCADE_SYNC_RD (1 << 1)
-#define MG_OP_CASCADE_SYNC_WR (1 << 2)
-#define MG_OP_INTERLEAVE (1 << 3)
-
-/* synchronous */
-#define MG_BURST_LAT_4 (3 << 4)
-#define MG_BURST_LAT_5 (4 << 4)
-#define MG_BURST_LAT_6 (5 << 4)
-#define MG_BURST_LAT_7 (6 << 4)
-#define MG_BURST_LAT_8 (7 << 4)
-#define MG_BURST_LEN_4 (1 << 1)
-#define MG_BURST_LEN_8 (2 << 1)
-#define MG_BURST_LEN_16 (3 << 1)
-#define MG_BURST_LEN_32 (4 << 1)
-#define MG_BURST_LEN_CONT (0 << 1)
-
-/* timeout value (unit: ms) */
-#define MG_TMAX_CONF_TO_CMD	1
-#define MG_TMAX_WAIT_RD_DRQ	10
-#define MG_TMAX_WAIT_WR_DRQ	500
-#define MG_TMAX_RST_TO_BUSY	10
-#define MG_TMAX_HDRST_TO_RDY	500
-#define MG_TMAX_SWRST_TO_RDY	500
-#define MG_TMAX_RSTOUT		3000
-
-/* device attribution */
-/* use mflash as boot device */
-#define MG_BOOT_DEV		(1 << 0)
-/* use mflash as storage device */
-#define MG_STORAGE_DEV		(1 << 1)
-/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
-#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
-
-#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
-
-/* names of GPIO resource */
-#define MG_RST_PIN	"mg_rst"
-/* except MG_BOOT_DEV, reset-out pin should be assigned */
-#define MG_RSTOUT_PIN	"mg_rstout"
-
-/* private driver data */
-struct mg_drv_data {
-	/* disk resource */
-	u32 use_polling;
-
-	/* device attribution */
-	u32 dev_attr;
-
-	/* internally used */
-	struct mg_host *host;
-};
-
-/* main structure for mflash driver */
-struct mg_host {
-	struct device *dev;
-
-	struct request_queue *breq;
-	spinlock_t lock;
-	struct gendisk *gd;
-
-	struct timer_list timer;
-	void (*mg_do_intr) (struct mg_host *);
-
-	u16 id[ATA_ID_WORDS];
-
-	u16 cyls;
-	u16 heads;
-	u16 sectors;
-	u32 n_sectors;
-	u32 nres_sectors;
-
-	void __iomem *dev_base;
-	unsigned int irq;
-	unsigned int rst;
-	unsigned int rstout;
-
-	u32 major;
-	u32 error;
-};
-
-/*
- * Debugging macro and defines
- */
-#undef DO_MG_DEBUG
-#ifdef DO_MG_DEBUG
-#  define MG_DBG(fmt, args...) \
-	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
-#else /* CONFIG_MG_DEBUG */
-#  define MG_DBG(fmt, args...) do { } while (0)
-#endif /* CONFIG_MG_DEBUG */
-
-#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..ad613ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -580,12 +581,10 @@
  */
 static inline unsigned long round_hint_to_min(unsigned long hint)
 {
-#ifdef CONFIG_SECURITY
 	hint &= PAGE_MASK;
 	if (((void *)hint != NULL) &&
 	    (hint < mmap_min_addr))
 		return PAGE_ALIGN(mmap_min_addr);
-#endif
 	return hint;
 }
 
@@ -1031,8 +1030,6 @@
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
@@ -1319,8 +1316,8 @@
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern void *alloc_locked_buffer(size_t size);
-extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+				 size_t size);
+extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bd..97491f7 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@
 
 extern int register_kmmio_probe(struct kmmio_probe *p);
 extern void unregister_kmmio_probe(struct kmmio_probe *p);
+extern int kmmio_init(void);
+extern void kmmio_cleanup(void);
 
 #ifdef CONFIG_MMIOTRACE
 /* kmmio is active by some kmmio_probes? */
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac08..a7bc6e7b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -77,6 +77,7 @@
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
 void sort_main_extable(void);
+void trim_init_extable(struct module *m);
 
 #ifdef MODULE
 #define MODULE_GENERIC_TABLE(gtype,name)			\
@@ -337,6 +338,14 @@
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	struct ftrace_event_call *trace_events;
+	unsigned int num_trace_events;
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	unsigned long *ftrace_callsites;
+	unsigned int num_ftrace_callsites;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index a4f0b93..6547c3c 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -36,9 +36,14 @@
 /* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
 typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
+/* Flag bits for kernel_param.flags */
+#define KPARAM_KMALLOCED	1
+#define KPARAM_ISBOOL		2
+
 struct kernel_param {
 	const char *name;
-	unsigned int perm;
+	u16 perm;
+	u16 flags;
 	param_set_fn set;
 	param_get_fn get;
 	union {
@@ -79,7 +84,7 @@
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
    writable. */
-#define __module_param_call(prefix, name, set, get, arg, perm)		\
+#define __module_param_call(prefix, name, set, get, arg, isbool, perm)	\
 	/* Default value instead of permissions? */			\
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2))	\
@@ -88,10 +93,13 @@
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, set, get, { arg } }
+	= { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0,	\
+	    set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
+	__module_param_call(MODULE_PARAM_PREFIX,			      \
+			    name, set, get, arg,			      \
+			    __same_type(*(arg), bool), perm)
 
 /* Helper functions: type is byte, short, ushort, int, uint, long,
    ulong, charp, bool or invbool, or XXX if you define param_get_XXX,
@@ -120,15 +128,16 @@
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
 	__module_param_call("", name, param_set_##type, param_get_##type, \
-			    &var, perm)
+			    &var, __same_type(var, bool), perm)
 #endif /* !MODULE */
 
 /* Actually copy string: maxlen param is usually sizeof(string). */
 #define module_param_string(name, string, len, perm)			\
 	static const struct kparam_string __param_string_##name		\
 		= { len, string };					\
-	module_param_call(name, param_set_copystring, param_get_string,	\
-			  .str = &__param_string_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_set_copystring, param_get_string,	\
+			    .str = &__param_string_##name, 0, perm);	\
 	__MODULE_PARM_TYPE(name, "string")
 
 /* Called on module insert or kernel boot */
@@ -186,21 +195,30 @@
 extern int param_get_charp(char *buffer, struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
+/* For historical reasons "bool" parameters can be (unsigned) "int". */
 extern int param_set_bool(const char *val, struct kernel_param *kp);
 extern int param_get_bool(char *buffer, struct kernel_param *kp);
-#define param_check_bool(name, p) __param_check(name, p, int)
+#define param_check_bool(name, p)					\
+	static inline void __check_##name(void)				\
+	{								\
+		BUILD_BUG_ON(!__same_type(*(p), bool) &&		\
+			     !__same_type(*(p), unsigned int) &&	\
+			     !__same_type(*(p), int));			\
+	}
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
-#define param_check_invbool(name, p) __param_check(name, p, int)
+#define param_check_invbool(name, p) __param_check(name, p, bool)
 
 /* Comma-separated array: *nump is set to number they actually specified. */
 #define module_param_array_named(name, array, type, nump, perm)		\
 	static const struct kparam_array __param_arr_##name		\
 	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\
 	    sizeof(array[0]), array };					\
-	module_param_call(name, param_array_set, param_array_get, 	\
-			  .arr = &__param_arr_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_array_set, param_array_get,		\
+			    .arr = &__param_arr_##name,			\
+			    __same_type(array[0], bool), perm);		\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 #define module_param_array(name, type, nump, perm)		\
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 51f55f9..5d52753 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -30,7 +30,7 @@
 #define MNT_STRICTATIME 0x80
 
 #define MNT_SHRINKABLE	0x100
-#define MNT_IMBALANCED_WRITE_COUNT	0x200 /* just for debugging */
+#define MNT_WRITE_HOLD	0x200
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
@@ -65,13 +65,22 @@
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-	/*
-	 * This value is not stable unless all of the mnt_writers[] spinlocks
-	 * are held, and all mnt_writer[]s on this mount have 0 as their ->count
-	 */
-	atomic_t __mnt_writers;
+#ifdef CONFIG_SMP
+	int *mnt_writers;
+#else
+	int mnt_writers;
+#endif
 };
 
+static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	return mnt->mnt_writers;
+#else
+	return &mnt->mnt_writers;
+#endif
+}
+
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
 {
 	if (mnt)
@@ -79,7 +88,11 @@
 	return mnt;
 }
 
+struct file; /* forward dec */
+
 extern int mnt_want_write(struct vfsmount *mnt);
+extern int mnt_want_write_file(struct file *file);
+extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput_no_expire(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7..878cab4 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -150,5 +150,6 @@
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 518098f..d870ae2 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -18,6 +18,7 @@
 struct nameidata {
 	struct path	path;
 	struct qstr	last;
+	struct path	root;
 	unsigned int	flags;
 	int		last_type;
 	unsigned	depth;
@@ -77,8 +78,8 @@
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
-extern int follow_down(struct vfsmount **, struct dentry **);
-extern int follow_up(struct vfsmount **, struct dentry **);
+extern int follow_down(struct path *);
+extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index bcd0201..a6d9ef2 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -125,11 +125,9 @@
 void			exp_readlock(void);
 void			exp_readunlock(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
-					     struct vfsmount *,
-					     struct dentry *);
+					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
-					struct vfsmount *mnt,
-					struct dentry *dentry);
+					struct path *);
 int			exp_rootfh(struct auth_domain *, 
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 7339c7b..13f126c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -18,7 +18,19 @@
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
-void __init page_cgroup_init(void);
+
+#ifdef CONFIG_SPARSEMEM
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+extern void __init page_cgroup_init(void);
+#else
+void __init page_cgroup_init_flatmem(void);
+static inline void __init page_cgroup_init(void)
+{
+}
+#endif
+
 struct page_cgroup *lookup_page_cgroup(struct page *page);
 
 enum {
@@ -87,6 +99,10 @@
 {
 }
 
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812..19f8e6d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1005,6 +1005,7 @@
 #define PCI_DEVICE_ID_PLX_PCI200SYN	0x3196
 #define PCI_DEVICE_ID_PLX_9030          0x9030
 #define PCI_DEVICE_ID_PLX_9050		0x9050
+#define PCI_DEVICE_ID_PLX_9056		0x9056
 #define PCI_DEVICE_ID_PLX_9080		0x9080
 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2	0xa001
 
@@ -1314,6 +1315,13 @@
 
 #define PCI_VENDOR_ID_CREATIVE		0x1102 /* duplicate: ECTIVA */
 #define PCI_DEVICE_ID_CREATIVE_EMU10K1	0x0002
+#define PCI_DEVICE_ID_CREATIVE_20K1	0x0005
+#define PCI_DEVICE_ID_CREATIVE_20K2	0x000b
+#define PCI_SUBDEVICE_ID_CREATIVE_SB0760	0x0024
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08801	0x0041
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08802	0x0042
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08803	0x0043
+#define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX	0x6000
 
 #define PCI_VENDOR_ID_ECTIVA		0x1102 /* duplicate: CREATIVE */
 #define PCI_DEVICE_ID_ECTIVA_EV1938	0x8938
@@ -1847,6 +1855,10 @@
 #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO	0x0107
 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2	0x0108
 
+#define PCI_VENDOR_ID_DIGIGRAM		0x1369
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM	0xc001
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM	0xc002
+
 #define PCI_VENDOR_ID_KAWASAKI		0x136b
 #define PCI_DEVICE_ID_MCHIP_KL5A72002	0xff01
 
@@ -1996,10 +2008,12 @@
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U	0xC118
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU	0xC11C
 #define PCI_DEVICE_ID_OXSEMI_16PCI954	0x9501
+#define PCI_DEVICE_ID_OXSEMI_C950	0x950B
 #define PCI_DEVICE_ID_OXSEMI_16PCI95N	0x9511
 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP	0x9513
 #define PCI_DEVICE_ID_OXSEMI_16PCI952	0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP	0x9523
+#define PCI_SUBDEVICE_ID_OXSEMI_C950	0x0001
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 1581ff2..26fd9d1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -86,7 +86,12 @@
 	void *ptrs[1];
 };
 
+/* pointer disguising messes up the kmemleak objects tracking */
+#ifndef CONFIG_DEBUG_KMEMLEAK
 #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
+#else
+#define __percpu_disguise(pdata) (struct percpu_data *)(pdata)
+#endif
 
 #define per_cpu_ptr(ptr, cpu)						\
 ({									\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 0000000..1b3118a
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,709 @@
+/*
+ *  Performance counters:
+ *
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ *  Data type definitions, declarations, prototypes.
+ *
+ *    Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
+
+	PERF_TYPE_MAX,				/* non-ABI */
+};
+
+/*
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,			/* non-ABI */
+};
+
+/*
+ * Generalized hardware cache counters:
+ *
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
+};
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_sample_format {
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_GROUP			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
+
+	PERF_SAMPLE_MAX = 1U << 9,		/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
+
+	PERF_FORMAT_MAX = 1U << 3, 		/* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_attr {
+
+	/*
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
+
+	/*
+	 * Type specific configuration information.
+	 */
+	__u64			config;
+
+	union {
+		__u64		sample_period;
+		__u64		sample_freq;
+	};
+
+	__u64			sample_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+				mmap           :  1, /* include mmap data     */
+				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
+
+				__reserved_1   : 53;
+
+	__u32			wakeup_events;	/* wakeup every n events */
+	__u32			__reserved_2;
+
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
+
+enum perf_counter_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 *   u32 seq;
+	 *   s64 count;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
+	 */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware counter identifier */
+	__s64	offset;			/* add to hardware counter value */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading this value should issue an rmb(), on SMP capable
+	 * platforms, after reading this value -- see perf_counter_wakeup().
+	 */
+	__u64   data_head;		/* head in the data section */
+};
+
+#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_EVENT_MISC_KERNEL			(1 << 0)
+#define PERF_EVENT_MISC_USER			(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
+#define PERF_EVENT_MISC_OVERFLOW		(1 << 2)
+
+struct perf_event_header {
+	__u32	type;
+	__u16	misc;
+	__u16	size;
+};
+
+enum perf_event_type {
+
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
+	 * };
+	 */
+	PERF_EVENT_MMAP			= 1,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	char				comm[];
+	 * };
+	 */
+	PERF_EVENT_COMM			= 3,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 *	u64				sample_period;
+	 * };
+	 */
+	PERF_EVENT_PERIOD		= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 * };
+	 */
+	PERF_EVENT_THROTTLE		= 5,
+	PERF_EVENT_UNTHROTTLE		= 6,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 * };
+	 */
+	PERF_EVENT_FORK			= 7,
+
+	/*
+	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+	 * will be PERF_RECORD_*
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	{ u64			ip;	  } && PERF_RECORD_IP
+	 *	{ u32			pid, tid; } && PERF_RECORD_TID
+	 *	{ u64			time;     } && PERF_RECORD_TIME
+	 *	{ u64			addr;     } && PERF_RECORD_ADDR
+	 *	{ u64			config;   } && PERF_RECORD_CONFIG
+	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
+	 *
+	 *	{ u64			nr;
+	 *	  { u64 id, val; }	cnt[nr];  } && PERF_RECORD_GROUP
+	 *
+	 *	{ u16			nr,
+	 *				hv,
+	 *				kernel,
+	 *				user;
+	 *	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
+	 * };
+	 */
+};
+
+#ifdef __KERNEL__
+/*
+ * Kernel-internal data types and definitions:
+ */
+
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/hrtimer.h>
+#include <linux/fs.h>
+#include <linux/pid_namespace.h>
+#include <asm/atomic.h>
+
+struct task_struct;
+
+/**
+ * struct hw_perf_counter - performance counter hardware details:
+ */
+struct hw_perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
+	union {
+		struct { /* hardware */
+			u64		config;
+			unsigned long	config_base;
+			unsigned long	counter_base;
+			int		idx;
+		};
+		union { /* software */
+			atomic64_t	count;
+			struct hrtimer	hrtimer;
+		};
+	};
+	atomic64_t			prev_count;
+	u64				sample_period;
+	u64				last_period;
+	atomic64_t			period_left;
+	u64				interrupts;
+
+	u64				freq_count;
+	u64				freq_interrupts;
+	u64				freq_stamp;
+#endif
+};
+
+struct perf_counter;
+
+/**
+ * struct pmu - generic performance monitoring unit
+ */
+struct pmu {
+	int (*enable)			(struct perf_counter *counter);
+	void (*disable)			(struct perf_counter *counter);
+	void (*read)			(struct perf_counter *counter);
+	void (*unthrottle)		(struct perf_counter *counter);
+};
+
+/**
+ * enum perf_counter_active_state - the states of a counter
+ */
+enum perf_counter_active_state {
+	PERF_COUNTER_STATE_ERROR	= -2,
+	PERF_COUNTER_STATE_OFF		= -1,
+	PERF_COUNTER_STATE_INACTIVE	=  0,
+	PERF_COUNTER_STATE_ACTIVE	=  1,
+};
+
+struct file;
+
+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+	int				nr_pages;	/* nr of data pages  */
+	int				nr_locked;	/* nr pages mlocked  */
+
+	atomic_t			poll;		/* POLL_ for wakeups */
+	atomic_t			events;		/* event limit       */
+
+	atomic_long_t			head;		/* write position    */
+	atomic_long_t			done_head;	/* completed head    */
+
+	atomic_t			lock;		/* concurrent writes */
+
+	atomic_t			wakeup;		/* needs a wakeup    */
+
+	struct perf_counter_mmap_page   *user_page;
+	void				*data_pages[0];
+};
+
+struct perf_pending_entry {
+	struct perf_pending_entry *next;
+	void (*func)(struct perf_pending_entry *);
+};
+
+/**
+ * struct perf_counter - performance counter kernel representation:
+ */
+struct perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
+	struct list_head		list_entry;
+	struct list_head		event_entry;
+	struct list_head		sibling_list;
+	int				nr_siblings;
+	struct perf_counter		*group_leader;
+	const struct pmu		*pmu;
+
+	enum perf_counter_active_state	state;
+	atomic64_t			count;
+
+	/*
+	 * These are the total time in nanoseconds that the counter
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task counter)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the counter is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the counter was enabled
+	 * tstamp_running: the notional time when the counter was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	counter was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
+	struct perf_counter_attr	attr;
+	struct hw_perf_counter		hw;
+
+	struct perf_counter_context	*ctx;
+	struct file			*filp;
+
+	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * counters have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
+	/*
+	 * Protect attach/detach and child_list:
+	 */
+	struct mutex			child_mutex;
+	struct list_head		child_list;
+	struct perf_counter		*parent;
+
+	int				oncpu;
+	int				cpu;
+
+	struct list_head		owner_entry;
+	struct task_struct		*owner;
+
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;
+
+	/* poll related */
+	wait_queue_head_t		waitq;
+	struct fasync_struct		*fasync;
+
+	/* delayed work for NMIs and such */
+	int				pending_wakeup;
+	int				pending_kill;
+	int				pending_disable;
+	struct perf_pending_entry	pending;
+
+	atomic_t			event_limit;
+
+	void (*destroy)(struct perf_counter *);
+	struct rcu_head			rcu_head;
+
+	struct pid_namespace		*ns;
+	u64				id;
+#endif
+};
+
+/**
+ * struct perf_counter_context - counter context structure
+ *
+ * Used as a container for task counters and CPU counters as well:
+ */
+struct perf_counter_context {
+	/*
+	 * Protect the states of the counters in the list,
+	 * nr_active, and the list:
+	 */
+	spinlock_t			lock;
+	/*
+	 * Protect the list of counters.  Locking either mutex or lock
+	 * is sufficient to ensure the list doesn't change; to change
+	 * the list you need to lock both the mutex and the spinlock.
+	 */
+	struct mutex			mutex;
+
+	struct list_head		counter_list;
+	struct list_head		event_list;
+	int				nr_counters;
+	int				nr_active;
+	int				is_active;
+	atomic_t			refcount;
+	struct task_struct		*task;
+
+	/*
+	 * Context clock, runs when context enabled.
+	 */
+	u64				time;
+	u64				timestamp;
+
+	/*
+	 * These fields let us detect when two contexts have both
+	 * been cloned (inherited) from a common ancestor.
+	 */
+	struct perf_counter_context	*parent_ctx;
+	u64				parent_gen;
+	u64				generation;
+	int				pin_count;
+	struct rcu_head			rcu_head;
+};
+
+/**
+ * struct perf_counter_cpu_context - per cpu counter context structure
+ */
+struct perf_cpu_context {
+	struct perf_counter_context	ctx;
+	struct perf_counter_context	*task_ctx;
+	int				active_oncpu;
+	int				max_pertask;
+	int				exclusive;
+
+	/*
+	 * Recursion avoidance:
+	 *
+	 * task, softirq, irq, nmi context
+	 */
+	int				recursion[4];
+};
+
+#ifdef CONFIG_PERF_COUNTERS
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_counters;
+
+extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
+
+extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task,
+					struct task_struct *next, int cpu);
+extern void perf_counter_task_tick(struct task_struct *task, int cpu);
+extern int perf_counter_init_task(struct task_struct *child);
+extern void perf_counter_exit_task(struct task_struct *child);
+extern void perf_counter_free_task(struct task_struct *task);
+extern void perf_counter_do_pending(void);
+extern void perf_counter_print_debug(void);
+extern void __perf_disable(void);
+extern bool __perf_enable(void);
+extern void perf_disable(void);
+extern void perf_enable(void);
+extern int perf_counter_task_disable(void);
+extern int perf_counter_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu);
+extern void perf_counter_update_userpage(struct perf_counter *counter);
+
+struct perf_sample_data {
+	struct pt_regs			*regs;
+	u64				addr;
+	u64				period;
+};
+
+extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
+				 struct perf_sample_data *data);
+
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return (counter->attr.type != PERF_TYPE_RAW) &&
+		(counter->attr.type != PERF_TYPE_HARDWARE) &&
+		(counter->attr.type != PERF_TYPE_HW_CACHE);
+}
+
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
+
+extern void __perf_counter_mmap(struct vm_area_struct *vma);
+
+static inline void perf_counter_mmap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__perf_counter_mmap(vma);
+}
+
+extern void perf_counter_comm(struct task_struct *tsk);
+extern void perf_counter_fork(struct task_struct *tsk);
+
+extern void perf_counter_task_migration(struct task_struct *task, int cpu);
+
+#define MAX_STACK_DEPTH			255
+
+struct perf_callchain_entry {
+	u16				nr;
+	u16				hv;
+	u16				kernel;
+	u16				user;
+	u64				ip[MAX_STACK_DEPTH];
+};
+
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+
+extern int sysctl_perf_counter_paranoid;
+extern int sysctl_perf_counter_mlock;
+extern int sysctl_perf_counter_sample_rate;
+
+extern void perf_counter_init(void);
+
+#ifndef perf_misc_flags
+#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_EVENT_MISC_USER : \
+				 PERF_EVENT_MISC_KERNEL)
+#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+#endif
+
+#else
+static inline void
+perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_counter_task_sched_out(struct task_struct *task,
+			    struct task_struct *next, int cpu)		{ }
+static inline void
+perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
+static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
+static inline void perf_counter_exit_task(struct task_struct *child)	{ }
+static inline void perf_counter_free_task(struct task_struct *task)	{ }
+static inline void perf_counter_do_pending(void)			{ }
+static inline void perf_counter_print_debug(void)			{ }
+static inline void perf_disable(void)					{ }
+static inline void perf_enable(void)					{ }
+static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
+static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
+
+static inline void
+perf_swcounter_event(u32 event, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)			{ }
+
+static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
+static inline void perf_counter_comm(struct task_struct *tsk)		{ }
+static inline void perf_counter_fork(struct task_struct *tsk)		{ }
+static inline void perf_counter_init(void)				{ }
+static inline void perf_counter_task_migration(struct task_struct *task,
+					       int cpu)			{ }
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c8f0385..b43a9e0 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -152,5 +152,6 @@
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
 #endif
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1d4e2d2..b3f7476 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -382,14 +382,13 @@
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
 extern int sysdev_resume(void);
-extern void device_power_up(pm_message_t state);
-extern void device_resume(pm_message_t state);
+extern void dpm_resume_noirq(pm_message_t state);
+extern void dpm_resume_end(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int sysdev_suspend(pm_message_t state);
-extern int device_power_down(pm_message_t state);
-extern int device_suspend(pm_message_t state);
-extern int device_prepare_suspend(pm_message_t state);
+extern int dpm_suspend_noirq(pm_message_t state);
+extern int dpm_suspend_start(pm_message_t state);
 
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
@@ -403,7 +402,7 @@
 #define device_pm_lock() do {} while (0)
 #define device_pm_unlock() do {} while (0)
 
-static inline int device_suspend(pm_message_t state)
+static inline int dpm_suspend_start(pm_message_t state)
 {
 	return 0;
 }
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 48d887e..b00df4c 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -85,4 +85,7 @@
 #define PR_SET_TIMERSLACK 29
 #define PR_GET_TIMERSLACK 30
 
+#define PR_TASK_PERF_COUNTERS_DISABLE		31
+#define PR_TASK_PERF_COUNTERS_ENABLE		32
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fbfa3d4..e6e77d3 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -93,20 +93,9 @@
 
 #ifdef CONFIG_PROC_FS
 
-extern spinlock_t proc_subdir_lock;
-
 extern void proc_root_init(void);
 
 void proc_flush_task(struct task_struct *task);
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-unsigned long task_vsize(struct mm_struct *);
-int task_statm(struct mm_struct *, int *, int *, int *, int *);
-void task_mem(struct seq_file *, struct mm_struct *);
-void clear_refs_smap(struct mm_struct *mm);
-
-struct proc_dir_entry *de_get(struct proc_dir_entry *de);
-void de_put(struct proc_dir_entry *de);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
@@ -116,20 +105,7 @@
 				void *data);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
-extern struct vfsmount *proc_mnt;
 struct pid_namespace;
-extern int proc_fill_super(struct super_block *);
-extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
-
-/*
- * These are generic /proc routines that use the internal
- * "struct proc_dir_entry" tree to traverse the filesystem.
- *
- * The /proc root directory has extended versions to take care
- * of the /proc/<pid> subdirectories.
- */
-extern int proc_readdir(struct file *, void *, filldir_t);
-extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67c1565..59e133d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -95,7 +95,6 @@
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@
 #define arch_ptrace_untrace(task)		do { } while (0)
 #endif
 
-#ifndef arch_ptrace_fork
-/*
- * Do machine-specific work to initialize a new task.
- *
- * This is called from copy_process().
- */
-#define arch_ptrace_fork(child, clone_flags)	do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 787d19e..8b9aee1 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -85,65 +85,4 @@
 	struct qnx4_inode_entry AltBoot;
 };
 
-#ifdef __KERNEL__
-
-#define QNX4_DEBUG 0
-
-#if QNX4_DEBUG
-#define QNX4DEBUG(X) printk X
-#else
-#define QNX4DEBUG(X) (void) 0
-#endif
-
-struct qnx4_sb_info {
-	struct buffer_head	*sb_buf;	/* superblock buffer */
-	struct qnx4_super_block	*sb;		/* our superblock */
-	unsigned int		Version;	/* may be useful */
-	struct qnx4_inode_entry	*BitMap;	/* useful */
-};
-
-struct qnx4_inode_info {
-	struct qnx4_inode_entry raw;
-	loff_t mmu_private;
-	struct inode vfs_inode;
-};
-
-extern struct inode *qnx4_iget(struct super_block *, unsigned long);
-extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
-extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
-extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
-
-extern struct buffer_head *qnx4_bread(struct inode *, int, int);
-
-extern const struct inode_operations qnx4_file_inode_operations;
-extern const struct inode_operations qnx4_dir_inode_operations;
-extern const struct file_operations qnx4_file_operations;
-extern const struct file_operations qnx4_dir_operations;
-extern int qnx4_is_free(struct super_block *sb, long block);
-extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
-extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
-extern void qnx4_truncate(struct inode *inode);
-extern void qnx4_free_inode(struct inode *inode);
-extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
-extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
-extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int);
-extern int qnx4_sync_inode(struct inode *inode);
-
-static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
-{
-	return container_of(inode, struct qnx4_inode_info, vfs_inode);
-}
-
-static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
-{
-	return &qnx4_i(inode)->raw;
-}
-
-#endif				/* __KERNEL__ */
-
 #endif
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 36353d9..7bc4575 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,7 +20,12 @@
 /*
  * declaration of quota_function calls in kernel.
  */
-void sync_dquots(struct super_block *sb, int type);
+void sync_quota_sb(struct super_block *sb, int type);
+static inline void writeout_quota_sb(struct super_block *sb, int type)
+{
+	if (sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, type);
+}
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
@@ -253,12 +258,7 @@
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
-/* The following two functions cannot be called inside a transaction */
-static inline void vfs_dq_sync(struct super_block *sb)
-{
-	sync_dquots(sb, -1);
-}
-
+/* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	int ret = -ENOSYS;
@@ -334,7 +334,11 @@
 {
 }
 
-static inline void vfs_dq_sync(struct super_block *sb)
+static inline void sync_quota_sb(struct super_block *sb, int type)
+{
+}
+
+static inline void writeout_quota_sb(struct super_block *sb, int type)
 {
 }
 
diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644
index 0000000..4f532fc
--- /dev/null
+++ b/include/linux/rational.h
@@ -0,0 +1,19 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers,
+ * e.g. when calculating optimum numerator/denominator pairs for
+ * pll configuration taking into account restricted register size
+ */
+
+#ifndef _LINUX_RATIONAL_H
+#define _LINUX_RATIONAL_H
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator);
+
+#endif /* _LINUX_RATIONAL_H */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3..5710f43 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,6 +198,32 @@
 	at->prev = last;
 }
 
+/**
+ * list_entry_rcu - get the struct for this entry
+ * @ptr:        the &struct list_head pointer.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_entry_rcu(ptr, type, member) \
+	container_of(rcu_dereference(ptr), type, member)
+
+/**
+ * list_first_entry_rcu - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_first_entry_rcu(ptr, type, member) \
+	list_entry_rcu((ptr)->next, type, member)
+
 #define __list_for_each_rcu(pos, head) \
 	for (pos = rcu_dereference((head)->next); \
 		pos != (head); \
@@ -214,9 +240,9 @@
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
+	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
 		prefetch(pos->member.next), &pos->member != (head); \
-		pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
+		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
 /**
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 58b2aa5..5a51538 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,8 +161,15 @@
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long resched_ipi;	/* Sent a resched IPI. */
 
-	/* 5) For future __rcu_pending statistics. */
+	/* 5) __rcu_pending() statistics. */
 	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rp_qs_pending;
+	long n_rp_cb_ready;
+	long n_rp_cpu_needs_gp;
+	long n_rp_gp_completed;
+	long n_rp_gp_started;
+	long n_rp_need_fqs;
+	long n_rp_need_nothing;
 
 	int cpu;
 };
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 6473650..dab68bb 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -453,6 +453,7 @@
 	REISERFS_ATTRS,
 	REISERFS_XATTRS_USER,
 	REISERFS_POSIXACL,
+	REISERFS_EXPOSE_PRIVROOT,
 	REISERFS_BARRIER_NONE,
 	REISERFS_BARRIER_FLUSH,
 
@@ -490,6 +491,7 @@
 #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
 #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
 #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
 #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
 #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
 #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b21..8670f15 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
-	u32		type:2, len:3, time_delta:27;
+	u32		type_len:5, time_delta:27;
 	u32		array[];
 };
 
@@ -24,7 +24,8 @@
  *				  size is variable depending on how much
  *				  padding is needed
  *				 If time_delta is non zero:
- *				  everything else same as RINGBUF_TYPE_DATA
+ *				  array[0] holds the actual length
+ *				  size = 4 + length (bytes)
  *
  * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
  *				 array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@
  *				 array[1..2] = tv_sec
  *				 size = 16 bytes
  *
- * @RINGBUF_TYPE_DATA:		Data record
- *				 If len is zero:
+ * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
+ *				Data record
+ *				 If type_len is zero:
  *				  array[0] holds the actual length
  *				  array[1..(length+3)/4] holds data
- *				  size = 4 + 4 + length (bytes)
+ *				  size = 4 + length (bytes)
  *				 else
- *				  length = len << 2
+ *				  length = type_len << 2
  *				  array[0..(length+3)/4-1] holds data
  *				  size = 4 + length (bytes)
  */
 enum ring_buffer_type {
+	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
 	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
-	RINGBUF_TYPE_DATA,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
@@ -68,13 +70,54 @@
 	return event->time_delta;
 }
 
+/*
+ * ring_buffer_event_discard can discard any event in the ring buffer.
+ *   it is up to the caller to protect against a reader from
+ *   consuming it or a writer from wrapping and replacing it.
+ *
+ * No external protection is needed if this is called before
+ * the event is commited. But in that case it would be better to
+ * use ring_buffer_discard_commit.
+ *
+ * Note, if an event that has not been committed is discarded
+ * with ring_buffer_event_discard, it must still be committed.
+ */
 void ring_buffer_event_discard(struct ring_buffer_event *event);
 
 /*
+ * ring_buffer_discard_commit will remove an event that has not
+ *   ben committed yet. If this is used, then ring_buffer_unlock_commit
+ *   must not be called on the discarded event. This function
+ *   will try to remove the event from the ring buffer completely
+ *   if another event has not been written after it.
+ *
+ * Example use:
+ *
+ *  if (some_condition)
+ *    ring_buffer_discard_commit(buffer, event);
+ *  else
+ *    ring_buffer_unlock_commit(buffer, event);
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+
+/*
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
-ring_buffer_alloc(unsigned long size, unsigned flags);
+__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc(size, flags)			\
+({							\
+	static struct lock_class_key __key;		\
+	__ring_buffer_alloc((size), (flags), &__key);	\
+})
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
@@ -122,6 +165,8 @@
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
@@ -137,6 +182,11 @@
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
 			  size_t len, int cpu, int full);
 
+struct trace_seq;
+
+int ring_buffer_print_entry_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_seq *s);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..4896fdf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -96,8 +97,9 @@
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
+struct perf_counter_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -116,6 +118,7 @@
  *    11 bit fractions.
  */
 extern unsigned long avenrun[];		/* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
@@ -135,8 +138,9 @@
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
-extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern void calc_global_load(void);
+extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
@@ -672,6 +676,10 @@
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
@@ -838,7 +846,17 @@
 	 */
 	u32 reciprocal_cpu_power;
 
-	unsigned long cpumask[];
+	/*
+	 * The CPUs this group covers.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_group' in kernel/sched.c)
+	 */
+	unsigned long cpumask[0];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -924,8 +942,17 @@
 	char *name;
 #endif
 
-	/* span of all CPUs in this domain */
-	unsigned long span[];
+	/*
+	 * Span of all CPUs in this domain.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
+	 */
+	unsigned long span[0];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
@@ -1052,9 +1079,10 @@
 	u64			last_wakeup;
 	u64			avg_overlap;
 
+	u64			nr_migrations;
+
 	u64			start_runtime;
 	u64			avg_wakeup;
-	u64			nr_migrations;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
@@ -1209,18 +1237,11 @@
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
 	/*
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
-#endif /* CONFIG_X86_PTRACE_BTS */
+	struct bts_context *bts;
 
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
@@ -1247,7 +1268,9 @@
 					 * credentials (COW) */
 	const struct cred *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
+	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+					 * credential calculations
+					 * (notably. ptrace) */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
@@ -1380,6 +1403,11 @@
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
+#ifdef CONFIG_PERF_COUNTERS
+	struct perf_counter_context *perf_counter_ctxp;
+	struct mutex perf_counter_mutex;
+	struct list_head perf_counter_list;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
@@ -1428,7 +1456,9 @@
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-#endif
+	/* bitmask of trace recursion */
+	unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -1885,6 +1915,7 @@
 
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
+extern void __flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
@@ -2001,8 +2032,10 @@
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
@@ -2010,7 +2043,8 @@
 }
 #endif
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2083,8 @@
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
-			  struct task_struct, thread_group);
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
@@ -2388,6 +2422,13 @@
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
+/*
+ * Call the function if the target task is executing on a CPU right now:
+ */
+extern void task_oncpu_function_call(struct task_struct *p,
+				     void (*func) (void *info), void *info);
+
+
 #ifdef CONFIG_MM_OWNER
 extern void mm_update_next_owner(struct mm_struct *mm);
 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd616..5eff459 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2197,6 +2197,8 @@
 				     unsigned long addr,
 				     unsigned long addr_only)
 {
+	if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO))
+		return -EACCES;
 	return 0;
 }
 
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 9136cc5..e5bb75a 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -96,54 +96,76 @@
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
+ *
+ * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
  */
-#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 
-				   on the callout port */
-#define ASYNC_FOURPORT  0x0002	/* Set OU1, OUT2 per AST Fourport settings */
-#define ASYNC_SAK	0x0004	/* Secure Attention Key (Orange book) */
-#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
+#define ASYNCB_HUP_NOTIFY	 0 /* Notify getty on hangups and closes
+				    * on the callout port */
+#define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
+#define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
+#define ASYNCB_SPLIT_TERMIOS	 3 /* Separate termios for dialin/callout */
+#define ASYNCB_SPD_HI		 4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_VHI		 5 /* Use 115200 instead of 38400 bps */
+#define ASYNCB_SKIP_TEST	 6 /* Skip UART test during autoconfiguration */
+#define ASYNCB_AUTO_IRQ		 7 /* Do automatic IRQ during
+				    * autoconfiguration */
+#define ASYNCB_SESSION_LOCKOUT	 8 /* Lock out cua opens based on session */
+#define ASYNCB_PGRP_LOCKOUT	 9 /* Lock out cua opens based on pgrp */
+#define ASYNCB_CALLOUT_NOHUP	10 /* Don't do hangups for cua device */
+#define ASYNCB_HARDPPS_CD	11 /* Call hardpps when CD goes high  */
+#define ASYNCB_SPD_SHI		12 /* Use 230400 instead of 38400 bps */
+#define ASYNCB_LOW_LATENCY	13 /* Request low latency behaviour */
+#define ASYNCB_BUGGY_UART	14 /* This is a buggy UART, skip some safety
+				    * checks.  Note: can be dangerous! */
+#define ASYNCB_AUTOPROBE	15 /* Port was autoprobed by PCI or PNP code */
+#define ASYNCB_LAST_USER	15
 
-#define ASYNC_SPD_MASK	0x1030
-#define ASYNC_SPD_HI	0x0010	/* Use 56000 instead of 38400 bps */
+/* Internal flags used only by kernel */
+#define ASYNCB_INITIALIZED	31 /* Serial port was initialized */
+#define ASYNCB_NORMAL_ACTIVE	29 /* Normal device is active */
+#define ASYNCB_BOOT_AUTOCONF	28 /* Autoconfigure port on bootup */
+#define ASYNCB_CLOSING		27 /* Serial port is closing */
+#define ASYNCB_CTS_FLOW		26 /* Do CTS flow control */
+#define ASYNCB_CHECK_CD		25 /* i.e., CLOCAL */
+#define ASYNCB_SHARE_IRQ	24 /* for multifunction cards, no longer used */
+#define ASYNCB_CONS_FLOW	23 /* flow control for console  */
+#define ASYNCB_BOOT_ONLYMCA	22 /* Probe only if MCA bus */
+#define ASYNCB_FIRST_KERNEL	22
 
-#define ASYNC_SPD_VHI	0x0020  /* Use 115200 instead of 38400 bps */
-#define ASYNC_SPD_CUST	0x0030  /* Use user-specified divisor */
+#define ASYNC_HUP_NOTIFY	(1U << ASYNCB_HUP_NOTIFY)
+#define ASYNC_FOURPORT		(1U << ASYNCB_FOURPORT)
+#define ASYNC_SAK		(1U << ASYNCB_SAK)
+#define ASYNC_SPLIT_TERMIOS	(1U << ASYNCB_SPLIT_TERMIOS)
+#define ASYNC_SPD_HI		(1U << ASYNCB_SPD_HI)
+#define ASYNC_SPD_VHI		(1U << ASYNCB_SPD_VHI)
+#define ASYNC_SKIP_TEST		(1U << ASYNCB_SKIP_TEST)
+#define ASYNC_AUTO_IRQ		(1U << ASYNCB_AUTO_IRQ)
+#define ASYNC_SESSION_LOCKOUT	(1U << ASYNCB_SESSION_LOCKOUT)
+#define ASYNC_PGRP_LOCKOUT	(1U << ASYNCB_PGRP_LOCKOUT)
+#define ASYNC_CALLOUT_NOHUP	(1U << ASYNCB_CALLOUT_NOHUP)
+#define ASYNC_HARDPPS_CD	(1U << ASYNCB_HARDPPS_CD)
+#define ASYNC_SPD_SHI		(1U << ASYNCB_SPD_SHI)
+#define ASYNC_LOW_LATENCY	(1U << ASYNCB_LOW_LATENCY)
+#define ASYNC_BUGGY_UART	(1U << ASYNCB_BUGGY_UART)
+#define ASYNC_AUTOPROBE		(1U << ASYNCB_AUTOPROBE)
 
-#define ASYNC_SKIP_TEST	0x0040 /* Skip UART test during autoconfiguration */
-#define ASYNC_AUTO_IRQ  0x0080 /* Do automatic IRQ during autoconfiguration */
-#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
-#define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
-#define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
+#define ASYNC_FLAGS		((1U << ASYNCB_LAST_USER) - 1)
+#define ASYNC_USR_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI| \
+		ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
+#define ASYNC_SPD_CUST		(ASYNC_SPD_HI|ASYNC_SPD_VHI)
+#define ASYNC_SPD_WARP		(ASYNC_SPD_HI|ASYNC_SPD_SHI)
+#define ASYNC_SPD_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
 
-#define ASYNC_HARDPPS_CD	0x0800	/* Call hardpps when CD goes high  */
-
-#define ASYNC_SPD_SHI	0x1000	/* Use 230400 instead of 38400 bps */
-#define ASYNC_SPD_WARP	0x1010	/* Use 460800 instead of 38400 bps */
-
-#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */
-
-#define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
-				  * checks.  Note: can be dangerous! */
-
-#define ASYNC_AUTOPROBE	 0x8000 /* Port was autoprobed by PCI or PNP code */
-
-#define ASYNC_FLAGS	0x7FFF	/* Possible legal async flags */
-#define ASYNC_USR_MASK	0x3430	/* Legal flags that non-privileged
-				 * users can set or reset */
-
-/* Internal flags used only by kernel/chr_drv/serial.c */
-#define ASYNC_INITIALIZED	0x80000000 /* Serial port was initialized */
-#define ASYNC_NORMAL_ACTIVE	0x20000000 /* Normal device is active */
-#define ASYNC_BOOT_AUTOCONF	0x10000000 /* Autoconfigure port on bootup */
-#define ASYNC_CLOSING		0x08000000 /* Serial port is closing */
-#define ASYNC_CTS_FLOW		0x04000000 /* Do CTS flow control */
-#define ASYNC_CHECK_CD		0x02000000 /* i.e., CLOCAL */
-#define ASYNC_SHARE_IRQ		0x01000000 /* for multifunction cards
-					     --- no longer used */
-#define ASYNC_CONS_FLOW		0x00800000 /* flow control for console  */
-
-#define ASYNC_BOOT_ONLYMCA	0x00400000 /* Probe only if MCA bus */
-#define ASYNC_INTERNAL_FLAGS	0xFFC00000 /* Internal flags */
+#define ASYNC_INITIALIZED	(1U << ASYNCB_INITIALIZED)
+#define ASYNC_NORMAL_ACTIVE	(1U << ASYNCB_NORMAL_ACTIVE)
+#define ASYNC_BOOT_AUTOCONF	(1U << ASYNCB_BOOT_AUTOCONF)
+#define ASYNC_CLOSING		(1U << ASYNCB_CLOSING)
+#define ASYNC_CTS_FLOW		(1U << ASYNCB_CTS_FLOW)
+#define ASYNC_CHECK_CD		(1U << ASYNCB_CHECK_CD)
+#define ASYNC_SHARE_IRQ		(1U << ASYNCB_SHARE_IRQ)
+#define ASYNC_CONS_FLOW		(1U << ASYNCB_CONS_FLOW)
+#define ASYNC_BOOT_ONLYMCA	(1U << ASYNCB_BOOT_ONLYMCA)
+#define ASYNC_INTERNAL_FLAGS	(~((1U << ASYNCB_FIRST_KERNEL) - 1))
 
 /*
  * Multiport serial configuration structure --- external structure
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 57a97e5..6fd80c4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -41,7 +41,8 @@
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
-#define PORT_MAX_8250	17	/* max port ID */
+#define PORT_AR7	18	/* Texas Instruments AR7 internal UART */
+#define PORT_MAX_8250	18	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
@@ -167,6 +168,9 @@
 /* MAX3100 */
 #define PORT_MAX3100    86
 
+/* Timberdale UART */
+#define PORT_TIMBUART	87
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 893cc53..1c297dd 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -25,8 +25,7 @@
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
+	char		*clk;			/* clock string */
 };
 
-int early_sci_setup(struct uart_port *port);
-
 #endif /* __LINUX_SERIAL_SCI_H */
diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h
deleted file mode 100644
index 68cacde..0000000
--- a/include/linux/sh_cmt.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SH_CMT_H__
-#define __SH_CMT_H__
-
-struct sh_cmt_config {
-	char *name;
-	unsigned long channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-	unsigned long clocksource_rating;
-};
-
-#endif /* __SH_CMT_H__ */
diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h
new file mode 100644
index 0000000..864bd56
--- /dev/null
+++ b/include/linux/sh_timer.h
@@ -0,0 +1,13 @@
+#ifndef __SH_TIMER_H__
+#define __SH_TIMER_H__
+
+struct sh_timer_config {
+	char *name;
+	long channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+	unsigned long clocksource_rating;
+};
+
+#endif /* __SH_TIMER_H__ */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f..c755283 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -235,6 +235,8 @@
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
+				 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int show_unhandled_signals;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 24c5602..219b8fb 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -62,6 +62,8 @@
 # define SLAB_DEBUG_OBJECTS	0x00000000UL
 #endif
 
+#define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
@@ -317,4 +319,6 @@
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+void __init kmem_cache_init_late(void);
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0b..713f841 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 0ec00b3..bb5368d 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -34,4 +34,9 @@
 	return kmalloc(size, flags);
 }
 
+static inline void kmem_cache_init_late(void)
+{
+	/* Nothing to do */
+}
+
 #endif /* __LINUX_SLOB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90..4dcbc2c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
@@ -302,4 +302,6 @@
 }
 #endif
 
+void __init kmem_cache_init_late(void);
+
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 938234c..d4841ed 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -60,6 +60,7 @@
 #define __raw_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
 # define __raw_spin_lock(lock)		do { (void)(lock); } while (0)
+# define __raw_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
 # define __raw_spin_unlock(lock)	do { (void)(lock); } while (0)
 # define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 5f3faa9..18e7c7c 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -11,8 +11,7 @@
 #include <linux/pipe_fs_i.h>
 
 /*
- * splice is tied to pipes as a transport (at least for now), so we'll just
- * add the splice flags here.
+ * Flags passed in from splice/tee/vmsplice
  */
 #define SPLICE_F_MOVE	(0x01)	/* move pages instead of copying */
 #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 795032e..cd15df6 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -245,11 +245,6 @@
 
 extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
 extern int hibernate(void);
-extern int hibernate_nvs_register(unsigned long start, unsigned long size);
-extern int hibernate_nvs_alloc(void);
-extern void hibernate_nvs_free(void);
-extern void hibernate_nvs_save(void);
-extern void hibernate_nvs_restore(void);
 extern bool system_entering_hibernation(void);
 #else /* CONFIG_HIBERNATION */
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -258,6 +253,16 @@
 
 static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
+static inline bool system_entering_hibernation(void) { return false; }
+#endif /* CONFIG_HIBERNATION */
+
+#ifdef CONFIG_HIBERNATION_NVS
+extern int hibernate_nvs_register(unsigned long start, unsigned long size);
+extern int hibernate_nvs_alloc(void);
+extern void hibernate_nvs_free(void);
+extern void hibernate_nvs_save(void);
+extern void hibernate_nvs_restore(void);
+#else /* CONFIG_HIBERNATION_NVS */
 static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
 {
 	return 0;
@@ -266,8 +271,7 @@
 static inline void hibernate_nvs_free(void) {}
 static inline void hibernate_nvs_save(void) {}
 static inline void hibernate_nvs_restore(void) {}
-static inline bool system_entering_hibernation(void) { return false; }
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION_NVS */
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ac9ff54..cb1a663 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,7 +29,8 @@
 
 extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
 				      phys_addr_t address);
-extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
+				       dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3052084..418d90f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -55,6 +55,7 @@
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
+struct perf_counter_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -755,4 +756,8 @@
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
+
+asmlinkage long sys_perf_counter_open(
+		struct perf_counter_attr __user *attr_uptr,
+		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e6b820f..a8cc4e1 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -21,13 +21,14 @@
 		struct {
 			unsigned long arg0, arg1, arg2, arg3;
 		};
-		/* For futex_wait */
+		/* For futex_wait and futex_wait_requeue_pi */
 		struct {
 			u32 *uaddr;
 			u32 val;
 			u32 flags;
 			u32 bitset;
 			u64 time;
+			u32 *uaddr2;
 		} futex;
 		/* For nanosleep */
 		struct {
diff --git a/include/linux/time.h b/include/linux/time.h
index 242f624..ea16c1a 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,6 +113,21 @@
 #define CURRENT_TIME		(current_kernel_time())
 #define CURRENT_TIME_SEC	((struct timespec) { get_seconds(), 0 })
 
+/* Some architectures do not supply their own clocksource.
+ * This is mainly the case in architectures that get their
+ * inter-tick times by reading the counter on their interval
+ * timer. Since these timers wrap every tick, they're not really
+ * useful as clocksources. Wrapping them to act like one is possible
+ * but not very efficient. So we provide a callout these arches
+ * can implement for use with the jiffies clocksource to provide
+ * finer then tick granular time.
+ */
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+extern u32 arch_gettimeoffset(void);
+#else
+static inline u32 arch_gettimeoffset(void) { return 0; }
+#endif
+
 extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 0000000..c68bccb
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,92 @@
+#ifndef _LINUX_TRACE_SEQ_H
+#define _LINUX_TRACE_SEQ_H
+
+#include <linux/fs.h>
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE.
+ */
+
+struct trace_seq {
+	unsigned char		buffer[PAGE_SIZE];
+	unsigned int		len;
+	unsigned int		readpos;
+};
+
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/*
+ * Currently only defined when tracing is enabled.
+ */
+#ifdef CONFIG_TRACING
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+
+#else /* CONFIG_TRACING */
+static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+	return 0;
+}
+static inline int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+	return 0;
+}
+
+static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+}
+static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt)
+{
+	return 0;
+}
+static inline int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	return 0;
+}
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+	return 0;
+}
+static inline int
+trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
+{
+	return 0;
+}
+static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				       size_t len)
+{
+	return 0;
+}
+static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+	return NULL;
+}
+static inline int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	return 0;
+}
+#endif /* CONFIG_TRACING */
+
+#endif /* _LINUX_TRACE_SEQ_H */
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index c7aa154..eb96603 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -259,14 +259,12 @@
 
 /**
  * tracehook_report_clone - in parent, new child is about to start running
- * @trace:		return value from tracehook_prepare_clone()
  * @regs:		parent's user register state
  * @clone_flags:	flags from parent's system call
  * @pid:		new child's PID in the parent's namespace
  * @child:		new child task
  *
- * Called after a child is set up, but before it has been started
- * running.  @trace is the value returned by tracehook_prepare_clone().
+ * Called after a child is set up, but before it has been started running.
  * This is not a good place to block, because the child has not started
  * yet.  Suspend the child here if desired, and then block in
  * tracehook_report_clone_complete().  This must prevent the child from
@@ -276,13 +274,14 @@
  *
  * Called with no locks held, but the child cannot run until this returns.
  */
-static inline void tracehook_report_clone(int trace, struct pt_regs *regs,
+static inline void tracehook_report_clone(struct pt_regs *regs,
 					  unsigned long clone_flags,
 					  pid_t pid, struct task_struct *child)
 {
-	if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) {
+	if (unlikely(task_ptrace(child))) {
 		/*
-		 * The child starts up with an immediate SIGSTOP.
+		 * It doesn't matter who attached/attaching to this
+		 * task, the pending SIGSTOP is right in any case.
 		 */
 		sigaddset(&child->pending.signal, SIGSTOP);
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee..14df7e6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@
 					 * Keep in sync with vmlinux.lds.h.
 					 */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)		args
 
@@ -114,6 +116,7 @@
 	struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,8 @@
 }
 
 #define PARAMS(args...) args
-#define TRACE_FORMAT(name, proto, args, fmt)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 
-
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +263,6 @@
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db95..1488d8c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,7 +185,7 @@
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
-	void (*raise_dtr_rts)(struct tty_port *port);
+	void (*dtr_rts)(struct tty_port *port, int raise);
 };
 	
 struct tty_port {
@@ -201,6 +201,9 @@
 	unsigned char		*xmit_buf;	/* Optional buffer */
 	int			close_delay;	/* Close port delay */
 	int			closing_wait;	/* Delay for output */
+	int			drain_delay;	/* Set to zero if no pure time
+						   based drain is needed else
+						   set to size of fifo */
 };
 
 /*
@@ -223,8 +226,11 @@
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
-	/* The ldisc objects are protected by tty_ldisc_lock at the moment */
-	struct tty_ldisc ldisc;
+
+	/* Protects ldisc changes: Lock tty not pty */
+	struct mutex ldisc_mutex;
+	struct tty_ldisc *ldisc;
+
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -311,6 +317,7 @@
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
 #define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
+#define TTY_LDISC_OPEN	 	11	/* Line discipline is open */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
@@ -403,6 +410,7 @@
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
+extern void tty_ldisc_hangup(struct tty_struct *tty);
 extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
@@ -425,6 +433,9 @@
 extern void tty_release_dev(struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
+extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
+extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
+
 extern struct mutex tty_mutex;
 
 extern void tty_write_unlock(struct tty_struct *tty);
@@ -438,6 +449,7 @@
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_lower_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index bcba84e..3566129 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -127,7 +127,8 @@
  * 	the line discipline are close to full, and it should somehow
  * 	signal that no more characters should be sent to the tty.
  *
- *	Optional: Always invoke via tty_throttle();
+ *	Optional: Always invoke via tty_throttle(), called under the
+ *	termios lock.
  * 
  * void (*unthrottle)(struct tty_struct * tty);
  *
@@ -135,7 +136,8 @@
  * 	that characters can now be sent to the tty without fear of
  * 	overrunning the input buffers of the line disciplines.
  * 
- *	Optional: Always invoke via tty_unthrottle();
+ *	Optional: Always invoke via tty_unthrottle(), called under the
+ *	termios lock.
  *
  * void (*stop)(struct tty_struct *tty);
  *
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 625e9e4..8cdfed7 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -224,8 +224,7 @@
 	/* Called by console with tty = NULL and by tty */
 	int  (*open)(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-	void (*close)(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+	void (*close)(struct usb_serial_port *port);
 	int  (*write)(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 	/* Called only by the tty layer */
@@ -241,6 +240,10 @@
 	int  (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int  (*tiocmset)(struct tty_struct *tty, struct file *file,
 			 unsigned int set, unsigned int clear);
+	/* Called by the tty layer for port level work. There may or may not
+	   be an attached tty at this point */
+	void (*dtr_rts)(struct usb_serial_port *port, int on);
+	int  (*carrier_raised)(struct usb_serial_port *port);
 	/* USB events */
 	void (*read_int_callback)(struct urb *urb);
 	void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@
 		struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
-extern void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 06005fa..4fca4f5 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -10,14 +10,17 @@
 
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
+ * @list: the chain of virtqueues for this device
  * @callback: the function to call when buffers are consumed (can be NULL).
+ * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @vq_ops: the operations for this virtqueue (see below).
  * @priv: a pointer for the virtqueue implementation to use.
  */
-struct virtqueue
-{
+struct virtqueue {
+	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
+	const char *name;
 	struct virtio_device *vdev;
 	struct virtqueue_ops *vq_ops;
 	void *priv;
@@ -76,15 +79,16 @@
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
+ * @vqs: the list of virtqueues for this device.
  * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
-struct virtio_device
-{
+struct virtio_device {
 	int index;
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
+	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
 	void *priv;
@@ -99,8 +103,7 @@
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this device.
  * @feature_table_size: number of entries in the feature table array.
- * @probe: the function to call when a device is found.  Returns a token for
- *    remove, or PTR_ERR().
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @remove: the function when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 94c56d2..be7d255 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -15,6 +15,10 @@
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
+#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
+
+#define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
 
 struct virtio_blk_config
 {
@@ -32,6 +36,7 @@
 	} geometry;
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
+	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
 /* These two define direction. */
@@ -55,6 +60,13 @@
 	__u64 sector;
 };
 
+struct virtio_scsi_inhdr {
+	__u32 errors;
+	__u32 data_len;
+	__u32 sense_len;
+	__u32 residual;
+};
+
 /* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
 #define VIRTIO_BLK_S_IOERR	1
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index bf8ec28..99f5145 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -29,6 +29,7 @@
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
 
 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/virtio.h>
 
 /**
@@ -49,15 +50,26 @@
  * @set_status: write the status byte
  *	vdev: the virtio_device
  *	status: the new status byte
+ * @request_vqs: request the specified number of virtqueues
+ *	vdev: the virtio_device
+ *	max_vqs: the max number of virtqueues we want
+ *      If supplied, must call before any virtqueues are instantiated.
+ *      To modify the max number of virtqueues after request_vqs has been
+ *      called, call free_vqs and then request_vqs with a new value.
+ * @free_vqs: cleanup resources allocated by request_vqs
+ *	vdev: the virtio_device
+ *      If supplied, must call after all virtqueues have been deleted.
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
- * @find_vq: find a virtqueue and instantiate it.
+ * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
- *	index: the 0-based virtqueue number in case there's more than one.
- *	callback: the virqtueue callback
- *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
- * @del_vq: free a virtqueue found by find_vq().
+ *	nvqs: the number of virtqueues to find
+ *	vqs: on success, includes new virtqueues
+ *	callbacks: array of callbacks, for each virtqueue
+ *	names: array of virtqueue names (mainly for debugging)
+ *	Returns 0 on success or error status
+ * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
@@ -66,6 +78,7 @@
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
  */
+typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops
 {
 	void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -75,10 +88,11 @@
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
-				     unsigned index,
-				     void (*callback)(struct virtqueue *));
-	void (*del_vq)(struct virtqueue *vq);
+	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[]);
+	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 };
@@ -99,7 +113,9 @@
 	if (__builtin_constant_p(fbit))
 		BUILD_BUG_ON(fbit >= 32);
 
-	virtio_check_driver_offered_feature(vdev, fbit);
+	if (fbit < VIRTIO_TRANSPORT_F_START)
+		virtio_check_driver_offered_feature(vdev, fbit);
+
 	return test_bit(fbit, vdev->features);
 }
 
@@ -126,5 +142,18 @@
 	vdev->config->get(vdev, offset, buf, len);
 	return 0;
 }
+
+static inline
+struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
+					vq_callback_t *c, const char *n)
+{
+	vq_callback_t *callbacks[] = { c };
+	const char *names[] = { n };
+	struct virtqueue *vq;
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
+	if (err < 0)
+		return ERR_PTR(err);
+	return vq;
+}
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index cd0fd5d..9a3d7c4 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -47,9 +47,17 @@
 /* The bit of the ISR which indicates a device configuration change. */
 #define VIRTIO_PCI_ISR_CONFIG		0x2
 
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
-#define VIRTIO_PCI_CONFIG		20
+#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 71e0372..693e0ec 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -14,6 +14,8 @@
 #define VRING_DESC_F_NEXT	1
 /* This marks a buffer as write-only (otherwise read-only). */
 #define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
 
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
@@ -24,6 +26,9 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
 {
@@ -119,7 +124,8 @@
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-				      void (*callback)(struct virtqueue *vq));
+				      void (*callback)(struct virtqueue *vq),
+				      const char *name);
 void vring_del_virtqueue(struct virtqueue *vq);
 /* Filter out transport-specific feature bits. */
 void vring_transport_features(struct virtio_device *vdev);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index bc02463..6788e1a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -132,8 +132,6 @@
 	list_del(&old->task_list);
 }
 
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9344547..3224820 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,7 +79,6 @@
 void writeback_inodes(struct writeback_control *wbc);
 int inode_wait(void *);
 void sync_inodes_sb(struct super_block *, int wait);
-void sync_inodes(int wait);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h
index 0627a9a..3d138c1f 100644
--- a/include/scsi/fc/fc_fip.h
+++ b/include/scsi/fc/fc_fip.h
@@ -22,13 +22,6 @@
  * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf
  */
 
-/*
- * The FIP ethertype eventually goes in net/if_ether.h.
- */
-#ifndef ETH_P_FIP
-#define ETH_P_FIP	0x8914	/* FIP Ethertype */
-#endif
-
 #define FIP_DEF_PRI	128	/* default selection priority */
 #define FIP_DEF_FC_MAP	0x0efc00 /* default FCoE MAP (MAC OUI) value */
 #define FIP_DEF_FKA	8000	/* default FCF keep-alive/advert period (mS) */
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index d0ed522..4426f00 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -22,6 +22,11 @@
 #define ISCSI_IF_H
 
 #include <scsi/iscsi_proto.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#define ISCSI_NL_GRP_ISCSID	1
+#define ISCSI_NL_GRP_UIP	2
 
 #define UEVENT_BASE			10
 #define KEVENT_BASE			100
@@ -50,7 +55,10 @@
 	ISCSI_UEVENT_TGT_DSCVR		= UEVENT_BASE + 15,
 	ISCSI_UEVENT_SET_HOST_PARAM	= UEVENT_BASE + 16,
 	ISCSI_UEVENT_UNBIND_SESSION	= UEVENT_BASE + 17,
-	ISCSI_UEVENT_CREATE_BOUND_SESSION	= UEVENT_BASE + 18,
+	ISCSI_UEVENT_CREATE_BOUND_SESSION		= UEVENT_BASE + 18,
+	ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST	= UEVENT_BASE + 19,
+
+	ISCSI_UEVENT_PATH_UPDATE	= UEVENT_BASE + 20,
 
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
@@ -59,6 +67,9 @@
 	ISCSI_KEVENT_DESTROY_SESSION	= KEVENT_BASE + 4,
 	ISCSI_KEVENT_UNBIND_SESSION	= KEVENT_BASE + 5,
 	ISCSI_KEVENT_CREATE_SESSION	= KEVENT_BASE + 6,
+
+	ISCSI_KEVENT_PATH_REQ		= KEVENT_BASE + 7,
+	ISCSI_KEVENT_IF_DOWN		= KEVENT_BASE + 8,
 };
 
 enum iscsi_tgt_dscvr {
@@ -131,6 +142,10 @@
 		struct msg_transport_connect {
 			uint32_t	non_blocking;
 		} ep_connect;
+		struct msg_transport_connect_through_host {
+			uint32_t	host_no;
+			uint32_t	non_blocking;
+		} ep_connect_through_host;
 		struct msg_transport_poll {
 			uint64_t	ep_handle;
 			uint32_t	timeout_ms;
@@ -154,6 +169,9 @@
 			uint32_t	param; /* enum iscsi_host_param */
 			uint32_t	len;
 		} set_host_param;
+		struct msg_set_path {
+			uint32_t	host_no;
+		} set_path;
 	} u;
 	union {
 		/* messages k -> u */
@@ -187,10 +205,39 @@
 		struct msg_transport_connect_ret {
 			uint64_t	handle;
 		} ep_connect_ret;
+		struct msg_req_path {
+			uint32_t	host_no;
+		} req_path;
+		struct msg_notify_if_down {
+			uint32_t	host_no;
+		} notify_if_down;
 	} r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
 /*
+ * To keep the struct iscsi_uevent size the same for userspace code
+ * compatibility, the main structure for ISCSI_UEVENT_PATH_UPDATE and
+ * ISCSI_KEVENT_PATH_REQ is defined separately and comes after the
+ * struct iscsi_uevent in the NETLINK_ISCSI message.
+ */
+struct iscsi_path {
+	uint64_t	handle;
+	uint8_t		mac_addr[6];
+	uint8_t		mac_addr_old[6];
+	uint32_t	ip_addr_len;	/* 4 or 16 */
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} src;
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} dst;
+	uint16_t	vlan_id;
+	uint16_t	pmtu;
+} __attribute__ ((aligned (sizeof(uint64_t))));
+
+/*
  * Common error codes
  */
 enum iscsi_err {
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 45f9cc6..ebdd9f4 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -679,6 +679,7 @@
 	unsigned int		e_d_tov;
 	unsigned int		r_a_tov;
 	u8			max_retry_count;
+	u8			max_rport_retry_count;
 	u16			link_speed;
 	u16			link_supported_speeds;
 	u16			lro_xid;	/* max xid for fcoe lro */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 0289f57..196525c 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -82,9 +82,12 @@
 
 
 enum {
+	ISCSI_TASK_FREE,
 	ISCSI_TASK_COMPLETED,
 	ISCSI_TASK_PENDING,
 	ISCSI_TASK_RUNNING,
+	ISCSI_TASK_ABRT_TMF,		/* aborted due to TMF */
+	ISCSI_TASK_ABRT_SESS_RECOV,	/* aborted due to session recovery */
 };
 
 struct iscsi_r2t_info {
@@ -181,9 +184,7 @@
 
 	/* xmit */
 	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
-	struct list_head	mgmt_run_list;	/* list of control tasks */
-	struct list_head	xmitqueue;	/* data-path cmd queue */
-	struct list_head	run_list;	/* list of cmds in progress */
+	struct list_head	cmdqueue;	/* data-path cmd queue */
 	struct list_head	requeue;	/* tasks needing another run */
 	struct work_struct	xmitwork;	/* per-conn. xmit workqueue */
 	unsigned long		suspend_tx;	/* suspend Tx */
@@ -406,6 +407,7 @@
 				char *, int);
 extern int iscsi_verify_itt(struct iscsi_conn *, itt_t);
 extern struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *, itt_t);
+extern struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *, itt_t);
 extern void iscsi_requeue_task(struct iscsi_task *task);
 extern void iscsi_put_task(struct iscsi_task *task);
 extern void __iscsi_get_task(struct iscsi_task *task);
diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h
index f888a6f..56e920a 100644
--- a/include/scsi/osd_attributes.h
+++ b/include/scsi/osd_attributes.h
@@ -29,6 +29,7 @@
 	OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1,
 	OSD_APAGE_PARTITION_QUOTAS	= OSD_APAGE_PARTITION_FIRST + 2,
 	OSD_APAGE_PARTITION_TIMESTAMP	= OSD_APAGE_PARTITION_FIRST + 3,
+	OSD_APAGE_PARTITION_ATTR_ACCESS = OSD_APAGE_PARTITION_FIRST + 4,
 	OSD_APAGE_PARTITION_SECURITY	= OSD_APAGE_PARTITION_FIRST + 5,
 	OSD_APAGE_PARTITION_LAST	= 0x5FFFFFFF,
 
@@ -51,7 +52,9 @@
 	OSD_APAGE_RESERVED_TYPE_LAST	= 0xEFFFFFFF,
 
 	OSD_APAGE_COMMON_FIRST		= 0xF0000000,
-	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFE,
+	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFD,
+
+	OSD_APAGE_CURRENT_COMMAND	= 0xFFFFFFFE,
 
 	OSD_APAGE_REQUEST_ALL		= 0xFFFFFFFF,
 };
@@ -106,10 +109,30 @@
 	OSD_ATTR_RI_PRODUCT_REVISION_LEVEL   = 0x7,   /* 4        */
 	OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER    = 0x8,   /* variable */
 	OSD_ATTR_RI_OSD_NAME                 = 0x9,   /* variable */
+	OSD_ATTR_RI_MAX_CDB_CONTINUATION_LEN = 0xA,   /* 4        */
 	OSD_ATTR_RI_TOTAL_CAPACITY           = 0x80,  /* 8        */
 	OSD_ATTR_RI_USED_CAPACITY            = 0x81,  /* 8        */
 	OSD_ATTR_RI_NUMBER_OF_PARTITIONS     = 0xC0,  /* 8        */
 	OSD_ATTR_RI_CLOCK                    = 0x100, /* 6        */
+	OARI_DEFAULT_ISOLATION_METHOD        = 0X110, /* 1        */
+	OARI_SUPPORTED_ISOLATION_METHODS     = 0X111, /* 32       */
+
+	OARI_DATA_ATOMICITY_GUARANTEE                   = 0X120,   /* 8       */
+	OARI_DATA_ATOMICITY_ALIGNMENT                   = 0X121,   /* 8       */
+	OARI_ATTRIBUTES_ATOMICITY_GUARANTEE             = 0X122,   /* 8       */
+	OARI_DATA_ATTRIBUTES_ATOMICITY_MULTIPLIER       = 0X123,   /* 1       */
+
+	OARI_MAXIMUM_SNAPSHOTS_COUNT                    = 0X1C1,    /* 0 or 4 */
+	OARI_MAXIMUM_CLONES_COUNT                       = 0X1C2,    /* 0 or 4 */
+	OARI_MAXIMUM_BRANCH_DEPTH                       = 0X1CC,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_FIRST  = 0X200,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_LAST   = 0X2ff,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_FIRST = 0X300,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_LAST  = 0X30F,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_DUPLICATED_OBJECT_FREEZING     = 0X310,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_SNAPSHOT_REFRESHING            = 0X311,    /* 0 or 1 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_FIRST = 0X7000001,/* 0 or 4 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_LAST  = 0X700FFFF,/* 0 or 4 */
 };
 /* Root_Information_attributes_page does not have a get_page structure */
 
@@ -120,7 +143,15 @@
 	OSD_ATTR_PI_PARTITION_ID            = 0x1,     /* 8        */
 	OSD_ATTR_PI_USERNAME                = 0x9,     /* variable */
 	OSD_ATTR_PI_USED_CAPACITY           = 0x81,    /* 8        */
+	OSD_ATTR_PI_USED_CAPACITY_INCREMENT = 0x84,    /* 0 or 8   */
 	OSD_ATTR_PI_NUMBER_OF_OBJECTS       = 0xC1,    /* 8        */
+
+	OSD_ATTR_PI_ACTUAL_DATA_SPACE                      = 0xD1, /* 0 or 8 */
+	OSD_ATTR_PI_RESERVED_DATA_SPACE                    = 0xD2, /* 0 or 8 */
+	OSD_ATTR_PI_DEFAULT_SNAPSHOT_DUPLICATION_METHOD    = 0x200,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_DUPLICATION_METHOD       = 0x201,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_SP_TIME_OF_DUPLICATION         = 0x300,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_TIME_OF_DUPLICATION      = 0x301,/* 0 or 4 */
 };
 /* Partition Information attributes page does not have a get_page structure */
 
@@ -131,6 +162,7 @@
 	OSD_ATTR_CI_PARTITION_ID           = 0x1,       /* 8        */
 	OSD_ATTR_CI_COLLECTION_OBJECT_ID   = 0x2,       /* 8        */
 	OSD_ATTR_CI_USERNAME               = 0x9,       /* variable */
+	OSD_ATTR_CI_COLLECTION_TYPE        = 0xA,       /* 1        */
 	OSD_ATTR_CI_USED_CAPACITY          = 0x81,      /* 8        */
 };
 /* Collection Information attributes page does not have a get_page structure */
@@ -144,6 +176,8 @@
 	OSD_ATTR_OI_USERNAME             = 0x9,       /* variable */
 	OSD_ATTR_OI_USED_CAPACITY        = 0x81,      /* 8        */
 	OSD_ATTR_OI_LOGICAL_LENGTH       = 0x82,      /* 8        */
+	SD_ATTR_OI_ACTUAL_DATA_SPACE     = 0XD1,      /* 0 OR 8   */
+	SD_ATTR_OI_RESERVED_DATA_SPACE   = 0XD2,      /* 0 OR 8   */
 };
 /* Object Information attributes page does not have a get_page structure */
 
@@ -248,7 +282,18 @@
 	struct osd_timestamp data_modified_time;
 }  __packed;
 
-/* 7.1.2.19 Collections attributes page */
+/* OSD2r05: 7.1.3.19 Attributes Access attributes page
+ * (OSD_APAGE_PARTITION_ATTR_ACCESS)
+ *
+ * each attribute is of the form below. Total array length is deduced
+ * from the attribute's length
+ * (See allowed_attributes_access of the struct osd_cap_object_descriptor)
+ */
+struct attributes_access_attr {
+	struct osd_attributes_list_attrid attr_list[0];
+} __packed;
+
+/* OSD2r05: 7.1.2.21 Collections attributes page */
 /* TBD */
 
 /* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */
@@ -324,4 +369,29 @@
 	__be32 policy_access_tag;
 }  __packed;
 
+/* OSD2r05: 7.1.3.31 Current Command attributes page
+ * (OSD_APAGE_CURRENT_COMMAND)
+ */
+enum {
+	OSD_ATTR_CC_RESPONSE_INTEGRITY_CHECK_VALUE     = 0x1, /* 32  */
+	OSD_ATTR_CC_OBJECT_TYPE                        = 0x2, /* 1   */
+	OSD_ATTR_CC_PARTITION_ID                       = 0x3, /* 8   */
+	OSD_ATTR_CC_OBJECT_ID                          = 0x4, /* 8   */
+	OSD_ATTR_CC_STARTING_BYTE_ADDRESS_OF_APPEND    = 0x5, /* 8   */
+	OSD_ATTR_CC_CHANGE_IN_USED_CAPACITY            = 0x6, /* 8   */
+};
+
+/*TBD: osdv1_current_command_attributes_page */
+
+struct osdv2_current_command_attributes_page {
+	struct osd_attr_page_header hdr;  /* id=0xFFFFFFFE, size=0x44 */
+	u8 response_integrity_check_value[OSD_CRYPTO_KEYID_SIZE];
+	u8 object_type;
+	u8 reserved[3];
+	__be64 partition_id;
+	__be64 object_id;
+	__be64 starting_byte_address_of_append;
+	__be64 change_in_used_capacity;
+};
+
 #endif /*ndef __OSD_ATTRIBUTES_H__*/
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b24d961..02bd9f7 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -18,6 +18,7 @@
 #include "osd_types.h"
 
 #include <linux/blkdev.h>
+#include <scsi/scsi_device.h>
 
 /* Note: "NI" in comments below means "Not Implemented yet" */
 
@@ -47,6 +48,7 @@
  */
 struct osd_dev {
 	struct scsi_device *scsi_device;
+	struct file *file;
 	unsigned def_timeout;
 
 #ifdef OSD_VER1_SUPPORT
@@ -69,6 +71,10 @@
 
 /* some hi level device operations */
 int osd_auto_detect_ver(struct osd_dev *od, void *caps);    /* GFP_KERNEL */
+static inline struct request_queue *osd_request_queue(struct osd_dev *od)
+{
+	return od->scsi_device->request_queue;
+}
 
 /* we might want to use function vector in the future */
 static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v)
@@ -363,7 +369,9 @@
 void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_out, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 void osd_req_append(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_out);/* NI */
 void osd_req_create_write(struct osd_request *or,
@@ -378,7 +386,9 @@
 	/*V2*/ u64 offset, /*V2*/ u64 len);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_in, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 
 /*
  * Root/Partition/Collection/Object Attributes commands
diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index 62b2ab8..2cc8e8b 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -303,7 +303,15 @@
 	OSD_ACT_V2(REMOVE_MEMBER_OBJECTS,	0x21)
 	OSD_ACT_V2(GET_MEMBER_ATTRIBUTES,	0x22)
 	OSD_ACT_V2(SET_MEMBER_ATTRIBUTES,	0x23)
+
+	OSD_ACT_V2(CREATE_CLONE,		0x28)
+	OSD_ACT_V2(CREATE_SNAPSHOT,		0x29)
+	OSD_ACT_V2(DETACH_CLONE,		0x2A)
+	OSD_ACT_V2(REFRESH_SNAPSHOT_CLONE,	0x2B)
+	OSD_ACT_V2(RESTORE_PARTITION_FROM_SNAPSHOT, 0x2C)
+
 	OSD_ACT_V2(READ_MAP,			0x31)
+	OSD_ACT_V2(READ_MAPS_COMPARE,		0x32)
 
 	OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND,	0x8F7E, 0x8F7C)
 	OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT,	0x8F7F, 0x8F7D)
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 43b50d3..3878d1d 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -270,7 +270,7 @@
 
 static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
 {
-	return scmd->request->sector;
+	return blk_rq_pos(scmd->request);
 }
 
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 457588e..349c7f3 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -126,12 +126,14 @@
 			       int *index, int *age);
 
 	void (*session_recovery_timedout) (struct iscsi_cls_session *session);
-	struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr,
+	struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost,
+					      struct sockaddr *dst_addr,
 					      int non_blocking);
 	int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms);
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
 	int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
 			  uint32_t enable, struct sockaddr *dst_addr);
+	int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params);
 };
 
 /*
@@ -148,6 +150,10 @@
 extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			  char *data, uint32_t data_size);
 
+extern int iscsi_offload_mesg(struct Scsi_Host *shost,
+			      struct iscsi_transport *transport, uint32_t type,
+			      char *data, uint16_t data_size);
+
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
 	void *dd_data;			/* LLD private data */
diff --git a/include/sound/asound.h b/include/sound/asound.h
index 6add80f..82aed3f 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -255,6 +255,7 @@
 #define SNDRV_PCM_INFO_HALF_DUPLEX	0x00100000	/* only half duplex */
 #define SNDRV_PCM_INFO_JOINT_DUPLEX	0x00200000	/* playback and capture stream are somewhat correlated */
 #define SNDRV_PCM_INFO_SYNC_START	0x00400000	/* pcm support some kind of sync go */
+#define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
 
 typedef int __bitwise snd_pcm_state_t;
 #define	SNDRV_PCM_STATE_OPEN		((__force snd_pcm_state_t) 0) /* stream is open */
diff --git a/include/sound/core.h b/include/sound/core.h
index 3dea798..309cb96 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -300,19 +300,10 @@
 		    struct module *module, int extra_size,
 		    struct snd_card **card_ret);
 
-static inline __deprecated
-struct snd_card *snd_card_new(int idx, const char *id,
-			      struct module *module, int extra_size)
-{
-	struct snd_card *card;
-	if (snd_card_create(idx, id, module, extra_size, &card) < 0)
-		return NULL;
-	return card;
-}
-
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
+void snd_card_set_id(struct snd_card *card, const char *id);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
 int snd_card_info_done(void);
diff --git a/include/sound/driver.h b/include/sound/driver.h
deleted file mode 100644
index f035943..0000000
--- a/include/sound/driver.h
+++ /dev/null
@@ -1 +0,0 @@
-#warning "This file is deprecated"
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c172968..2389352 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -98,6 +98,7 @@
 #define SNDRV_PCM_IOCTL1_INFO		1
 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO	2
 #define SNDRV_PCM_IOCTL1_GSTATE		3
+#define SNDRV_PCM_IOCTL1_FIFO_SIZE	4
 
 #define SNDRV_PCM_TRIGGER_STOP		0
 #define SNDRV_PCM_TRIGGER_START		1
@@ -270,6 +271,7 @@
 	snd_pcm_uframes_t hw_ptr_base;	/* Position at buffer restart */
 	snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */
 	unsigned long hw_ptr_jiffies;	/* Time when hw_ptr is updated */
+	snd_pcm_sframes_t delay;	/* extra delay; typically FIFO size */
 
 	/* -- HW params -- */
 	snd_pcm_access_t access;	/* access mode */
@@ -486,80 +488,6 @@
 void snd_pcm_vma_notify_data(void *client, void *data);
 int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
 
-#if BITS_PER_LONG >= 64
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	*rem = *n % div;
-	*n /= div;
-}
-
-#elif defined(i386)
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		high /= div;
-		asm("divl %2":"=a" (low), "=d" (*rem):"rm" (div), "a" (low), "d" (high1));
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#else
-
-static inline void divl(u_int32_t high, u_int32_t low,
-			u_int32_t div,
-			u_int32_t *q, u_int32_t *r)
-{
-	u_int64_t n = (u_int64_t)high << 32 | low;
-	u_int64_t d = (u_int64_t)div << 31;
-	u_int32_t q1 = 0;
-	int c = 32;
-	while (n > 0xffffffffU) {
-		q1 <<= 1;
-		if (n >= d) {
-			n -= d;
-			q1 |= 1;
-		}
-		d >>= 1;
-		c--;
-	}
-	q1 <<= c;
-	if (n) {
-		low = n;
-		*q = q1 | (low / div);
-		*r = low % div;
-	} else {
-		*r = 0;
-		*q = q1;
-	}
-	return;
-}
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		u_int32_t low1 = low;
-		high /= div;
-		divl(high1, low1, div, &low, rem);
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#endif
-
 /*
  *  PCM library
  */
diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 1367647..352d7ee 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -45,24 +45,6 @@
 #define SND_SOC_DAIFMT_GATED		(1 << 4) /* clock is gated */
 
 /*
- * DAI Left/Right Clocks.
- *
- * Specifies whether the DAI can support different samples for similtanious
- * playback and capture. This usually requires a seperate physical frame
- * clock for playback and capture.
- */
-#define SND_SOC_DAIFMT_SYNC		(0 << 5) /* Tx FRM = Rx FRM */
-#define SND_SOC_DAIFMT_ASYNC		(1 << 5) /* Tx FRM ~ Rx FRM */
-
-/*
- * TDM
- *
- * Time Division Multiplexing. Allows PCM data to be multplexed with other
- * data on the DAI.
- */
-#define SND_SOC_DAIFMT_TDM		(1 << 6)
-
-/*
  * DAI hardware signal inversions.
  *
  * Specifies whether the DAI can also support inverted clocks for the specified
@@ -96,6 +78,10 @@
 #define SND_SOC_CLOCK_IN		0
 #define SND_SOC_CLOCK_OUT		1
 
+#define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
+                               SNDRV_PCM_FMTBIT_S32_LE |\
+                               SNDRV_PCM_FMTBIT_S32_BE)
+
 struct snd_soc_dai_ops;
 struct snd_soc_dai;
 struct snd_ac97_bus_ops;
@@ -208,6 +194,7 @@
 	/* DAI capabilities */
 	struct snd_soc_pcm_stream capture;
 	struct snd_soc_pcm_stream playback;
+	unsigned int symmetric_rates:1;
 
 	/* DAI runtime info */
 	struct snd_pcm_runtime *runtime;
@@ -219,11 +206,8 @@
 	/* DAI private data */
 	void *private_data;
 
-	/* parent codec/platform */
-	union {
-		struct snd_soc_codec *codec;
-		struct snd_soc_platform *platform;
-	};
+	/* parent platform */
+	struct snd_soc_platform *platform;
 
 	struct list_head list;
 };
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index a7def6a..ec8a45f 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -140,16 +140,30 @@
 #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 
-/* generic register modifier widget */
+/* generic widgets */
 #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \
 {	.id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \
 	.reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \
 	.on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \
 	.event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD}
+#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \
+{	.id = snd_soc_dapm_supply, .name = wname, .reg = wreg,	\
+	.shift = wshift, .invert = winvert, .event = wevent, \
+	.event_flags = wflags}
 
 /* dapm kcontrol types */
 #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \
@@ -265,8 +279,6 @@
 /* dapm events */
 int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream,
 	int event);
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-	enum snd_soc_bias_level level);
 
 /* dapm sys fs - used by the core */
 int snd_soc_dapm_sys_add(struct device *dev);
@@ -298,6 +310,7 @@
 	snd_soc_dapm_vmid,			/* codec bias/vmid - to minimise pops */
 	snd_soc_dapm_pre,			/* machine specific pre widget - exec first */
 	snd_soc_dapm_post,			/* machine specific post widget - exec last */
+	snd_soc_dapm_supply,		/* power/clock supply */
 };
 
 /*
@@ -357,6 +370,8 @@
 	unsigned char suspend:1;		/* was active before suspend */
 	unsigned char pmdown:1;			/* waiting for timeout */
 
+	int (*power_check)(struct snd_soc_dapm_widget *w);
+
 	/* external events */
 	unsigned short event_flags;		/* flags to specify event types */
 	int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int);
@@ -368,6 +383,9 @@
 	/* widget input and outputs */
 	struct list_head sources;
 	struct list_head sinks;
+
+	/* used during DAPM updates */
+	struct list_head power_list;
 };
 
 #endif
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a40bc6f..cf6111d 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -118,6 +118,14 @@
 	.info = snd_soc_info_volsw, \
 	.get = xhandler_get, .put = xhandler_put, \
 	.private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) }
+#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\
+	 xhandler_get, xhandler_put) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\
+	.info = snd_soc_info_volsw, \
+	.get = xhandler_get, .put = xhandler_put, \
+	.private_value = (unsigned long)&(struct soc_mixer_control) \
+		{.reg = xreg, .shift = shift_left, .rshift = shift_right, \
+		 .max = xmax, .invert = xinvert} }
 #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\
 	 xhandler_get, xhandler_put, tlv_array) \
 {	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
@@ -206,10 +214,6 @@
 			struct snd_soc_jack_gpio *gpios);
 #endif
 
-/* codec IO */
-#define snd_soc_read(codec, reg) codec->read(codec, reg)
-#define snd_soc_write(codec, reg, value) codec->write(codec, reg, value)
-
 /* codec register bit access */
 int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned short reg,
 				unsigned short mask, unsigned short value);
@@ -331,6 +335,7 @@
 	struct module *owner;
 	struct mutex mutex;
 	struct device *dev;
+	struct snd_soc_device *socdev;
 
 	struct list_head list;
 
@@ -364,6 +369,8 @@
 	enum snd_soc_bias_level bias_level;
 	enum snd_soc_bias_level suspend_bias_level;
 	struct delayed_work delayed_work;
+	struct list_head up_list;
+	struct list_head down_list;
 
 	/* codec DAI's */
 	struct snd_soc_dai *dai;
@@ -417,6 +424,12 @@
 	/* codec/machine specific init - e.g. add machine controls */
 	int (*init)(struct snd_soc_codec *codec);
 
+	/* Symmetry requirements */
+	unsigned int symmetric_rates:1;
+
+	/* Symmetry data - only valid if symmetry is being enforced */
+	unsigned int rate;
+
 	/* DAI pcm */
 	struct snd_pcm *pcm;
 };
@@ -490,6 +503,19 @@
 	void *dapm;
 };
 
+/* codec IO */
+static inline unsigned int snd_soc_read(struct snd_soc_codec *codec,
+					unsigned int reg)
+{
+	return codec->read(codec, reg);
+}
+
+static inline unsigned int snd_soc_write(struct snd_soc_codec *codec,
+					 unsigned int reg, unsigned int val)
+{
+	return codec->write(codec, reg, val);
+}
+
 #include <sound/soc-dai.h>
 
 #endif
diff --git a/include/sound/wm9081.h b/include/sound/wm9081.h
new file mode 100644
index 0000000..e173ddb
--- /dev/null
+++ b/include/sound/wm9081.h
@@ -0,0 +1,25 @@
+/*
+ * linux/sound/wm9081.h -- Platform data for WM9081
+ *
+ * Copyright 2009 Wolfson Microelectronics. PLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_SND_WM_9081_H
+#define __LINUX_SND_WM_9081_H
+
+struct wm9081_retune_mobile_setting {
+	const char *name;
+	unsigned int rate;
+	u16 config[20];
+};
+
+struct wm9081_retune_mobile_config {
+	struct wm9081_retune_mobile_setting *configs;
+	int num_configs;
+};
+
+#endif
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644
index 25b7068..0000000
--- a/include/trace/block.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _TRACE_BLOCK_H
-#define _TRACE_BLOCK_H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(block_rq_abort,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_insert,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_issue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_requeue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_complete,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_bio_bounce,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_complete,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_backmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_frontmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_queue,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_getrq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_sleeprq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_plug,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_timer,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_io,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_split,
-	TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-	      TP_ARGS(q, bio, pdu));
-
-DECLARE_TRACE(block_remap,
-	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from, sector_t to),
-	      TP_ARGS(q, bio, dev, from, to));
-
-#endif
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 0000000..f7a7ae1
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
+/*
+ * Trace files that want to automate creationg of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	DEFINE_TRACE(name)
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
+# define UNDEF_TRACE_INCLUDE_PATH
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#ifdef CONFIG_EVENT_TRACING
+#include <trace/ftrace.h>
+#endif
+
+#undef TRACE_HEADER_MULTI_READ
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_PATH
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 0000000..d6b05f4
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,493 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
+		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,         comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
+		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,		comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
+		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
+		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->errors	   = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
+		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned,	nr_sector	)
+		__field( int,		error		)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+        ),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			      bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			    bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio,
+		 unsigned int new_sector),
+
+	TP_ARGS(q, bio, new_sector),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev				)
+		__field( sector_t,	sector				)
+		__field( sector_t,	new_sector			)
+		__array( char,		rwbs,		6		)
+		__array( char,		comm,		TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->new_sector	= new_sector;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu / %llu [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  (unsigned long long)__entry->new_sector,
+		  __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, bio, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  (unsigned long long)__entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 0000000..b0c7ede
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,145 @@
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)			\
+	__print_symbolic(val,			\
+			 softirq_name(HI),	\
+			 softirq_name(TIMER),	\
+			 softirq_name(NET_TX),	\
+			 softirq_name(NET_RX),	\
+			 softirq_name(BLOCK),	\
+			 softirq_name(TASKLET),	\
+			 softirq_name(SCHED),	\
+			 softirq_name(HRTIMER),	\
+			 softirq_name(RCU))
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+	TP_PROTO(int irq, struct irqaction *action),
+
+	TP_ARGS(irq, action),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq		)
+		__string(	name,	action->name	)
+	),
+
+	TP_fast_assign(
+		__entry->irq = irq;
+		__assign_str(name, action->name);
+	),
+
+	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+);
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler scuccessully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
+TRACE_EVENT(softirq_entry,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec),
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
+);
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * that has handled the softirq. By subtracting the @vec pointer from
+ * the @h pointer, we can determine the softirq number. Also, when used in
+ * combination with the softirq_entry tracepoint we can determine the softirq
+ * latency.
+ */
+TRACE_EVENT(softirq_exit,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec),
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
+);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 0000000..9baba50
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,231 @@
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMEM_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we wil get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)						\
+	(flags) ? __print_flags(flags, "|",				\
+	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"}, \
+	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
+	{(unsigned long)GFP_USER,		"GFP_USER"},		\
+	{(unsigned long)GFP_TEMPORARY,		"GFP_TEMPORARY"},	\
+	{(unsigned long)GFP_KERNEL,		"GFP_KERNEL"},		\
+	{(unsigned long)GFP_NOFS,		"GFP_NOFS"},		\
+	{(unsigned long)GFP_ATOMIC,		"GFP_ATOMIC"},		\
+	{(unsigned long)GFP_NOIO,		"GFP_NOIO"},		\
+	{(unsigned long)__GFP_HIGH,		"GFP_HIGH"},		\
+	{(unsigned long)__GFP_WAIT,		"GFP_WAIT"},		\
+	{(unsigned long)__GFP_IO,		"GFP_IO"},		\
+	{(unsigned long)__GFP_COLD,		"GFP_COLD"},		\
+	{(unsigned long)__GFP_NOWARN,		"GFP_NOWARN"},		\
+	{(unsigned long)__GFP_REPEAT,		"GFP_REPEAT"},		\
+	{(unsigned long)__GFP_NOFAIL,		"GFP_NOFAIL"},		\
+	{(unsigned long)__GFP_NORETRY,		"GFP_NORETRY"},		\
+	{(unsigned long)__GFP_COMP,		"GFP_COMP"},		\
+	{(unsigned long)__GFP_ZERO,		"GFP_ZERO"},		\
+	{(unsigned long)__GFP_NOMEMALLOC,	"GFP_NOMEMALLOC"},	\
+	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
+	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
+	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
+	) : "GFP_NOWAIT"
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+#endif /* _TRACE_KMEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 0000000..0e956c9
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,96 @@
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockdep
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_EVENT(lock_acquire,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, flags)
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+		  (__entry->flags & 2) ? "read " : "",
+		  __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
+	TP_ARGS(lock, nested, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_EVENT(lock_contended,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+	TP_ARGS(lock, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h
similarity index 90%
rename from include/trace/sched_event_types.h
rename to include/trace/events/sched.h
index 63547dc..24ab5bc 100644
--- a/include/trace/sched_event_types.h
+++ b/include/trace/events/sched.h
@@ -1,9 +1,8 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
 
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
@@ -157,6 +156,7 @@
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	prev_pid			)
 		__field(	int,	prev_prio			)
+		__field(	long,	prev_state			)
 		__array(	char,	next_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	next_pid			)
 		__field(	int,	next_prio			)
@@ -166,13 +166,19 @@
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
+		__entry->prev_state	= prev->state;
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->prev_state ?
+		  __print_flags(__entry->prev_state, "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "W" }) : "R",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
@@ -181,9 +187,9 @@
  */
 TRACE_EVENT(sched_migrate_task,
 
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+	TP_PROTO(struct task_struct *p, int dest_cpu),
 
-	TP_ARGS(p, orig_cpu, dest_cpu),
+	TP_ARGS(p, dest_cpu),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -197,7 +203,7 @@
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
+		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
 	),
 
@@ -334,4 +340,7 @@
 		  __entry->sig, __entry->comm, __entry->pid)
 );
 
-#undef TRACE_SYSTEM
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 0000000..1e8fabb
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 0000000..035f1bf
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 0000000..1867553
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,591 @@
+/*
+ * Stage 1 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * struct ftrace_raw_<call> {
+ *	struct trace_entry		ent;
+ *	<type>				<item>;
+ *	<type2>				<item2>[<len>];
+ *	[...]
+ * };
+ *
+ * The <type> <item> is created by the __field(type, item) macro or
+ * the __array(type2, item2, len) macro.
+ * We simply do "type item;", and that will create the fields
+ * in the structure.
+ */
+
+#include <linux/ftrace_event.h>
+
+#undef __field
+#define __field(type, item)		type	item;
+
+#undef __array
+#define __array(type, item, len)	type	item[len];
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {				\
+		struct trace_entry	ent;			\
+		tstruct						\
+		char			__data[0];		\
+	};							\
+	static struct ftrace_event_call event_##name
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+
+/*
+ * Stage 2 of the trace events.
+ *
+ * Include the following:
+ *
+ * struct ftrace_data_offsets_<call> {
+ *	int				<item1>;
+ *	int				<item2>;
+ *	[...]
+ * };
+ *
+ * The __dynamic_array() macro will create each int <item>, this is
+ * to keep the offset of each array from the beginning of the event.
+ */
+
+#undef __field
+#define __field(type, item);
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)	int item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+	struct ftrace_data_offsets_##call {				\
+		tstruct;						\
+	};
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
+	if (!ret)							       \
+		return 0;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef __entry
+#define __entry REC
+
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * enum print_line_t
+ * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
+ * {
+ *	struct trace_seq *s = &iter->seq;
+ *	struct ftrace_raw_<call> *field; <-- defined in stage 1
+ *	struct trace_entry *entry;
+ *	struct trace_seq *p;
+ *	int ret;
+ *
+ *	entry = iter->ent;
+ *
+ *	if (entry->type != event_<call>.id) {
+ *		WARN_ON_ONCE(1);
+ *		return TRACE_TYPE_UNHANDLED;
+ *	}
+ *
+ *	field = (typeof(field))entry;
+ *
+ *	p = get_cpu_var(ftrace_event_seq);
+ *	trace_seq_init(p);
+ *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	put_cpu();
+ *	if (!ret)
+ *		return TRACE_TYPE_PARTIAL_LINE;
+ *
+ *	return TRACE_TYPE_HANDLED;
+ * }
+ *
+ * This is the method used to print the raw event to the trace
+ * output format. Note, this is not needed if the data is read
+ * in binary.
+ */
+
+#undef __entry
+#define __entry field
+
+#undef TP_printk
+#define TP_printk(fmt, args...) fmt "\n", args
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + __entry->__data_loc_##field)
+
+#undef __get_str
+#define __get_str(field) (char *)__get_dynamic_array(field)
+
+#undef __print_flags
+#define __print_flags(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags flags[] =		\
+			{ flag_array, { -1, NULL }};			\
+		ftrace_print_flags_seq(p, delim, flag, flags);		\
+	})
+
+#undef __print_symbolic
+#define __print_symbolic(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags symbols[] =	\
+			{ symbol_array, { -1, NULL }};			\
+		ftrace_print_symbols_seq(p, value, symbols);		\
+	})
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+enum print_line_t							\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##call *field;				\
+	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
+	ret = trace_seq_printf(s, #call ": " print);			\
+	put_cpu();							\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+	
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __field
+#define __field(type, item)						\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed_type(type));	\
+	if (ret)							\
+		return ret;
+
+#undef __array
+#define __array(type, item, len)					\
+	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
+				offsetof(typeof(field), __data_loc_##item),    \
+				 sizeof(field.__data_loc_##item), 0);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_raw_##call field;					\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	int ret;							\
+									\
+	__common_field(int, type, 1);					\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * remember the offset of each array from the beginning of the event.
+ */
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__data_offsets->item = __data_size +				\
+			       offsetof(typeof(*entry), __data);	\
+	__data_size += (len) * sizeof(type);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+static inline int ftrace_get_offsets_##call(				\
+	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
+{									\
+	int __data_size = 0;						\
+	struct ftrace_raw_##call __maybe_unused *entry;			\
+									\
+	tstruct;							\
+									\
+	return __data_size;						\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 4 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * static void ftrace_event_<call>(proto)
+ * {
+ *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
+ * }
+ *
+ * static int ftrace_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to  <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_event_<call>);
+ * }
+ *
+ *
+ * For those macros defined with TRACE_EVENT:
+ *
+ * static struct ftrace_event_call event_<call>;
+ *
+ * static void ftrace_raw_event_<call>(proto)
+ * {
+ *	struct ring_buffer_event *event;
+ *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
+ *	unsigned long irq_flags;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	event = trace_current_buffer_lock_reserve(event_<call>.id,
+ *				  sizeof(struct ftrace_raw_<call>),
+ *				  irq_flags, pc);
+ *	if (!event)
+ *		return;
+ *	entry	= ring_buffer_event_data(event);
+ *
+ *	<assign>;  <-- Here we assign the entries by the __field and
+ *			__array macros.
+ *
+ *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
+ * }
+ *
+ * static int ftrace_raw_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_raw_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_raw_event_<call>);
+ * }
+ *
+ * static struct trace_event ftrace_event_type_<call> = {
+ *	.trace			= ftrace_raw_output_<call>, <-- stage 2
+ * };
+ *
+ * static int ftrace_raw_init_event_<call>(void)
+ * {
+ *	int id;
+ *
+ *	id = register_ftrace_event(&ftrace_event_type_<call>);
+ *	if (!id)
+ *		return -ENODEV;
+ *	event_<call>.id = id;
+ *	return 0;
+ * }
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.system			= "<system>",
+ *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ *	.show_format		= ftrace_format_<call>,
+ * }
+ *
+ */
+
+#undef TP_FMT
+#define TP_FMT(fmt, args...)	fmt "\n", ##args
+
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args)				\
+static void ftrace_profile_##call(proto)				\
+{									\
+	extern void perf_tpcounter_event(int);				\
+	perf_tpcounter_event(event_##call.id);				\
+}									\
+									\
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{									\
+	int ret = 0;							\
+									\
+	if (!atomic_inc_return(&event_call->profile_count))		\
+		ret = register_trace_##call(ftrace_profile_##call);	\
+									\
+	return ret;							\
+}									\
+									\
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{									\
+	if (atomic_add_negative(-1, &event_call->profile_count))	\
+		unregister_trace_##call(ftrace_profile_##call);		\
+}
+
+#define _TRACE_PROFILE_INIT(call)					\
+	.profile_count = ATOMIC_INIT(-1),				\
+	.profile_enable = ftrace_profile_enable_##call,			\
+	.profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__entry->__data_loc_##item = __data_offsets.item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)       	\
+
+#undef __assign_str
+#define __assign_str(dst, src)						\
+	strcpy(__get_str(dst), src);
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	struct ring_buffer_event *event;				\
+	struct ftrace_raw_##call *entry;				\
+	unsigned long irq_flags;					\
+	int __data_size;						\
+	int pc;								\
+									\
+	local_save_flags(irq_flags);					\
+	pc = preempt_count();						\
+									\
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+									\
+	event = trace_current_buffer_lock_reserve(event_##call.id,	\
+				 sizeof(*entry) + __data_size,		\
+				 irq_flags, pc);			\
+	if (!event)							\
+		return;							\
+	entry	= ring_buffer_event_data(event);			\
+									\
+									\
+	tstruct								\
+									\
+	{ assign; }							\
+									\
+	if (!filter_current_check_discard(event_call, entry, event))	\
+		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+}									\
+									\
+static int ftrace_raw_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_raw_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_raw_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_raw_event_##call);		\
+}									\
+									\
+static struct trace_event ftrace_event_type_##call = {			\
+	.trace			= ftrace_raw_output_##call,		\
+};									\
+									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(&ftrace_event_type_##call);		\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index ff5d449..0000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_IRQ_H
-#define _TRACE_IRQ_H
-
-#include <linux/interrupt.h>
-#include <linux/tracepoint.h>
-
-#include <trace/irq_event_types.h>
-
-#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964eb..0000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-
-/* use <trace/irq.h> instead */
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f..0000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <linux/tracepoint.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 5ca67df..0000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_LOCKDEP_H
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#include <trace/lockdep_event_types.h>
-
-#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index adccfcd..0000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_FORMAT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#endif
-#endif
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index 4e372a1..0000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_SCHED_H
-#define _TRACE_SCHED_H
-
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-#include <trace/sched_event_types.h>
-
-#endif
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index b66206d..0000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _TRACE_SKB_H_
-#define _TRACE_SKB_H_
-
-#include <linux/skbuff.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(kfree_skb,
-	TP_PROTO(struct sk_buff *skb, void *location),
-	TP_ARGS(skb, location));
-
-#endif
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index df56f56..0000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>_event_types.h here */
-
-#include <trace/sched_event_types.h>
-#include <trace/irq_event_types.h>
-#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index fd13750..0000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>.h here */
-
-#include <trace/sched.h>
-#include <trace/irq.h>
-#include <trace/lockdep.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523..0000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 0000000..4e65c16
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
+header-y += evtchn.h
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1ad..e68d59a 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq);
 
+/* Determine the IRQ which is bound to an event channel */
+unsigned irq_from_evtchn(unsigned int evtchn);
+
 #endif	/* _XEN_EVENTS_H */
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 0000000..14e833ee4
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_EVTCHN_H__
+#define __LINUX_PUBLIC_EVTCHN_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ				\
+	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
+struct ioctl_evtchn_bind_virq {
+	unsigned int virq;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\
+	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
+struct ioctl_evtchn_bind_interdomain {
+	unsigned int remote_domain, remote_port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\
+	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
+struct ioctl_evtchn_bind_unbound_port {
+	unsigned int remote_domain;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND				\
+	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
+struct ioctl_evtchn_unbind {
+	unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY				\
+	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
+struct ioctl_evtchn_notify {
+	unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET				\
+	_IOC(_IOC_NONE, 'E', 5, 0)
+
+#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e..e8b6519 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
 
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f961..b9763ba 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,8 +91,7 @@
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-	int (*suspend)(struct xenbus_device *dev);
-	int (*suspend_cancel)(struct xenbus_device *dev);
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
diff --git a/init/Kconfig b/init/Kconfig
index 7be4d38..d3a5096 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,13 +302,14 @@
 
 config AUDIT_TREE
 	def_bool y
-	depends on AUDITSYSCALL && INOTIFY
+	depends on AUDITSYSCALL
+	select INOTIFY
 
 menu "RCU Subsystem"
 
 choice
 	prompt "RCU Implementation"
-	default CLASSIC_RCU
+	default TREE_RCU
 
 config CLASSIC_RCU
 	bool "Classic RCU"
@@ -933,6 +934,42 @@
           by some high performance threaded applications. Disabling
           this option saves about 7k.
 
+config HAVE_PERF_COUNTERS
+	bool
+	help
+	  See tools/perf/design.txt for details.
+
+menu "Performance Counters"
+
+config PERF_COUNTERS
+	bool "Kernel Performance Counters"
+	depends on HAVE_PERF_COUNTERS
+	select ANON_INODES
+	help
+	  Enable kernel support for performance counter hardware.
+
+	  Performance counters are special hardware registers available
+	  on most modern CPUs. These registers count the number of certain
+	  types of hw events: such as instructions executed, cachemisses
+	  suffered, or branches mis-predicted - without slowing down the
+	  kernel or applications. These registers can also trigger interrupts
+	  when a threshold number of events have passed - and can thus be
+	  used to profile the code that runs on that CPU.
+
+	  The Linux Performance Counter subsystem provides an abstraction of
+	  these hardware capabilities, available via a system call. It
+	  provides per task and per CPU counters, and it provides event
+	  capabilities on top of those.
+
+	  Say Y if unsure.
+
+config EVENT_PROFILE
+	bool "Tracepoint profile sources"
+	depends on PERF_COUNTERS && EVENT_TRACER
+	default y
+
+endmenu
+
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
diff --git a/init/main.c b/init/main.c
index d721dad..f6204f7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -56,6 +56,7 @@
 #include <linux/debug_locks.h>
 #include <linux/debugobjects.h>
 #include <linux/lockdep.h>
+#include <linux/kmemleak.h>
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
@@ -64,6 +65,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -71,7 +73,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
-#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
@@ -533,6 +534,21 @@
 {
 }
 
+/*
+ * Set up kernel memory allocators
+ */
+static void __init mm_init(void)
+{
+	/*
+	 * page_cgroup requires countinous pages as memmap
+	 * and it's bigger than MAX_ORDER unless SPARSEMEM.
+	 */
+	page_cgroup_init_flatmem();
+	mem_init();
+	kmem_cache_init();
+	vmalloc_init();
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -574,6 +590,23 @@
 	setup_nr_cpu_ids();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
 
+	build_all_zonelists();
+	page_alloc_init();
+
+	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
+	parse_early_param();
+	parse_args("Booting kernel", static_command_line, __start___param,
+		   __stop___param - __start___param,
+		   &unknown_bootoption);
+	/*
+	 * These use large bootmem allocations and must precede
+	 * kmem_cache_init()
+	 */
+	pidhash_init();
+	vfs_caches_init_early();
+	sort_main_extable();
+	trap_init();
+	mm_init();
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
@@ -585,25 +618,16 @@
 	 * fragile until we cpu_idle() for the first time.
 	 */
 	preempt_disable();
-	build_all_zonelists();
-	page_alloc_init();
-	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
-	parse_early_param();
-	parse_args("Booting kernel", static_command_line, __start___param,
-		   __stop___param - __start___param,
-		   &unknown_bootoption);
 	if (!irqs_disabled()) {
 		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
 				"enabled *very* early, fixing it\n");
 		local_irq_disable();
 	}
-	sort_main_extable();
-	trap_init();
 	rcu_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
-	pidhash_init();
+	prio_tree_init();
 	init_timers();
 	hrtimers_init();
 	softirq_init();
@@ -616,6 +640,7 @@
 				 "enabled early\n");
 	early_boot_irqs_on();
 	local_irq_enable();
+	kmem_cache_init_late();
 
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
@@ -645,15 +670,12 @@
 		initrd_start = 0;
 	}
 #endif
-	vmalloc_init();
-	vfs_caches_init_early();
 	cpuset_init_early();
 	page_cgroup_init();
-	mem_init();
 	enable_debug_pagealloc();
 	cpu_hotplug_init();
-	kmem_cache_init();
 	kmemtrace_init();
+	kmemleak_init();
 	debug_objects_mem_init();
 	idr_init_cache();
 	setup_per_cpu_pageset();
@@ -663,7 +685,6 @@
 	calibrate_delay();
 	pidmap_init();
 	pgtable_cache_init();
-	prio_tree_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
 	if (efi_enabled)
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189..87c2b64 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@
 		int i;
 
 		rcu_read_lock();
-		un = list_entry(rcu_dereference(ulp->list_proc.next),
-					struct sem_undo, list_proc);
+		un = list_entry_rcu(ulp->list_proc.next,
+				    struct sem_undo, list_proc);
 		if (&un->list_proc == &ulp->list_proc)
 			semid = -1;
 		 else
diff --git a/ipc/shm.c b/ipc/shm.c
index faa46da..15dd238 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -384,7 +384,6 @@
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto no_file;
-	ima_shm_check(file);
 
 	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (id < 0) {
@@ -891,7 +890,7 @@
 	file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
 	if (!file)
 		goto out_free;
-	ima_shm_check(file);
+	ima_counts_get(file);
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
@@ -969,10 +968,13 @@
 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma, *next;
+	struct vm_area_struct *vma;
 	unsigned long addr = (unsigned long)shmaddr;
-	loff_t size = 0;
 	int retval = -EINVAL;
+#ifdef CONFIG_MMU
+	loff_t size = 0;
+	struct vm_area_struct *next;
+#endif
 
 	if (addr & ~PAGE_MASK)
 		return retval;
diff --git a/kernel/Makefile b/kernel/Makefile
index 4242366..90b53f6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,8 +93,10 @@
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/async.c b/kernel/async.c
index 5054030..27235f5 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -92,23 +92,18 @@
 static async_cookie_t  __lowest_in_progress(struct list_head *running)
 {
 	struct async_entry *entry;
-	async_cookie_t ret = next_cookie; /* begin with "infinity" value */
 
 	if (!list_empty(running)) {
 		entry = list_first_entry(running,
 			struct async_entry, list);
-		ret = entry->cookie;
+		return entry->cookie;
 	}
 
-	if (!list_empty(&async_pending)) {
-		list_for_each_entry(entry, &async_pending, list)
-			if (entry->running == running) {
-				ret = entry->cookie;
-				break;
-			}
-	}
+	list_for_each_entry(entry, &async_pending, list)
+		if (entry->running == running)
+			return entry->cookie;
 
-	return ret;
+	return next_cookie;	/* "infinity" value */
 }
 
 static async_cookie_t  lowest_in_progress(struct list_head *running)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6e73517..1f6396d7 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -568,7 +568,7 @@
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(path.mnt, path.dentry);
+		root_mnt = collect_mounts(&path);
 		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
@@ -660,7 +660,7 @@
 	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(path.mnt, path.dentry);
+	mnt = collect_mounts(&path);
 	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
@@ -720,7 +720,7 @@
 	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(path.mnt, path.dentry);
+	tagged = collect_mounts(&path);
 	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a7267bf..3fb789f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,6 +46,7 @@
 #include <linux/cgroupstats.h>
 #include <linux/hash.h>
 #include <linux/namei.h>
+#include <linux/smp_lock.h>
 
 #include <asm/atomic.h>
 
@@ -900,6 +901,7 @@
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_sb_opts opts;
 
+	lock_kernel();
 	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
 	mutex_lock(&cgroup_mutex);
 
@@ -927,6 +929,7 @@
 	kfree(opts.release_agent);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	unlock_kernel();
 	return ret;
 }
 
diff --git a/kernel/compat.c b/kernel/compat.c
index 42d5654..f6c204f0 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -882,6 +882,17 @@
 
 }
 
+asmlinkage long
+compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+			     struct compat_siginfo __user *uinfo)
+{
+	siginfo_t info;
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 026facc..d5a7e17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1857,7 +1857,7 @@
 
 int __init cpuset_init_early(void)
 {
-	alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+	alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
 
 	top_cpuset.mems_generation = cpuset_mems_generation++;
 	return 0;
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a03918..1bb4d7e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -167,7 +167,7 @@
 
 /*
  * Prepare credentials for current to perform an execve()
- * - The caller must hold current->cred_exec_mutex
+ * - The caller must hold current->cred_guard_mutex
  */
 struct cred *prepare_exec_creds(void)
 {
@@ -276,7 +276,7 @@
 	struct cred *new;
 	int ret;
 
-	mutex_init(&p->cred_exec_mutex);
+	mutex_init(&p->cred_guard_mutex);
 
 	if (
 #ifdef CONFIG_KEYS
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3..b6c90b5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,8 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <linux/perf_counter.h>
+#include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -56,10 +57,6 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
@@ -158,6 +155,9 @@
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_COUNTERS
+	WARN_ON_ONCE(tsk->perf_counter_ctxp);
+#endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -174,6 +174,7 @@
 	atomic_dec(&__task_cred(p)->user->processes);
 
 	proc_flush_task(p);
+
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
 	__exit_signal(p);
@@ -975,16 +976,19 @@
 		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
+
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
 #ifdef CONFIG_FUTEX
-	/*
-	 * This must happen late, after the PID is not
-	 * hashed anymore:
-	 */
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
 	if (unlikely(current->pi_state_cache))
@@ -1476,6 +1480,7 @@
 		 */
 		if (*notask_error)
 			*notask_error = ret;
+		return 0;
 	}
 
 	if (likely(!ptrace) && unlikely(p->ptrace)) {
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..4430eb1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,8 +61,8 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
-#include <trace/sched.h>
 #include <linux/magic.h>
+#include <linux/perf_counter.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -71,6 +71,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -83,8 +85,6 @@
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
@@ -982,6 +982,8 @@
 	if (!p)
 		goto fork_out;
 
+	ftrace_graph_init_task(p);
+
 	rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1089,12 +1091,16 @@
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
+	retval = perf_counter_init_task(p);
+	if (retval)
+		goto bad_fork_cleanup_policy;
+
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
@@ -1131,8 +1137,6 @@
 		}
 	}
 
-	ftrace_graph_init_task(p);
-
 	p->pid = pid_nr(pid);
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1145,7 @@
 	if (current->nsproxy != p->nsproxy) {
 		retval = ns_cgroup_clone(p, pid);
 		if (retval)
-			goto bad_fork_free_graph;
+			goto bad_fork_free_pid;
 	}
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1237,7 @@
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_free_graph;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1272,6 @@
 	cgroup_post_fork(p);
 	return p;
 
-bad_fork_free_graph:
-	ftrace_graph_exit_task(p);
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
@@ -1293,6 +1295,7 @@
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
+	perf_counter_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
@@ -1406,10 +1409,16 @@
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
+		} else if (!(clone_flags & CLONE_VM)) {
+			/*
+			 * vfork will do an exec which will call
+			 * set_task_comm()
+			 */
+			perf_counter_fork(p);
 		}
 
 		audit_finish_fork(p);
-		tracehook_report_clone(trace, regs, clone_flags, nr, p);
+		tracehook_report_clone(regs, clone_flags, nr, p);
 
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
diff --git a/kernel/futex.c b/kernel/futex.c
index d546b2d..80b5ce7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -19,6 +19,10 @@
  *  PRIVATE futexes by Eric Dumazet
  *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
  *
+ *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
+ *  Copyright (C) IBM Corporation, 2009
+ *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -96,8 +100,8 @@
  */
 struct futex_q {
 	struct plist_node list;
-	/* There can only be a single waiter */
-	wait_queue_head_t waiter;
+	/* Waiter reference */
+	struct task_struct *task;
 
 	/* Which hash list lock to use: */
 	spinlock_t *lock_ptr;
@@ -107,7 +111,9 @@
 
 	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
-	struct task_struct *task;
+
+	/* rt_waiter storage for requeue_pi: */
+	struct rt_mutex_waiter *rt_waiter;
 
 	/* Bitset for the optional bitmasked wakeup */
 	u32 bitset;
@@ -278,6 +284,25 @@
 	drop_futex_key_refs(key);
 }
 
+/**
+ * futex_top_waiter() - Return the highest priority waiter on a futex
+ * @hb:     the hash bucket the futex_q's reside in
+ * @key:    the futex key (to distinguish it from other futex futex_q's)
+ *
+ * Must be called with the hb lock held.
+ */
+static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
+					union futex_key *key)
+{
+	struct futex_q *this;
+
+	plist_for_each_entry(this, &hb->chain, list) {
+		if (match_futex(&this->key, key))
+			return this;
+	}
+	return NULL;
+}
+
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
 {
 	u32 curval;
@@ -539,28 +564,160 @@
 	return 0;
 }
 
+/**
+ * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * @uaddr:		the pi futex user address
+ * @hb:			the pi futex hash bucket
+ * @key:		the futex key associated with uaddr and hb
+ * @ps:			the pi_state pointer where we store the result of the
+ *			lookup
+ * @task:		the task to perform the atomic lock work for.  This will
+ *			be "current" except in the case of requeue pi.
+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Returns:
+ *  0 - ready to wait
+ *  1 - acquired the lock
+ * <0 - error
+ *
+ * The hb->lock and futex_key refs shall be held by the caller.
+ */
+static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+				union futex_key *key,
+				struct futex_pi_state **ps,
+				struct task_struct *task, int set_waiters)
+{
+	int lock_taken, ret, ownerdied = 0;
+	u32 uval, newval, curval;
+
+retry:
+	ret = lock_taken = 0;
+
+	/*
+	 * To avoid races, we attempt to take the lock here again
+	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
+	 * the locks. It will most likely not succeed.
+	 */
+	newval = task_pid_vnr(task);
+	if (set_waiters)
+		newval |= FUTEX_WAITERS;
+
+	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
+
+	if (unlikely(curval == -EFAULT))
+		return -EFAULT;
+
+	/*
+	 * Detect deadlocks.
+	 */
+	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+		return -EDEADLK;
+
+	/*
+	 * Surprise - we got the lock. Just return to userspace:
+	 */
+	if (unlikely(!curval))
+		return 1;
+
+	uval = curval;
+
+	/*
+	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
+	 * to wake at the next unlock.
+	 */
+	newval = curval | FUTEX_WAITERS;
+
+	/*
+	 * There are two cases, where a futex might have no owner (the
+	 * owner TID is 0): OWNER_DIED. We take over the futex in this
+	 * case. We also do an unconditional take over, when the owner
+	 * of the futex died.
+	 *
+	 * This is safe as we are protected by the hash bucket lock !
+	 */
+	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+		/* Keep the OWNER_DIED bit */
+		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+		ownerdied = 0;
+		lock_taken = 1;
+	}
+
+	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+	if (unlikely(curval == -EFAULT))
+		return -EFAULT;
+	if (unlikely(curval != uval))
+		goto retry;
+
+	/*
+	 * We took the lock due to owner died take over.
+	 */
+	if (unlikely(lock_taken))
+		return 1;
+
+	/*
+	 * We dont have the lock. Look up the PI state (or create it if
+	 * we are the first waiter):
+	 */
+	ret = lookup_pi_state(uval, hb, key, ps);
+
+	if (unlikely(ret)) {
+		switch (ret) {
+		case -ESRCH:
+			/*
+			 * No owner found for this futex. Check if the
+			 * OWNER_DIED bit is set to figure out whether
+			 * this is a robust futex or not.
+			 */
+			if (get_futex_value_locked(&curval, uaddr))
+				return -EFAULT;
+
+			/*
+			 * We simply start over in case of a robust
+			 * futex. The code above will take the futex
+			 * and return happy.
+			 */
+			if (curval & FUTEX_OWNER_DIED) {
+				ownerdied = 1;
+				goto retry;
+			}
+		default:
+			break;
+		}
+	}
+
+	return ret;
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
  */
 static void wake_futex(struct futex_q *q)
 {
+	struct task_struct *p = q->task;
+
+	/*
+	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
+	 * a non futex wake up happens on another CPU then the task
+	 * might exit and p would dereference a non existing task
+	 * struct. Prevent this by holding a reference on p across the
+	 * wake up.
+	 */
+	get_task_struct(p);
+
 	plist_del(&q->list, &q->list.plist);
 	/*
-	 * The lock in wake_up_all() is a crucial memory barrier after the
-	 * plist_del() and also before assigning to q->lock_ptr.
-	 */
-	wake_up(&q->waiter);
-	/*
-	 * The waiting task can free the futex_q as soon as this is written,
-	 * without taking any locks.  This must come last.
-	 *
-	 * A memory barrier is required here to prevent the following store to
-	 * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
-	 * end of wake_up() does not prevent this store from moving.
+	 * The waiting task can free the futex_q as soon as
+	 * q->lock_ptr = NULL is written, without taking any locks. A
+	 * memory barrier is required here to prevent the following
+	 * store to lock_ptr from getting ahead of the plist_del.
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
+
+	wake_up_state(p, TASK_NORMAL);
+	put_task_struct(p);
 }
 
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -689,7 +846,7 @@
 
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key)) {
-			if (this->pi_state) {
+			if (this->pi_state || this->rt_waiter) {
 				ret = -EINVAL;
 				break;
 			}
@@ -802,24 +959,185 @@
 	return ret;
 }
 
-/*
- * Requeue all waiters hashed on one physical page to another
- * physical page.
+/**
+ * requeue_futex() - Requeue a futex_q from one hb to another
+ * @q:		the futex_q to requeue
+ * @hb1:	the source hash_bucket
+ * @hb2:	the target hash_bucket
+ * @key2:	the new key for the requeued futex_q
+ */
+static inline
+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
+		   struct futex_hash_bucket *hb2, union futex_key *key2)
+{
+
+	/*
+	 * If key1 and key2 hash to the same bucket, no need to
+	 * requeue.
+	 */
+	if (likely(&hb1->chain != &hb2->chain)) {
+		plist_del(&q->list, &hb1->chain);
+		plist_add(&q->list, &hb2->chain);
+		q->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+		q->list.plist.lock = &hb2->lock;
+#endif
+	}
+	get_futex_key_refs(key2);
+	q->key = *key2;
+}
+
+/**
+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
+ * q:	the futex_q
+ * key:	the key of the requeue target futex
+ *
+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+ * target futex if it is uncontended or via a lock steal.  Set the futex_q key
+ * to the requeue target futex so the waiter can detect the wakeup on the right
+ * futex, but remove it from the hb and NULL the rt_waiter so it can detect
+ * atomic lock acquisition.  Must be called with the q->lock_ptr held.
+ */
+static inline
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+{
+	drop_futex_key_refs(&q->key);
+	get_futex_key_refs(key);
+	q->key = *key;
+
+	WARN_ON(plist_node_empty(&q->list));
+	plist_del(&q->list, &q->list.plist);
+
+	WARN_ON(!q->rt_waiter);
+	q->rt_waiter = NULL;
+
+	wake_up_state(q->task, TASK_NORMAL);
+}
+
+/**
+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
+ * @pifutex:		the user address of the to futex
+ * @hb1:		the from futex hash bucket, must be locked by the caller
+ * @hb2:		the to futex hash bucket, must be locked by the caller
+ * @key1:		the from futex key
+ * @key2:		the to futex key
+ * @ps:			address to store the pi_state pointer
+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+ * Wake the top waiter if we succeed.  If the caller specified set_waiters,
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+ * hb1 and hb2 must be held by the caller.
+ *
+ * Returns:
+ *  0 - failed to acquire the lock atomicly
+ *  1 - acquired the lock
+ * <0 - error
+ */
+static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+				 struct futex_hash_bucket *hb1,
+				 struct futex_hash_bucket *hb2,
+				 union futex_key *key1, union futex_key *key2,
+				 struct futex_pi_state **ps, int set_waiters)
+{
+	struct futex_q *top_waiter = NULL;
+	u32 curval;
+	int ret;
+
+	if (get_futex_value_locked(&curval, pifutex))
+		return -EFAULT;
+
+	/*
+	 * Find the top_waiter and determine if there are additional waiters.
+	 * If the caller intends to requeue more than 1 waiter to pifutex,
+	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
+	 * as we have means to handle the possible fault.  If not, don't set
+	 * the bit unecessarily as it will force the subsequent unlock to enter
+	 * the kernel.
+	 */
+	top_waiter = futex_top_waiter(hb1, key1);
+
+	/* There are no waiters, nothing for us to do. */
+	if (!top_waiter)
+		return 0;
+
+	/*
+	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
+	 * the contended case or if set_waiters is 1.  The pi_state is returned
+	 * in ps in contended cases.
+	 */
+	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+				   set_waiters);
+	if (ret == 1)
+		requeue_pi_wake_futex(top_waiter, key2);
+
+	return ret;
+}
+
+/**
+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
+ * uaddr1:	source futex user address
+ * uaddr2:	target futex user address
+ * nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
+ * nr_requeue:	number of waiters to requeue (0-INT_MAX)
+ * requeue_pi:	if we are attempting to requeue from a non-pi futex to a
+ * 		pi futex (pi to pi requeue is not supported)
+ *
+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
+ * uaddr2 atomically on behalf of the top waiter.
+ *
+ * Returns:
+ * >=0 - on success, the number of tasks requeued or woken
+ *  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-			 int nr_wake, int nr_requeue, u32 *cmpval)
+			 int nr_wake, int nr_requeue, u32 *cmpval,
+			 int requeue_pi)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+	int drop_count = 0, task_count = 0, ret;
+	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
-	int ret, drop_count = 0;
+	u32 curval2;
+
+	if (requeue_pi) {
+		/*
+		 * requeue_pi requires a pi_state, try to allocate it now
+		 * without any locks in case it fails.
+		 */
+		if (refill_pi_state_cache())
+			return -ENOMEM;
+		/*
+		 * requeue_pi must wake as many tasks as it can, up to nr_wake
+		 * + nr_requeue, since it acquires the rt_mutex prior to
+		 * returning to userspace, so as to not leave the rt_mutex with
+		 * waiters and no owner.  However, second and third wake-ups
+		 * cannot be predicted as they involve race conditions with the
+		 * first wake and a fault while looking up the pi_state.  Both
+		 * pthread_cond_signal() and pthread_cond_broadcast() should
+		 * use nr_wake=1.
+		 */
+		if (nr_wake != 1)
+			return -EINVAL;
+	}
 
 retry:
+	if (pi_state != NULL) {
+		/*
+		 * We will have to lookup the pi_state again, so free this one
+		 * to keep the accounting correct.
+		 */
+		free_pi_state(pi_state);
+		pi_state = NULL;
+	}
+
 	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ);
+	ret = get_futex_key(uaddr2, fshared, &key2,
+			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -854,32 +1172,99 @@
 		}
 	}
 
+	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+		/*
+		 * Attempt to acquire uaddr2 and wake the top waiter. If we
+		 * intend to requeue waiters, force setting the FUTEX_WAITERS
+		 * bit.  We force this here where we are able to easily handle
+		 * faults rather in the requeue loop below.
+		 */
+		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+						 &key2, &pi_state, nr_requeue);
+
+		/*
+		 * At this point the top_waiter has either taken uaddr2 or is
+		 * waiting on it.  If the former, then the pi_state will not
+		 * exist yet, look it up one more time to ensure we have a
+		 * reference to it.
+		 */
+		if (ret == 1) {
+			WARN_ON(pi_state);
+			task_count++;
+			ret = get_futex_value_locked(&curval2, uaddr2);
+			if (!ret)
+				ret = lookup_pi_state(curval2, hb2, &key2,
+						      &pi_state);
+		}
+
+		switch (ret) {
+		case 0:
+			break;
+		case -EFAULT:
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			ret = get_user(curval2, uaddr2);
+			if (!ret)
+				goto retry;
+			goto out;
+		case -EAGAIN:
+			/* The owner was exiting, try again. */
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			cond_resched();
+			goto retry;
+		default:
+			goto out_unlock;
+		}
+	}
+
 	head1 = &hb1->chain;
 	plist_for_each_entry_safe(this, next, head1, list) {
-		if (!match_futex (&this->key, &key1))
-			continue;
-		if (++ret <= nr_wake) {
-			wake_futex(this);
-		} else {
-			/*
-			 * If key1 and key2 hash to the same bucket, no need to
-			 * requeue.
-			 */
-			if (likely(head1 != &hb2->chain)) {
-				plist_del(&this->list, &hb1->chain);
-				plist_add(&this->list, &hb2->chain);
-				this->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-				this->list.plist.lock = &hb2->lock;
-#endif
-			}
-			this->key = key2;
-			get_futex_key_refs(&key2);
-			drop_count++;
+		if (task_count - nr_wake >= nr_requeue)
+			break;
 
-			if (ret - nr_wake >= nr_requeue)
-				break;
+		if (!match_futex(&this->key, &key1))
+			continue;
+
+		WARN_ON(!requeue_pi && this->rt_waiter);
+		WARN_ON(requeue_pi && !this->rt_waiter);
+
+		/*
+		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
+		 * lock, we already woke the top_waiter.  If not, it will be
+		 * woken by futex_unlock_pi().
+		 */
+		if (++task_count <= nr_wake && !requeue_pi) {
+			wake_futex(this);
+			continue;
 		}
+
+		/*
+		 * Requeue nr_requeue waiters and possibly one more in the case
+		 * of requeue_pi if we couldn't acquire the lock atomically.
+		 */
+		if (requeue_pi) {
+			/* Prepare the waiter to take the rt_mutex. */
+			atomic_inc(&pi_state->refcount);
+			this->pi_state = pi_state;
+			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+							this->rt_waiter,
+							this->task, 1);
+			if (ret == 1) {
+				/* We got the lock. */
+				requeue_pi_wake_futex(this, &key2);
+				continue;
+			} else if (ret) {
+				/* -EDEADLK */
+				this->pi_state = NULL;
+				free_pi_state(pi_state);
+				goto out_unlock;
+			}
+		}
+		requeue_futex(this, hb1, hb2, &key2);
+		drop_count++;
 	}
 
 out_unlock:
@@ -899,7 +1284,9 @@
 out_put_key1:
 	put_futex_key(fshared, &key1);
 out:
-	return ret;
+	if (pi_state != NULL)
+		free_pi_state(pi_state);
+	return ret ? ret : task_count;
 }
 
 /* The key must be already stored in q->key. */
@@ -907,8 +1294,6 @@
 {
 	struct futex_hash_bucket *hb;
 
-	init_waitqueue_head(&q->waiter);
-
 	get_futex_key_refs(&q->key);
 	hb = hash_futex(&q->key);
 	q->lock_ptr = &hb->lock;
@@ -1119,35 +1504,149 @@
  */
 #define FLAGS_SHARED		0x01
 #define FLAGS_CLOCKRT		0x02
+#define FLAGS_HAS_TIMEOUT	0x04
 
 static long futex_wait_restart(struct restart_block *restart);
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+/**
+ * fixup_owner() - Post lock pi_state and corner case management
+ * @uaddr:	user address of the futex
+ * @fshared:	whether the futex is shared (1) or not (0)
+ * @q:		futex_q (contains pi_state and access to the rt_mutex)
+ * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
+ *
+ * After attempting to lock an rt_mutex, this function is called to cleanup
+ * the pi_state owner as well as handle race conditions that may allow us to
+ * acquire the lock. Must be called with the hb lock held.
+ *
+ * Returns:
+ *  1 - success, lock taken
+ *  0 - success, lock not taken
+ * <0 - on error (-EFAULT)
+ */
+static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
+		       int locked)
 {
-	struct task_struct *curr = current;
-	struct restart_block *restart;
-	DECLARE_WAITQUEUE(wait, curr);
-	struct futex_hash_bucket *hb;
-	struct futex_q q;
+	struct task_struct *owner;
+	int ret = 0;
+
+	if (locked) {
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case:
+		 */
+		if (q->pi_state->owner != current)
+			ret = fixup_pi_state_owner(uaddr, q, current, fshared);
+		goto out;
+	}
+
+	/*
+	 * Catch the rare case, where the lock was released when we were on the
+	 * way back before we locked the hash bucket.
+	 */
+	if (q->pi_state->owner == current) {
+		/*
+		 * Try to get the rt_mutex now. This might fail as some other
+		 * task acquired the rt_mutex after we removed ourself from the
+		 * rt_mutex waiters list.
+		 */
+		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
+			locked = 1;
+			goto out;
+		}
+
+		/*
+		 * pi_state is incorrect, some other task did a lock steal and
+		 * we returned due to timeout or signal without taking the
+		 * rt_mutex. Too late. We can access the rt_mutex_owner without
+		 * locking, as the other task is now blocked on the hash bucket
+		 * lock. Fix the state up.
+		 */
+		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+		ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+		goto out;
+	}
+
+	/*
+	 * Paranoia check. If we did not take the lock, then we should not be
+	 * the owner, nor the pending owner, of the rt_mutex.
+	 */
+	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
+		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
+				"pi-state %p\n", ret,
+				q->pi_state->pi_mutex.owner,
+				q->pi_state->owner);
+
+out:
+	return ret ? ret : locked;
+}
+
+/**
+ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
+ * @hb:		the futex hash bucket, must be locked by the caller
+ * @q:		the futex_q to queue up on
+ * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
+ */
+static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
+				struct hrtimer_sleeper *timeout)
+{
+	queue_me(q, hb);
+
+	/*
+	 * There might have been scheduling since the queue_me(), as we
+	 * cannot hold a spinlock across the get_user() in case it
+	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
+	 * queueing ourselves into the futex hash. This code thus has to
+	 * rely on the futex_wake() code removing us from hash when it
+	 * wakes us up.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/* Arm the timer */
+	if (timeout) {
+		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+		if (!hrtimer_active(&timeout->timer))
+			timeout->task = NULL;
+	}
+
+	/*
+	 * !plist_node_empty() is safe here without any lock.
+	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+	 */
+	if (likely(!plist_node_empty(&q->list))) {
+		/*
+		 * If the timer has already expired, current will already be
+		 * flagged for rescheduling. Only call schedule if there
+		 * is no timeout, or if it has yet to expire.
+		 */
+		if (!timeout || timeout->task)
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+/**
+ * futex_wait_setup() - Prepare to wait on a futex
+ * @uaddr:	the futex userspace address
+ * @val:	the expected value
+ * @fshared:	whether the futex is shared (1) or not (0)
+ * @q:		the associated futex_q
+ * @hb:		storage for hash_bucket pointer to be returned to caller
+ *
+ * Setup the futex_q and locate the hash_bucket.  Get the futex value and
+ * compare it with the expected value.  Handle atomic faults internally.
+ * Return with the hb lock held and a q.key reference on success, and unlocked
+ * with no q.key reference on failure.
+ *
+ * Returns:
+ *  0 - uaddr contains val and hb has been locked
+ * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
+ */
+static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+			   struct futex_q *q, struct futex_hash_bucket **hb)
+{
 	u32 uval;
 	int ret;
-	struct hrtimer_sleeper t;
-	int rem = 0;
-
-	if (!bitset)
-		return -EINVAL;
-
-	q.pi_state = NULL;
-	q.bitset = bitset;
-retry:
-	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ);
-	if (unlikely(ret != 0))
-		goto out;
-
-retry_private:
-	hb = queue_lock(&q);
 
 	/*
 	 * Access the page AFTER the hash-bucket is locked.
@@ -1165,95 +1664,83 @@
 	 * A consequence is that futex_wait() can return zero and absorb
 	 * a wakeup when *uaddr != val on entry to the syscall.  This is
 	 * rare, but normal.
-	 *
-	 * For shared futexes, we hold the mmap semaphore, so the mapping
-	 * cannot have changed since we looked it up in get_futex_key.
 	 */
+retry:
+	q->key = FUTEX_KEY_INIT;
+	ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
+	if (unlikely(ret != 0))
+		return ret;
+
+retry_private:
+	*hb = queue_lock(q);
+
 	ret = get_futex_value_locked(&uval, uaddr);
 
-	if (unlikely(ret)) {
-		queue_unlock(&q, hb);
+	if (ret) {
+		queue_unlock(q, *hb);
 
 		ret = get_user(uval, uaddr);
 		if (ret)
-			goto out_put_key;
+			goto out;
 
 		if (!fshared)
 			goto retry_private;
 
-		put_futex_key(fshared, &q.key);
+		put_futex_key(fshared, &q->key);
 		goto retry;
 	}
-	ret = -EWOULDBLOCK;
-	if (unlikely(uval != val)) {
-		queue_unlock(&q, hb);
-		goto out_put_key;
+
+	if (uval != val) {
+		queue_unlock(q, *hb);
+		ret = -EWOULDBLOCK;
 	}
 
-	/* Only actually queue if *uaddr contained val.  */
-	queue_me(&q, hb);
+out:
+	if (ret)
+		put_futex_key(fshared, &q->key);
+	return ret;
+}
 
-	/*
-	 * There might have been scheduling since the queue_me(), as we
-	 * cannot hold a spinlock across the get_user() in case it
-	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-	 * queueing ourselves into the futex hash.  This code thus has to
-	 * rely on the futex_wake() code removing us from hash when it
-	 * wakes us up.
-	 */
+static int futex_wait(u32 __user *uaddr, int fshared,
+		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+{
+	struct hrtimer_sleeper timeout, *to = NULL;
+	struct restart_block *restart;
+	struct futex_hash_bucket *hb;
+	struct futex_q q;
+	int ret;
 
-	/* add_wait_queue is the barrier after __set_current_state. */
-	__set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&q.waiter, &wait);
-	/*
-	 * !plist_node_empty() is safe here without any lock.
-	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
-	 */
-	if (likely(!plist_node_empty(&q.list))) {
-		if (!abs_time)
-			schedule();
-		else {
-			hrtimer_init_on_stack(&t.timer,
-					      clockrt ? CLOCK_REALTIME :
-					      CLOCK_MONOTONIC,
-					      HRTIMER_MODE_ABS);
-			hrtimer_init_sleeper(&t, current);
-			hrtimer_set_expires_range_ns(&t.timer, *abs_time,
-						     current->timer_slack_ns);
+	if (!bitset)
+		return -EINVAL;
 
-			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-			if (!hrtimer_active(&t.timer))
-				t.task = NULL;
+	q.pi_state = NULL;
+	q.bitset = bitset;
+	q.rt_waiter = NULL;
 
-			/*
-			 * the timer could have already expired, in which
-			 * case current would be flagged for rescheduling.
-			 * Don't bother calling schedule.
-			 */
-			if (likely(t.task))
-				schedule();
+	if (abs_time) {
+		to = &timeout;
 
-			hrtimer_cancel(&t.timer);
-
-			/* Flag if a timeout occured */
-			rem = (t.task == NULL);
-
-			destroy_hrtimer_on_stack(&t.timer);
-		}
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
 	}
-	__set_current_state(TASK_RUNNING);
 
-	/*
-	 * NOTE: we don't remove ourselves from the waitqueue because
-	 * we are the only user of it.
-	 */
+	/* Prepare to wait on uaddr. */
+	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	if (ret)
+		goto out;
+
+	/* queue_me and wait for wakeup, timeout, or a signal. */
+	futex_wait_queue_me(hb, &q, to);
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	ret = 0;
 	if (!unqueue_me(&q))
 		goto out_put_key;
 	ret = -ETIMEDOUT;
-	if (rem)
+	if (to && !to->task)
 		goto out_put_key;
 
 	/*
@@ -1270,7 +1757,7 @@
 	restart->futex.val = val;
 	restart->futex.time = abs_time->tv64;
 	restart->futex.bitset = bitset;
-	restart->futex.flags = 0;
+	restart->futex.flags = FLAGS_HAS_TIMEOUT;
 
 	if (fshared)
 		restart->futex.flags |= FLAGS_SHARED;
@@ -1282,6 +1769,10 @@
 out_put_key:
 	put_futex_key(fshared, &q.key);
 out:
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
 	return ret;
 }
 
@@ -1290,13 +1781,16 @@
 {
 	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
 	int fshared = 0;
-	ktime_t t;
+	ktime_t t, *tp = NULL;
 
-	t.tv64 = restart->futex.time;
+	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+		t.tv64 = restart->futex.time;
+		tp = &t;
+	}
 	restart->fn = do_no_restart_syscall;
 	if (restart->futex.flags & FLAGS_SHARED)
 		fshared = 1;
-	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
+	return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
 				restart->futex.bitset,
 				restart->futex.flags & FLAGS_CLOCKRT);
 }
@@ -1312,11 +1806,10 @@
 			 int detect, ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
-	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
-	u32 uval, newval, curval;
+	u32 uval;
 	struct futex_q q;
-	int ret, lock_taken, ownerdied = 0;
+	int res, ret;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1330,6 +1823,7 @@
 	}
 
 	q.pi_state = NULL;
+	q.rt_waiter = NULL;
 retry:
 	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -1339,81 +1833,15 @@
 retry_private:
 	hb = queue_lock(&q);
 
-retry_locked:
-	ret = lock_taken = 0;
-
-	/*
-	 * To avoid races, we attempt to take the lock here again
-	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
-	 * the locks. It will most likely not succeed.
-	 */
-	newval = task_pid_vnr(current);
-
-	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-	if (unlikely(curval == -EFAULT))
-		goto uaddr_faulted;
-
-	/*
-	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
-	 * situation and we return success to user space.
-	 */
-	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
-		ret = -EDEADLK;
-		goto out_unlock_put_key;
-	}
-
-	/*
-	 * Surprise - we got the lock. Just return to userspace:
-	 */
-	if (unlikely(!curval))
-		goto out_unlock_put_key;
-
-	uval = curval;
-
-	/*
-	 * Set the WAITERS flag, so the owner will know it has someone
-	 * to wake at next unlock
-	 */
-	newval = curval | FUTEX_WAITERS;
-
-	/*
-	 * There are two cases, where a futex might have no owner (the
-	 * owner TID is 0): OWNER_DIED. We take over the futex in this
-	 * case. We also do an unconditional take over, when the owner
-	 * of the futex died.
-	 *
-	 * This is safe as we are protected by the hash bucket lock !
-	 */
-	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
-		ownerdied = 0;
-		lock_taken = 1;
-	}
-
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (unlikely(curval == -EFAULT))
-		goto uaddr_faulted;
-	if (unlikely(curval != uval))
-		goto retry_locked;
-
-	/*
-	 * We took the lock due to owner died take over.
-	 */
-	if (unlikely(lock_taken))
-		goto out_unlock_put_key;
-
-	/*
-	 * We dont have the lock. Look up the PI state (or create it if
-	 * we are the first waiter):
-	 */
-	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
-
+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
 	if (unlikely(ret)) {
 		switch (ret) {
-
+		case 1:
+			/* We got the lock. */
+			ret = 0;
+			goto out_unlock_put_key;
+		case -EFAULT:
+			goto uaddr_faulted;
 		case -EAGAIN:
 			/*
 			 * Task is exiting and we just wait for the
@@ -1423,25 +1851,6 @@
 			put_futex_key(fshared, &q.key);
 			cond_resched();
 			goto retry;
-
-		case -ESRCH:
-			/*
-			 * No owner found for this futex. Check if the
-			 * OWNER_DIED bit is set to figure out whether
-			 * this is a robust futex or not.
-			 */
-			if (get_futex_value_locked(&curval, uaddr))
-				goto uaddr_faulted;
-
-			/*
-			 * We simply start over in case of a robust
-			 * futex. The code above will take the futex
-			 * and return happy.
-			 */
-			if (curval & FUTEX_OWNER_DIED) {
-				ownerdied = 1;
-				goto retry_locked;
-			}
 		default:
 			goto out_unlock_put_key;
 		}
@@ -1465,71 +1874,21 @@
 	}
 
 	spin_lock(q.lock_ptr);
-
-	if (!ret) {
-		/*
-		 * Got the lock. We might not be the anticipated owner
-		 * if we did a lock-steal - fix up the PI-state in
-		 * that case:
-		 */
-		if (q.pi_state->owner != curr)
-			ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
-	} else {
-		/*
-		 * Catch the rare case, where the lock was released
-		 * when we were on the way back before we locked the
-		 * hash bucket.
-		 */
-		if (q.pi_state->owner == curr) {
-			/*
-			 * Try to get the rt_mutex now. This might
-			 * fail as some other task acquired the
-			 * rt_mutex after we removed ourself from the
-			 * rt_mutex waiters list.
-			 */
-			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-				ret = 0;
-			else {
-				/*
-				 * pi_state is incorrect, some other
-				 * task did a lock steal and we
-				 * returned due to timeout or signal
-				 * without taking the rt_mutex. Too
-				 * late. We can access the
-				 * rt_mutex_owner without locking, as
-				 * the other task is now blocked on
-				 * the hash bucket lock. Fix the state
-				 * up.
-				 */
-				struct task_struct *owner;
-				int res;
-
-				owner = rt_mutex_owner(&q.pi_state->pi_mutex);
-				res = fixup_pi_state_owner(uaddr, &q, owner,
-							   fshared);
-
-				/* propagate -EFAULT, if the fixup failed */
-				if (res)
-					ret = res;
-			}
-		} else {
-			/*
-			 * Paranoia check. If we did not take the lock
-			 * in the trylock above, then we should not be
-			 * the owner of the rtmutex, neither the real
-			 * nor the pending one:
-			 */
-			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
-				printk(KERN_ERR "futex_lock_pi: ret = %d "
-				       "pi-mutex: %p pi-state %p\n", ret,
-				       q.pi_state->pi_mutex.owner,
-				       q.pi_state->owner);
-		}
-	}
+	/*
+	 * Fixup the pi_state owner and possibly acquire the lock if we
+	 * haven't already.
+	 */
+	res = fixup_owner(uaddr, fshared, &q, !ret);
+	/*
+	 * If fixup_owner() returned an error, proprogate that.  If it acquired
+	 * the lock, clear our -ETIMEDOUT or -EINTR.
+	 */
+	if (res)
+		ret = (res < 0) ? res : 0;
 
 	/*
-	 * If fixup_pi_state_owner() faulted and was unable to handle the
-	 * fault, unlock it and return the fault to userspace.
+	 * If fixup_owner() faulted and was unable to handle the fault, unlock
+	 * it and return the fault to userspace.
 	 */
 	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
 		rt_mutex_unlock(&q.pi_state->pi_mutex);
@@ -1537,9 +1896,7 @@
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
 
-	if (to)
-		destroy_hrtimer_on_stack(&to->timer);
-	return ret != -EINTR ? ret : -ERESTARTNOINTR;
+	goto out;
 
 out_unlock_put_key:
 	queue_unlock(&q, hb);
@@ -1549,7 +1906,7 @@
 out:
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
-	return ret;
+	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 uaddr_faulted:
 	/*
@@ -1572,7 +1929,6 @@
 	goto retry;
 }
 
-
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1674,6 +2030,229 @@
 	return ret;
 }
 
+/**
+ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
+ * @hb:		the hash_bucket futex_q was original enqueued on
+ * @q:		the futex_q woken while waiting to be requeued
+ * @key2:	the futex_key of the requeue target futex
+ * @timeout:	the timeout associated with the wait (NULL if none)
+ *
+ * Detect if the task was woken on the initial futex as opposed to the requeue
+ * target futex.  If so, determine if it was a timeout or a signal that caused
+ * the wakeup and return the appropriate error code to the caller.  Must be
+ * called with the hb lock held.
+ *
+ * Returns
+ *  0 - no early wakeup detected
+ * <0 - -ETIMEDOUT or -ERESTARTNOINTR
+ */
+static inline
+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
+				   struct futex_q *q, union futex_key *key2,
+				   struct hrtimer_sleeper *timeout)
+{
+	int ret = 0;
+
+	/*
+	 * With the hb lock held, we avoid races while we process the wakeup.
+	 * We only need to hold hb (and not hb2) to ensure atomicity as the
+	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
+	 * It can't be requeued from uaddr2 to something else since we don't
+	 * support a PI aware source futex for requeue.
+	 */
+	if (!match_futex(&q->key, key2)) {
+		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
+		/*
+		 * We were woken prior to requeue by a timeout or a signal.
+		 * Unqueue the futex_q and determine which it was.
+		 */
+		plist_del(&q->list, &q->list.plist);
+		drop_futex_key_refs(&q->key);
+
+		if (timeout && !timeout->task)
+			ret = -ETIMEDOUT;
+		else
+			ret = -ERESTARTNOINTR;
+	}
+	return ret;
+}
+
+/**
+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
+ * @uaddr:	the futex we initialyl wait on (non-pi)
+ * @fshared:	whether the futexes are shared (1) or not (0).  They must be
+ * 		the same type, no requeueing from private to shared, etc.
+ * @val:	the expected value of uaddr
+ * @abs_time:	absolute timeout
+ * @bitset:	32 bit wakeup bitset set by userspace, defaults to all.
+ * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
+ * @uaddr2:	the pi futex we will take prior to returning to user-space
+ *
+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
+ * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
+ * complete the acquisition of the rt_mutex prior to returning to userspace.
+ * This ensures the rt_mutex maintains an owner when it has waiters; without
+ * one, the pi logic wouldn't know which task to boost/deboost, if there was a
+ * need to.
+ *
+ * We call schedule in futex_wait_queue_me() when we enqueue and return there
+ * via the following:
+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
+ * 2) wakeup on uaddr2 after a requeue and subsequent unlock
+ * 3) signal (before or after requeue)
+ * 4) timeout (before or after requeue)
+ *
+ * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function.
+ *
+ * If 2, we may then block on trying to take the rt_mutex and return via:
+ * 5) successful lock
+ * 6) signal
+ * 7) timeout
+ * 8) other lock acquisition failure
+ *
+ * If 6, we setup a restart_block with futex_lock_pi() as the function.
+ *
+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
+ *
+ * Returns:
+ *  0 - On success
+ * <0 - On error
+ */
+static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+				 u32 val, ktime_t *abs_time, u32 bitset,
+				 int clockrt, u32 __user *uaddr2)
+{
+	struct hrtimer_sleeper timeout, *to = NULL;
+	struct rt_mutex_waiter rt_waiter;
+	struct rt_mutex *pi_mutex = NULL;
+	struct futex_hash_bucket *hb;
+	union futex_key key2;
+	struct futex_q q;
+	int res, ret;
+
+	if (!bitset)
+		return -EINVAL;
+
+	if (abs_time) {
+		to = &timeout;
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
+	}
+
+	/*
+	 * The waiter is allocated on our stack, manipulated by the requeue
+	 * code while we sleep on uaddr.
+	 */
+	debug_rt_mutex_init_waiter(&rt_waiter);
+	rt_waiter.task = NULL;
+
+	q.pi_state = NULL;
+	q.bitset = bitset;
+	q.rt_waiter = &rt_waiter;
+
+	key2 = FUTEX_KEY_INIT;
+	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+	if (unlikely(ret != 0))
+		goto out;
+
+	/* Prepare to wait on uaddr. */
+	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	if (ret)
+		goto out_key2;
+
+	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
+	futex_wait_queue_me(hb, &q, to);
+
+	spin_lock(&hb->lock);
+	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+	spin_unlock(&hb->lock);
+	if (ret)
+		goto out_put_keys;
+
+	/*
+	 * In order for us to be here, we know our q.key == key2, and since
+	 * we took the hb->lock above, we also know that futex_requeue() has
+	 * completed and we no longer have to concern ourselves with a wakeup
+	 * race with the atomic proxy lock acquition by the requeue code.
+	 */
+
+	/* Check if the requeue code acquired the second futex for us. */
+	if (!q.rt_waiter) {
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case.
+		 */
+		if (q.pi_state && (q.pi_state->owner != current)) {
+			spin_lock(q.lock_ptr);
+			ret = fixup_pi_state_owner(uaddr2, &q, current,
+						   fshared);
+			spin_unlock(q.lock_ptr);
+		}
+	} else {
+		/*
+		 * We have been woken up by futex_unlock_pi(), a timeout, or a
+		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
+		 * the pi_state.
+		 */
+		WARN_ON(!&q.pi_state);
+		pi_mutex = &q.pi_state->pi_mutex;
+		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+		debug_rt_mutex_free_waiter(&rt_waiter);
+
+		spin_lock(q.lock_ptr);
+		/*
+		 * Fixup the pi_state owner and possibly acquire the lock if we
+		 * haven't already.
+		 */
+		res = fixup_owner(uaddr2, fshared, &q, !ret);
+		/*
+		 * If fixup_owner() returned an error, proprogate that.  If it
+		 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+		 */
+		if (res)
+			ret = (res < 0) ? res : 0;
+
+		/* Unqueue and drop the lock. */
+		unqueue_me_pi(&q);
+	}
+
+	/*
+	 * If fixup_pi_state_owner() faulted and was unable to handle the
+	 * fault, unlock the rt_mutex and return the fault to userspace.
+	 */
+	if (ret == -EFAULT) {
+		if (rt_mutex_owner(pi_mutex) == current)
+			rt_mutex_unlock(pi_mutex);
+	} else if (ret == -EINTR) {
+		/*
+		 * We've already been requeued, but we have no way to
+		 * restart by calling futex_lock_pi() directly. We
+		 * could restart the syscall, but that will look at
+		 * the user space value and return right away. So we
+		 * drop back with EWOULDBLOCK to tell user space that
+		 * "val" has been changed. That's the same what the
+		 * restart of the syscall would do in
+		 * futex_wait_setup().
+		 */
+		ret = -EWOULDBLOCK;
+	}
+
+out_put_keys:
+	put_futex_key(fshared, &q.key);
+out_key2:
+	put_futex_key(fshared, &key2);
+
+out:
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
+	return ret;
+}
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -1896,7 +2475,7 @@
 		fshared = 1;
 
 	clockrt = op & FUTEX_CLOCK_REALTIME;
-	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
 		return -ENOSYS;
 
 	switch (cmd) {
@@ -1911,10 +2490,11 @@
 		ret = futex_wake(uaddr, fshared, val, val3);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    0);
 		break;
 	case FUTEX_WAKE_OP:
 		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -1931,6 +2511,15 @@
 		if (futex_cmpxchg_enabled)
 			ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
 		break;
+	case FUTEX_WAIT_REQUEUE_PI:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
+					    clockrt, uaddr2);
+		break;
+	case FUTEX_CMP_REQUEUE_PI:
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    1);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
@@ -1948,7 +2537,8 @@
 	int cmd = op & FUTEX_CMD_MASK;
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-		      cmd == FUTEX_WAIT_BITSET)) {
+		      cmd == FUTEX_WAIT_BITSET ||
+		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
@@ -1960,11 +2550,11 @@
 		tp = &t;
 	}
 	/*
-	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
 	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
 	 */
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-	    cmd == FUTEX_WAKE_OP)
+	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 3394f8f..7d04780 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c687ba4..13c68e7 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,7 +359,6 @@
 
 	spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -438,7 +437,6 @@
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	spin_unlock(&desc->lock);
 }
@@ -475,7 +473,6 @@
 		    !desc->action)) {
 		desc->status |= (IRQ_PENDING | IRQ_MASKED);
 		mask_ack_irq(desc, irq);
-		desc = irq_remap_to_desc(irq, desc);
 		goto out_unlock;
 	}
 	kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@
 	/* Start handling the irq */
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	if (desc->chip->eoi) {
+	if (desc->chip->eoi)
 		desc->chip->eoi(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 }
 
 void
@@ -582,10 +576,8 @@
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
-		if (desc->chip != &no_irq_chip) {
+		if (desc->chip != &no_irq_chip)
 			mask_ack_irq(desc, irq);
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 26e0875..065205b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -11,14 +11,15 @@
  */
 
 #include <linux/irq.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <trace/irq.h>
 #include <linux/bootmem.h>
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
@@ -44,7 +45,7 @@
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 static void __init init_irq_default_affinity(void)
 {
-	alloc_bootmem_cpumask_var(&irq_default_affinity);
+	alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
 	cpumask_setall(irq_default_affinity);
 }
 #else
@@ -81,45 +82,48 @@
 	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
-void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
 {
-	int node;
 	void *ptr;
 
-	node = cpu_to_node(cpu);
-	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
+	if (slab_is_available())
+		ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+				   GFP_ATOMIC, node);
+	else
+		ptr = alloc_bootmem_node(NODE_DATA(node),
+				nr * sizeof(*desc->kstat_irqs));
 
 	/*
 	 * don't overwite if can not get new one
 	 * init_copy_kstat_irqs() could still use old one
 	 */
 	if (ptr) {
-		printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
-			 cpu, node);
+		printk(KERN_DEBUG "  alloc kstat_irqs on node %d\n", node);
 		desc->kstat_irqs = ptr;
 	}
 }
 
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 
 	spin_lock_init(&desc->lock);
 	desc->irq = irq;
 #ifdef CONFIG_SMP
-	desc->cpu = cpu;
+	desc->node = node;
 #endif
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	init_kstat_irqs(desc, node, nr_cpu_ids);
 	if (!desc->kstat_irqs) {
 		printk(KERN_ERR "can not alloc kstat_irqs\n");
 		BUG_ON(1);
 	}
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
 		BUG_ON(1);
 	}
-	arch_init_chip_data(desc, cpu);
+	init_desc_masks(desc);
+	arch_init_chip_data(desc, node);
 }
 
 /*
@@ -146,6 +150,7 @@
 {
 	struct irq_desc *desc;
 	int legacy_count;
+	int node;
 	int i;
 
 	init_irq_default_affinity();
@@ -156,20 +161,21 @@
 
 	desc = irq_desc_legacy;
 	legacy_count = ARRAY_SIZE(irq_desc_legacy);
+ 	node = first_online_node;
 
 	/* allocate irq_desc_ptrs array based on nr_irqs */
-	irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
+	irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
 
 	/* allocate based on nr_cpu_ids */
-	/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
-	kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
-					  sizeof(int));
+	kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
+					  sizeof(int), GFP_NOWAIT, node);
 
 	for (i = 0; i < legacy_count; i++) {
 		desc[i].irq = i;
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], node, true);
+		init_desc_masks(&desc[i]);
 		irq_desc_ptrs[i] = desc + i;
 	}
 
@@ -187,11 +193,10 @@
 	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	unsigned long flags;
-	int node;
 
 	if (irq >= nr_irqs) {
 		WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -210,15 +215,17 @@
 	if (desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
-	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
-		 irq, cpu, node);
+	if (slab_is_available())
+		desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	else
+		desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+
+	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {
 		printk(KERN_ERR "can not alloc irq_desc\n");
 		BUG_ON(1);
 	}
-	init_one_irq_desc(irq, desc, cpu);
+	init_one_irq_desc(irq, desc, node);
 
 	irq_desc_ptrs[irq] = desc;
 
@@ -256,7 +263,8 @@
 
 	for (i = 0; i < count; i++) {
 		desc[i].irq = i;
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], 0, true);
+		init_desc_masks(&desc[i]);
 		desc[i].kstat_irqs = kstat_irqs_all[i];
 	}
 	return arch_early_irq_init();
@@ -267,7 +275,7 @@
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	return irq_to_desc(irq);
 }
@@ -348,9 +356,6 @@
 	       "but no thread function available.", irq, action->name);
 }
 
-DEFINE_TRACE(irq_handler_entry);
-DEFINE_TRACE(irq_handler_exit);
-
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -453,11 +458,8 @@
 		/*
 		 * No locking required for CPU-local interrupts:
 		 */
-		if (desc->chip->ack) {
+		if (desc->chip->ack)
 			desc->chip->ack(irq);
-			/* get new one */
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		if (likely(!(desc->status & IRQ_DISABLED))) {
 			action_ret = handle_IRQ_event(irq, desc->action);
 			if (!noirqdebug)
@@ -468,10 +470,8 @@
 	}
 
 	spin_lock(&desc->lock);
-	if (desc->chip->ack) {
+	if (desc->chip->ack)
 		desc->chip->ack(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 	/*
 	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 	 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 01ce20e..7346825 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,7 @@
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
 extern struct lock_class_key irq_desc_lock_class;
-extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 extern void clear_kstat_irqs(struct irq_desc *desc);
 extern spinlock_t sparse_irq_lock;
 
@@ -42,6 +42,9 @@
 
 extern int irq_select_affinity_usr(unsigned int irq);
 
+extern void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
+
 /*
  * Debugging printout:
  */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2734eca..aaf5c9d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,7 +80,7 @@
 	return 1;
 }
 
-static void
+void
 irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
 {
 	struct irqaction *action = desc->action;
@@ -109,17 +109,22 @@
 	spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT)
-		desc->chip->set_affinity(irq, cpumask);
+	if (desc->status & IRQ_MOVE_PCNTXT) {
+		if (!desc->chip->set_affinity(irq, cpumask)) {
+			cpumask_copy(desc->affinity, cpumask);
+			irq_set_thread_affinity(desc, cpumask);
+		}
+	}
 	else {
 		desc->status |= IRQ_MOVE_PENDING;
 		cpumask_copy(desc->pending_mask, cpumask);
 	}
 #else
-	cpumask_copy(desc->affinity, cpumask);
-	desc->chip->set_affinity(irq, cpumask);
+	if (!desc->chip->set_affinity(irq, cpumask)) {
+		cpumask_copy(desc->affinity, cpumask);
+		irq_set_thread_affinity(desc, cpumask);
+	}
 #endif
-	irq_set_thread_affinity(desc, cpumask);
 	desc->status |= IRQ_AFFINITY_SET;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index e05ad9b..cfe767c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,5 +1,8 @@
 
 #include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
 
 void move_masked_irq(int irq)
 {
@@ -39,11 +42,12 @@
 	 * masking the irqs.
 	 */
 	if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
-		   < nr_cpu_ids)) {
-		cpumask_and(desc->affinity,
-			    desc->pending_mask, cpu_online_mask);
-		desc->chip->set_affinity(irq, desc->affinity);
-	}
+		   < nr_cpu_ids))
+		if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
+			cpumask_copy(desc->affinity, desc->pending_mask);
+			irq_set_thread_affinity(desc, desc->pending_mask);
+		}
+
 	cpumask_clear(desc->pending_mask);
 }
 
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 44bbdcb..2f69bee 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -15,9 +15,9 @@
 
 static void init_copy_kstat_irqs(struct irq_desc *old_desc,
 				 struct irq_desc *desc,
-				 int cpu, int nr)
+				 int node, int nr)
 {
-	init_kstat_irqs(desc, cpu, nr);
+	init_kstat_irqs(desc, node, nr);
 
 	if (desc->kstat_irqs != old_desc->kstat_irqs)
 		memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@
 }
 
 static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
-		 struct irq_desc *desc, int cpu)
+		 struct irq_desc *desc, int node)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
 				"for migration.\n", irq);
 		return false;
 	}
 	spin_lock_init(&desc->lock);
-	desc->cpu = cpu;
+	desc->node = node;
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+	init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
 	init_copy_desc_masks(old_desc, desc);
-	arch_init_copy_chip_data(old_desc, desc, cpu);
+	arch_init_copy_chip_data(old_desc, desc, node);
 	return true;
 }
 
@@ -59,12 +59,11 @@
 }
 
 static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
-						int cpu)
+						int node)
 {
 	struct irq_desc *desc;
 	unsigned int irq;
 	unsigned long flags;
-	int node;
 
 	irq = old_desc->irq;
 
@@ -76,7 +75,6 @@
 	if (desc && old_desc != desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 	if (!desc) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@
 		desc = old_desc;
 		goto out_unlock;
 	}
-	if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+	if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
 		/* still use old one */
 		kfree(desc);
 		desc = old_desc;
@@ -97,9 +95,7 @@
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
-	spin_unlock(&old_desc->lock);
 	kfree(old_desc);
-	spin_lock(&desc->lock);
 
 	return desc;
 
@@ -109,24 +105,14 @@
 	return desc;
 }
 
-struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 {
-	int old_cpu;
-	int node, old_node;
-
 	/* those all static, do move them */
 	if (desc->irq < NR_IRQS_LEGACY)
 		return desc;
 
-	old_cpu = desc->cpu;
-	if (old_cpu != cpu) {
-		node = cpu_to_node(cpu);
-		old_node = cpu_to_node(old_cpu);
-		if (old_node != node)
-			desc = __real_move_irq_desc(desc, cpu);
-		else
-			desc->cpu = cpu;
-	}
+	if (desc->node != node)
+		desc = __real_move_irq_desc(desc, node);
 
 	return desc;
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e498377..ae1c352 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1448,17 +1448,17 @@
 			goto Restore_console;
 		}
 		suspend_console();
-		error = device_suspend(PMSG_FREEZE);
+		error = dpm_suspend_start(PMSG_FREEZE);
 		if (error)
 			goto Resume_console;
-		/* At this point, device_suspend() has been called,
-		 * but *not* device_power_down(). We *must*
-		 * device_power_down() now.  Otherwise, drivers for
+		/* At this point, dpm_suspend_start() has been called,
+		 * but *not* dpm_suspend_noirq(). We *must* call
+		 * dpm_suspend_noirq() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = device_power_down(PMSG_FREEZE);
+		error = dpm_suspend_noirq(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1486,9 +1486,9 @@
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		device_power_up(PMSG_RESTORE);
+		dpm_resume_noirq(PMSG_RESTORE);
  Resume_devices:
-		device_resume(PMSG_RESTORE);
+		dpm_resume_end(PMSG_RESTORE);
  Resume_console:
 		resume_console();
 		thaw_processes();
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf85..41c88fe 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -21,9 +21,6 @@
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-DEFINE_TRACE(sched_kthread_stop);
-DEFINE_TRACE(sched_kthread_stop_ret);
-
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index accb40c..8bbeef9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
 #include <linux/hash.h>
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
-#include <trace/lockdep.h>
 
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/lockdep.h>
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
 
-DEFINE_TRACE(lock_acquire);
-
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@
 }
 EXPORT_SYMBOL_GPL(lock_acquire);
 
-DEFINE_TRACE(lock_release);
-
 void lock_release(struct lockdep_map *lock, int nested,
 			  unsigned long ip)
 {
@@ -3105,6 +3103,8 @@
 		hlock->holdtime_stamp = now;
 	}
 
+	trace_lock_acquired(lock, ip, waittime);
+
 	stats = get_lock_stats(hlock_class(hlock));
 	if (waittime) {
 		if (hlock->read)
@@ -3120,8 +3120,6 @@
 	lock->ip = ip;
 }
 
-DEFINE_TRACE(lock_contended);
-
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
@@ -3143,14 +3141,10 @@
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-DEFINE_TRACE(lock_acquired);
-
 void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
-
 	if (unlikely(!lock_stat))
 		return;
 
diff --git a/kernel/module.c b/kernel/module.c
index e797812..e4ab36c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
 */
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
@@ -52,6 +53,7 @@
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/percpu.h>
+#include <linux/kmemleak.h>
 
 #if 0
 #define DEBUGP printk
@@ -72,6 +74,9 @@
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
 
+/* Block module loading/unloading? */
+int modules_disabled = 0;
+
 /* Waiting for a module to finish initializing? */
 static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
@@ -429,6 +434,7 @@
 	unsigned long extra;
 	unsigned int i;
 	void *ptr;
+	int cpu;
 
 	if (align > PAGE_SIZE) {
 		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
@@ -458,6 +464,11 @@
 			if (!split_block(i, size))
 				return NULL;
 
+		/* add the per-cpu scanning areas */
+		for_each_possible_cpu(cpu)
+			kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
+				       GFP_KERNEL);
+
 		/* Mark allocated */
 		pcpu_size[i] = -pcpu_size[i];
 		return ptr;
@@ -472,6 +483,7 @@
 {
 	unsigned int i;
 	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
+	int cpu;
 
 	/* First entry is core kernel percpu data. */
 	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -483,6 +495,10 @@
 	BUG();
 
  free:
+	/* remove the per-cpu scanning areas */
+	for_each_possible_cpu(cpu)
+		kmemleak_free(freeme + per_cpu_offset(cpu));
+
 	/* Merge with previous? */
 	if (pcpu_size[i-1] >= 0) {
 		pcpu_size[i-1] += pcpu_size[i];
@@ -777,7 +793,7 @@
 	char name[MODULE_NAME_LEN];
 	int ret, forced = 0;
 
-	if (!capable(CAP_SYS_MODULE))
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
 		return -EPERM;
 
 	if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
@@ -1489,9 +1505,6 @@
 	/* Free any allocated parameters. */
 	destroy_params(mod->kp, mod->num_kp);
 
-	/* release any pointers to mcount in this module */
-	ftrace_release(mod->module_core, mod->core_size);
-
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1878,6 +1891,36 @@
 	return ret;
 }
 
+#ifdef CONFIG_DEBUG_KMEMLEAK
+static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
+				 Elf_Shdr *sechdrs, char *secstrings)
+{
+	unsigned int i;
+
+	/* only scan the sections containing data */
+	kmemleak_scan_area(mod->module_core, (unsigned long)mod -
+			   (unsigned long)mod->module_core,
+			   sizeof(struct module), GFP_KERNEL);
+
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+		if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0
+		    && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
+			continue;
+
+		kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
+				   (unsigned long)mod->module_core,
+				   sechdrs[i].sh_size, GFP_KERNEL);
+	}
+}
+#else
+static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
+					Elf_Shdr *sechdrs, char *secstrings)
+{
+}
+#endif
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
 static noinline struct module *load_module(void __user *umod,
@@ -1892,11 +1935,9 @@
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int num_mcount;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2050,6 +2091,12 @@
 
 	/* Do the allocs. */
 	ptr = module_alloc_update_bounds(mod->core_size);
+	/*
+	 * The pointer to this block is stored in the module structure
+	 * which is inside the block. Just mark it as not being a
+	 * leak.
+	 */
+	kmemleak_not_leak(ptr);
 	if (!ptr) {
 		err = -ENOMEM;
 		goto free_percpu;
@@ -2058,6 +2105,13 @@
 	mod->module_core = ptr;
 
 	ptr = module_alloc_update_bounds(mod->init_size);
+	/*
+	 * The pointer to this block is stored in the module structure
+	 * which is inside the block. This block doesn't need to be
+	 * scanned as it contains data and code that will be freed
+	 * after the module is initialized.
+	 */
+	kmemleak_ignore(ptr);
 	if (!ptr && mod->init_size) {
 		err = -ENOMEM;
 		goto free_core;
@@ -2088,6 +2142,7 @@
 	}
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
+	kmemleak_load_module(mod, hdr, sechdrs, secstrings);
 
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
 	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
@@ -2172,7 +2227,19 @@
 					sizeof(*mod->tracepoints),
 					&mod->num_tracepoints);
 #endif
-
+#ifdef CONFIG_EVENT_TRACING
+	mod->trace_events = section_objs(hdr, sechdrs, secstrings,
+					 "_ftrace_events",
+					 sizeof(*mod->trace_events),
+					 &mod->num_trace_events);
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	/* sechdrs[0].sh_size is always zero */
+	mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+					     "__mcount_loc",
+					     sizeof(*mod->ftrace_callsites),
+					     &mod->num_ftrace_callsites);
+#endif
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
 	    || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2237,11 +2304,6 @@
 			dynamic_debug_setup(debug, num_debug);
 	}
 
-	/* sechdrs[0].sh_size is always zero */
-	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mod, mseg, mseg + num_mcount);
-
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2302,7 +2364,6 @@
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
-	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
@@ -2336,7 +2397,7 @@
 	int ret = 0;
 
 	/* Must have permission */
-	if (!capable(CAP_SYS_MODULE))
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
 		return -EPERM;
 
 	/* Only one module load at a time, please */
@@ -2394,6 +2455,7 @@
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
+	trim_init_extable(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 507cf2b..947b3ad 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -89,7 +89,7 @@
  *
  * This function is similar to (but not equivalent to) down().
  */
-void inline __sched mutex_lock(struct mutex *lock)
+void __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
 	/*
@@ -249,7 +249,9 @@
 
 		/* didnt get the lock, go to sleep: */
 		spin_unlock_mutex(&lock->wait_lock, flags);
-		__schedule();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
 
@@ -471,5 +473,28 @@
 
 	return ret;
 }
-
 EXPORT_SYMBOL(mutex_trylock);
+
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
diff --git a/kernel/params.c b/kernel/params.c
index de273ec..7f6912c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,9 +24,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */
-#define KPARAM_KMALLOCED	0x80000000
-
 #if 0
 #define DEBUGP printk
 #else
@@ -220,13 +217,13 @@
 		return -ENOSPC;
 	}
 
-	if (kp->perm & KPARAM_KMALLOCED)
+	if (kp->flags & KPARAM_KMALLOCED)
 		kfree(*(char **)kp->arg);
 
 	/* This is a hack.  We can't need to strdup in early boot, and we
 	 * don't need to; this mangled commandline is preserved. */
 	if (slab_is_available()) {
-		kp->perm |= KPARAM_KMALLOCED;
+		kp->flags |= KPARAM_KMALLOCED;
 		*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
 		if (!kp->arg)
 			return -ENOMEM;
@@ -241,44 +238,63 @@
 	return sprintf(buffer, "%s", *((char **)kp->arg));
 }
 
+/* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, struct kernel_param *kp)
 {
+	bool v;
+
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
 	/* One of =[yYnN01] */
 	switch (val[0]) {
 	case 'y': case 'Y': case '1':
-		*(int *)kp->arg = 1;
-		return 0;
+		v = true;
+		break;
 	case 'n': case 'N': case '0':
-		*(int *)kp->arg = 0;
-		return 0;
+		v = false;
+		break;
+	default:
+		return -EINVAL;
 	}
-	return -EINVAL;
+
+	if (kp->flags & KPARAM_ISBOOL)
+		*(bool *)kp->arg = v;
+	else
+		*(int *)kp->arg = v;
+	return 0;
 }
 
 int param_get_bool(char *buffer, struct kernel_param *kp)
 {
+	bool val;
+	if (kp->flags & KPARAM_ISBOOL)
+		val = *(bool *)kp->arg;
+	else
+		val = *(int *)kp->arg;
+
 	/* Y and N chosen as being relatively non-coder friendly */
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
+	return sprintf(buffer, "%c", val ? 'Y' : 'N');
 }
 
+/* This one must be bool. */
 int param_set_invbool(const char *val, struct kernel_param *kp)
 {
-	int boolval, ret;
+	int ret;
+	bool boolval;
 	struct kernel_param dummy;
 
 	dummy.arg = &boolval;
+	dummy.flags = KPARAM_ISBOOL;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
-		*(int *)kp->arg = !boolval;
+		*(bool *)kp->arg = !boolval;
 	return ret;
 }
 
 int param_get_invbool(char *buffer, struct kernel_param *kp)
 {
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
+	return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
 }
 
 /* We break the rule and mangle the string. */
@@ -591,7 +607,7 @@
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		if (params[i].perm & KPARAM_KMALLOCED)
+		if (params[i].flags & KPARAM_KMALLOCED)
 			kfree(*(char **)params[i].arg);
 }
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
new file mode 100644
index 0000000..29b685f
--- /dev/null
+++ b/kernel/perf_counter.c
@@ -0,0 +1,4339 @@
+/*
+ * Performance counter core code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ *  For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/dcache.h>
+#include <linux/percpu.h>
+#include <linux/ptrace.h>
+#include <linux/vmstat.h>
+#include <linux/hardirq.h>
+#include <linux/rculist.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_counter.h>
+
+#include <asm/irq_regs.h>
+
+/*
+ * Each CPU has a list of per CPU counters:
+ */
+DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+
+int perf_max_counters __read_mostly = 1;
+static int perf_reserved_percpu __read_mostly;
+static int perf_overcommit __read_mostly = 1;
+
+static atomic_t nr_counters __read_mostly;
+static atomic_t nr_mmap_counters __read_mostly;
+static atomic_t nr_comm_counters __read_mostly;
+
+/*
+ * perf counter paranoia level:
+ *  0 - not paranoid
+ *  1 - disallow cpu counters to unpriv
+ *  2 - disallow kernel profiling to unpriv
+ */
+int sysctl_perf_counter_paranoid __read_mostly;
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_counter_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_counter_paranoid > 1;
+}
+
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
+
+/*
+ * max perf counter sample rate
+ */
+int sysctl_perf_counter_sample_rate __read_mostly = 100000;
+
+static atomic64_t perf_counter_id;
+
+/*
+ * Lock for (sysadmin-configurable) counter reservations:
+ */
+static DEFINE_SPINLOCK(perf_resource_lock);
+
+/*
+ * Architecture provided APIs - weak aliases:
+ */
+extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+{
+	return NULL;
+}
+
+void __weak hw_perf_disable(void)		{ barrier(); }
+void __weak hw_perf_enable(void)		{ barrier(); }
+
+void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
+
+int __weak
+hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	return 0;
+}
+
+void __weak perf_counter_print_debug(void)	{ }
+
+static DEFINE_PER_CPU(int, disable_count);
+
+void __perf_disable(void)
+{
+	__get_cpu_var(disable_count)++;
+}
+
+bool __perf_enable(void)
+{
+	return !--__get_cpu_var(disable_count);
+}
+
+void perf_disable(void)
+{
+	__perf_disable();
+	hw_perf_disable();
+}
+
+void perf_enable(void)
+{
+	if (__perf_enable())
+		hw_perf_enable();
+}
+
+static void get_ctx(struct perf_counter_context *ctx)
+{
+	atomic_inc(&ctx->refcount);
+}
+
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_counter_context *ctx;
+
+	ctx = container_of(head, struct perf_counter_context, rcu_head);
+	kfree(ctx);
+}
+
+static void put_ctx(struct perf_counter_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
+		if (ctx->task)
+			put_task_struct(ctx->task);
+		call_rcu(&ctx->rcu_head, free_ctx);
+	}
+}
+
+/*
+ * Get the perf_counter_context for a task and lock it.
+ * This has to cope with with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_counter_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+{
+	struct perf_counter_context *ctx;
+
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_counter_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_counter_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more.
+		 */
+		spin_lock_irqsave(&ctx->lock, *flags);
+		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			goto retry;
+		}
+	}
+	rcu_read_unlock();
+	return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task.  This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_counter_context *perf_pin_task_context(struct task_struct *task)
+{
+	struct perf_counter_context *ctx;
+	unsigned long flags;
+
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		++ctx->pin_count;
+		get_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+	return ctx;
+}
+
+static void perf_unpin_context(struct perf_counter_context *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	--ctx->pin_count;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	put_ctx(ctx);
+}
+
+/*
+ * Add a counter from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *group_leader = counter->group_leader;
+
+	/*
+	 * Depending on whether it is a standalone or sibling counter,
+	 * add it straight to the context's counter list, or to the group
+	 * leader's sibling list:
+	 */
+	if (group_leader == counter)
+		list_add_tail(&counter->list_entry, &ctx->counter_list);
+	else {
+		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+		group_leader->nr_siblings++;
+	}
+
+	list_add_rcu(&counter->event_entry, &ctx->event_list);
+	ctx->nr_counters++;
+}
+
+/*
+ * Remove a counter from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *sibling, *tmp;
+
+	if (list_empty(&counter->list_entry))
+		return;
+	ctx->nr_counters--;
+
+	list_del_init(&counter->list_entry);
+	list_del_rcu(&counter->event_entry);
+
+	if (counter->group_leader != counter)
+		counter->group_leader->nr_siblings--;
+
+	/*
+	 * If this was a group counter with sibling counters then
+	 * upgrade the siblings to singleton counters by adding them
+	 * to the context list directly:
+	 */
+	list_for_each_entry_safe(sibling, tmp,
+				 &counter->sibling_list, list_entry) {
+
+		list_move_tail(&sibling->list_entry, &ctx->counter_list);
+		sibling->group_leader = sibling;
+	}
+}
+
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_stopped = ctx->time;
+	counter->pmu->disable(counter);
+	counter->oncpu = -1;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (counter->attr.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
+static void
+group_sched_out(struct perf_counter *group_counter,
+		struct perf_cpu_context *cpuctx,
+		struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->attr.exclusive)
+		cpuctx->exclusive = 0;
+}
+
+/*
+ * Cross CPU call to remove a performance counter
+ *
+ * We disable the counter on the hardware level first. After that we
+ * remove it from the context list.
+ */
+static void __perf_counter_remove_from_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * counters on a global level.
+	 */
+	perf_disable();
+
+	counter_sched_out(counter, cpuctx, ctx);
+
+	list_del_counter(counter, ctx);
+
+	if (!ctx->task) {
+		/*
+		 * Allow more per task counters with respect to the
+		 * reservation:
+		 */
+		cpuctx->max_pertask =
+			min(perf_max_counters - ctx->nr_counters,
+			    perf_max_counters - perf_reserved_percpu);
+	}
+
+	perf_enable();
+	spin_unlock(&ctx->lock);
+}
+
+
+/*
+ * Remove the counter from a task's (or a CPU's) list of counters.
+ *
+ * Must be called with ctx->mutex held.
+ *
+ * CPU counters are removed with a smp call. For task counters we only
+ * call when the task is on a CPU.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This is OK when called from perf_release since
+ * that only calls us on the top-level context, which can't be a clone.
+ * When called from perf_counter_exit_task, it's OK because the
+ * context has been detached from its task.
+ */
+static void perf_counter_remove_from_context(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu counters are removed via an smp call and
+		 * the removal is always sucessful.
+		 */
+		smp_call_function_single(counter->cpu,
+					 __perf_counter_remove_from_context,
+					 counter, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_counter_remove_from_context,
+				 counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the context is active we need to retry the smp call.
+	 */
+	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can remove the counter safely, if the call above did not
+	 * succeed.
+	 */
+	if (!list_empty(&counter->list_entry)) {
+		list_del_counter(counter, ctx);
+	}
+	spin_unlock_irq(&ctx->lock);
+}
+
+static inline u64 perf_clock(void)
+{
+	return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_counter_context *ctx)
+{
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a counter.
+ */
+static void update_counter_times(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	u64 run_end;
+
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+		return;
+
+	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
+
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		run_end = counter->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	counter->total_time_running = run_end - counter->tstamp_running;
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all counters in a group.
+ */
+static void update_group_times(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	update_counter_times(leader);
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		update_counter_times(counter);
+}
+
+/*
+ * Cross CPU call to disable a performance counter
+ */
+static void __perf_counter_disable(void *info)
+{
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * If the counter is on, turn it off.
+	 * If it is in error state, leave it in error state.
+	 */
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		update_context_time(ctx);
+		update_counter_times(counter);
+		if (counter == counter->group_leader)
+			group_sched_out(counter, cpuctx, ctx);
+		else
+			counter_sched_out(counter, cpuctx, ctx);
+		counter->state = PERF_COUNTER_STATE_OFF;
+	}
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Disable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisifed when called through
+ * perf_counter_for_each_child or perf_counter_for_each because they
+ * hold the top-level counter's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_counter.
+ * When called from perf_pending_counter it's OK because counter->ctx
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_counter_task_sched_out for this context.
+ */
+static void perf_counter_disable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Disable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_disable,
+					 counter, 1);
+		return;
+	}
+
+ retry:
+	task_oncpu_function_call(task, __perf_counter_disable, counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the counter is still active, we need to retry the cross-call.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
+		counter->state = PERF_COUNTER_STATE_OFF;
+	}
+
+	spin_unlock_irq(&ctx->lock);
+}
+
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state <= PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->pmu->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	counter->tstamp_running += ctx->time - counter->tstamp_stopped;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	if (counter->attr.exclusive)
+		cpuctx->exclusive = 1;
+
+	return 0;
+}
+
+static int
+group_sched_in(struct perf_counter *group_counter,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx,
+	       int cpu)
+{
+	struct perf_counter *counter, *partial_group;
+	int ret;
+
+	if (group_counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
+
+	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
+		return -EAGAIN;
+
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
+			partial_group = counter;
+			goto group_error;
+		}
+	}
+
+	return 0;
+
+group_error:
+	/*
+	 * Groups can be scheduled in as one unit only, so undo any
+	 * partial group before returning:
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		if (counter == partial_group)
+			break;
+		counter_sched_out(counter, cpuctx, ctx);
+	}
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	return -EAGAIN;
+}
+
+/*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+
+	return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software counters can always go on.
+	 */
+	if (is_software_only_group(counter))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * counters can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * counters on the CPU, it can't go on.
+	 */
+	if (counter->attr.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+static void add_counter_to_ctx(struct perf_counter *counter,
+			       struct perf_counter_context *ctx)
+{
+	list_add_counter(counter, ctx);
+	counter->tstamp_enabled = ctx->time;
+	counter->tstamp_running = ctx->time;
+	counter->tstamp_stopped = ctx->time;
+}
+
+/*
+ * Cross CPU call to install and enable a performance counter
+ *
+ * Must be called with ctx->mutex held
+ */
+static void __perf_install_in_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
+	int cpu = smp_processor_id();
+	int err;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no counters.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * counters on a global level. NOP for non NMI based counters.
+	 */
+	perf_disable();
+
+	add_counter_to_ctx(counter, ctx);
+
+	/*
+	 * Don't put the counter on if it is disabled or if
+	 * it is in a group and the group isn't on.
+	 */
+	if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
+	    (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
+		goto unlock;
+
+	/*
+	 * An exclusive counter can't go on if there are already active
+	 * hardware counters, and no hardware counter can go on if there
+	 * is already an exclusive counter on.
+	 */
+	if (!group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+	if (err) {
+		/*
+		 * This counter couldn't go on.  If it is in a group
+		 * then we have to pull the whole group off.
+		 * If the counter group is pinned then put it in error state.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
+	}
+
+	if (!err && !ctx->task && cpuctx->max_pertask)
+		cpuctx->max_pertask--;
+
+ unlock:
+	perf_enable();
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Attach a performance counter to a context
+ *
+ * First we add the counter to the list with the hardware enable bit
+ * in counter->hw_config cleared.
+ *
+ * If the counter is attached to a task which is on a CPU we use a smp
+ * call to enable it in the task context. The task might have been
+ * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
+ */
+static void
+perf_install_in_context(struct perf_counter_context *ctx,
+			struct perf_counter *counter,
+			int cpu)
+{
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu counters are installed via an smp call and
+		 * the install is always sucessful.
+		 */
+		smp_call_function_single(cpu, __perf_install_in_context,
+					 counter, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_install_in_context,
+				 counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * we need to retry the smp call.
+	 */
+	if (ctx->is_active && list_empty(&counter->list_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can add the counter safely, if it the call above did not
+	 * succeed.
+	 */
+	if (list_empty(&counter->list_entry))
+		add_counter_to_ctx(counter, ctx);
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Cross CPU call to enable a performance counter
+ */
+static void __perf_counter_enable(void *info)
+{
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
+	int err;
+
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto unlock;
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+
+	/*
+	 * If the counter is in a group and isn't the group leader,
+	 * then don't put it on unless the group is on.
+	 */
+	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
+		goto unlock;
+
+	if (!group_can_go_on(counter, cpuctx, 1)) {
+		err = -EEXIST;
+	} else {
+		perf_disable();
+		if (counter == leader)
+			err = group_sched_in(counter, cpuctx, ctx,
+					     smp_processor_id());
+		else
+			err = counter_sched_in(counter, cpuctx, ctx,
+					       smp_processor_id());
+		perf_enable();
+	}
+
+	if (err) {
+		/*
+		 * If this counter can't go on and it's part of a
+		 * group, then the whole group has to come off.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
+	}
+
+ unlock:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Enable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_counter_for_each_child or perf_counter_for_each as described
+ * for perf_counter_disable.
+ */
+static void perf_counter_enable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Enable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_enable,
+					 counter, 1);
+		return;
+	}
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto out;
+
+	/*
+	 * If the counter is in error state, clear that first.
+	 * That way, if we see the counter in error state below, we
+	 * know that it has gone back into error state, as distinct
+	 * from the task having been scheduled away before the
+	 * cross-call arrived.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+ retry:
+	spin_unlock_irq(&ctx->lock);
+	task_oncpu_function_call(task, __perf_counter_enable, counter);
+
+	spin_lock_irq(&ctx->lock);
+
+	/*
+	 * If the context is active and the counter is still off,
+	 * we need to retry the cross-call.
+	 */
+	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
+		goto retry;
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_OFF) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
+	}
+ out:
+	spin_unlock_irq(&ctx->lock);
+}
+
+static int perf_counter_refresh(struct perf_counter *counter, int refresh)
+{
+	/*
+	 * not supported on inherited counters
+	 */
+	if (counter->attr.inherit)
+		return -EINVAL;
+
+	atomic_add(refresh, &counter->event_limit);
+	perf_counter_enable(counter);
+
+	return 0;
+}
+
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 0;
+	if (likely(!ctx->nr_counters))
+		goto out;
+	update_context_time(ctx);
+
+	perf_disable();
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+			if (counter != counter->group_leader)
+				counter_sched_out(counter, cpuctx, ctx);
+			else
+				group_sched_out(counter, cpuctx, ctx);
+		}
+	}
+	perf_enable();
+ out:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled counters.
+ * If the number of enabled counters is the same, then the set
+ * of enabled counters should be the same, because these are both
+ * inherited contexts, therefore we can't access individual counters
+ * in them directly with an fd; we can only enable/disable all
+ * counters via prctl, or enable/disable all counters in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_counter_context *ctx1,
+			 struct perf_counter_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& !ctx1->pin_count && !ctx2->pin_count;
+}
+
+/*
+ * Called from scheduler to remove the counters of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each counter and update the counter value in counter->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * not restart the counter.
+ */
+void perf_counter_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter_context *next_ctx;
+	struct perf_counter_context *parent;
+	struct pt_regs *regs;
+	int do_switch = 1;
+
+	regs = task_pt_regs(task);
+	perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	if (likely(!ctx || !cpuctx->task_ctx))
+		return;
+
+	update_context_time(ctx);
+
+	rcu_read_lock();
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_ctx = next->perf_counter_ctxp;
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+		/*
+		 * Looks like the two contexts are clones, so we might be
+		 * able to optimize the context switch.  We lock both
+		 * contexts and check that they are clones under the
+		 * lock (including re-checking that neither has been
+		 * uncloned in the meantime).  It doesn't matter which
+		 * order we take the locks because no other cpu could
+		 * be trying to lock both of these tasks.
+		 */
+		spin_lock(&ctx->lock);
+		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		if (context_equiv(ctx, next_ctx)) {
+			/*
+			 * XXX do we need a memory barrier of sorts
+			 * wrt to rcu_dereference() of perf_counter_ctxp
+			 */
+			task->perf_counter_ctxp = next_ctx;
+			next->perf_counter_ctxp = ctx;
+			ctx->task = next;
+			next_ctx->task = task;
+			do_switch = 0;
+		}
+		spin_unlock(&next_ctx->lock);
+		spin_unlock(&ctx->lock);
+	}
+	rcu_read_unlock();
+
+	if (do_switch) {
+		__perf_counter_sched_out(ctx, cpuctx);
+		cpuctx->task_ctx = NULL;
+	}
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	if (!cpuctx->task_ctx)
+		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
+	__perf_counter_sched_out(ctx, cpuctx);
+	cpuctx->task_ctx = NULL;
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
+{
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
+}
+
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter *counter;
+	int can_add_hw = 1;
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	if (likely(!ctx->nr_counters))
+		goto out;
+
+	ctx->timestamp = perf_clock();
+
+	perf_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    !counter->attr.pinned)
+			continue;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (counter != counter->group_leader)
+			counter_sched_in(counter, cpuctx, ctx, cpu);
+		else {
+			if (group_can_go_on(counter, cpuctx, 1))
+				group_sched_in(counter, cpuctx, ctx, cpu);
+		}
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+			update_group_times(counter);
+			counter->state = PERF_COUNTER_STATE_ERROR;
+		}
+	}
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		/*
+		 * Ignore counters in OFF or ERROR state, and
+		 * ignore pinned counters since we did them already.
+		 */
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    counter->attr.pinned)
+			continue;
+
+		/*
+		 * Listen to the 'cpu' scheduling filter constraint
+		 * of counters:
+		 */
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (counter != counter->group_leader) {
+			if (counter_sched_in(counter, cpuctx, ctx, cpu))
+				can_add_hw = 0;
+		} else {
+			if (group_can_go_on(counter, cpuctx, can_add_hw)) {
+				if (group_sched_in(counter, cpuctx, ctx, cpu))
+					can_add_hw = 0;
+			}
+		}
+	}
+	perf_enable();
+ out:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+
+	if (likely(!ctx))
+		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+	cpuctx->task_ctx = ctx;
+}
+
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_counter *counter, int enable);
+static void perf_log_period(struct perf_counter *counter, u64 period);
+
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 period, sample_period;
+	s64 delta;
+
+	events *= hwc->sample_period;
+	period = div64_u64(events, counter->attr.sample_freq);
+
+	delta = (s64)(period - hwc->sample_period);
+	delta = (delta + 7) / 8; /* low pass filter */
+
+	sample_period = hwc->sample_period + delta;
+
+	if (!sample_period)
+		sample_period = 1;
+
+	perf_log_period(counter, sample_period);
+
+	hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	u64 interrupts, freq;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		hwc = &counter->hw;
+
+		interrupts = hwc->interrupts;
+		hwc->interrupts = 0;
+
+		/*
+		 * unthrottle counters on the tick
+		 */
+		if (interrupts == MAX_INTERRUPTS) {
+			perf_log_throttle(counter, 1);
+			counter->pmu->unthrottle(counter);
+			interrupts = 2*sysctl_perf_counter_sample_rate/HZ;
+		}
+
+		if (!counter->attr.freq || !counter->attr.sample_freq)
+			continue;
+
+		/*
+		 * if the specified freq < HZ then we need to skip ticks
+		 */
+		if (counter->attr.sample_freq < HZ) {
+			freq = counter->attr.sample_freq;
+
+			hwc->freq_count += freq;
+			hwc->freq_interrupts += interrupts;
+
+			if (hwc->freq_count < HZ)
+				continue;
+
+			interrupts = hwc->freq_interrupts;
+			hwc->freq_interrupts = 0;
+			hwc->freq_count -= HZ;
+		} else
+			freq = HZ;
+
+		perf_adjust_period(counter, freq * interrupts);
+
+		/*
+		 * In order to avoid being stalled by an (accidental) huge
+		 * sample period, force reset the sample period if we didn't
+		 * get any events in this freq period.
+		 */
+		if (!interrupts) {
+			perf_disable();
+			counter->pmu->disable(counter);
+			atomic_set(&hwc->period_left, 0);
+			counter->pmu->enable(counter);
+			perf_enable();
+		}
+	}
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	if (!ctx->nr_counters)
+		return;
+
+	spin_lock(&ctx->lock);
+	/*
+	 * Rotate the first entry last (works just fine for group counters too):
+	 */
+	perf_disable();
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		list_move_tail(&counter->list_entry, &ctx->counter_list);
+		break;
+	}
+	perf_enable();
+
+	spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+
+	if (!atomic_read(&nr_counters))
+		return;
+
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	ctx = curr->perf_counter_ctxp;
+
+	perf_ctx_adjust_freq(&cpuctx->ctx);
+	if (ctx)
+		perf_ctx_adjust_freq(ctx);
+
+	perf_counter_cpu_sched_out(cpuctx);
+	if (ctx)
+		__perf_counter_task_sched_out(ctx);
+
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
+
+	perf_counter_cpu_sched_in(cpuctx, cpu);
+	if (ctx)
+		perf_counter_task_sched_in(curr, cpu);
+}
+
+/*
+ * Cross CPU call to read the hardware counter
+ */
+static void __read(void *info)
+{
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (ctx->is_active)
+		update_context_time(ctx);
+	counter->pmu->read(counter);
+	update_counter_times(counter);
+	local_irq_restore(flags);
+}
+
+static u64 perf_counter_read(struct perf_counter *counter)
+{
+	/*
+	 * If counter is enabled and currently active on a CPU, update the
+	 * value in the counter structure:
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+		smp_call_function_single(counter->oncpu,
+					 __read, counter, 1);
+	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
+	}
+
+	return atomic64_read(&counter->count);
+}
+
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
+static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
+{
+	struct perf_counter_context *parent_ctx;
+	struct perf_counter_context *ctx;
+	struct perf_cpu_context *cpuctx;
+	struct task_struct *task;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * If cpu is not a wildcard then this is a percpu counter:
+	 */
+	if (cpu != -1) {
+		/* Must be root to operate on a CPU counter: */
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EACCES);
+
+		if (cpu < 0 || cpu > num_possible_cpus())
+			return ERR_PTR(-EINVAL);
+
+		/*
+		 * We could be clever and allow to attach a counter to an
+		 * offline CPU and activate it when the CPU comes up, but
+		 * that's for later.
+		 */
+		if (!cpu_isset(cpu, cpu_online_map))
+			return ERR_PTR(-ENODEV);
+
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+		get_ctx(ctx);
+
+		return ctx;
+	}
+
+	rcu_read_lock();
+	if (!pid)
+		task = current;
+	else
+		task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	/*
+	 * Can't attach counters to a dying task.
+	 */
+	err = -ESRCH;
+	if (task->flags & PF_EXITING)
+		goto errout;
+
+	/* Reuse ptrace permission checks for now. */
+	err = -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto errout;
+
+ retry:
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		parent_ctx = ctx->parent_ctx;
+		if (parent_ctx) {
+			put_ctx(parent_ctx);
+			ctx->parent_ctx = NULL;		/* no longer a clone */
+		}
+		/*
+		 * Get an extra reference before dropping the lock so that
+		 * this context won't get freed if the task exits.
+		 */
+		get_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+		err = -ENOMEM;
+		if (!ctx)
+			goto errout;
+		__perf_counter_init_context(ctx, task);
+		get_ctx(ctx);
+		if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+			 */
+			kfree(ctx);
+			goto retry;
+		}
+		get_task_struct(task);
+	}
+
+	put_task_struct(task);
+	return ctx;
+
+ errout:
+	put_task_struct(task);
+	return ERR_PTR(err);
+}
+
+static void free_counter_rcu(struct rcu_head *head)
+{
+	struct perf_counter *counter;
+
+	counter = container_of(head, struct perf_counter, rcu_head);
+	if (counter->ns)
+		put_pid_ns(counter->ns);
+	kfree(counter);
+}
+
+static void perf_pending_sync(struct perf_counter *counter);
+
+static void free_counter(struct perf_counter *counter)
+{
+	perf_pending_sync(counter);
+
+	atomic_dec(&nr_counters);
+	if (counter->attr.mmap)
+		atomic_dec(&nr_mmap_counters);
+	if (counter->attr.comm)
+		atomic_dec(&nr_comm_counters);
+
+	if (counter->destroy)
+		counter->destroy(counter);
+
+	put_ctx(counter->ctx);
+	call_rcu(&counter->rcu_head, free_counter_rcu);
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+	struct perf_counter *counter = file->private_data;
+	struct perf_counter_context *ctx = counter->ctx;
+
+	file->private_data = NULL;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_counter_remove_from_context(counter);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&counter->owner->perf_counter_mutex);
+	list_del_init(&counter->owner_entry);
+	mutex_unlock(&counter->owner->perf_counter_mutex);
+	put_task_struct(counter->owner);
+
+	free_counter(counter);
+
+	return 0;
+}
+
+/*
+ * Read the performance counter - simple non blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
+{
+	u64 values[3];
+	int n;
+
+	/*
+	 * Return end-of-file for a read on a counter that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		return 0;
+
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
+	mutex_lock(&counter->child_mutex);
+	values[0] = perf_counter_read(counter);
+	n = 1;
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
+	if (counter->attr.read_format & PERF_FORMAT_ID)
+		values[n++] = counter->id;
+	mutex_unlock(&counter->child_mutex);
+
+	if (count < n * sizeof(u64))
+		return -EINVAL;
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct perf_counter *counter = file->private_data;
+
+	return perf_read_hw(counter, buf, count);
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+	struct perf_counter *counter = file->private_data;
+	struct perf_mmap_data *data;
+	unsigned int events = POLL_HUP;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data)
+		events = atomic_xchg(&data->poll, 0);
+	rcu_read_unlock();
+
+	poll_wait(file, &counter->waitq, wait);
+
+	return events;
+}
+
+static void perf_counter_reset(struct perf_counter *counter)
+{
+	(void)perf_counter_read(counter);
+	atomic64_set(&counter->count, 0);
+	perf_counter_update_userpage(counter);
+}
+
+static void perf_counter_for_each_sibling(struct perf_counter *counter,
+					  void (*func)(struct perf_counter *))
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *sibling;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	counter = counter->group_leader;
+
+	func(counter);
+	list_for_each_entry(sibling, &counter->sibling_list, list_entry)
+		func(sibling);
+	mutex_unlock(&ctx->mutex);
+}
+
+/*
+ * Holding the top-level counter's child_mutex means that any
+ * descendant process that has inherited this counter will block
+ * in sync_child_counter if it goes to exit, thus satisfying the
+ * task existence requirements of perf_counter_enable/disable.
+ */
+static void perf_counter_for_each_child(struct perf_counter *counter,
+					void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
+	mutex_lock(&counter->child_mutex);
+	func(counter);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		func(child);
+	mutex_unlock(&counter->child_mutex);
+}
+
+static void perf_counter_for_each(struct perf_counter *counter,
+				  void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
+	mutex_lock(&counter->child_mutex);
+	perf_counter_for_each_sibling(counter, func);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_for_each_sibling(child, func);
+	mutex_unlock(&counter->child_mutex);
+}
+
+static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long size;
+	int ret = 0;
+	u64 value;
+
+	if (!counter->attr.sample_period)
+		return -EINVAL;
+
+	size = copy_from_user(&value, arg, sizeof(value));
+	if (size != sizeof(value))
+		return -EFAULT;
+
+	if (!value)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->attr.freq) {
+		if (value > sysctl_perf_counter_sample_rate) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		counter->attr.sample_freq = value;
+	} else {
+		perf_log_period(counter, value);
+
+		counter->attr.sample_period = value;
+		counter->hw.sample_period = value;
+	}
+unlock:
+	spin_unlock_irq(&ctx->lock);
+
+	return ret;
+}
+
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_counter *counter = file->private_data;
+	void (*func)(struct perf_counter *);
+	u32 flags = arg;
+
+	switch (cmd) {
+	case PERF_COUNTER_IOC_ENABLE:
+		func = perf_counter_enable;
+		break;
+	case PERF_COUNTER_IOC_DISABLE:
+		func = perf_counter_disable;
+		break;
+	case PERF_COUNTER_IOC_RESET:
+		func = perf_counter_reset;
+		break;
+
+	case PERF_COUNTER_IOC_REFRESH:
+		return perf_counter_refresh(counter, arg);
+
+	case PERF_COUNTER_IOC_PERIOD:
+		return perf_counter_period(counter, (u64 __user *)arg);
+
+	default:
+		return -ENOTTY;
+	}
+
+	if (flags & PERF_IOC_FLAG_GROUP)
+		perf_counter_for_each(counter, func);
+	else
+		perf_counter_for_each_child(counter, func);
+
+	return 0;
+}
+
+int perf_counter_task_enable(void)
+{
+	struct perf_counter *counter;
+
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_for_each_child(counter, perf_counter_enable);
+	mutex_unlock(&current->perf_counter_mutex);
+
+	return 0;
+}
+
+int perf_counter_task_disable(void)
+{
+	struct perf_counter *counter;
+
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_for_each_child(counter, perf_counter_disable);
+	mutex_unlock(&current->perf_counter_mutex);
+
+	return 0;
+}
+
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We can not serialize this because the arch
+ * code calls this from NMI context.
+ */
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_counter_mmap_page *userpg;
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	userpg = data->user_page;
+
+	/*
+	 * Disable preemption so as to not let the corresponding user-space
+	 * spin too long if we get preempted.
+	 */
+	preempt_disable();
+	++userpg->lock;
+	barrier();
+	userpg->index = counter->hw.idx;
+	userpg->offset = atomic64_read(&counter->count);
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+
+	barrier();
+	++userpg->lock;
+	preempt_enable();
+unlock:
+	rcu_read_unlock();
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff == 0) {
+		vmf->page = virt_to_page(data->user_page);
+	} else {
+		int nr = vmf->pgoff - 1;
+
+		if ((unsigned)nr > data->nr_pages)
+			goto unlock;
+
+		vmf->page = virt_to_page(data->data_pages[nr]);
+	}
+	get_page(vmf->page);
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	int i;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += nr_pages * sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!data->user_page)
+		goto fail_user_page;
+
+	for (i = 0; i < nr_pages; i++) {
+		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!data->data_pages[i])
+			goto fail_data_pages;
+	}
+
+	data->nr_pages = nr_pages;
+	atomic_set(&data->lock, -1);
+
+	rcu_assign_pointer(counter->data, data);
+
+	return 0;
+
+fail_data_pages:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)data->data_pages[i]);
+
+	free_page((unsigned long)data->user_page);
+
+fail_user_page:
+	kfree(data);
+
+fail:
+	return -ENOMEM;
+}
+
+static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data;
+	int i;
+
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+
+	free_page((unsigned long)data->user_page);
+	for (i = 0; i < data->nr_pages; i++)
+		free_page((unsigned long)data->data_pages[i]);
+	kfree(data);
+}
+
+static void perf_mmap_data_free(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data = counter->data;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	rcu_assign_pointer(counter->data, NULL);
+	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+}
+
+static void perf_mmap_open(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	atomic_inc(&counter->mmap_count);
+}
+
+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
+	if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
+		vma->vm_mm->locked_vm -= counter->data->nr_locked;
+		perf_mmap_data_free(counter);
+		mutex_unlock(&counter->mmap_mutex);
+	}
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+	.open  = perf_mmap_open,
+	.close = perf_mmap_close,
+	.fault = perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = file->private_data;
+	unsigned long user_locked, user_lock_limit;
+	struct user_struct *user = current_user();
+	unsigned long locked, lock_limit;
+	unsigned long vma_size;
+	unsigned long nr_pages;
+	long user_extra, extra;
+	int ret = 0;
+
+	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+
+	vma_size = vma->vm_end - vma->vm_start;
+	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	/*
+	 * If we have data pages ensure they're a power-of-two number, so we
+	 * can do bitmasks instead of modulo.
+	 */
+	if (nr_pages != 0 && !is_power_of_2(nr_pages))
+		return -EINVAL;
+
+	if (vma_size != PAGE_SIZE * (1 + nr_pages))
+		return -EINVAL;
+
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
+	mutex_lock(&counter->mmap_mutex);
+	if (atomic_inc_not_zero(&counter->mmap_count)) {
+		if (nr_pages != counter->data->nr_pages)
+			ret = -EINVAL;
+		goto unlock;
+	}
+
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+
+	/*
+	 * Increase the limit linearly with more CPUs:
+	 */
+	user_lock_limit *= num_online_cpus();
+
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
+
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
+
+	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
+
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		ret = -EPERM;
+		goto unlock;
+	}
+
+	WARN_ON(counter->data);
+	ret = perf_mmap_data_alloc(counter, nr_pages);
+	if (ret)
+		goto unlock;
+
+	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
+	vma->vm_mm->locked_vm += extra;
+	counter->data->nr_locked = extra;
+unlock:
+	mutex_unlock(&counter->mmap_mutex);
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &perf_mmap_vmops;
+
+	return ret;
+}
+
+static int perf_fasync(int fd, struct file *filp, int on)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct perf_counter *counter = filp->private_data;
+	int retval;
+
+	mutex_lock(&inode->i_mutex);
+	retval = fasync_helper(fd, filp, on, &counter->fasync);
+	mutex_unlock(&inode->i_mutex);
+
+	if (retval < 0)
+		return retval;
+
+	return 0;
+}
+
+static const struct file_operations perf_fops = {
+	.release		= perf_release,
+	.read			= perf_read,
+	.poll			= perf_poll,
+	.unlocked_ioctl		= perf_ioctl,
+	.compat_ioctl		= perf_ioctl,
+	.mmap			= perf_mmap,
+	.fasync			= perf_fasync,
+};
+
+/*
+ * Perf counter wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_counter_wakeup(struct perf_counter *counter)
+{
+	wake_up_all(&counter->waitq);
+
+	if (counter->pending_kill) {
+		kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
+		counter->pending_kill = 0;
+	}
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+static void perf_pending_counter(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending);
+
+	if (counter->pending_disable) {
+		counter->pending_disable = 0;
+		perf_counter_disable(counter);
+	}
+
+	if (counter->pending_wakeup) {
+		counter->pending_wakeup = 0;
+		perf_counter_wakeup(counter);
+	}
+}
+
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
+{
+	struct perf_pending_entry **head;
+
+	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	entry->func = func;
+
+	head = &get_cpu_var(perf_pending_head);
+
+	do {
+		entry->next = *head;
+	} while (cmpxchg(head, entry->next, entry) != entry->next);
+
+	set_perf_counter_pending();
+
+	put_cpu_var(perf_pending_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_pending_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		void (*func)(struct perf_pending_entry *);
+		struct perf_pending_entry *entry = list;
+
+		list = list->next;
+
+		func = entry->func;
+		entry->next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		func(entry);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_counter *counter)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup. -- see perf_pending_handle()
+	 */
+	smp_rmb();
+	return counter->pending.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_counter *counter)
+{
+	wait_event(counter->waitq, perf_not_pending(counter));
+}
+
+void perf_counter_do_pending(void)
+{
+	__perf_pending_run();
+}
+
+/*
+ * Callchain support -- arch specific
+ */
+
+__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	return NULL;
+}
+
+/*
+ * Output
+ */
+
+struct perf_output_handle {
+	struct perf_counter	*counter;
+	struct perf_mmap_data	*data;
+	unsigned long		head;
+	unsigned long		offset;
+	int			nmi;
+	int			overflow;
+	int			locked;
+	unsigned long		flags;
+};
+
+static void perf_output_wakeup(struct perf_output_handle *handle)
+{
+	atomic_set(&handle->data->poll, POLL_IN);
+
+	if (handle->nmi) {
+		handle->counter->pending_wakeup = 1;
+		perf_pending_queue(&handle->counter->pending,
+				   perf_pending_counter);
+	} else
+		perf_counter_wakeup(handle->counter);
+}
+
+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event doesn't publish a head when a former
+ * event isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int cpu;
+
+	handle->locked = 0;
+
+	local_irq_save(handle->flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi() && atomic_read(&data->lock) == cpu)
+		return;
+
+	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+		cpu_relax();
+
+	handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	unsigned long head;
+	int cpu;
+
+	data->done_head = data->head;
+
+	if (!handle->locked)
+		goto out;
+
+again:
+	/*
+	 * The xchg implies a full barrier that ensures all writes are done
+	 * before we publish the new head, matched by a rmb() in userspace when
+	 * reading this position.
+	 */
+	while ((head = atomic_long_xchg(&data->done_head, 0)))
+		data->user_page->data_head = head;
+
+	/*
+	 * NMI can happen here, which means we can miss a done_head update.
+	 */
+
+	cpu = atomic_xchg(&data->lock, -1);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
+	/*
+	 * Therefore we have to validate we did not indeed do so.
+	 */
+	if (unlikely(atomic_long_read(&data->done_head))) {
+		/*
+		 * Since we had it locked, we can lock it again.
+		 */
+		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+			cpu_relax();
+
+		goto again;
+	}
+
+	if (atomic_xchg(&data->wakeup, 0))
+		perf_output_wakeup(handle);
+out:
+	local_irq_restore(handle->flags);
+}
+
+static int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_counter *counter, unsigned int size,
+			     int nmi, int overflow)
+{
+	struct perf_mmap_data *data;
+	unsigned int offset, head;
+
+	/*
+	 * For inherited counters we send all the output towards the parent.
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto out;
+
+	handle->data	 = data;
+	handle->counter	 = counter;
+	handle->nmi	 = nmi;
+	handle->overflow = overflow;
+
+	if (!data->nr_pages)
+		goto fail;
+
+	perf_output_lock(handle);
+
+	do {
+		offset = head = atomic_long_read(&data->head);
+		head += size;
+	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
+
+	handle->offset	= offset;
+	handle->head	= head;
+
+	if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
+		atomic_set(&data->wakeup, 1);
+
+	return 0;
+
+fail:
+	perf_output_wakeup(handle);
+out:
+	rcu_read_unlock();
+
+	return -ENOSPC;
+}
+
+static void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len)
+{
+	unsigned int pages_mask;
+	unsigned int offset;
+	unsigned int size;
+	void **pages;
+
+	offset		= handle->offset;
+	pages_mask	= handle->data->nr_pages - 1;
+	pages		= handle->data->data_pages;
+
+	do {
+		unsigned int page_offset;
+		int nr;
+
+		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
+		page_offset = offset & (PAGE_SIZE - 1);
+		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+
+		memcpy(pages[nr] + page_offset, buf, size);
+
+		len	    -= size;
+		buf	    += size;
+		offset	    += size;
+	} while (len);
+
+	handle->offset = offset;
+
+	/*
+	 * Check we didn't copy past our reservation window, taking the
+	 * possible unsigned int wrap into account.
+	 */
+	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+}
+
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
+static void perf_output_end(struct perf_output_handle *handle)
+{
+	struct perf_counter *counter = handle->counter;
+	struct perf_mmap_data *data = handle->data;
+
+	int wakeup_events = counter->attr.wakeup_events;
+
+	if (handle->overflow && wakeup_events) {
+		int events = atomic_inc_return(&data->events);
+		if (events >= wakeup_events) {
+			atomic_sub(wakeup_events, &data->events);
+			atomic_set(&data->wakeup, 1);
+		}
+	}
+
+	perf_output_unlock(handle);
+	rcu_read_unlock();
+}
+
+static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_tgid_nr_ns(p, counter->ns);
+}
+
+static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_pid_nr_ns(p, counter->ns);
+}
+
+static void perf_counter_output(struct perf_counter *counter, int nmi,
+				struct perf_sample_data *data)
+{
+	int ret;
+	u64 sample_type = counter->attr.sample_type;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	u64 ip;
+	struct {
+		u32 pid, tid;
+	} tid_entry;
+	struct {
+		u64 id;
+		u64 counter;
+	} group_entry;
+	struct perf_callchain_entry *callchain = NULL;
+	int callchain_size = 0;
+	u64 time;
+	struct {
+		u32 cpu, reserved;
+	} cpu_entry;
+
+	header.type = 0;
+	header.size = sizeof(header);
+
+	header.misc = PERF_EVENT_MISC_OVERFLOW;
+	header.misc |= perf_misc_flags(data->regs);
+
+	if (sample_type & PERF_SAMPLE_IP) {
+		ip = perf_instruction_pointer(data->regs);
+		header.type |= PERF_SAMPLE_IP;
+		header.size += sizeof(ip);
+	}
+
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		tid_entry.pid = perf_counter_pid(counter, current);
+		tid_entry.tid = perf_counter_tid(counter, current);
+
+		header.type |= PERF_SAMPLE_TID;
+		header.size += sizeof(tid_entry);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		/*
+		 * Maybe do better on x86 and provide cpu_clock_nmi()
+		 */
+		time = sched_clock();
+
+		header.type |= PERF_SAMPLE_TIME;
+		header.size += sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR) {
+		header.type |= PERF_SAMPLE_ADDR;
+		header.size += sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_ID) {
+		header.type |= PERF_SAMPLE_ID;
+		header.size += sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		header.type |= PERF_SAMPLE_CPU;
+		header.size += sizeof(cpu_entry);
+
+		cpu_entry.cpu = raw_smp_processor_id();
+	}
+
+	if (sample_type & PERF_SAMPLE_PERIOD) {
+		header.type |= PERF_SAMPLE_PERIOD;
+		header.size += sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_GROUP) {
+		header.type |= PERF_SAMPLE_GROUP;
+		header.size += sizeof(u64) +
+			counter->nr_siblings * sizeof(group_entry);
+	}
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		callchain = perf_callchain(data->regs);
+
+		if (callchain) {
+			callchain_size = (1 + callchain->nr) * sizeof(u64);
+
+			header.type |= PERF_SAMPLE_CALLCHAIN;
+			header.size += callchain_size;
+		}
+	}
+
+	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, header);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		perf_output_put(&handle, ip);
+
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(&handle, tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(&handle, time);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		perf_output_put(&handle, data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(&handle, counter->id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(&handle, cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		perf_output_put(&handle, data->period);
+
+	/*
+	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
+	 */
+	if (sample_type & PERF_SAMPLE_GROUP) {
+		struct perf_counter *leader, *sub;
+		u64 nr = counter->nr_siblings;
+
+		perf_output_put(&handle, nr);
+
+		leader = counter->group_leader;
+		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+			if (sub != counter)
+				sub->pmu->read(sub);
+
+			group_entry.id = sub->id;
+			group_entry.counter = atomic64_read(&sub->count);
+
+			perf_output_put(&handle, group_entry);
+		}
+	}
+
+	if (callchain)
+		perf_output_copy(&handle, callchain, callchain_size);
+
+	perf_output_end(&handle);
+}
+
+/*
+ * fork tracking
+ */
+
+struct perf_fork_event {
+	struct task_struct	*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				ppid;
+	} event;
+};
+
+static void perf_counter_fork_output(struct perf_counter *counter,
+				     struct perf_fork_event *fork_event)
+{
+	struct perf_output_handle handle;
+	int size = fork_event->event.header.size;
+	struct task_struct *task = fork_event->task;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	fork_event->event.pid = perf_counter_pid(counter, task);
+	fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+
+	perf_output_put(&handle, fork_event->event);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_fork_match(struct perf_counter *counter)
+{
+	if (counter->attr.comm || counter->attr.mmap)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
+				  struct perf_fork_event *fork_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_fork_match(counter))
+			perf_counter_fork_output(counter, fork_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_fork_ctx(ctx, fork_event);
+	rcu_read_unlock();
+}
+
+void perf_counter_fork(struct task_struct *task)
+{
+	struct perf_fork_event fork_event;
+
+	if (!atomic_read(&nr_comm_counters) &&
+	    !atomic_read(&nr_mmap_counters))
+		return;
+
+	fork_event = (struct perf_fork_event){
+		.task	= task,
+		.event  = {
+			.header = {
+				.type = PERF_EVENT_FORK,
+				.size = sizeof(fork_event.event),
+			},
+		},
+	};
+
+	perf_counter_fork_event(&fork_event);
+}
+
+/*
+ * comm tracking
+ */
+
+struct perf_comm_event {
+	struct task_struct	*task;
+	char			*comm;
+	int			comm_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+	} event;
+};
+
+static void perf_counter_comm_output(struct perf_counter *counter,
+				     struct perf_comm_event *comm_event)
+{
+	struct perf_output_handle handle;
+	int size = comm_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	comm_event->event.pid = perf_counter_pid(counter, comm_event->task);
+	comm_event->event.tid = perf_counter_tid(counter, comm_event->task);
+
+	perf_output_put(&handle, comm_event->event);
+	perf_output_copy(&handle, comm_event->comm,
+				   comm_event->comm_size);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_comm_match(struct perf_counter *counter)
+{
+	if (counter->attr.comm)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
+				  struct perf_comm_event *comm_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_comm_match(counter))
+			perf_counter_comm_output(counter, comm_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_comm_event(struct perf_comm_event *comm_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+	unsigned int size;
+	char *comm = comm_event->task->comm;
+
+	size = ALIGN(strlen(comm)+1, sizeof(u64));
+
+	comm_event->comm = comm;
+	comm_event->comm_size = size;
+
+	comm_event->event.header.size = sizeof(comm_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_comm_ctx(ctx, comm_event);
+	rcu_read_unlock();
+}
+
+void perf_counter_comm(struct task_struct *task)
+{
+	struct perf_comm_event comm_event;
+
+	if (!atomic_read(&nr_comm_counters))
+		return;
+
+	comm_event = (struct perf_comm_event){
+		.task	= task,
+		.event  = {
+			.header = { .type = PERF_EVENT_COMM, },
+		},
+	};
+
+	perf_counter_comm_event(&comm_event);
+}
+
+/*
+ * mmap tracking
+ */
+
+struct perf_mmap_event {
+	struct vm_area_struct	*vma;
+
+	const char		*file_name;
+	int			file_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				start;
+		u64				len;
+		u64				pgoff;
+	} event;
+};
+
+static void perf_counter_mmap_output(struct perf_counter *counter,
+				     struct perf_mmap_event *mmap_event)
+{
+	struct perf_output_handle handle;
+	int size = mmap_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	mmap_event->event.pid = perf_counter_pid(counter, current);
+	mmap_event->event.tid = perf_counter_tid(counter, current);
+
+	perf_output_put(&handle, mmap_event->event);
+	perf_output_copy(&handle, mmap_event->file_name,
+				   mmap_event->file_size);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_mmap_match(struct perf_counter *counter,
+				   struct perf_mmap_event *mmap_event)
+{
+	if (counter->attr.mmap)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
+				  struct perf_mmap_event *mmap_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_mmap_match(counter, mmap_event))
+			perf_counter_mmap_output(counter, mmap_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+	struct vm_area_struct *vma = mmap_event->vma;
+	struct file *file = vma->vm_file;
+	unsigned int size;
+	char tmp[16];
+	char *buf = NULL;
+	const char *name;
+
+	if (file) {
+		buf = kzalloc(PATH_MAX, GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = d_path(&file->f_path, buf, PATH_MAX);
+		if (IS_ERR(name)) {
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
+		}
+	} else {
+		name = arch_vma_name(mmap_event->vma);
+		if (name)
+			goto got_name;
+
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		}
+
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
+	}
+
+got_name:
+	size = ALIGN(strlen(name)+1, sizeof(u64));
+
+	mmap_event->file_name = name;
+	mmap_event->file_size = size;
+
+	mmap_event->event.header.size = sizeof(mmap_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_mmap_ctx(ctx, mmap_event);
+	rcu_read_unlock();
+
+	kfree(buf);
+}
+
+void __perf_counter_mmap(struct vm_area_struct *vma)
+{
+	struct perf_mmap_event mmap_event;
+
+	if (!atomic_read(&nr_mmap_counters))
+		return;
+
+	mmap_event = (struct perf_mmap_event){
+		.vma	= vma,
+		.event  = {
+			.header = { .type = PERF_EVENT_MMAP, },
+			.start  = vma->vm_start,
+			.len    = vma->vm_end - vma->vm_start,
+			.pgoff  = vma->vm_pgoff,
+		},
+	};
+
+	perf_counter_mmap_event(&mmap_event);
+}
+
+/*
+ * Log sample_period changes so that analyzing tools can re-normalize the
+ * event flow.
+ */
+
+struct freq_event {
+	struct perf_event_header	header;
+	u64				time;
+	u64				id;
+	u64				period;
+};
+
+static void perf_log_period(struct perf_counter *counter, u64 period)
+{
+	struct perf_output_handle handle;
+	struct freq_event event;
+	int ret;
+
+	if (counter->hw.sample_period == period)
+		return;
+
+	if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+		return;
+
+	event = (struct freq_event) {
+		.header = {
+			.type = PERF_EVENT_PERIOD,
+			.misc = 0,
+			.size = sizeof(event),
+		},
+		.time = sched_clock(),
+		.id = counter->id,
+		.period = period,
+	};
+
+	ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, event);
+	perf_output_end(&handle);
+}
+
+/*
+ * IRQ throttle logging
+ */
+
+static void perf_log_throttle(struct perf_counter *counter, int enable)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+		u64				id;
+	} throttle_event = {
+		.header = {
+			.type = PERF_EVENT_THROTTLE + 1,
+			.misc = 0,
+			.size = sizeof(throttle_event),
+		},
+		.time	= sched_clock(),
+		.id	= counter->id,
+	};
+
+	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, throttle_event);
+	perf_output_end(&handle);
+}
+
+/*
+ * Generic counter overflow handling.
+ */
+
+int perf_counter_overflow(struct perf_counter *counter, int nmi,
+			  struct perf_sample_data *data)
+{
+	int events = atomic_read(&counter->event_limit);
+	int throttle = counter->pmu->unthrottle != NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
+	int ret = 0;
+
+	if (!throttle) {
+		hwc->interrupts++;
+	} else {
+		if (hwc->interrupts != MAX_INTERRUPTS) {
+			hwc->interrupts++;
+			if (HZ * hwc->interrupts >
+					(u64)sysctl_perf_counter_sample_rate) {
+				hwc->interrupts = MAX_INTERRUPTS;
+				perf_log_throttle(counter, 0);
+				ret = 1;
+			}
+		} else {
+			/*
+			 * Keep re-disabling counters even though on the previous
+			 * pass we disabled it - just in case we raced with a
+			 * sched-in and the counter got enabled again:
+			 */
+			ret = 1;
+		}
+	}
+
+	if (counter->attr.freq) {
+		u64 now = sched_clock();
+		s64 delta = now - hwc->freq_stamp;
+
+		hwc->freq_stamp = now;
+
+		if (delta > 0 && delta < TICK_NSEC)
+			perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+	}
+
+	/*
+	 * XXX event_limit might not quite work as expected on inherited
+	 * counters
+	 */
+
+	counter->pending_kill = POLL_IN;
+	if (events && atomic_dec_and_test(&counter->event_limit)) {
+		ret = 1;
+		counter->pending_kill = POLL_HUP;
+		if (nmi) {
+			counter->pending_disable = 1;
+			perf_pending_queue(&counter->pending,
+					   perf_pending_counter);
+		} else
+			perf_counter_disable(counter);
+	}
+
+	perf_counter_output(counter, nmi, data);
+	return ret;
+}
+
+/*
+ * Generic software counter infrastructure
+ */
+
+static void perf_swcounter_update(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 prev, now;
+	s64 delta;
+
+again:
+	prev = atomic64_read(&hwc->prev_count);
+	now = atomic64_read(&hwc->count);
+	if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
+		goto again;
+
+	delta = now - prev;
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
+static void perf_swcounter_set_period(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_add(period, &hwc->period_left);
+		hwc->last_period = period;
+	}
+
+	atomic64_set(&hwc->prev_count, -left);
+	atomic64_set(&hwc->count, -left);
+}
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
+	struct perf_counter *counter;
+	u64 period;
+
+	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
+	counter->pmu->read(counter);
+
+	data.addr = 0;
+	data.regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((counter->attr.exclude_kernel || !data.regs) &&
+			!counter->attr.exclude_user)
+		data.regs = task_pt_regs(current);
+
+	if (data.regs) {
+		if (perf_counter_overflow(counter, 0, &data))
+			ret = HRTIMER_NORESTART;
+	}
+
+	period = max_t(u64, 10000, counter->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return ret;
+}
+
+static void perf_swcounter_overflow(struct perf_counter *counter,
+				    int nmi, struct pt_regs *regs, u64 addr)
+{
+	struct perf_sample_data data = {
+		.regs	= regs,
+		.addr	= addr,
+		.period	= counter->hw.last_period,
+	};
+
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
+	if (perf_counter_overflow(counter, nmi, &data))
+		/* soft-disable the counter */
+		;
+
+}
+
+static int perf_swcounter_is_counting(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx;
+	unsigned long flags;
+	int count;
+
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		return 1;
+
+	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
+		return 0;
+
+	/*
+	 * If the counter is inactive, it could be just because
+	 * its task is scheduled out, or because it's in a group
+	 * which could not go on the PMU.  We want to count in
+	 * the first case but not the second.  If the context is
+	 * currently active then an inactive software counter must
+	 * be the second case.  If it's not currently active then
+	 * we need to know whether the counter was active when the
+	 * context was last active, which we can determine by
+	 * comparing counter->tstamp_stopped with ctx->time.
+	 *
+	 * We are within an RCU read-side critical section,
+	 * which protects the existence of *ctx.
+	 */
+	ctx = counter->ctx;
+	spin_lock_irqsave(&ctx->lock, flags);
+	count = 1;
+	/* Re-check state now we have the lock */
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+	    counter->ctx->is_active ||
+	    counter->tstamp_stopped < ctx->time)
+		count = 0;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	return count;
+}
+
+static int perf_swcounter_match(struct perf_counter *counter,
+				enum perf_type_id type,
+				u32 event, struct pt_regs *regs)
+{
+	if (!perf_swcounter_is_counting(counter))
+		return 0;
+
+	if (counter->attr.type != type)
+		return 0;
+	if (counter->attr.config != event)
+		return 0;
+
+	if (regs) {
+		if (counter->attr.exclude_user && user_mode(regs))
+			return 0;
+
+		if (counter->attr.exclude_kernel && !user_mode(regs))
+			return 0;
+	}
+
+	return 1;
+}
+
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+			       int nmi, struct pt_regs *regs, u64 addr)
+{
+	int neg = atomic64_add_negative(nr, &counter->hw.count);
+
+	if (counter->hw.sample_period && !neg && regs)
+		perf_swcounter_overflow(counter, nmi, regs, addr);
+}
+
+static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
+				     enum perf_type_id type, u32 event,
+				     u64 nr, int nmi, struct pt_regs *regs,
+				     u64 addr)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_swcounter_match(counter, type, event, regs))
+			perf_swcounter_add(counter, nr, nmi, regs, addr);
+	}
+	rcu_read_unlock();
+}
+
+static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
+{
+	if (in_nmi())
+		return &cpuctx->recursion[3];
+
+	if (in_irq())
+		return &cpuctx->recursion[2];
+
+	if (in_softirq())
+		return &cpuctx->recursion[1];
+
+	return &cpuctx->recursion[0];
+}
+
+static void __perf_swcounter_event(enum perf_type_id type, u32 event,
+				   u64 nr, int nmi, struct pt_regs *regs,
+				   u64 addr)
+{
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int *recursion = perf_swcounter_recursion_context(cpuctx);
+	struct perf_counter_context *ctx;
+
+	if (*recursion)
+		goto out;
+
+	(*recursion)++;
+	barrier();
+
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
+				 nr, nmi, regs, addr);
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr);
+	rcu_read_unlock();
+
+	barrier();
+	(*recursion)--;
+
+out:
+	put_cpu_var(perf_cpu_context);
+}
+
+void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+{
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
+}
+
+static void perf_swcounter_read(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+static int perf_swcounter_enable(struct perf_counter *counter)
+{
+	perf_swcounter_set_period(counter);
+	return 0;
+}
+
+static void perf_swcounter_disable(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+static const struct pmu perf_ops_generic = {
+	.enable		= perf_swcounter_enable,
+	.disable	= perf_swcounter_disable,
+	.read		= perf_swcounter_read,
+};
+
+/*
+ * Software counter: cpu wall time clock
+ */
+
+static void cpu_clock_perf_counter_update(struct perf_counter *counter)
+{
+	int cpu = raw_smp_processor_id();
+	s64 prev;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	prev = atomic64_read(&counter->hw.prev_count);
+	atomic64_set(&counter->hw.prev_count, now);
+	atomic64_add(now - prev, &counter->count);
+}
+
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swcounter_hrtimer;
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
+static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
+{
+	if (counter->hw.sample_period)
+		hrtimer_cancel(&counter->hw.hrtimer);
+	cpu_clock_perf_counter_update(counter);
+}
+
+static void cpu_clock_perf_counter_read(struct perf_counter *counter)
+{
+	cpu_clock_perf_counter_update(counter);
+}
+
+static const struct pmu perf_ops_cpu_clock = {
+	.enable		= cpu_clock_perf_counter_enable,
+	.disable	= cpu_clock_perf_counter_disable,
+	.read		= cpu_clock_perf_counter_read,
+};
+
+/*
+ * Software counter: task time clock
+ */
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
+	s64 delta;
+
+	prev = atomic64_xchg(&counter->hw.prev_count, now);
+	delta = now - prev;
+	atomic64_add(delta, &counter->count);
+}
+
+static int task_clock_perf_counter_enable(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 now;
+
+	now = counter->ctx->time;
+
+	atomic64_set(&hwc->prev_count, now);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swcounter_hrtimer;
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
+static void task_clock_perf_counter_disable(struct perf_counter *counter)
+{
+	if (counter->hw.sample_period)
+		hrtimer_cancel(&counter->hw.hrtimer);
+	task_clock_perf_counter_update(counter, counter->ctx->time);
+
+}
+
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+	u64 time;
+
+	if (!in_nmi()) {
+		update_context_time(counter->ctx);
+		time = counter->ctx->time;
+	} else {
+		u64 now = perf_clock();
+		u64 delta = now - counter->ctx->timestamp;
+		time = counter->ctx->time + delta;
+	}
+
+	task_clock_perf_counter_update(counter, time);
+}
+
+static const struct pmu perf_ops_task_clock = {
+	.enable		= task_clock_perf_counter_enable,
+	.disable	= task_clock_perf_counter_disable,
+	.read		= task_clock_perf_counter_read,
+};
+
+/*
+ * Software counter: cpu migrations
+ */
+void perf_counter_task_migration(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx;
+
+	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
+				 PERF_COUNT_SW_CPU_MIGRATIONS,
+				 1, 1, NULL, 0);
+
+	ctx = perf_pin_task_context(task);
+	if (ctx) {
+		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
+					 PERF_COUNT_SW_CPU_MIGRATIONS,
+					 1, 1, NULL, 0);
+		perf_unpin_context(ctx);
+	}
+}
+
+#ifdef CONFIG_EVENT_PROFILE
+void perf_tpcounter_event(int event_id)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
+}
+EXPORT_SYMBOL_GPL(perf_tpcounter_event);
+
+extern int ftrace_profile_enable(int);
+extern void ftrace_profile_disable(int);
+
+static void tp_perf_counter_destroy(struct perf_counter *counter)
+{
+	ftrace_profile_disable(perf_event_id(&counter->attr));
+}
+
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
+{
+	int event_id = perf_event_id(&counter->attr);
+	int ret;
+
+	ret = ftrace_profile_enable(event_id);
+	if (ret)
+		return NULL;
+
+	counter->destroy = tp_perf_counter_destroy;
+
+	return &perf_ops_generic;
+}
+#else
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
+{
+	return NULL;
+}
+#endif
+
+static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
+{
+	const struct pmu *pmu = NULL;
+
+	/*
+	 * Software counters (currently) can't in general distinguish
+	 * between user, kernel and hypervisor events.
+	 * However, context switches and cpu migrations are considered
+	 * to be kernel events, and page faults are never hypervisor
+	 * events.
+	 */
+	switch (counter->attr.config) {
+	case PERF_COUNT_SW_CPU_CLOCK:
+		pmu = &perf_ops_cpu_clock;
+
+		break;
+	case PERF_COUNT_SW_TASK_CLOCK:
+		/*
+		 * If the user instantiates this as a per-cpu counter,
+		 * use the cpu_clock counter instead.
+		 */
+		if (counter->ctx->task)
+			pmu = &perf_ops_task_clock;
+		else
+			pmu = &perf_ops_cpu_clock;
+
+		break;
+	case PERF_COUNT_SW_PAGE_FAULTS:
+	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
+	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
+	case PERF_COUNT_SW_CONTEXT_SWITCHES:
+	case PERF_COUNT_SW_CPU_MIGRATIONS:
+		pmu = &perf_ops_generic;
+		break;
+	}
+
+	return pmu;
+}
+
+/*
+ * Allocate and initialize a counter structure
+ */
+static struct perf_counter *
+perf_counter_alloc(struct perf_counter_attr *attr,
+		   int cpu,
+		   struct perf_counter_context *ctx,
+		   struct perf_counter *group_leader,
+		   gfp_t gfpflags)
+{
+	const struct pmu *pmu;
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	long err;
+
+	counter = kzalloc(sizeof(*counter), gfpflags);
+	if (!counter)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Single counters are their own group leaders, with an
+	 * empty sibling list:
+	 */
+	if (!group_leader)
+		group_leader = counter;
+
+	mutex_init(&counter->child_mutex);
+	INIT_LIST_HEAD(&counter->child_list);
+
+	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->event_entry);
+	INIT_LIST_HEAD(&counter->sibling_list);
+	init_waitqueue_head(&counter->waitq);
+
+	mutex_init(&counter->mmap_mutex);
+
+	counter->cpu		= cpu;
+	counter->attr		= *attr;
+	counter->group_leader	= group_leader;
+	counter->pmu		= NULL;
+	counter->ctx		= ctx;
+	counter->oncpu		= -1;
+
+	counter->ns		= get_pid_ns(current->nsproxy->pid_ns);
+	counter->id		= atomic64_inc_return(&perf_counter_id);
+
+	counter->state		= PERF_COUNTER_STATE_INACTIVE;
+
+	if (attr->disabled)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+	pmu = NULL;
+
+	hwc = &counter->hw;
+	hwc->sample_period = attr->sample_period;
+	if (attr->freq && attr->sample_freq)
+		hwc->sample_period = 1;
+
+	atomic64_set(&hwc->period_left, hwc->sample_period);
+
+	/*
+	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+	 */
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+		goto done;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		pmu = hw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		pmu = sw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		pmu = tp_perf_counter_init(counter);
+		break;
+
+	default:
+		break;
+	}
+done:
+	err = 0;
+	if (!pmu)
+		err = -EINVAL;
+	else if (IS_ERR(pmu))
+		err = PTR_ERR(pmu);
+
+	if (err) {
+		if (counter->ns)
+			put_pid_ns(counter->ns);
+		kfree(counter);
+		return ERR_PTR(err);
+	}
+
+	counter->pmu = pmu;
+
+	atomic_inc(&nr_counters);
+	if (counter->attr.mmap)
+		atomic_inc(&nr_mmap_counters);
+	if (counter->attr.comm)
+		atomic_inc(&nr_comm_counters);
+
+	return counter;
+}
+
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+			  struct perf_counter_attr *attr)
+{
+	int ret;
+	u32 size;
+
+	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size < PERF_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned long val;
+		unsigned long __user *addr;
+		unsigned long __user *end;
+
+		addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+				sizeof(unsigned long));
+		end  = PTR_ALIGN((void __user *)uattr + size,
+				sizeof(unsigned long));
+
+		for (; addr < end; addr += sizeof(unsigned long)) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * If the type exists, the corresponding creation will verify
+	 * the attr->config.
+	 */
+	if (attr->type >= PERF_TYPE_MAX)
+		return -EINVAL;
+
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+		return -EINVAL;
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+		return -EINVAL;
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+		return -EINVAL;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
+/**
+ * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
+ *
+ * @attr_uptr:	event type attributes for monitoring/sampling
+ * @pid:		target pid
+ * @cpu:		target cpu
+ * @group_fd:		group leader counter fd
+ */
+SYSCALL_DEFINE5(perf_counter_open,
+		struct perf_counter_attr __user *, attr_uptr,
+		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
+{
+	struct perf_counter *counter, *group_leader;
+	struct perf_counter_attr attr;
+	struct perf_counter_context *ctx;
+	struct file *counter_file = NULL;
+	struct file *group_file = NULL;
+	int fput_needed = 0;
+	int fput_needed2 = 0;
+	int ret;
+
+	/* for future expandability... */
+	if (flags)
+		return -EINVAL;
+
+	ret = perf_copy_attr(attr_uptr, &attr);
+	if (ret)
+		return ret;
+
+	if (!attr.exclude_kernel) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
+	if (attr.freq) {
+		if (attr.sample_freq > sysctl_perf_counter_sample_rate)
+			return -EINVAL;
+	}
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	/*
+	 * Look up the group leader (we will attach this counter to it):
+	 */
+	group_leader = NULL;
+	if (group_fd != -1) {
+		ret = -EINVAL;
+		group_file = fget_light(group_fd, &fput_needed);
+		if (!group_file)
+			goto err_put_context;
+		if (group_file->f_op != &perf_fops)
+			goto err_put_context;
+
+		group_leader = group_file->private_data;
+		/*
+		 * Do not allow a recursive hierarchy (this new sibling
+		 * becoming part of another group-sibling):
+		 */
+		if (group_leader->group_leader != group_leader)
+			goto err_put_context;
+		/*
+		 * Do not allow to attach to a group in a different
+		 * task or CPU context:
+		 */
+		if (group_leader->ctx != ctx)
+			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (attr.exclusive || attr.pinned)
+			goto err_put_context;
+	}
+
+	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
+				     GFP_KERNEL);
+	ret = PTR_ERR(counter);
+	if (IS_ERR(counter))
+		goto err_put_context;
+
+	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+	if (ret < 0)
+		goto err_free_put_context;
+
+	counter_file = fget_light(ret, &fput_needed2);
+	if (!counter_file)
+		goto err_free_put_context;
+
+	counter->filp = counter_file;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, counter, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	counter->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_counter_mutex);
+	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
+	mutex_unlock(&current->perf_counter_mutex);
+
+	fput_light(counter_file, fput_needed2);
+
+out_fput:
+	fput_light(group_file, fput_needed);
+
+	return ret;
+
+err_free_put_context:
+	kfree(counter);
+
+err_put_context:
+	put_ctx(ctx);
+
+	goto out_fput;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static struct perf_counter *
+inherit_counter(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter *group_leader,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *child_counter;
+
+	/*
+	 * Instead of creating recursive hierarchies of counters,
+	 * we link inherited counters back to the original parent,
+	 * which has a filp for sure, which we use as the reference
+	 * count:
+	 */
+	if (parent_counter->parent)
+		parent_counter = parent_counter->parent;
+
+	child_counter = perf_counter_alloc(&parent_counter->attr,
+					   parent_counter->cpu, child_ctx,
+					   group_leader, GFP_KERNEL);
+	if (IS_ERR(child_counter))
+		return child_counter;
+	get_ctx(child_ctx);
+
+	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its attr.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en, dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	if (parent_counter->attr.freq)
+		child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	add_counter_to_ctx(child_counter, child_ctx);
+
+	child_counter->parent = parent_counter;
+	/*
+	 * inherit into child's child as well:
+	 */
+	child_counter->attr.inherit = 1;
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child counter exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_counter->filp->f_count);
+
+	/*
+	 * Link this into the parent counter's child list
+	 */
+	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
+	mutex_lock(&parent_counter->child_mutex);
+	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
+	mutex_unlock(&parent_counter->child_mutex);
+
+	return child_counter;
+}
+
+static int inherit_group(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *leader;
+	struct perf_counter *sub;
+	struct perf_counter *child_ctr;
+
+	leader = inherit_counter(parent_counter, parent, parent_ctx,
+				 child, NULL, child_ctx);
+	if (IS_ERR(leader))
+		return PTR_ERR(leader);
+	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
+		child_ctr = inherit_counter(sub, parent, parent_ctx,
+					    child, leader, child_ctx);
+		if (IS_ERR(child_ctr))
+			return PTR_ERR(child_ctr);
+	}
+	return 0;
+}
+
+static void sync_child_counter(struct perf_counter *child_counter,
+			       struct perf_counter *parent_counter)
+{
+	u64 child_val;
+
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+	atomic64_add(child_counter->total_time_enabled,
+		     &parent_counter->child_total_time_enabled);
+	atomic64_add(child_counter->total_time_running,
+		     &parent_counter->child_total_time_running);
+
+	/*
+	 * Remove this counter from the parent's list
+	 */
+	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
+	mutex_lock(&parent_counter->child_mutex);
+	list_del_init(&child_counter->child_list);
+	mutex_unlock(&parent_counter->child_mutex);
+
+	/*
+	 * Release the parent counter, if this was the last
+	 * reference to it.
+	 */
+	fput(parent_counter->filp);
+}
+
+static void
+__perf_counter_exit_task(struct perf_counter *child_counter,
+			 struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *parent_counter;
+
+	update_counter_times(child_counter);
+	perf_counter_remove_from_context(child_counter);
+
+	parent_counter = child_counter->parent;
+	/*
+	 * It can happen that parent exits first, and has counters
+	 * that are still around due to the child reference. These
+	 * counters need to be zapped - but otherwise linger.
+	 */
+	if (parent_counter) {
+		sync_child_counter(child_counter, parent_counter);
+		free_counter(child_counter);
+	}
+}
+
+/*
+ * When a child task exits, feed back counter values to parent counters.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+	struct perf_counter *child_counter, *tmp;
+	struct perf_counter_context *child_ctx;
+	unsigned long flags;
+
+	if (likely(!child->perf_counter_ctxp))
+		return;
+
+	local_irq_save(flags);
+	/*
+	 * We can't reschedule here because interrupts are disabled,
+	 * and either child is current or it is a task that can't be
+	 * scheduled, so we are now safe from rescheduling changing
+	 * our context.
+	 */
+	child_ctx = child->perf_counter_ctxp;
+	__perf_counter_task_sched_out(child_ctx);
+
+	/*
+	 * Take the context lock here so that if find_get_context is
+	 * reading child->perf_counter_ctxp, we wait until it has
+	 * incremented the context's refcount before we do put_ctx below.
+	 */
+	spin_lock(&child_ctx->lock);
+	child->perf_counter_ctxp = NULL;
+	if (child_ctx->parent_ctx) {
+		/*
+		 * This context is a clone; unclone it so it can't get
+		 * swapped to another process while we're removing all
+		 * the counters from it.
+		 */
+		put_ctx(child_ctx->parent_ctx);
+		child_ctx->parent_ctx = NULL;
+	}
+	spin_unlock(&child_ctx->lock);
+	local_irq_restore(flags);
+
+	/*
+	 * We can recurse on the same lock type through:
+	 *
+	 *   __perf_counter_exit_task()
+	 *     sync_child_counter()
+	 *       fput(parent_counter->filp)
+	 *         perf_release()
+	 *           mutex_lock(&ctx->mutex)
+	 *
+	 * But since its the parent context it won't be the same instance.
+	 */
+	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+
+again:
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+				 list_entry)
+		__perf_counter_exit_task(child_counter, child_ctx);
+
+	/*
+	 * If the last counter was a group counter, it will have appended all
+	 * its siblings to the list, but we obtained 'tmp' before that which
+	 * will still point to the list head terminating the iteration.
+	 */
+	if (!list_empty(&child_ctx->counter_list))
+		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
+}
+
+/*
+ * free an unexposed, unused context as created by inheritance by
+ * init_task below, used by fork() in case of fail.
+ */
+void perf_counter_free_task(struct task_struct *task)
+{
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter *counter, *tmp;
+
+	if (!ctx)
+		return;
+
+	mutex_lock(&ctx->mutex);
+again:
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) {
+		struct perf_counter *parent = counter->parent;
+
+		if (WARN_ON_ONCE(!parent))
+			continue;
+
+		mutex_lock(&parent->child_mutex);
+		list_del_init(&counter->child_list);
+		mutex_unlock(&parent->child_mutex);
+
+		fput(parent->filp);
+
+		list_del_counter(counter, ctx);
+		free_counter(counter);
+	}
+
+	if (!list_empty(&ctx->counter_list))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+int perf_counter_init_task(struct task_struct *child)
+{
+	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter_context *cloned_ctx;
+	struct perf_counter *counter;
+	struct task_struct *parent = current;
+	int inherited_all = 1;
+	int ret = 0;
+
+	child->perf_counter_ctxp = NULL;
+
+	mutex_init(&child->perf_counter_mutex);
+	INIT_LIST_HEAD(&child->perf_counter_list);
+
+	if (likely(!parent->perf_counter_ctxp))
+		return 0;
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * counters that have been marked for cloning.
+	 * First allocate and initialize a context for the child.
+	 */
+
+	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+	if (!child_ctx)
+		return -ENOMEM;
+
+	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = child_ctx;
+	get_task_struct(child);
+
+	/*
+	 * If the parent's context is a clone, pin it so it won't get
+	 * swapped under us.
+	 */
+	parent_ctx = perf_pin_task_context(parent);
+
+	/*
+	 * No need to check if parent_ctx != NULL here; since we saw
+	 * it non-NULL earlier, the only reason for it to become NULL
+	 * is if we exit, and since we're currently in the middle of
+	 * a fork we can't be exiting at the same time.
+	 */
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	mutex_lock(&parent_ctx->mutex);
+
+	/*
+	 * We dont have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) {
+		if (counter != counter->group_leader)
+			continue;
+
+		if (!counter->attr.inherit) {
+			inherited_all = 0;
+			continue;
+		}
+
+		ret = inherit_group(counter, parent, parent_ctx,
+					     child, child_ctx);
+		if (ret) {
+			inherited_all = 0;
+			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 * Note that if the parent is a clone, it could get
+		 * uncloned at any point, but that doesn't matter
+		 * because the list of counters and the generation
+		 * count can't have changed since we took the mutex.
+		 */
+		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
+		if (cloned_ctx) {
+			child_ctx->parent_ctx = cloned_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
+	}
+
+	mutex_unlock(&parent_ctx->mutex);
+
+	perf_unpin_context(parent_ctx);
+
+	return ret;
+}
+
+static void __cpuinit perf_counter_init_cpu(int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	__perf_counter_init_context(&cpuctx->ctx, NULL);
+
+	spin_lock(&perf_resource_lock);
+	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+	spin_unlock(&perf_resource_lock);
+
+	hw_perf_counter_setup(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __perf_counter_exit_cpu(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+	struct perf_counter *counter, *tmp;
+
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+		__perf_counter_remove_from_context(counter);
+}
+static void perf_counter_exit_cpu(int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	mutex_lock(&ctx->mutex);
+	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
+	mutex_unlock(&ctx->mutex);
+}
+#else
+static inline void perf_counter_exit_cpu(int cpu) { }
+#endif
+
+static int __cpuinit
+perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action) {
+
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		perf_counter_init_cpu(cpu);
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		perf_counter_exit_cpu(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * This has to have a higher priority than migration_notifier in sched.c.
+ */
+static struct notifier_block __cpuinitdata perf_cpu_nb = {
+	.notifier_call		= perf_cpu_notify,
+	.priority		= 20,
+};
+
+void __init perf_counter_init(void)
+{
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
+			(void *)(long)smp_processor_id());
+	register_cpu_notifier(&perf_cpu_nb);
+}
+
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_reserved_percpu);
+}
+
+static ssize_t
+perf_set_reserve_percpu(struct sysdev_class *class,
+			const char *buf,
+			size_t count)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned long val;
+	int err, cpu, mpt;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > perf_max_counters)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_reserved_percpu = val;
+	for_each_online_cpu(cpu) {
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		spin_lock_irq(&cpuctx->ctx.lock);
+		mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
+			  perf_max_counters - perf_reserved_percpu);
+		cpuctx->max_pertask = mpt;
+		spin_unlock_irq(&cpuctx->ctx.lock);
+	}
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_overcommit);
+}
+
+static ssize_t
+perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > 1)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_overcommit = val;
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(
+				reserve_percpu,
+				0644,
+				perf_show_reserve_percpu,
+				perf_set_reserve_percpu
+			);
+
+static SYSDEV_CLASS_ATTR(
+				overcommit,
+				0644,
+				perf_show_overcommit,
+				perf_set_overcommit
+			);
+
+static struct attribute *perfclass_attrs[] = {
+	&attr_reserve_percpu.attr,
+	&attr_overcommit.attr,
+	NULL
+};
+
+static struct attribute_group perfclass_attr_group = {
+	.attrs			= perfclass_attrs,
+	.name			= "perf_counters",
+};
+
+static int __init perf_counter_sysfs_init(void)
+{
+	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
+				  &perfclass_attr_group);
+}
+device_initcall(perf_counter_sysfs_init);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 23bd4da..72067cb 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -116,9 +116,13 @@
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HIBERNATION_NVS
+	bool
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+	select HIBERNATION_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces.  STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 720ea4f..c3b81c3 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -6,6 +6,9 @@
 obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_PM_SLEEP)		+= console.o
 obj-$(CONFIG_FREEZER)		+= process.o
-obj-$(CONFIG_HIBERNATION)	+= swsusp.o disk.o snapshot.o swap.o user.o
+obj-$(CONFIG_SUSPEND)		+= suspend.o
+obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
+obj-$(CONFIG_HIBERNATION)	+= swsusp.o hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION_NVS)	+= hibernate_nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/hibernate.c
similarity index 96%
rename from kernel/power/disk.c
rename to kernel/power/hibernate.c
index 5cb080e..81d2e74 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/hibernate.c
@@ -1,12 +1,12 @@
 /*
- * kernel/power/disk.c - Suspend-to-disk support.
+ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support.
  *
  * Copyright (c) 2003 Patrick Mochel
  * Copyright (c) 2003 Open Source Development Lab
  * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
  *
  * This file is released under the GPLv2.
- *
  */
 
 #include <linux/suspend.h>
@@ -215,13 +215,13 @@
 	if (error)
 		return error;
 
-	/* At this point, device_suspend() has been called, but *not*
-	 * device_power_down(). We *must* call device_power_down() now.
+	/* At this point, dpm_suspend_start() has been called, but *not*
+	 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
 	 * become desynchronized with the actual state of the hardware
 	 * at resume time, and evil weirdness ensues.
 	 */
-	error = device_power_down(PMSG_FREEZE);
+	error = dpm_suspend_noirq(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -262,7 +262,7 @@
 
  Power_up:
 	sysdev_resume();
-	/* NOTE:  device_power_up() is just a resume() for devices
+	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 
@@ -275,7 +275,7 @@
  Platform_finish:
 	platform_finish(platform_mode);
 
-	device_power_up(in_suspend ?
+	dpm_resume_noirq(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -304,7 +304,7 @@
 		goto Close;
 
 	suspend_console();
-	error = device_suspend(PMSG_FREEZE);
+	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
 
@@ -315,7 +315,7 @@
 	/* Control returns here after successful restore */
 
  Resume_devices:
-	device_resume(in_suspend ?
+	dpm_resume_end(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 	resume_console();
  Close:
@@ -339,7 +339,7 @@
 {
 	int error;
 
-	error = device_power_down(PMSG_QUIESCE);
+	error = dpm_suspend_noirq(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -394,7 +394,7 @@
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	device_power_up(PMSG_RECOVER);
+	dpm_resume_noirq(PMSG_RECOVER);
 
 	return error;
 }
@@ -414,10 +414,10 @@
 
 	pm_prepare_console();
 	suspend_console();
-	error = device_suspend(PMSG_QUIESCE);
+	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
-		device_resume(PMSG_RECOVER);
+		dpm_resume_end(PMSG_RECOVER);
 	}
 	resume_console();
 	pm_restore_console();
@@ -447,14 +447,14 @@
 
 	entering_platform_hibernation = true;
 	suspend_console();
-	error = device_suspend(PMSG_HIBERNATE);
+	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
 			hibernation_ops->recover();
 		goto Resume_devices;
 	}
 
-	error = device_power_down(PMSG_HIBERNATE);
+	error = dpm_suspend_noirq(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -479,11 +479,11 @@
  Platofrm_finish:
 	hibernation_ops->finish();
 
-	device_power_up(PMSG_RESTORE);
+	dpm_suspend_noirq(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
-	device_resume(PMSG_RESTORE);
+	dpm_resume_end(PMSG_RESTORE);
 	resume_console();
 
  Close:
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
new file mode 100644
index 0000000..39ac698
--- /dev/null
+++ b/kernel/power/hibernate_nvs.c
@@ -0,0 +1,135 @@
+/*
+ * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
+ *
+ * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+
+/*
+ * Platforms, like ACPI, may want us to save some memory used by them during
+ * hibernation and to restore the contents of this memory during the subsequent
+ * resume.  The code below implements a mechanism allowing us to do that.
+ */
+
+struct nvs_page {
+	unsigned long phys_start;
+	unsigned int size;
+	void *kaddr;
+	void *data;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_list);
+
+/**
+ *	hibernate_nvs_register - register platform NVS memory region to save
+ *	@start - physical address of the region
+ *	@size - size of the region
+ *
+ *	The NVS region need not be page-aligned (both ends) and we arrange
+ *	things so that the data from page-aligned addresses in this region will
+ *	be copied into separate RAM pages.
+ */
+int hibernate_nvs_register(unsigned long start, unsigned long size)
+{
+	struct nvs_page *entry, *next;
+
+	while (size > 0) {
+		unsigned int nr_bytes;
+
+		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
+		if (!entry)
+			goto Error;
+
+		list_add_tail(&entry->node, &nvs_list);
+		entry->phys_start = start;
+		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
+		entry->size = (size < nr_bytes) ? size : nr_bytes;
+
+		start += entry->size;
+		size -= entry->size;
+	}
+	return 0;
+
+ Error:
+	list_for_each_entry_safe(entry, next, &nvs_list, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	return -ENOMEM;
+}
+
+/**
+ *	hibernate_nvs_free - free data pages allocated for saving NVS regions
+ */
+void hibernate_nvs_free(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			free_page((unsigned long)entry->data);
+			entry->data = NULL;
+			if (entry->kaddr) {
+				iounmap(entry->kaddr);
+				entry->kaddr = NULL;
+			}
+		}
+}
+
+/**
+ *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ */
+int hibernate_nvs_alloc(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node) {
+		entry->data = (void *)__get_free_page(GFP_KERNEL);
+		if (!entry->data) {
+			hibernate_nvs_free();
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/**
+ *	hibernate_nvs_save - save NVS memory regions
+ */
+void hibernate_nvs_save(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Saving platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			entry->kaddr = ioremap(entry->phys_start, entry->size);
+			memcpy(entry->data, entry->kaddr, entry->size);
+		}
+}
+
+/**
+ *	hibernate_nvs_restore - restore NVS memory regions
+ *
+ *	This function is going to be called with interrupts disabled, so it
+ *	cannot iounmap the virtual addresses used to access the NVS region.
+ */
+void hibernate_nvs_restore(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data)
+			memcpy(entry->kaddr, entry->data, entry->size);
+}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 8680282..f710e36 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,20 +8,9 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/suspend.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/kmod.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/cpu.h>
 #include <linux/resume-trace.h>
-#include <linux/freezer.h>
-#include <linux/vmstat.h>
-#include <linux/syscalls.h>
 
 #include "power.h"
 
@@ -119,373 +108,6 @@
 
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_SUSPEND
-
-static int suspend_test(int level)
-{
-#ifdef CONFIG_PM_DEBUG
-	if (pm_test_level == level) {
-		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
-		mdelay(5000);
-		return 1;
-	}
-#endif /* !CONFIG_PM_DEBUG */
-	return 0;
-}
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-/*
- * We test the system suspend code by setting an RTC wakealarm a short
- * time in the future, then suspending.  Suspending the devices won't
- * normally take long ... some systems only need a few milliseconds.
- *
- * The time it takes is system-specific though, so when we test this
- * during system bootup we allow a LOT of time.
- */
-#define TEST_SUSPEND_SECONDS	5
-
-static unsigned long suspend_test_start_time;
-
-static void suspend_test_start(void)
-{
-	/* FIXME Use better timebase than "jiffies", ideally a clocksource.
-	 * What we want is a hardware counter that will work correctly even
-	 * during the irqs-are-off stages of the suspend/resume cycle...
-	 */
-	suspend_test_start_time = jiffies;
-}
-
-static void suspend_test_finish(const char *label)
-{
-	long nj = jiffies - suspend_test_start_time;
-	unsigned msec;
-
-	msec = jiffies_to_msecs(abs(nj));
-	pr_info("PM: %s took %d.%03d seconds\n", label,
-			msec / 1000, msec % 1000);
-
-	/* Warning on suspend means the RTC alarm period needs to be
-	 * larger -- the system was sooo slooowwww to suspend that the
-	 * alarm (should have) fired before the system went to sleep!
-	 *
-	 * Warning on either suspend or resume also means the system
-	 * has some performance issues.  The stack dump of a WARN_ON
-	 * is more likely to get the right attention than a printk...
-	 */
-	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
-}
-
-#else
-
-static void suspend_test_start(void)
-{
-}
-
-static void suspend_test_finish(const char *label)
-{
-}
-
-#endif
-
-/* This is just an arbitrary number */
-#define FREE_PAGE_NUMBER (100)
-
-static struct platform_suspend_ops *suspend_ops;
-
-/**
- *	suspend_set_ops - Set the global suspend method table.
- *	@ops:	Pointer to ops structure.
- */
-
-void suspend_set_ops(struct platform_suspend_ops *ops)
-{
-	mutex_lock(&pm_mutex);
-	suspend_ops = ops;
-	mutex_unlock(&pm_mutex);
-}
-
-/**
- * suspend_valid_only_mem - generic memory-only valid callback
- *
- * Platform drivers that implement mem suspend only and only need
- * to check for that in their .valid callback can use this instead
- * of rolling their own .valid callback.
- */
-int suspend_valid_only_mem(suspend_state_t state)
-{
-	return state == PM_SUSPEND_MEM;
-}
-
-/**
- *	suspend_prepare - Do prep work before entering low-power state.
- *
- *	This is common code that is called for each state that we're entering.
- *	Run suspend notifiers, allocate a console and stop all processes.
- */
-static int suspend_prepare(void)
-{
-	int error;
-	unsigned int free_pages;
-
-	if (!suspend_ops || !suspend_ops->enter)
-		return -EPERM;
-
-	pm_prepare_console();
-
-	error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
-	if (error)
-		goto Finish;
-
-	error = usermodehelper_disable();
-	if (error)
-		goto Finish;
-
-	if (suspend_freeze_processes()) {
-		error = -EAGAIN;
-		goto Thaw;
-	}
-
-	free_pages = global_page_state(NR_FREE_PAGES);
-	if (free_pages < FREE_PAGE_NUMBER) {
-		pr_debug("PM: free some memory\n");
-		shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
-		if (nr_free_pages() < FREE_PAGE_NUMBER) {
-			error = -ENOMEM;
-			printk(KERN_ERR "PM: No enough memory\n");
-		}
-	}
-	if (!error)
-		return 0;
-
- Thaw:
-	suspend_thaw_processes();
-	usermodehelper_enable();
- Finish:
-	pm_notifier_call_chain(PM_POST_SUSPEND);
-	pm_restore_console();
-	return error;
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
-{
-	local_irq_disable();
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
-{
-	local_irq_enable();
-}
-
-/**
- *	suspend_enter - enter the desired system sleep state.
- *	@state:		state to enter
- *
- *	This function should be called after devices have been suspended.
- */
-static int suspend_enter(suspend_state_t state)
-{
-	int error;
-
-	if (suspend_ops->prepare) {
-		error = suspend_ops->prepare();
-		if (error)
-			return error;
-	}
-
-	error = device_power_down(PMSG_SUSPEND);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down\n");
-		goto Platfrom_finish;
-	}
-
-	if (suspend_ops->prepare_late) {
-		error = suspend_ops->prepare_late();
-		if (error)
-			goto Power_up_devices;
-	}
-
-	if (suspend_test(TEST_PLATFORM))
-		goto Platform_wake;
-
-	error = disable_nonboot_cpus();
-	if (error || suspend_test(TEST_CPUS))
-		goto Enable_cpus;
-
-	arch_suspend_disable_irqs();
-	BUG_ON(!irqs_disabled());
-
-	error = sysdev_suspend(PMSG_SUSPEND);
-	if (!error) {
-		if (!suspend_test(TEST_CORE))
-			error = suspend_ops->enter(state);
-		sysdev_resume();
-	}
-
-	arch_suspend_enable_irqs();
-	BUG_ON(irqs_disabled());
-
- Enable_cpus:
-	enable_nonboot_cpus();
-
- Platform_wake:
-	if (suspend_ops->wake)
-		suspend_ops->wake();
-
- Power_up_devices:
-	device_power_up(PMSG_RESUME);
-
- Platfrom_finish:
-	if (suspend_ops->finish)
-		suspend_ops->finish();
-
-	return error;
-}
-
-/**
- *	suspend_devices_and_enter - suspend devices and enter the desired system
- *				    sleep state.
- *	@state:		  state to enter
- */
-int suspend_devices_and_enter(suspend_state_t state)
-{
-	int error;
-
-	if (!suspend_ops)
-		return -ENOSYS;
-
-	if (suspend_ops->begin) {
-		error = suspend_ops->begin(state);
-		if (error)
-			goto Close;
-	}
-	suspend_console();
-	suspend_test_start();
-	error = device_suspend(PMSG_SUSPEND);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to suspend\n");
-		goto Recover_platform;
-	}
-	suspend_test_finish("suspend devices");
-	if (suspend_test(TEST_DEVICES))
-		goto Recover_platform;
-
-	suspend_enter(state);
-
- Resume_devices:
-	suspend_test_start();
-	device_resume(PMSG_RESUME);
-	suspend_test_finish("resume devices");
-	resume_console();
- Close:
-	if (suspend_ops->end)
-		suspend_ops->end();
-	return error;
-
- Recover_platform:
-	if (suspend_ops->recover)
-		suspend_ops->recover();
-	goto Resume_devices;
-}
-
-/**
- *	suspend_finish - Do final work before exiting suspend sequence.
- *
- *	Call platform code to clean up, restart processes, and free the 
- *	console that we've allocated. This is not called for suspend-to-disk.
- */
-static void suspend_finish(void)
-{
-	suspend_thaw_processes();
-	usermodehelper_enable();
-	pm_notifier_call_chain(PM_POST_SUSPEND);
-	pm_restore_console();
-}
-
-
-
-
-static const char * const pm_states[PM_SUSPEND_MAX] = {
-	[PM_SUSPEND_STANDBY]	= "standby",
-	[PM_SUSPEND_MEM]	= "mem",
-};
-
-static inline int valid_state(suspend_state_t state)
-{
-	/* All states need lowlevel support and need to be valid
-	 * to the lowlevel implementation, no valid callback
-	 * implies that none are valid. */
-	if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state))
-		return 0;
-	return 1;
-}
-
-
-/**
- *	enter_state - Do common work of entering low-power state.
- *	@state:		pm_state structure for state we're entering.
- *
- *	Make sure we're the only ones trying to enter a sleep state. Fail
- *	if someone has beat us to it, since we don't want anything weird to
- *	happen when we wake up.
- *	Then, do the setup for suspend, enter the state, and cleaup (after
- *	we've woken up).
- */
-static int enter_state(suspend_state_t state)
-{
-	int error;
-
-	if (!valid_state(state))
-		return -ENODEV;
-
-	if (!mutex_trylock(&pm_mutex))
-		return -EBUSY;
-
-	printk(KERN_INFO "PM: Syncing filesystems ... ");
-	sys_sync();
-	printk("done.\n");
-
-	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
-	error = suspend_prepare();
-	if (error)
-		goto Unlock;
-
-	if (suspend_test(TEST_FREEZER))
-		goto Finish;
-
-	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
-	error = suspend_devices_and_enter(state);
-
- Finish:
-	pr_debug("PM: Finishing wakeup.\n");
-	suspend_finish();
- Unlock:
-	mutex_unlock(&pm_mutex);
-	return error;
-}
-
-
-/**
- *	pm_suspend - Externally visible function for suspending system.
- *	@state:		Enumerated value of state to enter.
- *
- *	Determine whether or not value is within range, get state 
- *	structure, and enter (above).
- */
-
-int pm_suspend(suspend_state_t state)
-{
-	if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
-		return enter_state(state);
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(pm_suspend);
-
-#endif /* CONFIG_SUSPEND */
-
 struct kobject *power_kobj;
 
 /**
@@ -498,7 +120,6 @@
  *	store() accepts one of those strings, translates it into the 
  *	proper enumerated value, and initiates a suspend transition.
  */
-
 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 			  char *buf)
 {
@@ -596,7 +217,6 @@
 	.attrs = g,
 };
 
-
 static int __init pm_init(void)
 {
 	power_kobj = kobject_create_and_add("power", NULL);
@@ -606,144 +226,3 @@
 }
 
 core_initcall(pm_init);
-
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-#include <linux/rtc.h>
-
-/*
- * To test system suspend, we need a hands-off mechanism to resume the
- * system.  RTCs wake alarms are a common self-contained mechanism.
- */
-
-static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
-{
-	static char err_readtime[] __initdata =
-		KERN_ERR "PM: can't read %s time, err %d\n";
-	static char err_wakealarm [] __initdata =
-		KERN_ERR "PM: can't set %s wakealarm, err %d\n";
-	static char err_suspend[] __initdata =
-		KERN_ERR "PM: suspend test failed, error %d\n";
-	static char info_test[] __initdata =
-		KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
-
-	unsigned long		now;
-	struct rtc_wkalrm	alm;
-	int			status;
-
-	/* this may fail if the RTC hasn't been initialized */
-	status = rtc_read_time(rtc, &alm.time);
-	if (status < 0) {
-		printk(err_readtime, dev_name(&rtc->dev), status);
-		return;
-	}
-	rtc_tm_to_time(&alm.time, &now);
-
-	memset(&alm, 0, sizeof alm);
-	rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
-	alm.enabled = true;
-
-	status = rtc_set_alarm(rtc, &alm);
-	if (status < 0) {
-		printk(err_wakealarm, dev_name(&rtc->dev), status);
-		return;
-	}
-
-	if (state == PM_SUSPEND_MEM) {
-		printk(info_test, pm_states[state]);
-		status = pm_suspend(state);
-		if (status == -ENODEV)
-			state = PM_SUSPEND_STANDBY;
-	}
-	if (state == PM_SUSPEND_STANDBY) {
-		printk(info_test, pm_states[state]);
-		status = pm_suspend(state);
-	}
-	if (status < 0)
-		printk(err_suspend, status);
-
-	/* Some platforms can't detect that the alarm triggered the
-	 * wakeup, or (accordingly) disable it after it afterwards.
-	 * It's supposed to give oneshot behavior; cope.
-	 */
-	alm.enabled = false;
-	rtc_set_alarm(rtc, &alm);
-}
-
-static int __init has_wakealarm(struct device *dev, void *name_ptr)
-{
-	struct rtc_device *candidate = to_rtc_device(dev);
-
-	if (!candidate->ops->set_alarm)
-		return 0;
-	if (!device_may_wakeup(candidate->dev.parent))
-		return 0;
-
-	*(const char **)name_ptr = dev_name(dev);
-	return 1;
-}
-
-/*
- * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
- * at startup time.  They're normally disabled, for faster boot and because
- * we can't know which states really work on this particular system.
- */
-static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
-
-static char warn_bad_state[] __initdata =
-	KERN_WARNING "PM: can't test '%s' suspend state\n";
-
-static int __init setup_test_suspend(char *value)
-{
-	unsigned i;
-
-	/* "=mem" ==> "mem" */
-	value++;
-	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (!pm_states[i])
-			continue;
-		if (strcmp(pm_states[i], value) != 0)
-			continue;
-		test_state = (__force suspend_state_t) i;
-		return 0;
-	}
-	printk(warn_bad_state, value);
-	return 0;
-}
-__setup("test_suspend", setup_test_suspend);
-
-static int __init test_suspend(void)
-{
-	static char		warn_no_rtc[] __initdata =
-		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
-
-	char			*pony = NULL;
-	struct rtc_device	*rtc = NULL;
-
-	/* PM is initialized by now; is that state testable? */
-	if (test_state == PM_SUSPEND_ON)
-		goto done;
-	if (!valid_state(test_state)) {
-		printk(warn_bad_state, pm_states[test_state]);
-		goto done;
-	}
-
-	/* RTCs have initialized by now too ... can we use one? */
-	class_find_device(rtc_class, NULL, &pony, has_wakealarm);
-	if (pony)
-		rtc = rtc_class_open(pony);
-	if (!rtc) {
-		printk(warn_no_rtc);
-		goto done;
-	}
-
-	/* go for it */
-	test_wakealarm(rtc, test_state);
-	rtc_class_close(rtc);
-done:
-	return 0;
-}
-late_initcall(test_suspend);
-
-#endif /* CONFIG_PM_TEST_SUSPEND */
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46b5ec7..26d5a26 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -45,7 +45,7 @@
  */
 #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)
 
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
 extern int hibernation_snapshot(int platform_mode);
 extern int hibernation_restore(int platform_mode);
 extern int hibernation_platform_enter(void);
@@ -74,7 +74,7 @@
 
 extern int create_basic_memory_bitmaps(void);
 extern void free_basic_memory_bitmaps(void);
-extern unsigned int count_data_pages(void);
+extern int swsusp_shrink_memory(void);
 
 /**
  *	Auxiliary structure used for reading the snapshot image data and
@@ -147,9 +147,8 @@
  */
 #define SF_PLATFORM_MODE	1
 
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
 extern int swsusp_check(void);
-extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
 extern int swsusp_read(unsigned int *flags_p);
 extern int swsusp_write(unsigned int flags);
@@ -161,22 +160,36 @@
 				unsigned int, char *);
 
 #ifdef CONFIG_SUSPEND
-/* kernel/power/main.c */
+/* kernel/power/suspend.c */
+extern const char *const pm_states[];
+
+extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
+extern int enter_state(suspend_state_t state);
 #else /* !CONFIG_SUSPEND */
 static inline int suspend_devices_and_enter(suspend_state_t state)
 {
 	return -ENOSYS;
 }
+static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
+static inline bool valid_state(suspend_state_t state) { return false; }
 #endif /* !CONFIG_SUSPEND */
 
+#ifdef CONFIG_PM_TEST_SUSPEND
+/* kernel/power/suspend_test.c */
+extern void suspend_test_start(void);
+extern void suspend_test_finish(const char *label);
+#else /* !CONFIG_PM_TEST_SUSPEND */
+static inline void suspend_test_start(void) {}
+static inline void suspend_test_finish(const char *label) {}
+#endif /* !CONFIG_PM_TEST_SUSPEND */
+
 #ifdef CONFIG_PM_SLEEP
 /* kernel/power/main.c */
 extern int pm_notifier_call_chain(unsigned long val);
 #endif
 
 #ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void);
 int restore_highmem(void);
 #else
 static inline unsigned int count_highmem_pages(void) { return 0; }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 33e2e4a..523a451 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,6 +39,14 @@
 static void swsusp_set_page_forbidden(struct page *);
 static void swsusp_unset_page_forbidden(struct page *);
 
+/*
+ * Preferred image size in bytes (tunable via /sys/power/image_size).
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N bytes, but if that is impossible, it will
+ * try to create the smallest image possible.
+ */
+unsigned long image_size = 500 * 1024 * 1024;
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
@@ -840,7 +848,7 @@
  *	pages.
  */
 
-unsigned int count_highmem_pages(void)
+static unsigned int count_highmem_pages(void)
 {
 	struct zone *zone;
 	unsigned int n = 0;
@@ -902,7 +910,7 @@
  *	pages.
  */
 
-unsigned int count_data_pages(void)
+static unsigned int count_data_pages(void)
 {
 	struct zone *zone;
 	unsigned long pfn, max_zone_pfn;
@@ -1058,6 +1066,74 @@
 	buffer = NULL;
 }
 
+/**
+ *	swsusp_shrink_memory -  Try to free as much memory as needed
+ *
+ *	... but do not OOM-kill anyone
+ *
+ *	Notice: all userland should be stopped before it is called, or
+ *	livelock is possible.
+ */
+
+#define SHRINK_BITE	10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+	if (tmp > SHRINK_BITE)
+		tmp = SHRINK_BITE;
+	return shrink_all_memory(tmp);
+}
+
+int swsusp_shrink_memory(void)
+{
+	long tmp;
+	struct zone *zone;
+	unsigned long pages = 0;
+	unsigned int i = 0;
+	char *p = "-\\|/";
+	struct timeval start, stop;
+
+	printk(KERN_INFO "PM: Shrinking memory...  ");
+	do_gettimeofday(&start);
+	do {
+		long size, highmem_size;
+
+		highmem_size = count_highmem_pages();
+		size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
+		tmp = size;
+		size += highmem_size;
+		for_each_populated_zone(zone) {
+			tmp += snapshot_additional_pages(zone);
+			if (is_highmem(zone)) {
+				highmem_size -=
+					zone_page_state(zone, NR_FREE_PAGES);
+			} else {
+				tmp -= zone_page_state(zone, NR_FREE_PAGES);
+				tmp += zone->lowmem_reserve[ZONE_NORMAL];
+			}
+		}
+
+		if (highmem_size < 0)
+			highmem_size = 0;
+
+		tmp += highmem_size;
+		if (tmp > 0) {
+			tmp = __shrink_memory(tmp);
+			if (!tmp)
+				return -ENOMEM;
+			pages += tmp;
+		} else if (size > image_size / PAGE_SIZE) {
+			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
+			pages += tmp;
+		}
+		printk("\b%c", p[i++%4]);
+	} while (tmp > 0);
+	do_gettimeofday(&stop);
+	printk("\bdone (%lu pages freed)\n", pages);
+	swsusp_show_speed(&start, &stop, pages, "Freed");
+
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 /**
   *	count_pages_for_highmem - compute the number of non-highmem pages
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
new file mode 100644
index 0000000..6f10dfc
--- /dev/null
+++ b/kernel/power/suspend.c
@@ -0,0 +1,300 @@
+/*
+ * kernel/power/suspend.c - Suspend to RAM and standby functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/syscalls.h>
+
+#include "power.h"
+
+const char *const pm_states[PM_SUSPEND_MAX] = {
+	[PM_SUSPEND_STANDBY]	= "standby",
+	[PM_SUSPEND_MEM]	= "mem",
+};
+
+static struct platform_suspend_ops *suspend_ops;
+
+/**
+ *	suspend_set_ops - Set the global suspend method table.
+ *	@ops:	Pointer to ops structure.
+ */
+void suspend_set_ops(struct platform_suspend_ops *ops)
+{
+	mutex_lock(&pm_mutex);
+	suspend_ops = ops;
+	mutex_unlock(&pm_mutex);
+}
+
+bool valid_state(suspend_state_t state)
+{
+	/*
+	 * All states need lowlevel support and need to be valid to the lowlevel
+	 * implementation, no valid callback implies that none are valid.
+	 */
+	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
+}
+
+/**
+ * suspend_valid_only_mem - generic memory-only valid callback
+ *
+ * Platform drivers that implement mem suspend only and only need
+ * to check for that in their .valid callback can use this instead
+ * of rolling their own .valid callback.
+ */
+int suspend_valid_only_mem(suspend_state_t state)
+{
+	return state == PM_SUSPEND_MEM;
+}
+
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+	if (pm_test_level == level) {
+		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+		mdelay(5000);
+		return 1;
+	}
+#endif /* !CONFIG_PM_DEBUG */
+	return 0;
+}
+
+/**
+ *	suspend_prepare - Do prep work before entering low-power state.
+ *
+ *	This is common code that is called for each state that we're entering.
+ *	Run suspend notifiers, allocate a console and stop all processes.
+ */
+static int suspend_prepare(void)
+{
+	int error;
+
+	if (!suspend_ops || !suspend_ops->enter)
+		return -EPERM;
+
+	pm_prepare_console();
+
+	error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+	if (error)
+		goto Finish;
+
+	error = usermodehelper_disable();
+	if (error)
+		goto Finish;
+
+	error = suspend_freeze_processes();
+	if (!error)
+		return 0;
+
+	suspend_thaw_processes();
+	usermodehelper_enable();
+ Finish:
+	pm_notifier_call_chain(PM_POST_SUSPEND);
+	pm_restore_console();
+	return error;
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
+{
+	local_irq_disable();
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
+{
+	local_irq_enable();
+}
+
+/**
+ *	suspend_enter - enter the desired system sleep state.
+ *	@state:		state to enter
+ *
+ *	This function should be called after devices have been suspended.
+ */
+static int suspend_enter(suspend_state_t state)
+{
+	int error;
+
+	if (suspend_ops->prepare) {
+		error = suspend_ops->prepare();
+		if (error)
+			return error;
+	}
+
+	error = dpm_suspend_noirq(PMSG_SUSPEND);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down\n");
+		goto Platfrom_finish;
+	}
+
+	if (suspend_ops->prepare_late) {
+		error = suspend_ops->prepare_late();
+		if (error)
+			goto Power_up_devices;
+	}
+
+	if (suspend_test(TEST_PLATFORM))
+		goto Platform_wake;
+
+	error = disable_nonboot_cpus();
+	if (error || suspend_test(TEST_CPUS))
+		goto Enable_cpus;
+
+	arch_suspend_disable_irqs();
+	BUG_ON(!irqs_disabled());
+
+	error = sysdev_suspend(PMSG_SUSPEND);
+	if (!error) {
+		if (!suspend_test(TEST_CORE))
+			error = suspend_ops->enter(state);
+		sysdev_resume();
+	}
+
+	arch_suspend_enable_irqs();
+	BUG_ON(irqs_disabled());
+
+ Enable_cpus:
+	enable_nonboot_cpus();
+
+ Platform_wake:
+	if (suspend_ops->wake)
+		suspend_ops->wake();
+
+ Power_up_devices:
+	dpm_resume_noirq(PMSG_RESUME);
+
+ Platfrom_finish:
+	if (suspend_ops->finish)
+		suspend_ops->finish();
+
+	return error;
+}
+
+/**
+ *	suspend_devices_and_enter - suspend devices and enter the desired system
+ *				    sleep state.
+ *	@state:		  state to enter
+ */
+int suspend_devices_and_enter(suspend_state_t state)
+{
+	int error;
+
+	if (!suspend_ops)
+		return -ENOSYS;
+
+	if (suspend_ops->begin) {
+		error = suspend_ops->begin(state);
+		if (error)
+			goto Close;
+	}
+	suspend_console();
+	suspend_test_start();
+	error = dpm_suspend_start(PMSG_SUSPEND);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to suspend\n");
+		goto Recover_platform;
+	}
+	suspend_test_finish("suspend devices");
+	if (suspend_test(TEST_DEVICES))
+		goto Recover_platform;
+
+	suspend_enter(state);
+
+ Resume_devices:
+	suspend_test_start();
+	dpm_resume_end(PMSG_RESUME);
+	suspend_test_finish("resume devices");
+	resume_console();
+ Close:
+	if (suspend_ops->end)
+		suspend_ops->end();
+	return error;
+
+ Recover_platform:
+	if (suspend_ops->recover)
+		suspend_ops->recover();
+	goto Resume_devices;
+}
+
+/**
+ *	suspend_finish - Do final work before exiting suspend sequence.
+ *
+ *	Call platform code to clean up, restart processes, and free the
+ *	console that we've allocated. This is not called for suspend-to-disk.
+ */
+static void suspend_finish(void)
+{
+	suspend_thaw_processes();
+	usermodehelper_enable();
+	pm_notifier_call_chain(PM_POST_SUSPEND);
+	pm_restore_console();
+}
+
+/**
+ *	enter_state - Do common work of entering low-power state.
+ *	@state:		pm_state structure for state we're entering.
+ *
+ *	Make sure we're the only ones trying to enter a sleep state. Fail
+ *	if someone has beat us to it, since we don't want anything weird to
+ *	happen when we wake up.
+ *	Then, do the setup for suspend, enter the state, and cleaup (after
+ *	we've woken up).
+ */
+int enter_state(suspend_state_t state)
+{
+	int error;
+
+	if (!valid_state(state))
+		return -ENODEV;
+
+	if (!mutex_trylock(&pm_mutex))
+		return -EBUSY;
+
+	printk(KERN_INFO "PM: Syncing filesystems ... ");
+	sys_sync();
+	printk("done.\n");
+
+	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
+	error = suspend_prepare();
+	if (error)
+		goto Unlock;
+
+	if (suspend_test(TEST_FREEZER))
+		goto Finish;
+
+	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+	error = suspend_devices_and_enter(state);
+
+ Finish:
+	pr_debug("PM: Finishing wakeup.\n");
+	suspend_finish();
+ Unlock:
+	mutex_unlock(&pm_mutex);
+	return error;
+}
+
+/**
+ *	pm_suspend - Externally visible function for suspending system.
+ *	@state:		Enumerated value of state to enter.
+ *
+ *	Determine whether or not value is within range, get state
+ *	structure, and enter (above).
+ */
+int pm_suspend(suspend_state_t state)
+{
+	if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
+		return enter_state(state);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
new file mode 100644
index 0000000..17d8bb1a
--- /dev/null
+++ b/kernel/power/suspend_test.c
@@ -0,0 +1,187 @@
+/*
+ * kernel/power/suspend_test.c - Suspend to RAM and standby test facility.
+ *
+ * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/rtc.h>
+
+#include "power.h"
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending.  Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS	5
+
+static unsigned long suspend_test_start_time;
+
+void suspend_test_start(void)
+{
+	/* FIXME Use better timebase than "jiffies", ideally a clocksource.
+	 * What we want is a hardware counter that will work correctly even
+	 * during the irqs-are-off stages of the suspend/resume cycle...
+	 */
+	suspend_test_start_time = jiffies;
+}
+
+void suspend_test_finish(const char *label)
+{
+	long nj = jiffies - suspend_test_start_time;
+	unsigned msec;
+
+	msec = jiffies_to_msecs(abs(nj));
+	pr_info("PM: %s took %d.%03d seconds\n", label,
+			msec / 1000, msec % 1000);
+
+	/* Warning on suspend means the RTC alarm period needs to be
+	 * larger -- the system was sooo slooowwww to suspend that the
+	 * alarm (should have) fired before the system went to sleep!
+	 *
+	 * Warning on either suspend or resume also means the system
+	 * has some performance issues.  The stack dump of a WARN_ON
+	 * is more likely to get the right attention than a printk...
+	 */
+	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
+}
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system.  RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+	static char err_readtime[] __initdata =
+		KERN_ERR "PM: can't read %s time, err %d\n";
+	static char err_wakealarm [] __initdata =
+		KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+	static char err_suspend[] __initdata =
+		KERN_ERR "PM: suspend test failed, error %d\n";
+	static char info_test[] __initdata =
+		KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+	unsigned long		now;
+	struct rtc_wkalrm	alm;
+	int			status;
+
+	/* this may fail if the RTC hasn't been initialized */
+	status = rtc_read_time(rtc, &alm.time);
+	if (status < 0) {
+		printk(err_readtime, dev_name(&rtc->dev), status);
+		return;
+	}
+	rtc_tm_to_time(&alm.time, &now);
+
+	memset(&alm, 0, sizeof alm);
+	rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+	alm.enabled = true;
+
+	status = rtc_set_alarm(rtc, &alm);
+	if (status < 0) {
+		printk(err_wakealarm, dev_name(&rtc->dev), status);
+		return;
+	}
+
+	if (state == PM_SUSPEND_MEM) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+		if (status == -ENODEV)
+			state = PM_SUSPEND_STANDBY;
+	}
+	if (state == PM_SUSPEND_STANDBY) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+	}
+	if (status < 0)
+		printk(err_suspend, status);
+
+	/* Some platforms can't detect that the alarm triggered the
+	 * wakeup, or (accordingly) disable it after it afterwards.
+	 * It's supposed to give oneshot behavior; cope.
+	 */
+	alm.enabled = false;
+	rtc_set_alarm(rtc, &alm);
+}
+
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+	struct rtc_device *candidate = to_rtc_device(dev);
+
+	if (!candidate->ops->set_alarm)
+		return 0;
+	if (!device_may_wakeup(candidate->dev.parent))
+		return 0;
+
+	*(const char **)name_ptr = dev_name(dev);
+	return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time.  They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+	KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+	unsigned i;
+
+	/* "=mem" ==> "mem" */
+	value++;
+	for (i = 0; i < PM_SUSPEND_MAX; i++) {
+		if (!pm_states[i])
+			continue;
+		if (strcmp(pm_states[i], value) != 0)
+			continue;
+		test_state = (__force suspend_state_t) i;
+		return 0;
+	}
+	printk(warn_bad_state, value);
+	return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+	static char		warn_no_rtc[] __initdata =
+		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+	char			*pony = NULL;
+	struct rtc_device	*rtc = NULL;
+
+	/* PM is initialized by now; is that state testable? */
+	if (test_state == PM_SUSPEND_ON)
+		goto done;
+	if (!valid_state(test_state)) {
+		printk(warn_bad_state, pm_states[test_state]);
+		goto done;
+	}
+
+	/* RTCs have initialized by now too ... can we use one? */
+	class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+	if (pony)
+		rtc = rtc_class_open(pony);
+	if (!rtc) {
+		printk(warn_no_rtc);
+		goto done;
+	}
+
+	/* go for it */
+	test_wakealarm(rtc, test_state);
+	rtc_class_close(rtc);
+done:
+	return 0;
+}
+late_initcall(test_suspend);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 78c3504..6a07f4d 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -55,14 +55,6 @@
 
 #include "power.h"
 
-/*
- * Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N bytes, but if that is impossible, it will
- * try to create the smallest image possible.
- */
-unsigned long image_size = 500 * 1024 * 1024;
-
 int in_suspend __nosavedata = 0;
 
 /**
@@ -194,193 +186,3 @@
 			centisecs / 100, centisecs % 100,
 			kps / 1000, (kps % 1000) / 10);
 }
-
-/**
- *	swsusp_shrink_memory -  Try to free as much memory as needed
- *
- *	... but do not OOM-kill anyone
- *
- *	Notice: all userland should be stopped before it is called, or
- *	livelock is possible.
- */
-
-#define SHRINK_BITE	10000
-static inline unsigned long __shrink_memory(long tmp)
-{
-	if (tmp > SHRINK_BITE)
-		tmp = SHRINK_BITE;
-	return shrink_all_memory(tmp);
-}
-
-int swsusp_shrink_memory(void)
-{
-	long tmp;
-	struct zone *zone;
-	unsigned long pages = 0;
-	unsigned int i = 0;
-	char *p = "-\\|/";
-	struct timeval start, stop;
-
-	printk(KERN_INFO "PM: Shrinking memory...  ");
-	do_gettimeofday(&start);
-	do {
-		long size, highmem_size;
-
-		highmem_size = count_highmem_pages();
-		size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
-		tmp = size;
-		size += highmem_size;
-		for_each_populated_zone(zone) {
-			tmp += snapshot_additional_pages(zone);
-			if (is_highmem(zone)) {
-				highmem_size -=
-					zone_page_state(zone, NR_FREE_PAGES);
-			} else {
-				tmp -= zone_page_state(zone, NR_FREE_PAGES);
-				tmp += zone->lowmem_reserve[ZONE_NORMAL];
-			}
-		}
-
-		if (highmem_size < 0)
-			highmem_size = 0;
-
-		tmp += highmem_size;
-		if (tmp > 0) {
-			tmp = __shrink_memory(tmp);
-			if (!tmp)
-				return -ENOMEM;
-			pages += tmp;
-		} else if (size > image_size / PAGE_SIZE) {
-			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
-			pages += tmp;
-		}
-		printk("\b%c", p[i++%4]);
-	} while (tmp > 0);
-	do_gettimeofday(&stop);
-	printk("\bdone (%lu pages freed)\n", pages);
-	swsusp_show_speed(&start, &stop, pages, "Freed");
-
-	return 0;
-}
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
- * resume.  The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
-	unsigned long phys_start;
-	unsigned int size;
-	void *kaddr;
-	void *data;
-	struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- *	hibernate_nvs_register - register platform NVS memory region to save
- *	@start - physical address of the region
- *	@size - size of the region
- *
- *	The NVS region need not be page-aligned (both ends) and we arrange
- *	things so that the data from page-aligned addresses in this region will
- *	be copied into separate RAM pages.
- */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
-{
-	struct nvs_page *entry, *next;
-
-	while (size > 0) {
-		unsigned int nr_bytes;
-
-		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
-		if (!entry)
-			goto Error;
-
-		list_add_tail(&entry->node, &nvs_list);
-		entry->phys_start = start;
-		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
-		entry->size = (size < nr_bytes) ? size : nr_bytes;
-
-		start += entry->size;
-		size -= entry->size;
-	}
-	return 0;
-
- Error:
-	list_for_each_entry_safe(entry, next, &nvs_list, node) {
-		list_del(&entry->node);
-		kfree(entry);
-	}
-	return -ENOMEM;
-}
-
-/**
- *	hibernate_nvs_free - free data pages allocated for saving NVS regions
- */
-void hibernate_nvs_free(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			free_page((unsigned long)entry->data);
-			entry->data = NULL;
-			if (entry->kaddr) {
-				iounmap(entry->kaddr);
-				entry->kaddr = NULL;
-			}
-		}
-}
-
-/**
- *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int hibernate_nvs_alloc(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node) {
-		entry->data = (void *)__get_free_page(GFP_KERNEL);
-		if (!entry->data) {
-			hibernate_nvs_free();
-			return -ENOMEM;
-		}
-	}
-	return 0;
-}
-
-/**
- *	hibernate_nvs_save - save NVS memory regions
- */
-void hibernate_nvs_save(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			entry->kaddr = ioremap(entry->phys_start, entry->size);
-			memcpy(entry->data, entry->kaddr, entry->size);
-		}
-}
-
-/**
- *	hibernate_nvs_restore - restore NVS memory regions
- *
- *	This function is going to be called with interrupts disabled, so it
- *	cannot iounmap the virtual addresses used to access the NVS region.
- */
-void hibernate_nvs_restore(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data)
-			memcpy(entry->kaddr, entry->data, entry->size);
-}
diff --git a/kernel/profile.c b/kernel/profile.c
index 7724e04..28cf26a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -111,12 +111,6 @@
 	/* only text is profiled */
 	prof_len = (_etext - _stext) >> prof_shift;
 	buffer_bytes = prof_len*sizeof(atomic_t);
-	if (!slab_is_available()) {
-		prof_buffer = alloc_bootmem(buffer_bytes);
-		alloc_bootmem_cpumask_var(&prof_cpu_mask);
-		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
-		return 0;
-	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5..f6d8b8c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -25,16 +25,6 @@
 
 
 /*
- * Initialize a new task whose father had been ptraced.
- *
- * Called from copy_process().
- */
-void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	arch_ptrace_fork(child, clone_flags);
-}
-
-/*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
@@ -185,10 +175,11 @@
 	if (same_thread_group(task, current))
 		goto out;
 
-	/* Protect exec's credential calculations against our interference;
-	 * SUID, SGID and LSM creds get determined differently under ptrace.
+	/* Protect the target's credential calculations against our
+	 * interference; SUID, SGID and LSM creds get determined differently
+	 * under ptrace.
 	 */
-	retval = mutex_lock_interruptible(&task->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&task->cred_guard_mutex);
 	if (retval  < 0)
 		goto out;
 
@@ -232,7 +223,7 @@
 bad:
 	write_unlock_irqrestore(&tasklist_lock, flags);
 	task_unlock(task);
-	mutex_unlock(&task->cred_exec_mutex);
+	mutex_unlock(&task->cred_guard_mutex);
 out:
 	return retval;
 }
@@ -304,6 +295,8 @@
 	if (child->ptrace) {
 		child->exit_code = data;
 		dead = __ptrace_detach(current, child);
+		if (!child->exit_state)
+			wake_up_process(child);
 	}
 	write_unlock_irq(&tasklist_lock);
 
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ce97a4d..beb0e65 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1356,17 +1356,11 @@
 
 		rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
 		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
-		ret = 0;
+		ret = 0; /* unused */
 		__wait_event_interruptible(rcu_ctrlblk.sched_wq,
 			rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
 			ret);
 
-		/*
-		 * Signals would prevent us from sleeping, and we cannot
-		 * do much with them in any case.  So flush them.
-		 */
-		if (ret)
-			flush_signals(current);
 		couldsleepnext = 0;
 
 	} while (!kthread_should_stop());
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d2a372f..0dccfbb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1259,31 +1259,44 @@
 	check_cpu_stall(rsp, rdp);
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->qs_pending)
+	if (rdp->qs_pending) {
+		rdp->n_rp_qs_pending++;
 		return 1;
+	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+		rdp->n_rp_cb_ready++;
 		return 1;
+	}
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		rdp->n_rp_cpu_needs_gp++;
 		return 1;
+	}
 
 	/* Has another RCU grace period completed?  */
-	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
+		rdp->n_rp_gp_completed++;
 		return 1;
+	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
+		rdp->n_rp_gp_started++;
 		return 1;
+	}
 
 	/* Has an RCU GP gone long enough to send resched IPIs &c? */
 	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
-	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+		rdp->n_rp_need_fqs++;
 		return 1;
+	}
 
 	/* nothing to do */
+	rdp->n_rp_need_nothing++;
 	return 0;
 }
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4b1875b..fe1dcdb 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -213,7 +213,63 @@
 	.release = single_release,
 };
 
-static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+	seq_printf(m, "%3d%cnp=%ld "
+		   "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->n_rcu_pending,
+		   rdp->n_rp_qs_pending,
+		   rdp->n_rp_cb_ready,
+		   rdp->n_rp_cpu_needs_gp,
+		   rdp->n_rp_gp_completed,
+		   rdp->n_rp_gp_started,
+		   rdp->n_rp_need_fqs,
+		   rdp->n_rp_need_nothing);
+}
+
+static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+{
+	int cpu;
+	struct rcu_data *rdp;
+
+	for_each_possible_cpu(cpu) {
+		rdp = rsp->rda[cpu];
+		if (rdp->beenonline)
+			print_one_rcu_pending(m, rdp);
+	}
+}
+
+static int show_rcu_pending(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_rcu_pendings(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_rcu_pendings(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcu_pending, NULL);
+}
+
+static struct file_operations rcu_pending_fops = {
+	.owner = THIS_MODULE,
+	.open = rcu_pending_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir;
+static struct dentry *datadir;
+static struct dentry *datadir_csv;
+static struct dentry *gpdir;
+static struct dentry *hierdir;
+static struct dentry *rcu_pendingdir;
+
 static int __init rcuclassic_trace_init(void)
 {
 	rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@
 						NULL, &rcuhier_fops);
 	if (!hierdir)
 		goto free_out;
+
+	rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
+						NULL, &rcu_pending_fops);
+	if (!rcu_pendingdir)
+		goto free_out;
 	return 0;
 free_out:
 	if (datadir)
@@ -257,6 +318,7 @@
 	debugfs_remove(datadir_csv);
 	debugfs_remove(gpdir);
 	debugfs_remove(hierdir);
+	debugfs_remove(rcu_pendingdir);
 	debugfs_remove(rcudir);
 }
 
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 69d9cb9..820c5af 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -300,7 +300,8 @@
  * assigned pending owner [which might not have taken the
  * lock yet]:
  */
-static inline int try_to_steal_lock(struct rt_mutex *lock)
+static inline int try_to_steal_lock(struct rt_mutex *lock,
+				    struct task_struct *task)
 {
 	struct task_struct *pendowner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *next;
@@ -309,11 +310,11 @@
 	if (!rt_mutex_owner_pending(lock))
 		return 0;
 
-	if (pendowner == current)
+	if (pendowner == task)
 		return 1;
 
 	spin_lock_irqsave(&pendowner->pi_lock, flags);
-	if (current->prio >= pendowner->prio) {
+	if (task->prio >= pendowner->prio) {
 		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 0;
 	}
@@ -338,21 +339,21 @@
 	 * We are going to steal the lock and a waiter was
 	 * enqueued on the pending owners pi_waiters queue. So
 	 * we have to enqueue this waiter into
-	 * current->pi_waiters list. This covers the case,
-	 * where current is boosted because it holds another
+	 * task->pi_waiters list. This covers the case,
+	 * where task is boosted because it holds another
 	 * lock and gets unboosted because the booster is
 	 * interrupted, so we would delay a waiter with higher
-	 * priority as current->normal_prio.
+	 * priority as task->normal_prio.
 	 *
 	 * Note: in the rare case of a SCHED_OTHER task changing
 	 * its priority and thus stealing the lock, next->task
-	 * might be current:
+	 * might be task:
 	 */
-	if (likely(next->task != current)) {
-		spin_lock_irqsave(&current->pi_lock, flags);
-		plist_add(&next->pi_list_entry, &current->pi_waiters);
-		__rt_mutex_adjust_prio(current);
-		spin_unlock_irqrestore(&current->pi_lock, flags);
+	if (likely(next->task != task)) {
+		spin_lock_irqsave(&task->pi_lock, flags);
+		plist_add(&next->pi_list_entry, &task->pi_waiters);
+		__rt_mutex_adjust_prio(task);
+		spin_unlock_irqrestore(&task->pi_lock, flags);
 	}
 	return 1;
 }
@@ -389,7 +390,7 @@
 	 */
 	mark_rt_mutex_waiters(lock);
 
-	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
+	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
 		return 0;
 
 	/* We got the lock. */
@@ -411,6 +412,7 @@
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
+				   struct task_struct *task,
 				   int detect_deadlock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
@@ -418,21 +420,21 @@
 	unsigned long flags;
 	int chain_walk = 0, res;
 
-	spin_lock_irqsave(&current->pi_lock, flags);
-	__rt_mutex_adjust_prio(current);
-	waiter->task = current;
+	spin_lock_irqsave(&task->pi_lock, flags);
+	__rt_mutex_adjust_prio(task);
+	waiter->task = task;
 	waiter->lock = lock;
-	plist_node_init(&waiter->list_entry, current->prio);
-	plist_node_init(&waiter->pi_list_entry, current->prio);
+	plist_node_init(&waiter->list_entry, task->prio);
+	plist_node_init(&waiter->pi_list_entry, task->prio);
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
 		top_waiter = rt_mutex_top_waiter(lock);
 	plist_add(&waiter->list_entry, &lock->wait_list);
 
-	current->pi_blocked_on = waiter;
+	task->pi_blocked_on = waiter;
 
-	spin_unlock_irqrestore(&current->pi_lock, flags);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		spin_lock_irqsave(&owner->pi_lock, flags);
@@ -460,7 +462,7 @@
 	spin_unlock(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 current);
+					 task);
 
 	spin_lock(&lock->wait_lock);
 
@@ -605,6 +607,85 @@
 	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
 }
 
+/**
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock:		 the rt_mutex to take
+ * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
+ * 			 or TASK_UNINTERRUPTIBLE)
+ * @timeout:		 the pre-initialized and started timer, or NULL for none
+ * @waiter:		 the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:	 passed to task_blocks_on_rt_mutex
+ *
+ * lock->wait_lock must be held by the caller.
+ */
+static int __sched
+__rt_mutex_slowlock(struct rt_mutex *lock, int state,
+		    struct hrtimer_sleeper *timeout,
+		    struct rt_mutex_waiter *waiter,
+		    int detect_deadlock)
+{
+	int ret = 0;
+
+	for (;;) {
+		/* Try to acquire the lock: */
+		if (try_to_take_rt_mutex(lock))
+			break;
+
+		/*
+		 * TASK_INTERRUPTIBLE checks for signals and
+		 * timeout. Ignored otherwise.
+		 */
+		if (unlikely(state == TASK_INTERRUPTIBLE)) {
+			/* Signal pending? */
+			if (signal_pending(current))
+				ret = -EINTR;
+			if (timeout && !timeout->task)
+				ret = -ETIMEDOUT;
+			if (ret)
+				break;
+		}
+
+		/*
+		 * waiter->task is NULL the first time we come here and
+		 * when we have been woken up by the previous owner
+		 * but the lock got stolen by a higher prio task.
+		 */
+		if (!waiter->task) {
+			ret = task_blocks_on_rt_mutex(lock, waiter, current,
+						      detect_deadlock);
+			/*
+			 * If we got woken up by the owner then start loop
+			 * all over without going into schedule to try
+			 * to get the lock now:
+			 */
+			if (unlikely(!waiter->task)) {
+				/*
+				 * Reset the return value. We might
+				 * have returned with -EDEADLK and the
+				 * owner released the lock while we
+				 * were walking the pi chain.
+				 */
+				ret = 0;
+				continue;
+			}
+			if (unlikely(ret))
+				break;
+		}
+
+		spin_unlock(&lock->wait_lock);
+
+		debug_rt_mutex_print_deadlock(waiter);
+
+		if (waiter->task)
+			schedule_rt_mutex(lock);
+
+		spin_lock(&lock->wait_lock);
+		set_current_state(state);
+	}
+
+	return ret;
+}
+
 /*
  * Slow path lock function:
  */
@@ -636,62 +717,8 @@
 			timeout->task = NULL;
 	}
 
-	for (;;) {
-		/* Try to acquire the lock: */
-		if (try_to_take_rt_mutex(lock))
-			break;
-
-		/*
-		 * TASK_INTERRUPTIBLE checks for signals and
-		 * timeout. Ignored otherwise.
-		 */
-		if (unlikely(state == TASK_INTERRUPTIBLE)) {
-			/* Signal pending? */
-			if (signal_pending(current))
-				ret = -EINTR;
-			if (timeout && !timeout->task)
-				ret = -ETIMEDOUT;
-			if (ret)
-				break;
-		}
-
-		/*
-		 * waiter.task is NULL the first time we come here and
-		 * when we have been woken up by the previous owner
-		 * but the lock got stolen by a higher prio task.
-		 */
-		if (!waiter.task) {
-			ret = task_blocks_on_rt_mutex(lock, &waiter,
-						      detect_deadlock);
-			/*
-			 * If we got woken up by the owner then start loop
-			 * all over without going into schedule to try
-			 * to get the lock now:
-			 */
-			if (unlikely(!waiter.task)) {
-				/*
-				 * Reset the return value. We might
-				 * have returned with -EDEADLK and the
-				 * owner released the lock while we
-				 * were walking the pi chain.
-				 */
-				ret = 0;
-				continue;
-			}
-			if (unlikely(ret))
-				break;
-		}
-
-		spin_unlock(&lock->wait_lock);
-
-		debug_rt_mutex_print_deadlock(&waiter);
-
-		if (waiter.task)
-			schedule_rt_mutex(lock);
-
-		spin_lock(&lock->wait_lock);
-		set_current_state(state);
-	}
+	ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
+				  detect_deadlock);
 
 	set_current_state(TASK_RUNNING);
 
@@ -864,9 +891,9 @@
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
 /**
- * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
- *				       the timeout structure is provided
- *				       by the caller
+ * rt_mutex_timed_lock - lock a rt_mutex interruptible
+ *			the timeout structure is provided
+ *			by the caller
  *
  * @lock: 		the rt_mutex to be locked
  * @timeout:		timeout structure or NULL (no timeout)
@@ -913,7 +940,7 @@
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 
-/***
+/**
  * rt_mutex_destroy - mark a mutex unusable
  * @lock: the mutex to be destroyed
  *
@@ -986,6 +1013,59 @@
 }
 
 /**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:		the rt_mutex to take
+ * @waiter:		the pre-initialized rt_mutex_waiter
+ * @task:		the task to prepare
+ * @detect_deadlock:	perform deadlock detection (1) or not (0)
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+			      struct rt_mutex_waiter *waiter,
+			      struct task_struct *task, int detect_deadlock)
+{
+	int ret;
+
+	spin_lock(&lock->wait_lock);
+
+	mark_rt_mutex_waiters(lock);
+
+	if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
+		/* We got the lock for task. */
+		debug_rt_mutex_lock(lock);
+
+		rt_mutex_set_owner(lock, task, 0);
+
+		rt_mutex_deadlock_account_lock(lock, task);
+		return 1;
+	}
+
+	ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
+
+
+	if (ret && !waiter->task) {
+		/*
+		 * Reset the return value. We might have
+		 * returned with -EDEADLK and the owner
+		 * released the lock while we were walking the
+		 * pi chain.  Let the waiter sort it out.
+		 */
+		ret = 0;
+	}
+	spin_unlock(&lock->wait_lock);
+
+	debug_rt_mutex_print_deadlock(waiter);
+
+	return ret;
+}
+
+/**
  * rt_mutex_next_owner - return the next owner of the lock
  *
  * @lock: the rt lock query
@@ -1004,3 +1084,57 @@
 
 	return rt_mutex_top_waiter(lock)->task;
 }
+
+/**
+ * rt_mutex_finish_proxy_lock() - Complete lock acquisition
+ * @lock:		the rt_mutex we were woken on
+ * @to:			the timeout, null if none. hrtimer should already have
+ * 			been started.
+ * @waiter:		the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:	perform deadlock detection (1) or not (0)
+ *
+ * Complete the lock acquisition started our behalf by another thread.
+ *
+ * Returns:
+ *  0 - success
+ * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
+ *
+ * Special API call for PI-futex requeue support
+ */
+int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+			       struct hrtimer_sleeper *to,
+			       struct rt_mutex_waiter *waiter,
+			       int detect_deadlock)
+{
+	int ret;
+
+	spin_lock(&lock->wait_lock);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
+				  detect_deadlock);
+
+	set_current_state(TASK_RUNNING);
+
+	if (unlikely(waiter->task))
+		remove_waiter(lock, waiter);
+
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
+
+	spin_unlock(&lock->wait_lock);
+
+	/*
+	 * Readjust priority, when we did not get the lock. We might have been
+	 * the pending owner and boosted. Since we did not take the lock, the
+	 * PI boost has to go.
+	 */
+	if (unlikely(ret))
+		rt_mutex_adjust_prio(current);
+
+	return ret;
+}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index e124bf5..97a2f81 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -120,6 +120,14 @@
 				       struct task_struct *proxy_owner);
 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 				  struct task_struct *proxy_owner);
+extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+				     struct rt_mutex_waiter *waiter,
+				     struct task_struct *task,
+				     int detect_deadlock);
+extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+				      struct hrtimer_sleeper *to,
+				      struct rt_mutex_waiter *waiter,
+				      int detect_deadlock);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"
diff --git a/kernel/sched.c b/kernel/sched.c
index 26efa47..8ec9d13 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,6 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
+#include <linux/perf_counter.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -68,17 +69,18 @@
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -584,6 +580,7 @@
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
+	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -630,6 +627,10 @@
 	struct list_head migration_queue;
 #endif
 
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
@@ -692,7 +693,7 @@
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -1728,6 +1729,8 @@
 }
 #endif
 
+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -1958,7 +1961,7 @@
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
-	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+	trace_sched_migrate_task(p, new_cpu);
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1967,12 +1970,16 @@
 		p->se.sleep_start -= clock_offset;
 	if (p->se.block_start)
 		p->se.block_start -= clock_offset;
+#endif
 	if (old_cpu != new_cpu) {
-		schedstat_inc(p, se.nr_migrations);
+		p->se.nr_migrations++;
+		new_rq->nr_migrations_in++;
+#ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
-	}
 #endif
+		perf_counter_task_migration(p, new_cpu);
+	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
 
@@ -2015,6 +2022,49 @@
 }
 
 /*
+ * wait_task_context_switch -	wait for a thread to complete at least one
+ *				context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+	unsigned long nvcsw, nivcsw, flags;
+	int running;
+	struct rq *rq;
+
+	nvcsw	= p->nvcsw;
+	nivcsw	= p->nivcsw;
+	for (;;) {
+		/*
+		 * The runqueue is assigned before the actual context
+		 * switch. We need to take the runqueue lock.
+		 *
+		 * We could check initially without the lock but it is
+		 * very likely that we need to take the lock in every
+		 * iteration.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		task_rq_unlock(rq, &flags);
+
+		if (likely(!running))
+			break;
+		/*
+		 * The switch count is incremented before the actual
+		 * context switch. We thus wait for two switches to be
+		 * sure at least one completed.
+		 */
+		if ((p->nvcsw - nvcsw) > 1)
+			break;
+		if ((p->nivcsw - nivcsw) > 1)
+			break;
+
+		cpu_relax();
+	}
+}
+
+/*
  * wait_task_inactive - wait for a thread to unschedule.
  *
  * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2142,6 +2192,7 @@
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * Return a low guess at the load of a migration-source cpu weighted
@@ -2324,6 +2375,27 @@
 
 #endif /* CONFIG_SMP */
 
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p:		the task to evaluate
+ * @func:	the function to be called
+ * @info:	the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ */
+void task_oncpu_function_call(struct task_struct *p,
+			      void (*func) (void *info), void *info)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = task_cpu(p);
+	if (task_curr(p))
+		smp_call_function_single(cpu, func, info, 1);
+	preempt_enable();
+}
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2458,6 +2530,17 @@
 	return success;
 }
 
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_ALL, 0);
@@ -2480,6 +2563,7 @@
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.nr_migrations		= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
 	p->se.start_runtime		= 0;
@@ -2710,6 +2794,7 @@
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	perf_counter_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
 	if (post_schedule)
@@ -2766,7 +2851,7 @@
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch(prev);
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
@@ -2856,19 +2941,81 @@
 	return sum;
 }
 
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 {
-	unsigned long i, running = 0, uninterruptible = 0;
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
 
-	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	return load >> FSHIFT;
+}
+
+/*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+	unsigned long upd = calc_load_update + 10;
+	long active;
+
+	if (time_before(jiffies, upd))
+		return;
+
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
+
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+	long nr_active, delta;
+
+	nr_active = this_rq->nr_running;
+	nr_active += (long) this_rq->nr_uninterruptible;
+
+	if (nr_active != this_rq->calc_load_active) {
+		delta = nr_active - this_rq->calc_load_active;
+		this_rq->calc_load_active = nr_active;
+		atomic_long_add(delta, &calc_load_tasks);
 	}
+}
 
-	if (unlikely((long)uninterruptible < 0))
-		uninterruptible = 0;
-
-	return running + uninterruptible;
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_migrations(int cpu)
+{
+	return cpu_rq(cpu)->nr_migrations_in;
 }
 
 /*
@@ -2899,6 +3046,11 @@
 			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
+
+	if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+		this_rq->calc_load_update += LOAD_FREQ;
+		calc_load_account_active(this_rq);
+	}
 }
 
 #ifdef CONFIG_SMP
@@ -4240,10 +4392,126 @@
 static struct {
 	atomic_t load_balancer;
 	cpumask_var_t cpu_mask;
+	cpumask_var_t ilb_grp_nohz_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu:	The cpu whose lowest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the lowest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd)
+		if (sd && (sd->flags & flag))
+			break;
+
+	return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu:	The cpu whose domains we're iterating over.
+ * @sd:		variable holding the value of the power_savings_sd
+ *		for cpu.
+ * @flag:	The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+	for (sd = lowest_flag_domain(cpu, flag); \
+		(sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group:	group to be checked for semi-idleness
+ *
+ * Returns:	1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi idle if it has atleast one idle-CPU
+ * and atleast one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+	cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+					sched_group_cpus(ilb_group));
+
+	/*
+	 * A sched_group is semi-idle when it has atleast one busy cpu
+	 * and atleast one idle cpu.
+	 */
+	if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+		return 0;
+
+	if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+		return 0;
+
+	return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu:	The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns:	Returns the id of the idle load balancer if it exists,
+ *		Else, returns >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpu's which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+	struct sched_domain *sd;
+	struct sched_group *ilb_group;
+
+	/*
+	 * Have idle load balancer selection from semi-idle packages only
+	 * when power-aware load balancing is enabled
+	 */
+	if (!(sched_smt_power_savings || sched_mc_power_savings))
+		goto out_done;
+
+	/*
+	 * Optimize for the case when we have no idle CPUs or only one
+	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
+	 */
+	if (cpumask_weight(nohz.cpu_mask) < 2)
+		goto out_done;
+
+	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+		ilb_group = sd->groups;
+
+		do {
+			if (is_semi_idle_group(ilb_group))
+				return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+			ilb_group = ilb_group->next;
+
+		} while (ilb_group != sd->groups);
+	}
+
+out_done:
+	return cpumask_first(nohz.cpu_mask);
+}
+#else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+	return cpumask_first(nohz.cpu_mask);
+}
+#endif
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4298,8 +4566,24 @@
 			/* make me the ilb owner */
 			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
 				return 1;
-		} else if (atomic_read(&nohz.load_balancer) == cpu)
+		} else if (atomic_read(&nohz.load_balancer) == cpu) {
+			int new_ilb;
+
+			if (!(sched_smt_power_savings ||
+						sched_mc_power_savings))
+				return 1;
+			/*
+			 * Check to see if there is a more power-efficient
+			 * ilb.
+			 */
+			new_ilb = find_new_ilb(cpu);
+			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+				atomic_set(&nohz.load_balancer, -1);
+				resched_cpu(new_ilb);
+				return 0;
+			}
 			return 1;
+		}
 	} else {
 		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
@@ -4468,15 +4752,7 @@
 		}
 
 		if (atomic_read(&nohz.load_balancer) == -1) {
-			/*
-			 * simple selection for now: Nominate the
-			 * first cpu in the nohz list to be the next
-			 * ilb owner.
-			 *
-			 * TBD: Traverse the sched domains and nominate
-			 * the nearest cpu in the nohz.cpu_mask.
-			 */
-			int ilb = cpumask_first(nohz.cpu_mask);
+			int ilb = find_new_ilb(cpu);
 
 			if (ilb < nr_cpu_ids)
 				resched_cpu(ilb);
@@ -4840,6 +5116,8 @@
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
 
+	perf_counter_task_tick(curr, cpu);
+
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
@@ -5007,13 +5285,15 @@
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
+need_resched:
+	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_qsctr_inc(cpu);
@@ -5053,6 +5333,7 @@
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
@@ -5070,15 +5351,9 @@
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
-}
 
-asmlinkage void __sched schedule(void)
-{
-need_resched:
-	preempt_disable();
-	__schedule();
 	preempt_enable_no_resched();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+	if (need_resched())
 		goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
@@ -5221,7 +5496,7 @@
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
@@ -5241,6 +5516,9 @@
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5279,6 +5557,9 @@
  * with each other. This can prevent needless bouncing between CPUs.
  *
  * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5315,6 +5596,9 @@
  * awakened in the same order in which they were queued.
  *
  * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete(struct completion *x)
 {
@@ -5332,6 +5616,9 @@
  * @x:  holds the state of this particular completion
  *
  * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete_all(struct completion *x)
 {
@@ -6490,8 +6777,9 @@
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free,
-		task_pid_nr(p), task_pid_nr(p->real_parent));
+	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+		task_pid_nr(p), task_pid_nr(p->real_parent),
+		(unsigned long)task_thread_info(p)->flags);
 
 	show_stack(p, NULL);
 }
@@ -6970,6 +7258,14 @@
 
 	}
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7204,6 +7500,8 @@
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
+		rq->calc_load_update = calc_load_update;
+		rq->calc_load_active = 0;
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
@@ -7243,7 +7541,7 @@
 		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
-
+		calc_global_load_remove(rq);
 		/*
 		 * No need to migrate the tasks: it was best-effort if
 		 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -7279,8 +7577,10 @@
 	return NOTIFY_OK;
 }
 
-/* Register at highest priority so that task migration (migrate_all_tasks)
- * happens before everything else.
+/*
+ * Register at high priority so that task migration (migrate_all_tasks)
+ * happens before everything else.  This has to be lower priority than
+ * the notifier in the perf_counter subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
@@ -7525,24 +7825,21 @@
 
 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
 {
+	gfp_t gfp = GFP_KERNEL;
+
 	memset(rd, 0, sizeof(*rd));
 
-	if (bootmem) {
-		alloc_bootmem_cpumask_var(&def_root_domain.span);
-		alloc_bootmem_cpumask_var(&def_root_domain.online);
-		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
-		cpupri_init(&rd->cpupri, true);
-		return 0;
-	}
+	if (bootmem)
+		gfp = GFP_NOWAIT;
 
-	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->span, gfp))
 		goto out;
-	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->online, gfp))
 		goto free_span;
-	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->rto_mask, gfp))
 		goto free_online;
 
-	if (cpupri_init(&rd->cpupri, false) != 0)
+	if (cpupri_init(&rd->cpupri, bootmem) != 0)
 		goto free_rto_mask;
 	return 0;
 
@@ -7753,8 +8050,9 @@
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
  */
 struct static_sched_group {
 	struct sched_group sg;
@@ -7875,7 +8173,7 @@
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j).sd;
-			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+			if (j != group_first_cpu(sd->groups)) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7953,7 +8251,7 @@
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+	if (cpu != group_first_cpu(sd->groups))
 		return;
 
 	child = sd->child;
@@ -8865,7 +9163,7 @@
 	 * we use alloc_bootmem().
 	 */
 	if (alloc_size) {
-		ptr = (unsigned long)alloc_bootmem(alloc_size);
+		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		init_task_group.se = (struct sched_entity **)ptr;
@@ -8938,6 +9236,8 @@
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
+		rq->calc_load_active = 0;
+		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8958,7 +9258,7 @@
 		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 		 * then A0's share of the cpu resource is:
 		 *
-		 * 	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
+		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 		 *
 		 * We achieve this by letting init_task_group's tasks sit
 		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9045,20 +9345,26 @@
 	 * when this runqueue becomes "idle".
 	 */
 	init_idle(current, smp_processor_id());
+
+	calc_load_update = jiffies + LOAD_FREQ;
+
 	/*
 	 * During early bootup we pretend to be a normal task:
 	 */
 	current->sched_class = &fair_sched_class;
 
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
 #endif
-	alloc_bootmem_cpumask_var(&cpu_isolated_map);
+	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
+	perf_counter_init();
+
 	scheduler_running = 1;
 }
 
@@ -9800,6 +10106,13 @@
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;
 
+	/*
+	 * There's always some RT tasks in the root group
+	 * -- migration, kstopmachine etc..
+	 */
+	if (sysctl_sched_rt_runtime == 0)
+		return -EBUSY;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index cdd3c89..7deffc9 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -154,8 +154,12 @@
  */
 int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
 {
+	gfp_t gfp = GFP_KERNEL;
 	int i;
 
+	if (bootmem)
+		gfp = GFP_NOWAIT;
+
 	memset(cp, 0, sizeof(*cp));
 
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -163,9 +167,7 @@
 
 		spin_lock_init(&vec->lock);
 		vec->count = 0;
-		if (bootmem)
-			alloc_bootmem_cpumask_var(&vec->mask);
-		else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+		if (!zalloc_cpumask_var(&vec->mask, gfp))
 			goto cleanup;
 	}
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3816f21..5f9650e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1487,17 +1487,10 @@
 
 	find_matching_se(&se, &pse);
 
-	while (se) {
-		BUG_ON(!pse);
+	BUG_ON(!pse);
 
-		if (wakeup_preempt_entity(se, pse) == 1) {
-			resched_task(curr);
-			break;
-		}
-
-		se = parent_entity(se);
-		pse = parent_entity(pse);
-	}
+	if (wakeup_preempt_entity(se, pse) == 1)
+		resched_task(curr);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 8a21a2e..499672c 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -22,7 +22,8 @@
 static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	schedstat_inc(rq, sched_goidle);
-
+	/* adjust the active tasks as we might go into a long sleep */
+	calc_load_account_active(rq);
 	return rq->idle;
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f2c66f8..9bf0d2a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1591,7 +1591,7 @@
 	unsigned int i;
 
 	for_each_possible_cpu(i)
-		alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
+		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
 }
 #endif /* CONFIG_SMP */
diff --git a/kernel/signal.c b/kernel/signal.c
index d803473..809a228 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -41,8 +41,6 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
-DEFINE_TRACE(sched_signal_send);
-
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
@@ -249,14 +247,19 @@
 /*
  * Flush all pending signals for a task.
  */
+void __flush_signals(struct task_struct *t)
+{
+	clear_tsk_thread_flag(t, TIF_SIGPENDING);
+	flush_sigqueue(&t->pending);
+	flush_sigqueue(&t->signal->shared_pending);
+}
+
 void flush_signals(struct task_struct *t)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	clear_tsk_thread_flag(t, TIF_SIGPENDING);
-	flush_sigqueue(&t->pending);
-	flush_sigqueue(&t->signal->shared_pending);
+	__flush_signals(t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 }
 
@@ -2278,24 +2281,17 @@
 	return kill_something_info(sig, &info, pid);
 }
 
-static int do_tkill(pid_t tgid, pid_t pid, int sig)
+static int
+do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
-	int error;
-	struct siginfo info;
 	struct task_struct *p;
 	unsigned long flags;
-
-	error = -ESRCH;
-	info.si_signo = sig;
-	info.si_errno = 0;
-	info.si_code = SI_TKILL;
-	info.si_pid = task_tgid_vnr(current);
-	info.si_uid = current_uid();
+	int error = -ESRCH;
 
 	rcu_read_lock();
 	p = find_task_by_vpid(pid);
 	if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
-		error = check_kill_permission(sig, &info, p);
+		error = check_kill_permission(sig, info, p);
 		/*
 		 * The null signal is a permissions and process existence
 		 * probe.  No signal is actually delivered.
@@ -2305,7 +2301,7 @@
 		 * signal is private anyway.
 		 */
 		if (!error && sig && lock_task_sighand(p, &flags)) {
-			error = specific_send_sig_info(sig, &info, p);
+			error = specific_send_sig_info(sig, info, p);
 			unlock_task_sighand(p, &flags);
 		}
 	}
@@ -2314,6 +2310,19 @@
 	return error;
 }
 
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
+{
+	struct siginfo info;
+
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_TKILL;
+	info.si_pid = task_tgid_vnr(current);
+	info.si_uid = current_uid();
+
+	return do_send_specific(tgid, pid, sig, &info);
+}
+
 /**
  *  sys_tgkill - send signal to one specific thread
  *  @tgid: the thread group ID of the thread
@@ -2363,6 +2372,32 @@
 	return kill_proc_info(sig, &info, pid);
 }
 
+long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+{
+	/* This is only valid for single tasks */
+	if (pid <= 0 || tgid <= 0)
+		return -EINVAL;
+
+	/* Not even root can pretend to send signals from the kernel.
+	   Nor can they impersonate a kill(), which adds source info.  */
+	if (info->si_code >= 0)
+		return -EPERM;
+	info->si_signo = sig;
+
+	return do_send_specific(tgid, pid, sig, info);
+}
+
+SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
+		siginfo_t __user *, uinfo)
+{
+	siginfo_t info;
+
+	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+		return -EFAULT;
+
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *t = current;
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index b28d1913..521ed20 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -372,8 +372,8 @@
 		vsmax *= atomic_read(&slow_work_thread_count);
 		vsmax /= 100;
 
-		prepare_to_wait(&slow_work_thread_wq, &wait,
-				TASK_INTERRUPTIBLE);
+		prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
+					  TASK_INTERRUPTIBLE);
 		if (!freezing(current) &&
 		    !slow_work_threads_should_exit &&
 		    !slow_work_available(vsmax) &&
diff --git a/kernel/smp.c b/kernel/smp.c
index 858baac..ad63d85 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -52,7 +52,7 @@
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+		if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 				cpu_to_node(cpu)))
 			return NOTIFY_BAD;
 		break;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd3..258885a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,9 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
-#include <trace/irq.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
@@ -186,9 +188,6 @@
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-DEFINE_TRACE(softirq_entry);
-DEFINE_TRACE(softirq_exit);
-
 asmlinkage void __do_softirq(void)
 {
 	struct softirq_action *h;
@@ -828,7 +827,7 @@
 	return 0;
 }
 
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	return 0;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index e7998cf..438d99a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,6 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
+#include <linux/perf_counter.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
 #include <linux/kexec.h>
@@ -1793,6 +1794,12 @@
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
+		case PR_TASK_PERF_COUNTERS_DISABLE:
+			error = perf_counter_task_disable();
+			break;
+		case PR_TASK_PERF_COUNTERS_ENABLE:
+			error = perf_counter_task_enable();
+			break;
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
 			break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 27dad29..68320f6 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -175,3 +175,6 @@
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+
+/* performance counters: */
+cond_syscall(sys_perf_counter_open);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2970d5..ce664f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,6 +49,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -114,6 +115,7 @@
 
 #ifdef CONFIG_MODULES
 extern char modprobe_path[];
+extern int modules_disabled;
 #endif
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
@@ -534,6 +536,17 @@
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "modules_disabled",
+		.data		= &modules_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+	},
 #endif
 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 	{
@@ -731,6 +744,14 @@
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "bootloader_version",
+		.data		= &bootloader_version,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "kstack_depth_to_print",
 		.data		= &kstack_depth_to_print,
 		.maxlen		= sizeof(int),
@@ -912,6 +933,32 @@
 		.child		= slow_work_sysctls,
 	},
 #endif
+#ifdef CONFIG_PERF_COUNTERS
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_paranoid",
+		.data		= &sysctl_perf_counter_paranoid,
+		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_mlock_kb",
+		.data		= &sysctl_perf_counter_mlock,
+		.maxlen		= sizeof(sysctl_perf_counter_mlock),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_max_sample_rate",
+		.data		= &sysctl_perf_counter_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -1225,7 +1272,6 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
-#ifdef CONFIG_SECURITY
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "mmap_min_addr",
@@ -1234,7 +1280,6 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
-#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ecfd7b5..80189f6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -402,9 +402,6 @@
 	unsigned long flags;
 	int ret;
 
-	/* save mult_orig on registration */
-	c->mult_orig = c->mult;
-
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff4..e8c77d9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,7 +22,7 @@
 
 /*
  * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
+ * playing with xtime.
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
@@ -77,6 +77,10 @@
 	clock->cycle_last = cycle_now;
 
 	nsec = cyc2ns(clock, cycle_delta);
+
+	/* If arch requires, add in gettimeoffset() */
+	nsec += arch_gettimeoffset();
+
 	timespec_add_ns(&xtime, nsec);
 
 	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
@@ -111,6 +115,9 @@
 		/* convert to nanoseconds: */
 		nsecs = cyc2ns(clock, cycle_delta);
 
+		/* If arch requires, add in gettimeoffset() */
+		nsecs += arch_gettimeoffset();
+
 	} while (read_seqretry(&xtime_lock, seq));
 
 	timespec_add_ns(ts, nsecs);
diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad..c01e568 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1123,53 +1124,14 @@
 }
 
 /*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
-	return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
-	unsigned long active_tasks; /* fixed-point */
-	static int count = LOAD_FREQ;
-
-	count -= ticks;
-	if (unlikely(count < 0)) {
-		active_tasks = count_active_tasks();
-		do {
-			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-			count += LOAD_FREQ;
-		} while (count < 0);
-	}
-}
-
-/*
  * This function runs timers and the timer-tq in bottom half context.
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
+	perf_counter_do_pending();
+
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1187,16 +1149,6 @@
 }
 
 /*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
-	update_wall_time();
-	calc_load(ticks);
-}
-
-/*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
  * jiffies is defined in the linker script...
@@ -1205,7 +1157,8 @@
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_times(ticks);
+	update_wall_time();
+	calc_global_load();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
@@ -1406,37 +1359,17 @@
 {
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
-	unsigned long seq;
+	struct timespec tp;
 
 	memset(info, 0, sizeof(struct sysinfo));
 
-	do {
-		struct timespec tp;
-		seq = read_seqbegin(&xtime_lock);
+	ktime_get_ts(&tp);
+	monotonic_to_bootbased(&tp);
+	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		/*
-		 * This is annoying.  The below is the same thing
-		 * posix_get_clock_monotonic() does, but it wants to
-		 * take the lock which we want to cover the loads stuff
-		 * too.
-		 */
+	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
 
-		getnstimeofday(&tp);
-		tp.tv_sec += wall_to_monotonic.tv_sec;
-		tp.tv_nsec += wall_to_monotonic.tv_nsec;
-		monotonic_to_bootbased(&tp);
-		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
-			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
-			tp.tv_sec++;
-		}
-		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
-
-		info->procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	info->procs = nr_threads;
 
 	si_meminfo(info);
 	si_swapinfo(info);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 417d198..4a13e5a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,21 @@
        depends on HAVE_FTRACE_NMI_ENTER
        default y
 
+config EVENT_TRACING
+	select CONTEXT_SWITCH_TRACER
+	bool
+
+config CONTEXT_SWITCH_TRACER
+	select MARKERS
+	bool
+
+# All tracer options should select GENERIC_TRACER. For those options that are
+# enabled by all tracers (context switch and event tracer) they select TRACING.
+# This allows those options to appear when no other tracer is selected. But the
+# options do not appear when something else selects it. We need the two options
+# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
+# hidding of the automatic options options.
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -56,6 +71,11 @@
 	select TRACEPOINTS
 	select NOP_TRACER
 	select BINARY_PRINTF
+	select EVENT_TRACING
+
+config GENERIC_TRACER
+	bool
+	select TRACING
 
 #
 # Minimum requirements an architecture has to meet for us to
@@ -73,14 +93,20 @@
 
 if TRACING_SUPPORT
 
-menu "Tracers"
+menuconfig FTRACE
+	bool "Tracers"
+	default y if DEBUG_KERNEL
+	help
+	 Enable the kernel tracing infrastructure.
+
+if FTRACE
 
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
 	depends on HAVE_FUNCTION_TRACER
 	select FRAME_POINTER
 	select KALLSYMS
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
@@ -104,13 +130,14 @@
 	  the return value. This is done by setting the current return 
 	  address on the current task structure into a stack of calls.
 
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	select TRACE_IRQFLAGS
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in irqs-off critical
@@ -131,7 +158,7 @@
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in preemption off critical
@@ -150,7 +177,7 @@
 config SYSPROF_TRACER
 	bool "Sysprof Tracer"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
@@ -158,40 +185,33 @@
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config CONTEXT_SWITCH_TRACER
-	bool "Trace process context switches"
-	select TRACING
-	select MARKERS
-	help
-	  This tracer gets called from the context switch and records
-	  all switching of tasks.
-
-config EVENT_TRACER
-	bool "Trace various events in the kernel"
+config ENABLE_DEFAULT_TRACERS
+	bool "Trace process context switches and events"
+	depends on !GENERIC_TRACER
 	select TRACING
 	help
 	  This tracer hooks to various trace points in the kernel
 	  allowing the user to pick and choose which trace point they
-	  want to trace.
+	  want to trace. It also includes the sched_switch tracer plugin.
 
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
 	depends on HAVE_FTRACE_SYSCALLS
-	select TRACING
+	select GENERIC_TRACER
 	select KALLSYMS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
 config BOOT_TRACER
 	bool "Trace boot initcalls"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer helps developers to optimize boot times: it records
@@ -207,8 +227,36 @@
 	  to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
+	bool
+	select GENERIC_TRACER
+
+choice
+	prompt "Branch Profiling"
+	default BRANCH_PROFILE_NONE
+	help
+	 The branch profiling is a software profiler. It will add hooks
+	 into the C conditionals to test which path a branch takes.
+
+	 The likely/unlikely profiler only looks at the conditions that
+	 are annotated with a likely or unlikely macro.
+
+	 The "all branch" profiler will profile every if statement in the
+	 kernel. This profiler will also enable the likely/unlikely
+	 profiler as well.
+
+	 Either of the above profilers add a bit of overhead to the system.
+	 If unsure choose "No branch profiling".
+
+config BRANCH_PROFILE_NONE
+	bool "No branch profiling"
+	help
+	 No branch profiling. Branch profiling adds a bit of overhead.
+	 Only enable it if you want to analyse the branching behavior.
+	 Otherwise keep it disabled.
+
+config PROFILE_ANNOTATED_BRANCHES
 	bool "Trace likely/unlikely profiler"
-	select TRACING
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
@@ -218,11 +266,9 @@
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
 
-	  Say N if unsure.
-
 config PROFILE_ALL_BRANCHES
 	bool "Profile all if conditionals"
-	depends on TRACE_BRANCH_PROFILING
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all branch conditions. Every if ()
 	  taken in the kernel is recorded whether it hit or miss.
@@ -230,11 +276,12 @@
 
 	  /debugfs/tracing/profile_branch
 
+	  This option also enables the likely/unlikely profiler.
+
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
 	  is to be analyzed
-
-	  Say N if unsure.
+endchoice
 
 config TRACING_BRANCHES
 	bool
@@ -261,7 +308,7 @@
 config POWER_TRACER
 	bool "Trace power consumption behavior"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer helps developers to analyze and optimize the kernels
 	  power management decisions, specifically the C-state and P-state
@@ -295,14 +342,14 @@
 config HW_BRANCH_TRACER
 	depends on HAVE_HW_BRANCH_TRACER
 	bool "Trace hw branches"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
 	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -322,7 +369,7 @@
 
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  The workqueue tracer provides some statistical informations
           about each cpu workqueue thread such as the number of the
@@ -338,7 +385,7 @@
 	select RELAY
 	select DEBUG_FS
 	select TRACEPOINTS
-	select TRACING
+	select GENERIC_TRACER
 	select STACKTRACE
 	help
 	  Say Y here if you want to be able to trace the block layer actions
@@ -375,6 +422,20 @@
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FUNCTION_PROFILER
+	bool "Kernel function profiler"
+	depends on FUNCTION_TRACER
+	default n
+	help
+	 This option enables the kernel function profiler. A file is created
+	 in debugfs called function_profile_enabled which defaults to zero.
+	 When a 1 is echoed into this file profiling begins, and when a
+	 zero is entered, profiling stops. A file in the trace_stats
+	 directory called functions, that show the list of functions that
+	 have been hit and their counters.
+
+	 If in doubt, say N
+
 config FTRACE_MCOUNT_RECORD
 	def_bool y
 	depends on DYNAMIC_FTRACE
@@ -385,7 +446,7 @@
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on GENERIC_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
@@ -396,7 +457,7 @@
 config MMIOTRACE
 	bool "Memory mapped IO tracing"
 	depends on HAVE_MMIOTRACE_SUPPORT && PCI
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  Mmiotrace traces Memory Mapped I/O access and is meant for
 	  debugging and reverse engineering. It is called from the ioremap
@@ -416,7 +477,23 @@
 
 	  Say N, unless you absolutely know what you are doing.
 
-endmenu
+config RING_BUFFER_BENCHMARK
+	tristate "Ring buffer benchmark stress tester"
+	depends on RING_BUFFER
+	help
+	  This option creates a test to stress the ring buffer and bench mark it.
+	  It creates its own ring buffer such that it will not interfer with
+	  any other users of the ring buffer (such as ftrace). It then creates
+	  a producer and consumer that will run for 10 seconds and sleep for
+	  10 seconds. Each interval it will print out the number of events
+	  it recorded and give a rough estimate of how long each iteration took.
+
+	  It does not disable interrupts or raise its priority, so it may be
+	  affected by processes that are running.
+
+	  If unsure, say N
+
+endif # FTRACE
 
 endif # TRACING_SUPPORT
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f51..844164d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,11 +15,17 @@
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+#
+# Make the trace clocks available generally: it's infrastructure
+# relied on by ptrace for example:
+#
+obj-y += trace_clock.o
+
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
+obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
 
 obj-$(CONFIG_TRACING) += trace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
@@ -39,12 +45,14 @@
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events.o
-obj-$(CONFIG_EVENT_TRACER) += events.o
-obj-$(CONFIG_EVENT_TRACER) += trace_export.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
+obj-$(CONFIG_EVENT_TRACING) += trace_events.o
+obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d..39af8af 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -147,7 +151,7 @@
 {
 	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
 		return 1;
-	if (sector < bt->start_lba || sector > bt->end_lba)
+	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
 		return 1;
 	if (bt->pid && pid != bt->pid)
 		return 1;
@@ -192,7 +196,7 @@
 	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
-	if (unlikely(act_log_check(bt, what, sector, pid)))
+	if (act_log_check(bt, what, sector, pid))
 		return;
 	cpu = raw_smp_processor_id();
 
@@ -262,6 +266,7 @@
 {
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
+	debugfs_remove(bt->dir);
 	relay_close(bt->rchan);
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
@@ -403,11 +408,29 @@
 	.remove_buf_file	= blk_remove_buf_file_callback,
 };
 
+static void blk_trace_setup_lba(struct blk_trace *bt,
+				struct block_device *bdev)
+{
+	struct hd_struct *part = NULL;
+
+	if (bdev)
+		part = bdev->bd_part;
+
+	if (part) {
+		bt->start_lba = part->start_sect;
+		bt->end_lba = part->start_sect + part->nr_sects;
+	} else {
+		bt->start_lba = 0;
+		bt->end_lba = -1ULL;
+	}
+}
+
 /*
  * Setup everything required to start tracing
  */
 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
-			struct blk_user_trace_setup *buts)
+		       struct block_device *bdev,
+		       struct blk_user_trace_setup *buts)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	struct dentry *dir = NULL;
@@ -480,10 +503,13 @@
 	if (!bt->act_mask)
 		bt->act_mask = (u16) -1;
 
-	bt->start_lba = buts->start_lba;
-	bt->end_lba = buts->end_lba;
-	if (!bt->end_lba)
-		bt->end_lba = -1ULL;
+	blk_trace_setup_lba(bt, bdev);
+
+	/* overwrite with user settings */
+	if (buts->start_lba)
+		bt->start_lba = buts->start_lba;
+	if (buts->end_lba)
+		bt->end_lba = buts->end_lba;
 
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
@@ -505,6 +531,7 @@
 }
 
 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+		    struct block_device *bdev,
 		    char __user *arg)
 {
 	struct blk_user_trace_setup buts;
@@ -514,7 +541,7 @@
 	if (ret)
 		return -EFAULT;
 
-	ret = do_blk_trace_setup(q, name, dev, &buts);
+	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
@@ -582,7 +609,7 @@
 	switch (cmd) {
 	case BLKTRACESETUP:
 		bdevname(bdev, b);
-		ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
+		ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 	case BLKTRACESTART:
 		start = 1;
@@ -642,12 +669,12 @@
 
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-				rq->cmd_len, rq->cmd);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
-				rw, what, rq->errors, 0, NULL);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
+				what, rq->errors, 0, NULL);
 	}
 }
 
@@ -809,7 +836,6 @@
  * @bio:	the source bio
  * @dev:	target device
  * @from:	source sector
- * @to:		target sector
  *
  * Description:
  *     Device mapper or raid target sometimes need to split a bio because
@@ -817,7 +843,7 @@
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
+				       dev_t dev, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -825,12 +851,13 @@
 	if (likely(!bt))
 		return;
 
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector_from = cpu_to_be64(from);
 
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
-			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
+			sizeof(r), &r);
 }
 
 /**
@@ -854,11 +881,11 @@
 		return;
 
 	if (blk_pc_request(rq))
-		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
-				rq->errors, len, data);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 	else
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
-				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
@@ -971,6 +998,16 @@
 	return te_blk_io_trace(ent) + 1;
 }
 
+static inline u32 t_action(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->action;
+}
+
+static inline u32 t_bytes(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->bytes;
+}
+
 static inline u32 t_sec(const struct trace_entry *ent)
 {
 	return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1033,11 @@
 			  struct blk_io_trace_remap *r)
 {
 	const struct blk_io_trace_remap *__r = pdu_start(ent);
-	__u64 sector = __r->sector;
+	__u64 sector_from = __r->sector_from;
 
-	r->device = be32_to_cpu(__r->device);
 	r->device_from = be32_to_cpu(__r->device_from);
-	r->sector = be64_to_cpu(sector);
+	r->device_to   = be32_to_cpu(__r->device_to);
+	r->sector_from = be64_to_cpu(sector_from);
 }
 
 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1068,98 @@
 				MAJOR(t->device), MINOR(t->device), act, rwbs);
 }
 
+static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
+{
+	const unsigned char *pdu_buf;
+	int pdu_len;
+	int i, end, ret;
+
+	pdu_buf = pdu_start(ent);
+	pdu_len = te_blk_io_trace(ent)->pdu_len;
+
+	if (!pdu_len)
+		return 1;
+
+	/* find the last zero that needs to be printed */
+	for (end = pdu_len - 1; end >= 0; end--)
+		if (pdu_buf[end])
+			break;
+	end++;
+
+	if (!trace_seq_putc(s, '('))
+		return 0;
+
+	for (i = 0; i < pdu_len; i++) {
+
+		ret = trace_seq_printf(s, "%s%02x",
+				       i == 0 ? "" : " ", pdu_buf[i]);
+		if (!ret)
+			return ret;
+
+		/*
+		 * stop when the rest is just zeroes and indicate so
+		 * with a ".." appended
+		 */
+		if (i == end && end != pdu_len - 1)
+			return trace_seq_puts(s, " ..) ");
+	}
+
+	return trace_seq_puts(s, ") ");
+}
+
 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
 {
 	char cmd[TASK_COMM_LEN];
 
 	trace_find_cmdline(ent->pid, cmd);
 
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%s]\n",
-					t_sector(ent), t_sec(ent), cmd);
-	return trace_seq_printf(s, "[%s]\n", cmd);
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = trace_seq_printf(s, "%u ", t_bytes(ent));
+		if (!ret)
+			return 0;
+		ret = blk_log_dump_pdu(s, ent);
+		if (!ret)
+			return 0;
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%s]\n",
+						t_sector(ent), t_sec(ent), cmd);
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	}
 }
 
 static int blk_log_with_error(struct trace_seq *s,
 			      const struct trace_entry *ent)
 {
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
-					t_sec(ent), t_error(ent));
-	return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = blk_log_dump_pdu(s, ent);
+		if (ret)
+			return trace_seq_printf(s, "[%d]\n", t_error(ent));
+		return 0;
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%d]\n",
+						t_sector(ent),
+						t_sec(ent), t_error(ent));
+		return trace_seq_printf(s, "%llu [%d]\n",
+					t_sector(ent), t_error(ent));
+	}
 }
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
 {
-	struct blk_io_trace_remap r = { .device = 0, };
+	struct blk_io_trace_remap r = { .device_from = 0, };
 
 	get_pdu_remap(ent, &r);
 	return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
-			       t_sector(ent),
-			       t_sec(ent), MAJOR(r.device), MINOR(r.device),
-			       (unsigned long long)r.sector);
+				t_sector(ent), t_sec(ent),
+				MAJOR(r.device_from), MINOR(r.device_from),
+				(unsigned long long)r.sector_from);
 }
 
 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1216,6 @@
 static void blk_tracer_start(struct trace_array *tr)
 {
 	blk_tracer_enabled = true;
-	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
 }
 
 static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1228,6 @@
 static void blk_tracer_stop(struct trace_array *tr)
 {
 	blk_tracer_enabled = false;
-	trace_flags |= TRACE_ITER_CONTEXT_INFO;
 }
 
 static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1279,7 @@
 	}
 
 	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
-		ret = trace_seq_printf(s, "Bad pc action %x\n", what);
+		ret = trace_seq_printf(s, "Unknown action %x\n", what);
 	else {
 		ret = log_action(iter, what2act[what].act[long_act]);
 		if (ret)
@@ -1195,9 +1292,6 @@
 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
 					       int flags)
 {
-	if (!trace_print_context(iter))
-		return TRACE_TYPE_PARTIAL_LINE;
-
 	return print_one_line(iter, false);
 }
 
@@ -1232,6 +1326,18 @@
 	return print_one_line(iter, true);
 }
 
+static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
+{
+	/* don't output context-info for blk_classic output */
+	if (bit == TRACE_BLK_OPT_CLASSIC) {
+		if (set)
+			trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+		else
+			trace_flags |= TRACE_ITER_CONTEXT_INFO;
+	}
+	return 0;
+}
+
 static struct tracer blk_tracer __read_mostly = {
 	.name		= "blk",
 	.init		= blk_tracer_init,
@@ -1241,6 +1347,7 @@
 	.print_header	= blk_tracer_print_header,
 	.print_line	= blk_tracer_print_line,
 	.flags		= &blk_tracer_flags,
+	.set_flag	= blk_tracer_set_flag,
 };
 
 static struct trace_event trace_blk_event = {
@@ -1285,7 +1392,8 @@
 /*
  * Setup everything required to start tracing
  */
-static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
+static int blk_trace_setup_queue(struct request_queue *q,
+				 struct block_device *bdev)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	int ret = -ENOMEM;
@@ -1298,9 +1406,10 @@
 	if (!bt->msg_data)
 		goto free_bt;
 
-	bt->dev = dev;
+	bt->dev = bdev->bd_dev;
 	bt->act_mask = (u16)-1;
-	bt->end_lba = -1ULL;
+
+	blk_trace_setup_lba(bt, bdev);
 
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt != NULL) {
@@ -1517,7 +1626,7 @@
 
 	if (attr == &dev_attr_enable) {
 		if (value)
-			ret = blk_trace_setup_queue(q, bdev->bd_dev);
+			ret = blk_trace_setup_queue(q, bdev);
 		else
 			ret = blk_trace_remove_queue(q);
 		goto out_unlock_bdev;
@@ -1525,7 +1634,7 @@
 
 	ret = 0;
 	if (q->blk_trace == NULL)
-		ret = blk_trace_setup_queue(q, bdev->bd_dev);
+		ret = blk_trace_setup_queue(q, bdev);
 
 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
@@ -1548,3 +1657,77 @@
 	return ret ? ret : count;
 }
 
+int blk_trace_init_sysfs(struct device *dev)
+{
+	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
+}
+
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	int i, end;
+	int len = rq->cmd_len;
+	unsigned char *cmd = rq->cmd;
+
+	if (!blk_pc_request(rq)) {
+		buf[0] = '\0';
+		return;
+	}
+
+	for (end = len - 1; end >= 0; end--)
+		if (cmd[end])
+			break;
+	end++;
+
+	for (i = 0; i < len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+		if (i == end && end != len - 1) {
+			sprintf(buf, " ..");
+			break;
+		}
+	}
+}
+
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+	int i = 0;
+
+	if (rw & WRITE)
+		rwbs[i++] = 'W';
+	else if (rw & 1 << BIO_RW_DISCARD)
+		rwbs[i++] = 'D';
+	else if (bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (rw & 1 << BIO_RW_AHEAD)
+		rwbs[i++] = 'A';
+	if (rw & 1 << BIO_RW_BARRIER)
+		rwbs[i++] = 'B';
+	if (rw & 1 << BIO_RW_SYNCIO)
+		rwbs[i++] = 'S';
+	if (rw & 1 << BIO_RW_META)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+	int rw = rq->cmd_flags & 0x03;
+	int bytes;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	bytes = blk_rq_bytes(rq);
+
+	blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 246f2aa..0000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This is the place to register all trace points as events.
- */
-
-#include <linux/stringify.h>
-
-#include <trace/trace_events.h>
-
-#include "trace_output.h"
-
-#include "trace_events_stage_1.h"
-#include "trace_events_stage_2.h"
-#include "trace_events_stage_3.h"
-
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ed080..bb60732 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,11 +29,13 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
-#include "trace.h"
+#include "trace_output.h"
+#include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
 	do {					\
@@ -68,7 +70,7 @@
 
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
-	.func = ftrace_stub,
+	.func		= ftrace_stub,
 };
 
 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +242,580 @@
 #endif
 }
 
+#ifdef CONFIG_FUNCTION_PROFILER
+struct ftrace_profile {
+	struct hlist_node		node;
+	unsigned long			ip;
+	unsigned long			counter;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	unsigned long long		time;
+#endif
+};
+
+struct ftrace_profile_page {
+	struct ftrace_profile_page	*next;
+	unsigned long			index;
+	struct ftrace_profile		records[];
+};
+
+struct ftrace_profile_stat {
+	atomic_t			disabled;
+	struct hlist_head		*hash;
+	struct ftrace_profile_page	*pages;
+	struct ftrace_profile_page	*start;
+	struct tracer_stat		stat;
+};
+
+#define PROFILE_RECORDS_SIZE						\
+	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
+
+#define PROFILES_PER_PAGE					\
+	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
+
+static int ftrace_profile_bits __read_mostly;
+static int ftrace_profile_enabled __read_mostly;
+
+/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
+static DEFINE_MUTEX(ftrace_profile_lock);
+
+static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
+
+#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+
+static void *
+function_stat_next(void *v, int idx)
+{
+	struct ftrace_profile *rec = v;
+	struct ftrace_profile_page *pg;
+
+	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
+
+ again:
+	rec++;
+	if ((void *)rec >= (void *)&pg->records[pg->index]) {
+		pg = pg->next;
+		if (!pg)
+			return NULL;
+		rec = &pg->records[0];
+		if (!rec->counter)
+			goto again;
+	}
+
+	return rec;
+}
+
+static void *function_stat_start(struct tracer_stat *trace)
+{
+	struct ftrace_profile_stat *stat =
+		container_of(trace, struct ftrace_profile_stat, stat);
+
+	if (!stat || !stat->start)
+		return NULL;
+
+	return function_stat_next(&stat->start->records[0], 0);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* function graph compares on total time */
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
+
+	if (a->time < b->time)
+		return -1;
+	if (a->time > b->time)
+		return 1;
+	else
+		return 0;
+}
+#else
+/* not function graph compares against hits */
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
+
+	if (a->counter < b->counter)
+		return -1;
+	if (a->counter > b->counter)
+		return 1;
+	else
+		return 0;
+}
+#endif
+
+static int function_stat_headers(struct seq_file *m)
+{
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "  Function                               "
+		   "Hit    Time            Avg\n"
+		      "  --------                               "
+		   "---    ----            ---\n");
+#else
+	seq_printf(m, "  Function                               Hit\n"
+		      "  --------                               ---\n");
+#endif
+	return 0;
+}
+
+static int function_stat_show(struct seq_file *m, void *v)
+{
+	struct ftrace_profile *rec = v;
+	char str[KSYM_SYMBOL_LEN];
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	static DEFINE_MUTEX(mutex);
+	static struct trace_seq s;
+	unsigned long long avg;
+#endif
+
+	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "    ");
+	avg = rec->time;
+	do_div(avg, rec->counter);
+
+	mutex_lock(&mutex);
+	trace_seq_init(&s);
+	trace_print_graph_duration(rec->time, &s);
+	trace_seq_puts(&s, "    ");
+	trace_print_graph_duration(avg, &s);
+	trace_print_seq(m, &s);
+	mutex_unlock(&mutex);
+#endif
+	seq_putc(m, '\n');
+
+	return 0;
+}
+
+static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
+{
+	struct ftrace_profile_page *pg;
+
+	pg = stat->pages = stat->start;
+
+	while (pg) {
+		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
+		pg->index = 0;
+		pg = pg->next;
+	}
+
+	memset(stat->hash, 0,
+	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
+}
+
+int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
+{
+	struct ftrace_profile_page *pg;
+	int functions;
+	int pages;
+	int i;
+
+	/* If we already allocated, do nothing */
+	if (stat->pages)
+		return 0;
+
+	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!stat->pages)
+		return -ENOMEM;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	functions = ftrace_update_tot_cnt;
+#else
+	/*
+	 * We do not know the number of functions that exist because
+	 * dynamic tracing is what counts them. With past experience
+	 * we have around 20K functions. That should be more than enough.
+	 * It is highly unlikely we will execute every function in
+	 * the kernel.
+	 */
+	functions = 20000;
+#endif
+
+	pg = stat->start = stat->pages;
+
+	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
+
+	for (i = 0; i < pages; i++) {
+		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!pg->next)
+			goto out_free;
+		pg = pg->next;
+	}
+
+	return 0;
+
+ out_free:
+	pg = stat->start;
+	while (pg) {
+		unsigned long tmp = (unsigned long)pg;
+
+		pg = pg->next;
+		free_page(tmp);
+	}
+
+	free_page((unsigned long)stat->pages);
+	stat->pages = NULL;
+	stat->start = NULL;
+
+	return -ENOMEM;
+}
+
+static int ftrace_profile_init_cpu(int cpu)
+{
+	struct ftrace_profile_stat *stat;
+	int size;
+
+	stat = &per_cpu(ftrace_profile_stats, cpu);
+
+	if (stat->hash) {
+		/* If the profile is already created, simply reset it */
+		ftrace_profile_reset(stat);
+		return 0;
+	}
+
+	/*
+	 * We are profiling all functions, but usually only a few thousand
+	 * functions are hit. We'll make a hash of 1024 items.
+	 */
+	size = FTRACE_PROFILE_HASH_SIZE;
+
+	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+
+	if (!stat->hash)
+		return -ENOMEM;
+
+	if (!ftrace_profile_bits) {
+		size--;
+
+		for (; size; size >>= 1)
+			ftrace_profile_bits++;
+	}
+
+	/* Preallocate the function profiling pages */
+	if (ftrace_profile_pages_init(stat) < 0) {
+		kfree(stat->hash);
+		stat->hash = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int ftrace_profile_init(void)
+{
+	int cpu;
+	int ret = 0;
+
+	for_each_online_cpu(cpu) {
+		ret = ftrace_profile_init_cpu(cpu);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* interrupts must be disabled */
+static struct ftrace_profile *
+ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+	struct ftrace_profile *rec;
+	struct hlist_head *hhd;
+	struct hlist_node *n;
+	unsigned long key;
+
+	key = hash_long(ip, ftrace_profile_bits);
+	hhd = &stat->hash[key];
+
+	if (hlist_empty(hhd))
+		return NULL;
+
+	hlist_for_each_entry_rcu(rec, n, hhd, node) {
+		if (rec->ip == ip)
+			return rec;
+	}
+
+	return NULL;
+}
+
+static void ftrace_add_profile(struct ftrace_profile_stat *stat,
+			       struct ftrace_profile *rec)
+{
+	unsigned long key;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
+}
+
+/*
+ * The memory is already allocated, this simply finds a new record to use.
+ */
+static struct ftrace_profile *
+ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+	struct ftrace_profile *rec = NULL;
+
+	/* prevent recursion (from NMIs) */
+	if (atomic_inc_return(&stat->disabled) != 1)
+		goto out;
+
+	/*
+	 * Try to find the function again since an NMI
+	 * could have added it
+	 */
+	rec = ftrace_find_profiled_func(stat, ip);
+	if (rec)
+		goto out;
+
+	if (stat->pages->index == PROFILES_PER_PAGE) {
+		if (!stat->pages->next)
+			goto out;
+		stat->pages = stat->pages->next;
+	}
+
+	rec = &stat->pages->records[stat->pages->index++];
+	rec->ip = ip;
+	ftrace_add_profile(stat, rec);
+
+ out:
+	atomic_dec(&stat->disabled);
+
+	return rec;
+}
+
+static void
+function_profile_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_profile_stat *stat;
+	struct ftrace_profile *rec;
+	unsigned long flags;
+
+	if (!ftrace_profile_enabled)
+		return;
+
+	local_irq_save(flags);
+
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash || !ftrace_profile_enabled)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, ip);
+	if (!rec) {
+		rec = ftrace_profile_alloc(stat, ip);
+		if (!rec)
+			goto out;
+	}
+
+	rec->counter++;
+ out:
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int profile_graph_entry(struct ftrace_graph_ent *trace)
+{
+	function_profile_call(trace->func, 0);
+	return 1;
+}
+
+static void profile_graph_return(struct ftrace_graph_ret *trace)
+{
+	struct ftrace_profile_stat *stat;
+	unsigned long long calltime;
+	struct ftrace_profile *rec;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash || !ftrace_profile_enabled)
+		goto out;
+
+	calltime = trace->rettime - trace->calltime;
+
+	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
+		int index;
+
+		index = trace->depth;
+
+		/* Append this call time to the parent time to subtract */
+		if (index)
+			current->ret_stack[index - 1].subtime += calltime;
+
+		if (current->ret_stack[index].subtime < calltime)
+			calltime -= current->ret_stack[index].subtime;
+		else
+			calltime = 0;
+	}
+
+	rec = ftrace_find_profiled_func(stat, trace->func);
+	if (rec)
+		rec->time += calltime;
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_graph(&profile_graph_return,
+				     &profile_graph_entry);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_graph();
+}
+#else
+static struct ftrace_ops ftrace_profile_ops __read_mostly =
+{
+	.func		= function_profile_call,
+};
+
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_function(&ftrace_profile_ops);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_function(&ftrace_profile_ops);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+static ssize_t
+ftrace_profile_write(struct file *filp, const char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	char buf[64];		/* big enough to hold a number */
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	val = !!val;
+
+	mutex_lock(&ftrace_profile_lock);
+	if (ftrace_profile_enabled ^ val) {
+		if (val) {
+			ret = ftrace_profile_init();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+
+			ret = register_ftrace_profiler();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+			ftrace_profile_enabled = 1;
+		} else {
+			ftrace_profile_enabled = 0;
+			/*
+			 * unregister_ftrace_profiler calls stop_machine
+			 * so this acts like an synchronize_sched.
+			 */
+			unregister_ftrace_profiler();
+		}
+	}
+ out:
+	mutex_unlock(&ftrace_profile_lock);
+
+	filp->f_pos += cnt;
+
+	return cnt;
+}
+
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];		/* big enough to hold a number */
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations ftrace_profile_fops = {
+	.open		= tracing_open_generic,
+	.read		= ftrace_profile_read,
+	.write		= ftrace_profile_write,
+};
+
+/* used to initialize the real stat files */
+static struct tracer_stat function_stats __initdata = {
+	.name		= "functions",
+	.stat_start	= function_stat_start,
+	.stat_next	= function_stat_next,
+	.stat_cmp	= function_stat_cmp,
+	.stat_headers	= function_stat_headers,
+	.stat_show	= function_stat_show
+};
+
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+	struct ftrace_profile_stat *stat;
+	struct dentry *entry;
+	char *name;
+	int ret;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		stat = &per_cpu(ftrace_profile_stats, cpu);
+
+		/* allocate enough for function name + cpu number */
+		name = kmalloc(32, GFP_KERNEL);
+		if (!name) {
+			/*
+			 * The files created are permanent, if something happens
+			 * we still do not free memory.
+			 */
+			kfree(stat);
+			WARN(1,
+			     "Could not allocate stat file for cpu %d\n",
+			     cpu);
+			return;
+		}
+		stat->stat = function_stats;
+		snprintf(name, 32, "function%d", cpu);
+		stat->stat.name = name;
+		ret = register_stat_tracer(&stat->stat);
+		if (ret) {
+			WARN(1,
+			     "Could not register function stat for cpu %d\n",
+			     cpu);
+			kfree(name);
+			return;
+		}
+	}
+
+	entry = debugfs_create_file("function_profile_enabled", 0644,
+				    d_tracer, NULL, &ftrace_profile_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'function_profile_enabled' entry\n");
+}
+
+#else /* CONFIG_FUNCTION_PROFILER */
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+}
+#endif /* CONFIG_FUNCTION_PROFILER */
+
 /* set when tracing only a pid */
 struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +837,6 @@
 	struct rcu_head		rcu;
 };
 
-
 enum {
 	FTRACE_ENABLE_CALLS		= (1 << 0),
 	FTRACE_DISABLE_CALLS		= (1 << 1),
@@ -346,30 +921,6 @@
 	rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = s + size;
-
-	if (ftrace_disabled || !start)
-		return;
-
-	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
-			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
-			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
-	mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -1408,7 +1959,7 @@
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
 {
-	.func = function_trace_probe_call,
+	.func		= function_trace_probe_call,
 };
 
 static int ftrace_probe_registered;
@@ -1823,6 +2374,45 @@
 	ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE		COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+static int __init set_ftrace_notrace(char *str)
+{
+	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+static int __init set_ftrace_filter(char *str)
+{
+	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		ftrace_set_regex(func, strlen(func), 0, enable);
+	}
+}
+
+static void __init set_ftrace_early_filters(void)
+{
+	if (ftrace_filter_buf[0])
+		set_ftrace_early_filter(ftrace_filter_buf, 1);
+	if (ftrace_notrace_buf[0])
+		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2128,38 +2718,23 @@
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("available_filter_functions", 0444,
-				    d_tracer, NULL, &ftrace_avail_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'available_filter_functions' entry\n");
+	trace_create_file("available_filter_functions", 0444,
+			d_tracer, NULL, &ftrace_avail_fops);
 
-	entry = debugfs_create_file("failures", 0444,
-				    d_tracer, NULL, &ftrace_failures_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'failures' entry\n");
+	trace_create_file("failures", 0444,
+			d_tracer, NULL, &ftrace_failures_fops);
 
-	entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
-				    NULL, &ftrace_filter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_filter' entry\n");
+	trace_create_file("set_ftrace_filter", 0644, d_tracer,
+			NULL, &ftrace_filter_fops);
 
-	entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+	trace_create_file("set_ftrace_notrace", 0644, d_tracer,
 				    NULL, &ftrace_notrace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_notrace' entry\n");
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+	trace_create_file("set_graph_function", 0444, d_tracer,
 				    NULL,
 				    &ftrace_graph_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	return 0;
@@ -2197,14 +2772,72 @@
 	return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-			unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = (unsigned long)end;
+
+	if (ftrace_disabled || !start || start == end)
+		return;
+
+	mutex_lock(&ftrace_lock);
+	do_for_each_ftrace_rec(pg, rec) {
+		if ((rec->ip >= s) && (rec->ip < e)) {
+			/*
+			 * rec->ip is changed in ftrace_free_rec()
+			 * It should not between s and e if record was freed.
+			 */
+			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+			ftrace_free_rec(rec);
+		}
+	} while_for_each_ftrace_rec();
+	mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+			       unsigned long *start, unsigned long *end)
 {
 	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(mod, start, end);
 }
 
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		ftrace_init_module(mod, mod->ftrace_callsites,
+				   mod->ftrace_callsites +
+				   mod->num_ftrace_callsites);
+		break;
+	case MODULE_STATE_GOING:
+		ftrace_release(mod->ftrace_callsites,
+			       mod->ftrace_callsites +
+			       mod->num_ftrace_callsites);
+		break;
+	}
+
+	return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+	.notifier_call = ftrace_module_notify,
+	.priority = 0,
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2236,6 +2869,12 @@
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
+	ret = register_module_notifier(&ftrace_module_nb);
+	if (ret)
+		pr_warning("Failed to register trace ftrace module notifier\n");
+
+	set_ftrace_early_filters();
+
 	return;
  failed:
 	ftrace_disabled = 1;
@@ -2417,7 +3056,6 @@
 static __init int ftrace_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -2425,11 +3063,11 @@
 
 	ftrace_init_dyn_debugfs(d_tracer);
 
-	entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
-				    NULL, &ftrace_pid_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_pid' entry\n");
+	trace_create_file("set_ftrace_pid", 0644, d_tracer,
+			    NULL, &ftrace_pid_fops);
+
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3176,7 @@
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_graph_active;
+static int ftrace_graph_active;
 static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2580,12 +3218,12 @@
 		}
 
 		if (t->ret_stack == NULL) {
-			t->curr_ret_stack = -1;
-			/* Make sure IRQs see the -1 first: */
-			barrier();
-			t->ret_stack = ret_stack_list[start++];
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
+			t->curr_ret_stack = -1;
+			/* Make sure the tasks see the -1 first: */
+			smp_wmb();
+			t->ret_stack = ret_stack_list[start++];
 		}
 	} while_each_thread(g, t);
 
@@ -2643,8 +3281,10 @@
 		return -ENOMEM;
 
 	/* The cpu_boot init_task->ret_stack will never be freed */
-	for_each_online_cpu(cpu)
-		ftrace_graph_init_task(idle_task(cpu));
+	for_each_online_cpu(cpu) {
+		if (!idle_task(cpu)->ret_stack)
+			ftrace_graph_init_task(idle_task(cpu));
+	}
 
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
@@ -2690,7 +3330,7 @@
 	mutex_lock(&ftrace_lock);
 
 	/* we currently allow only one tracer registered at a time */
-	if (atomic_read(&ftrace_graph_active)) {
+	if (ftrace_graph_active) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -2698,10 +3338,10 @@
 	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
 	register_pm_notifier(&ftrace_suspend_notifier);
 
-	atomic_inc(&ftrace_graph_active);
+	ftrace_graph_active++;
 	ret = start_graph_tracing();
 	if (ret) {
-		atomic_dec(&ftrace_graph_active);
+		ftrace_graph_active--;
 		goto out;
 	}
 
@@ -2719,10 +3359,10 @@
 {
 	mutex_lock(&ftrace_lock);
 
-	if (!unlikely(atomic_read(&ftrace_graph_active)))
+	if (unlikely(!ftrace_graph_active))
 		goto out;
 
-	atomic_dec(&ftrace_graph_active);
+	ftrace_graph_active--;
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,18 +3376,25 @@
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-	if (atomic_read(&ftrace_graph_active)) {
-		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+	/* Make sure we do not use the parent ret_stack */
+	t->ret_stack = NULL;
+
+	if (ftrace_graph_active) {
+		struct ftrace_ret_stack *ret_stack;
+
+		ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
-		if (!t->ret_stack)
+		if (!ret_stack)
 			return;
 		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
-	} else
-		t->ret_stack = NULL;
+		/* make curr_ret_stack visable before we add the ret_stack */
+		smp_wmb();
+		t->ret_stack = ret_stack;
+	}
 }
 
 void ftrace_graph_exit_task(struct task_struct *t)
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d..86cdf67 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 #include "trace_output.h"
 #include "trace.h"
@@ -42,6 +42,7 @@
 				   gfp_t gfp_flags,
 				   int node)
 {
+	struct ftrace_event_call *call = &event_kmem_alloc;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_alloc_entry *entry;
 	struct ring_buffer_event *event;
@@ -62,7 +63,8 @@
 	entry->gfp_flags	= gfp_flags;
 	entry->node		= node;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
@@ -71,6 +73,7 @@
 				  unsigned long call_site,
 				  const void *ptr)
 {
+	struct ftrace_event_call *call = &event_kmem_free;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_free_entry *entry;
 	struct ring_buffer_event *event;
@@ -86,7 +89,8 @@
 	entry->call_site	= call_site;
 	entry->ptr		= ptr;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf4..2e642b2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,28 @@
 #include "trace.h"
 
 /*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_seq_printf(s, "# compressed entry header\n");
+	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
+	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+			       RINGBUF_TYPE_PADDING);
+	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+			       RINGBUF_TYPE_TIME_EXTEND);
+	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+
+	return ret;
+}
+
+/*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
  * associated with the CPU it is currently executing on.  A reader may read
@@ -182,7 +204,10 @@
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	28
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +216,28 @@
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+	return event->type_len == RINGBUF_TYPE_PADDING
+			&& event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+	return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->len)
-		length = event->len * RB_ALIGNMENT;
+	if (event->type_len)
+		length = event->type_len * RB_ALIGNMENT;
 	else
 		length = event->array[0];
 	return length + RB_EVNT_HDR_SIZE;
@@ -242,12 +247,12 @@
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			/* undefined */
 			return -1;
-		return rb_event_data_length(event);
+		return  event->array[0] + RB_EVNT_HDR_SIZE;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		return RB_LEN_TIME_EXTEND;
@@ -271,7 +276,7 @@
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length = rb_event_length(event);
-	if (event->type != RINGBUF_TYPE_DATA)
+	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +289,9 @@
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
-	if (event->len)
+	if (event->type_len)
 		return (void *)&event->array[0];
 	/* Otherwise length is in array[0] and array[1] has the data */
 	return (void *)&event->array[1];
@@ -316,9 +321,10 @@
 };
 
 struct buffer_page {
+	struct list_head list;		/* list of buffer pages */
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
-	struct list_head list;		/* list of free pages */
+	local_t		 entries;	/* entries on this page */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -361,6 +367,34 @@
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+	struct buffer_data_page field;
+	int ret;
+
+	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+			       "offset:0;\tsize:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp));
+
+	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       (unsigned int)sizeof(field.commit));
+
+	ret = trace_seq_printf(s, "\tfield: char data;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), data),
+			       (unsigned int)BUF_PAGE_SIZE);
+
+	return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -375,8 +409,11 @@
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			nmi_dropped;
+	unsigned long			commit_overrun;
 	unsigned long			overrun;
-	unsigned long			entries;
+	unsigned long			read;
+	local_t				entries;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -389,6 +426,8 @@
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -420,13 +459,18 @@
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
-	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = buffer->clock() << DEBUG_SHIFT;
+	time = rb_time_stamp(buffer, cpu);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -523,6 +567,7 @@
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -593,7 +638,8 @@
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -616,6 +662,7 @@
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -673,7 +720,7 @@
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -947,31 +994,6 @@
 	return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	for (head = 0; head < rb_head_size(cpu_buffer);
-	     head += rb_event_length(event)) {
-
-		event = __rb_page_index(cpu_buffer->head_page, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
-			continue;
-		cpu_buffer->overrun++;
-		cpu_buffer->entries--;
-	}
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 			       struct buffer_page **bpage)
 {
@@ -991,7 +1013,7 @@
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -1110,28 +1132,21 @@
 rb_update_event(struct ring_buffer_event *event,
 			 unsigned type, unsigned length)
 {
-	event->type = type;
+	event->type_len = type;
 
 	switch (type) {
 
 	case RINGBUF_TYPE_PADDING:
-		break;
-
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-		break;
-
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
-	case RINGBUF_TYPE_DATA:
+	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA) {
-			event->len = 0;
+		if (length > RB_MAX_SMALL_DATA)
 			event->array[0] = length;
-		} else
-			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+		else
+			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
@@ -1155,16 +1170,144 @@
 	return length;
 }
 
+
+static struct ring_buffer_event *
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	     unsigned long length, unsigned long tail,
+	     struct buffer_page *commit_page,
+	     struct buffer_page *tail_page, u64 *ts)
+{
+	struct buffer_page *next_page, *head_page, *reader_page;
+	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct ring_buffer_event *event;
+	bool lock_taken = false;
+	unsigned long flags;
+
+	next_page = tail_page;
+
+	local_irq_save(flags);
+	/*
+	 * Since the write to the buffer is still not
+	 * fully lockless, we must be careful with NMIs.
+	 * The locks in the writers are taken when a write
+	 * crosses to a new page. The locks protect against
+	 * races with the readers (this will soon be fixed
+	 * with a lockless solution).
+	 *
+	 * Because we can not protect against NMIs, and we
+	 * want to keep traces reentrant, we need to manage
+	 * what happens when we are in an NMI.
+	 *
+	 * NMIs can happen after we take the lock.
+	 * If we are in an NMI, only take the lock
+	 * if it is not already taken. Otherwise
+	 * simply fail.
+	 */
+	if (unlikely(in_nmi())) {
+		if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+			cpu_buffer->nmi_dropped++;
+			goto out_reset;
+		}
+	} else
+		__raw_spin_lock(&cpu_buffer->lock);
+
+	lock_taken = true;
+
+	rb_inc_page(cpu_buffer, &next_page);
+
+	head_page = cpu_buffer->head_page;
+	reader_page = cpu_buffer->reader_page;
+
+	/* we grabbed the lock before incrementing */
+	if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+		goto out_reset;
+
+	/*
+	 * If for some reason, we had an interrupt storm that made
+	 * it all the way around the buffer, bail, and warn
+	 * about it.
+	 */
+	if (unlikely(next_page == commit_page)) {
+		cpu_buffer->commit_overrun++;
+		goto out_reset;
+	}
+
+	if (next_page == head_page) {
+		if (!(buffer->flags & RB_FL_OVERWRITE))
+			goto out_reset;
+
+		/* tail_page has not moved yet? */
+		if (tail_page == cpu_buffer->tail_page) {
+			/* count overflows */
+			cpu_buffer->overrun +=
+				local_read(&head_page->entries);
+
+			rb_inc_page(cpu_buffer, &head_page);
+			cpu_buffer->head_page = head_page;
+			cpu_buffer->head_page->read = 0;
+		}
+	}
+
+	/*
+	 * If the tail page is still the same as what we think
+	 * it is, then it is up to us to update the tail
+	 * pointer.
+	 */
+	if (tail_page == cpu_buffer->tail_page) {
+		local_set(&next_page->write, 0);
+		local_set(&next_page->entries, 0);
+		local_set(&next_page->page->commit, 0);
+		cpu_buffer->tail_page = next_page;
+
+		/* reread the time stamp */
+		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+		cpu_buffer->tail_page->page->time_stamp = *ts;
+	}
+
+	/*
+	 * The actual tail page has moved forward.
+	 */
+	if (tail < BUF_PAGE_SIZE) {
+		/* Mark the rest of the page with padding */
+		event = __rb_page_index(tail_page, tail);
+		rb_event_set_padding(event);
+	}
+
+	/* Set the write back to the previous setting */
+	local_sub(length, &tail_page->write);
+
+	/*
+	 * If this was a commit entry that failed,
+	 * increment that too
+	 */
+	if (tail_page == cpu_buffer->commit_page &&
+	    tail == rb_commit_index(cpu_buffer)) {
+		rb_set_commit_to_write(cpu_buffer);
+	}
+
+	__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	/* fail and let the caller try again */
+	return ERR_PTR(-EAGAIN);
+
+ out_reset:
+	/* reset write */
+	local_sub(length, &tail_page->write);
+
+	if (likely(lock_taken))
+		__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+	return NULL;
+}
+
 static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-	unsigned long tail, write;
-	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct buffer_page *tail_page, *commit_page;
 	struct ring_buffer_event *event;
-	unsigned long flags;
-	bool lock_taken = false;
+	unsigned long tail, write;
 
 	commit_page = cpu_buffer->commit_page;
 	/* we just need to protect against interrupts */
@@ -1174,112 +1317,9 @@
 	tail = write - length;
 
 	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE) {
-		struct buffer_page *next_page = tail_page;
-
-		local_irq_save(flags);
-		/*
-		 * Since the write to the buffer is still not
-		 * fully lockless, we must be careful with NMIs.
-		 * The locks in the writers are taken when a write
-		 * crosses to a new page. The locks protect against
-		 * races with the readers (this will soon be fixed
-		 * with a lockless solution).
-		 *
-		 * Because we can not protect against NMIs, and we
-		 * want to keep traces reentrant, we need to manage
-		 * what happens when we are in an NMI.
-		 *
-		 * NMIs can happen after we take the lock.
-		 * If we are in an NMI, only take the lock
-		 * if it is not already taken. Otherwise
-		 * simply fail.
-		 */
-		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
-				goto out_reset;
-		} else
-			__raw_spin_lock(&cpu_buffer->lock);
-
-		lock_taken = true;
-
-		rb_inc_page(cpu_buffer, &next_page);
-
-		head_page = cpu_buffer->head_page;
-		reader_page = cpu_buffer->reader_page;
-
-		/* we grabbed the lock before incrementing */
-		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-			goto out_reset;
-
-		/*
-		 * If for some reason, we had an interrupt storm that made
-		 * it all the way around the buffer, bail, and warn
-		 * about it.
-		 */
-		if (unlikely(next_page == commit_page)) {
-			WARN_ON_ONCE(1);
-			goto out_reset;
-		}
-
-		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
-				goto out_reset;
-
-			/* tail_page has not moved yet? */
-			if (tail_page == cpu_buffer->tail_page) {
-				/* count overflows */
-				rb_update_overflow(cpu_buffer);
-
-				rb_inc_page(cpu_buffer, &head_page);
-				cpu_buffer->head_page = head_page;
-				cpu_buffer->head_page->read = 0;
-			}
-		}
-
-		/*
-		 * If the tail page is still the same as what we think
-		 * it is, then it is up to us to update the tail
-		 * pointer.
-		 */
-		if (tail_page == cpu_buffer->tail_page) {
-			local_set(&next_page->write, 0);
-			local_set(&next_page->page->commit, 0);
-			cpu_buffer->tail_page = next_page;
-
-			/* reread the time stamp */
-			*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-			cpu_buffer->tail_page->page->time_stamp = *ts;
-		}
-
-		/*
-		 * The actual tail page has moved forward.
-		 */
-		if (tail < BUF_PAGE_SIZE) {
-			/* Mark the rest of the page with padding */
-			event = __rb_page_index(tail_page, tail);
-			rb_event_set_padding(event);
-		}
-
-		if (tail <= BUF_PAGE_SIZE)
-			/* Set the write back to the previous setting */
-			local_set(&tail_page->write, tail);
-
-		/*
-		 * If this was a commit entry that failed,
-		 * increment that too
-		 */
-		if (tail_page == cpu_buffer->commit_page &&
-		    tail == rb_commit_index(cpu_buffer)) {
-			rb_set_commit_to_write(cpu_buffer);
-		}
-
-		__raw_spin_unlock(&cpu_buffer->lock);
-		local_irq_restore(flags);
-
-		/* fail and let the caller try again */
-		return ERR_PTR(-EAGAIN);
-	}
+	if (write > BUF_PAGE_SIZE)
+		return rb_move_tail(cpu_buffer, length, tail,
+				    commit_page, tail_page, ts);
 
 	/* We reserved something on the buffer */
 
@@ -1289,6 +1329,10 @@
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
+
 	/*
 	 * If this is a commit and the tail is zero, then update
 	 * this page's time stamp.
@@ -1297,16 +1341,38 @@
 		cpu_buffer->commit_page->page->time_stamp = *ts;
 
 	return event;
+}
 
- out_reset:
-	/* reset write */
-	if (tail <= BUF_PAGE_SIZE)
-		local_set(&tail_page->write, tail);
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event)
+{
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
 
-	if (likely(lock_taken))
-		__raw_spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
-	return NULL;
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			return 1;
+	}
+
+	/* could not discard */
+	return 0;
 }
 
 static int
@@ -1351,16 +1417,23 @@
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
 			cpu_buffer->commit_page->page->time_stamp = *ts;
-			event->time_delta = 0;
-			event->array[0] = 0;
+			/* try to discard, since we do not need this */
+			if (!rb_try_to_discard(cpu_buffer, event)) {
+				/* nope, just zero it */
+				event->time_delta = 0;
+				event->array[0] = 0;
+			}
 		}
 		cpu_buffer->write_stamp = *ts;
 		/* let the caller know this was the commit */
 		ret = 1;
 	} else {
-		/* Darn, this is just wasted space */
-		event->time_delta = 0;
-		event->array[0] = 0;
+		/* Try to discard the event */
+		if (!rb_try_to_discard(cpu_buffer, event)) {
+			/* Darn, this is just wasted space */
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		ret = 0;
 	}
 
@@ -1371,13 +1444,14 @@
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned type, unsigned long length)
+		      unsigned long length)
 {
 	struct ring_buffer_event *event;
-	u64 ts, delta;
+	u64 ts, delta = 0;
 	int commit = 0;
 	int nr_loops = 0;
 
+	length = rb_calculate_event_length(length);
  again:
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1465,7 @@
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		return NULL;
 
-	ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
 	/*
 	 * Only the first commit can update the timestamp.
@@ -1401,23 +1475,24 @@
 	 * also be made. But only the entry that did the actual
 	 * commit will be something other than zero.
 	 */
-	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
-	    rb_page_write(cpu_buffer->tail_page) ==
-	    rb_commit_index(cpu_buffer)) {
+	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
+		   rb_page_write(cpu_buffer->tail_page) ==
+		   rb_commit_index(cpu_buffer))) {
+		u64 diff;
 
-		delta = ts - cpu_buffer->write_stamp;
+		diff = ts - cpu_buffer->write_stamp;
 
-		/* make sure this delta is calculated here */
+		/* make sure this diff is calculated here */
 		barrier();
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			delta = 0;
+			goto get_event;
 
-		if (test_time_stamp(delta)) {
+		delta = diff;
+		if (unlikely(test_time_stamp(delta))) {
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
 			if (commit == -EBUSY)
 				return NULL;
 
@@ -1426,12 +1501,11 @@
 
 			RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else
-		/* Non commits have zero deltas */
-		delta = 0;
+	}
 
-	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-	if (PTR_ERR(event) == -EAGAIN)
+ get_event:
+	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event) {
@@ -1448,7 +1522,7 @@
 	 * If the timestamp was commited, make the commit our entry
 	 * now so that we will update it when needed.
 	 */
-	if (commit)
+	if (unlikely(commit))
 		rb_set_commit_event(cpu_buffer, event);
 	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
@@ -1458,6 +1532,36 @@
 	return event;
 }
 
+#define TRACE_RECURSIVE_DEPTH 16
+
+static int trace_recursive_lock(void)
+{
+	current->trace_recursion++;
+
+	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+		return 0;
+
+	/* Disable all tracing before we do anything else */
+	tracing_off_permanent();
+
+	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+		    current->trace_recursion,
+		    hardirq_count() >> HARDIRQ_SHIFT,
+		    softirq_count() >> SOFTIRQ_SHIFT,
+		    in_nmi());
+
+	WARN_ON_ONCE(1);
+	return -1;
+}
+
+static void trace_recursive_unlock(void)
+{
+	WARN_ON_ONCE(!current->trace_recursion);
+
+	current->trace_recursion--;
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1491,6 +1595,9 @@
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (trace_recursive_lock())
+		goto out_nocheck;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1608,10 @@
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	length = rb_calculate_event_length(length);
-	if (length > BUF_PAGE_SIZE)
+	if (length > BUF_MAX_DATA_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1520,6 +1626,9 @@
 	return event;
 
  out:
+	trace_recursive_unlock();
+
+ out_nocheck:
 	ftrace_preempt_enable(resched);
 	return NULL;
 }
@@ -1528,7 +1637,7 @@
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->entries++;
+	local_inc(&cpu_buffer->entries);
 
 	/* Only process further if we own the commit */
 	if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1667,8 @@
 
 	rb_commit(cpu_buffer, event);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
@@ -1570,6 +1681,99 @@
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+	/* array[0] holds the actual length for the discarded event */
+	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+	rb_event_discard(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* The event is discarded regardless */
+	rb_event_discard(event);
+
+	/*
+	 * This must only be called if the event has not been
+	 * committed yet. Thus we can assume that preemption
+	 * is still disabled.
+	 */
+	RB_WARN_ON(buffer, preemptible());
+
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (!rb_try_to_discard(cpu_buffer, event))
+		goto out;
+
+	/*
+	 * The commit is still visible by the reader, so we
+	 * must increment entries.
+	 */
+	local_inc(&cpu_buffer->entries);
+ out:
+	/*
+	 * If a write came in and pushed the tail page
+	 * we still need to update the commit pointer
+	 * if we were the commit.
+	 */
+	if (rb_is_commit(cpu_buffer, event))
+		rb_set_commit_to_write(cpu_buffer);
+
+	trace_recursive_unlock();
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1)
+		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+	else
+		preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
@@ -1589,7 +1793,6 @@
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
 	int cpu, resched;
@@ -1612,9 +1815,10 @@
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer,
-				      RINGBUF_TYPE_DATA, event_length);
+	if (length > BUF_MAX_DATA_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1728,7 +1932,8 @@
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	ret = cpu_buffer->entries;
+	ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+		- cpu_buffer->read;
 
 	return ret;
 }
@@ -1755,6 +1960,47 @@
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->nmi_dropped;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->commit_overrun;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -1770,7 +2016,8 @@
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		entries += cpu_buffer->entries;
+		entries += (local_read(&cpu_buffer->entries) -
+			    cpu_buffer->overrun) - cpu_buffer->read;
 	}
 
 	return entries;
@@ -1862,7 +2109,7 @@
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -1893,7 +2140,7 @@
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -1966,6 +2213,7 @@
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 
 	/* Make the reader page now replace the head */
@@ -2008,8 +2256,9 @@
 
 	event = rb_reader_event(cpu_buffer);
 
-	if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
-		cpu_buffer->entries--;
+	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+			|| rb_discarded_event(event))
+		cpu_buffer->read++;
 
 	rb_update_read_stamp(cpu_buffer, event);
 
@@ -2031,8 +2280,8 @@
 	 * Check if we are at the end of the buffer.
 	 */
 	if (iter->head >= rb_page_size(iter->head_page)) {
-		if (RB_WARN_ON(buffer,
-			       iter->head_page == cpu_buffer->commit_page))
+		/* discarded commits can make the page empty */
+		if (iter->head_page == cpu_buffer->commit_page)
 			return;
 		rb_inc_iter(iter);
 		return;
@@ -2075,12 +2324,10 @@
 	/*
 	 * We repeat when a timestamp is encountered. It is possible
 	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have.  Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * as one timestamp is about to be written, or from discarded
+	 * commits. The most that we can have is the number on a single page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	reader = rb_get_reader_page(cpu_buffer);
@@ -2089,7 +2336,7 @@
 
 	event = rb_reader_event(cpu_buffer);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			RB_WARN_ON(cpu_buffer, 1);
@@ -2146,14 +2393,14 @@
 
  again:
 	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have. Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * We repeat when a timestamp is encountered.
+	 * We can get multiple timestamps by nested interrupts or also
+	 * if filtering is on (discarding commits). Since discarding
+	 * commits can be frequent we can get a lot of timestamps.
+	 * But we limit them by not adding timestamps if they begin
+	 * at the start of a page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))
@@ -2161,7 +2408,7 @@
 
 	event = rb_iter_head_event(iter);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event)) {
 			rb_inc_iter(iter);
@@ -2220,7 +2467,7 @@
 	event = rb_buffer_peek(buffer, cpu, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2248,7 +2495,7 @@
 	event = rb_iter_peek(iter, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2293,7 +2540,7 @@
  out:
 	preempt_enable();
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2386,7 +2633,7 @@
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2411,6 +2658,7 @@
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
 	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->entries, 0);
 	local_set(&cpu_buffer->head_page->page->commit, 0);
 
 	cpu_buffer->head_page->read = 0;
@@ -2420,11 +2668,15 @@
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
+	cpu_buffer->nmi_dropped = 0;
+	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
-	cpu_buffer->entries = 0;
+	cpu_buffer->read = 0;
+	local_set(&cpu_buffer->entries, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2443,6 +2695,8 @@
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	atomic_inc(&cpu_buffer->record_disabled);
+
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	__raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2706,8 @@
 	__raw_spin_unlock(&cpu_buffer->lock);
 
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -2578,28 +2834,6 @@
 }
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
-static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-			      struct buffer_data_page *bpage,
-			      unsigned int offset)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	__raw_spin_lock(&cpu_buffer->lock);
-	for (head = offset; head < local_read(&bpage->commit);
-	     head += rb_event_length(event)) {
-
-		event = __rb_data_page_index(bpage, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
-			continue;
-		cpu_buffer->entries--;
-	}
-	__raw_spin_unlock(&cpu_buffer->lock);
-}
-
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
@@ -2630,6 +2864,7 @@
 
 	return bpage;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 
 /**
  * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2877,7 @@
 {
 	free_page((unsigned long)data);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
 /**
  * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +3004,17 @@
 		/* we copied everything to the beginning */
 		read = 0;
 	} else {
+		/* update the entry counter */
+		cpu_buffer->read += local_read(&reader->entries);
+
 		/* swap the pages */
 		rb_init_page(bpage);
 		bpage = reader->page;
 		reader->page = *data_page;
 		local_set(&reader->write, 0);
+		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
-
-		/* update the entry counter */
-		rb_remove_entries(cpu_buffer, bpage, read);
 	}
 	ret = read;
 
@@ -2787,6 +3024,7 @@
  out:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3083,11 @@
 static __init int rb_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-				    &ring_buffer_flags, &rb_simple_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+	trace_create_file("tracing_on", 0644, d_tracer,
+			    &ring_buffer_flags, &rb_simple_fops);
 
 	return 0;
 }
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644
index 0000000..8d68e14
--- /dev/null
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -0,0 +1,416 @@
+/*
+ * ring buffer tester and benchmark
+ *
+ * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/time.h>
+
+struct rb_page {
+	u64		ts;
+	local_t		commit;
+	char		data[4080];
+};
+
+/* run time and sleep time in seconds */
+#define RUN_TIME	10
+#define SLEEP_TIME	10
+
+/* number of events for writer to wake up the reader */
+static int wakeup_interval = 100;
+
+static int reader_finish;
+static struct completion read_start;
+static struct completion read_done;
+
+static struct ring_buffer *buffer;
+static struct task_struct *producer;
+static struct task_struct *consumer;
+static unsigned long read;
+
+static int disable_reader;
+module_param(disable_reader, uint, 0644);
+MODULE_PARM_DESC(disable_reader, "only run producer");
+
+static int read_events;
+
+static int kill_test;
+
+#define KILL_TEST()				\
+	do {					\
+		if (!kill_test) {		\
+			kill_test = 1;		\
+			WARN_ON(1);		\
+		}				\
+	} while (0)
+
+enum event_status {
+	EVENT_FOUND,
+	EVENT_DROPPED,
+};
+
+static enum event_status read_event(int cpu)
+{
+	struct ring_buffer_event *event;
+	int *entry;
+	u64 ts;
+
+	event = ring_buffer_consume(buffer, cpu, &ts);
+	if (!event)
+		return EVENT_DROPPED;
+
+	entry = ring_buffer_event_data(event);
+	if (*entry != cpu) {
+		KILL_TEST();
+		return EVENT_DROPPED;
+	}
+
+	read++;
+	return EVENT_FOUND;
+}
+
+static enum event_status read_page(int cpu)
+{
+	struct ring_buffer_event *event;
+	struct rb_page *rpage;
+	unsigned long commit;
+	void *bpage;
+	int *entry;
+	int ret;
+	int inc;
+	int i;
+
+	bpage = ring_buffer_alloc_read_page(buffer);
+	if (!bpage)
+		return EVENT_DROPPED;
+
+	ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
+	if (ret >= 0) {
+		rpage = bpage;
+		commit = local_read(&rpage->commit);
+		for (i = 0; i < commit && !kill_test; i += inc) {
+
+			if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
+				KILL_TEST();
+				break;
+			}
+
+			inc = -1;
+			event = (void *)&rpage->data[i];
+			switch (event->type_len) {
+			case RINGBUF_TYPE_PADDING:
+				/* We don't expect any padding */
+				KILL_TEST();
+				break;
+			case RINGBUF_TYPE_TIME_EXTEND:
+				inc = 8;
+				break;
+			case 0:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				if (!event->array[0]) {
+					KILL_TEST();
+					break;
+				}
+				inc = event->array[0];
+				break;
+			default:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				inc = ((event->type_len + 1) * 4);
+			}
+			if (kill_test)
+				break;
+
+			if (inc <= 0) {
+				KILL_TEST();
+				break;
+			}
+		}
+	}
+	ring_buffer_free_read_page(buffer, bpage);
+
+	if (ret < 0)
+		return EVENT_DROPPED;
+	return EVENT_FOUND;
+}
+
+static void ring_buffer_consumer(void)
+{
+	/* toggle between reading pages and events */
+	read_events ^= 1;
+
+	read = 0;
+	while (!reader_finish && !kill_test) {
+		int found;
+
+		do {
+			int cpu;
+
+			found = 0;
+			for_each_online_cpu(cpu) {
+				enum event_status stat;
+
+				if (read_events)
+					stat = read_event(cpu);
+				else
+					stat = read_page(cpu);
+
+				if (kill_test)
+					break;
+				if (stat == EVENT_FOUND)
+					found = 1;
+			}
+		} while (found && !kill_test);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (reader_finish)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	reader_finish = 0;
+	complete(&read_done);
+}
+
+static void ring_buffer_producer(void)
+{
+	struct timeval start_tv;
+	struct timeval end_tv;
+	unsigned long long time;
+	unsigned long long entries;
+	unsigned long long overruns;
+	unsigned long missed = 0;
+	unsigned long hit = 0;
+	unsigned long avg;
+	int cnt = 0;
+
+	/*
+	 * Hammer the buffer for 10 secs (this may
+	 * make the system stall)
+	 */
+	pr_info("Starting ring buffer hammer\n");
+	do_gettimeofday(&start_tv);
+	do {
+		struct ring_buffer_event *event;
+		int *entry;
+
+		event = ring_buffer_lock_reserve(buffer, 10);
+		if (!event) {
+			missed++;
+		} else {
+			hit++;
+			entry = ring_buffer_event_data(event);
+			*entry = smp_processor_id();
+			ring_buffer_unlock_commit(buffer, event);
+		}
+		do_gettimeofday(&end_tv);
+
+		cnt++;
+		if (consumer && !(cnt % wakeup_interval))
+			wake_up_process(consumer);
+
+#ifndef CONFIG_PREEMPT
+		/*
+		 * If we are a non preempt kernel, the 10 second run will
+		 * stop everything while it runs. Instead, we will call
+		 * cond_resched and also add any time that was lost by a
+		 * rescedule.
+		 *
+		 * Do a cond resched at the same frequency we would wake up
+		 * the reader.
+		 */
+		if (cnt % wakeup_interval)
+			cond_resched();
+#endif
+
+	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
+	pr_info("End ring buffer hammer\n");
+
+	if (consumer) {
+		/* Init both completions here to avoid races */
+		init_completion(&read_start);
+		init_completion(&read_done);
+		/* the completions must be visible before the finish var */
+		smp_wmb();
+		reader_finish = 1;
+		/* finish var visible before waking up the consumer */
+		smp_wmb();
+		wake_up_process(consumer);
+		wait_for_completion(&read_done);
+	}
+
+	time = end_tv.tv_sec - start_tv.tv_sec;
+	time *= USEC_PER_SEC;
+	time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
+
+	entries = ring_buffer_entries(buffer);
+	overruns = ring_buffer_overruns(buffer);
+
+	if (kill_test)
+		pr_info("ERROR!\n");
+	pr_info("Time:     %lld (usecs)\n", time);
+	pr_info("Overruns: %lld\n", overruns);
+	if (disable_reader)
+		pr_info("Read:     (reader disabled)\n");
+	else
+		pr_info("Read:     %ld  (by %s)\n", read,
+			read_events ? "events" : "pages");
+	pr_info("Entries:  %lld\n", entries);
+	pr_info("Total:    %lld\n", entries + overruns + read);
+	pr_info("Missed:   %ld\n", missed);
+	pr_info("Hit:      %ld\n", hit);
+
+	/* Convert time from usecs to millisecs */
+	do_div(time, USEC_PER_MSEC);
+	if (time)
+		hit /= (long)time;
+	else
+		pr_info("TIME IS ZERO??\n");
+
+	pr_info("Entries per millisec: %ld\n", hit);
+
+	if (hit) {
+		/* Calculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / hit;
+		pr_info("%ld ns per entry\n", avg);
+	}
+
+	if (missed) {
+		if (time)
+			missed /= (long)time;
+
+		pr_info("Total iterations per millisec: %ld\n", hit + missed);
+
+		/* it is possible that hit + missed will overflow and be zero */
+		if (!(hit + missed)) {
+			pr_info("hit + missed overflowed and totalled zero!\n");
+			hit--; /* make it non zero */
+		}
+
+		/* Caculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / (hit + missed);
+		pr_info("%ld ns per entry\n", avg);
+	}
+}
+
+static void wait_to_die(void)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+static int ring_buffer_consumer_thread(void *arg)
+{
+	while (!kthread_should_stop() && !kill_test) {
+		complete(&read_start);
+
+		ring_buffer_consumer();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop() || kill_test)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int ring_buffer_producer_thread(void *arg)
+{
+	init_completion(&read_start);
+
+	while (!kthread_should_stop() && !kill_test) {
+		ring_buffer_reset(buffer);
+
+		if (consumer) {
+			smp_wmb();
+			wake_up_process(consumer);
+			wait_for_completion(&read_start);
+		}
+
+		ring_buffer_producer();
+
+		pr_info("Sleeping for 10 secs\n");
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ * SLEEP_TIME);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int __init ring_buffer_benchmark_init(void)
+{
+	int ret;
+
+	/* make a one meg buffer in overwite mode */
+	buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
+	if (!buffer)
+		return -ENOMEM;
+
+	if (!disable_reader) {
+		consumer = kthread_create(ring_buffer_consumer_thread,
+					  NULL, "rb_consumer");
+		ret = PTR_ERR(consumer);
+		if (IS_ERR(consumer))
+			goto out_fail;
+	}
+
+	producer = kthread_run(ring_buffer_producer_thread,
+			       NULL, "rb_producer");
+	ret = PTR_ERR(producer);
+
+	if (IS_ERR(producer))
+		goto out_kill;
+
+	return 0;
+
+ out_kill:
+	if (consumer)
+		kthread_stop(consumer);
+
+ out_fail:
+	ring_buffer_free(buffer);
+	return ret;
+}
+
+static void __exit ring_buffer_benchmark_exit(void)
+{
+	kthread_stop(producer);
+	if (consumer)
+		kthread_stop(consumer);
+	ring_buffer_free(buffer);
+}
+
+module_init(ring_buffer_benchmark_init);
+module_exit(ring_buffer_benchmark_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("ring_buffer_benchmark");
+MODULE_LICENSE("GPL");
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cda81ec..8acd9b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -171,6 +171,13 @@
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+				 struct ring_buffer_event *event)
+{
+	return filter_check_discard(call, rec, global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(filter_current_check_discard);
+
 cycle_t ftrace_now(int cpu)
 {
 	u64 ts;
@@ -255,7 +262,8 @@
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
+	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+	TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@
 	"latency-format",
 	"global-clock",
 	"sleep-time",
+	"graph-time",
 	NULL
 };
 
@@ -402,17 +411,6 @@
 	return cnt;
 }
 
-static void
-trace_print_seq(struct seq_file *m, struct trace_seq *s)
-{
-	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
-
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
-
-	trace_seq_init(s);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -641,6 +639,16 @@
 		tracing_reset(tr, cpu);
 }
 
+void tracing_reset_current(int cpu)
+{
+	tracing_reset(&global_trace, cpu);
+}
+
+void tracing_reset_current_online_cpus(void)
+{
+	tracing_reset_online_cpus(&global_trace);
+}
+
 #define SAVED_CMDLINES 128
 #define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -800,6 +808,7 @@
 		return;
 	}
 
+	preempt_disable();
 	__raw_spin_lock(&trace_cmdline_lock);
 	map = map_pid_to_cmdline[pid];
 	if (map != NO_CMDLINE_MAP)
@@ -808,6 +817,7 @@
 		strcpy(comm, "<...>");
 
 	__raw_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
@@ -840,7 +850,7 @@
 }
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags, int pc)
 {
@@ -883,30 +893,40 @@
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc)
 {
 	return trace_buffer_lock_reserve(&global_trace,
 					 type, len, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+{
+	ring_buffer_discard_commit(global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
 	       int pc)
 {
+	struct ftrace_event_call *call = &event_function;
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
 
@@ -921,7 +941,9 @@
 	entry	= ring_buffer_event_data(event);
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
-	ring_buffer_unlock_commit(tr->buffer, event);
+
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +952,7 @@
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_entry;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ent_entry *entry;
 
@@ -942,7 +965,8 @@
 		return 0;
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 
 	return 1;
 }
@@ -952,6 +976,7 @@
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_exit;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ret_entry *entry;
 
@@ -964,7 +989,8 @@
 		return;
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
 
@@ -982,6 +1008,7 @@
 				 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
@@ -999,7 +1026,8 @@
 	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1024,6 +1052,7 @@
 				   unsigned long flags, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_user_stack;
 	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 	struct stack_trace trace;
@@ -1045,7 +1074,8 @@
 	trace.entries		= entry->caller;
 
 	save_stack_trace_user(&trace);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1089,6 +1119,7 @@
 			   struct task_struct *next,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_context_switch;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1104,7 +1135,9 @@
 	entry->next_prio		= next->prio;
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
-	trace_buffer_unlock_commit(tr, event, flags, pc);
+
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
 void
@@ -1113,6 +1146,7 @@
 			   struct task_struct *curr,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_wakeup;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1129,7 +1163,8 @@
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 	ftrace_trace_stack(tr, flags, 6, pc);
 	ftrace_trace_userstack(tr, flags, pc);
 }
@@ -1230,11 +1265,13 @@
 		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	static u32 trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct bprint_entry *entry;
 	unsigned long flags;
+	int disable;
 	int resched;
 	int cpu, len = 0, size, pc;
 
@@ -1249,7 +1286,8 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	/* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1307,15 @@
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
 	local_irq_restore(flags);
 
 out:
+	atomic_dec_return(&data->disabled);
 	ftrace_preempt_enable(resched);
 	unpause_graph_tracing();
 
@@ -1288,12 +1328,14 @@
 	static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
 	static char trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_print;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	int cpu, len = 0, size, pc;
 	struct print_entry *entry;
 	unsigned long irq_flags;
+	int disable;
 
 	if (tracing_disabled || tracing_selftest_running)
 		return 0;
@@ -1303,7 +1345,8 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	pause_graph_tracing();
@@ -1323,13 +1366,15 @@
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = 0;
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
 	raw_local_irq_restore(irq_flags);
 	unpause_graph_tracing();
  out:
+	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
 
 	return len;
@@ -1526,12 +1571,14 @@
 		p = s_next(m, p, &l);
 	}
 
+	trace_event_read_lock();
 	return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
 	atomic_dec(&trace_record_cmdline_disabled);
+	trace_event_read_unlock();
 }
 
 static void print_lat_help_header(struct seq_file *m)
@@ -1774,6 +1821,7 @@
 	return 1;
 }
 
+/*  Called with trace_event_read_lock() held. */
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
 	enum print_line_t ret;
@@ -2397,6 +2445,56 @@
 };
 
 static ssize_t
+tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	char *buf_comm;
+	char *file_buf;
+	char *buf;
+	int len = 0;
+	int pid;
+	int i;
+
+	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
+	if (!file_buf)
+		return -ENOMEM;
+
+	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
+	if (!buf_comm) {
+		kfree(file_buf);
+		return -ENOMEM;
+	}
+
+	buf = file_buf;
+
+	for (i = 0; i < SAVED_CMDLINES; i++) {
+		int r;
+
+		pid = map_cmdline_to_pid[i];
+		if (pid == -1 || pid == NO_CMDLINE_MAP)
+			continue;
+
+		trace_find_cmdline(pid, buf_comm);
+		r = sprintf(buf, "%d %s\n", pid, buf_comm);
+		buf += r;
+		len += r;
+	}
+
+	len = simple_read_from_buffer(ubuf, cnt, ppos,
+				      file_buf, len);
+
+	kfree(file_buf);
+	kfree(buf_comm);
+
+	return len;
+}
+
+static const struct file_operations tracing_saved_cmdlines_fops = {
+    .open       = tracing_open_generic,
+    .read       = tracing_saved_cmdlines_read,
+};
+
+static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
@@ -2728,6 +2826,9 @@
 	/* trace pipe does not show start of buffer */
 	cpumask_setall(iter->started);
 
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
@@ -2915,6 +3016,7 @@
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
+	trace_event_read_lock();
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -2931,6 +3033,7 @@
 		if (iter->seq.len >= cnt)
 			break;
 	}
+	trace_event_read_unlock();
 
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3053,6 +3156,8 @@
 		goto out_err;
 	}
 
+	trace_event_read_lock();
+
 	/* Fill as many pages as possible. */
 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
 		pages[i] = alloc_page(GFP_KERNEL);
@@ -3075,6 +3180,7 @@
 		trace_seq_init(&iter->seq);
 	}
 
+	trace_event_read_unlock();
 	mutex_unlock(&iter->mutex);
 
 	spd.nr_pages = i;
@@ -3425,7 +3531,7 @@
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
-	int size, i;
+	int entries, size, i;
 	size_t ret;
 
 	if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3546,9 @@
 		len &= PAGE_MASK;
 	}
 
-	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
+	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+
+	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
 
@@ -3457,7 +3565,7 @@
 		}
 
 		r = ring_buffer_read_page(ref->buffer, &ref->page,
-					  len, info->cpu, 0);
+					  len, info->cpu, 1);
 		if (r < 0) {
 			ring_buffer_free_read_page(ref->buffer,
 						   ref->page);
@@ -3481,6 +3589,8 @@
 		spd.partial[i].private = (unsigned long)ref;
 		spd.nr_pages++;
 		*ppos += PAGE_SIZE;
+
+		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 	}
 
 	spd.nr_pages = i;
@@ -3508,6 +3618,45 @@
 	.llseek		= no_llseek,
 };
 
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+		   size_t count, loff_t *ppos)
+{
+	unsigned long cpu = (unsigned long)filp->private_data;
+	struct trace_array *tr = &global_trace;
+	struct trace_seq *s;
+	unsigned long cnt;
+
+	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	if (!s)
+		return ENOMEM;
+
+	trace_seq_init(s);
+
+	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "entries: %ld\n", cnt);
+
+	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
+
+	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return count;
+}
+
+static const struct file_operations tracing_stats_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3746,7 @@
 static void tracing_init_debugfs_percpu(long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu();
-	struct dentry *entry, *d_cpu;
+	struct dentry *d_cpu;
 	/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
 	char cpu_dir[7];
 
@@ -3612,21 +3761,18 @@
 	}
 
 	/* per cpu trace_pipe */
-	entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
-				(void *) cpu, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe' entry\n");
+	trace_create_file("trace_pipe", 0444, d_cpu,
+			(void *) cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
-	entry = debugfs_create_file("trace", 0644, d_cpu,
-				(void *) cpu, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+	trace_create_file("trace", 0644, d_cpu,
+			(void *) cpu, &tracing_fops);
 
-	entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
-				    (void *) cpu, &tracing_buffers_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
+	trace_create_file("trace_pipe_raw", 0444, d_cpu,
+			(void *) cpu, &tracing_buffers_fops);
+
+	trace_create_file("stats", 0444, d_cpu,
+			(void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3928,22 @@
 	.write = trace_options_core_write,
 };
 
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops)
+{
+	struct dentry *ret;
+
+	ret = debugfs_create_file(name, mode, parent, data, fops);
+	if (!ret)
+		pr_warning("Could not create debugfs '%s' entry\n", name);
+
+	return ret;
+}
+
+
 static struct dentry *trace_options_init_dentry(void)
 {
 	struct dentry *d_tracer;
@@ -3809,7 +3971,6 @@
 			 struct tracer_opt *opt)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
@@ -3818,11 +3979,9 @@
 	topt->flags = flags;
 	topt->opt = opt;
 
-	entry = debugfs_create_file(opt->name, 0644, t_options, topt,
+	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
 				    &trace_options_fops);
 
-	topt->entry = entry;
-
 }
 
 static struct trace_option_dentry *
@@ -3877,123 +4036,84 @@
 create_trace_option_core_file(const char *option, long index)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return NULL;
 
-	entry = debugfs_create_file(option, 0644, t_options, (void *)index,
+	return trace_create_file(option, 0644, t_options, (void *)index,
 				    &trace_options_core_fops);
-
-	return entry;
 }
 
 static __init void create_trace_options_dir(void)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 	int i;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return;
 
-	for (i = 0; trace_options[i]; i++) {
-		entry = create_trace_option_core_file(trace_options[i], i);
-		if (!entry)
-			pr_warning("Could not create debugfs %s entry\n",
-				   trace_options[i]);
-	}
+	for (i = 0; trace_options[i]; i++)
+		create_trace_option_core_file(trace_options[i], i);
 }
 
 static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 	int cpu;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
-				    &global_trace, &tracing_ctrl_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+	trace_create_file("tracing_enabled", 0644, d_tracer,
+			&global_trace, &tracing_ctrl_fops);
 
-	entry = debugfs_create_file("trace_options", 0644, d_tracer,
-				    NULL, &tracing_iter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_options' entry\n");
+	trace_create_file("trace_options", 0644, d_tracer,
+			NULL, &tracing_iter_fops);
 
-	create_trace_options_dir();
+	trace_create_file("tracing_cpumask", 0644, d_tracer,
+			NULL, &tracing_cpumask_fops);
 
-	entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
-				    NULL, &tracing_cpumask_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
+	trace_create_file("trace", 0644, d_tracer,
+			(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
 
-	entry = debugfs_create_file("trace", 0644, d_tracer,
-				 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+	trace_create_file("available_tracers", 0444, d_tracer,
+			&global_trace, &show_traces_fops);
 
-	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
-				    &global_trace, &show_traces_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'available_tracers' entry\n");
+	trace_create_file("current_tracer", 0644, d_tracer,
+			&global_trace, &set_tracer_fops);
 
-	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
-				    &global_trace, &set_tracer_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'current_tracer' entry\n");
+	trace_create_file("tracing_max_latency", 0644, d_tracer,
+			&tracing_max_latency, &tracing_max_lat_fops);
 
-	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
-				    &tracing_max_latency,
-				    &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_max_latency' entry\n");
+	trace_create_file("tracing_thresh", 0644, d_tracer,
+			&tracing_thresh, &tracing_max_lat_fops);
 
-	entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
-				    &tracing_thresh, &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_thresh' entry\n");
-	entry = debugfs_create_file("README", 0644, d_tracer,
-				    NULL, &tracing_readme_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'README' entry\n");
+	trace_create_file("README", 0444, d_tracer,
+			NULL, &tracing_readme_fops);
 
-	entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+	trace_create_file("trace_pipe", 0444, d_tracer,
 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_pipe' entry\n");
 
-	entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
-				    &global_trace, &tracing_entries_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'buffer_size_kb' entry\n");
+	trace_create_file("buffer_size_kb", 0644, d_tracer,
+			&global_trace, &tracing_entries_fops);
 
-	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
-				    NULL, &tracing_mark_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_marker' entry\n");
+	trace_create_file("trace_marker", 0220, d_tracer,
+			NULL, &tracing_mark_fops);
+
+	trace_create_file("saved_cmdlines", 0444, d_tracer,
+			NULL, &tracing_saved_cmdlines_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-				    &ftrace_update_tot_cnt,
-				    &tracing_dyn_info_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'dyn_ftrace_total_info' entry\n");
+	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
 
+	create_trace_options_dir();
+
 	for_each_tracing_cpu(cpu)
 		tracing_init_debugfs_percpu(cpu);
 
@@ -4064,7 +4184,8 @@
 
 static void __ftrace_dump(bool disable_tracing)
 {
-	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	static raw_spinlock_t ftrace_dump_lock =
+		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
 	unsigned int old_userobj;
@@ -4073,7 +4194,8 @@
 	int cnt = 0, cpu;
 
 	/* only one dump */
-	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&ftrace_dump_lock);
 	if (dump_ran)
 		goto out;
 
@@ -4145,7 +4267,8 @@
 	}
 
  out:
-	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+	__raw_spin_unlock(&ftrace_dump_lock);
+	local_irq_restore(flags);
 }
 
 /* By default: disable tracing after the dump */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e685ac2..6e735d4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,9 +9,12 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <trace/power.h>
 
+#include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
+
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
@@ -42,20 +45,6 @@
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-	unsigned char		type;
-	unsigned char		flags;
-	unsigned char		preempt_count;
-	int			pid;
-	int			tgid;
-};
-
-/*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
@@ -263,8 +252,6 @@
 	char			comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@
 		__ftrace_bad_type();					\
 	} while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
-	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
-	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@
 	struct tracer_stat	*stats;
 };
 
-struct trace_seq {
-	unsigned char		buffer[PAGE_SIZE];
-	unsigned int		len;
-	unsigned int		readpos;
-};
-
-static inline void
-trace_seq_init(struct trace_seq *s)
-{
-	s->len = 0;
-	s->readpos = 0;
-}
-
 
 #define TRACE_PIPE_ALL_CPU	-1
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-	struct trace_array	*tr;
-	struct tracer		*trace;
-	void			*private;
-	int			cpu_file;
-	struct mutex		mutex;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq	seq;
-	struct trace_entry	*ent;
-	int			cpu;
-	u64			ts;
-
-	unsigned long		iter_flags;
-	loff_t			pos;
-	long			idx;
-
-	cpumask_var_t		started;
-};
-
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
+void tracing_reset_current(int cpu);
+void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops);
+
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct ring_buffer_event;
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags,
 						    int pc);
@@ -484,14 +432,6 @@
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
-				  unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
@@ -514,7 +454,6 @@
 				struct task_struct *prev,
 				struct task_struct *next,
 				unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
@@ -599,6 +538,8 @@
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
+extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+					      struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -613,6 +554,8 @@
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
@@ -644,7 +587,6 @@
 	return 1;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
@@ -692,6 +634,7 @@
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
 	TRACE_ITER_GLOBAL_CLK		= 0x80000,
 	TRACE_ITER_SLEEP_TIME		= 0x100000,
+	TRACE_ITER_GRAPH_TIME		= 0x200000,
 };
 
 /*
@@ -790,103 +733,113 @@
 	char			*type;
 	int			offset;
 	int			size;
+	int			is_signed;
 };
 
-struct ftrace_event_call {
-	char			*name;
-	char			*system;
-	struct dentry		*dir;
-	int			enabled;
-	int			(*regfunc)(void);
-	void			(*unregfunc)(void);
-	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct trace_seq *s);
-	int			(*define_fields)(void);
-	struct list_head	fields;
+struct event_filter {
+	int			n_preds;
 	struct filter_pred	**preds;
-
-#ifdef CONFIG_EVENT_PROFILE
-	atomic_t	profile_count;
-	int		(*profile_enable)(struct ftrace_event_call *);
-	void		(*profile_disable)(struct ftrace_event_call *);
-#endif
+	char			*filter_string;
 };
 
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
 	struct dentry		*entry;
-	struct filter_pred	**preds;
+	void			*filter;
 };
 
-#define events_for_each(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
-#define MAX_FILTER_PRED 8
-
 struct filter_pred;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
+				 int val1, int val2);
 
 struct filter_pred {
 	filter_pred_fn_t fn;
 	u64 val;
-	char *str_val;
+	char str_val[MAX_FILTER_STR_VAL];
 	int str_len;
 	char *field_name;
 	int offset;
 	int not;
-	int or;
-	int compound;
-	int clear;
+	int op;
+	int pop_n;
 };
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size);
-extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds,
+extern void print_event_filter(struct ftrace_event_call *call,
 			       struct trace_seq *s);
-extern int filter_parse(char **pbuf, struct filter_pred *pred);
-extern int filter_add_pred(struct ftrace_event_call *call,
-			   struct filter_pred *pred);
-extern void filter_free_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
-extern void filter_free_subsystem_preds(struct event_subsystem *system);
-extern int filter_add_subsystem_pred(struct event_subsystem *system,
-				     struct filter_pred *pred);
+extern int apply_event_filter(struct ftrace_event_call *call,
+			      char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+					char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
+					 struct trace_seq *s);
 
-void event_trace_printk(unsigned long ip, const char *fmt, ...);
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
+static inline int
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+		     struct ring_buffer *buffer,
+		     struct ring_buffer_event *event)
+{
+	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+		ring_buffer_discard_commit(buffer, event);
+		return 1;
+	}
 
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
+	return 0;
+}
+
+#define DEFINE_COMPARISON_PRED(type)					\
+static int filter_pred_##type(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	type *addr = (type *)(event + pred->offset);			\
+	type val = (type)pred->val;					\
+	int match = 0;							\
+									\
+	switch (pred->op) {						\
+	case OP_LT:							\
+		match = (*addr < val);					\
+		break;							\
+	case OP_LE:							\
+		match = (*addr <= val);					\
+		break;							\
+	case OP_GT:							\
+		match = (*addr > val);					\
+		break;							\
+	case OP_GE:							\
+		match = (*addr >= val);					\
+		break;							\
+	default:							\
+		break;							\
+	}								\
+									\
+	return match;							\
+}
+
+#define DEFINE_EQUALITY_PRED(size)					\
+static int filter_pred_##size(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	u##size *addr = (u##size *)(event + pred->offset);		\
+	u##size val = (u##size)pred->val;				\
+	int match;							\
+									\
+	match = (val == *addr) ^ pred->not;				\
+									\
+	return match;							\
+}
+
+extern struct mutex event_mutex;
+extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)				\
-do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	tracing_record_cmdline(current);				\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
-									\
-		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
-	} else								\
-		__trace_printk(ip, fmt, ##args);			\
-} while (0)
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+	extern struct ftrace_event_call event_##call;
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
+#include "trace_event_types.h"
 
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7a30fc4..a29ef23 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -9,6 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -67,7 +68,7 @@
 	trace_assign_type(field, entry);
 	call = &field->boot_call;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
 			(unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@
 	trace_assign_type(field, entry);
 	init_ret = &field->boot_ret;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
 			"returned %d after %llu msecs\n",
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 8333715..7a7a9fd 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,6 +30,7 @@
 static void
 probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 {
+	struct ftrace_event_call *call = &event_branch;
 	struct trace_array *tr = branch_tracer;
 	struct ring_buffer_event *event;
 	struct trace_branch *entry;
@@ -73,7 +74,8 @@
 	entry->line = f->line;
 	entry->correct = val == expect;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@
 	return 0;
 }
 
-static void *annotated_branch_stat_start(void)
+static void *annotated_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_annotated_branch_profile;
 }
@@ -346,7 +348,7 @@
 	return 0;
 }
 
-static void *all_branch_stat_start(void)
+static void *all_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_branch_profile;
 }
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 22cba99..5b5895a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -10,22 +10,30 @@
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
+	int ret = -EINVAL;
 
-	for_each_event(event) {
-		if (event->id == event_id)
-			return event->profile_enable(event);
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			ret = event->profile_enable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 
-	return -EINVAL;
+	return ret;
 }
 
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
-		if (event->id == event_id)
-			return event->profile_disable(event);
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			event->profile_disable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 }
-
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index fd78bee..5e32e37 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -57,7 +57,7 @@
 	TP_RAW_FMT("%u:%u:%u  ==+ %u:%u:%u [%03u]")
 );
 
-TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
+TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, arg1, arg1)
 		TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@
 TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned int, line, line)
-		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
-		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
+		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
+				    TRACE_FUNC_SIZE+1, func)
+		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
+				    TRACE_FUNC_SIZE+1, file)
 		TRACE_FIELD(char, correct, correct)
 	),
 	TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@
 
 TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
 	TRACE_STRUCT(
-		TRACE_FIELD(ktime_t, state_data.stamp, stamp)
-		TRACE_FIELD(ktime_t, state_data.end, end)
+		TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
+		TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
 		TRACE_FIELD(int, state_data.type, type)
 		TRACE_FIELD(int, state_data.state, state)
 	),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 576f4fa..aa08be6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -8,19 +8,25 @@
  *
  */
 
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 #include "trace_output.h"
 
 #define TRACE_SYSTEM "TRACE_SYSTEM"
 
-static DEFINE_MUTEX(event_mutex);
+DEFINE_MUTEX(event_mutex);
+
+LIST_HEAD(ftrace_events);
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size)
+		       char *name, int offset, int size, int is_signed)
 {
 	struct ftrace_event_field *field;
 
@@ -38,6 +44,7 @@
 
 	field->offset = offset;
 	field->size = size;
+	field->is_signed = is_signed;
 	list_add(&field->link, &call->fields);
 
 	return 0;
@@ -51,47 +58,94 @@
 
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(trace_define_field);
 
-static void ftrace_clear_events(void)
+#ifdef CONFIG_MODULES
+
+static void trace_destroy_fields(struct ftrace_event_call *call)
 {
-	struct ftrace_event_call *call = (void *)__start_ftrace_events;
+	struct ftrace_event_field *field, *next;
 
-
-	while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
-
-		if (call->enabled) {
-			call->enabled = 0;
-			call->unregfunc();
-		}
-		call++;
+	list_for_each_entry_safe(field, next, &call->fields, link) {
+		list_del(&field->link);
+		kfree(field->type);
+		kfree(field->name);
+		kfree(field);
 	}
 }
 
+#endif /* CONFIG_MODULES */
+
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
-
 	switch (enable) {
 	case 0:
 		if (call->enabled) {
 			call->enabled = 0;
+			tracing_stop_cmdline_record();
 			call->unregfunc();
 		}
 		break;
 	case 1:
 		if (!call->enabled) {
 			call->enabled = 1;
+			tracing_start_cmdline_record();
 			call->regfunc();
 		}
 		break;
 	}
 }
 
+static void ftrace_clear_events(void)
+{
+	struct ftrace_event_call *call;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		ftrace_event_enable_disable(call, 0);
+	}
+	mutex_unlock(&event_mutex);
+}
+
+/*
+ * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
+ */
+static int __ftrace_set_clr_event(const char *match, const char *sub,
+				  const char *event, int set)
+{
+	struct ftrace_event_call *call;
+	int ret = -EINVAL;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (match &&
+		    strcmp(match, call->name) != 0 &&
+		    strcmp(match, call->system) != 0)
+			continue;
+
+		if (sub && strcmp(sub, call->system) != 0)
+			continue;
+
+		if (event && strcmp(event, call->name) != 0)
+			continue;
+
+		ftrace_event_enable_disable(call, set);
+
+		ret = 0;
+	}
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
 static int ftrace_set_clr_event(char *buf, int set)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
 	char *event = NULL, *sub = NULL, *match;
-	int ret = -EINVAL;
 
 	/*
 	 * The buf format can be <subsystem>:<event-name>
@@ -117,30 +171,24 @@
 			event = NULL;
 	}
 
-	mutex_lock(&event_mutex);
-	for_each_event(call) {
+	return __ftrace_set_clr_event(match, sub, event, set);
+}
 
-		if (!call->name || !call->regfunc)
-			continue;
-
-		if (match &&
-		    strcmp(match, call->name) != 0 &&
-		    strcmp(match, call->system) != 0)
-			continue;
-
-		if (sub && strcmp(sub, call->system) != 0)
-			continue;
-
-		if (event && strcmp(event, call->name) != 0)
-			continue;
-
-		ftrace_event_enable_disable(call, set);
-
-		ret = 0;
-	}
-	mutex_unlock(&event_mutex);
-
-	return ret;
+/**
+ * trace_set_clr_event - enable or disable an event
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @set: 1 to enable, 0 to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_set_clr_event(const char *system, const char *event, int set)
+{
+	return __ftrace_set_clr_event(NULL, system, event, set);
 }
 
 /* 128 should be much more than enough */
@@ -224,15 +272,17 @@
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next = call;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
 	for (;;) {
-		if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+		if (list == &ftrace_events)
 			return NULL;
 
+		call = list_entry(list, struct ftrace_event_call, list);
+
 		/*
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
@@ -240,45 +290,51 @@
 		if (call->regfunc)
 			break;
 
-		call++;
-		next = call;
+		list = list->next;
 	}
 
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return t_next(m, NULL, pos);
 }
 
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
  retry:
-	if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+	if (list == &ftrace_events)
 		return NULL;
 
+	call = list_entry(list, struct ftrace_event_call, list);
+
 	if (!call->enabled) {
-		call++;
+		list = list->next;
 		goto retry;
 	}
 
-	next = call;
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return s_next(m, NULL, pos);
 }
 
@@ -295,12 +351,12 @@
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	mutex_unlock(&event_mutex);
 }
 
 static int
 ftrace_event_seq_open(struct inode *inode, struct file *file)
 {
-	int ret;
 	const struct seq_operations *seq_ops;
 
 	if ((file->f_mode & FMODE_WRITE) &&
@@ -308,13 +364,7 @@
 		ftrace_clear_events();
 
 	seq_ops = inode->i_private;
-	ret = seq_open(file, seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = __start_ftrace_events;
-	}
-	return ret;
+	return seq_open(file, seq_ops);
 }
 
 static ssize_t
@@ -374,8 +424,93 @@
 	return cnt;
 }
 
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	const char set_to_char[4] = { '?', '0', '1', 'X' };
+	const char *system = filp->private_data;
+	struct ftrace_event_call *call;
+	char buf[2];
+	int set = 0;
+	int ret;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (system && strcmp(call->system, system) != 0)
+			continue;
+
+		/*
+		 * We need to find out if all the events are set
+		 * or if all events or cleared, or if we have
+		 * a mixture.
+		 */
+		set |= (1 << !!call->enabled);
+
+		/*
+		 * If we have a mixture, no need to look further.
+		 */
+		if (set == 3)
+			break;
+	}
+	mutex_unlock(&event_mutex);
+
+	buf[0] = set_to_char[set];
+	buf[1] = '\n';
+
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
+
+	return ret;
+}
+
+static ssize_t
+system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		    loff_t *ppos)
+{
+	const char *system = filp->private_data;
+	unsigned long val;
+	char buf[64];
+	ssize_t ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+	if (ret)
+		goto out;
+
+	ret = cnt;
+
+out:
+	*ppos += cnt;
+
+	return ret;
+}
+
+extern char *__bad_type_size(void);
+
 #undef FIELD
 #define FIELD(type, name)						\
+	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
 	#type, "common_" #name, offsetof(typeof(field), name),		\
 		sizeof(field.name)
 
@@ -391,7 +526,7 @@
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(unsigned char, type),
+				FIELD(unsigned short, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
@@ -481,7 +616,7 @@
 
 	trace_seq_init(s);
 
-	filter_print_preds(call->preds, s);
+	print_event_filter(call, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -494,38 +629,26 @@
 		   loff_t *ppos)
 {
 	struct ftrace_event_call *call = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	if (pred->clear) {
-		filter_free_preds(call);
-		filter_free_pred(pred);
-		return cnt;
-	}
-
-	err = filter_add_pred(call, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_event_filter(call, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
@@ -549,7 +672,7 @@
 
 	trace_seq_init(s);
 
-	filter_print_preds(system->preds, s);
+	print_subsystem_event_filter(system, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -562,45 +685,56 @@
 		       loff_t *ppos)
 {
 	struct event_subsystem *system = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	if (pred->clear) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
-		return cnt;
-	}
-
-	err = filter_add_subsystem_pred(system, pred);
-	if (err < 0) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
+	err = apply_subsystem_event_filter(system, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
 	return cnt;
 }
 
+static ssize_t
+show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	int (*func)(struct trace_seq *s) = filp->private_data;
+	struct trace_seq *s;
+	int r;
+
+	if (*ppos)
+		return 0;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	trace_seq_init(s);
+
+	func(s);
+	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return r;
+}
+
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
 	.next = t_next,
@@ -658,6 +792,17 @@
 	.write = subsystem_filter_write,
 };
 
+static const struct file_operations ftrace_system_enable_fops = {
+	.open = tracing_open_generic,
+	.read = system_enable_read,
+	.write = system_enable_write,
+};
+
+static const struct file_operations ftrace_show_header_fops = {
+	.open = tracing_open_generic,
+	.read = show_header,
+};
+
 static struct dentry *event_trace_events_dir(void)
 {
 	static struct dentry *d_tracer;
@@ -684,6 +829,7 @@
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
 	struct event_subsystem *system;
+	struct dentry *entry;
 
 	/* First see if we did not already create this dir */
 	list_for_each_entry(system, &event_subsystems, list) {
@@ -707,16 +853,46 @@
 		return d_events;
 	}
 
-	system->name = name;
+	system->name = kstrdup(name, GFP_KERNEL);
+	if (!system->name) {
+		debugfs_remove(system->entry);
+		kfree(system);
+		return d_events;
+	}
+
 	list_add(&system->list, &event_subsystems);
 
-	system->preds = NULL;
+	system->filter = NULL;
+
+	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+	if (!system->filter) {
+		pr_warning("Could not allocate filter for subsystem "
+			   "'%s'\n", name);
+		return system->entry;
+	}
+
+	entry = debugfs_create_file("filter", 0644, system->entry, system,
+				    &ftrace_subsystem_filter_fops);
+	if (!entry) {
+		kfree(system->filter);
+		system->filter = NULL;
+		pr_warning("Could not create debugfs "
+			   "'%s/filter' entry\n", name);
+	}
+
+	entry = trace_create_file("enable", 0644, system->entry,
+				  (void *)system->name,
+				  &ftrace_system_enable_fops);
 
 	return system->entry;
 }
 
 static int
-event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
+event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
+		 const struct file_operations *id,
+		 const struct file_operations *enable,
+		 const struct file_operations *filter,
+		 const struct file_operations *format)
 {
 	struct dentry *entry;
 	int ret;
@@ -725,7 +901,7 @@
 	 * If the trace point header did not define TRACE_SYSTEM
 	 * then the system would be called "TRACE_SYSTEM".
 	 */
-	if (strcmp(call->system, "TRACE_SYSTEM") != 0)
+	if (strcmp(call->system, TRACE_SYSTEM) != 0)
 		d_events = event_subsystem_dir(call->system, d_events);
 
 	if (call->raw_init) {
@@ -744,21 +920,13 @@
 		return -1;
 	}
 
-	if (call->regfunc) {
-		entry = debugfs_create_file("enable", 0644, call->dir, call,
-					    &ftrace_enable_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/enable' entry\n", call->name);
-	}
+	if (call->regfunc)
+		entry = trace_create_file("enable", 0644, call->dir, call,
+					  enable);
 
-	if (call->id) {
-		entry = debugfs_create_file("id", 0444, call->dir, call,
-				&ftrace_event_id_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs '%s/id' entry\n",
-					call->name);
-	}
+	if (call->id)
+		entry = trace_create_file("id", 0444, call->dir, call,
+					  id);
 
 	if (call->define_fields) {
 		ret = call->define_fields();
@@ -767,32 +935,195 @@
 				   " events/%s\n", call->name);
 			return ret;
 		}
-		entry = debugfs_create_file("filter", 0644, call->dir, call,
-					    &ftrace_event_filter_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/filter' entry\n", call->name);
+		entry = trace_create_file("filter", 0644, call->dir, call,
+					  filter);
 	}
 
 	/* A trace may not want to export its format */
 	if (!call->show_format)
 		return 0;
 
-	entry = debugfs_create_file("format", 0444, call->dir, call,
-				    &ftrace_event_format_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'%s/format' entry\n", call->name);
+	entry = trace_create_file("format", 0444, call->dir, call,
+				  format);
 
 	return 0;
 }
 
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+	struct list_head		list;
+	struct module			*mod;
+	struct file_operations		id;
+	struct file_operations		enable;
+	struct file_operations		format;
+	struct file_operations		filter;
+};
+
+static struct ftrace_module_file_ops *
+trace_create_file_ops(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops;
+
+	/*
+	 * This is a bit of a PITA. To allow for correct reference
+	 * counting, modules must "own" their file_operations.
+	 * To do this, we allocate the file operations that will be
+	 * used in the event directory.
+	 */
+
+	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
+	if (!file_ops)
+		return NULL;
+
+	file_ops->mod = mod;
+
+	file_ops->id = ftrace_event_id_fops;
+	file_ops->id.owner = mod;
+
+	file_ops->enable = ftrace_enable_fops;
+	file_ops->enable.owner = mod;
+
+	file_ops->filter = ftrace_event_filter_fops;
+	file_ops->filter.owner = mod;
+
+	file_ops->format = ftrace_event_format_fops;
+	file_ops->format.owner = mod;
+
+	list_add(&file_ops->list, &ftrace_module_file_list);
+
+	return file_ops;
+}
+
+static void trace_module_add_events(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops = NULL;
+	struct ftrace_event_call *call, *start, *end;
+	struct dentry *d_events;
+
+	start = mod->trace_events;
+	end = mod->trace_events + mod->num_trace_events;
+
+	if (start == end)
+		return;
+
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return;
+
+	for_each_event(call, start, end) {
+		/* The linker may leave blanks */
+		if (!call->name)
+			continue;
+
+		/*
+		 * This module has events, create file ops for this module
+		 * if not already done.
+		 */
+		if (!file_ops) {
+			file_ops = trace_create_file_ops(mod);
+			if (!file_ops)
+				return;
+		}
+		call->mod = mod;
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events,
+				 &file_ops->id, &file_ops->enable,
+				 &file_ops->filter, &file_ops->format);
+	}
+}
+
+static void trace_module_remove_events(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops;
+	struct ftrace_event_call *call, *p;
+	bool found = false;
+
+	down_write(&trace_event_mutex);
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		if (call->mod == mod) {
+			found = true;
+			ftrace_event_enable_disable(call, 0);
+			if (call->event)
+				__unregister_ftrace_event(call->event);
+			debugfs_remove_recursive(call->dir);
+			list_del(&call->list);
+			trace_destroy_fields(call);
+			destroy_preds(call);
+		}
+	}
+
+	/* Now free the file_operations */
+	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
+		if (file_ops->mod == mod)
+			break;
+	}
+	if (&file_ops->list != &ftrace_module_file_list) {
+		list_del(&file_ops->list);
+		kfree(file_ops);
+	}
+
+	/*
+	 * It is safest to reset the ring buffer if the module being unloaded
+	 * registered any events.
+	 */
+	if (found)
+		tracing_reset_current_online_cpus();
+	up_write(&trace_event_mutex);
+}
+
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	mutex_lock(&event_mutex);
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_events(mod);
+		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_events(mod);
+		break;
+	}
+	mutex_unlock(&event_mutex);
+
+	return 0;
+}
+#else
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
 static __init int event_trace_init(void)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
+	int ret;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -816,13 +1147,243 @@
 	if (!d_events)
 		return 0;
 
-	for_each_event(call) {
+	/* ring buffer internal formats */
+	trace_create_file("header_page", 0444, d_events,
+			  ring_buffer_print_page_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("header_event", 0444, d_events,
+			  ring_buffer_print_entry_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("enable", 0644, d_events,
+			  NULL, &ftrace_system_enable_fops);
+
+	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
-		event_create_dir(call, d_events);
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events, &ftrace_event_id_fops,
+				 &ftrace_enable_fops, &ftrace_event_filter_fops,
+				 &ftrace_event_format_fops);
 	}
 
+	ret = register_module_notifier(&trace_module_nb);
+	if (ret)
+		pr_warning("Failed to register trace events module notifier\n");
+
 	return 0;
 }
 fs_initcall(event_trace_init);
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static DEFINE_SPINLOCK(test_spinlock);
+static DEFINE_SPINLOCK(test_spinlock_irq);
+static DEFINE_MUTEX(test_mutex);
+
+static __init void test_work(struct work_struct *dummy)
+{
+	spin_lock(&test_spinlock);
+	spin_lock_irq(&test_spinlock_irq);
+	udelay(1);
+	spin_unlock_irq(&test_spinlock_irq);
+	spin_unlock(&test_spinlock);
+
+	mutex_lock(&test_mutex);
+	msleep(1);
+	mutex_unlock(&test_mutex);
+}
+
+static __init int event_test_thread(void *unused)
+{
+	void *test_malloc;
+
+	test_malloc = kmalloc(1234, GFP_KERNEL);
+	if (!test_malloc)
+		pr_info("failed to kmalloc\n");
+
+	schedule_on_each_cpu(test_work);
+
+	kfree(test_malloc);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop())
+		schedule();
+
+	return 0;
+}
+
+/*
+ * Do various things that may trigger events.
+ */
+static __init void event_test_stuff(void)
+{
+	struct task_struct *test_thread;
+
+	test_thread = kthread_run(event_test_thread, NULL, "test-events");
+	msleep(1);
+	kthread_stop(test_thread);
+}
+
+/*
+ * For every trace event defined, we will test each trace point separately,
+ * and then by groups, and finally all trace points.
+ */
+static __init void event_trace_self_tests(void)
+{
+	struct ftrace_event_call *call;
+	struct event_subsystem *system;
+	int ret;
+
+	pr_info("Running tests on trace events:\n");
+
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		/* Only test those that have a regfunc */
+		if (!call->regfunc)
+			continue;
+
+		pr_info("Testing event %s: ", call->name);
+
+		/*
+		 * If an event is already enabled, someone is using
+		 * it and the self test should not be on.
+		 */
+		if (call->enabled) {
+			pr_warning("Enabled event during self test!\n");
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		ftrace_event_enable_disable(call, 1);
+		event_test_stuff();
+		ftrace_event_enable_disable(call, 0);
+
+		pr_cont("OK\n");
+	}
+
+	/* Now test at the sub system level */
+
+	pr_info("Running tests on trace event systems:\n");
+
+	list_for_each_entry(system, &event_subsystems, list) {
+
+		/* the ftrace system is special, skip it */
+		if (strcmp(system->name, "ftrace") == 0)
+			continue;
+
+		pr_info("Testing event system %s: ", system->name);
+
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
+		if (WARN_ON_ONCE(ret)) {
+			pr_warning("error enabling system %s\n",
+				   system->name);
+			continue;
+		}
+
+		event_test_stuff();
+
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
+		if (WARN_ON_ONCE(ret))
+			pr_warning("error disabling system %s\n",
+				   system->name);
+
+		pr_cont("OK\n");
+	}
+
+	/* Test with all events enabled */
+
+	pr_info("Running tests on all trace events:\n");
+	pr_info("Testing all events: ");
+
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error enabling all events\n");
+		return;
+	}
+
+	event_test_stuff();
+
+	/* reset sysname */
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error disabling all events\n");
+		return;
+	}
+
+	pr_cont("OK\n");
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+static DEFINE_PER_CPU(atomic_t, test_event_disable);
+
+static void
+function_test_events_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ring_buffer_event *event;
+	struct ftrace_entry *entry;
+	unsigned long flags;
+	long disabled;
+	int resched;
+	int cpu;
+	int pc;
+
+	pc = preempt_count();
+	resched = ftrace_preempt_disable();
+	cpu = raw_smp_processor_id();
+	disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+
+	if (disabled != 1)
+		goto out;
+
+	local_save_flags(flags);
+
+	event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
+						  flags, pc);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
+
+	trace_nowake_buffer_unlock_commit(event, flags, pc);
+
+ out:
+	atomic_dec(&per_cpu(test_event_disable, cpu));
+	ftrace_preempt_enable(resched);
+}
+
+static struct ftrace_ops trace_ops __initdata  =
+{
+	.func = function_test_events_call,
+};
+
+static __init void event_trace_self_test_with_function(void)
+{
+	register_ftrace_function(&trace_ops);
+	pr_info("Running tests again, along with the function tracer\n");
+	event_trace_self_tests();
+	unregister_ftrace_function(&trace_ops);
+}
+#else
+static __init void event_trace_self_test_with_function(void)
+{
+}
+#endif
+
+static __init int event_trace_self_tests_init(void)
+{
+
+	event_trace_self_tests();
+
+	event_trace_self_test_with_function();
+
+	return 0;
+}
+
+late_initcall(event_trace_self_tests_init);
+
+#endif
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e03cbf1..db6e54b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,55 +22,138 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/mutex.h>
 
 #include "trace.h"
 #include "trace_output.h"
 
-static int filter_pred_64(struct filter_pred *pred, void *event)
+static DEFINE_MUTEX(filter_mutex);
+
+enum filter_op_ids
 {
-	u64 *addr = (u64 *)(event + pred->offset);
-	u64 val = (u64)pred->val;
-	int match;
+	OP_OR,
+	OP_AND,
+	OP_NE,
+	OP_EQ,
+	OP_LT,
+	OP_LE,
+	OP_GT,
+	OP_GE,
+	OP_NONE,
+	OP_OPEN_PAREN,
+};
 
-	match = (val == *addr) ^ pred->not;
+struct filter_op {
+	int id;
+	char *string;
+	int precedence;
+};
 
-	return match;
+static struct filter_op filter_ops[] = {
+	{ OP_OR, "||", 1 },
+	{ OP_AND, "&&", 2 },
+	{ OP_NE, "!=", 4 },
+	{ OP_EQ, "==", 4 },
+	{ OP_LT, "<", 5 },
+	{ OP_LE, "<=", 5 },
+	{ OP_GT, ">", 5 },
+	{ OP_GE, ">=", 5 },
+	{ OP_NONE, "OP_NONE", 0 },
+	{ OP_OPEN_PAREN, "(", 0 },
+};
+
+enum {
+	FILT_ERR_NONE,
+	FILT_ERR_INVALID_OP,
+	FILT_ERR_UNBALANCED_PAREN,
+	FILT_ERR_TOO_MANY_OPERANDS,
+	FILT_ERR_OPERAND_TOO_LONG,
+	FILT_ERR_FIELD_NOT_FOUND,
+	FILT_ERR_ILLEGAL_FIELD_OP,
+	FILT_ERR_ILLEGAL_INTVAL,
+	FILT_ERR_BAD_SUBSYS_FILTER,
+	FILT_ERR_TOO_MANY_PREDS,
+	FILT_ERR_MISSING_FIELD,
+	FILT_ERR_INVALID_FILTER,
+};
+
+static char *err_text[] = {
+	"No error",
+	"Invalid operator",
+	"Unbalanced parens",
+	"Too many operands",
+	"Operand too long",
+	"Field not found",
+	"Illegal operation for field type",
+	"Illegal integer value",
+	"Couldn't find or set field in one of a subsystem's events",
+	"Too many terms in predicate expression",
+	"Missing field name and/or value",
+	"Meaningless filter expression",
+};
+
+struct opstack_op {
+	int op;
+	struct list_head list;
+};
+
+struct postfix_elt {
+	int op;
+	char *operand;
+	struct list_head list;
+};
+
+struct filter_parse_state {
+	struct filter_op *ops;
+	struct list_head opstack;
+	struct list_head postfix;
+	int lasterr;
+	int lasterr_pos;
+
+	struct {
+		char *string;
+		unsigned int cnt;
+		unsigned int tail;
+	} infix;
+
+	struct {
+		char string[MAX_FILTER_STR_VAL];
+		int pos;
+		unsigned int tail;
+	} operand;
+};
+
+DEFINE_COMPARISON_PRED(s64);
+DEFINE_COMPARISON_PRED(u64);
+DEFINE_COMPARISON_PRED(s32);
+DEFINE_COMPARISON_PRED(u32);
+DEFINE_COMPARISON_PRED(s16);
+DEFINE_COMPARISON_PRED(u16);
+DEFINE_COMPARISON_PRED(s8);
+DEFINE_COMPARISON_PRED(u8);
+
+DEFINE_EQUALITY_PRED(64);
+DEFINE_EQUALITY_PRED(32);
+DEFINE_EQUALITY_PRED(16);
+DEFINE_EQUALITY_PRED(8);
+
+static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
+			   void *event __attribute((unused)),
+			   int val1, int val2)
+{
+	return val1 && val2;
 }
 
-static int filter_pred_32(struct filter_pred *pred, void *event)
+static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
+			  void *event __attribute((unused)),
+			  int val1, int val2)
 {
-	u32 *addr = (u32 *)(event + pred->offset);
-	u32 val = (u32)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 || val2;
 }
 
-static int filter_pred_16(struct filter_pred *pred, void *event)
-{
-	u16 *addr = (u16 *)(event + pred->offset);
-	u16 val = (u16)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_8(struct filter_pred *pred, void *event)
-{
-	u8 *addr = (u8 *)(event + pred->offset);
-	u8 val = (u8)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_string(struct filter_pred *pred, void *event)
+/* Filter predicate for fixed sized arrays of characters */
+static int filter_pred_string(struct filter_pred *pred, void *event,
+			      int val1, int val2)
 {
 	char *addr = (char *)(event + pred->offset);
 	int cmp, match;
@@ -82,59 +165,154 @@
 	return match;
 }
 
+/*
+ * Filter predicate for dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry.
+ * Also each of these strings have a field in the entry which
+ * contains its offset from the beginning of the entry.
+ * We have then first to get this field, dereference it
+ * and add it to the address of the entry, and at last we have
+ * the address of the string.
+ */
+static int filter_pred_strloc(struct filter_pred *pred, void *event,
+			      int val1, int val2)
+{
+	int str_loc = *(int *)(event + pred->offset);
+	char *addr = (char *)(event + str_loc);
+	int cmp, match;
+
+	cmp = strncmp(addr, pred->str_val, pred->str_len);
+
+	match = (!cmp) ^ pred->not;
+
+	return match;
+}
+
+static int filter_pred_none(struct filter_pred *pred, void *event,
+			    int val1, int val2)
+{
+	return 0;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
-	int i, matched, and_failed = 0;
-	struct filter_pred *pred;
-
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (call->preds[i]) {
-			pred = call->preds[i];
-			if (and_failed && !pred->or)
-				continue;
-			matched = pred->fn(pred, rec);
-			if (!matched && !pred->or) {
-				and_failed = 1;
-				continue;
-			} else if (matched && pred->or)
-				return 1;
-		} else
-			break;
-	}
-
-	if (and_failed)
-		return 0;
-
-	return 1;
-}
-
-void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
-{
-	char *field_name;
+	struct event_filter *filter = call->filter;
+	int match, top = 0, val1 = 0, val2 = 0;
+	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
 	int i;
 
-	if (!preds) {
-		trace_seq_printf(s, "none\n");
-		return;
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
+		if (!pred->pop_n) {
+			match = pred->fn(pred, rec, val1, val2);
+			stack[top++] = match;
+			continue;
+		}
+		if (pred->pop_n > top) {
+			WARN_ON_ONCE(1);
+			return 0;
+		}
+		val1 = stack[--top];
+		val2 = stack[--top];
+		match = pred->fn(pred, rec, val1, val2);
+		stack[top++] = match;
 	}
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (preds[i]) {
-			pred = preds[i];
-			field_name = pred->field_name;
-			if (i)
-				trace_seq_printf(s, pred->or ? "|| " : "&& ");
-			trace_seq_printf(s, "%s ", field_name);
-			trace_seq_printf(s, pred->not ? "!= " : "== ");
-			if (pred->str_val)
-				trace_seq_printf(s, "%s\n", pred->str_val);
-			else
-				trace_seq_printf(s, "%llu\n", pred->val);
-		} else
-			break;
-	}
+	return stack[--top];
+}
+EXPORT_SYMBOL_GPL(filter_match_preds);
+
+static void parse_error(struct filter_parse_state *ps, int err, int pos)
+{
+	ps->lasterr = err;
+	ps->lasterr_pos = pos;
+}
+
+static void remove_filter_string(struct event_filter *filter)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = NULL;
+}
+
+static int replace_filter_string(struct event_filter *filter,
+				 char *filter_string)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+	if (!filter->filter_string)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int append_filter_string(struct event_filter *filter,
+				char *string)
+{
+	int newlen;
+	char *new_filter_string;
+
+	BUG_ON(!filter->filter_string);
+	newlen = strlen(filter->filter_string) + strlen(string) + 1;
+	new_filter_string = kmalloc(newlen, GFP_KERNEL);
+	if (!new_filter_string)
+		return -ENOMEM;
+
+	strcpy(new_filter_string, filter->filter_string);
+	strcat(new_filter_string, string);
+	kfree(filter->filter_string);
+	filter->filter_string = new_filter_string;
+
+	return 0;
+}
+
+static void append_filter_err(struct filter_parse_state *ps,
+			      struct event_filter *filter)
+{
+	int pos = ps->lasterr_pos;
+	char *buf, *pbuf;
+
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
+		return;
+
+	append_filter_string(filter, "\n");
+	memset(buf, ' ', PAGE_SIZE);
+	if (pos > PAGE_SIZE - 128)
+		pos = 0;
+	buf[pos] = '^';
+	pbuf = &buf[pos] + 1;
+
+	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
+	append_filter_string(filter, buf);
+	free_page((unsigned long) buf);
+}
+
+void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
+{
+	struct event_filter *filter = call->filter;
+
+	mutex_lock(&filter_mutex);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
+	mutex_unlock(&filter_mutex);
+}
+
+void print_subsystem_event_filter(struct event_subsystem *system,
+				  struct trace_seq *s)
+{
+	struct event_filter *filter = system->filter;
+
+	mutex_lock(&filter_mutex);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
+	mutex_unlock(&filter_mutex);
 }
 
 static struct ftrace_event_field *
@@ -150,284 +328,828 @@
 	return NULL;
 }
 
-void filter_free_pred(struct filter_pred *pred)
+static void filter_free_pred(struct filter_pred *pred)
 {
 	if (!pred)
 		return;
 
 	kfree(pred->field_name);
-	kfree(pred->str_val);
 	kfree(pred);
 }
 
-void filter_free_preds(struct ftrace_event_call *call)
+static void filter_clear_pred(struct filter_pred *pred)
 {
-	int i;
-
-	if (call->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
-			filter_free_pred(call->preds[i]);
-		kfree(call->preds);
-		call->preds = NULL;
-	}
+	kfree(pred->field_name);
+	pred->field_name = NULL;
+	pred->str_len = 0;
 }
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
+static int filter_set_pred(struct filter_pred *dest,
+			   struct filter_pred *src,
+			   filter_pred_fn_t fn)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
-	int i;
-
-	if (system->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
-			filter_free_pred(system->preds[i]);
-		kfree(system->preds);
-		system->preds = NULL;
-	}
-
-	events_for_each(call) {
-		if (!call->name || !call->regfunc)
-			continue;
-
-		if (!strcmp(call->system, system->name))
-			filter_free_preds(call);
-	}
-}
-
-static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred)
-{
-	int i;
-
-	if (call->preds && !pred->compound)
-		filter_free_preds(call);
-
-	if (!call->preds) {
-		call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-				      GFP_KERNEL);
-		if (!call->preds)
+	*dest = *src;
+	if (src->field_name) {
+		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
+		if (!dest->field_name)
 			return -ENOMEM;
 	}
-
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!call->preds[i]) {
-			call->preds[i] = pred;
-			return 0;
-		}
-	}
-
-	return -ENOSPC;
-}
-
-static int is_string_field(const char *type)
-{
-	if (strchr(type, '[') && strstr(type, "char"))
-		return 1;
+	dest->fn = fn;
 
 	return 0;
 }
 
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+static void filter_disable_preds(struct ftrace_event_call *call)
 {
-	struct ftrace_event_field *field;
-
-	field = find_event_field(call, pred->field_name);
-	if (!field)
-		return -EINVAL;
-
-	pred->offset = field->offset;
-
-	if (is_string_field(field->type)) {
-		if (!pred->str_val)
-			return -EINVAL;
-		pred->fn = filter_pred_string;
-		pred->str_len = field->size;
-		return __filter_add_pred(call, pred);
-	} else {
-		if (pred->str_val)
-			return -EINVAL;
-	}
-
-	switch (field->size) {
-	case 8:
-		pred->fn = filter_pred_64;
-		break;
-	case 4:
-		pred->fn = filter_pred_32;
-		break;
-	case 2:
-		pred->fn = filter_pred_16;
-		break;
-	case 1:
-		pred->fn = filter_pred_8;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return __filter_add_pred(call, pred);
-}
-
-static struct filter_pred *copy_pred(struct filter_pred *pred)
-{
-	struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
-	if (!new_pred)
-		return NULL;
-
-	memcpy(new_pred, pred, sizeof(*pred));
-
-	if (pred->field_name) {
-		new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-		if (!new_pred->field_name) {
-			kfree(new_pred);
-			return NULL;
-		}
-	}
-
-	if (pred->str_val) {
-		new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
-		if (!new_pred->str_val) {
-			filter_free_pred(new_pred);
-			return NULL;
-		}
-	}
-
-	return new_pred;
-}
-
-int filter_add_subsystem_pred(struct event_subsystem *system,
-			      struct filter_pred *pred)
-{
-	struct ftrace_event_call *call = __start_ftrace_events;
-	struct filter_pred *event_pred;
+	struct event_filter *filter = call->filter;
 	int i;
 
-	if (system->preds && !pred->compound)
-		filter_free_subsystem_preds(system);
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
-	if (!system->preds) {
-		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-					GFP_KERNEL);
-		if (!system->preds)
-			return -ENOMEM;
-	}
+	for (i = 0; i < MAX_FILTER_PRED; i++)
+		filter->preds[i]->fn = filter_pred_none;
+}
+
+void destroy_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter = call->filter;
+	int i;
 
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!system->preds[i]) {
-			system->preds[i] = pred;
-			break;
-		}
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
 	}
+	kfree(filter->preds);
+	kfree(filter);
+	call->filter = NULL;
+}
 
-	if (i == MAX_FILTER_PRED)
-		return -ENOSPC;
+int init_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter;
+	struct filter_pred *pred;
+	int i;
 
-	events_for_each(call) {
-		int err;
+	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!call->filter)
+		return -ENOMEM;
 
-		if (!call->name || !call->regfunc)
-			continue;
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
-		if (strcmp(call->system, system->name))
-			continue;
+	filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+	if (!filter->preds)
+		goto oom;
 
-		if (!find_event_field(call, pred->field_name))
-			continue;
-
-		event_pred = copy_pred(pred);
-		if (!event_pred)
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+		if (!pred)
 			goto oom;
-
-		err = filter_add_pred(call, event_pred);
-		if (err)
-			filter_free_pred(event_pred);
-		if (err == -ENOMEM)
-			goto oom;
+		pred->fn = filter_pred_none;
+		filter->preds[i] = pred;
 	}
 
 	return 0;
 
 oom:
-	system->preds[i] = NULL;
+	destroy_preds(call);
+
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(init_preds);
 
-int filter_parse(char **pbuf, struct filter_pred *pred)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
-	char *tmp, *tok, *val_str = NULL;
-	int tok_n = 0;
+	struct event_filter *filter = system->filter;
+	struct ftrace_event_call *call;
+	int i;
 
-	/* field ==/!= number, or/and field ==/!= number, number */
-	while ((tok = strsep(pbuf, " \n"))) {
-		if (tok_n == 0) {
-			if (!strcmp(tok, "0")) {
-				pred->clear = 1;
-				return 0;
-			} else if (!strcmp(tok, "&&")) {
-				pred->or = 0;
-				pred->compound = 1;
-			} else if (!strcmp(tok, "||")) {
-				pred->or = 1;
-				pred->compound = 1;
-			} else
-				pred->field_name = tok;
-			tok_n = 1;
-			continue;
-		}
-		if (tok_n == 1) {
-			if (!pred->field_name)
-				pred->field_name = tok;
-			else if (!strcmp(tok, "!="))
-				pred->not = 1;
-			else if (!strcmp(tok, "=="))
-				pred->not = 0;
-			else {
-				pred->field_name = NULL;
-				return -EINVAL;
-			}
-			tok_n = 2;
-			continue;
-		}
-		if (tok_n == 2) {
-			if (pred->compound) {
-				if (!strcmp(tok, "!="))
-					pred->not = 1;
-				else if (!strcmp(tok, "=="))
-					pred->not = 0;
-				else {
-					pred->field_name = NULL;
-					return -EINVAL;
-				}
-			} else {
-				val_str = tok;
-				break; /* done */
-			}
-			tok_n = 3;
-			continue;
-		}
-		if (tok_n == 3) {
-			val_str = tok;
-			break; /* done */
-		}
+	if (filter->n_preds) {
+		for (i = 0; i < filter->n_preds; i++)
+			filter_free_pred(filter->preds[i]);
+		kfree(filter->preds);
+		filter->preds = NULL;
+		filter->n_preds = 0;
 	}
 
-	if (!val_str) {
-		pred->field_name = NULL;
-		return -EINVAL;
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!call->define_fields)
+			continue;
+
+		if (!strcmp(call->system, system->name)) {
+			filter_disable_preds(call);
+			remove_filter_string(call->filter);
+		}
+	}
+	mutex_unlock(&event_mutex);
+}
+
+static int filter_add_pred_fn(struct filter_parse_state *ps,
+			      struct ftrace_event_call *call,
+			      struct filter_pred *pred,
+			      filter_pred_fn_t fn)
+{
+	struct event_filter *filter = call->filter;
+	int idx, err;
+
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+		return -ENOSPC;
 	}
 
-	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-	if (!pred->field_name)
-		return -ENOMEM;
+	idx = filter->n_preds;
+	filter_clear_pred(filter->preds[idx]);
+	err = filter_set_pred(filter->preds[idx], pred, fn);
+	if (err)
+		return err;
 
-	pred->val = simple_strtoull(val_str, &tmp, 0);
-	if (tmp == val_str) {
-		pred->str_val = kstrdup(val_str, GFP_KERNEL);
-		if (!pred->str_val)
-			return -ENOMEM;
-	} else if (*tmp != '\0')
-		return -EINVAL;
+	filter->n_preds++;
+	call->filter_active = 1;
 
 	return 0;
 }
 
+enum {
+	FILTER_STATIC_STRING = 1,
+	FILTER_DYN_STRING
+};
+
+static int is_string_field(const char *type)
+{
+	if (strstr(type, "__data_loc") && strstr(type, "char"))
+		return FILTER_DYN_STRING;
+
+	if (strchr(type, '[') && strstr(type, "char"))
+		return FILTER_STATIC_STRING;
+
+	return 0;
+}
+
+static int is_legal_op(struct ftrace_event_field *field, int op)
+{
+	if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
+		return 0;
+
+	return 1;
+}
+
+static filter_pred_fn_t select_comparison_fn(int op, int field_size,
+					     int field_is_signed)
+{
+	filter_pred_fn_t fn = NULL;
+
+	switch (field_size) {
+	case 8:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_64;
+		else if (field_is_signed)
+			fn = filter_pred_s64;
+		else
+			fn = filter_pred_u64;
+		break;
+	case 4:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_32;
+		else if (field_is_signed)
+			fn = filter_pred_s32;
+		else
+			fn = filter_pred_u32;
+		break;
+	case 2:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_16;
+		else if (field_is_signed)
+			fn = filter_pred_s16;
+		else
+			fn = filter_pred_u16;
+		break;
+	case 1:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_8;
+		else if (field_is_signed)
+			fn = filter_pred_s8;
+		else
+			fn = filter_pred_u8;
+		break;
+	}
+
+	return fn;
+}
+
+static int filter_add_pred(struct filter_parse_state *ps,
+			   struct ftrace_event_call *call,
+			   struct filter_pred *pred)
+{
+	struct ftrace_event_field *field;
+	filter_pred_fn_t fn;
+	unsigned long long val;
+	int string_type;
+
+	pred->fn = filter_pred_none;
+
+	if (pred->op == OP_AND) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_and);
+	} else if (pred->op == OP_OR) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_or);
+	}
+
+	field = find_event_field(call, pred->field_name);
+	if (!field) {
+		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
+		return -EINVAL;
+	}
+
+	pred->offset = field->offset;
+
+	if (!is_legal_op(field, pred->op)) {
+		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
+		return -EINVAL;
+	}
+
+	string_type = is_string_field(field->type);
+	if (string_type) {
+		if (string_type == FILTER_STATIC_STRING)
+			fn = filter_pred_string;
+		else
+			fn = filter_pred_strloc;
+		pred->str_len = field->size;
+		if (pred->op == OP_NE)
+			pred->not = 1;
+		return filter_add_pred_fn(ps, call, pred, fn);
+	} else {
+		if (strict_strtoull(pred->str_val, 0, &val)) {
+			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
+			return -EINVAL;
+		}
+		pred->val = val;
+	}
+
+	fn = select_comparison_fn(pred->op, field->size, field->is_signed);
+	if (!fn) {
+		parse_error(ps, FILT_ERR_INVALID_OP, 0);
+		return -EINVAL;
+	}
+
+	if (pred->op == OP_NE)
+		pred->not = 1;
+
+	return filter_add_pred_fn(ps, call, pred, fn);
+}
+
+static int filter_add_subsystem_pred(struct filter_parse_state *ps,
+				     struct event_subsystem *system,
+				     struct filter_pred *pred,
+				     char *filter_string)
+{
+	struct event_filter *filter = system->filter;
+	struct ftrace_event_call *call;
+	int err = 0;
+
+	if (!filter->preds) {
+		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+					GFP_KERNEL);
+
+		if (!filter->preds)
+			return -ENOMEM;
+	}
+
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+		return -ENOSPC;
+	}
+
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		if (!call->define_fields)
+			continue;
+
+		if (strcmp(call->system, system->name))
+			continue;
+
+		err = filter_add_pred(ps, call, pred);
+		if (err) {
+			mutex_unlock(&event_mutex);
+			filter_free_subsystem_preds(system);
+			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+			goto out;
+		}
+		replace_filter_string(call->filter, filter_string);
+	}
+	mutex_unlock(&event_mutex);
+out:
+	return err;
+}
+
+static void parse_init(struct filter_parse_state *ps,
+		       struct filter_op *ops,
+		       char *infix_string)
+{
+	memset(ps, '\0', sizeof(*ps));
+
+	ps->infix.string = infix_string;
+	ps->infix.cnt = strlen(infix_string);
+	ps->ops = ops;
+
+	INIT_LIST_HEAD(&ps->opstack);
+	INIT_LIST_HEAD(&ps->postfix);
+}
+
+static char infix_next(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+
+	return ps->infix.string[ps->infix.tail++];
+}
+
+static char infix_peek(struct filter_parse_state *ps)
+{
+	if (ps->infix.tail == strlen(ps->infix.string))
+		return 0;
+
+	return ps->infix.string[ps->infix.tail];
+}
+
+static void infix_advance(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+	ps->infix.tail++;
+}
+
+static inline int is_precedence_lower(struct filter_parse_state *ps,
+				      int a, int b)
+{
+	return ps->ops[a].precedence < ps->ops[b].precedence;
+}
+
+static inline int is_op_char(struct filter_parse_state *ps, char c)
+{
+	int i;
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (ps->ops[i].string[0] == c)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int infix_get_op(struct filter_parse_state *ps, char firstc)
+{
+	char nextc = infix_peek(ps);
+	char opstr[3];
+	int i;
+
+	opstr[0] = firstc;
+	opstr[1] = nextc;
+	opstr[2] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string)) {
+			infix_advance(ps);
+			return ps->ops[i].id;
+		}
+	}
+
+	opstr[1] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string))
+			return ps->ops[i].id;
+	}
+
+	return OP_NONE;
+}
+
+static inline void clear_operand_string(struct filter_parse_state *ps)
+{
+	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
+	ps->operand.tail = 0;
+}
+
+static inline int append_operand_char(struct filter_parse_state *ps, char c)
+{
+	if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
+		return -EINVAL;
+
+	ps->operand.string[ps->operand.tail++] = c;
+
+	return 0;
+}
+
+static int filter_opstack_push(struct filter_parse_state *ps, int op)
+{
+	struct opstack_op *opstack_op;
+
+	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
+	if (!opstack_op)
+		return -ENOMEM;
+
+	opstack_op->op = op;
+	list_add(&opstack_op->list, &ps->opstack);
+
+	return 0;
+}
+
+static int filter_opstack_empty(struct filter_parse_state *ps)
+{
+	return list_empty(&ps->opstack);
+}
+
+static int filter_opstack_top(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+
+	return opstack_op->op;
+}
+
+static int filter_opstack_pop(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+	int op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+	op = opstack_op->op;
+	list_del(&opstack_op->list);
+
+	kfree(opstack_op);
+
+	return op;
+}
+
+static void filter_opstack_clear(struct filter_parse_state *ps)
+{
+	while (!filter_opstack_empty(ps))
+		filter_opstack_pop(ps);
+}
+
+static char *curr_operand(struct filter_parse_state *ps)
+{
+	return ps->operand.string;
+}
+
+static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = OP_NONE;
+	elt->operand = kstrdup(operand, GFP_KERNEL);
+	if (!elt->operand) {
+		kfree(elt);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static int postfix_append_op(struct filter_parse_state *ps, int op)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = op;
+	elt->operand = NULL;
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static void postfix_clear(struct filter_parse_state *ps)
+{
+	struct postfix_elt *elt;
+
+	while (!list_empty(&ps->postfix)) {
+		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
+		kfree(elt->operand);
+		list_del(&elt->list);
+	}
+}
+
+static int filter_parse(struct filter_parse_state *ps)
+{
+	int in_string = 0;
+	int op, top_op;
+	char ch;
+
+	while ((ch = infix_next(ps))) {
+		if (ch == '"') {
+			in_string ^= 1;
+			continue;
+		}
+
+		if (in_string)
+			goto parse_operand;
+
+		if (isspace(ch))
+			continue;
+
+		if (is_op_char(ps, ch)) {
+			op = infix_get_op(ps, ch);
+			if (op == OP_NONE) {
+				parse_error(ps, FILT_ERR_INVALID_OP, 0);
+				return -EINVAL;
+			}
+
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			while (!filter_opstack_empty(ps)) {
+				top_op = filter_opstack_top(ps);
+				if (!is_precedence_lower(ps, top_op, op)) {
+					top_op = filter_opstack_pop(ps);
+					postfix_append_op(ps, top_op);
+					continue;
+				}
+				break;
+			}
+
+			filter_opstack_push(ps, op);
+			continue;
+		}
+
+		if (ch == '(') {
+			filter_opstack_push(ps, OP_OPEN_PAREN);
+			continue;
+		}
+
+		if (ch == ')') {
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			top_op = filter_opstack_pop(ps);
+			while (top_op != OP_NONE) {
+				if (top_op == OP_OPEN_PAREN)
+					break;
+				postfix_append_op(ps, top_op);
+				top_op = filter_opstack_pop(ps);
+			}
+			if (top_op == OP_NONE) {
+				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+parse_operand:
+		if (append_operand_char(ps, ch)) {
+			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
+			return -EINVAL;
+		}
+	}
+
+	if (strlen(curr_operand(ps)))
+		postfix_append_operand(ps, curr_operand(ps));
+
+	while (!filter_opstack_empty(ps)) {
+		top_op = filter_opstack_pop(ps);
+		if (top_op == OP_NONE)
+			break;
+		if (top_op == OP_OPEN_PAREN) {
+			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+			return -EINVAL;
+		}
+		postfix_append_op(ps, top_op);
+	}
+
+	return 0;
+}
+
+static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->field_name = kstrdup(operand1, GFP_KERNEL);
+	if (!pred->field_name) {
+		kfree(pred);
+		return NULL;
+	}
+
+	strcpy(pred->str_val, operand2);
+	pred->str_len = strlen(operand2);
+
+	pred->op = op;
+
+	return pred;
+}
+
+static struct filter_pred *create_logical_pred(int op)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->op = op;
+
+	return pred;
+}
+
+static int check_preds(struct filter_parse_state *ps)
+{
+	int n_normal_preds = 0, n_logical_preds = 0;
+	struct postfix_elt *elt;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE)
+			continue;
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			n_logical_preds++;
+			continue;
+		}
+		n_normal_preds++;
+	}
+
+	if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
+		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int replace_preds(struct event_subsystem *system,
+			 struct ftrace_event_call *call,
+			 struct filter_parse_state *ps,
+			 char *filter_string)
+{
+	char *operand1 = NULL, *operand2 = NULL;
+	struct filter_pred *pred;
+	struct postfix_elt *elt;
+	int err;
+
+	err = check_preds(ps);
+	if (err)
+		return err;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE) {
+			if (!operand1)
+				operand1 = elt->operand;
+			else if (!operand2)
+				operand2 = elt->operand;
+			else {
+				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			pred = create_logical_pred(elt->op);
+			if (call) {
+				err = filter_add_pred(ps, call, pred);
+				filter_free_pred(pred);
+			} else
+				err = filter_add_subsystem_pred(ps, system,
+							pred, filter_string);
+			if (err)
+				return err;
+
+			operand1 = operand2 = NULL;
+			continue;
+		}
+
+		if (!operand1 || !operand2) {
+			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+			return -EINVAL;
+		}
+
+		pred = create_pred(elt->op, operand1, operand2);
+		if (call) {
+			err = filter_add_pred(ps, call, pred);
+			filter_free_pred(pred);
+		} else
+			err = filter_add_subsystem_pred(ps, system, pred,
+							filter_string);
+		if (err)
+			return err;
+
+		operand1 = operand2 = NULL;
+	}
+
+	return 0;
+}
+
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_disable_preds(call);
+		remove_filter_string(call->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto out_unlock;
+
+	filter_disable_preds(call);
+	replace_filter_string(call->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, call->filter);
+		goto out;
+	}
+
+	err = replace_preds(NULL, call, ps, filter_string);
+	if (err)
+		append_filter_err(ps, call->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+out_unlock:
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
+
+int apply_subsystem_event_filter(struct event_subsystem *system,
+				 char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_free_subsystem_preds(system);
+		remove_filter_string(system->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto out_unlock;
+
+	filter_free_subsystem_preds(system);
+	replace_filter_string(system->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, system->filter);
+		goto out;
+	}
+
+	err = replace_preds(system, NULL, ps, filter_string);
+	if (err)
+		append_filter_err(ps, system->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+out_unlock:
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
 
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 38985f9..0000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Stage 1 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * struct ftrace_raw_<call> {
- *	struct trace_entry		ent;
- *	<type>				<item>;
- *	<type2>				<item2>[<len>];
- *	[...]
- * };
- *
- * The <type> <item> is created by the __field(type, item) macro or
- * the __array(type2, item2, len) macro.
- * We simply do "type item;", and that will create the fields
- * in the structure.
- */
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
-#undef __array
-#define __array(type, item, len)	type	item[len];
-
-#undef __field
-#define __field(type, item)		type	item;
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-	};							\
-	static struct ftrace_event_call event_##name
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index d363c66..0000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Stage 2 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * enum print_line_t
- * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
- * {
- *	struct trace_seq *s = &iter->seq;
- *	struct ftrace_raw_<call> *field; <-- defined in stage 1
- *	struct trace_entry *entry;
- *	int ret;
- *
- *	entry = iter->ent;
- *
- *	if (entry->type != event_<call>.id) {
- *		WARN_ON_ONCE(1);
- *		return TRACE_TYPE_UNHANDLED;
- *	}
- *
- *	field = (typeof(field))entry;
- *
- *	ret = trace_seq_printf(s, <TP_printk> "\n");
- *	if (!ret)
- *		return TRACE_TYPE_PARTIAL_LINE;
- *
- *	return TRACE_TYPE_HANDLED;
- * }
- *
- * This is the method used to print the raw event to the trace
- * output format. Note, this is not needed if the data is read
- * in binary.
- */
-
-#undef __entry
-#define __entry field
-
-#undef TP_printk
-#define TP_printk(fmt, args...) fmt "\n", args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-enum print_line_t							\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
-{									\
-	struct trace_seq *s = &iter->seq;				\
-	struct ftrace_raw_##call *field;				\
-	struct trace_entry *entry;					\
-	int ret;							\
-									\
-	entry = iter->ent;						\
-									\
-	if (entry->type != event_##call.id) {				\
-		WARN_ON_ONCE(1);					\
-		return TRACE_TYPE_UNHANDLED;				\
-	}								\
-									\
-	field = (typeof(field))entry;					\
-									\
-	ret = trace_seq_printf(s, #call ": " print);			\
-	if (!ret)							\
-		return TRACE_TYPE_PARTIAL_LINE;				\
-									\
-	return TRACE_TYPE_HANDLED;					\
-}
-	
-#include <trace/trace_event_types.h>
-
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_event_types.h>
-
-#undef __field
-#define __field(type, item)						\
-	ret = trace_define_field(event_call, #type, #item,		\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef __array
-#define __array(type, item, len)					\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#define __common_field(type, item)					\
-	ret = trace_define_field(event_call, #type, "common_" #item,	\
-				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
-	if (ret)							\
-		return ret;
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-int									\
-ftrace_define_fields_##call(void)					\
-{									\
-	struct ftrace_raw_##call field;					\
-	struct ftrace_event_call *event_call = &event_##call;		\
-	int ret;							\
-									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
-									\
-	tstruct;							\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 9d2fa78..0000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Stage 3 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * static void ftrace_event_<call>(proto)
- * {
- *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
- * }
- *
- * static int ftrace_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to  <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_event_<call>);
- * }
- *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
- *
- * For those macros defined with TRACE_EVENT:
- *
- * static struct ftrace_event_call event_<call>;
- *
- * static void ftrace_raw_event_<call>(proto)
- * {
- *	struct ring_buffer_event *event;
- *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *	unsigned long irq_flags;
- *	int pc;
- *
- *	local_save_flags(irq_flags);
- *	pc = preempt_count();
- *
- *	event = trace_current_buffer_lock_reserve(event_<call>.id,
- *				  sizeof(struct ftrace_raw_<call>),
- *				  irq_flags, pc);
- *	if (!event)
- *		return;
- *	entry	= ring_buffer_event_data(event);
- *
- *	<assign>;  <-- Here we assign the entries by the __field and
- *			__array macros.
- *
- *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
- * }
- *
- * static int ftrace_raw_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_raw_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static struct trace_event ftrace_event_type_<call> = {
- *	.trace			= ftrace_raw_output_<call>, <-- stage 2
- * };
- *
- * static int ftrace_raw_init_event_<call>(void)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- *	.show_format		= ftrace_format_<call>,
- * }
- *
- */
-
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
-#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)				\
-static void ftrace_profile_##call(proto)				\
-{									\
-	extern void perf_tpcounter_event(int);				\
-	perf_tpcounter_event(event_##call.id);				\
-}									\
-									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
-{									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&call->profile_count))			\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
-}									\
-									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
-{									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
-		unregister_trace_##call(ftrace_profile_##call);		\
-}
-
-#define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = ftrace_profile_enable_##call,			\
-	.profile_disable = ftrace_profile_disable_##call,
-
-#else
-#define _TRACE_PROFILE(call, proto, args)
-#define _TRACE_PROFILE_INIT(call)
-#endif
-
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#undef __entry
-#define __entry entry
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static void ftrace_raw_event_##call(proto)				\
-{									\
-	struct ftrace_event_call *call = &event_##call;			\
-	struct ring_buffer_event *event;				\
-	struct ftrace_raw_##call *entry;				\
-	unsigned long irq_flags;					\
-	int pc;								\
-									\
-	local_save_flags(irq_flags);					\
-	pc = preempt_count();						\
-									\
-	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
-	if (!event)							\
-		return;							\
-	entry	= ring_buffer_event_data(event);			\
-									\
-	assign;								\
-									\
-	if (call->preds && !filter_match_preds(call, entry))		\
-		ring_buffer_event_discard(event);			\
-									\
-	trace_nowake_buffer_unlock_commit(event, irq_flags, pc);	\
-									\
-}									\
-									\
-static int ftrace_raw_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_raw_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_raw_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_raw_event_##call);		\
-}									\
-									\
-static struct trace_event ftrace_event_type_##call = {			\
-	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}									\
-									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
-	.regfunc		= ftrace_raw_reg_event_##call,		\
-	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#include <trace/trace_event_types.h>
-
-#undef _TRACE_PROFILE
-#undef _TRACE_PROFILE_INIT
-
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 07a22c3..d06cf89 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -19,8 +19,12 @@
 #undef TRACE_STRUCT
 #define TRACE_STRUCT(args...) args
 
+extern void __bad_type_size(void);
+
 #undef TRACE_FIELD
 #define TRACE_FIELD(type, item, assign)					\
+	if (sizeof(type) != sizeof(field.item))				\
+		__bad_type_size();					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
@@ -30,7 +34,7 @@
 
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd)			\
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)			\
 	ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
 	if (!ret)							\
 		return 0;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
 
 #undef TP_RAW_FMT
 #define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@
 	return ret;							\
 }
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct args field;						\
+	int ret;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);		\
+									\
+	return ret;							\
+}
+
 #include "trace_event_types.h"
 
 #undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@
 #define TRACE_FIELD(type, item, assign)\
 	entry->item = assign;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
+
 #undef TP_CMD
 #define TP_CMD(cmd...)	cmd
 
@@ -85,18 +112,95 @@
 #define TRACE_ENTRY	entry
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)	\
 	cmd;
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int ftrace_define_fields_##call(void);					\
+static int ftrace_raw_init_event_##call(void);				\
 									\
-static struct ftrace_event_call __used					\
+struct ftrace_event_call __used						\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.id			= proto,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+};									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+									\
+struct ftrace_event_call __used						\
 __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.id			= proto,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.show_format		= ftrace_format_##call,			\
+};
+
+#include "trace_event_types.h"
+
+#undef TRACE_FIELD
+#define TRACE_FIELD(type, item, assign)					\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed_type(type));	\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SPECIAL
+#define TRACE_FIELD_SPECIAL(type, item, len, cmd)			\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed);	\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_ZERO_CHAR
+#define TRACE_FIELD_ZERO_CHAR(item)
+
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	struct args field;						\
+	int ret;							\
+									\
+	__common_field(unsigned char, type, 0);				\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
 }
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)
+
 #include "trace_event_types.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e..8b59241 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@
 	if (!current->ret_stack)
 		return -EBUSY;
 
+	/*
+	 * We must make sure the ret_stack is tested before we read
+	 * anything else.
+	 */
+	smp_rmb();
+
 	/* The return trace stack is full */
 	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 		atomic_inc(&current->trace_overrun);
@@ -78,13 +84,14 @@
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
+	current->ret_stack[index].subtime = 0;
 	*depth = index;
 
 	return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-void
+static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
 	int index;
@@ -104,9 +111,6 @@
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
 	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
 }
 
 /*
@@ -121,6 +125,8 @@
 	ftrace_pop_return_trace(&trace, &ret);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
+	barrier();
+	current->curr_ret_stack--;
 
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
@@ -426,8 +432,8 @@
 	return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
 	unsigned long nsecs_rem = do_div(duration, 1000);
 	/* log10(ULONG_MAX) + '\0' */
@@ -464,12 +470,23 @@
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_print_graph_duration(duration, s);
+	if (ret != TRACE_TYPE_HANDLED)
+		return ret;
 
 	ret = trace_seq_printf(s, "|  ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
 
+	return TRACE_TYPE_HANDLED;
 }
 
 /* Case of a leaf function on its call entry */
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c..ca7d7c4 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,10 +1,9 @@
 /*
- * h/w branch tracer for x86 based on bts
+ * h/w branch tracer for x86 based on BTS
  *
  * Copyright (C) 2008-2009 Intel Corporation.
  * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
  */
-#include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
@@ -15,110 +14,119 @@
 
 #include <asm/ds.h>
 
-#include "trace.h"
 #include "trace_output.h"
+#include "trace.h"
 
 
-#define SIZEOF_BTS (1 << 13)
+#define BTS_BUFFER_SIZE (1 << 13)
 
-/*
- * The tracer lock protects the below per-cpu tracer array.
- * It needs to be held to:
- * - start tracing on all cpus
- * - stop tracing on all cpus
- * - start tracing on a single hotplug cpu
- * - stop tracing on a single hotplug cpu
- * - read the trace from all cpus
- * - read the trace from a single cpu
- */
-static DEFINE_SPINLOCK(bts_tracer_lock);
 static DEFINE_PER_CPU(struct bts_tracer *, tracer);
-static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
+static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
 
 #define this_tracer per_cpu(tracer, smp_processor_id())
-#define this_buffer per_cpu(buffer, smp_processor_id())
 
-static int __read_mostly trace_hw_branches_enabled;
+static int trace_hw_branches_enabled __read_mostly;
+static int trace_hw_branches_suspended __read_mostly;
 static struct trace_array *hw_branch_trace __read_mostly;
 
 
-/*
- * Start tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_start_cpu(void *arg)
+static void bts_trace_init_cpu(int cpu)
 {
-	if (this_tracer)
-		ds_release_bts(this_tracer);
+	per_cpu(tracer, cpu) =
+		ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
 
-	this_tracer =
-		ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
-			       /* ovfl = */ NULL, /* th = */ (size_t)-1,
-			       BTS_KERNEL);
-	if (IS_ERR(this_tracer)) {
-		this_tracer = NULL;
-		return;
+	if (IS_ERR(per_cpu(tracer, cpu)))
+		per_cpu(tracer, cpu) = NULL;
+}
+
+static int bts_trace_init(struct trace_array *tr)
+{
+	int cpu;
+
+	hw_branch_trace = tr;
+	trace_hw_branches_enabled = 0;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		bts_trace_init_cpu(cpu);
+
+		if (likely(per_cpu(tracer, cpu)))
+			trace_hw_branches_enabled = 1;
 	}
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
+
+	/* If we could not enable tracing on a single cpu, we fail. */
+	return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
+}
+
+static void bts_trace_reset(struct trace_array *tr)
+{
+	int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
+	}
+	trace_hw_branches_enabled = 0;
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
 }
 
 static void bts_trace_start(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	on_each_cpu(bts_trace_start_cpu, NULL, 1);
-	trace_hw_branches_enabled = 1;
-
-	spin_unlock(&bts_tracer_lock);
-}
-
-/*
- * Stop tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_stop_cpu(void *arg)
-{
-	if (this_tracer) {
-		ds_release_bts(this_tracer);
-		this_tracer = NULL;
-	}
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
 }
 
 static void bts_trace_stop(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	trace_hw_branches_enabled = 0;
-	on_each_cpu(bts_trace_stop_cpu, NULL, 1);
-
-	spin_unlock(&bts_tracer_lock);
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
+	trace_hw_branches_suspended = 1;
+	put_online_cpus();
 }
 
 static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
 				     unsigned long action, void *hcpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
-
-	spin_lock(&bts_tracer_lock);
-
-	if (!trace_hw_branches_enabled)
-		goto out;
+	int cpu = (long)hcpu;
 
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+		/* The notification is sent with interrupts enabled. */
+		if (trace_hw_branches_enabled) {
+			bts_trace_init_cpu(cpu);
+
+			if (trace_hw_branches_suspended &&
+			    likely(per_cpu(tracer, cpu)))
+				ds_suspend_bts(per_cpu(tracer, cpu));
+		}
 		break;
+
 	case CPU_DOWN_PREPARE:
-		smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
-		break;
+		/* The notification is sent with interrupts enabled. */
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
 	}
 
- out:
-	spin_unlock(&bts_tracer_lock);
 	return NOTIFY_DONE;
 }
 
@@ -126,20 +134,6 @@
 	.notifier_call = bts_hotcpu_handler
 };
 
-static int bts_trace_init(struct trace_array *tr)
-{
-	hw_branch_trace = tr;
-
-	bts_trace_start(tr);
-
-	return 0;
-}
-
-static void bts_trace_reset(struct trace_array *tr)
-{
-	bts_trace_stop(tr);
-}
-
 static void bts_trace_print_header(struct seq_file *m)
 {
 	seq_puts(m, "# CPU#        TO  <-  FROM\n");
@@ -147,10 +141,10 @@
 
 static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 {
+	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *seq = &iter->seq;
 	struct hw_branch_entry *it;
-	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 
 	trace_assign_type(it, entry);
 
@@ -168,6 +162,7 @@
 
 void trace_hw_branch(u64 from, u64 to)
 {
+	struct ftrace_event_call *call = &event_hw_branch;
 	struct trace_array *tr = hw_branch_trace;
 	struct ring_buffer_event *event;
 	struct hw_branch_entry *entry;
@@ -194,7 +189,8 @@
 	entry->ent.type = TRACE_HW_BRANCHES;
 	entry->from = from;
 	entry->to   = to;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
@@ -224,11 +220,11 @@
 /*
  * Collect the trace on the current cpu and write it into the ftrace buffer.
  *
- * pre: bts_tracer_lock must be locked
+ * pre: tracing must be suspended on the current cpu
  */
 static void trace_bts_cpu(void *arg)
 {
-	struct trace_array *tr = (struct trace_array *) arg;
+	struct trace_array *tr = (struct trace_array *)arg;
 	const struct bts_trace *trace;
 	unsigned char *at;
 
@@ -241,10 +237,9 @@
 	if (unlikely(!this_tracer))
 		return;
 
-	ds_suspend_bts(this_tracer);
 	trace = ds_read_bts(this_tracer);
 	if (!trace)
-		goto out;
+		return;
 
 	for (at = trace->ds.top; (void *)at < trace->ds.end;
 	     at += trace->ds.size)
@@ -253,18 +248,27 @@
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     at += trace->ds.size)
 		trace_bts_at(trace, at);
-
-out:
-	ds_resume_bts(this_tracer);
 }
 
 static void trace_bts_prepare(struct trace_iterator *iter)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
+	/*
+	 * We need to collect the trace on the respective cpu since ftrace
+	 * implicitly adds the record for the current cpu.
+	 * Once that is more flexible, we could collect the data from any cpu.
+	 */
 	on_each_cpu(trace_bts_cpu, iter->tr, 1);
 
-	spin_unlock(&bts_tracer_lock);
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
+	put_online_cpus();
 }
 
 static void trace_bts_close(struct trace_iterator *iter)
@@ -274,11 +278,11 @@
 
 void trace_hw_branch_oops(void)
 {
-	spin_lock(&bts_tracer_lock);
-
-	trace_bts_cpu(hw_branch_trace);
-
-	spin_unlock(&bts_tracer_lock);
+	if (this_tracer) {
+		ds_suspend_bts_noirq(this_tracer);
+		trace_bts_cpu(hw_branch_trace);
+		ds_resume_bts_noirq(this_tracer);
+	}
 }
 
 struct tracer bts_tracer __read_mostly =
@@ -291,7 +295,10 @@
 	.start		= bts_trace_start,
 	.stop		= bts_trace_stop,
 	.open		= trace_bts_prepare,
-	.close		= trace_bts_close
+	.close		= trace_bts_close,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_hw_branches,
+#endif /* CONFIG_FTRACE_SELFTEST */
 };
 
 __init static int init_bts_trace(void)
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 8e37fcd..d53b45e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,8 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <linux/time.h>
+
 #include <asm/atomic.h>
 
 #include "trace.h"
@@ -174,7 +176,7 @@
 	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
@@ -221,7 +223,7 @@
 	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 64b54a5..7938f3a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,11 +14,25 @@
 /* must be a power of 2 */
 #define EVENT_HASHSIZE	128
 
-static DEFINE_MUTEX(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
+void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+	s->buffer[len] = 0;
+	seq_puts(m, s->buffer);
+
+	trace_seq_init(s);
+}
+
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -84,6 +98,39 @@
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(trace_seq_printf);
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
 
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
@@ -201,6 +248,67 @@
 	return 0;
 }
 
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+		       unsigned long flags,
+		       const struct trace_print_flags *flag_array)
+{
+	unsigned long mask;
+	const char *str;
+	const char *ret = p->buffer + p->len;
+	int i;
+
+	for (i = 0;  flag_array[i].name && flags; i++) {
+
+		mask = flag_array[i].mask;
+		if ((flags & mask) != mask)
+			continue;
+
+		str = flag_array[i].name;
+		flags &= ~mask;
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_puts(p, str);
+	}
+
+	/* check for left over flags */
+	if (flags) {
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_printf(p, "0x%lx", flags);
+	}
+
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_flags_seq);
+
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+			 const struct trace_print_flags *symbol_array)
+{
+	int i;
+	const char *ret = p->buffer + p->len;
+
+	for (i = 0;  symbol_array[i].name; i++) {
+
+		if (val != symbol_array[i].mask)
+			continue;
+
+		trace_seq_puts(p, symbol_array[i].name);
+		break;
+	}
+
+	if (!p->len)
+		trace_seq_printf(p, "0x%lx", val);
+		
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
@@ -311,17 +419,20 @@
 
 		if (ip == ULONG_MAX || !ret)
 			break;
-		if (i && ret)
-			ret = trace_seq_puts(s, " <- ");
+		if (ret)
+			ret = trace_seq_puts(s, " => ");
 		if (!ip) {
 			if (ret)
 				ret = trace_seq_puts(s, "??");
+			if (ret)
+				ret = trace_seq_puts(s, "\n");
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
+		ret = trace_seq_puts(s, "\n");
 	}
 
 	if (mm)
@@ -455,6 +566,7 @@
  * @type: the type of event to look for
  *
  * Returns an event of type @type otherwise NULL
+ * Called with trace_event_read_lock() held.
  */
 struct trace_event *ftrace_find_event(int type)
 {
@@ -464,7 +576,7 @@
 
 	key = type & (EVENT_HASHSIZE - 1);
 
-	hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+	hlist_for_each_entry(event, n, &event_hash[key], node) {
 		if (event->type == type)
 			return event;
 	}
@@ -472,6 +584,46 @@
 	return NULL;
 }
 
+static LIST_HEAD(ftrace_event_list);
+
+static int trace_search_list(struct list_head **list)
+{
+	struct trace_event *e;
+	int last = __TRACE_LAST_TYPE;
+
+	if (list_empty(&ftrace_event_list)) {
+		*list = &ftrace_event_list;
+		return last + 1;
+	}
+
+	/*
+	 * We used up all possible max events,
+	 * lets see if somebody freed one.
+	 */
+	list_for_each_entry(e, &ftrace_event_list, list) {
+		if (e->type != last + 1)
+			break;
+		last++;
+	}
+
+	/* Did we used up all 65 thousand events??? */
+	if ((last + 1) > FTRACE_MAX_EVENT)
+		return 0;
+
+	*list = &e->list;
+	return last + 1;
+}
+
+void trace_event_read_lock(void)
+{
+	down_read(&trace_event_mutex);
+}
+
+void trace_event_read_unlock(void)
+{
+	up_read(&trace_event_mutex);
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -492,22 +644,42 @@
 	unsigned key;
 	int ret = 0;
 
-	mutex_lock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
 
-	if (!event) {
-		ret = next_event_type++;
+	if (WARN_ON(!event))
 		goto out;
-	}
 
-	if (!event->type)
-		event->type = next_event_type++;
-	else if (event->type > __TRACE_LAST_TYPE) {
+	INIT_LIST_HEAD(&event->list);
+
+	if (!event->type) {
+		struct list_head *list = NULL;
+
+		if (next_event_type > FTRACE_MAX_EVENT) {
+
+			event->type = trace_search_list(&list);
+			if (!event->type)
+				goto out;
+
+		} else {
+			
+			event->type = next_event_type++;
+			list = &ftrace_event_list;
+		}
+
+		if (WARN_ON(ftrace_find_event(event->type)))
+			goto out;
+
+		list_add_tail(&event->list, list);
+
+	} else if (event->type > __TRACE_LAST_TYPE) {
 		printk(KERN_WARNING "Need to add type to trace.h\n");
 		WARN_ON(1);
-	}
-
-	if (ftrace_find_event(event->type))
 		goto out;
+	} else {
+		/* Is this event already used */
+		if (ftrace_find_event(event->type))
+			goto out;
+	}
 
 	if (event->trace == NULL)
 		event->trace = trace_nop_print;
@@ -520,14 +692,25 @@
 
 	key = event->type & (EVENT_HASHSIZE - 1);
 
-	hlist_add_head_rcu(&event->node, &event_hash[key]);
+	hlist_add_head(&event->node, &event_hash[key]);
 
 	ret = event->type;
  out:
-	mutex_unlock(&trace_event_mutex);
+	up_write(&trace_event_mutex);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_ftrace_event);
+
+/*
+ * Used by module code with the trace_event_mutex held for write.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+	hlist_del(&event->node);
+	list_del(&event->list);
+	return 0;
+}
 
 /**
  * unregister_ftrace_event - remove a no longer used event
@@ -535,12 +718,13 @@
  */
 int unregister_ftrace_event(struct trace_event *event)
 {
-	mutex_lock(&trace_event_mutex);
-	hlist_del(&event->node);
-	mutex_unlock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
+	__unregister_ftrace_event(event);
+	up_write(&trace_event_mutex);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(unregister_ftrace_event);
 
 /*
  * Standard events
@@ -833,14 +1017,16 @@
 
 	trace_assign_type(field, iter->ent);
 
+	if (!trace_seq_puts(s, "<stack trace>\n"))
+		goto partial;
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		if (i) {
-			if (!trace_seq_puts(s, " <= "))
-				goto partial;
+		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
+			break;
+		if (!trace_seq_puts(s, " => "))
+			goto partial;
 
-			if (!seq_print_ip_sym(s, field->caller[i], flags))
-				goto partial;
-		}
+		if (!seq_print_ip_sym(s, field->caller[i], flags))
+			goto partial;
 		if (!trace_seq_puts(s, "\n"))
 			goto partial;
 	}
@@ -868,10 +1054,10 @@
 
 	trace_assign_type(field, iter->ent);
 
-	if (!seq_print_userip_objs(field, s, flags))
+	if (!trace_seq_puts(s, "<user stack trace>\n"))
 		goto partial;
 
-	if (!trace_seq_putc(s, '\n'))
+	if (!seq_print_userip_objs(field, s, flags))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index e0bde39..d38bec4 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,41 +1,17 @@
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
+#include <linux/trace_seq.h>
 #include "trace.h"
 
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-					      int flags);
-
-struct trace_event {
-	struct hlist_node	node;
-	int			type;
-	trace_print_func	trace;
-	trace_print_func	raw;
-	trace_print_func	hex;
-	trace_print_func	binary;
-};
-
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
-extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
-				size_t len);
-extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
-extern int trace_seq_path(struct trace_seq *s, struct path *path);
 extern int seq_print_userip_objs(const struct userstack_entry *entry,
 				 struct trace_seq *s, unsigned long sym_flags);
 extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -44,13 +20,17 @@
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
+extern void trace_event_read_lock(void);
+extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
 
+/* used by module unregistering */
+extern int __unregister_ftrace_event(struct trace_event *event);
+extern struct rw_semaphore trace_event_mutex;
+
 #define MAX_MEMHEX_BYTES	8
 #define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 1184397..8a30d98 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -36,6 +36,7 @@
 
 static void probe_power_end(struct power_trace *it)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -54,7 +55,8 @@
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
@@ -62,6 +64,7 @@
 static void probe_power_mark(struct power_trace *it, unsigned int type,
 				unsigned int level)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -84,7 +87,8 @@
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index eb81556..9bece96 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -245,17 +245,13 @@
 static __init int init_trace_printk_function_export(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
 		return 0;
 
-	entry = debugfs_create_file("printk_formats", 0444, d_tracer,
+	trace_create_file("printk_formats", 0444, d_tracer,
 				    NULL, &ftrace_formats_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'printk_formats' entry\n");
 
 	return 0;
 }
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9117cea..a98106d 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -29,13 +29,13 @@
 	int cpu;
 	int pc;
 
-	if (!sched_ref || sched_stopped)
+	if (unlikely(!sched_ref))
 		return;
 
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
 
-	if (!tracer_enabled)
+	if (!tracer_enabled || sched_stopped)
 		return;
 
 	pc = preempt_count();
@@ -56,15 +56,15 @@
 	unsigned long flags;
 	int cpu, pc;
 
-	if (!likely(tracer_enabled))
+	if (unlikely(!sched_ref))
+		return;
+
+	tracing_record_cmdline(current);
+
+	if (!tracer_enabled || sched_stopped)
 		return;
 
 	pc = preempt_count();
-	tracing_record_cmdline(current);
-
-	if (sched_stopped)
-		return;
-
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f..eacb272 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -138,9 +138,6 @@
 
 	pc = preempt_count();
 
-	/* The task we are waiting for is waking up */
-	data = wakeup_trace->data[wakeup_cpu];
-
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
+	/* The task we are waiting for is waking up */
+	data = wakeup_trace->data[wakeup_cpu];
+
 	trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 08f4eb2..00dd648 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@
 	case TRACE_BRANCH:
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
+	case TRACE_HW_BRANCHES:
 		return 1;
 	}
 	return 0;
@@ -188,6 +189,7 @@
 #else
 # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
 #endif /* CONFIG_DYNAMIC_FTRACE */
+
 /*
  * Simple verification test of ftrace function tracer.
  * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -749,3 +751,59 @@
 	return ret;
 }
 #endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_HW_BRANCH_TRACER
+int
+trace_selftest_startup_hw_branches(struct tracer *trace,
+				   struct trace_array *tr)
+{
+	struct trace_iterator *iter;
+	struct tracer tracer;
+	unsigned long count;
+	int ret;
+
+	if (!trace->open) {
+		printk(KERN_CONT "missing open function...");
+		return -1;
+	}
+
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	/*
+	 * The hw-branch tracer needs to collect the trace from the various
+	 * cpu trace buffers - before tracing is stopped.
+	 */
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
+	memcpy(&tracer, trace, sizeof(tracer));
+
+	iter->trace = &tracer;
+	iter->tr = tr;
+	iter->pos = -1;
+	mutex_init(&iter->mutex);
+
+	trace->open(iter);
+
+	mutex_destroy(&iter->mutex);
+	kfree(iter);
+
+	tracing_stop();
+
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	if (!ret && !count) {
+		printk(KERN_CONT "no entries found..");
+		ret = -1;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_HW_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c750f65..2d7aebd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@
 		seq_printf(m, "        Depth    Size   Location"
 			   "    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries);
+			   max_stack_trace.nr_entries - 1);
 
 		if (!stack_tracer_enabled && !max_stack_size)
 			print_disabled(m);
@@ -352,19 +352,14 @@
 static __init int stack_trace_init(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
-				    &max_stack_size, &stack_max_size_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+	trace_create_file("stack_max_size", 0644, d_tracer,
+			&max_stack_size, &stack_max_size_fops);
 
-	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
-				    NULL, &stack_trace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+	trace_create_file("stack_trace", 0444, d_tracer,
+			NULL, &stack_trace_fops);
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index acdebd7..c006437 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -1,7 +1,7 @@
 /*
  * Infrastructure for statistic tracing (histogram output).
  *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
  *
  * Based on the code from trace_branch.c which is
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
@@ -10,22 +10,27 @@
 
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/debugfs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
 
-/* List of stat entries from a tracer */
-struct trace_stat_list {
-	struct list_head	list;
+/*
+ * List of stat red-black nodes from a tracer
+ * We use a such tree to sort quickly the stat
+ * entries from the tracer.
+ */
+struct stat_node {
+	struct rb_node		node;
 	void			*stat;
 };
 
 /* A stat session is the stats output in one file */
-struct tracer_stat_session {
+struct stat_session {
 	struct list_head	session_list;
 	struct tracer_stat	*ts;
-	struct list_head	stat_list;
+	struct rb_root		stat_root;
 	struct mutex		stat_mutex;
 	struct dentry		*file;
 };
@@ -37,18 +42,48 @@
 /* The root directory for all stat files */
 static struct dentry		*stat_dir;
 
-
-static void reset_stat_session(struct tracer_stat_session *session)
+/*
+ * Iterate through the rbtree using a post order traversal path
+ * to release the next node.
+ * It won't necessary release one at each iteration
+ * but it will at least advance closer to the next one
+ * to be released.
+ */
+static struct rb_node *release_next(struct rb_node *node)
 {
-	struct trace_stat_list *node, *next;
+	struct stat_node *snode;
+	struct rb_node *parent = rb_parent(node);
 
-	list_for_each_entry_safe(node, next, &session->stat_list, list)
-		kfree(node);
+	if (node->rb_left)
+		return node->rb_left;
+	else if (node->rb_right)
+		return node->rb_right;
+	else {
+		if (!parent)
+			;
+		else if (parent->rb_left == node)
+			parent->rb_left = NULL;
+		else
+			parent->rb_right = NULL;
 
-	INIT_LIST_HEAD(&session->stat_list);
+		snode = container_of(node, struct stat_node, node);
+		kfree(snode);
+
+		return parent;
+	}
 }
 
-static void destroy_session(struct tracer_stat_session *session)
+static void reset_stat_session(struct stat_session *session)
+{
+	struct rb_node *node = session->stat_root.rb_node;
+
+	while (node)
+		node = release_next(node);
+
+	session->stat_root = RB_ROOT;
+}
+
+static void destroy_session(struct stat_session *session)
 {
 	debugfs_remove(session->file);
 	reset_stat_session(session);
@@ -56,25 +91,60 @@
 	kfree(session);
 }
 
-/*
- * For tracers that don't provide a stat_cmp callback.
- * This one will force an immediate insertion on tail of
- * the list.
- */
-static int dummy_cmp(void *p1, void *p2)
+typedef int (*cmp_stat_t)(void *, void *);
+
+static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
 {
-	return 1;
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct stat_node *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->stat = stat;
+
+	/*
+	 * Figure out where to put new node
+	 * This is a descendent sorting
+	 */
+	while (*new) {
+		struct stat_node *this;
+		int result;
+
+		this = container_of(*new, struct stat_node, node);
+		result = cmp(data->stat, this->stat);
+
+		parent = *new;
+		if (result >= 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+	return 0;
 }
 
 /*
- * Initialize the stat list at each trace_stat file opening.
+ * For tracers that don't provide a stat_cmp callback.
+ * This one will force an insertion as right-most node
+ * in the rbtree.
+ */
+static int dummy_cmp(void *p1, void *p2)
+{
+	return -1;
+}
+
+/*
+ * Initialize the stat rbtree at each trace_stat file opening.
  * All of these copies and sorting are required on all opening
  * since the stats could have changed between two file sessions.
  */
-static int stat_seq_init(struct tracer_stat_session *session)
+static int stat_seq_init(struct stat_session *session)
 {
-	struct trace_stat_list *iter_entry, *new_entry;
 	struct tracer_stat *ts = session->ts;
+	struct rb_root *root = &session->stat_root;
 	void *stat;
 	int ret = 0;
 	int i;
@@ -85,29 +155,16 @@
 	if (!ts->stat_cmp)
 		ts->stat_cmp = dummy_cmp;
 
-	stat = ts->stat_start();
+	stat = ts->stat_start(ts);
 	if (!stat)
 		goto exit;
 
-	/*
-	 * The first entry. Actually this is the second, but the first
-	 * one (the stat_list head) is pointless.
-	 */
-	new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-	if (!new_entry) {
-		ret = -ENOMEM;
+	ret = insert_stat(root, stat, ts->stat_cmp);
+	if (ret)
 		goto exit;
-	}
-
-	INIT_LIST_HEAD(&new_entry->list);
-
-	list_add(&new_entry->list, &session->stat_list);
-
-	new_entry->stat = stat;
 
 	/*
-	 * Iterate over the tracer stat entries and store them in a sorted
-	 * list.
+	 * Iterate over the tracer stat entries and store them in an rbtree.
 	 */
 	for (i = 1; ; i++) {
 		stat = ts->stat_next(stat, i);
@@ -116,36 +173,16 @@
 		if (!stat)
 			break;
 
-		new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-		if (!new_entry) {
-			ret = -ENOMEM;
-			goto exit_free_list;
-		}
-
-		INIT_LIST_HEAD(&new_entry->list);
-		new_entry->stat = stat;
-
-		list_for_each_entry_reverse(iter_entry, &session->stat_list,
-				list) {
-
-			/* Insertion with a descendent sorting */
-			if (ts->stat_cmp(iter_entry->stat,
-					new_entry->stat) >= 0) {
-
-				list_add(&new_entry->list, &iter_entry->list);
-				break;
-			}
-		}
-
-		/* The current larger value */
-		if (list_empty(&new_entry->list))
-			list_add(&new_entry->list, &session->stat_list);
+		ret = insert_stat(root, stat, ts->stat_cmp);
+		if (ret)
+			goto exit_free_rbtree;
 	}
+
 exit:
 	mutex_unlock(&session->stat_mutex);
 	return ret;
 
-exit_free_list:
+exit_free_rbtree:
 	reset_stat_session(session);
 	mutex_unlock(&session->stat_mutex);
 	return ret;
@@ -154,38 +191,51 @@
 
 static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
+	struct rb_node *node;
+	int i;
 
-	/* Prevent from tracer switch or stat_list modification */
+	/* Prevent from tracer switch or rbtree modification */
 	mutex_lock(&session->stat_mutex);
 
 	/* If we are in the beginning of the file, print the headers */
-	if (!*pos && session->ts->stat_headers)
+	if (!*pos && session->ts->stat_headers) {
+		(*pos)++;
 		return SEQ_START_TOKEN;
+	}
 
-	return seq_list_start(&session->stat_list, *pos);
+	node = rb_first(&session->stat_root);
+	for (i = 0; node && i < *pos; i++)
+		node = rb_next(node);
+
+	(*pos)++;
+
+	return node;
 }
 
 static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
+	struct rb_node *node = p;
+
+	(*pos)++;
 
 	if (p == SEQ_START_TOKEN)
-		return seq_list_start(&session->stat_list, *pos);
+		return rb_first(&session->stat_root);
 
-	return seq_list_next(p, &session->stat_list, pos);
+	return rb_next(node);
 }
 
 static void stat_seq_stop(struct seq_file *s, void *p)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 	mutex_unlock(&session->stat_mutex);
 }
 
 static int stat_seq_show(struct seq_file *s, void *v)
 {
-	struct tracer_stat_session *session = s->private;
-	struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+	struct stat_session *session = s->private;
+	struct stat_node *l = container_of(v, struct stat_node, node);
 
 	if (v == SEQ_START_TOKEN)
 		return session->ts->stat_headers(s);
@@ -205,7 +255,7 @@
 {
 	int ret;
 
-	struct tracer_stat_session *session = inode->i_private;
+	struct stat_session *session = inode->i_private;
 
 	ret = seq_open(file, &trace_stat_seq_ops);
 	if (!ret) {
@@ -218,11 +268,11 @@
 }
 
 /*
- * Avoid consuming memory with our now useless list.
+ * Avoid consuming memory with our now useless rbtree.
  */
 static int tracing_stat_release(struct inode *i, struct file *f)
 {
-	struct tracer_stat_session *session = i->i_private;
+	struct stat_session *session = i->i_private;
 
 	mutex_lock(&session->stat_mutex);
 	reset_stat_session(session);
@@ -251,7 +301,7 @@
 	return 0;
 }
 
-static int init_stat_file(struct tracer_stat_session *session)
+static int init_stat_file(struct stat_session *session)
 {
 	if (!stat_dir && tracing_stat_init())
 		return -ENODEV;
@@ -266,7 +316,7 @@
 
 int register_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *session, *node, *tmp;
+	struct stat_session *session, *node;
 	int ret;
 
 	if (!trace)
@@ -277,7 +327,7 @@
 
 	/* Already registered? */
 	mutex_lock(&all_stat_sessions_mutex);
-	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+	list_for_each_entry(node, &all_stat_sessions, session_list) {
 		if (node->ts == trace) {
 			mutex_unlock(&all_stat_sessions_mutex);
 			return -EINVAL;
@@ -286,15 +336,13 @@
 	mutex_unlock(&all_stat_sessions_mutex);
 
 	/* Init the session */
-	session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
 
 	session->ts = trace;
 	INIT_LIST_HEAD(&session->session_list);
-	INIT_LIST_HEAD(&session->stat_list);
 	mutex_init(&session->stat_mutex);
-	session->file = NULL;
 
 	ret = init_stat_file(session);
 	if (ret) {
@@ -312,7 +360,7 @@
 
 void unregister_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *node, *tmp;
+	struct stat_session *node, *tmp;
 
 	mutex_lock(&all_stat_sessions_mutex);
 	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 202274c..f3546a2 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -12,7 +12,7 @@
 	/* The name of your stat file */
 	const char		*name;
 	/* Iteration over statistic entries */
-	void			*(*stat_start)(void);
+	void			*(*stat_start)(struct tracer_stat *trace);
 	void			*(*stat_next)(void *prev, int idx);
 	/* Compare two entries for stats sorting */
 	int			(*stat_cmp)(void *p1, void *p2);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 91fd19c..e04b76c 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -321,11 +321,7 @@
 
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("sysprof_sample_period", 0644,
+	trace_create_file("sysprof_sample_period", 0644,
 			d_tracer, NULL, &sysprof_sample_fops);
-	if (entry)
-		return;
-	pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
 }
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 797201e..97fcea4 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
@@ -16,8 +16,6 @@
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head            list;
-/* Useful to know if we print the cpu headers */
-	bool		            first_entry;
 	int		            cpu;
 	pid_t			    pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
@@ -47,12 +45,11 @@
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
 			goto found;
@@ -69,12 +66,11 @@
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
 			goto found;
@@ -105,8 +101,6 @@
 	cws->pid = wq_thread->pid;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (list_empty(&workqueue_cpu_stat(cpu)->list))
-		cws->first_entry = true;
 	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
@@ -152,7 +146,7 @@
 	return ret;
 }
 
-static void *workqueue_stat_start(void)
+static void *workqueue_stat_start(struct tracer_stat *trace)
 {
 	int cpu;
 	void *ret = NULL;
@@ -191,16 +185,9 @@
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
 	struct cpu_workqueue_stats *cws = p;
-	unsigned long flags;
-	int cpu = cws->cpu;
 	struct pid *pid;
 	struct task_struct *tsk;
 
-	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
-		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
 	pid = find_get_pid(cws->pid);
 	if (pid) {
 		tsk = get_pid_task(pid, PIDTYPE_PID);
diff --git a/kernel/wait.c b/kernel/wait.c
index 42a2dbc..ea7c3b4 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -154,7 +154,7 @@
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_common(q, mode, 1, 0, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a..0668795 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@
 	return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7..9960be0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -10,6 +10,9 @@
 config BITREVERSE
 	tristate
 
+config RATIONAL
+	boolean
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6cdcf38..116a350 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -336,6 +336,38 @@
 	  out which slabs are relevant to a particular load.
 	  Try running: slabinfo -DA
 
+config DEBUG_KMEMLEAK
+	bool "Kernel memory leak detector"
+	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \
+		!MEMORY_HOTPLUG
+	select DEBUG_SLAB if SLAB
+	select SLUB_DEBUG if SLUB
+	select DEBUG_FS if SYSFS
+	select STACKTRACE if STACKTRACE_SUPPORT
+	select KALLSYMS
+	help
+	  Say Y here if you want to enable the memory leak
+	  detector. The memory allocation/freeing is traced in a way
+	  similar to the Boehm's conservative garbage collector, the
+	  difference being that the orphan objects are not freed but
+	  only shown in /sys/kernel/debug/kmemleak. Enabling this
+	  feature will introduce an overhead to memory
+	  allocations. See Documentation/kmemleak.txt for more
+	  details.
+
+	  In order to access the kmemleak file, debugfs needs to be
+	  mounted (usually at /sys/kernel/debug).
+
+config DEBUG_KMEMLEAK_TEST
+	tristate "Simple test for the kernel memory leak detector"
+	depends on DEBUG_KMEMLEAK
+	help
+	  Say Y or M here to build a test for the kernel memory leak
+	  detector. This option enables a module that explicitly leaks
+	  memory.
+
+	  If unsure, say N.
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e4..1f6edef 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@
 endif
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 1f71b97..7bb4142 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -92,15 +92,8 @@
  */
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
-	if (likely(slab_is_available()))
-		*mask = kmalloc_node(cpumask_size(), flags, node);
-	else {
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-		printk(KERN_ERR
-			"=> alloc_cpumask_var: kmalloc not available!\n");
-#endif
-		*mask = NULL;
-	}
+	*mask = kmalloc_node(cpumask_size(), flags, node);
+
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 	if (!*mask) {
 		printk(KERN_ERR "=> alloc_cpumask_var: failed!\n");
@@ -119,6 +112,12 @@
 }
 EXPORT_SYMBOL(alloc_cpumask_var_node);
 
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
+{
+	return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var_node);
+
 /**
  * alloc_cpumask_var - allocate a struct cpumask
  * @mask: pointer to cpumask_var_t where the cpumask is returned
@@ -135,6 +134,12 @@
 }
 EXPORT_SYMBOL(alloc_cpumask_var);
 
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	return alloc_cpumask_var(mask, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var);
+
 /**
  * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
  * @mask: pointer to cpumask_var_t where the cpumask is returned
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 69da09a..ad65fc0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -23,9 +23,11 @@
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 
@@ -85,6 +87,7 @@
 
 static u32 num_free_entries;
 static u32 min_free_entries;
+static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
@@ -97,6 +100,16 @@
 static struct dentry *show_num_errors_dent  __read_mostly;
 static struct dentry *num_free_entries_dent __read_mostly;
 static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *filter_dent           __read_mostly;
+
+/* per-driver filter related state */
+
+#define NAME_MAX_LEN	64
+
+static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
+static struct device_driver *current_driver                    __read_mostly;
+
+static DEFINE_RWLOCK(driver_name_lock);
 
 static const char *type2name[4] = { "single", "page",
 				    "scather-gather", "coherent" };
@@ -104,6 +117,11 @@
 static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
 				   "DMA_FROM_DEVICE", "DMA_NONE" };
 
+/* little merge helper - remove it after the merge window */
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
+
 /*
  * The access to some variables in this macro is racy. We can't use atomic_t
  * here because all these variables are exported to debugfs. Some of them even
@@ -121,15 +139,54 @@
 {
 #ifdef CONFIG_STACKTRACE
 	if (entry) {
-		printk(KERN_WARNING "Mapped at:\n");
+		pr_warning("Mapped at:\n");
 		print_stack_trace(&entry->stacktrace, 0);
 	}
 #endif
 }
 
+static bool driver_filter(struct device *dev)
+{
+	struct device_driver *drv;
+	unsigned long flags;
+	bool ret;
+
+	/* driver filter off */
+	if (likely(!current_driver_name[0]))
+		return true;
+
+	/* driver filter on and initialized */
+	if (current_driver && dev->driver == current_driver)
+		return true;
+
+	if (current_driver || !current_driver_name[0])
+		return false;
+
+	/* driver filter on but not yet initialized */
+	drv = get_driver(dev->driver);
+	if (!drv)
+		return false;
+
+	/* lock to protect against change of current_driver_name */
+	read_lock_irqsave(&driver_name_lock, flags);
+
+	ret = false;
+	if (drv->name &&
+	    strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
+		current_driver = drv;
+		ret = true;
+	}
+
+	read_unlock_irqrestore(&driver_name_lock, flags);
+	put_driver(drv);
+
+	return ret;
+}
+
 #define err_printk(dev, entry, format, arg...) do {		\
 		error_count += 1;				\
-		if (show_all_errors || show_num_errors > 0) {	\
+		if (driver_filter(dev) &&			\
+		    (show_all_errors || show_num_errors > 0)) {	\
 			WARN(1, "%s %s: " format,		\
 			     dev_driver_string(dev),		\
 			     dev_name(dev) , ## arg);		\
@@ -185,15 +242,50 @@
 static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
 						struct dma_debug_entry *ref)
 {
-	struct dma_debug_entry *entry;
+	struct dma_debug_entry *entry, *ret = NULL;
+	int matches = 0, match_lvl, last_lvl = 0;
 
 	list_for_each_entry(entry, &bucket->list, list) {
-		if ((entry->dev_addr == ref->dev_addr) &&
-		    (entry->dev == ref->dev))
+		if ((entry->dev_addr != ref->dev_addr) ||
+		    (entry->dev != ref->dev))
+			continue;
+
+		/*
+		 * Some drivers map the same physical address multiple
+		 * times. Without a hardware IOMMU this results in the
+		 * same device addresses being put into the dma-debug
+		 * hash multiple times too. This can result in false
+		 * positives being reported. Therfore we implement a
+		 * best-fit algorithm here which returns the entry from
+		 * the hash which fits best to the reference value
+		 * instead of the first-fit.
+		 */
+		matches += 1;
+		match_lvl = 0;
+		entry->size      == ref->size      ? ++match_lvl : match_lvl;
+		entry->type      == ref->type      ? ++match_lvl : match_lvl;
+		entry->direction == ref->direction ? ++match_lvl : match_lvl;
+
+		if (match_lvl == 3) {
+			/* perfect-fit - return the result */
 			return entry;
+		} else if (match_lvl > last_lvl) {
+			/*
+			 * We found an entry that fits better then the
+			 * previous one
+			 */
+			last_lvl = match_lvl;
+			ret      = entry;
+		}
 	}
 
-	return NULL;
+	/*
+	 * If we have multiple matches but no perfect-fit, just return
+	 * NULL.
+	 */
+	ret = (matches == 1) ? ret : NULL;
+
+	return ret;
 }
 
 /*
@@ -257,6 +349,21 @@
 	put_hash_bucket(bucket, &flags);
 }
 
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+	struct dma_debug_entry *entry;
+
+	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+	list_del(&entry->list);
+	memset(entry, 0, sizeof(*entry));
+
+	num_free_entries -= 1;
+	if (num_free_entries < min_free_entries)
+		min_free_entries = num_free_entries;
+
+	return entry;
+}
+
 /* struct dma_entry allocator
  *
  * The next two functions implement the allocator for
@@ -270,15 +377,12 @@
 	spin_lock_irqsave(&free_entries_lock, flags);
 
 	if (list_empty(&free_entries)) {
-		printk(KERN_ERR "DMA-API: debugging out of memory "
-				"- disabling\n");
+		pr_err("DMA-API: debugging out of memory - disabling\n");
 		global_disable = true;
 		goto out;
 	}
 
-	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-	list_del(&entry->list);
-	memset(entry, 0, sizeof(*entry));
+	entry = __dma_entry_alloc();
 
 #ifdef CONFIG_STACKTRACE
 	entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +390,6 @@
 	entry->stacktrace.skip = 2;
 	save_stack_trace(&entry->stacktrace);
 #endif
-	num_free_entries -= 1;
-	if (num_free_entries < min_free_entries)
-		min_free_entries = num_free_entries;
 
 out:
 	spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +411,53 @@
 	spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+int dma_debug_resize_entries(u32 num_entries)
+{
+	int i, delta, ret = 0;
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+	LIST_HEAD(tmp);
+
+	spin_lock_irqsave(&free_entries_lock, flags);
+
+	if (nr_total_entries < num_entries) {
+		delta = num_entries - nr_total_entries;
+
+		spin_unlock_irqrestore(&free_entries_lock, flags);
+
+		for (i = 0; i < delta; i++) {
+			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+			if (!entry)
+				break;
+
+			list_add_tail(&entry->list, &tmp);
+		}
+
+		spin_lock_irqsave(&free_entries_lock, flags);
+
+		list_splice(&tmp, &free_entries);
+		nr_total_entries += i;
+		num_free_entries += i;
+	} else {
+		delta = nr_total_entries - num_entries;
+
+		for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+			entry = __dma_entry_alloc();
+			kfree(entry);
+		}
+
+		nr_total_entries -= i;
+	}
+
+	if (nr_total_entries != num_entries)
+		ret = 1;
+
+	spin_unlock_irqrestore(&free_entries_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
 /*
  * DMA-API debugging init code
  *
@@ -334,8 +482,7 @@
 	num_free_entries = num_entries;
 	min_free_entries = num_entries;
 
-	printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
-			num_entries);
+	pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
 
 	return 0;
 
@@ -349,11 +496,102 @@
 	return -ENOMEM;
 }
 
+static ssize_t filter_read(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	char buf[NAME_MAX_LEN + 1];
+	unsigned long flags;
+	int len;
+
+	if (!current_driver_name[0])
+		return 0;
+
+	/*
+	 * We can't copy to userspace directly because current_driver_name can
+	 * only be read under the driver_name_lock with irqs disabled. So
+	 * create a temporary copy first.
+	 */
+	read_lock_irqsave(&driver_name_lock, flags);
+	len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
+	read_unlock_irqrestore(&driver_name_lock, flags);
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t filter_write(struct file *file, const char __user *userbuf,
+			    size_t count, loff_t *ppos)
+{
+	char buf[NAME_MAX_LEN];
+	unsigned long flags;
+	size_t len;
+	int i;
+
+	/*
+	 * We can't copy from userspace directly. Access to
+	 * current_driver_name is protected with a write_lock with irqs
+	 * disabled. Since copy_from_user can fault and may sleep we
+	 * need to copy to temporary buffer first
+	 */
+	len = min(count, (size_t)(NAME_MAX_LEN - 1));
+	if (copy_from_user(buf, userbuf, len))
+		return -EFAULT;
+
+	buf[len] = 0;
+
+	write_lock_irqsave(&driver_name_lock, flags);
+
+	/*
+	 * Now handle the string we got from userspace very carefully.
+	 * The rules are:
+	 *         - only use the first token we got
+	 *         - token delimiter is everything looking like a space
+	 *           character (' ', '\n', '\t' ...)
+	 *
+	 */
+	if (!isalnum(buf[0])) {
+		/*
+		 * If the first character userspace gave us is not
+		 * alphanumerical then assume the filter should be
+		 * switched off.
+		 */
+		if (current_driver_name[0])
+			pr_info("DMA-API: switching off dma-debug driver filter\n");
+		current_driver_name[0] = 0;
+		current_driver = NULL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Now parse out the first token and use it as the name for the
+	 * driver to filter for.
+	 */
+	for (i = 0; i < NAME_MAX_LEN; ++i) {
+		current_driver_name[i] = buf[i];
+		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
+			break;
+	}
+	current_driver_name[i] = 0;
+	current_driver = NULL;
+
+	pr_info("DMA-API: enable driver filter for driver [%s]\n",
+		current_driver_name);
+
+out_unlock:
+	write_unlock_irqrestore(&driver_name_lock, flags);
+
+	return count;
+}
+
+const struct file_operations filter_fops = {
+	.read  = filter_read,
+	.write = filter_write,
+};
+
 static int dma_debug_fs_init(void)
 {
 	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
 	if (!dma_debug_dent) {
-		printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+		pr_err("DMA-API: can not create debugfs directory\n");
 		return -ENOMEM;
 	}
 
@@ -392,6 +630,11 @@
 	if (!min_free_entries_dent)
 		goto out_err;
 
+	filter_dent = debugfs_create_file("driver_filter", 0644,
+					  dma_debug_dent, NULL, &filter_fops);
+	if (!filter_dent)
+		goto out_err;
+
 	return 0;
 
 out_err:
@@ -400,9 +643,64 @@
 	return -ENOMEM;
 }
 
+static int device_dma_allocations(struct device *dev)
+{
+	struct dma_debug_entry *entry;
+	unsigned long flags;
+	int count = 0, i;
+
+	local_irq_save(flags);
+
+	for (i = 0; i < HASH_SIZE; ++i) {
+		spin_lock(&dma_entry_hash[i].lock);
+		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+			if (entry->dev == dev)
+				count += 1;
+		}
+		spin_unlock(&dma_entry_hash[i].lock);
+	}
+
+	local_irq_restore(flags);
+
+	return count;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb,
+				    unsigned long action, void *data)
+{
+	struct device *dev = data;
+	int count;
+
+
+	switch (action) {
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		count = device_dma_allocations(dev);
+		if (count == 0)
+			break;
+		err_printk(dev, NULL, "DMA-API: device driver has pending "
+				"DMA allocations while released from device "
+				"[count=%d]\n", count);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 void dma_debug_add_bus(struct bus_type *bus)
 {
-	/* FIXME: register notifier */
+	struct notifier_block *nb;
+
+	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+	if (nb == NULL) {
+		pr_err("dma_debug_add_bus: out of memory\n");
+		return;
+	}
+
+	nb->notifier_call = dma_debug_device_change;
+
+	bus_register_notifier(bus, nb);
 }
 
 /*
@@ -421,8 +719,7 @@
 	}
 
 	if (dma_debug_fs_init() != 0) {
-		printk(KERN_ERR "DMA-API: error creating debugfs entries "
-				"- disabling\n");
+		pr_err("DMA-API: error creating debugfs entries - disabling\n");
 		global_disable = true;
 
 		return;
@@ -432,14 +729,15 @@
 		num_entries = req_entries;
 
 	if (prealloc_memory(num_entries) != 0) {
-		printk(KERN_ERR "DMA-API: debugging out of memory error "
-				"- disabled\n");
+		pr_err("DMA-API: debugging out of memory error - disabled\n");
 		global_disable = true;
 
 		return;
 	}
 
-	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
+	nr_total_entries = num_free_entries;
+
+	pr_info("DMA-API: debugging enabled by kernel config\n");
 }
 
 static __init int dma_debug_cmdline(char *str)
@@ -448,8 +746,7 @@
 		return -EINVAL;
 
 	if (strncmp(str, "off", 3) == 0) {
-		printk(KERN_INFO "DMA-API: debugging disabled on kernel "
-				 "command line\n");
+		pr_info("DMA-API: debugging disabled on kernel command line\n");
 		global_disable = true;
 	}
 
@@ -723,15 +1020,15 @@
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
 		entry->paddr          = sg_phys(s);
-		entry->size           = s->length;
-		entry->dev_addr       = s->dma_address;
+		entry->size           = sg_dma_len(s);
+		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
 		entry->sg_call_ents   = nents;
 		entry->sg_mapped_ents = mapped_ents;
 
 		if (!PageHighMem(sg_page(s))) {
 			check_for_stack(dev, sg_virt(s));
-			check_for_illegal_area(dev, sg_virt(s), s->length);
+			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
 		}
 
 		add_dma_entry(entry);
@@ -739,13 +1036,33 @@
 }
 EXPORT_SYMBOL(debug_dma_map_sg);
 
+static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
+{
+	struct dma_debug_entry *entry, ref;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+	int mapped_ents;
+
+	ref.dev      = dev;
+	ref.dev_addr = sg_dma_address(s);
+	ref.size     = sg_dma_len(s),
+
+	bucket       = get_hash_bucket(&ref, &flags);
+	entry        = hash_bucket_find(bucket, &ref);
+	mapped_ents  = 0;
+
+	if (entry)
+		mapped_ents = entry->sg_mapped_ents;
+	put_hash_bucket(bucket, &flags);
+
+	return mapped_ents;
+}
+
 void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			int nelems, int dir)
 {
-	struct dma_debug_entry *entry;
 	struct scatterlist *s;
 	int mapped_ents = 0, i;
-	unsigned long flags;
 
 	if (unlikely(global_disable))
 		return;
@@ -756,8 +1073,8 @@
 			.type           = dma_debug_sg,
 			.dev            = dev,
 			.paddr          = sg_phys(s),
-			.dev_addr       = s->dma_address,
-			.size           = s->length,
+			.dev_addr       = sg_dma_address(s),
+			.size           = sg_dma_len(s),
 			.direction      = dir,
 			.sg_call_ents   = 0,
 		};
@@ -765,14 +1082,9 @@
 		if (mapped_ents && i >= mapped_ents)
 			break;
 
-		if (mapped_ents == 0) {
-			struct hash_bucket *bucket;
+		if (!i) {
 			ref.sg_call_ents = nelems;
-			bucket = get_hash_bucket(&ref, &flags);
-			entry = hash_bucket_find(bucket, &ref);
-			if (entry)
-				mapped_ents = entry->sg_mapped_ents;
-			put_hash_bucket(bucket, &flags);
+			mapped_ents = get_nr_mapped_entries(dev, s);
 		}
 
 		check_unmap(&ref);
@@ -874,14 +1186,20 @@
 			       int nelems, int direction)
 {
 	struct scatterlist *s;
-	int i;
+	int mapped_ents = 0, i;
 
 	if (unlikely(global_disable))
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, s->dma_address, s->dma_length, 0,
-				direction, true);
+		if (!i)
+			mapped_ents = get_nr_mapped_entries(dev, s);
+
+		if (i >= mapped_ents)
+			break;
+
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+			   direction, true);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -890,15 +1208,39 @@
 				  int nelems, int direction)
 {
 	struct scatterlist *s;
-	int i;
+	int mapped_ents = 0, i;
 
 	if (unlikely(global_disable))
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, s->dma_address, s->dma_length, 0,
-				direction, false);
+		if (!i)
+			mapped_ents = get_nr_mapped_entries(dev, s);
+
+		if (i >= mapped_ents)
+			break;
+
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+			   direction, false);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
 
+static int __init dma_debug_driver_setup(char *str)
+{
+	int i;
+
+	for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
+		current_driver_name[i] = *str;
+		if (*str == 0)
+			break;
+	}
+
+	if (current_driver_name[0])
+		pr_info("DMA-API: enable driver filter for driver [%s]\n",
+			current_driver_name);
+
+
+	return 1;
+}
+__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/extable.c b/lib/extable.c
index 179c087..4cac81e 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -39,7 +39,26 @@
 	sort(start, finish - start, sizeof(struct exception_table_entry),
 	     cmp_ex, NULL);
 }
-#endif
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any referring to the module init
+ * will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+		within_module_init(m->extable[m->num_exentries-1].insn, m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+#endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
 /*
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 0000000..b3c099b
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,62 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers
+ */
+
+#include <linux/rational.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ *		(1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator)
+{
+	unsigned long n, d, n0, d0, n1, d1;
+	n = given_numerator;
+	d = given_denominator;
+	n0 = d1 = 0;
+	n1 = d0 = 1;
+	for (;;) {
+		unsigned long t, a;
+		if ((n1 > max_numerator) || (d1 > max_denominator)) {
+			n1 = n0;
+			d1 = d0;
+			break;
+		}
+		if (d == 0)
+			break;
+		t = d;
+		a = n / d;
+		d = n % d;
+		n = t;
+		t = n0 + a * n1;
+		n0 = n1;
+		n1 = t;
+		t = d0 + a * d1;
+		d0 = d1;
+		d1 = t;
+	}
+	*best_numerator = n1;
+	*best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2b0b5a7..bffe6d7 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -60,8 +60,8 @@
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -129,7 +129,7 @@
 	return paddr;
 }
 
-phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
@@ -140,9 +140,15 @@
 	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-	return phys_to_virt(swiotlb_bus_to_phys(address));
+	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+					       dma_addr_t addr, size_t size)
+{
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -309,10 +315,10 @@
 	return -ENOMEM;
 }
 
-static int
+static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
 }
 
 static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -341,7 +347,7 @@
 		unsigned long flags;
 
 		while (size) {
-			sz = min(PAGE_SIZE - offset, size);
+			sz = min_t(size_t, PAGE_SIZE - offset, size);
 
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn),
@@ -476,7 +482,7 @@
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -560,7 +566,6 @@
 				   size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
-		 * Fall back on swiotlb_map_single().
 		 */
 		free_pages((unsigned long) ret, order);
 		ret = NULL;
@@ -568,9 +573,8 @@
 	if (!ret) {
 		/*
 		 * We are either out of memory or the device can't DMA
-		 * to GFP_DMA memory; fall back on
-		 * swiotlb_map_single(), which will grab memory from
-		 * the lowest available address range.
+		 * to GFP_DMA memory; fall back on map_single(), which
+		 * will grab memory from the lowest available address range.
 		 */
 		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
 		if (!ret)
@@ -587,7 +591,7 @@
 		       (unsigned long long)dev_addr);
 
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
 	*dma_handle = dev_addr;
@@ -604,7 +608,7 @@
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -634,7 +638,7 @@
  * physical address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    unsigned long offset, size_t size,
@@ -642,18 +646,17 @@
 			    struct dma_attrs *attrs)
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
-	void *ptr = page_address(page) + offset;
 	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
+	 * If the address happens to be in the device's DMA window,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
 	if (!address_needs_mapping(dev, dev_addr, size) &&
-	    !range_needs_mapping(virt_to_phys(ptr), size))
+	    !range_needs_mapping(phys, size))
 		return dev_addr;
 
 	/*
@@ -679,23 +682,35 @@
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+			 size_t size, int dir)
+{
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+	BUG_ON(dir == DMA_NONE);
+
+	if (is_swiotlb_buffer(dma_addr)) {
+		do_unmap_single(hwdev, dma_addr, size, dir);
+		return;
+	}
+
+	if (dir != DMA_FROM_DEVICE)
+		return;
+
+	dma_mark_clean(dma_addr, size);
+}
+
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			size_t size, enum dma_data_direction dir,
 			struct dma_attrs *attrs)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
-		unmap_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+	unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -703,7 +718,7 @@
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the dma mapping, you must
  * call this function before doing so.  At the next point you give the dma
  * address back to the card, you must first perform a
@@ -713,13 +728,19 @@
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
+
+	if (is_swiotlb_buffer(dma_addr)) {
 		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+		return;
+	}
+
+	if (dir != DMA_FROM_DEVICE)
+		return;
+
+	dma_mark_clean(dma_addr, size);
 }
 
 void
@@ -746,13 +767,7 @@
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
-		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
 }
 
 void
@@ -777,7 +792,7 @@
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -788,7 +803,7 @@
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -836,7 +851,7 @@
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -847,13 +862,9 @@
 
 	BUG_ON(dir == DMA_NONE);
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				     sg->dma_length, dir);
-		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
@@ -879,15 +890,9 @@
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+	for_each_sg(sgl, sg, nelems, i)
+		swiotlb_sync_single(hwdev, sg->dma_address,
 				    sg->dma_length, dir, target);
-		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-	}
 }
 
 void
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7536ace..756ccaf 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -408,6 +408,8 @@
 	FORMAT_TYPE_LONG_LONG,
 	FORMAT_TYPE_ULONG,
 	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_UBYTE,
+	FORMAT_TYPE_BYTE,
 	FORMAT_TYPE_USHORT,
 	FORMAT_TYPE_SHORT,
 	FORMAT_TYPE_UINT,
@@ -573,12 +575,15 @@
 }
 
 static char *symbol_string(char *buf, char *end, void *ptr,
-				struct printf_spec spec)
+				struct printf_spec spec, char ext)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
-	sprint_symbol(sym, value);
+	if (ext != 'f')
+		sprint_symbol(sym, value);
+	else
+		kallsyms_lookup(value, NULL, NULL, NULL, sym);
 	return string(buf, end, sym, spec);
 #else
 	spec.field_width = 2*sizeof(void *);
@@ -690,7 +695,8 @@
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers
+ * - 'F' For symbolic function descriptor pointers with offset
+ * - 'f' For simple symbolic function names without offset
  * - 'S' For symbolic direct pointers
  * - 'R' For a struct resource pointer, it prints the range of
  *       addresses (not the name nor the flags)
@@ -713,10 +719,11 @@
 
 	switch (*fmt) {
 	case 'F':
+	case 'f':
 		ptr = dereference_function_descriptor(ptr);
 		/* Fallthrough */
 	case 'S':
-		return symbol_string(buf, end, ptr, spec);
+		return symbol_string(buf, end, ptr, spec, *fmt);
 	case 'R':
 		return resource_string(buf, end, ptr, spec);
 	case 'm':
@@ -853,11 +860,15 @@
 	spec->qualifier = -1;
 	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
 	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-		spec->qualifier = *fmt;
-		++fmt;
-		if (spec->qualifier == 'l' && *fmt == 'l') {
-			spec->qualifier = 'L';
-			++fmt;
+		spec->qualifier = *fmt++;
+		if (unlikely(spec->qualifier == *fmt)) {
+			if (spec->qualifier == 'l') {
+				spec->qualifier = 'L';
+				++fmt;
+			} else if (spec->qualifier == 'h') {
+				spec->qualifier = 'H';
+				++fmt;
+			}
 		}
 	}
 
@@ -919,6 +930,11 @@
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
+	} else if (spec->qualifier == 'H') {
+		if (spec->flags & SIGN)
+			spec->type = FORMAT_TYPE_BYTE;
+		else
+			spec->type = FORMAT_TYPE_UBYTE;
 	} else if (spec->qualifier == 'h') {
 		if (spec->flags & SIGN)
 			spec->type = FORMAT_TYPE_SHORT;
@@ -943,7 +959,8 @@
  *
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
@@ -1087,6 +1104,12 @@
 			case FORMAT_TYPE_PTRDIFF:
 				num = va_arg(args, ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+				num = (unsigned char) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_BYTE:
+				num = (signed char) va_arg(args, int);
+				break;
 			case FORMAT_TYPE_USHORT:
 				num = (unsigned short) va_arg(args, int);
 				break;
@@ -1363,6 +1386,10 @@
 			case FORMAT_TYPE_PTRDIFF:
 				save_arg(ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+			case FORMAT_TYPE_BYTE:
+				save_arg(char);
+				break;
 			case FORMAT_TYPE_USHORT:
 			case FORMAT_TYPE_SHORT:
 				save_arg(short);
@@ -1391,7 +1418,8 @@
  *
  * The format follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  * %n is ignored
  *
@@ -1538,6 +1566,12 @@
 			case FORMAT_TYPE_PTRDIFF:
 				num = get_arg(ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+				num = get_arg(unsigned char);
+				break;
+			case FORMAT_TYPE_BYTE:
+				num = get_arg(signed char);
+				break;
 			case FORMAT_TYPE_USHORT:
 				num = get_arg(unsigned short);
 				break;
diff --git a/mm/Kconfig b/mm/Kconfig
index c2b57d8..71830ba 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -226,6 +226,25 @@
 config MMU_NOTIFIER
 	bool
 
+config DEFAULT_MMAP_MIN_ADDR
+        int "Low address space to protect from user allocation"
+        default 4096
+        help
+	  This is the portion of low virtual memory which should be protected
+	  from userspace allocation.  Keeping a user from writing to low pages
+	  can help reduce the impact of kernel NULL pointer bugs.
+
+	  For most ia64, ppc64 and x86 users with lots of address space
+	  a value of 65536 is reasonable and should cause no problems.
+	  On arm and other archs it should not be higher than 32768.
+	  Programs which use vm86 functionality would either need additional
+	  permissions from either the LSM or the capabilities module or have
+	  this protection disabled.
+
+	  This value can be changed after boot using the
+	  /proc/sys/vm/mmap_min_addr tunable.
+
+
 config NOMMU_INITIAL_TRIM_EXCESS
 	int "Turn on mmap() excess space trimming before booting"
 	depends on !MMU
diff --git a/mm/Makefile b/mm/Makefile
index ec73c68..e89acb0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -38,3 +38,5 @@
 endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index daf9271..282df0a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -532,6 +532,9 @@
 					unsigned long size, unsigned long align,
 					unsigned long goal, unsigned long limit)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc(size, GFP_NOWAIT);
+
 #ifdef CONFIG_HAVE_ARCH_BOOTMEM
 	bootmem_data_t *p_bdata;
 
@@ -662,6 +665,9 @@
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 }
 
@@ -693,6 +699,9 @@
 {
 	void *ptr;
 
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return ptr;
@@ -745,6 +754,9 @@
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	return ___alloc_bootmem_node(pgdat->bdata, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 }
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272..4ebe3ea 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,16 +14,15 @@
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/block.h>
+
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
 static mempool_t *page_pool, *isa_page_pool;
 
-DEFINE_TRACE(block_bio_bounce);
-
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
@@ -192,7 +191,7 @@
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if (page_to_pfn(page) <= q->bounce_pfn)
+		if (page_to_pfn(page) <= queue_bounce_pfn(q))
 			continue;
 
 		/*
@@ -284,7 +283,7 @@
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (q->bounce_pfn >= blk_max_pfn)
+		if (queue_bounce_pfn(q) >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
new file mode 100644
index 0000000..d5292fc
--- /dev/null
+++ b/mm/kmemleak-test.c
@@ -0,0 +1,111 @@
+/*
+ * mm/kmemleak-test.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/fdtable.h>
+
+#include <linux/kmemleak.h>
+
+struct test_node {
+	long header[25];
+	struct list_head list;
+	long footer[25];
+};
+
+static LIST_HEAD(test_list);
+static DEFINE_PER_CPU(void *, test_pointer);
+
+/*
+ * Some very simple testing. This function needs to be extended for
+ * proper testing.
+ */
+static int __init kmemleak_test_init(void)
+{
+	struct test_node *elem;
+	int i;
+
+	printk(KERN_INFO "Kmemleak testing\n");
+
+	/* make some orphan objects */
+	pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+#ifndef CONFIG_MODULES
+	pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+		kmem_cache_alloc(files_cachep, GFP_KERNEL));
+	pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+		kmem_cache_alloc(files_cachep, GFP_KERNEL));
+#endif
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+
+	/*
+	 * Add elements to a list. They should only appear as orphan
+	 * after the module is removed.
+	 */
+	for (i = 0; i < 10; i++) {
+		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
+		pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem);
+		if (!elem)
+			return -ENOMEM;
+		memset(elem, 0, sizeof(*elem));
+		INIT_LIST_HEAD(&elem->list);
+
+		list_add_tail(&elem->list, &test_list);
+	}
+
+	for_each_possible_cpu(i) {
+		per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL);
+		pr_info("kmemleak: kmalloc(129) = %p\n",
+			per_cpu(test_pointer, i));
+	}
+
+	return 0;
+}
+module_init(kmemleak_test_init);
+
+static void __exit kmemleak_test_exit(void)
+{
+	struct test_node *elem, *tmp;
+
+	/*
+	 * Remove the list elements without actually freeing the
+	 * memory.
+	 */
+	list_for_each_entry_safe(elem, tmp, &test_list, list)
+		list_del(&elem->list);
+}
+module_exit(kmemleak_test_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
new file mode 100644
index 0000000..58ec86c
--- /dev/null
+++ b/mm/kmemleak.c
@@ -0,0 +1,1498 @@
+/*
+ * mm/kmemleak.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * For more information on the algorithm and kmemleak usage, please see
+ * Documentation/kmemleak.txt.
+ *
+ * Notes on locking
+ * ----------------
+ *
+ * The following locks and mutexes are used by kmemleak:
+ *
+ * - kmemleak_lock (rwlock): protects the object_list modifications and
+ *   accesses to the object_tree_root. The object_list is the main list
+ *   holding the metadata (struct kmemleak_object) for the allocated memory
+ *   blocks. The object_tree_root is a priority search tree used to look-up
+ *   metadata based on a pointer to the corresponding memory block.  The
+ *   kmemleak_object structures are added to the object_list and
+ *   object_tree_root in the create_object() function called from the
+ *   kmemleak_alloc() callback and removed in delete_object() called from the
+ *   kmemleak_free() callback
+ * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to
+ *   the metadata (e.g. count) are protected by this lock. Note that some
+ *   members of this structure may be protected by other means (atomic or
+ *   kmemleak_lock). This lock is also held when scanning the corresponding
+ *   memory block to avoid the kernel freeing it via the kmemleak_free()
+ *   callback. This is less heavyweight than holding a global lock like
+ *   kmemleak_lock during scanning
+ * - scan_mutex (mutex): ensures that only one thread may scan the memory for
+ *   unreferenced objects at a time. The gray_list contains the objects which
+ *   are already referenced or marked as false positives and need to be
+ *   scanned. This list is only modified during a scanning episode when the
+ *   scan_mutex is held. At the end of a scan, the gray_list is always empty.
+ *   Note that the kmemleak_object.use_count is incremented when an object is
+ *   added to the gray_list and therefore cannot be freed
+ * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs
+ *   file together with modifications to the memory scanning parameters
+ *   including the scan_thread pointer
+ *
+ * The kmemleak_object structures have a use_count incremented or decremented
+ * using the get_object()/put_object() functions. When the use_count becomes
+ * 0, this count can no longer be incremented and put_object() schedules the
+ * kmemleak_object freeing via an RCU callback. All calls to the get_object()
+ * function must be protected by rcu_read_lock() to avoid accessing a freed
+ * structure.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/prio_tree.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/stacktrace.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/nodemask.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+
+#include <linux/kmemleak.h>
+
+/*
+ * Kmemleak configuration and common defines.
+ */
+#define MAX_TRACE		16	/* stack trace length */
+#define REPORTS_NR		50	/* maximum number of reported leaks */
+#define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
+#define MSECS_SCAN_YIELD	10	/* CPU yielding period */
+#define SECS_FIRST_SCAN		60	/* delay before the first scan */
+#define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
+
+#define BYTES_PER_POINTER	sizeof(void *)
+
+/* scanning area inside a memory block */
+struct kmemleak_scan_area {
+	struct hlist_node node;
+	unsigned long offset;
+	size_t length;
+};
+
+/*
+ * Structure holding the metadata for each allocated memory block.
+ * Modifications to such objects should be made while holding the
+ * object->lock. Insertions or deletions from object_list, gray_list or
+ * tree_node are already protected by the corresponding locks or mutex (see
+ * the notes on locking above). These objects are reference-counted
+ * (use_count) and freed using the RCU mechanism.
+ */
+struct kmemleak_object {
+	spinlock_t lock;
+	unsigned long flags;		/* object status flags */
+	struct list_head object_list;
+	struct list_head gray_list;
+	struct prio_tree_node tree_node;
+	struct rcu_head rcu;		/* object_list lockless traversal */
+	/* object usage count; object freed when use_count == 0 */
+	atomic_t use_count;
+	unsigned long pointer;
+	size_t size;
+	/* minimum number of a pointers found before it is considered leak */
+	int min_count;
+	/* the total number of pointers found pointing to this object */
+	int count;
+	/* memory ranges to be scanned inside an object (empty for all) */
+	struct hlist_head area_list;
+	unsigned long trace[MAX_TRACE];
+	unsigned int trace_len;
+	unsigned long jiffies;		/* creation timestamp */
+	pid_t pid;			/* pid of the current task */
+	char comm[TASK_COMM_LEN];	/* executable name */
+};
+
+/* flag representing the memory block allocation status */
+#define OBJECT_ALLOCATED	(1 << 0)
+/* flag set after the first reporting of an unreference object */
+#define OBJECT_REPORTED		(1 << 1)
+/* flag set to not scan the object */
+#define OBJECT_NO_SCAN		(1 << 2)
+
+/* the list of all allocated objects */
+static LIST_HEAD(object_list);
+/* the list of gray-colored objects (see color_gray comment below) */
+static LIST_HEAD(gray_list);
+/* prio search tree for object boundaries */
+static struct prio_tree_root object_tree_root;
+/* rw_lock protecting the access to object_list and prio_tree_root */
+static DEFINE_RWLOCK(kmemleak_lock);
+
+/* allocation caches for kmemleak internal data */
+static struct kmem_cache *object_cache;
+static struct kmem_cache *scan_area_cache;
+
+/* set if tracing memory operations is enabled */
+static atomic_t kmemleak_enabled = ATOMIC_INIT(0);
+/* set in the late_initcall if there were no errors */
+static atomic_t kmemleak_initialized = ATOMIC_INIT(0);
+/* enables or disables early logging of the memory operations */
+static atomic_t kmemleak_early_log = ATOMIC_INIT(1);
+/* set if a fata kmemleak error has occurred */
+static atomic_t kmemleak_error = ATOMIC_INIT(0);
+
+/* minimum and maximum address that may be valid pointers */
+static unsigned long min_addr = ULONG_MAX;
+static unsigned long max_addr;
+
+/* used for yielding the CPU to other tasks during scanning */
+static unsigned long next_scan_yield;
+static struct task_struct *scan_thread;
+static unsigned long jiffies_scan_yield;
+static unsigned long jiffies_min_age;
+/* delay between automatic memory scannings */
+static signed long jiffies_scan_wait;
+/* enables or disables the task stacks scanning */
+static int kmemleak_stack_scan;
+/* mutex protecting the memory scanning */
+static DEFINE_MUTEX(scan_mutex);
+/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */
+static DEFINE_MUTEX(kmemleak_mutex);
+
+/* number of leaks reported (for limitation purposes) */
+static int reported_leaks;
+
+/*
+ * Early object allocation/freeing logging. Kkmemleak is initialized after the
+ * kernel allocator. However, both the kernel allocator and kmemleak may
+ * allocate memory blocks which need to be tracked. Kkmemleak defines an
+ * arbitrary buffer to hold the allocation/freeing information before it is
+ * fully initialized.
+ */
+
+/* kmemleak operation type for early logging */
+enum {
+	KMEMLEAK_ALLOC,
+	KMEMLEAK_FREE,
+	KMEMLEAK_NOT_LEAK,
+	KMEMLEAK_IGNORE,
+	KMEMLEAK_SCAN_AREA,
+	KMEMLEAK_NO_SCAN
+};
+
+/*
+ * Structure holding the information passed to kmemleak callbacks during the
+ * early logging.
+ */
+struct early_log {
+	int op_type;			/* kmemleak operation type */
+	const void *ptr;		/* allocated/freed memory block */
+	size_t size;			/* memory block size */
+	int min_count;			/* minimum reference count */
+	unsigned long offset;		/* scan area offset */
+	size_t length;			/* scan area length */
+};
+
+/* early logging buffer and current position */
+static struct early_log early_log[200];
+static int crt_early_log;
+
+static void kmemleak_disable(void);
+
+/*
+ * Print a warning and dump the stack trace.
+ */
+#define kmemleak_warn(x...)	do {	\
+	pr_warning(x);			\
+	dump_stack();			\
+} while (0)
+
+/*
+ * Macro invoked when a serious kmemleak condition occured and cannot be
+ * recovered from. Kkmemleak will be disabled and further allocation/freeing
+ * tracing no longer available.
+ */
+#define kmemleak_panic(x...)	do {	\
+	kmemleak_warn(x);		\
+	kmemleak_disable();		\
+} while (0)
+
+/*
+ * Object colors, encoded with count and min_count:
+ * - white - orphan object, not enough references to it (count < min_count)
+ * - gray  - not orphan, not marked as false positive (min_count == 0) or
+ *		sufficient references to it (count >= min_count)
+ * - black - ignore, it doesn't contain references (e.g. text section)
+ *		(min_count == -1). No function defined for this color.
+ * Newly created objects don't have any color assigned (object->count == -1)
+ * before the next memory scan when they become white.
+ */
+static int color_white(const struct kmemleak_object *object)
+{
+	return object->count != -1 && object->count < object->min_count;
+}
+
+static int color_gray(const struct kmemleak_object *object)
+{
+	return object->min_count != -1 && object->count >= object->min_count;
+}
+
+/*
+ * Objects are considered referenced if their color is gray and they have not
+ * been deleted.
+ */
+static int referenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_gray(object);
+}
+
+/*
+ * Objects are considered unreferenced only if their color is white, they have
+ * not be deleted and have a minimum age to avoid false positives caused by
+ * pointers temporarily stored in CPU registers.
+ */
+static int unreferenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
+		time_is_before_eq_jiffies(object->jiffies + jiffies_min_age);
+}
+
+/*
+ * Printing of the (un)referenced objects information, either to the seq file
+ * or to the kernel log. The print_referenced/print_unreferenced functions
+ * must be called with the object->lock held.
+ */
+#define print_helper(seq, x...)	do {	\
+	struct seq_file *s = (seq);	\
+	if (s)				\
+		seq_printf(s, x);	\
+	else				\
+		pr_info(x);		\
+} while (0)
+
+static void print_referenced(struct kmemleak_object *object)
+{
+	pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n",
+		object->pointer, object->size);
+}
+
+static void print_unreferenced(struct seq_file *seq,
+			       struct kmemleak_object *object)
+{
+	int i;
+
+	print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n",
+		     object->pointer, object->size);
+	print_helper(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
+		     object->comm, object->pid, object->jiffies);
+	print_helper(seq, "  backtrace:\n");
+
+	for (i = 0; i < object->trace_len; i++) {
+		void *ptr = (void *)object->trace[i];
+		print_helper(seq, "    [<%p>] %pS\n", ptr, ptr);
+	}
+}
+
+/*
+ * Print the kmemleak_object information. This function is used mainly for
+ * debugging special cases when kmemleak operations. It must be called with
+ * the object->lock held.
+ */
+static void dump_object_info(struct kmemleak_object *object)
+{
+	struct stack_trace trace;
+
+	trace.nr_entries = object->trace_len;
+	trace.entries = object->trace;
+
+	pr_notice("kmemleak: Object 0x%08lx (size %zu):\n",
+		  object->tree_node.start, object->size);
+	pr_notice("  comm \"%s\", pid %d, jiffies %lu\n",
+		  object->comm, object->pid, object->jiffies);
+	pr_notice("  min_count = %d\n", object->min_count);
+	pr_notice("  count = %d\n", object->count);
+	pr_notice("  backtrace:\n");
+	print_stack_trace(&trace, 4);
+}
+
+/*
+ * Look-up a memory block metadata (kmemleak_object) in the priority search
+ * tree based on a pointer value. If alias is 0, only values pointing to the
+ * beginning of the memory block are allowed. The kmemleak_lock must be held
+ * when calling this function.
+ */
+static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
+{
+	struct prio_tree_node *node;
+	struct prio_tree_iter iter;
+	struct kmemleak_object *object;
+
+	prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr);
+	node = prio_tree_next(&iter);
+	if (node) {
+		object = prio_tree_entry(node, struct kmemleak_object,
+					 tree_node);
+		if (!alias && object->pointer != ptr) {
+			kmemleak_warn("kmemleak: Found object by alias");
+			object = NULL;
+		}
+	} else
+		object = NULL;
+
+	return object;
+}
+
+/*
+ * Increment the object use_count. Return 1 if successful or 0 otherwise. Note
+ * that once an object's use_count reached 0, the RCU freeing was already
+ * registered and the object should no longer be used. This function must be
+ * called under the protection of rcu_read_lock().
+ */
+static int get_object(struct kmemleak_object *object)
+{
+	return atomic_inc_not_zero(&object->use_count);
+}
+
+/*
+ * RCU callback to free a kmemleak_object.
+ */
+static void free_object_rcu(struct rcu_head *rcu)
+{
+	struct hlist_node *elem, *tmp;
+	struct kmemleak_scan_area *area;
+	struct kmemleak_object *object =
+		container_of(rcu, struct kmemleak_object, rcu);
+
+	/*
+	 * Once use_count is 0 (guaranteed by put_object), there is no other
+	 * code accessing this object, hence no need for locking.
+	 */
+	hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) {
+		hlist_del(elem);
+		kmem_cache_free(scan_area_cache, area);
+	}
+	kmem_cache_free(object_cache, object);
+}
+
+/*
+ * Decrement the object use_count. Once the count is 0, free the object using
+ * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
+ * delete_object() path, the delayed RCU freeing ensures that there is no
+ * recursive call to the kernel allocator. Lock-less RCU object_list traversal
+ * is also possible.
+ */
+static void put_object(struct kmemleak_object *object)
+{
+	if (!atomic_dec_and_test(&object->use_count))
+		return;
+
+	/* should only get here after delete_object was called */
+	WARN_ON(object->flags & OBJECT_ALLOCATED);
+
+	call_rcu(&object->rcu, free_object_rcu);
+}
+
+/*
+ * Look up an object in the prio search tree and increase its use_count.
+ */
+static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
+{
+	unsigned long flags;
+	struct kmemleak_object *object = NULL;
+
+	rcu_read_lock();
+	read_lock_irqsave(&kmemleak_lock, flags);
+	if (ptr >= min_addr && ptr < max_addr)
+		object = lookup_object(ptr, alias);
+	read_unlock_irqrestore(&kmemleak_lock, flags);
+
+	/* check whether the object is still available */
+	if (object && !get_object(object))
+		object = NULL;
+	rcu_read_unlock();
+
+	return object;
+}
+
+/*
+ * Create the metadata (struct kmemleak_object) corresponding to an allocated
+ * memory block and add it to the object_list and object_tree_root.
+ */
+static void create_object(unsigned long ptr, size_t size, int min_count,
+			  gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct prio_tree_node *node;
+	struct stack_trace trace;
+
+	object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!object) {
+		kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object "
+			       "structure\n");
+		return;
+	}
+
+	INIT_LIST_HEAD(&object->object_list);
+	INIT_LIST_HEAD(&object->gray_list);
+	INIT_HLIST_HEAD(&object->area_list);
+	spin_lock_init(&object->lock);
+	atomic_set(&object->use_count, 1);
+	object->flags = OBJECT_ALLOCATED;
+	object->pointer = ptr;
+	object->size = size;
+	object->min_count = min_count;
+	object->count = -1;			/* no color initially */
+	object->jiffies = jiffies;
+
+	/* task information */
+	if (in_irq()) {
+		object->pid = 0;
+		strncpy(object->comm, "hardirq", sizeof(object->comm));
+	} else if (in_softirq()) {
+		object->pid = 0;
+		strncpy(object->comm, "softirq", sizeof(object->comm));
+	} else {
+		object->pid = current->pid;
+		/*
+		 * There is a small chance of a race with set_task_comm(),
+		 * however using get_task_comm() here may cause locking
+		 * dependency issues with current->alloc_lock. In the worst
+		 * case, the command line is not correct.
+		 */
+		strncpy(object->comm, current->comm, sizeof(object->comm));
+	}
+
+	/* kernel backtrace */
+	trace.max_entries = MAX_TRACE;
+	trace.nr_entries = 0;
+	trace.entries = object->trace;
+	trace.skip = 1;
+	save_stack_trace(&trace);
+	object->trace_len = trace.nr_entries;
+
+	INIT_PRIO_TREE_NODE(&object->tree_node);
+	object->tree_node.start = ptr;
+	object->tree_node.last = ptr + size - 1;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	min_addr = min(min_addr, ptr);
+	max_addr = max(max_addr, ptr + size);
+	node = prio_tree_insert(&object_tree_root, &object->tree_node);
+	/*
+	 * The code calling the kernel does not yet have the pointer to the
+	 * memory block to be able to free it.  However, we still hold the
+	 * kmemleak_lock here in case parts of the kernel started freeing
+	 * random memory blocks.
+	 */
+	if (node != &object->tree_node) {
+		unsigned long flags;
+
+		kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object "
+			       "search tree (already existing)\n", ptr);
+		object = lookup_object(ptr, 1);
+		spin_lock_irqsave(&object->lock, flags);
+		dump_object_info(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+
+		goto out;
+	}
+	list_add_tail_rcu(&object->object_list, &object_list);
+out:
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
+/*
+ * Remove the metadata (struct kmemleak_object) for a memory block from the
+ * object_list and object_tree_root and decrement its use_count.
+ */
+static void delete_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	object = lookup_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n",
+			      ptr);
+		write_unlock_irqrestore(&kmemleak_lock, flags);
+		return;
+	}
+	prio_tree_remove(&object_tree_root, &object->tree_node);
+	list_del_rcu(&object->object_list);
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+
+	WARN_ON(!(object->flags & OBJECT_ALLOCATED));
+	WARN_ON(atomic_read(&object->use_count) < 1);
+
+	/*
+	 * Locking here also ensures that the corresponding memory block
+	 * cannot be freed when it is being scanned.
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_REPORTED)
+		print_referenced(object);
+	object->flags &= ~OBJECT_ALLOCATED;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Make a object permanently as gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = 0;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Mark the object as black-colored so that it is ignored from scans and
+ * reporting.
+ */
+static void make_black_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = -1;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Add a scanning area to the object. If at least one such area is added,
+ * kmemleak will only scan these ranges rather than the whole memory block.
+ */
+static void add_scan_area(unsigned long ptr, unsigned long offset,
+			  size_t length, gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct kmemleak_scan_area *area;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Adding scan area to unknown "
+			      "object at 0x%08lx\n", ptr);
+		return;
+	}
+
+	area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!area) {
+		kmemleak_warn("kmemleak: Cannot allocate a scan area\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (offset + length > object->size) {
+		kmemleak_warn("kmemleak: Scan area larger than object "
+			      "0x%08lx\n", ptr);
+		dump_object_info(object);
+		kmem_cache_free(scan_area_cache, area);
+		goto out_unlock;
+	}
+
+	INIT_HLIST_NODE(&area->node);
+	area->offset = offset;
+	area->length = length;
+
+	hlist_add_head(&area->node, &object->area_list);
+out_unlock:
+	spin_unlock_irqrestore(&object->lock, flags);
+out:
+	put_object(object);
+}
+
+/*
+ * Set the OBJECT_NO_SCAN flag for the object corresponding to the give
+ * pointer. Such object will not be scanned by kmemleak but references to it
+ * are searched.
+ */
+static void object_no_scan(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Not scanning unknown object at "
+			      "0x%08lx\n", ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->flags |= OBJECT_NO_SCAN;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Log an early kmemleak_* call to the early_log buffer. These calls will be
+ * processed later once kmemleak is fully initialized.
+ */
+static void log_early(int op_type, const void *ptr, size_t size,
+		      int min_count, unsigned long offset, size_t length)
+{
+	unsigned long flags;
+	struct early_log *log;
+
+	if (crt_early_log >= ARRAY_SIZE(early_log)) {
+		kmemleak_panic("kmemleak: Early log buffer exceeded\n");
+		return;
+	}
+
+	/*
+	 * There is no need for locking since the kernel is still in UP mode
+	 * at this stage. Disabling the IRQs is enough.
+	 */
+	local_irq_save(flags);
+	log = &early_log[crt_early_log];
+	log->op_type = op_type;
+	log->ptr = ptr;
+	log->size = size;
+	log->min_count = min_count;
+	log->offset = offset;
+	log->length = length;
+	crt_early_log++;
+	local_irq_restore(flags);
+}
+
+/*
+ * Memory allocation function callback. This function is called from the
+ * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
+ * vmalloc etc.).
+ */
+void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+{
+	pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		create_object((unsigned long)ptr, size, min_count, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc);
+
+/*
+ * Memory freeing function callback. This function is called from the kernel
+ * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
+ */
+void kmemleak_free(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		delete_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free);
+
+/*
+ * Mark an already allocated memory block as a false positive. This will cause
+ * the block to no longer be reported as leak and always be scanned.
+ */
+void kmemleak_not_leak(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_gray_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_not_leak);
+
+/*
+ * Ignore a memory block. This is usually done when it is known that the
+ * corresponding block is not a leak and does not contain any references to
+ * other allocated memory blocks.
+ */
+void kmemleak_ignore(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_black_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_ignore);
+
+/*
+ * Limit the range to be scanned in an allocated memory block.
+ */
+void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
+			gfp_t gfp)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		add_scan_area((unsigned long)ptr, offset, length, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+}
+EXPORT_SYMBOL(kmemleak_scan_area);
+
+/*
+ * Inform kmemleak not to scan the given memory block.
+ */
+void kmemleak_no_scan(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		object_no_scan((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_no_scan);
+
+/*
+ * Yield the CPU so that other tasks get a chance to run.  The yielding is
+ * rate-limited to avoid excessive number of calls to the schedule() function
+ * during memory scanning.
+ */
+static void scan_yield(void)
+{
+	might_sleep();
+
+	if (time_is_before_eq_jiffies(next_scan_yield)) {
+		schedule();
+		next_scan_yield = jiffies + jiffies_scan_yield;
+	}
+}
+
+/*
+ * Memory scanning is a long process and it needs to be interruptable. This
+ * function checks whether such interrupt condition occured.
+ */
+static int scan_should_stop(void)
+{
+	if (!atomic_read(&kmemleak_enabled))
+		return 1;
+
+	/*
+	 * This function may be called from either process or kthread context,
+	 * hence the need to check for both stop conditions.
+	 */
+	if (current->mm)
+		return signal_pending(current);
+	else
+		return kthread_should_stop();
+
+	return 0;
+}
+
+/*
+ * Scan a memory block (exclusive range) for valid pointers and add those
+ * found to the gray list.
+ */
+static void scan_block(void *_start, void *_end,
+		       struct kmemleak_object *scanned)
+{
+	unsigned long *ptr;
+	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
+	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
+
+	for (ptr = start; ptr < end; ptr++) {
+		unsigned long flags;
+		unsigned long pointer = *ptr;
+		struct kmemleak_object *object;
+
+		if (scan_should_stop())
+			break;
+
+		/*
+		 * When scanning a memory block with a corresponding
+		 * kmemleak_object, the CPU yielding is handled in the calling
+		 * code since it holds the object->lock to avoid the block
+		 * freeing.
+		 */
+		if (!scanned)
+			scan_yield();
+
+		object = find_and_get_object(pointer, 1);
+		if (!object)
+			continue;
+		if (object == scanned) {
+			/* self referenced, ignore */
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Avoid the lockdep recursive warning on object->lock being
+		 * previously acquired in scan_object(). These locks are
+		 * enclosed by scan_mutex.
+		 */
+		spin_lock_irqsave_nested(&object->lock, flags,
+					 SINGLE_DEPTH_NESTING);
+		if (!color_white(object)) {
+			/* non-orphan, ignored or new */
+			spin_unlock_irqrestore(&object->lock, flags);
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Increase the object's reference count (number of pointers
+		 * to the memory block). If this count reaches the required
+		 * minimum, the object's color will become gray and it will be
+		 * added to the gray_list.
+		 */
+		object->count++;
+		if (color_gray(object))
+			list_add_tail(&object->gray_list, &gray_list);
+		else
+			put_object(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+}
+
+/*
+ * Scan a memory block corresponding to a kmemleak_object. A condition is
+ * that object->use_count >= 1.
+ */
+static void scan_object(struct kmemleak_object *object)
+{
+	struct kmemleak_scan_area *area;
+	struct hlist_node *elem;
+	unsigned long flags;
+
+	/*
+	 * Once the object->lock is aquired, the corresponding memory block
+	 * cannot be freed (the same lock is aquired in delete_object).
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_NO_SCAN)
+		goto out;
+	if (!(object->flags & OBJECT_ALLOCATED))
+		/* already freed object */
+		goto out;
+	if (hlist_empty(&object->area_list))
+		scan_block((void *)object->pointer,
+			   (void *)(object->pointer + object->size), object);
+	else
+		hlist_for_each_entry(area, elem, &object->area_list, node)
+			scan_block((void *)(object->pointer + area->offset),
+				   (void *)(object->pointer + area->offset
+					    + area->length), object);
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+}
+
+/*
+ * Scan data sections and all the referenced memory blocks allocated via the
+ * kernel's standard allocators. This function must be called with the
+ * scan_mutex held.
+ */
+static void kmemleak_scan(void)
+{
+	unsigned long flags;
+	struct kmemleak_object *object, *tmp;
+	struct task_struct *task;
+	int i;
+
+	/* prepare the kmemleak_object's */
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		spin_lock_irqsave(&object->lock, flags);
+#ifdef DEBUG
+		/*
+		 * With a few exceptions there should be a maximum of
+		 * 1 reference to any object at this point.
+		 */
+		if (atomic_read(&object->use_count) > 1) {
+			pr_debug("kmemleak: object->use_count = %d\n",
+				 atomic_read(&object->use_count));
+			dump_object_info(object);
+		}
+#endif
+		/* reset the reference count (whiten the object) */
+		object->count = 0;
+		if (color_gray(object) && get_object(object))
+			list_add_tail(&object->gray_list, &gray_list);
+
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+	rcu_read_unlock();
+
+	/* data/bss scanning */
+	scan_block(_sdata, _edata, NULL);
+	scan_block(__bss_start, __bss_stop, NULL);
+
+#ifdef CONFIG_SMP
+	/* per-cpu sections scanning */
+	for_each_possible_cpu(i)
+		scan_block(__per_cpu_start + per_cpu_offset(i),
+			   __per_cpu_end + per_cpu_offset(i), NULL);
+#endif
+
+	/*
+	 * Struct page scanning for each node. The code below is not yet safe
+	 * with MEMORY_HOTPLUG.
+	 */
+	for_each_online_node(i) {
+		pg_data_t *pgdat = NODE_DATA(i);
+		unsigned long start_pfn = pgdat->node_start_pfn;
+		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+		unsigned long pfn;
+
+		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+			page = pfn_to_page(pfn);
+			/* only scan if page is in use */
+			if (page_count(page) == 0)
+				continue;
+			scan_block(page, page + 1, NULL);
+		}
+	}
+
+	/*
+	 * Scanning the task stacks may introduce false negatives and it is
+	 * not enabled by default.
+	 */
+	if (kmemleak_stack_scan) {
+		read_lock(&tasklist_lock);
+		for_each_process(task)
+			scan_block(task_stack_page(task),
+				   task_stack_page(task) + THREAD_SIZE, NULL);
+		read_unlock(&tasklist_lock);
+	}
+
+	/*
+	 * Scan the objects already referenced from the sections scanned
+	 * above. More objects will be referenced and, if there are no memory
+	 * leaks, all the objects will be scanned. The list traversal is safe
+	 * for both tail additions and removals from inside the loop. The
+	 * kmemleak objects cannot be freed from outside the loop because their
+	 * use_count was increased.
+	 */
+	object = list_entry(gray_list.next, typeof(*object), gray_list);
+	while (&object->gray_list != &gray_list) {
+		scan_yield();
+
+		/* may add new objects to the list */
+		if (!scan_should_stop())
+			scan_object(object);
+
+		tmp = list_entry(object->gray_list.next, typeof(*object),
+				 gray_list);
+
+		/* remove the object from the list and release it */
+		list_del(&object->gray_list);
+		put_object(object);
+
+		object = tmp;
+	}
+	WARN_ON(!list_empty(&gray_list));
+}
+
+/*
+ * Thread function performing automatic memory scanning. Unreferenced objects
+ * at the end of a memory scan are reported but only the first time.
+ */
+static int kmemleak_scan_thread(void *arg)
+{
+	static int first_run = 1;
+
+	pr_info("kmemleak: Automatic memory scanning thread started\n");
+
+	/*
+	 * Wait before the first scan to allow the system to fully initialize.
+	 */
+	if (first_run) {
+		first_run = 0;
+		ssleep(SECS_FIRST_SCAN);
+	}
+
+	while (!kthread_should_stop()) {
+		struct kmemleak_object *object;
+		signed long timeout = jiffies_scan_wait;
+
+		mutex_lock(&scan_mutex);
+
+		kmemleak_scan();
+		reported_leaks = 0;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(object, &object_list, object_list) {
+			unsigned long flags;
+
+			if (reported_leaks >= REPORTS_NR)
+				break;
+			spin_lock_irqsave(&object->lock, flags);
+			if (!(object->flags & OBJECT_REPORTED) &&
+			    unreferenced_object(object)) {
+				print_unreferenced(NULL, object);
+				object->flags |= OBJECT_REPORTED;
+				reported_leaks++;
+			} else if ((object->flags & OBJECT_REPORTED) &&
+				   referenced_object(object)) {
+				print_referenced(object);
+				object->flags &= ~OBJECT_REPORTED;
+			}
+			spin_unlock_irqrestore(&object->lock, flags);
+		}
+		rcu_read_unlock();
+
+		mutex_unlock(&scan_mutex);
+		/* wait before the next scan */
+		while (timeout && !kthread_should_stop())
+			timeout = schedule_timeout_interruptible(timeout);
+	}
+
+	pr_info("kmemleak: Automatic memory scanning thread ended\n");
+
+	return 0;
+}
+
+/*
+ * Start the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void start_scan_thread(void)
+{
+	if (scan_thread)
+		return;
+	scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
+	if (IS_ERR(scan_thread)) {
+		pr_warning("kmemleak: Failed to create the scan thread\n");
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Stop the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void stop_scan_thread(void)
+{
+	if (scan_thread) {
+		kthread_stop(scan_thread);
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Iterate over the object_list and return the first valid object at or after
+ * the required position with its use_count incremented. The function triggers
+ * a memory scanning when the pos argument points to the first position.
+ */
+static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct kmemleak_object *object;
+	loff_t n = *pos;
+
+	if (!n) {
+		kmemleak_scan();
+		reported_leaks = 0;
+	}
+	if (reported_leaks >= REPORTS_NR)
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		if (n-- > 0)
+			continue;
+		if (get_object(object))
+			goto out;
+	}
+	object = NULL;
+out:
+	rcu_read_unlock();
+	return object;
+}
+
+/*
+ * Return the next object in the object_list. The function decrements the
+ * use_count of the previous object and increases that of the next one.
+ */
+static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct kmemleak_object *prev_obj = v;
+	struct kmemleak_object *next_obj = NULL;
+	struct list_head *n = &prev_obj->object_list;
+
+	++(*pos);
+	if (reported_leaks >= REPORTS_NR)
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_continue_rcu(n, &object_list) {
+		next_obj = list_entry(n, struct kmemleak_object, object_list);
+		if (get_object(next_obj))
+			break;
+	}
+	rcu_read_unlock();
+out:
+	put_object(prev_obj);
+	return next_obj;
+}
+
+/*
+ * Decrement the use_count of the last object required, if any.
+ */
+static void kmemleak_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v)
+		put_object(v);
+}
+
+/*
+ * Print the information for an unreferenced object to the seq file.
+ */
+static int kmemleak_seq_show(struct seq_file *seq, void *v)
+{
+	struct kmemleak_object *object = v;
+	unsigned long flags;
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (!unreferenced_object(object))
+		goto out;
+	print_unreferenced(seq, object);
+	reported_leaks++;
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+	return 0;
+}
+
+static const struct seq_operations kmemleak_seq_ops = {
+	.start = kmemleak_seq_start,
+	.next  = kmemleak_seq_next,
+	.stop  = kmemleak_seq_stop,
+	.show  = kmemleak_seq_show,
+};
+
+static int kmemleak_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	ret = mutex_lock_interruptible(&kmemleak_mutex);
+	if (ret < 0)
+		goto out;
+	if (file->f_mode & FMODE_READ) {
+		ret = mutex_lock_interruptible(&scan_mutex);
+		if (ret < 0)
+			goto kmemleak_unlock;
+		ret = seq_open(file, &kmemleak_seq_ops);
+		if (ret < 0)
+			goto scan_unlock;
+	}
+	return ret;
+
+scan_unlock:
+	mutex_unlock(&scan_mutex);
+kmemleak_unlock:
+	mutex_unlock(&kmemleak_mutex);
+out:
+	return ret;
+}
+
+static int kmemleak_release(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		seq_release(inode, file);
+		mutex_unlock(&scan_mutex);
+	}
+	mutex_unlock(&kmemleak_mutex);
+
+	return ret;
+}
+
+/*
+ * File write operation to configure kmemleak at run-time. The following
+ * commands can be written to the /sys/kernel/debug/kmemleak file:
+ *   off	- disable kmemleak (irreversible)
+ *   stack=on	- enable the task stacks scanning
+ *   stack=off	- disable the tasks stacks scanning
+ *   scan=on	- start the automatic memory scanning thread
+ *   scan=off	- stop the automatic memory scanning thread
+ *   scan=...	- set the automatic memory scanning period in seconds (0 to
+ *		  disable it)
+ */
+static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
+			      size_t size, loff_t *ppos)
+{
+	char buf[64];
+	int buf_size;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (strncpy_from_user(buf, user_buf, buf_size) < 0)
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	if (strncmp(buf, "off", 3) == 0)
+		kmemleak_disable();
+	else if (strncmp(buf, "stack=on", 8) == 0)
+		kmemleak_stack_scan = 1;
+	else if (strncmp(buf, "stack=off", 9) == 0)
+		kmemleak_stack_scan = 0;
+	else if (strncmp(buf, "scan=on", 7) == 0)
+		start_scan_thread();
+	else if (strncmp(buf, "scan=off", 8) == 0)
+		stop_scan_thread();
+	else if (strncmp(buf, "scan=", 5) == 0) {
+		unsigned long secs;
+		int err;
+
+		err = strict_strtoul(buf + 5, 0, &secs);
+		if (err < 0)
+			return err;
+		stop_scan_thread();
+		if (secs) {
+			jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
+			start_scan_thread();
+		}
+	} else
+		return -EINVAL;
+
+	/* ignore the rest of the buffer, only one command at a time */
+	*ppos += size;
+	return size;
+}
+
+static const struct file_operations kmemleak_fops = {
+	.owner		= THIS_MODULE,
+	.open		= kmemleak_open,
+	.read		= seq_read,
+	.write		= kmemleak_write,
+	.llseek		= seq_lseek,
+	.release	= kmemleak_release,
+};
+
+/*
+ * Perform the freeing of the kmemleak internal objects after waiting for any
+ * current memory scan to complete.
+ */
+static int kmemleak_cleanup_thread(void *arg)
+{
+	struct kmemleak_object *object;
+
+	mutex_lock(&kmemleak_mutex);
+	stop_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	mutex_lock(&scan_mutex);
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list)
+		delete_object(object->pointer);
+	rcu_read_unlock();
+	mutex_unlock(&scan_mutex);
+
+	return 0;
+}
+
+/*
+ * Start the clean-up thread.
+ */
+static void kmemleak_cleanup(void)
+{
+	struct task_struct *cleanup_thread;
+
+	cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
+				     "kmemleak-clean");
+	if (IS_ERR(cleanup_thread))
+		pr_warning("kmemleak: Failed to create the clean-up thread\n");
+}
+
+/*
+ * Disable kmemleak. No memory allocation/freeing will be traced once this
+ * function is called. Disabling kmemleak is an irreversible operation.
+ */
+static void kmemleak_disable(void)
+{
+	/* atomically check whether it was already invoked */
+	if (atomic_cmpxchg(&kmemleak_error, 0, 1))
+		return;
+
+	/* stop any memory operation tracing */
+	atomic_set(&kmemleak_early_log, 0);
+	atomic_set(&kmemleak_enabled, 0);
+
+	/* check whether it is too early for a kernel thread */
+	if (atomic_read(&kmemleak_initialized))
+		kmemleak_cleanup();
+
+	pr_info("Kernel memory leak detector disabled\n");
+}
+
+/*
+ * Allow boot-time kmemleak disabling (enabled by default).
+ */
+static int kmemleak_boot_config(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (strcmp(str, "off") == 0)
+		kmemleak_disable();
+	else if (strcmp(str, "on") != 0)
+		return -EINVAL;
+	return 0;
+}
+early_param("kmemleak", kmemleak_boot_config);
+
+/*
+ * Kkmemleak initialization.
+ */
+void __init kmemleak_init(void)
+{
+	int i;
+	unsigned long flags;
+
+	jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
+	jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
+	jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
+
+	object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
+	scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
+	INIT_PRIO_TREE_ROOT(&object_tree_root);
+
+	/* the kernel is still in UP mode, so disabling the IRQs is enough */
+	local_irq_save(flags);
+	if (!atomic_read(&kmemleak_error)) {
+		atomic_set(&kmemleak_enabled, 1);
+		atomic_set(&kmemleak_early_log, 0);
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * This is the point where tracking allocations is safe. Automatic
+	 * scanning is started during the late initcall. Add the early logged
+	 * callbacks to the kmemleak infrastructure.
+	 */
+	for (i = 0; i < crt_early_log; i++) {
+		struct early_log *log = &early_log[i];
+
+		switch (log->op_type) {
+		case KMEMLEAK_ALLOC:
+			kmemleak_alloc(log->ptr, log->size, log->min_count,
+				       GFP_KERNEL);
+			break;
+		case KMEMLEAK_FREE:
+			kmemleak_free(log->ptr);
+			break;
+		case KMEMLEAK_NOT_LEAK:
+			kmemleak_not_leak(log->ptr);
+			break;
+		case KMEMLEAK_IGNORE:
+			kmemleak_ignore(log->ptr);
+			break;
+		case KMEMLEAK_SCAN_AREA:
+			kmemleak_scan_area(log->ptr, log->offset, log->length,
+					   GFP_KERNEL);
+			break;
+		case KMEMLEAK_NO_SCAN:
+			kmemleak_no_scan(log->ptr);
+			break;
+		default:
+			WARN_ON(1);
+		}
+	}
+}
+
+/*
+ * Late initialization function.
+ */
+static int __init kmemleak_late_init(void)
+{
+	struct dentry *dentry;
+
+	atomic_set(&kmemleak_initialized, 1);
+
+	if (atomic_read(&kmemleak_error)) {
+		/*
+		 * Some error occured and kmemleak was disabled. There is a
+		 * small chance that kmemleak_disable() was called immediately
+		 * after setting kmemleak_initialized and we may end up with
+		 * two clean-up threads but serialized by scan_mutex.
+		 */
+		kmemleak_cleanup();
+		return -ENOMEM;
+	}
+
+	dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL,
+				     &kmemleak_fops);
+	if (!dentry)
+		pr_warning("kmemleak: Failed to create the debugfs kmemleak "
+			   "file\n");
+	mutex_lock(&kmemleak_mutex);
+	start_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	pr_info("Kernel memory leak detector initialized\n");
+
+	return 0;
+}
+late_initcall(kmemleak_late_init);
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e05..ac13043 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -629,52 +629,43 @@
 	free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+			  size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
+	unsigned long lim, vm, pgsz;
+	int error = -ENOMEM;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->total_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->total_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->locked_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto out;
+	mm->total_vm  += pgsz;
+	mm->locked_vm += pgsz;
 
-	current->mm->total_vm  += pgsz;
-	current->mm->locked_vm += pgsz;
-
+	error = 0;
  out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
+	up_write(&mm->mmap_sem);
+	return error;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	current->mm->total_vm  -= pgsz;
-	current->mm->locked_vm -= pgsz;
+	mm->total_vm  -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	up_write(&current->mm->mmap_sem);
-}
-
-void free_locked_buffer(void *buffer, size_t size)
-{
-	release_locked_buffer(buffer, size);
-
-	kfree(buffer);
+	up_write(&mm->mmap_sem);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 6b7b1a9..34579b2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -28,6 +28,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -87,6 +88,9 @@
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
+/* amount of vm to protect from userspace access */
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
@@ -1219,6 +1223,8 @@
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
+	perf_counter_mmap(vma);
+
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
@@ -2305,6 +2311,8 @@
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
+	perf_counter_mmap(vma);
+
 	return 0;
 }
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 258197b..d80311b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,6 +23,7 @@
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/perf_counter.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -299,6 +300,7 @@
 		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
 		if (error)
 			goto out;
+		perf_counter_mmap(vma);
 		nstart = tmp;
 
 		if (nstart < prev->vm_end)
diff --git a/mm/nommu.c b/mm/nommu.c
index b571ef7..2fd2ad5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,6 +69,9 @@
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
+/* amount of vm to protect from userspace access */
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+
 atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ec..17d5f53 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,6 +46,7 @@
 #include <linux/page-isolation.h>
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
+#include <linux/kmemleak.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -149,10 +150,6 @@
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3100,6 @@
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3124,6 @@
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3729,6 @@
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
@@ -4615,6 +4547,16 @@
 	if (_hash_mask)
 		*_hash_mask = (1 << log2qty) - 1;
 
+	/*
+	 * If hashdist is set, the table allocation is done with __vmalloc()
+	 * which invokes the kmemleak_alloc() callback. This function may also
+	 * be called before the slab and kmemleak are initialised when
+	 * kmemleak simply buffers the request to be executed later
+	 * (GFP_ATOMIC flag ignored in this case).
+	 */
+	if (!hashdist)
+		kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+
 	return table;
 }
 
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 791905c..11a8a10 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -69,7 +69,7 @@
 	return 0;
 }
 
-void __init page_cgroup_init(void)
+void __init page_cgroup_init_flatmem(void)
 {
 
 	int nid, fail;
@@ -113,16 +113,11 @@
 	if (!section->page_cgroup) {
 		nid = page_to_nid(pfn_to_page(pfn));
 		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		if (slab_is_available()) {
-			base = kmalloc_node(table_size,
-					GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-				table_size,
-				PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-		}
+		VM_BUG_ON(!slab_is_available());
+		base = kmalloc_node(table_size,
+				GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
 	} else {
 		/*
  		 * We don't have to allocate page_cgroup again, but
diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8f..c0b2c1a 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -23,7 +23,7 @@
  * Allocation is done in offset-size areas of single unit space.  Ie,
  * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
  * c1:u1, c1:u2 and c1:u3.  Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
  *
  * There are usually many small percpu allocations many of them as
  * small as 4 bytes.  The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
  * region and negative allocated.  Allocation inside a chunk is done
  * by scanning this map sequentially and serving the first matching
  * entry.  This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
- * mapping during free.
+ * Chunks can be determined from the address using the index field
+ * in the page struct. The index field contains a pointer to the chunk.
  *
  * To use this allocator, arch code should do the followings.
  *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@
 
 struct pcpu_chunk {
 	struct list_head	list;		/* linked to pcpu_slot lists */
-	struct rb_node		rb_node;	/* key is chunk->vm->addr */
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	struct vm_struct	*vm;		/* mapped vmalloc region */
@@ -110,9 +108,21 @@
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
-/* optional reserved chunk, only accessible for reserved allocations */
+/*
+ * The first chunk which always exists.  Note that unlike other
+ * chunks, this one can be allocated and mapped in several different
+ * ways and thus often doesn't live in the vmalloc area.
+ */
+static struct pcpu_chunk *pcpu_first_chunk;
+
+/*
+ * Optional reserved chunk.  This chunk reserves part of the first
+ * chunk and serves it for reserved allocations.  The amount of
+ * reserved offset is in pcpu_reserved_chunk_limit.  When reserved
+ * area doesn't exist, the following variables contain NULL and 0
+ * respectively.
+ */
 static struct pcpu_chunk *pcpu_reserved_chunk;
-/* offset limit of the reserved chunk */
 static int pcpu_reserved_chunk_limit;
 
 /*
@@ -121,7 +131,7 @@
  * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
  * protects allocation/reclaim paths, chunks and chunk->page arrays.
  * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
  *
  * During allocation, pcpu_alloc_mutex is kept locked all the time and
  * pcpu_lock is grabbed and released as necessary.  All actual memory
@@ -140,7 +150,6 @@
 static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT;	/* chunks by address */
 
 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -191,6 +200,18 @@
 	return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
 }
 
+/* set the pointer to a chunk in a page struct */
+static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
+{
+	page->index = (unsigned long)pcpu;
+}
+
+/* obtain pointer to a chunk from a page struct */
+static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
+{
+	return (struct pcpu_chunk *)page->index;
+}
+
 /**
  * pcpu_mem_alloc - allocate memory
  * @size: bytes to allocate
@@ -257,93 +278,26 @@
 	}
 }
 
-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-					     struct rb_node **parentp)
-{
-	struct rb_node **p = &pcpu_addr_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pcpu_chunk *chunk;
-
-	while (*p) {
-		parent = *p;
-		chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-
-		if (addr < chunk->vm->addr)
-			p = &(*p)->rb_left;
-		else if (addr > chunk->vm->addr)
-			p = &(*p)->rb_right;
-		else
-			break;
-	}
-
-	if (parentp)
-		*parentp = parent;
-	return p;
-}
-
 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
- * @addr: address to search for
- *
- * Look for chunk which might contain @addr.  More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
+ * pcpu_chunk_addr_search - determine chunk containing specified address
+ * @addr: address for which the chunk needs to be determined.
  *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-	struct rb_node *n, *parent;
-	struct pcpu_chunk *chunk;
+	void *first_start = pcpu_first_chunk->vm->addr;
 
-	/* is it in the reserved chunk? */
-	if (pcpu_reserved_chunk) {
-		void *start = pcpu_reserved_chunk->vm->addr;
-
-		if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+	/* is it in the first chunk? */
+	if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
+		/* is it in the reserved area? */
+		if (addr < first_start + pcpu_reserved_chunk_limit)
 			return pcpu_reserved_chunk;
+		return pcpu_first_chunk;
 	}
 
-	/* nah... search the regular ones */
-	n = *pcpu_chunk_rb_search(addr, &parent);
-	if (!n) {
-		/* no exactly matching chunk, the parent is the closest */
-		n = parent;
-		BUG_ON(!n);
-	}
-	chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-
-	if (addr < chunk->vm->addr) {
-		/* the parent was the next one, look for the previous one */
-		n = rb_prev(n);
-		BUG_ON(!n);
-		chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-	}
-
-	return chunk;
-}
-
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-	struct rb_node **p, *parent;
-
-	p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-	BUG_ON(*p);
-	rb_link_node(&new->rb_node, parent, p);
-	rb_insert_color(&new->rb_node, &pcpu_addr_root);
+	return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 
 /**
@@ -755,6 +709,7 @@
 						  alloc_mask, 0);
 			if (!*pagep)
 				goto err;
+			pcpu_set_page_chunk(*pagep, chunk);
 		}
 	}
 
@@ -879,7 +834,6 @@
 
 	spin_lock_irq(&pcpu_lock);
 	pcpu_chunk_relocate(chunk, -1);
-	pcpu_chunk_addr_insert(chunk);
 	goto restart;
 
 area_found:
@@ -968,7 +922,6 @@
 		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
 			continue;
 
-		rb_erase(&chunk->rb_node, &pcpu_addr_root);
 		list_move(&chunk->list, &todo);
 	}
 
@@ -1147,7 +1100,8 @@
 
 	if (reserved_size) {
 		schunk->free_size = reserved_size;
-		pcpu_reserved_chunk = schunk;	/* not for dynamic alloc */
+		pcpu_reserved_chunk = schunk;
+		pcpu_reserved_chunk_limit = static_size + reserved_size;
 	} else {
 		schunk->free_size = dyn_size;
 		dyn_size = 0;			/* dynamic area covered */
@@ -1158,8 +1112,6 @@
 	if (schunk->free_size)
 		schunk->map[schunk->map_used++] = schunk->free_size;
 
-	pcpu_reserved_chunk_limit = static_size + schunk->free_size;
-
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
 		dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
@@ -1226,13 +1178,8 @@
 	}
 
 	/* link the first chunk in */
-	if (!dchunk) {
-		pcpu_chunk_relocate(schunk, -1);
-		pcpu_chunk_addr_insert(schunk);
-	} else {
-		pcpu_chunk_relocate(dchunk, -1);
-		pcpu_chunk_addr_insert(dchunk);
-	}
+	pcpu_first_chunk = dchunk ?: schunk;
+	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
 	/* we're done */
 	pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
diff --git a/mm/shmem.c b/mm/shmem.c
index b25f95c..0132fbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2659,6 +2659,7 @@
 	if (error)
 		goto close_file;
 #endif
+	ima_counts_get(file);
 	return file;
 
 close_file:
@@ -2684,7 +2685,6 @@
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ima_shm_check(file);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00..18e3164 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,11 +102,12 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
-#include	<trace/kmemtrace.h>
+#include	<linux/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
 #include	<linux/nodemask.h>
+#include	<linux/kmemleak.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
 #include	<linux/fault-inject.h>
@@ -178,13 +179,13 @@
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #endif
 
 /*
@@ -303,6 +304,12 @@
 };
 
 /*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
+/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
@@ -315,7 +322,7 @@
 			struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
-static int enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
 /*
@@ -752,6 +759,7 @@
 	NONE,
 	PARTIAL_AC,
 	PARTIAL_L3,
+	EARLY,
 	FULL
 } g_cpucache_up;
 
@@ -760,7 +768,7 @@
  */
 int slab_is_available(void)
 {
-	return g_cpucache_up == FULL;
+	return g_cpucache_up >= EARLY;
 }
 
 static DEFINE_PER_CPU(struct delayed_work, reap_work);
@@ -958,12 +966,20 @@
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
-					    int batchcount)
+					    int batchcount, gfp_t gfp)
 {
 	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
 	struct array_cache *nc = NULL;
 
-	nc = kmalloc_node(memsize, GFP_KERNEL, node);
+	nc = kmalloc_node(memsize, gfp, node);
+	/*
+	 * The array_cache structures contain pointers to free object.
+	 * However, when such objects are allocated or transfered to another
+	 * cache the pointers are not cleared and they could be counted as
+	 * valid references during a kmemleak scan. Therefore, kmemleak must
+	 * not scan such objects.
+	 */
+	kmemleak_no_scan(nc);
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -1003,7 +1019,7 @@
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, l3) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
+static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	return (struct array_cache **)BAD_ALIEN_MAGIC;
 }
@@ -1034,7 +1050,7 @@
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit)
+static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	struct array_cache **ac_ptr;
 	int memsize = sizeof(void *) * nr_node_ids;
@@ -1042,14 +1058,14 @@
 
 	if (limit > 1)
 		limit = 12;
-	ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
+	ac_ptr = kmalloc_node(memsize, gfp, node);
 	if (ac_ptr) {
 		for_each_node(i) {
 			if (i == node || !node_online(i)) {
 				ac_ptr[i] = NULL;
 				continue;
 			}
-			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
+			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
 			if (!ac_ptr[i]) {
 				for (i--; i >= 0; i--)
 					kfree(ac_ptr[i]);
@@ -1282,20 +1298,20 @@
 		struct array_cache **alien = NULL;
 
 		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount);
+					cachep->batchcount, GFP_KERNEL);
 		if (!nc)
 			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
-				0xbaadf00d);
+				0xbaadf00d, GFP_KERNEL);
 			if (!shared) {
 				kfree(nc);
 				goto bad;
 			}
 		}
 		if (use_alien_caches) {
-			alien = alloc_alien_cache(node, cachep->limit);
+			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
 				kfree(nc);
@@ -1399,10 +1415,9 @@
 {
 	struct kmem_list3 *ptr;
 
-	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
+	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
 	BUG_ON(!ptr);
 
-	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
 	/*
 	 * Do not assume that spinlocks can be initialized via memcpy:
@@ -1411,7 +1426,6 @@
 
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
-	local_irq_enable();
 }
 
 /*
@@ -1575,9 +1589,8 @@
 	{
 		struct array_cache *ptr;
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
@@ -1587,11 +1600,9 @@
 		spin_lock_init(&ptr->lock);
 
 		cache_cache.array[smp_processor_id()] = ptr;
-		local_irq_enable();
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
@@ -1603,7 +1614,6 @@
 
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
-		local_irq_enable();
 	}
 	/* 5) Replace the bootstrap kmem_list3's */
 	{
@@ -1622,19 +1632,27 @@
 		}
 	}
 
-	/* 6) resize the head arrays to their final sizes */
-	{
-		struct kmem_cache *cachep;
-		mutex_lock(&cache_chain_mutex);
-		list_for_each_entry(cachep, &cache_chain, next)
-			if (enable_cpucache(cachep))
-				BUG();
-		mutex_unlock(&cache_chain_mutex);
-	}
+	g_cpucache_up = EARLY;
 
 	/* Annotate slab for lockdep -- annotate the malloc caches */
 	init_lock_keys();
+}
 
+void __init kmem_cache_init_late(void)
+{
+	struct kmem_cache *cachep;
+
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+
+	/* 6) resize the head arrays to their final sizes */
+	mutex_lock(&cache_chain_mutex);
+	list_for_each_entry(cachep, &cache_chain, next)
+		if (enable_cpucache(cachep, GFP_NOWAIT))
+			BUG();
+	mutex_unlock(&cache_chain_mutex);
 
 	/* Done! */
 	g_cpucache_up = FULL;
@@ -2064,10 +2082,10 @@
 	return left_over;
 }
 
-static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
+static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (g_cpucache_up == FULL)
-		return enable_cpucache(cachep);
+		return enable_cpucache(cachep, gfp);
 
 	if (g_cpucache_up == NONE) {
 		/*
@@ -2089,7 +2107,7 @@
 			g_cpucache_up = PARTIAL_AC;
 	} else {
 		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+			kmalloc(sizeof(struct arraycache_init), gfp);
 
 		if (g_cpucache_up == PARTIAL_AC) {
 			set_up_list3s(cachep, SIZE_L3);
@@ -2099,7 +2117,7 @@
 			for_each_online_node(node) {
 				cachep->nodelists[node] =
 				    kmalloc_node(sizeof(struct kmem_list3),
-						GFP_KERNEL, node);
+						gfp, node);
 				BUG_ON(!cachep->nodelists[node]);
 				kmem_list3_init(cachep->nodelists[node]);
 			}
@@ -2153,6 +2171,7 @@
 {
 	size_t left_over, slab_size, ralign;
 	struct kmem_cache *cachep = NULL, *pc;
+	gfp_t gfp;
 
 	/*
 	 * Sanity checks... these are all serious usage bugs.
@@ -2168,8 +2187,10 @@
 	 * We use cache_chain_mutex to ensure a consistent view of
 	 * cpu_online_mask as well.  Please see cpuup_callback
 	 */
-	get_online_cpus();
-	mutex_lock(&cache_chain_mutex);
+	if (slab_is_available()) {
+		get_online_cpus();
+		mutex_lock(&cache_chain_mutex);
+	}
 
 	list_for_each_entry(pc, &cache_chain, next) {
 		char tmp;
@@ -2278,8 +2299,13 @@
 	 */
 	align = ralign;
 
+	if (slab_is_available())
+		gfp = GFP_KERNEL;
+	else
+		gfp = GFP_NOWAIT;
+
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, gfp);
 	if (!cachep)
 		goto oops;
 
@@ -2382,7 +2408,7 @@
 	cachep->ctor = ctor;
 	cachep->name = name;
 
-	if (setup_cpu_cache(cachep)) {
+	if (setup_cpu_cache(cachep, gfp)) {
 		__kmem_cache_destroy(cachep);
 		cachep = NULL;
 		goto oops;
@@ -2394,8 +2420,10 @@
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
-	mutex_unlock(&cache_chain_mutex);
-	put_online_cpus();
+	if (slab_is_available()) {
+		mutex_unlock(&cache_chain_mutex);
+		put_online_cpus();
+	}
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2621,6 +2649,14 @@
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
 					      local_flags, nodeid);
+		/*
+		 * If the first object in the slab is leaked (it's allocated
+		 * but no one has a reference to it), we want to make sure
+		 * kmemleak does not treat the ->s_mem pointer as a reference
+		 * to the object. Otherwise we will not report the leak.
+		 */
+		kmemleak_scan_area(slabp, offsetof(struct slab, list),
+				   sizeof(struct list_head), local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -3141,6 +3177,12 @@
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	/*
+	 * To avoid a false negative, if an object that is in one of the
+	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
+	 * treat the array pointers as a reference to the object.
+	 */
+	kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3327,6 +3369,8 @@
 	unsigned long save_flags;
 	void *ptr;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3360,6 +3404,8 @@
   out:
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+	kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+				 flags);
 
 	if (unlikely((flags & __GFP_ZERO) && ptr))
 		memset(ptr, 0, obj_size(cachep));
@@ -3405,6 +3451,8 @@
 	unsigned long save_flags;
 	void *objp;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3415,6 +3463,8 @@
 	objp = __do_cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+	kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+				 flags);
 	prefetchw(objp);
 
 	if (unlikely((flags & __GFP_ZERO) && objp))
@@ -3530,6 +3580,7 @@
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
+	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
 	/*
@@ -3802,7 +3853,7 @@
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
-static int alloc_kmemlist(struct kmem_cache *cachep)
+static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int node;
 	struct kmem_list3 *l3;
@@ -3812,7 +3863,7 @@
 	for_each_online_node(node) {
 
                 if (use_alien_caches) {
-                        new_alien = alloc_alien_cache(node, cachep->limit);
+                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                         if (!new_alien)
                                 goto fail;
                 }
@@ -3821,7 +3872,7 @@
 		if (cachep->shared) {
 			new_shared = alloc_arraycache(node,
 				cachep->shared*cachep->batchcount,
-					0xbaadf00d);
+					0xbaadf00d, gfp);
 			if (!new_shared) {
 				free_alien_cache(new_alien);
 				goto fail;
@@ -3850,7 +3901,7 @@
 			free_alien_cache(new_alien);
 			continue;
 		}
-		l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
+		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
 		if (!l3) {
 			free_alien_cache(new_alien);
 			kfree(new_shared);
@@ -3906,18 +3957,18 @@
 
 /* Always called with the cache_chain_mutex held */
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
-				int batchcount, int shared)
+				int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
 	int i;
 
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), gfp);
 	if (!new)
 		return -ENOMEM;
 
 	for_each_online_cpu(i) {
 		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
-						batchcount);
+						batchcount, gfp);
 		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
 				kfree(new->new[i]);
@@ -3944,11 +3995,11 @@
 		kfree(ccold);
 	}
 	kfree(new);
-	return alloc_kmemlist(cachep);
+	return alloc_kmemlist(cachep, gfp);
 }
 
 /* Called with cache_chain_mutex held always */
-static int enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
 	int limit, shared;
@@ -3994,7 +4045,7 @@
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
+	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
@@ -4300,7 +4351,8 @@
 				res = 0;
 			} else {
 				res = do_tune_cpucache(cachep, limit,
-						       batchcount, shared);
+						       batchcount, shared,
+						       GFP_KERNEL);
 			}
 			break;
 		}
diff --git a/mm/slob.c b/mm/slob.c
index f92e66d..12f2614 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,7 +66,8 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
+#include <linux/kmemleak.h>
 #include <asm/atomic.h>
 
 /*
@@ -509,6 +510,7 @@
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
+	kmemleak_alloc(ret, size, 1, gfp);
 	return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
@@ -521,6 +523,7 @@
 
 	if (unlikely(ZERO_OR_NULL_PTR(block)))
 		return;
+	kmemleak_free(block);
 
 	sp = slob_page(block);
 	if (is_slob_page(sp)) {
@@ -584,12 +587,14 @@
 	} else if (flags & SLAB_PANIC)
 		panic("Cannot create slab cache %s\n", name);
 
+	kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
 	return c;
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *c)
 {
+	kmemleak_free(c);
 	slob_free(c, sizeof(struct kmem_cache));
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
@@ -613,6 +618,7 @@
 	if (c->ctor)
 		c->ctor(b);
 
+	kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
 	return b;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
@@ -635,6 +641,7 @@
 
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
+	kmemleak_free_recursive(b, c->flags);
 	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
diff --git a/mm/slub.c b/mm/slub.c
index 65ffda5..30354bf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,9 +17,10 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/kmemleak.h>
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/debugobjects.h>
@@ -143,7 +144,7 @@
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
 
 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
 		SLAB_CACHE_DMA)
@@ -177,6 +178,12 @@
 	SYSFS		/* Sysfs up */
 } slab_state = DOWN;
 
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
 static LIST_HEAD(slab_caches);
@@ -1594,6 +1601,8 @@
 	unsigned long flags;
 	unsigned int objsize;
 
+	gfpflags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
 
@@ -1617,6 +1626,7 @@
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, objsize);
 
+	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
 	return object;
 }
 
@@ -1746,6 +1756,7 @@
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
 
+	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	debug_check_no_locks_freed(object, c->objsize);
@@ -2557,13 +2568,16 @@
 	if (gfp_flags & SLUB_DMA)
 		flags = SLAB_CACHE_DMA;
 
-	down_write(&slub_lock);
+	/*
+	 * This function is called with IRQs disabled during early-boot on
+	 * single CPU so there's no need to take slub_lock here.
+	 */
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
 								flags, NULL))
 		goto panic;
 
 	list_add(&s->list, &slab_caches);
-	up_write(&slub_lock);
+
 	if (sysfs_slab_add(s))
 		goto panic;
 	return s;
@@ -3021,7 +3035,7 @@
 	 * kmem_cache_open for slab_state == DOWN.
 	 */
 	create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
-		sizeof(struct kmem_cache_node), GFP_KERNEL);
+		sizeof(struct kmem_cache_node), GFP_NOWAIT);
 	kmalloc_caches[0].refcount = -1;
 	caches++;
 
@@ -3034,16 +3048,16 @@
 	/* Caches that are not of the two-to-the-power-of size */
 	if (KMALLOC_MIN_SIZE <= 64) {
 		create_kmalloc_cache(&kmalloc_caches[1],
-				"kmalloc-96", 96, GFP_KERNEL);
+				"kmalloc-96", 96, GFP_NOWAIT);
 		caches++;
 		create_kmalloc_cache(&kmalloc_caches[2],
-				"kmalloc-192", 192, GFP_KERNEL);
+				"kmalloc-192", 192, GFP_NOWAIT);
 		caches++;
 	}
 
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
-			"kmalloc", 1 << i, GFP_KERNEL);
+			"kmalloc", 1 << i, GFP_NOWAIT);
 		caches++;
 	}
 
@@ -3080,7 +3094,7 @@
 	/* Provide the correct kmalloc names now that the caches are up */
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
-			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+			kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
@@ -3098,6 +3112,14 @@
 		nr_cpu_ids, nr_node_ids);
 }
 
+void __init kmem_cache_init_late(void)
+{
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 /*
  * Find a mergeable slab cache
  */
diff --git a/mm/util.c b/mm/util.c
index 55bef16..abc65aa 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -255,13 +257,6 @@
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 083716e..f8189a4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -23,8 +23,8 @@
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
-#include <linux/bootmem.h>
 #include <linux/pfn.h>
+#include <linux/kmemleak.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
@@ -1032,7 +1032,7 @@
 
 	/* Import existing vmlist entries. */
 	for (tmp = vmlist; tmp; tmp = tmp->next) {
-		va = alloc_bootmem(sizeof(struct vmap_area));
+		va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
 		va->flags = tmp->flags | VM_VM_AREA;
 		va->va_start = (unsigned long)tmp->addr;
 		va->va_end = va->va_start + tmp->size;
@@ -1327,6 +1327,9 @@
 void vfree(const void *addr)
 {
 	BUG_ON(in_interrupt());
+
+	kmemleak_free(addr);
+
 	__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
@@ -1439,8 +1442,17 @@
 
 void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 {
-	return __vmalloc_area_node(area, gfp_mask, prot, -1,
-					__builtin_return_address(0));
+	void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
+					 __builtin_return_address(0));
+
+	/*
+	 * A ref_count = 3 is needed because the vm_struct and vmap_area
+	 * structures allocated in the __get_vm_area_node() function contain
+	 * references to the virtual address of the vmalloc'ed block.
+	 */
+	kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
+
+	return addr;
 }
 
 /**
@@ -1459,6 +1471,8 @@
 						int node, void *caller)
 {
 	struct vm_struct *area;
+	void *addr;
+	unsigned long real_size = size;
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
@@ -1470,7 +1484,16 @@
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+
+	/*
+	 * A ref_count = 3 is needed because the vm_struct and vmap_area
+	 * structures allocated in the __get_vm_area_node() function contain
+	 * references to the virtual address of the vmalloc'ed block.
+	 */
+	kmemleak_alloc(addr, real_size, 3, gfp_mask);
+
+	return addr;
 }
 
 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d254306..95c08a8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2056,7 +2056,7 @@
 		+ global_page_state(NR_INACTIVE_FILE);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_HIBERNATION
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority.
@@ -2196,7 +2196,7 @@
 
 	return sc.nr_reclaimed;
 }
-#endif
+#endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bb8579a..a49484e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = vdev->config->find_vq(vdev, 0, req_done);
+	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
@@ -261,7 +261,7 @@
 	return 0;
 
 out_free_vq:
-	vdev->config->del_vq(chan->vq);
+	vdev->config->del_vqs(vdev);
 fail:
 	mutex_lock(&virtio_9p_lock);
 	chan_index--;
@@ -332,7 +332,7 @@
 	BUG_ON(chan->inuse);
 
 	if (chan->initialized) {
-		vdev->config->del_vq(chan->vq);
+		vdev->config->del_vqs(vdev);
 		chan->initialized = false;
 	}
 }
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3..b75b6ce 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb456..499a67e 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e505b53..c2e4fb8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index cc29b44..e5becb9 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -167,6 +167,9 @@
 	struct tcf_exts e;
 	int err;
 
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
 	if (head == NULL) {
 		if (!handle)
 			return -EINVAL;
diff --git a/samples/Kconfig b/samples/Kconfig
index 4b02f5a..b75d28c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -19,6 +19,12 @@
 	help
 	  This build tracepoints example modules.
 
+config SAMPLE_TRACE_EVENTS
+	tristate "Build trace_events examples -- loadable modules only"
+	depends on EVENT_TRACING && m
+	help
+	  This build trace event example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 10eaca8..13e4b47 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 0000000..0d428dc
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,6 @@
+# builds the trace events example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+CFLAGS_trace-events-sample.o := -I$(src)
+
+obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 0000000..aabc4e9
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,52 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+
+/*
+ * Any file that uses trace points, must include the header.
+ * But only one file, must include the header by defining
+ * CREATE_TRACE_POINTS first.  This will make the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace-events-sample.h"
+
+
+static void simple_thread_func(int cnt)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(HZ);
+	trace_foo_bar("hello", cnt);
+}
+
+static int simple_thread(void *arg)
+{
+	int cnt = 0;
+
+	while (!kthread_should_stop())
+		simple_thread_func(cnt++);
+
+	return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+static int __init trace_event_init(void)
+{
+	simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
+	if (IS_ERR(simple_tsk))
+		return -1;
+
+	return 0;
+}
+
+static void __exit trace_event_exit(void)
+{
+	kthread_stop(simple_tsk);
+}
+
+module_init(trace_event_init);
+module_exit(trace_event_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-events-sample");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 0000000..128a897
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,129 @@
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ *  || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENT_SAMPLE_H
+
+/*
+ * All trace headers should include tracepoint.h, until we finally
+ * make it into a standard header.
+ */
+#include <linux/tracepoint.h>
+
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /debugfs/tracing/events/<system>
+ *
+ * The define_trace.h belowe will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ *
+ * If you want a different system than file name, you can override
+ * the header name by defining TRACE_INCLUDE_FILE
+ *
+ * If this file was called, goofy.h, then we would define:
+ *
+ * #define TRACE_INCLUDE_FILE goofy
+ *
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sample
+
+/*
+ * The TRACE_EVENT macro is broken up into 5 parts.
+ *
+ * name: name of the trace point. This is also how to enable the tracepoint.
+ *   A function called trace_foo_bar() will be created.
+ *
+ * proto: the prototype of the function trace_foo_bar()
+ *   Here it is trace_foo_bar(char *foo, int bar).
+ *
+ * args:  must match the arguments in the prototype.
+ *    Here it is simply "foo, bar".
+ *
+ * struct:  This defines the way the data will be stored in the ring buffer.
+ *    There are currently two types of elements. __field and __array.
+ *    a __field is broken up into (type, name). Where type can be any
+ *    type but an array.
+ *    For an array. there are three fields. (type, name, size). The
+ *    type of elements in the array, the name of the field and the size
+ *    of the array.
+ *
+ *    __array( char, foo, 10) is the same as saying   char foo[10].
+ *
+ * fast_assign: This is a C like function that is used to store the items
+ *    into the ring buffer.
+ *
+ * printk: This is a way to print out the data in pretty print. This is
+ *    useful if the system crashes and you are logging via a serial line,
+ *    the data can be printed to the console using this "printk" method.
+ *
+ * Note, that for both the assign and the printk, __entry is the handler
+ * to the data structure in the ring buffer, and is defined by the
+ * TP_STRUCT__entry.
+ */
+TRACE_EVENT(foo_bar,
+
+	TP_PROTO(char *foo, int bar),
+
+	TP_ARGS(foo, bar),
+
+	TP_STRUCT__entry(
+		__array(	char,	foo,    10		)
+		__field(	int,	bar			)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->foo, foo, 10);
+		__entry->bar	= bar;
+	),
+
+	TP_printk("foo %s %d", __entry->foo, __entry->bar)
+);
+#endif
+
+/***** NOTICE! The #if protection ends here. *****/
+
+
+/*
+ * There are several ways I could have done this. If I left out the
+ * TRACE_INCLUDE_PATH, then it would default to the kernel source
+ * include/trace/events directory.
+ *
+ * I could specify a path from the define_trace.h file back to this
+ * file.
+ *
+ * #define TRACE_INCLUDE_PATH ../../samples/trace_events
+ *
+ * But I chose to simply make it use the current directory and then in
+ * the Makefile I added:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+ *
+ * This will make sure the current path is part of the include
+ * structure for our file so that we can find it.
+ *
+ * I could have made only the top level directory the include:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)
+ *
+ * And then let the path to this directory be the TRACE_INCLUDE_PATH:
+ *
+ * #define TRACE_INCLUDE_PATH samples/trace_events
+ *
+ * But then if something defines "samples" or "trace_events" then we
+ * could risk that being converted too, and give us an unexpected
+ * result.
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+/*
+ * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
+ */
+#define TRACE_INCLUDE_FILE trace-events-sample
+#include <trace/define_trace.h>
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cba61ca..2b70661 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -188,20 +188,34 @@
 # ---------------------------------------------------------------------------
 
 quiet_cmd_gzip = GZIP    $@
-cmd_gzip = gzip -f -9 < $< > $@
+cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \
+	(rm -f $@ ; false)
 
 
 # Bzip2
 # ---------------------------------------------------------------------------
 
-# Bzip2 does not include size in file... so we have to fake that
-size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size
+# Bzip2 and LZMA do not include size in file... so we have to fake that;
+# append the size as a 32-bit littleendian number as gzip does.
+size_append = echo -ne $(shell						\
+dec_size=0;								\
+for F in $1; do								\
+	fsize=$$(stat -c "%s" $$F);					\
+	dec_size=$$(expr $$dec_size + $$fsize);				\
+done;									\
+printf "%08x" $$dec_size |						\
+	sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'	\
+)
 
-quiet_cmd_bzip2 = BZIP2    $@
-cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false)
+quiet_cmd_bzip2 = BZIP2   $@
+cmd_bzip2 = (cat $(filter-out FORCE,$^) | \
+	bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
 
 # Lzma
 # ---------------------------------------------------------------------------
 
 quiet_cmd_lzma = LZMA    $@
-cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false)
+cmd_lzma = (cat $(filter-out FORCE,$^) | \
+	lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
diff --git a/scripts/bin_size b/scripts/bin_size
deleted file mode 100644
index 43e1b36..0000000
--- a/scripts/bin_size
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-if [ $# = 0 ] ; then
-   echo Usage: $0 file
-fi
-
-size_dec=`stat -c "%s" $1`
-size_hex_echo_string=`printf "%08x" $size_dec |
-     sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'`
-/bin/echo -ne $size_hex_echo_string
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3208a3a..acd8c4a 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1828,6 +1828,25 @@
     $state = 0;
 }
 
+sub tracepoint_munge($) {
+	my $file = shift;
+	my $tracepointname = 0;
+	my $tracepointargs = 0;
+
+	if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+		$tracepointargs = $1;
+	}
+	if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+		print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
+			     "$prototype\n";
+	} else {
+		$prototype = "static inline void trace_$tracepointname($tracepointargs)";
+	}
+}
+
 sub syscall_munge() {
 	my $void = 0;
 
@@ -1882,6 +1901,9 @@
 	if ($prototype =~ /SYSCALL_DEFINE/) {
 		syscall_munge();
 	}
+	if ($prototype =~ /TRACE_EVENT/) {
+		tracepoint_munge($file);
+	}
 	dump_function($prototype, $file);
 	reset_state();
     }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index a334428..40e0045 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -641,7 +641,7 @@
 	id->vendor = TO_NATIVE(id->vendor);
 
 	strcpy(alias, "virtio:");
-	ADD(alias, "d", 1, id->device);
+	ADD(alias, "d", id->device != VIRTIO_DEV_ANY_ID, id->device);
 	ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
 
 	add_wildcard(alias);
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 409596e..0fae7da 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -26,7 +26,7 @@
 # which will also be the location of that section after final link.
 # e.g.
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  .globl my_func
 #  my_func:
 #        [...]
@@ -39,7 +39,7 @@
 #        [...]
 #
 # Both relocation offsets for the mcounts in the above example will be
-# offset from .text.sched. If we make another file called tmp.s with:
+# offset from .sched.text. If we make another file called tmp.s with:
 #
 #  .section __mcount_loc
 #  .quad  my_func + 0x5
@@ -51,7 +51,7 @@
 # But this gets hard if my_func is not globl (a static function).
 # In such a case we have:
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  my_func:
 #        [...]
 #        call mcount  (offset: 0x5)
diff --git a/security/Kconfig b/security/Kconfig
index bb24477..d23c839 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -110,29 +110,9 @@
 
 	  See <http://www.linuxjournal.com/article.php?sid=6279> for
 	  more information about this module.
-	  
+
 	  If you are unsure how to answer this question, answer N.
 
-config SECURITY_DEFAULT_MMAP_MIN_ADDR
-        int "Low address space to protect from user allocation"
-        depends on SECURITY
-        default 0
-        help
-	  This is the portion of low virtual memory which should be protected
-	  from userspace allocation.  Keeping a user from writing to low pages
-	  can help reduce the impact of kernel NULL pointer bugs.
-
-	  For most ia64, ppc64 and x86 users with lots of address space
-	  a value of 65536 is reasonable and should cause no problems.
-	  On arm and other archs it should not be higher than 32768.
-	  Programs which use vm86 functionality would either need additional
-	  permissions from either the LSM or the capabilities module or have
-	  this protection disabled.
-
-	  This value can be changed after boot using the
-	  /proc/sys/vm/mmap_min_addr tunable.
-
-
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
diff --git a/security/Makefile b/security/Makefile
index fa77021..c67557c 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,6 +16,9 @@
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)		+= selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)		+= smack/built-in.o
+ifeq ($(CONFIG_AUDIT),y)
+obj-$(CONFIG_SECURITY_SMACK)		+= lsm_audit.o
+endif
 obj-$(CONFIG_SECURITY_TOMOYO)		+= tomoyo/built-in.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)		+= root_plug.o
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
diff --git a/security/commoncap.c b/security/commoncap.c
index beac025..48b7e02 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -28,6 +28,28 @@
 #include <linux/prctl.h>
 #include <linux/securebits.h>
 
+/*
+ * If a non-root user executes a setuid-root binary in
+ * !secure(SECURE_NOROOT) mode, then we raise capabilities.
+ * However if fE is also set, then the intent is for only
+ * the file capabilities to be applied, and the setuid-root
+ * bit is left on either to change the uid (plausible) or
+ * to get full privilege on a kernel without file capabilities
+ * support.  So in that case we do not raise capabilities.
+ *
+ * Warn if that happens, once per boot.
+ */
+static void warn_setuid_and_fcaps_mixed(char *fname)
+{
+	static int warned;
+	if (!warned) {
+		printk(KERN_INFO "warning: `%s' has both setuid-root and"
+			" effective capabilities. Therefore not raising all"
+			" capabilities.\n", fname);
+		warned = 1;
+	}
+}
+
 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 {
 	NETLINK_CB(skb).eff_cap = current_cap();
@@ -464,6 +486,15 @@
 
 	if (!issecure(SECURE_NOROOT)) {
 		/*
+		 * If the legacy file capability is set, then don't set privs
+		 * for a setuid root binary run by a non-root user.  Do set it
+		 * for a root user just to cause least surprise to an admin.
+		 */
+		if (effective && new->uid != 0 && new->euid == 0) {
+			warn_setuid_and_fcaps_mixed(bprm->filename);
+			goto skip;
+		}
+		/*
 		 * To support inheritance of root-permissions and suid-root
 		 * executables under compatibility mode, we override the
 		 * capability sets for the file.
@@ -478,6 +509,7 @@
 		if (new->euid == 0)
 			effective = true;
 	}
+skip:
 
 	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
 	 * credentials unless they have the appropriate permit
diff --git a/security/inode.c b/security/inode.c
index f3b91bf..f7496c6 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -287,7 +287,7 @@
 {
 	struct dentry *parent;
 
-	if (!dentry)
+	if (!dentry || IS_ERR(dentry))
 		return;
 
 	parent = dentry->d_parent;
diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index 1e082bb..ff513ff 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -22,18 +22,9 @@
 static int __init ima_audit_setup(char *str)
 {
 	unsigned long audit;
-	int rc, result = 0;
-	char *op = "ima_audit";
-	char *cause;
 
-	rc = strict_strtoul(str, 0, &audit);
-	if (rc || audit > 1)
-		result = 1;
-	else
-		ima_audit = audit;
-	cause = ima_audit ? "enabled" : "not_enabled";
-	integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
-			    op, cause, result, 0);
+	if (!strict_strtoul(str, 0, &audit))
+		ima_audit = audit ? 1 : 0;
 	return 1;
 }
 __setup("ima_audit=", ima_audit_setup);
@@ -50,23 +41,14 @@
 
 	ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno);
 	audit_log_format(ab, "integrity: pid=%d uid=%u auid=%u ses=%u",
-			 current->pid, current->cred->uid,
+			 current->pid, current_cred()->uid,
 			 audit_get_loginuid(current),
 			 audit_get_sessionid(current));
 	audit_log_task_context(ab);
-	switch (audit_msgno) {
-	case AUDIT_INTEGRITY_DATA:
-	case AUDIT_INTEGRITY_METADATA:
-	case AUDIT_INTEGRITY_PCR:
-	case AUDIT_INTEGRITY_STATUS:
-		audit_log_format(ab, " op=%s cause=%s", op, cause);
-		break;
-	case AUDIT_INTEGRITY_HASH:
-		audit_log_format(ab, " op=%s hash=%s", op, cause);
-		break;
-	default:
-		audit_log_format(ab, " op=%s", op);
-	}
+	audit_log_format(ab, " op=");
+	audit_log_string(ab, op);
+	audit_log_format(ab, " cause=");
+	audit_log_string(ab, cause);
 	audit_log_format(ab, " comm=");
 	audit_log_untrustedstring(ab, current->comm);
 	if (fname) {
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 50d572b..63003a6 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -103,7 +103,7 @@
 	return rc;
 }
 
-static void ima_pcrread(int idx, u8 *pcr)
+static void __init ima_pcrread(int idx, u8 *pcr)
 {
 	if (!ima_used_chip)
 		return;
@@ -115,7 +115,7 @@
 /*
  * Calculate the boot aggregate hash
  */
-int ima_calc_boot_aggregate(char *digest)
+int __init ima_calc_boot_aggregate(char *digest)
 {
 	struct hash_desc desc;
 	struct scatterlist sg;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ffbe259..6bfc7ea 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -15,6 +15,7 @@
  *	implemenents security file system for reporting
  *	current measurement list and IMA statistics
  */
+#include <linux/fcntl.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/rculist.h>
@@ -84,8 +85,8 @@
 	 * against concurrent list-extension
 	 */
 	rcu_read_lock();
-	qe = list_entry(rcu_dereference(qe->later.next),
-			struct ima_queue_entry, later);
+	qe = list_entry_rcu(qe->later.next,
+			    struct ima_queue_entry, later);
 	rcu_read_unlock();
 	(*pos)++;
 
@@ -283,6 +284,9 @@
  */
 int ima_open_policy(struct inode * inode, struct file * filp)
 {
+	/* No point in being allowed to open it if you aren't going to write */
+	if (!(filp->f_flags & O_WRONLY))
+		return -EACCES;
 	if (atomic_dec_and_test(&policy_opencount))
 		return 0;
 	return -EBUSY;
@@ -315,7 +319,7 @@
 	.release = ima_release_policy
 };
 
-int ima_fs_init(void)
+int __init ima_fs_init(void)
 {
 	ima_dir = securityfs_create_dir("ima", NULL);
 	if (IS_ERR(ima_dir))
@@ -349,7 +353,7 @@
 		goto out;
 
 	ima_policy = securityfs_create_file("policy",
-					    S_IRUSR | S_IRGRP | S_IWUSR,
+					    S_IWUSR,
 					    ima_dir, NULL,
 					    &ima_measure_policy_ops);
 	if (IS_ERR(ima_policy))
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index ec79f1e..b8dd693 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -196,7 +196,7 @@
 	kref_set(&iint->refcount, 1);
 }
 
-void ima_iintcache_init(void)
+void __init ima_iintcache_init(void)
 {
 	iint_cache =
 	    kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0,
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 0b0bb8c..a40da7a 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -38,7 +38,7 @@
  * a different value.) Violations add a zero entry to the measurement
  * list and extend the aggregate PCR value with ff...ff's.
  */
-static void ima_add_boot_aggregate(void)
+static void __init ima_add_boot_aggregate(void)
 {
 	struct ima_template_entry *entry;
 	const char *op = "add_boot_aggregate";
@@ -71,7 +71,7 @@
 			    audit_cause, result, 0);
 }
 
-int ima_init(void)
+int __init ima_init(void)
 {
 	u8 pcr_i[IMA_DIGEST_SIZE];
 	int rc;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f4e7266..6f61187 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -29,20 +29,8 @@
 char *ima_hash = "sha1";
 static int __init hash_setup(char *str)
 {
-	const char *op = "hash_setup";
-	const char *hash = "sha1";
-	int result = 0;
-	int audit_info = 0;
-
-	if (strncmp(str, "md5", 3) == 0) {
-		hash = "md5";
-		ima_hash = str;
-	} else if (strncmp(str, "sha1", 4) != 0) {
-		hash = "invalid_hash_type";
-		result = 1;
-	}
-	integrity_audit_msg(AUDIT_INTEGRITY_HASH, NULL, NULL, op, hash,
-			    result, audit_info);
+	if (strncmp(str, "md5", 3) == 0)
+		ima_hash = "md5";
 	return 1;
 }
 __setup("ima_hash=", hash_setup);
@@ -128,10 +116,6 @@
 {
 	int rc = 0;
 
-	if (IS_ERR(file)) {
-		pr_info("%s dentry_open failed\n", filename);
-		return rc;
-	}
 	iint->opencount++;
 	iint->readcount++;
 
@@ -141,6 +125,15 @@
 	return rc;
 }
 
+static void ima_update_counts(struct ima_iint_cache *iint, int mask)
+{
+	iint->opencount++;
+	if ((mask & MAY_WRITE) || (mask == 0))
+		iint->writecount++;
+	else if (mask & (MAY_READ | MAY_EXEC))
+		iint->readcount++;
+}
+
 /**
  * ima_path_check - based on policy, collect/store measurement.
  * @path: contains a pointer to the path to be measured
@@ -156,10 +149,10 @@
  *	- Opening a file for read when already open for write,
  * 	  could result in a file measurement error.
  *
- * Return 0 on success, an error code on failure.
- * (Based on the results of appraise_measurement().)
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct path *path, int mask)
+int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -173,11 +166,8 @@
 		return 0;
 
 	mutex_lock(&iint->mutex);
-	iint->opencount++;
-	if ((mask & MAY_WRITE) || (mask == 0))
-		iint->writecount++;
-	else if (mask & (MAY_READ | MAY_EXEC))
-		iint->readcount++;
+	if (update_counts)
+		ima_update_counts(iint, mask);
 
 	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
 	if (rc < 0)
@@ -196,7 +186,19 @@
 		struct dentry *dentry = dget(path->dentry);
 		struct vfsmount *mnt = mntget(path->mnt);
 
-		file = dentry_open(dentry, mnt, O_RDONLY, current->cred);
+		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
+				   current_cred());
+		if (IS_ERR(file)) {
+			int audit_info = 0;
+
+			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
+					    dentry->d_name.name,
+					    "add_measurement",
+					    "dentry_open failed",
+					    1, audit_info);
+			file = NULL;
+			goto out;
+		}
 		rc = get_path_measurement(iint, file, dentry->d_name.name);
 	}
 out:
@@ -206,6 +208,7 @@
 	kref_put(&iint->refcount, iint_free);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ima_path_check);
 
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
@@ -234,7 +237,16 @@
 	return rc;
 }
 
-static void opencount_get(struct file *file)
+/*
+ * ima_opens_get - increment file counts
+ *
+ * - for IPC shm and shmat file.
+ * - for nfsd exported files.
+ *
+ * Increment the counts for these files to prevent unnecessary
+ * imbalance messages.
+ */
+void ima_counts_get(struct file *file)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -246,8 +258,14 @@
 		return;
 	mutex_lock(&iint->mutex);
 	iint->opencount++;
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+		iint->readcount++;
+
+	if (file->f_mode & FMODE_WRITE)
+		iint->writecount++;
 	mutex_unlock(&iint->mutex);
 }
+EXPORT_SYMBOL_GPL(ima_counts_get);
 
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
@@ -272,18 +290,6 @@
 	return 0;
 }
 
-/*
- * ima_shm_check - IPC shm and shmat create/fput a file
- *
- * Maintain the opencount for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_shm_check(struct file *file)
-{
-	opencount_get(file);
-	return;
-}
-
 /**
  * ima_bprm_check - based on policy, collect/store measurement.
  * @bprm: contains the linux_binprm structure
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index b5291ad..e127839 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -45,24 +45,30 @@
 	} lsm[MAX_LSM_RULES];
 };
 
-/* Without LSM specific knowledge, the default policy can only be
+/*
+ * Without LSM specific knowledge, the default policy can only be
  * written in terms of .action, .func, .mask, .fsmagic, and .uid
  */
+
+/*
+ * The minimum rule set to allow for full TCB coverage.  Measures all files
+ * opened or mmap for exec and everything read by root.  Dangerous because
+ * normal users can easily run the machine out of memory simply building
+ * and running executables.
+ */
 static struct ima_measure_rule_entry default_rules[] = {
-	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,
-	 .flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,
-	 .flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0,
-	 .flags = IMA_FUNC | IMA_MASK | IMA_UID}
+	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
 };
 
 static LIST_HEAD(measure_default_rules);
@@ -71,6 +77,14 @@
 
 static DEFINE_MUTEX(ima_measure_mutex);
 
+static bool ima_use_tcb __initdata;
+static int __init default_policy_setup(char *str)
+{
+	ima_use_tcb = 1;
+	return 1;
+}
+__setup("ima_tcb", default_policy_setup);
+
 /**
  * ima_match_rules - determine whether an inode matches the measure rule.
  * @rule: a pointer to a rule
@@ -96,7 +110,7 @@
 	if ((rule->flags & IMA_UID) && rule->uid != tsk->cred->uid)
 		return false;
 	for (i = 0; i < MAX_LSM_RULES; i++) {
-		int rc;
+		int rc = 0;
 		u32 osid, sid;
 
 		if (!rule->lsm[i].rule)
@@ -109,7 +123,7 @@
 			security_inode_getsecid(inode, &osid);
 			rc = security_filter_rule_match(osid,
 							rule->lsm[i].type,
-							AUDIT_EQUAL,
+							Audit_equal,
 							rule->lsm[i].rule,
 							NULL);
 			break;
@@ -119,7 +133,7 @@
 			security_task_getsecid(tsk, &sid);
 			rc = security_filter_rule_match(sid,
 							rule->lsm[i].type,
-							AUDIT_EQUAL,
+							Audit_equal,
 							rule->lsm[i].rule,
 							NULL);
 		default:
@@ -164,11 +178,17 @@
  * ima_measure points to either the measure_default_rules or the
  * the new measure_policy_rules.
  */
-void ima_init_policy(void)
+void __init ima_init_policy(void)
 {
-	int i;
+	int i, entries;
 
-	for (i = 0; i < ARRAY_SIZE(default_rules); i++)
+	/* if !ima_use_tcb set entries = 0 so we load NO default rules */
+	if (ima_use_tcb)
+		entries = ARRAY_SIZE(default_rules);
+	else
+		entries = 0;
+
+	for (i = 0; i < entries; i++)
 		list_add_tail(&default_rules[i].list, &measure_default_rules);
 	ima_measure = &measure_default_rules;
 }
@@ -227,7 +247,7 @@
 
 	entry->lsm[lsm_rule].type = audit_type;
 	result = security_filter_rule_init(entry->lsm[lsm_rule].type,
-					   AUDIT_EQUAL, args,
+					   Audit_equal, args,
 					   &entry->lsm[lsm_rule].rule);
 	return result;
 }
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
new file mode 100644
index 0000000..94b8684
--- /dev/null
+++ b/security/lsm_audit.c
@@ -0,0 +1,386 @@
+/*
+ * common LSM auditing functions
+ *
+ * Based on code written for SELinux by :
+ *			Stephen Smalley, <sds@epoch.ncsc.mil>
+ * 			James Morris <jmorris@redhat.com>
+ * Author : Etienne Basset, <etienne.basset@ensta.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <linux/un.h>
+#include <net/af_unix.h>
+#include <linux/audit.h>
+#include <linux/ipv6.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <linux/lsm_audit.h>
+
+/**
+ * ipv4_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int ret = 0;
+	struct iphdr *ih;
+
+	ih = ip_hdr(skb);
+	if (ih == NULL)
+		return -EINVAL;
+
+	ad->u.net.v4info.saddr = ih->saddr;
+	ad->u.net.v4info.daddr = ih->daddr;
+
+	if (proto)
+		*proto = ih->protocol;
+	/* non initial fragment */
+	if (ntohs(ih->frag_off) & IP_OFFSET)
+		return 0;
+
+	switch (ih->protocol) {
+	case IPPROTO_TCP: {
+		struct tcphdr *th = tcp_hdr(skb);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr *uh = udp_hdr(skb);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr *dh = dccp_hdr(skb);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr *sh = sctp_hdr(skb);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * ipv6_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int offset, ret = 0;
+	struct ipv6hdr *ip6;
+	u8 nexthdr;
+
+	ip6 = ipv6_hdr(skb);
+	if (ip6 == NULL)
+		return -EINVAL;
+	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
+	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ret = 0;
+	/* IPv6 can have several extension header before the Transport header
+	 * skip them */
+	offset = skb_network_offset(skb);
+	offset += sizeof(*ip6);
+	nexthdr = ip6->nexthdr;
+	offset = ipv6_skip_exthdr(skb, offset, &nexthdr);
+	if (offset < 0)
+		return 0;
+	if (proto)
+		*proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph, *th;
+
+		th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph, *uh;
+
+		uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr _dccph, *dh;
+
+		dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr _sctph, *sh;
+
+		sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#endif
+
+
+static inline void print_ipv6_addr(struct audit_buffer *ab,
+				   struct in6_addr *addr, __be16 port,
+				   char *name1, char *name2)
+{
+	if (!ipv6_addr_any(addr))
+		audit_log_format(ab, " %s=%pI6", name1, addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
+				   __be16 port, char *name1, char *name2)
+{
+	if (addr)
+		audit_log_format(ab, " %s=%pI4", name1, &addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+/**
+ * dump_common_audit_data - helper to dump common audit data
+ * @a : common audit data
+ *
+ */
+static void dump_common_audit_data(struct audit_buffer *ab,
+				   struct common_audit_data *a)
+{
+	struct inode *inode = NULL;
+	struct task_struct *tsk = current;
+
+	if (a->tsk)
+		tsk = a->tsk;
+	if (tsk && tsk->pid) {
+		audit_log_format(ab, " pid=%d comm=", tsk->pid);
+		audit_log_untrustedstring(ab, tsk->comm);
+	}
+
+	switch (a->type) {
+	case LSM_AUDIT_DATA_IPC:
+		audit_log_format(ab, " key=%d ", a->u.ipc_id);
+		break;
+	case LSM_AUDIT_DATA_CAP:
+		audit_log_format(ab, " capability=%d ", a->u.cap);
+		break;
+	case LSM_AUDIT_DATA_FS:
+		if (a->u.fs.path.dentry) {
+			struct dentry *dentry = a->u.fs.path.dentry;
+			if (a->u.fs.path.mnt) {
+				audit_log_d_path(ab, "path=", &a->u.fs.path);
+			} else {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+			}
+			inode = dentry->d_inode;
+		} else if (a->u.fs.inode) {
+			struct dentry *dentry;
+			inode = a->u.fs.inode;
+			dentry = d_find_alias(inode);
+			if (dentry) {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+				dput(dentry);
+			}
+		}
+		if (inode)
+			audit_log_format(ab, " dev=%s ino=%lu",
+					inode->i_sb->s_id,
+					inode->i_ino);
+		break;
+	case LSM_AUDIT_DATA_TASK:
+		tsk = a->u.tsk;
+		if (tsk && tsk->pid) {
+			audit_log_format(ab, " pid=%d comm=", tsk->pid);
+			audit_log_untrustedstring(ab, tsk->comm);
+		}
+		break;
+	case LSM_AUDIT_DATA_NET:
+		if (a->u.net.sk) {
+			struct sock *sk = a->u.net.sk;
+			struct unix_sock *u;
+			int len = 0;
+			char *p = NULL;
+
+			switch (sk->sk_family) {
+			case AF_INET: {
+				struct inet_sock *inet = inet_sk(sk);
+
+				print_ipv4_addr(ab, inet->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv4_addr(ab, inet->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_INET6: {
+				struct inet_sock *inet = inet_sk(sk);
+				struct ipv6_pinfo *inet6 = inet6_sk(sk);
+
+				print_ipv6_addr(ab, &inet6->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv6_addr(ab, &inet6->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_UNIX:
+				u = unix_sk(sk);
+				if (u->dentry) {
+					struct path path = {
+						.dentry = u->dentry,
+						.mnt = u->mnt
+					};
+					audit_log_d_path(ab, "path=", &path);
+					break;
+				}
+				if (!u->addr)
+					break;
+				len = u->addr->len-sizeof(short);
+				p = &u->addr->name->sun_path[0];
+				audit_log_format(ab, " path=");
+				if (*p)
+					audit_log_untrustedstring(ab, p);
+				else
+					audit_log_n_hex(ab, p, len);
+				break;
+			}
+		}
+
+		switch (a->u.net.family) {
+		case AF_INET:
+			print_ipv4_addr(ab, a->u.net.v4info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv4_addr(ab, a->u.net.v4info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		case AF_INET6:
+			print_ipv6_addr(ab, &a->u.net.v6info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv6_addr(ab, &a->u.net.v6info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		}
+		if (a->u.net.netif > 0) {
+			struct net_device *dev;
+
+			/* NOTE: we always use init's namespace */
+			dev = dev_get_by_index(&init_net, a->u.net.netif);
+			if (dev) {
+				audit_log_format(ab, " netif=%s", dev->name);
+				dev_put(dev);
+			}
+		}
+		break;
+#ifdef CONFIG_KEYS
+	case LSM_AUDIT_DATA_KEY:
+		audit_log_format(ab, " key_serial=%u", a->u.key_struct.key);
+		if (a->u.key_struct.key_desc) {
+			audit_log_format(ab, " key_desc=");
+			audit_log_untrustedstring(ab, a->u.key_struct.key_desc);
+		}
+		break;
+#endif
+	} /* switch (a->type) */
+}
+
+/**
+ * common_lsm_audit - generic LSM auditing function
+ * @a:  auxiliary audit data
+ *
+ * setup the audit buffer for common security information
+ * uses callback to print LSM specific information
+ */
+void common_lsm_audit(struct common_audit_data *a)
+{
+	struct audit_buffer *ab;
+
+	if (a == NULL)
+		return;
+	/* we use GFP_ATOMIC so we won't sleep */
+	ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+
+	if (ab == NULL)
+		return;
+
+	if (a->lsm_pre_audit)
+		a->lsm_pre_audit(ab, a);
+
+	dump_common_audit_data(ab, a);
+
+	if (a->lsm_post_audit)
+		a->lsm_post_audit(ab, a);
+
+	audit_log_end(ab);
+}
diff --git a/security/root_plug.c b/security/root_plug.c
index 40fb4f1..2f7ffa6 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -71,18 +71,6 @@
 }
 
 static struct security_operations rootplug_security_ops = {
-	/* Use the capability functions for some of the hooks */
-	.ptrace_may_access =		cap_ptrace_may_access,
-	.ptrace_traceme =		cap_ptrace_traceme,
-	.capget =			cap_capget,
-	.capset =			cap_capset,
-	.capable =			cap_capable,
-
-	.bprm_set_creds =		cap_bprm_set_creds,
-
-	.task_fix_setuid =		cap_task_fix_setuid,
-	.task_prctl =			cap_task_prctl,
-
 	.bprm_check_security =		rootplug_bprm_check_security,
 };
 
diff --git a/security/security.c b/security/security.c
index 5284255..dc7674f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -26,9 +26,6 @@
 
 struct security_operations *security_ops;	/* Initialized to NULL */
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR;
-
 static inline int verify(struct security_operations *ops)
 {
 	/* verify the security_operations structure exists */
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 7f9b5fa..b2ab608 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -927,7 +927,7 @@
 	if (denied) {
 		if (flags & AVC_STRICT)
 			rc = -EACCES;
-		else if (!selinux_enforcing || security_permissive_sid(ssid))
+		else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE))
 			avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
 					tsid, tclass, avd->seqno);
 		else
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c..195906b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1980,10 +1980,6 @@
 	u32 tsid, sid;
 	int rc;
 
-	rc = secondary_ops->sysctl(table, op);
-	if (rc)
-		return rc;
-
 	sid = current_sid();
 
 	rc = selinux_sysctl_get_sid(table, (op == 0001) ?
@@ -2375,10 +2371,8 @@
 {
 	const struct task_security_struct *tsec = current_security();
 	struct itimerval itimer;
-	struct sighand_struct *psig;
 	u32 osid, sid;
 	int rc, i;
-	unsigned long flags;
 
 	osid = tsec->osid;
 	sid = tsec->sid;
@@ -2398,22 +2392,20 @@
 		memset(&itimer, 0, sizeof itimer);
 		for (i = 0; i < 3; i++)
 			do_setitimer(i, &itimer, NULL);
-		flush_signals(current);
 		spin_lock_irq(&current->sighand->siglock);
-		flush_signal_handlers(current, 1);
-		sigemptyset(&current->blocked);
-		recalc_sigpending();
+		if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
+			__flush_signals(current);
+			flush_signal_handlers(current, 1);
+			sigemptyset(&current->blocked);
+		}
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
 	/* Wake up the parent if it is waiting so that it can recheck
 	 * wait permission to the new task SID. */
-	read_lock_irq(&tasklist_lock);
-	psig = current->parent->sighand;
-	spin_lock_irqsave(&psig->siglock, flags);
-	wake_up_interruptible(&current->parent->signal->wait_chldexit);
-	spin_unlock_irqrestore(&psig->siglock, flags);
-	read_unlock_irq(&tasklist_lock);
+	read_lock(&tasklist_lock);
+	wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+	read_unlock(&tasklist_lock);
 }
 
 /* superblock security operations */
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 5c3434f..ca83579 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,14 +8,13 @@
 #ifndef _SELINUX_SECURITY_H_
 #define _SELINUX_SECURITY_H_
 
+#include <linux/magic.h>
 #include "flask.h"
 
 #define SECSID_NULL			0x00000000 /* unspecified SID */
 #define SECSID_WILD			0xffffffff /* wildcard SID */
 #define SECCLASS_NULL			0x0000 /* no class */
 
-#define SELINUX_MAGIC 0xf97cff8c
-
 /* Identify specific policy version changes */
 #define POLICYDB_VERSION_BASE		15
 #define POLICYDB_VERSION_BOOL		16
@@ -91,9 +90,11 @@
 	u32 auditallow;
 	u32 auditdeny;
 	u32 seqno;
+	u32 flags;
 };
 
-int security_permissive_sid(u32 sid);
+/* definitions of av_decision.flags */
+#define AVD_FLAGS_PERMISSIVE	0x0001
 
 int security_compute_av(u32 ssid, u32 tsid,
 	u16 tclass, u32 requested,
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index c6875fd..dd7cc6d 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -112,6 +112,8 @@
 	{ AUDIT_DEL_RULE,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_USER,		NETLINK_AUDIT_SOCKET__NLMSG_RELAY    },
 	{ AUDIT_SIGNAL_INFO,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
+	{ AUDIT_TRIM,		NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
+	{ AUDIT_MAKE_EQUIV,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_TTY_GET,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
 	{ AUDIT_TTY_SET,	NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT	},
 };
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 2d5136e..b4fc506 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -527,10 +527,10 @@
 		goto out2;
 
 	length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT,
-			  "%x %x %x %x %u",
+			  "%x %x %x %x %u %x",
 			  avd.allowed, 0xffffffff,
 			  avd.auditallow, avd.auditdeny,
-			  avd.seqno);
+			  avd.seqno, avd.flags);
 out2:
 	kfree(tcon);
 out:
@@ -803,10 +803,6 @@
 		goto out;
 	}
 
-	if (count > PAGE_SIZE) {
-		ret = -EINVAL;
-		goto out;
-	}
 	page = (char *)get_zeroed_page(GFP_KERNEL);
 	if (!page) {
 		ret = -ENOMEM;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index deeec6c..500e6f7 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -410,6 +410,7 @@
 	avd->auditallow = 0;
 	avd->auditdeny = 0xffffffff;
 	avd->seqno = latest_granting;
+	avd->flags = 0;
 
 	/*
 	 * Check for all the invalid cases.
@@ -528,31 +529,6 @@
 	return 0;
 }
 
-/*
- * Given a sid find if the type has the permissive flag set
- */
-int security_permissive_sid(u32 sid)
-{
-	struct context *context;
-	u32 type;
-	int rc;
-
-	read_lock(&policy_rwlock);
-
-	context = sidtab_search(&sidtab, sid);
-	BUG_ON(!context);
-
-	type = context->type;
-	/*
-	 * we are intentionally using type here, not type-1, the 0th bit may
-	 * someday indicate that we are globally setting permissive in policy.
-	 */
-	rc = ebitmap_get_bit(&policydb.permissive_map, type);
-
-	read_unlock(&policy_rwlock);
-	return rc;
-}
-
 static int security_validtrans_handle_fail(struct context *ocontext,
 					   struct context *ncontext,
 					   struct context *tcontext,
@@ -767,6 +743,10 @@
 
 	rc = context_struct_compute_av(scontext, tcontext, tclass,
 				       requested, avd);
+
+	/* permissive domain? */
+	if (ebitmap_get_bit(&policydb.permissive_map, scontext->type))
+	    avd->flags |= AVD_FLAGS_PERMISSIVE;
 out:
 	read_unlock(&policy_rwlock);
 	return rc;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 42ef313..243bec1 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -20,6 +20,7 @@
 #include <net/netlabel.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
+#include <linux/lsm_audit.h>
 
 /*
  * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is
@@ -179,6 +180,20 @@
 #define MAY_NOT		0
 
 /*
+ * Number of access types used by Smack (rwxa)
+ */
+#define SMK_NUM_ACCESS_TYPE 4
+
+/*
+ * Smack audit data; is empty if CONFIG_AUDIT not set
+ * to save some stack
+ */
+struct smk_audit_info {
+#ifdef CONFIG_AUDIT
+	struct common_audit_data a;
+#endif
+};
+/*
  * These functions are in smack_lsm.c
  */
 struct inode_smack *new_inode_smack(char *);
@@ -186,8 +201,8 @@
 /*
  * These functions are in smack_access.c
  */
-int smk_access(char *, char *, int);
-int smk_curacc(char *, u32);
+int smk_access(char *, char *, int, struct smk_audit_info *);
+int smk_curacc(char *, u32, struct smk_audit_info *);
 int smack_to_cipso(const char *, struct smack_cipso *);
 void smack_from_cipso(u32, char *, char *);
 char *smack_from_secid(const u32);
@@ -237,4 +252,93 @@
 	return sip->smk_inode;
 }
 
+/*
+ * logging functions
+ */
+#define SMACK_AUDIT_DENIED 0x1
+#define SMACK_AUDIT_ACCEPT 0x2
+extern int log_policy;
+
+void smack_log(char *subject_label, char *object_label,
+		int request,
+		int result, struct smk_audit_info *auditdata);
+
+#ifdef CONFIG_AUDIT
+
+/*
+ * some inline functions to set up audit data
+ * they do nothing if CONFIG_AUDIT is not set
+ *
+ */
+static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
+			       char type)
+{
+	memset(a, 0, sizeof(*a));
+	a->a.type = type;
+	a->a.function = func;
+}
+
+static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
+					 struct task_struct *t)
+{
+	a->a.u.tsk = t;
+}
+static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
+						    struct dentry *d)
+{
+	a->a.u.fs.path.dentry = d;
+}
+static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
+						 struct vfsmount *m)
+{
+	a->a.u.fs.path.mnt = m;
+}
+static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
+					      struct inode *i)
+{
+	a->a.u.fs.inode = i;
+}
+static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
+					     struct path p)
+{
+	a->a.u.fs.path = p;
+}
+static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
+					    struct sock *sk)
+{
+	a->a.u.net.sk = sk;
+}
+
+#else /* no AUDIT */
+
+static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
+			       char type)
+{
+}
+static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
+					 struct task_struct *t)
+{
+}
+static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
+						    struct dentry *d)
+{
+}
+static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
+						 struct vfsmount *m)
+{
+}
+static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
+					      struct inode *i)
+{
+}
+static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
+					     struct path p)
+{
+}
+static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
+					    struct sock *sk)
+{
+}
+#endif
+
 #endif  /* _SECURITY_SMACK_H */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index ac0a270..513dc1a 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -59,11 +59,18 @@
  */
 static u32 smack_next_secid = 10;
 
+/*
+ * what events do we log
+ * can be overwritten at run-time by /smack/logging
+ */
+int log_policy = SMACK_AUDIT_DENIED;
+
 /**
  * smk_access - determine if a subject has a specific access to an object
  * @subject_label: a pointer to the subject's Smack label
  * @object_label: a pointer to the object's Smack label
  * @request: the access requested, in "MAY" format
+ * @a : a pointer to the audit data
  *
  * This function looks up the subject/object pair in the
  * access rule list and returns 0 if the access is permitted,
@@ -78,10 +85,12 @@
  * will be on the list, so checking the pointers may be a worthwhile
  * optimization.
  */
-int smk_access(char *subject_label, char *object_label, int request)
+int smk_access(char *subject_label, char *object_label, int request,
+	       struct smk_audit_info *a)
 {
 	u32 may = MAY_NOT;
 	struct smack_rule *srp;
+	int rc = 0;
 
 	/*
 	 * Hardcoded comparisons.
@@ -89,8 +98,10 @@
 	 * A star subject can't access any object.
 	 */
 	if (subject_label == smack_known_star.smk_known ||
-	    strcmp(subject_label, smack_known_star.smk_known) == 0)
-		return -EACCES;
+	    strcmp(subject_label, smack_known_star.smk_known) == 0) {
+		rc = -EACCES;
+		goto out_audit;
+	}
 	/*
 	 * An internet object can be accessed by any subject.
 	 * Tasks cannot be assigned the internet label.
@@ -100,20 +111,20 @@
 	    subject_label == smack_known_web.smk_known ||
 	    strcmp(object_label, smack_known_web.smk_known) == 0 ||
 	    strcmp(subject_label, smack_known_web.smk_known) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * A star object can be accessed by any subject.
 	 */
 	if (object_label == smack_known_star.smk_known ||
 	    strcmp(object_label, smack_known_star.smk_known) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * An object can be accessed in any way by a subject
 	 * with the same label.
 	 */
 	if (subject_label == object_label ||
 	    strcmp(subject_label, object_label) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * A hat subject can read any object.
 	 * A floor object can be read by any subject.
@@ -121,10 +132,10 @@
 	if ((request & MAY_ANYREAD) == request) {
 		if (object_label == smack_known_floor.smk_known ||
 		    strcmp(object_label, smack_known_floor.smk_known) == 0)
-			return 0;
+			goto out_audit;
 		if (subject_label == smack_known_hat.smk_known ||
 		    strcmp(subject_label, smack_known_hat.smk_known) == 0)
-			return 0;
+			goto out_audit;
 	}
 	/*
 	 * Beyond here an explicit relationship is required.
@@ -148,28 +159,36 @@
 	 * This is a bit map operation.
 	 */
 	if ((request & may) == request)
-		return 0;
+		goto out_audit;
 
-	return -EACCES;
+	rc = -EACCES;
+out_audit:
+#ifdef CONFIG_AUDIT
+	if (a)
+		smack_log(subject_label, object_label, request, rc, a);
+#endif
+	return rc;
 }
 
 /**
  * smk_curacc - determine if current has a specific access to an object
  * @obj_label: a pointer to the object's Smack label
  * @mode: the access requested, in "MAY" format
+ * @a : common audit data
  *
  * This function checks the current subject label/object label pair
  * in the access rule list and returns 0 if the access is permitted,
  * non zero otherwise. It allows that current may have the capability
  * to override the rules.
  */
-int smk_curacc(char *obj_label, u32 mode)
+int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
 {
 	int rc;
+	char *sp = current_security();
 
-	rc = smk_access(current_security(), obj_label, mode);
+	rc = smk_access(sp, obj_label, mode, NULL);
 	if (rc == 0)
-		return 0;
+		goto out_audit;
 
 	/*
 	 * Return if a specific label has been designated as the
@@ -177,14 +196,105 @@
 	 * have that label.
 	 */
 	if (smack_onlycap != NULL && smack_onlycap != current->cred->security)
-		return rc;
+		goto out_audit;
 
 	if (capable(CAP_MAC_OVERRIDE))
 		return 0;
 
+out_audit:
+#ifdef CONFIG_AUDIT
+	if (a)
+		smack_log(sp, obj_label, mode, rc, a);
+#endif
 	return rc;
 }
 
+#ifdef CONFIG_AUDIT
+/**
+ * smack_str_from_perm : helper to transalate an int to a
+ * readable string
+ * @string : the string to fill
+ * @access : the int
+ *
+ */
+static inline void smack_str_from_perm(char *string, int access)
+{
+	int i = 0;
+	if (access & MAY_READ)
+		string[i++] = 'r';
+	if (access & MAY_WRITE)
+		string[i++] = 'w';
+	if (access & MAY_EXEC)
+		string[i++] = 'x';
+	if (access & MAY_APPEND)
+		string[i++] = 'a';
+	string[i] = '\0';
+}
+/**
+ * smack_log_callback - SMACK specific information
+ * will be called by generic audit code
+ * @ab : the audit_buffer
+ * @a  : audit_data
+ *
+ */
+static void smack_log_callback(struct audit_buffer *ab, void *a)
+{
+	struct common_audit_data *ad = a;
+	struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
+	audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
+			 sad->result ? "denied" : "granted");
+	audit_log_format(ab, " subject=");
+	audit_log_untrustedstring(ab, sad->subject);
+	audit_log_format(ab, " object=");
+	audit_log_untrustedstring(ab, sad->object);
+	audit_log_format(ab, " requested=%s", sad->request);
+}
+
+/**
+ *  smack_log - Audit the granting or denial of permissions.
+ *  @subject_label : smack label of the requester
+ *  @object_label  : smack label of the object being accessed
+ *  @request: requested permissions
+ *  @result: result from smk_access
+ *  @a:  auxiliary audit data
+ *
+ * Audit the granting or denial of permissions in accordance
+ * with the policy.
+ */
+void smack_log(char *subject_label, char *object_label, int request,
+	       int result, struct smk_audit_info *ad)
+{
+	char request_buffer[SMK_NUM_ACCESS_TYPE + 1];
+	struct smack_audit_data *sad;
+	struct common_audit_data *a = &ad->a;
+
+	/* check if we have to log the current event */
+	if (result != 0 && (log_policy & SMACK_AUDIT_DENIED) == 0)
+		return;
+	if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
+		return;
+
+	if (a->function == NULL)
+		a->function = "unknown";
+
+	/* end preparing the audit data */
+	sad = &a->lsm_priv.smack_audit_data;
+	smack_str_from_perm(request_buffer, request);
+	sad->subject = subject_label;
+	sad->object  = object_label;
+	sad->request = request_buffer;
+	sad->result  = result;
+	a->lsm_pre_audit = smack_log_callback;
+
+	common_lsm_audit(a);
+}
+#else /* #ifdef CONFIG_AUDIT */
+void smack_log(char *subject_label, char *object_label, int request,
+               int result, struct smk_audit_info *ad)
+{
+}
+#endif
+
 static DEFINE_MUTEX(smack_known_lock);
 
 /**
@@ -209,7 +319,8 @@
 		if (found)
 			smack[i] = '\0';
 		else if (i >= len || string[i] > '~' || string[i] <= ' ' ||
-			 string[i] == '/') {
+			 string[i] == '/' || string[i] == '"' ||
+			 string[i] == '\\' || string[i] == '\'') {
 			smack[i] = '\0';
 			found = 1;
 		} else
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 98b3195..0023182 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -30,7 +30,6 @@
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <linux/audit.h>
-
 #include "smack.h"
 
 #define task_security(task)	(task_cred_xxx((task), security))
@@ -103,14 +102,24 @@
 static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
 {
 	int rc;
+	struct smk_audit_info ad;
+	char *sp, *tsp;
 
 	rc = cap_ptrace_may_access(ctp, mode);
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE);
+	sp = current_security();
+	tsp = task_security(ctp);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, ctp);
+
+	/* we won't log here, because rc can be overriden */
+	rc = smk_access(sp, tsp, MAY_READWRITE, NULL);
 	if (rc != 0 && capable(CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smack_log(sp, tsp, MAY_READWRITE, rc, &ad);
 	return rc;
 }
 
@@ -125,14 +134,24 @@
 static int smack_ptrace_traceme(struct task_struct *ptp)
 {
 	int rc;
+	struct smk_audit_info ad;
+	char *sp, *tsp;
 
 	rc = cap_ptrace_traceme(ptp);
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, ptp);
+
+	sp = current_security();
+	tsp = task_security(ptp);
+	/* we won't log here, because rc can be overriden */
+	rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
 	if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smack_log(tsp, sp, MAY_READWRITE, rc, &ad);
 	return rc;
 }
 
@@ -327,8 +346,14 @@
 static int smack_sb_statfs(struct dentry *dentry)
 {
 	struct superblock_smack *sbp = dentry->d_sb->s_security;
+	int rc;
+	struct smk_audit_info ad;
 
-	return smk_curacc(sbp->smk_floor, MAY_READ);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
+	rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad);
+	return rc;
 }
 
 /**
@@ -346,8 +371,12 @@
 			  char *type, unsigned long flags, void *data)
 {
 	struct superblock_smack *sbp = path->mnt->mnt_sb->s_security;
+	struct smk_audit_info ad;
 
-	return smk_curacc(sbp->smk_floor, MAY_WRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, *path);
+
+	return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
 }
 
 /**
@@ -361,10 +390,14 @@
 static int smack_sb_umount(struct vfsmount *mnt, int flags)
 {
 	struct superblock_smack *sbp;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint);
+	smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
 
 	sbp = mnt->mnt_sb->s_security;
-
-	return smk_curacc(sbp->smk_floor, MAY_WRITE);
+	return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
 }
 
 /*
@@ -441,15 +474,20 @@
 static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
 			    struct dentry *new_dentry)
 {
-	int rc;
 	char *isp;
+	struct smk_audit_info ad;
+	int rc;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
 	isp = smk_of_inode(old_dentry->d_inode);
-	rc = smk_curacc(isp, MAY_WRITE);
+	rc = smk_curacc(isp, MAY_WRITE, &ad);
 
 	if (rc == 0 && new_dentry->d_inode != NULL) {
 		isp = smk_of_inode(new_dentry->d_inode);
-		rc = smk_curacc(isp, MAY_WRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
+		rc = smk_curacc(isp, MAY_WRITE, &ad);
 	}
 
 	return rc;
@@ -466,18 +504,24 @@
 static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *ip = dentry->d_inode;
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	/*
 	 * You need write access to the thing you're unlinking
 	 */
-	rc = smk_curacc(smk_of_inode(ip), MAY_WRITE);
-	if (rc == 0)
+	rc = smk_curacc(smk_of_inode(ip), MAY_WRITE, &ad);
+	if (rc == 0) {
 		/*
 		 * You also need write access to the containing directory
 		 */
-		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE);
-
+		smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
+		smk_ad_setfield_u_fs_inode(&ad, dir);
+		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
+	}
 	return rc;
 }
 
@@ -491,17 +535,24 @@
  */
 static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
 {
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	/*
 	 * You need write access to the thing you're removing
 	 */
-	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
-	if (rc == 0)
+	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
+	if (rc == 0) {
 		/*
 		 * You also need write access to the containing directory
 		 */
-		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
+		smk_ad_setfield_u_fs_inode(&ad, dir);
+		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
+	}
 
 	return rc;
 }
@@ -525,15 +576,19 @@
 {
 	int rc;
 	char *isp;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
 	isp = smk_of_inode(old_dentry->d_inode);
-	rc = smk_curacc(isp, MAY_READWRITE);
+	rc = smk_curacc(isp, MAY_READWRITE, &ad);
 
 	if (rc == 0 && new_dentry->d_inode != NULL) {
 		isp = smk_of_inode(new_dentry->d_inode);
-		rc = smk_curacc(isp, MAY_READWRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
+		rc = smk_curacc(isp, MAY_READWRITE, &ad);
 	}
-
 	return rc;
 }
 
@@ -548,13 +603,15 @@
  */
 static int smack_inode_permission(struct inode *inode, int mask)
 {
+	struct smk_audit_info ad;
 	/*
 	 * No permission to check. Existence test. Yup, it's there.
 	 */
 	if (mask == 0)
 		return 0;
-
-	return smk_curacc(smk_of_inode(inode), mask);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_inode(&ad, inode);
+	return smk_curacc(smk_of_inode(inode), mask, &ad);
 }
 
 /**
@@ -566,13 +623,16 @@
  */
 static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 {
+	struct smk_audit_info ad;
 	/*
 	 * Need to allow for clearing the setuid bit.
 	 */
 	if (iattr->ia_valid & ATTR_FORCE)
 		return 0;
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 }
 
 /**
@@ -584,7 +644,12 @@
  */
 static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 {
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+	smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
 }
 
 /**
@@ -602,6 +667,7 @@
 static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 				const void *value, size_t size, int flags)
 {
+	struct smk_audit_info ad;
 	int rc = 0;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -619,8 +685,11 @@
 	} else
 		rc = cap_inode_setxattr(dentry, name, value, size, flags);
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	if (rc == 0)
-		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
 	return rc;
 }
@@ -672,7 +741,12 @@
  */
 static int smack_inode_getxattr(struct dentry *dentry, const char *name)
 {
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
 }
 
 /*
@@ -686,6 +760,7 @@
  */
 static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 {
+	struct smk_audit_info ad;
 	int rc = 0;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -696,8 +771,10 @@
 	} else
 		rc = cap_inode_removexattr(dentry, name);
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 	if (rc == 0)
-		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
 	return rc;
 }
@@ -856,12 +933,16 @@
 			    unsigned long arg)
 {
 	int rc = 0;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 
 	if (_IOC_DIR(cmd) & _IOC_WRITE)
-		rc = smk_curacc(file->f_security, MAY_WRITE);
+		rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
 
 	if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ))
-		rc = smk_curacc(file->f_security, MAY_READ);
+		rc = smk_curacc(file->f_security, MAY_READ, &ad);
 
 	return rc;
 }
@@ -875,7 +956,11 @@
  */
 static int smack_file_lock(struct file *file, unsigned int cmd)
 {
-	return smk_curacc(file->f_security, MAY_WRITE);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry);
+	return smk_curacc(file->f_security, MAY_WRITE, &ad);
 }
 
 /**
@@ -889,8 +974,12 @@
 static int smack_file_fcntl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
+
 	switch (cmd) {
 	case F_DUPFD:
 	case F_GETFD:
@@ -898,7 +987,7 @@
 	case F_GETLK:
 	case F_GETOWN:
 	case F_GETSIG:
-		rc = smk_curacc(file->f_security, MAY_READ);
+		rc = smk_curacc(file->f_security, MAY_READ, &ad);
 		break;
 	case F_SETFD:
 	case F_SETFL:
@@ -906,10 +995,10 @@
 	case F_SETLKW:
 	case F_SETOWN:
 	case F_SETSIG:
-		rc = smk_curacc(file->f_security, MAY_WRITE);
+		rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
 		break;
 	default:
-		rc = smk_curacc(file->f_security, MAY_READWRITE);
+		rc = smk_curacc(file->f_security, MAY_READWRITE, &ad);
 	}
 
 	return rc;
@@ -944,14 +1033,21 @@
 {
 	struct file *file;
 	int rc;
+	char *tsp = tsk->cred->security;
+	struct smk_audit_info ad;
 
 	/*
 	 * struct fown_struct is never outside the context of a struct file
 	 */
 	file = container_of(fown, struct file, f_owner);
-	rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE);
+	/* we don't log here as rc can be overriden */
+	rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL);
 	if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, tsk);
+	smack_log(file->f_security, tsp, MAY_WRITE, rc, &ad);
 	return rc;
 }
 
@@ -964,7 +1060,10 @@
 static int smack_file_receive(struct file *file)
 {
 	int may = 0;
+	struct smk_audit_info ad;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 	/*
 	 * This code relies on bitmasks.
 	 */
@@ -973,7 +1072,7 @@
 	if (file->f_mode & FMODE_WRITE)
 		may |= MAY_WRITE;
 
-	return smk_curacc(file->f_security, may);
+	return smk_curacc(file->f_security, may, &ad);
 }
 
 /*
@@ -1053,6 +1152,22 @@
 }
 
 /**
+ * smk_curacc_on_task - helper to log task related access
+ * @p: the task object
+ * @access : the access requested
+ *
+ * Return 0 if access is permitted
+ */
+static int smk_curacc_on_task(struct task_struct *p, int access)
+{
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
+	return smk_curacc(task_security(p), access, &ad);
+}
+
+/**
  * smack_task_setpgid - Smack check on setting pgid
  * @p: the task object
  * @pgid: unused
@@ -1061,7 +1176,7 @@
  */
 static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
 {
-	return smk_curacc(task_security(p), MAY_WRITE);
+	return smk_curacc_on_task(p, MAY_WRITE);
 }
 
 /**
@@ -1072,7 +1187,7 @@
  */
 static int smack_task_getpgid(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1083,7 +1198,7 @@
  */
 static int smack_task_getsid(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1111,7 +1226,7 @@
 
 	rc = cap_task_setnice(p, nice);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1128,7 +1243,7 @@
 
 	rc = cap_task_setioprio(p, ioprio);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1140,7 +1255,7 @@
  */
 static int smack_task_getioprio(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1158,7 +1273,7 @@
 
 	rc = cap_task_setscheduler(p, policy, lp);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1170,7 +1285,7 @@
  */
 static int smack_task_getscheduler(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1181,7 +1296,7 @@
  */
 static int smack_task_movememory(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_WRITE);
+	return smk_curacc_on_task(p, MAY_WRITE);
 }
 
 /**
@@ -1199,18 +1314,23 @@
 static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 			   int sig, u32 secid)
 {
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
 	/*
 	 * Sending a signal requires that the sender
 	 * can write the receiver.
 	 */
 	if (secid == 0)
-		return smk_curacc(task_security(p), MAY_WRITE);
+		return smk_curacc(task_security(p), MAY_WRITE, &ad);
 	/*
 	 * If the secid isn't 0 we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE);
+	return smk_access(smack_from_secid(secid), task_security(p),
+			  MAY_WRITE, &ad);
 }
 
 /**
@@ -1221,11 +1341,15 @@
  */
 static int smack_task_wait(struct task_struct *p)
 {
+	struct smk_audit_info ad;
+	char *sp = current_security();
+	char *tsp = task_security(p);
 	int rc;
 
-	rc = smk_access(current_security(), task_security(p), MAY_WRITE);
+	/* we don't log here, we can be overriden */
+	rc = smk_access(sp, tsp, MAY_WRITE, NULL);
 	if (rc == 0)
-		return 0;
+		goto out_log;
 
 	/*
 	 * Allow the operation to succeed if either task
@@ -1239,8 +1363,12 @@
 	 * the smack value.
 	 */
 	if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE))
-		return 0;
-
+		rc = 0;
+	/* we log only if we didn't get overriden */
+ out_log:
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
+	smack_log(sp, tsp, MAY_WRITE, rc, &ad);
 	return rc;
 }
 
@@ -1456,12 +1584,19 @@
 	int sk_lbl;
 	char *hostsp;
 	struct socket_smack *ssp = sk->sk_security;
+	struct smk_audit_info ad;
 
 	rcu_read_lock();
 	hostsp = smack_host_label(sap);
 	if (hostsp != NULL) {
 		sk_lbl = SMACK_UNLABELED_SOCKET;
-		rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE);
+#ifdef CONFIG_AUDIT
+		smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+		ad.a.u.net.family = sap->sin_family;
+		ad.a.u.net.dport = sap->sin_port;
+		ad.a.u.net.v4info.daddr = sap->sin_addr.s_addr;
+#endif
+		rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE, &ad);
 	} else {
 		sk_lbl = SMACK_CIPSO_SOCKET;
 		rc = 0;
@@ -1657,6 +1792,25 @@
 }
 
 /**
+ * smk_curacc_shm : check if current has access on shm
+ * @shp : the object
+ * @access : access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smk_curacc_shm(struct shmid_kernel *shp, int access)
+{
+	char *ssp = smack_of_shm(shp);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = shp->shm_perm.id;
+#endif
+	return smk_curacc(ssp, access, &ad);
+}
+
+/**
  * smack_shm_associate - Smack access check for shm
  * @shp: the object
  * @shmflg: access requested
@@ -1665,11 +1819,10 @@
  */
 static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
 {
-	char *ssp = smack_of_shm(shp);
 	int may;
 
 	may = smack_flags_to_may(shmflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1681,7 +1834,6 @@
  */
 static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
 {
-	char *ssp;
 	int may;
 
 	switch (cmd) {
@@ -1704,9 +1856,7 @@
 	default:
 		return -EINVAL;
 	}
-
-	ssp = smack_of_shm(shp);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1720,11 +1870,10 @@
 static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr,
 			   int shmflg)
 {
-	char *ssp = smack_of_shm(shp);
 	int may;
 
 	may = smack_flags_to_may(shmflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1766,6 +1915,25 @@
 }
 
 /**
+ * smk_curacc_sem : check if current has access on sem
+ * @sma : the object
+ * @access : access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smk_curacc_sem(struct sem_array *sma, int access)
+{
+	char *ssp = smack_of_sem(sma);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = sma->sem_perm.id;
+#endif
+	return smk_curacc(ssp, access, &ad);
+}
+
+/**
  * smack_sem_associate - Smack access check for sem
  * @sma: the object
  * @semflg: access requested
@@ -1774,11 +1942,10 @@
  */
 static int smack_sem_associate(struct sem_array *sma, int semflg)
 {
-	char *ssp = smack_of_sem(sma);
 	int may;
 
 	may = smack_flags_to_may(semflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_sem(sma, may);
 }
 
 /**
@@ -1790,7 +1957,6 @@
  */
 static int smack_sem_semctl(struct sem_array *sma, int cmd)
 {
-	char *ssp;
 	int may;
 
 	switch (cmd) {
@@ -1819,8 +1985,7 @@
 		return -EINVAL;
 	}
 
-	ssp = smack_of_sem(sma);
-	return smk_curacc(ssp, may);
+	return smk_curacc_sem(sma, may);
 }
 
 /**
@@ -1837,9 +2002,7 @@
 static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops,
 			   unsigned nsops, int alter)
 {
-	char *ssp = smack_of_sem(sma);
-
-	return smk_curacc(ssp, MAY_READWRITE);
+	return smk_curacc_sem(sma, MAY_READWRITE);
 }
 
 /**
@@ -1881,6 +2044,25 @@
 }
 
 /**
+ * smk_curacc_msq : helper to check if current has access on msq
+ * @msq : the msq
+ * @access : access requested
+ *
+ * return 0 if current has access, error otherwise
+ */
+static int smk_curacc_msq(struct msg_queue *msq, int access)
+{
+	char *msp = smack_of_msq(msq);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = msq->q_perm.id;
+#endif
+	return smk_curacc(msp, access, &ad);
+}
+
+/**
  * smack_msg_queue_associate - Smack access check for msg_queue
  * @msq: the object
  * @msqflg: access requested
@@ -1889,11 +2071,10 @@
  */
 static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
 {
-	char *msp = smack_of_msq(msq);
 	int may;
 
 	may = smack_flags_to_may(msqflg);
-	return smk_curacc(msp, may);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1905,7 +2086,6 @@
  */
 static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 {
-	char *msp;
 	int may;
 
 	switch (cmd) {
@@ -1927,8 +2107,7 @@
 		return -EINVAL;
 	}
 
-	msp = smack_of_msq(msq);
-	return smk_curacc(msp, may);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1942,11 +2121,10 @@
 static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
 				  int msqflg)
 {
-	char *msp = smack_of_msq(msq);
-	int rc;
+	int may;
 
-	rc = smack_flags_to_may(msqflg);
-	return smk_curacc(msp, rc);
+	may = smack_flags_to_may(msqflg);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1962,9 +2140,7 @@
 static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 			struct task_struct *target, long type, int mode)
 {
-	char *msp = smack_of_msq(msq);
-
-	return smk_curacc(msp, MAY_READWRITE);
+	return smk_curacc_msq(msq, MAY_READWRITE);
 }
 
 /**
@@ -1977,10 +2153,14 @@
 static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag)
 {
 	char *isp = ipp->security;
-	int may;
+	int may = smack_flags_to_may(flag);
+	struct smk_audit_info ad;
 
-	may = smack_flags_to_may(flag);
-	return smk_curacc(isp, may);
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = ipp->id;
+#endif
+	return smk_curacc(isp, may, &ad);
 }
 
 /**
@@ -2239,8 +2419,12 @@
 {
 	struct inode *sp = SOCK_INODE(sock);
 	struct inode *op = SOCK_INODE(other);
+	struct smk_audit_info ad;
 
-	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	return smk_access(smk_of_inode(sp), smk_of_inode(op),
+				 MAY_READWRITE, &ad);
 }
 
 /**
@@ -2255,8 +2439,11 @@
 {
 	struct inode *sp = SOCK_INODE(sock);
 	struct inode *op = SOCK_INODE(other);
+	struct smk_audit_info ad;
 
-	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE, &ad);
 }
 
 /**
@@ -2371,7 +2558,7 @@
 	char smack[SMK_LABELLEN];
 	char *csp;
 	int rc;
-
+	struct smk_audit_info ad;
 	if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
 		return 0;
 
@@ -2389,13 +2576,19 @@
 
 	netlbl_secattr_destroy(&secattr);
 
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	ad.a.u.net.family = sk->sk_family;
+	ad.a.u.net.netif = skb->iif;
+	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
+#endif
 	/*
 	 * Receiving a packet requires that the other end
 	 * be able to write here. Read access is not required.
 	 * This is the simplist possible security model
 	 * for networking.
 	 */
-	rc = smk_access(csp, ssp->smk_in, MAY_WRITE);
+	rc = smk_access(csp, ssp->smk_in, MAY_WRITE, &ad);
 	if (rc != 0)
 		netlbl_skbuff_err(skb, rc, 0);
 	return rc;
@@ -2524,6 +2717,7 @@
 	struct iphdr *hdr;
 	char smack[SMK_LABELLEN];
 	int rc;
+	struct smk_audit_info ad;
 
 	/* handle mapped IPv4 packets arriving via IPv6 sockets */
 	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
@@ -2537,11 +2731,17 @@
 		strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN);
 	netlbl_secattr_destroy(&secattr);
 
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	ad.a.u.net.family = family;
+	ad.a.u.net.netif = skb->iif;
+	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
+#endif
 	/*
 	 * Receiving a packet requires that the other end be able to write
 	 * here. Read access is not required.
 	 */
-	rc = smk_access(smack, ssp->smk_in, MAY_WRITE);
+	rc = smk_access(smack, ssp->smk_in, MAY_WRITE, &ad);
 	if (rc != 0)
 		return rc;
 
@@ -2643,6 +2843,7 @@
 				const struct cred *cred, key_perm_t perm)
 {
 	struct key *keyp;
+	struct smk_audit_info ad;
 
 	keyp = key_ref_to_ptr(key_ref);
 	if (keyp == NULL)
@@ -2658,8 +2859,13 @@
 	 */
 	if (cred->security == NULL)
 		return -EACCES;
-
-	return smk_access(cred->security, keyp->security, MAY_READWRITE);
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY);
+	ad.a.u.key_struct.key = keyp->serial;
+	ad.a.u.key_struct.key_desc = keyp->description;
+#endif
+	return smk_access(cred->security, keyp->security,
+				 MAY_READWRITE, &ad);
 }
 #endif /* CONFIG_KEYS */
 
@@ -2828,15 +3034,7 @@
 
 	.ptrace_may_access =		smack_ptrace_may_access,
 	.ptrace_traceme =		smack_ptrace_traceme,
-	.capget = 			cap_capget,
-	.capset = 			cap_capset,
-	.capable = 			cap_capable,
 	.syslog = 			smack_syslog,
-	.settime = 			cap_settime,
-	.vm_enough_memory = 		cap_vm_enough_memory,
-
-	.bprm_set_creds = 		cap_bprm_set_creds,
-	.bprm_secureexec = 		cap_bprm_secureexec,
 
 	.sb_alloc_security = 		smack_sb_alloc_security,
 	.sb_free_security = 		smack_sb_free_security,
@@ -2860,8 +3058,6 @@
 	.inode_post_setxattr = 		smack_inode_post_setxattr,
 	.inode_getxattr = 		smack_inode_getxattr,
 	.inode_removexattr = 		smack_inode_removexattr,
-	.inode_need_killpriv =		cap_inode_need_killpriv,
-	.inode_killpriv =		cap_inode_killpriv,
 	.inode_getsecurity = 		smack_inode_getsecurity,
 	.inode_setsecurity = 		smack_inode_setsecurity,
 	.inode_listsecurity = 		smack_inode_listsecurity,
@@ -2882,7 +3078,6 @@
 	.cred_commit =			smack_cred_commit,
 	.kernel_act_as =		smack_kernel_act_as,
 	.kernel_create_files_as =	smack_kernel_create_files_as,
-	.task_fix_setuid =		cap_task_fix_setuid,
 	.task_setpgid = 		smack_task_setpgid,
 	.task_getpgid = 		smack_task_getpgid,
 	.task_getsid = 			smack_task_getsid,
@@ -2896,7 +3091,6 @@
 	.task_kill = 			smack_task_kill,
 	.task_wait = 			smack_task_wait,
 	.task_to_inode = 		smack_task_to_inode,
-	.task_prctl =			cap_task_prctl,
 
 	.ipc_permission = 		smack_ipc_permission,
 	.ipc_getsecid =			smack_ipc_getsecid,
@@ -2923,9 +3117,6 @@
 	.sem_semctl = 			smack_sem_semctl,
 	.sem_semop = 			smack_sem_semop,
 
-	.netlink_send =			cap_netlink_send,
-	.netlink_recv = 		cap_netlink_recv,
-
 	.d_instantiate = 		smack_d_instantiate,
 
 	.getprocattr = 			smack_getprocattr,
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e03a7e1..f83a809 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -41,6 +41,7 @@
 	SMK_AMBIENT	= 7,	/* internet ambient label */
 	SMK_NETLBLADDR	= 8,	/* single label hosts */
 	SMK_ONLYCAP	= 9,	/* the only "capable" label */
+	SMK_LOGGING	= 10,	/* logging */
 };
 
 /*
@@ -734,8 +735,8 @@
 		return;
 	}
 
-	m = list_entry(rcu_dereference(smk_netlbladdr_list.next),
-			 struct smk_netlbladdr, list);
+	m = list_entry_rcu(smk_netlbladdr_list.next,
+			   struct smk_netlbladdr, list);
 
 	/* the comparison '>' is a bit hacky, but works */
 	if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -748,8 +749,8 @@
 			list_add_rcu(&new->list, &m->list);
 			return;
 		}
-		m_next = list_entry(rcu_dereference(m->list.next),
-				 struct smk_netlbladdr, list);
+		m_next = list_entry_rcu(m->list.next,
+					struct smk_netlbladdr, list);
 		if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
 			list_add_rcu(&new->list, &m->list);
 			return;
@@ -775,7 +776,7 @@
 	struct sockaddr_in newname;
 	char smack[SMK_LABELLEN];
 	char *sp;
-	char data[SMK_NETLBLADDRMAX];
+	char data[SMK_NETLBLADDRMAX + 1];
 	char *host = (char *)&newname.sin_addr.s_addr;
 	int rc;
 	struct netlbl_audit audit_info;
@@ -1192,6 +1193,69 @@
 };
 
 /**
+ * smk_read_logging - read() for /smack/logging
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @cn: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code, as appropriate
+ */
+static ssize_t smk_read_logging(struct file *filp, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char temp[32];
+	ssize_t rc;
+
+	if (*ppos != 0)
+		return 0;
+
+	sprintf(temp, "%d\n", log_policy);
+	rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
+	return rc;
+}
+
+/**
+ * smk_write_logging - write() for /smack/logging
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_logging(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char temp[32];
+	int i;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (count >= sizeof(temp) || count == 0)
+		return -EINVAL;
+
+	if (copy_from_user(temp, buf, count) != 0)
+		return -EFAULT;
+
+	temp[count] = '\0';
+
+	if (sscanf(temp, "%d", &i) != 1)
+		return -EINVAL;
+	if (i < 0 || i > 3)
+		return -EINVAL;
+	log_policy = i;
+	return count;
+}
+
+
+
+static const struct file_operations smk_logging_ops = {
+	.read		= smk_read_logging,
+	.write		= smk_write_logging,
+};
+/**
  * smk_fill_super - fill the /smackfs superblock
  * @sb: the empty superblock
  * @data: unused
@@ -1221,6 +1285,8 @@
 			{"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
 		[SMK_ONLYCAP]	=
 			{"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
+		[SMK_LOGGING]	=
+			{"logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
 		/* last one */ {""}
 	};
 
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index ddfb9cc..fdd1f4b8 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -28,7 +28,13 @@
 	"disabled", "enabled", "enabled", "enabled"
 };
 
-/* Table for profile. */
+/*
+ * tomoyo_control_array is a static data which contains
+ *
+ *  (1) functionality name used by /sys/kernel/security/tomoyo/profile .
+ *  (2) initial values for "struct tomoyo_profile".
+ *  (3) max values for "struct tomoyo_profile".
+ */
 static struct {
 	const char *keyword;
 	unsigned int current_value;
@@ -39,7 +45,13 @@
 	[TOMOYO_VERBOSE]          = { "TOMOYO_VERBOSE",      1,       1 },
 };
 
-/* Profile table. Memory is allocated as needed. */
+/*
+ * tomoyo_profile is a structure which is used for holding the mode of access
+ * controls. TOMOYO has 4 modes: disabled, learning, permissive, enforcing.
+ * An administrator can define up to 256 profiles.
+ * The ->profile of "struct tomoyo_domain_info" is used for remembering
+ * the profile's number (0 - 255) assigned to that domain.
+ */
 static struct tomoyo_profile {
 	unsigned int value[TOMOYO_MAX_CONTROL_INDEX];
 	const struct tomoyo_path_info *comment;
@@ -428,7 +440,6 @@
 	const char *name = ptr->name;
 	const int len = strlen(name);
 
-	ptr->total_len = len;
 	ptr->const_len = tomoyo_const_part_length(name);
 	ptr->is_dir = len && (name[len - 1] == '/');
 	ptr->is_patterned = (ptr->const_len < len);
@@ -866,7 +877,6 @@
 
 	if (profile >= TOMOYO_MAX_PROFILES)
 		return NULL;
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	ptr = tomoyo_profile_ptr[profile];
 	if (ptr)
@@ -880,7 +890,6 @@
 	tomoyo_profile_ptr[profile] = ptr;
  ok:
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr;
 }
 
@@ -1009,7 +1018,19 @@
 	return 0;
 }
 
-/* Structure for policy manager. */
+/*
+ * tomoyo_policy_manager_entry is a structure which is used for holding list of
+ * domainnames or programs which are permitted to modify configuration via
+ * /sys/kernel/security/tomoyo/ interface.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_policy_manager_list .
+ *  (2) "manager" is a domainname or a program's pathname.
+ *  (3) "is_domain" is a bool which is true if "manager" is a domainname, false
+ *      otherwise.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_policy_manager_entry {
 	struct list_head list;
 	/* A path to program or a domainname. */
@@ -1018,7 +1039,36 @@
 	bool is_deleted; /* True if this entry is deleted. */
 };
 
-/* The list for "struct tomoyo_policy_manager_entry". */
+/*
+ * tomoyo_policy_manager_list is used for holding list of domainnames or
+ * programs which are permitted to modify configuration via
+ * /sys/kernel/security/tomoyo/ interface.
+ *
+ * An entry is added by
+ *
+ * # echo '<kernel> /sbin/mingetty /bin/login /bin/bash' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *  (if you want to specify by a domainname)
+ *
+ *  or
+ *
+ * # echo '/usr/lib/ccs/editpolicy' > /sys/kernel/security/tomoyo/manager
+ *  (if you want to specify by a program's location)
+ *
+ * and is deleted by
+ *
+ * # echo 'delete <kernel> /sbin/mingetty /bin/login /bin/bash' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *
+ *  or
+ *
+ * # echo 'delete /usr/lib/ccs/editpolicy' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *
+ * and all entries are retrieved by
+ *
+ * # cat /sys/kernel/security/tomoyo/manager
+ */
 static LIST_HEAD(tomoyo_policy_manager_list);
 static DECLARE_RWSEM(tomoyo_policy_manager_list_lock);
 
@@ -1050,7 +1100,6 @@
 	saved_manager = tomoyo_save_name(manager);
 	if (!saved_manager)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_policy_manager_list_lock);
 	list_for_each_entry(ptr, &tomoyo_policy_manager_list, list) {
 		if (ptr->manager != saved_manager)
@@ -1072,7 +1121,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_policy_manager_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -1117,10 +1165,9 @@
 				 list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, "%s\n", ptr->manager->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%s\n", ptr->manager->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_policy_manager_list_lock);
 	head->read_eof = done;
@@ -1197,13 +1244,11 @@
 
 	if (sscanf(data, "pid=%u", &pid) == 1) {
 		struct task_struct *p;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		p = find_task_by_vpid(pid);
 		if (p)
 			domain = tomoyo_real_domain(p);
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 	} else if (!strncmp(data, "domain=", 7)) {
 		if (tomoyo_is_domain_def(data + 7)) {
 			down_read(&tomoyo_domain_list_lock);
@@ -1447,15 +1492,14 @@
 		    TOMOYO_DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ)
 			ignore_global_allow_read
 				= TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "\n";
-		if (!tomoyo_io_printf(head,
-				      "%s\n" TOMOYO_KEYWORD_USE_PROFILE "%u\n"
-				      "%s%s%s\n", domain->domainname->name,
-				      domain->profile, quota_exceeded,
-				      transition_failed,
-				      ignore_global_allow_read)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%s\n" TOMOYO_KEYWORD_USE_PROFILE
+					"%u\n%s%s%s\n",
+					domain->domainname->name,
+					domain->profile, quota_exceeded,
+					transition_failed,
+					ignore_global_allow_read);
+		if (!done)
 			break;
-		}
 		head->read_step = 2;
 acl_loop:
 		if (head->read_step == 3)
@@ -1463,24 +1507,22 @@
 		/* Print ACL entries in the domain. */
 		down_read(&tomoyo_domain_acl_info_list_lock);
 		list_for_each_cookie(apos, head->read_var2,
-				      &domain->acl_info_list) {
+				     &domain->acl_info_list) {
 			struct tomoyo_acl_info *ptr
 				= list_entry(apos, struct tomoyo_acl_info,
-					      list);
-			if (!tomoyo_print_entry(head, ptr)) {
-				done = false;
+					     list);
+			done = tomoyo_print_entry(head, ptr);
+			if (!done)
 				break;
-			}
 		}
 		up_read(&tomoyo_domain_acl_info_list_lock);
 		if (!done)
 			break;
 		head->read_step = 3;
 tail_mark:
-		if (!tomoyo_io_printf(head, "\n")) {
-			done = false;
+		done = tomoyo_io_printf(head, "\n");
+		if (!done)
 			break;
-		}
 		head->read_step = 1;
 		if (head->read_single_domain)
 			break;
@@ -1550,11 +1592,10 @@
 		domain = list_entry(pos, struct tomoyo_domain_info, list);
 		if (domain->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, "%u %s\n", domain->profile,
-				      domain->domainname->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%u %s\n", domain->profile,
+					domain->domainname->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_list_lock);
 	head->read_eof = done;
@@ -1594,13 +1635,11 @@
 		const int pid = head->read_step;
 		struct task_struct *p;
 		struct tomoyo_domain_info *domain = NULL;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		p = find_task_by_vpid(pid);
 		if (p)
 			domain = tomoyo_real_domain(p);
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 		if (domain)
 			tomoyo_io_printf(head, "%d %u %s", pid, domain->profile,
 					 domain->domainname->name);
@@ -2138,7 +2177,13 @@
 	return tomoyo_write_control(file, buf, count);
 }
 
-/* Operations for /sys/kernel/security/tomoyo/ interface. */
+/*
+ * tomoyo_operations is a "struct file_operations" which is used for handling
+ * /sys/kernel/security/tomoyo/ interface.
+ *
+ * Some files under /sys/kernel/security/tomoyo/ directory accept open(O_RDWR).
+ * See tomoyo_io_buffer for internals.
+ */
 static const struct file_operations tomoyo_operations = {
 	.open    = tomoyo_open,
 	.release = tomoyo_release,
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 678f4ff..6d6ba09 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -26,16 +26,43 @@
 struct dentry;
 struct vfsmount;
 
-/* Temporary buffer for holding pathnames. */
+/*
+ * tomoyo_page_buffer is a structure which is used for holding a pathname
+ * obtained from "struct dentry" and "struct vfsmount" pair.
+ * As of now, it is 4096 bytes. If users complain that 4096 bytes is too small
+ * (because TOMOYO escapes non ASCII printable characters using \ooo format),
+ * we will make the buffer larger.
+ */
 struct tomoyo_page_buffer {
 	char buffer[4096];
 };
 
-/* Structure for holding a token. */
+/*
+ * tomoyo_path_info is a structure which is used for holding a string data
+ * used by TOMOYO.
+ * This structure has several fields for supporting pattern matching.
+ *
+ * (1) "name" is the '\0' terminated string data.
+ * (2) "hash" is full_name_hash(name, strlen(name)).
+ *     This allows tomoyo_pathcmp() to compare by hash before actually compare
+ *     using strcmp().
+ * (3) "const_len" is the length of the initial segment of "name" which
+ *     consists entirely of non wildcard characters. In other words, the length
+ *     which we can compare two strings using strncmp().
+ * (4) "is_dir" is a bool which is true if "name" ends with "/",
+ *     false otherwise.
+ *     TOMOYO distinguishes directory and non-directory. A directory ends with
+ *     "/" and non-directory does not end with "/".
+ * (5) "is_patterned" is a bool which is true if "name" contains wildcard
+ *     characters, false otherwise. This allows TOMOYO to use "hash" and
+ *     strcmp() for string comparison if "is_patterned" is false.
+ * (6) "depth" is calculated using the number of "/" characters in "name".
+ *     This allows TOMOYO to avoid comparing two pathnames which never match
+ *     (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$").
+ */
 struct tomoyo_path_info {
 	const char *name;
 	u32 hash;          /* = full_name_hash(name, strlen(name)) */
-	u16 total_len;     /* = strlen(name)                       */
 	u16 const_len;     /* = tomoyo_const_part_length(name)     */
 	bool is_dir;       /* = tomoyo_strendswith(name, "/")      */
 	bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
@@ -51,7 +78,20 @@
  */
 #define TOMOYO_MAX_PATHNAME_LEN 4000
 
-/* Structure for holding requested pathname. */
+/*
+ * tomoyo_path_info_with_data is a structure which is used for holding a
+ * pathname obtained from "struct dentry" and "struct vfsmount" pair.
+ *
+ * "struct tomoyo_path_info_with_data" consists of "struct tomoyo_path_info"
+ * and buffer for the pathname, while "struct tomoyo_page_buffer" consists of
+ * buffer for the pathname only.
+ *
+ * "struct tomoyo_path_info_with_data" is intended to allow TOMOYO to release
+ * both "struct tomoyo_path_info" and buffer for the pathname by single kfree()
+ * so that we don't need to return two pointers to the caller. If the caller
+ * puts "struct tomoyo_path_info" on stack memory, we will be able to remove
+ * "struct tomoyo_path_info_with_data".
+ */
 struct tomoyo_path_info_with_data {
 	/* Keep "head" first, for this pointer is passed to tomoyo_free(). */
 	struct tomoyo_path_info head;
@@ -61,7 +101,15 @@
 };
 
 /*
- * Common header for holding ACL entries.
+ * tomoyo_acl_info is a structure which is used for holding
+ *
+ *  (1) "list" which is linked to the ->acl_info_list of
+ *      "struct tomoyo_domain_info"
+ *  (2) "type" which tells
+ *      (a) type & 0x7F : type of the entry (either
+ *          "struct tomoyo_single_path_acl_record" or
+ *          "struct tomoyo_double_path_acl_record")
+ *      (b) type & 0x80 : whether the entry is marked as "deleted".
  *
  * Packing "struct tomoyo_acl_info" allows
  * "struct tomoyo_single_path_acl_record" to embed "u16" and
@@ -81,7 +129,28 @@
 /* This ACL entry is deleted.           */
 #define TOMOYO_ACL_DELETED        0x80
 
-/* Structure for domain information. */
+/*
+ * tomoyo_domain_info is a structure which is used for holding permissions
+ * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_list .
+ *  (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
+ *  (3) "domainname" which holds the name of the domain.
+ *  (4) "profile" which remembers profile number assigned to this domain.
+ *  (5) "is_deleted" is a bool which is true if this domain is marked as
+ *      "deleted", false otherwise.
+ *  (6) "quota_warned" is a bool which is used for suppressing warning message
+ *      when learning mode learned too much entries.
+ *  (7) "flags" which remembers this domain's attributes.
+ *
+ * A domain's lifecycle is an analogy of files on / directory.
+ * Multiple domains with the same domainname cannot be created (as with
+ * creating files with the same filename fails with -EEXIST).
+ * If a process reached a domain, that process can reside in that domain after
+ * that domain is marked as "deleted" (as with a process can access an already
+ * open()ed file after that file was unlink()ed).
+ */
 struct tomoyo_domain_info {
 	struct list_head list;
 	struct list_head acl_info_list;
@@ -108,10 +177,18 @@
 #define TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED        2
 
 /*
- * Structure for "allow_read/write", "allow_execute", "allow_read",
- * "allow_write", "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir",
- * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar",
- * "allow_truncate", "allow_symlink" and "allow_rewrite" directive.
+ * tomoyo_single_path_acl_record is a structure which is used for holding an
+ * entry with one pathname operation (e.g. open(), mkdir()).
+ * It has following fields.
+ *
+ *  (1) "head" which is a "struct tomoyo_acl_info".
+ *  (2) "perm" which is a bitmask of permitted operations.
+ *  (3) "filename" is the pathname.
+ *
+ * Directives held by this structure are "allow_read/write", "allow_execute",
+ * "allow_read", "allow_write", "allow_create", "allow_unlink", "allow_mkdir",
+ * "allow_rmdir", "allow_mkfifo", "allow_mksock", "allow_mkblock",
+ * "allow_mkchar", "allow_truncate", "allow_symlink" and "allow_rewrite".
  */
 struct tomoyo_single_path_acl_record {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_SINGLE_PATH_ACL */
@@ -120,7 +197,18 @@
 	const struct tomoyo_path_info *filename;
 };
 
-/* Structure for "allow_rename" and "allow_link" directive. */
+/*
+ * tomoyo_double_path_acl_record is a structure which is used for holding an
+ * entry with two pathnames operation (i.e. link() and rename()).
+ * It has following fields.
+ *
+ *  (1) "head" which is a "struct tomoyo_acl_info".
+ *  (2) "perm" which is a bitmask of permitted operations.
+ *  (3) "filename1" is the source/old pathname.
+ *  (4) "filename2" is the destination/new pathname.
+ *
+ * Directives held by this structure are "allow_rename" and "allow_link".
+ */
 struct tomoyo_double_path_acl_record {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_DOUBLE_PATH_ACL */
 	u8 perm;
@@ -153,7 +241,29 @@
 #define TOMOYO_VERBOSE                       2
 #define TOMOYO_MAX_CONTROL_INDEX             3
 
-/* Structure for reading/writing policy via securityfs interfaces. */
+/*
+ * tomoyo_io_buffer is a structure which is used for reading and modifying
+ * configuration via /sys/kernel/security/tomoyo/ interface.
+ * It has many fields. ->read_var1 , ->read_var2 , ->write_var1 are used as
+ * cursors.
+ *
+ * Since the content of /sys/kernel/security/tomoyo/domain_policy is a list of
+ * "struct tomoyo_domain_info" entries and each "struct tomoyo_domain_info"
+ * entry has a list of "struct tomoyo_acl_info", we need two cursors when
+ * reading (one is for traversing tomoyo_domain_list and the other is for
+ * traversing "struct tomoyo_acl_info"->acl_info_list ).
+ *
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * "select ", TOMOYO seeks the cursor ->read_var1 and ->write_var1 to the
+ * domain with the domainname specified by the rest of that line (NULL is set
+ * if seek failed).
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * "delete ", TOMOYO deletes an entry or a domain specified by the rest of that
+ * line (->write_var1 is set to NULL if a domain was deleted).
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * neither "select " nor "delete ", an entry or a domain specified by that line
+ * is appended.
+ */
 struct tomoyo_io_buffer {
 	int (*read) (struct tomoyo_io_buffer *);
 	int (*write) (struct tomoyo_io_buffer *);
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 2d67487..1d8b169 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -19,11 +19,63 @@
 /* The initial domain. */
 struct tomoyo_domain_info tomoyo_kernel_domain;
 
-/* The list for "struct tomoyo_domain_info". */
+/*
+ * tomoyo_domain_list is used for holding list of domains.
+ * The ->acl_info_list of "struct tomoyo_domain_info" is used for holding
+ * permissions (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
+ *
+ * An entry is added by
+ *
+ * # ( echo "<kernel>"; echo "allow_execute /sbin/init" ) > \
+ *                                  /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and is deleted by
+ *
+ * # ( echo "<kernel>"; echo "delete allow_execute /sbin/init" ) > \
+ *                                  /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # cat /sys/kernel/security/tomoyo/domain_policy
+ *
+ * A domain is added by
+ *
+ * # echo "<kernel>" > /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and is deleted by
+ *
+ * # echo "delete <kernel>" > /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and all domains are retrieved by
+ *
+ * # grep '^<kernel>' /sys/kernel/security/tomoyo/domain_policy
+ *
+ * Normally, a domainname is monotonically getting longer because a domainname
+ * which the process will belong to if an execve() operation succeeds is
+ * defined as a concatenation of "current domainname" + "pathname passed to
+ * execve()".
+ * See tomoyo_domain_initializer_list and tomoyo_domain_keeper_list for
+ * exceptions.
+ */
 LIST_HEAD(tomoyo_domain_list);
 DECLARE_RWSEM(tomoyo_domain_list_lock);
 
-/* Structure for "initialize_domain" and "no_initialize_domain" keyword. */
+/*
+ * tomoyo_domain_initializer_entry is a structure which is used for holding
+ * "initialize_domain" and "no_initialize_domain" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_initializer_list .
+ *  (2) "domainname" which is "a domainname" or "the last component of a
+ *      domainname". This field is NULL if "from" clause is not specified.
+ *  (3) "program" which is a program's pathname.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ *  (5) "is_not" is a bool which is true if "no_initialize_domain", false
+ *      otherwise.
+ *  (6) "is_last_name" is a bool which is true if "domainname" is "the last
+ *      component of a domainname", false otherwise.
+ */
 struct tomoyo_domain_initializer_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *domainname;    /* This may be NULL */
@@ -34,7 +86,23 @@
 	bool is_last_name;
 };
 
-/* Structure for "keep_domain" and "no_keep_domain" keyword. */
+/*
+ * tomoyo_domain_keeper_entry is a structure which is used for holding
+ * "keep_domain" and "no_keep_domain" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_keeper_list .
+ *  (2) "domainname" which is "a domainname" or "the last component of a
+ *      domainname".
+ *  (3) "program" which is a program's pathname.
+ *      This field is NULL if "from" clause is not specified.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ *  (5) "is_not" is a bool which is true if "no_initialize_domain", false
+ *      otherwise.
+ *  (6) "is_last_name" is a bool which is true if "domainname" is "the last
+ *      component of a domainname", false otherwise.
+ */
 struct tomoyo_domain_keeper_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *domainname;
@@ -45,7 +113,16 @@
 	bool is_last_name;
 };
 
-/* Structure for "alias" keyword. */
+/*
+ * tomoyo_alias_entry is a structure which is used for holding "alias" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_alias_list .
+ *  (2) "original_name" which is a dereferenced pathname.
+ *  (3) "aliased_name" which is a symlink's pathname.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_alias_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *original_name;
@@ -67,14 +144,12 @@
 {
 	/* We need to serialize because this is bitfield operation. */
 	static DEFINE_SPINLOCK(lock);
-	/***** CRITICAL SECTION START *****/
 	spin_lock(&lock);
 	if (!is_delete)
 		domain->flags |= flags;
 	else
 		domain->flags &= ~flags;
 	spin_unlock(&lock);
-	/***** CRITICAL SECTION END *****/
 }
 
 /**
@@ -94,7 +169,42 @@
 	return cp0;
 }
 
-/* The list for "struct tomoyo_domain_initializer_entry". */
+/*
+ * tomoyo_domain_initializer_list is used for holding list of programs which
+ * triggers reinitialization of domainname. Normally, a domainname is
+ * monotonically getting longer. But sometimes, we restart daemon programs.
+ * It would be convenient for us that "a daemon started upon system boot" and
+ * "the daemon restarted from console" belong to the same domain. Thus, TOMOYO
+ * provides a way to shorten domainnames.
+ *
+ * An entry is added by
+ *
+ * # echo 'initialize_domain /usr/sbin/httpd' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete initialize_domain /usr/sbin/httpd' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^initialize_domain /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, /usr/sbin/httpd will belong to
+ * "<kernel> /usr/sbin/httpd" domain.
+ *
+ * You may specify a domainname using "from" keyword.
+ * "initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
+ * will cause "/usr/sbin/httpd" executed from "<kernel> /etc/rc.d/init.d/httpd"
+ * domain to belong to "<kernel> /usr/sbin/httpd" domain.
+ *
+ * You may add "no_" prefix to "initialize_domain".
+ * "initialize_domain /usr/sbin/httpd" and
+ * "no_initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
+ * will cause "/usr/sbin/httpd" to belong to "<kernel> /usr/sbin/httpd" domain
+ * unless executed from "<kernel> /etc/rc.d/init.d/httpd" domain.
+ */
 static LIST_HEAD(tomoyo_domain_initializer_list);
 static DECLARE_RWSEM(tomoyo_domain_initializer_list_lock);
 
@@ -135,7 +245,6 @@
 	saved_program = tomoyo_save_name(program);
 	if (!saved_program)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_initializer_list_lock);
 	list_for_each_entry(ptr, &tomoyo_domain_initializer_list, list) {
 		if (ptr->is_not != is_not ||
@@ -161,7 +270,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_domain_initializer_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -193,13 +301,12 @@
 			from = " from ";
 			domain = ptr->domainname->name;
 		}
-		if (!tomoyo_io_printf(head,
-				      "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN
-				      "%s%s%s\n", no, ptr->program->name, from,
-				      domain)) {
-			done = false;
+		done = tomoyo_io_printf(head,
+					"%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN
+					"%s%s%s\n", no, ptr->program->name,
+					from, domain);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_initializer_list_lock);
 	return done;
@@ -273,7 +380,44 @@
 	return flag;
 }
 
-/* The list for "struct tomoyo_domain_keeper_entry". */
+/*
+ * tomoyo_domain_keeper_list is used for holding list of domainnames which
+ * suppresses domain transition. Normally, a domainname is monotonically
+ * getting longer. But sometimes, we want to suppress domain transition.
+ * It would be convenient for us that programs executed from a login session
+ * belong to the same domain. Thus, TOMOYO provides a way to suppress domain
+ * transition.
+ *
+ * An entry is added by
+ *
+ * # echo 'keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^keep_domain /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, any process which belongs to
+ * "<kernel> /usr/sbin/sshd /bin/bash" domain will remain in that domain,
+ * unless explicitly specified by "initialize_domain" or "no_keep_domain".
+ *
+ * You may specify a program using "from" keyword.
+ * "keep_domain /bin/pwd from <kernel> /usr/sbin/sshd /bin/bash"
+ * will cause "/bin/pwd" executed from "<kernel> /usr/sbin/sshd /bin/bash"
+ * domain to remain in "<kernel> /usr/sbin/sshd /bin/bash" domain.
+ *
+ * You may add "no_" prefix to "keep_domain".
+ * "keep_domain <kernel> /usr/sbin/sshd /bin/bash" and
+ * "no_keep_domain /usr/bin/passwd from <kernel> /usr/sbin/sshd /bin/bash" will
+ * cause "/usr/bin/passwd" to belong to
+ * "<kernel> /usr/sbin/sshd /bin/bash /usr/bin/passwd" domain, unless
+ * explicitly specified by "initialize_domain".
+ */
 static LIST_HEAD(tomoyo_domain_keeper_list);
 static DECLARE_RWSEM(tomoyo_domain_keeper_list_lock);
 
@@ -296,7 +440,6 @@
 	struct tomoyo_domain_keeper_entry *ptr;
 	const struct tomoyo_path_info *saved_domainname;
 	const struct tomoyo_path_info *saved_program = NULL;
-	static DEFINE_MUTEX(lock);
 	int error = -ENOMEM;
 	bool is_last_name = false;
 
@@ -315,7 +458,6 @@
 	saved_domainname = tomoyo_save_name(domainname);
 	if (!saved_domainname)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_keeper_list_lock);
 	list_for_each_entry(ptr, &tomoyo_domain_keeper_list, list) {
 		if (ptr->is_not != is_not ||
@@ -341,7 +483,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_domain_keeper_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -394,13 +535,12 @@
 			from = " from ";
 			program = ptr->program->name;
 		}
-		if (!tomoyo_io_printf(head,
-				      "%s" TOMOYO_KEYWORD_KEEP_DOMAIN
-				      "%s%s%s\n", no, program, from,
-				      ptr->domainname->name)) {
-			done = false;
+		done = tomoyo_io_printf(head,
+					"%s" TOMOYO_KEYWORD_KEEP_DOMAIN
+					"%s%s%s\n", no, program, from,
+					ptr->domainname->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_keeper_list_lock);
 	return done;
@@ -446,7 +586,36 @@
 	return flag;
 }
 
-/* The list for "struct tomoyo_alias_entry". */
+/*
+ * tomoyo_alias_list is used for holding list of symlink's pathnames which are
+ * allowed to be passed to an execve() request. Normally, the domainname which
+ * the current process will belong to after execve() succeeds is calculated
+ * using dereferenced pathnames. But some programs behave differently depending
+ * on the name passed to argv[0]. For busybox, calculating domainname using
+ * dereferenced pathnames will cause all programs in the busybox to belong to
+ * the same domain. Thus, TOMOYO provides a way to allow use of symlink's
+ * pathname for checking execve()'s permission and calculating domainname which
+ * the current process will belong to after execve() succeeds.
+ *
+ * An entry is added by
+ *
+ * # echo 'alias /bin/busybox /bin/cat' > \
+ *                            /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete alias /bin/busybox /bin/cat' > \
+ *                            /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^alias /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if /bin/cat is a symlink to /bin/busybox and execution
+ * of /bin/cat is requested, permission is checked for /bin/cat rather than
+ * /bin/busybox and domainname which the current process will belong to after
+ * execve() succeeds is calculated using /bin/cat rather than /bin/busybox .
+ */
 static LIST_HEAD(tomoyo_alias_list);
 static DECLARE_RWSEM(tomoyo_alias_list_lock);
 
@@ -476,7 +645,6 @@
 	saved_aliased_name = tomoyo_save_name(aliased_name);
 	if (!saved_original_name || !saved_aliased_name)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_alias_list_lock);
 	list_for_each_entry(ptr, &tomoyo_alias_list, list) {
 		if (ptr->original_name != saved_original_name ||
@@ -499,7 +667,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_alias_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -522,12 +689,11 @@
 		ptr = list_entry(pos, struct tomoyo_alias_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n",
-				      ptr->original_name->name,
-				      ptr->aliased_name->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n",
+					ptr->original_name->name,
+					ptr->aliased_name->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_alias_list_lock);
 	return done;
@@ -567,7 +733,6 @@
 
 	name.name = domainname;
 	tomoyo_fill_path_info(&name);
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_list_lock);
 	/* Is there an active domain? */
 	list_for_each_entry(domain, &tomoyo_domain_list, list) {
@@ -581,7 +746,6 @@
 		break;
 	}
 	up_write(&tomoyo_domain_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return 0;
 }
 
@@ -600,7 +764,6 @@
 	struct tomoyo_domain_info *domain = NULL;
 	const struct tomoyo_path_info *saved_domainname;
 
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_list_lock);
 	domain = tomoyo_find_domain(domainname);
 	if (domain)
@@ -619,7 +782,6 @@
 		    domain->domainname != saved_domainname)
 			continue;
 		flag = false;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		for_each_process(p) {
 			if (tomoyo_real_domain(p) != domain)
@@ -628,7 +790,6 @@
 			break;
 		}
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 		if (flag)
 			continue;
 		list_for_each_entry(ptr, &domain->acl_info_list, list) {
@@ -651,7 +812,6 @@
 	}
  out:
 	up_write(&tomoyo_domain_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return domain;
 }
 
@@ -739,7 +899,7 @@
 	}
 
 	/* Check execute permission. */
-	retval = tomoyo_check_exec_perm(old_domain, &r, tmp);
+	retval = tomoyo_check_exec_perm(old_domain, &r);
 	if (retval < 0)
 		goto out;
 
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 2316da8..5ae3a57 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -14,21 +14,50 @@
 #include "realpath.h"
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
-/* Structure for "allow_read" keyword. */
+/*
+ * tomoyo_globally_readable_file_entry is a structure which is used for holding
+ * "allow_read" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_globally_readable_list .
+ *  (2) "filename" is a pathname which is allowed to open(O_RDONLY).
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_globally_readable_file_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *filename;
 	bool is_deleted;
 };
 
-/* Structure for "file_pattern" keyword. */
+/*
+ * tomoyo_pattern_entry is a structure which is used for holding
+ * "tomoyo_pattern_list" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_pattern_list .
+ *  (2) "pattern" is a pathname pattern which is used for converting pathnames
+ *      to pathname patterns during learning mode.
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_pattern_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *pattern;
 	bool is_deleted;
 };
 
-/* Structure for "deny_rewrite" keyword. */
+/*
+ * tomoyo_no_rewrite_entry is a structure which is used for holding
+ * "deny_rewrite" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_no_rewrite_list .
+ *  (2) "pattern" is a pathname which is by default not permitted to modify
+ *      already existing content.
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_no_rewrite_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *pattern;
@@ -141,7 +170,31 @@
 					 struct tomoyo_domain_info *
 					 const domain, const bool is_delete);
 
-/* The list for "struct tomoyo_globally_readable_file_entry". */
+/*
+ * tomoyo_globally_readable_list is used for holding list of pathnames which
+ * are by default allowed to be open()ed for reading by any process.
+ *
+ * An entry is added by
+ *
+ * # echo 'allow_read /lib/libc-2.5.so' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete allow_read /lib/libc-2.5.so' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^allow_read /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, any process is allowed to
+ * open("/lib/libc-2.5.so", O_RDONLY).
+ * One exception is, if the domain which current process belongs to is marked
+ * as "ignore_global_allow_read", current process can't do so unless explicitly
+ * given "allow_read /lib/libc-2.5.so" to the domain which current process
+ * belongs to.
+ */
 static LIST_HEAD(tomoyo_globally_readable_list);
 static DECLARE_RWSEM(tomoyo_globally_readable_list_lock);
 
@@ -166,7 +219,6 @@
 	saved_filename = tomoyo_save_name(filename);
 	if (!saved_filename)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_globally_readable_list_lock);
 	list_for_each_entry(ptr, &tomoyo_globally_readable_list, list) {
 		if (ptr->filename != saved_filename)
@@ -187,7 +239,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_globally_readable_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -249,17 +300,44 @@
 				 list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n",
-				      ptr->filename->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n",
+					ptr->filename->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_globally_readable_list_lock);
 	return done;
 }
 
-/* The list for "struct tomoyo_pattern_entry". */
+/* tomoyo_pattern_list is used for holding list of pathnames which are used for
+ * converting pathnames to pathname patterns during learning mode.
+ *
+ * An entry is added by
+ *
+ * # echo 'file_pattern /proc/\$/mounts' > \
+ *                             /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete file_pattern /proc/\$/mounts' > \
+ *                             /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^file_pattern /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if a process which belongs to a domain which is in
+ * learning mode requested open("/proc/1/mounts", O_RDONLY),
+ * "allow_read /proc/\$/mounts" is automatically added to the domain which that
+ * process belongs to.
+ *
+ * It is not a desirable behavior that we have to use /proc/\$/ instead of
+ * /proc/self/ when current process needs to access only current process's
+ * information. As of now, LSM version of TOMOYO is using __d_path() for
+ * calculating pathname. Non LSM version of TOMOYO is using its own function
+ * which pretends as if /proc/self/ is not a symlink; so that we can forbid
+ * current process from accessing other process's information.
+ */
 static LIST_HEAD(tomoyo_pattern_list);
 static DECLARE_RWSEM(tomoyo_pattern_list_lock);
 
@@ -284,7 +362,6 @@
 	saved_pattern = tomoyo_save_name(pattern);
 	if (!saved_pattern)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_pattern_list_lock);
 	list_for_each_entry(ptr, &tomoyo_pattern_list, list) {
 		if (saved_pattern != ptr->pattern)
@@ -305,7 +382,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_pattern_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -373,17 +449,44 @@
 		ptr = list_entry(pos, struct tomoyo_pattern_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN "%s\n",
-				      ptr->pattern->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN
+					"%s\n", ptr->pattern->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_pattern_list_lock);
 	return done;
 }
 
-/* The list for "struct tomoyo_no_rewrite_entry". */
+/*
+ * tomoyo_no_rewrite_list is used for holding list of pathnames which are by
+ * default forbidden to modify already written content of a file.
+ *
+ * An entry is added by
+ *
+ * # echo 'deny_rewrite /var/log/messages' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete deny_rewrite /var/log/messages' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^deny_rewrite /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if a process requested to rewrite /var/log/messages ,
+ * the process can't rewrite unless the domain which that process belongs to
+ * has "allow_rewrite /var/log/messages" entry.
+ *
+ * It is not a desirable behavior that we have to add "\040(deleted)" suffix
+ * when we want to allow rewriting already unlink()ed file. As of now,
+ * LSM version of TOMOYO is using __d_path() for calculating pathname.
+ * Non LSM version of TOMOYO is using its own function which doesn't append
+ * " (deleted)" suffix if the file is already unlink()ed; so that we don't
+ * need to worry whether the file is already unlink()ed or not.
+ */
 static LIST_HEAD(tomoyo_no_rewrite_list);
 static DECLARE_RWSEM(tomoyo_no_rewrite_list_lock);
 
@@ -407,7 +510,6 @@
 	saved_pattern = tomoyo_save_name(pattern);
 	if (!saved_pattern)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_no_rewrite_list_lock);
 	list_for_each_entry(ptr, &tomoyo_no_rewrite_list, list) {
 		if (ptr->pattern != saved_pattern)
@@ -428,7 +530,6 @@
 	error = 0;
  out:
 	up_write(&tomoyo_no_rewrite_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -489,11 +590,10 @@
 		ptr = list_entry(pos, struct tomoyo_no_rewrite_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE "%s\n",
-				      ptr->pattern->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE
+					"%s\n", ptr->pattern->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_no_rewrite_list_lock);
 	return done;
@@ -745,7 +845,6 @@
 	saved_filename = tomoyo_save_name(filename);
 	if (!saved_filename)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_acl_info_list_lock);
 	if (is_delete)
 		goto delete;
@@ -800,7 +899,6 @@
 	}
  out:
 	up_write(&tomoyo_domain_acl_info_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -836,7 +934,6 @@
 	saved_filename2 = tomoyo_save_name(filename2);
 	if (!saved_filename1 || !saved_filename2)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_acl_info_list_lock);
 	if (is_delete)
 		goto delete;
@@ -884,7 +981,6 @@
 	}
  out:
 	up_write(&tomoyo_domain_acl_info_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -1025,13 +1121,11 @@
  *
  * @domain:   Pointer to "struct tomoyo_domain_info".
  * @filename: Check permission for "execute".
- * @tmp:      Buffer for temporary use.
  *
  * Returns 0 on success, negativevalue otherwise.
  */
 int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
-			   const struct tomoyo_path_info *filename,
-			   struct tomoyo_page_buffer *tmp)
+			   const struct tomoyo_path_info *filename)
 {
 	const u8 mode = tomoyo_check_flags(domain, TOMOYO_MAC_FOR_FILE);
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 40927a8..5f2e332 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -220,7 +220,6 @@
 		= roundup(size, max(sizeof(void *), sizeof(long)));
 	if (word_aligned_size > PATH_MAX)
 		return NULL;
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	if (buf_used_len + word_aligned_size > PATH_MAX) {
 		if (!tomoyo_quota_for_elements ||
@@ -251,7 +250,6 @@
 		}
 	}
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr;
 }
 
@@ -267,7 +265,16 @@
  */
 #define TOMOYO_MAX_HASH 256
 
-/* Structure for string data. */
+/*
+ * tomoyo_name_entry is a structure which is used for linking
+ * "struct tomoyo_path_info" into tomoyo_name_list .
+ *
+ * Since tomoyo_name_list manages a list of strings which are shared by
+ * multiple processes (whereas "struct tomoyo_path_info" inside
+ * "struct tomoyo_path_info_with_data" is not shared), a reference counter will
+ * be added to "struct tomoyo_name_entry" rather than "struct tomoyo_path_info"
+ * when TOMOYO starts supporting garbage collector.
+ */
 struct tomoyo_name_entry {
 	struct list_head list;
 	struct tomoyo_path_info entry;
@@ -281,10 +288,10 @@
 };
 
 /*
- * The list for "struct tomoyo_name_entry".
- *
- * This list is updated only inside tomoyo_save_name(), thus
- * no global mutex exists.
+ * tomoyo_name_list is used for holding string data used by TOMOYO.
+ * Since same string data is likely used for multiple times (e.g.
+ * "/lib/libc-2.5.so"), TOMOYO shares string data in the form of
+ * "const struct tomoyo_path_info *".
  */
 static struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
 
@@ -318,7 +325,6 @@
 		return NULL;
 	}
 	hash = full_name_hash((const unsigned char *) name, len - 1);
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH],
 			     list) {
@@ -366,7 +372,6 @@
 	}
  out:
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr ? &ptr->entry : NULL;
 }
 
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index e42be5c..3194d09 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -262,6 +262,10 @@
 	return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
 }
 
+/*
+ * tomoyo_security_ops is a "struct security_operations" which is used for
+ * registering TOMOYO.
+ */
 static struct security_operations tomoyo_security_ops = {
 	.name                = "tomoyo",
 	.cred_prepare        = tomoyo_cred_prepare,
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index 41c6eba..0fd588a 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -17,13 +17,11 @@
 struct inode;
 struct linux_binprm;
 struct pt_regs;
-struct tomoyo_page_buffer;
 
 int tomoyo_check_file_perm(struct tomoyo_domain_info *domain,
 			   const char *filename, const u8 perm);
 int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
-			   const struct tomoyo_path_info *filename,
-			   struct tomoyo_page_buffer *buf);
+			   const struct tomoyo_path_info *filename);
 int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
 				 struct path *path, const int flag);
 int tomoyo_check_1path_perm(struct tomoyo_domain_info *domain,
@@ -90,17 +88,10 @@
 	return current_cred()->security;
 }
 
-/* Caller holds tasklist_lock spinlock. */
 static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
 							    *task)
 {
-	/***** CRITICAL SECTION START *****/
-	const struct cred *cred = get_task_cred(task);
-	struct tomoyo_domain_info *domain = cred->security;
-
-	put_cred(cred);
-	return domain;
-	/***** CRITICAL SECTION END *****/
+	return task_cred_xxx(task, security);
 }
 
 #endif /* !defined(_SECURITY_TOMOYO_TOMOYO_H) */
diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c
index fbf5c93..586965f 100644
--- a/sound/aoa/fabrics/layout.c
+++ b/sound/aoa/fabrics/layout.c
@@ -1037,7 +1037,7 @@
 	}
 	ldev->selfptr_headphone.ptr = ldev;
 	ldev->selfptr_lineout.ptr = ldev;
-	sdev->ofdev.dev.driver_data = ldev;
+	dev_set_drvdata(&sdev->ofdev.dev, ldev);
 	list_add(&ldev->list, &layouts_list);
 	layouts_list_items++;
 
@@ -1081,7 +1081,7 @@
 
 static int aoa_fabric_layout_remove(struct soundbus_dev *sdev)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 	int i;
 
 	for (i=0; i<MAX_CODECS_PER_BUS; i++) {
@@ -1114,7 +1114,7 @@
 #ifdef CONFIG_PM
 static int aoa_fabric_layout_suspend(struct soundbus_dev *sdev, pm_message_t state)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 
 	if (ldev->gpio.methods && ldev->gpio.methods->all_amps_off)
 		ldev->gpio.methods->all_amps_off(&ldev->gpio);
@@ -1124,7 +1124,7 @@
 
 static int aoa_fabric_layout_resume(struct soundbus_dev *sdev)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 
 	if (ldev->gpio.methods && ldev->gpio.methods->all_amps_off)
 		ldev->gpio.methods->all_amps_restore(&ldev->gpio);
diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c
index 418c84c..4e3b819 100644
--- a/sound/aoa/soundbus/i2sbus/core.c
+++ b/sound/aoa/soundbus/i2sbus/core.c
@@ -358,14 +358,14 @@
 		return -ENODEV;
 	}
 
-	dev->ofdev.dev.driver_data = control;
+	dev_set_drvdata(&dev->ofdev.dev, control);
 
 	return 0;
 }
 
 static int i2sbus_remove(struct macio_dev* dev)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct i2sbus_dev *i2sdev, *tmp;
 
 	list_for_each_entry_safe(i2sdev, tmp, &control->list, item)
@@ -377,7 +377,7 @@
 #ifdef CONFIG_PM
 static int i2sbus_suspend(struct macio_dev* dev, pm_message_t state)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct codec_info_item *cii;
 	struct i2sbus_dev* i2sdev;
 	int err, ret = 0;
@@ -407,7 +407,7 @@
 
 static int i2sbus_resume(struct macio_dev* dev)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct codec_info_item *cii;
 	struct i2sbus_dev* i2sdev;
 	int err, ret = 0;
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index 7bbdda0..6061fb5 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -205,3 +205,5 @@
 
 config SND_VMASTER
 	bool
+
+source "sound/core/seq/Kconfig"
diff --git a/sound/core/init.c b/sound/core/init.c
index fd56afe..d5d40d7 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -152,15 +152,8 @@
 	card = kzalloc(sizeof(*card) + extra_size, GFP_KERNEL);
 	if (!card)
 		return -ENOMEM;
-	if (xid) {
-		if (!snd_info_check_reserved_words(xid)) {
-			snd_printk(KERN_ERR
-				   "given id string '%s' is reserved.\n", xid);
-			err = -EBUSY;
-			goto __error;
-		}
+	if (xid)
 		strlcpy(card->id, xid, sizeof(card->id));
-	}
 	err = 0;
 	mutex_lock(&snd_card_mutex);
 	if (idx < 0) {
@@ -483,22 +476,28 @@
 
 EXPORT_SYMBOL(snd_card_free);
 
-static void choose_default_id(struct snd_card *card)
+static void snd_card_set_id_no_lock(struct snd_card *card, const char *nid)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
-	char *id, *spos;
+	const char *spos, *src;
+	char *id;
 	
-	id = spos = card->shortname;	
-	while (*id != '\0') {
-		if (*id == ' ')
-			spos = id + 1;
-		id++;
+	if (nid == NULL) {
+		id = card->shortname;
+		spos = src = id;
+		while (*id != '\0') {
+			if (*id == ' ')
+				spos = id + 1;
+			id++;
+		}
+	} else {
+		spos = src = nid;
 	}
 	id = card->id;
 	while (*spos != '\0' && !isalnum(*spos))
 		spos++;
 	if (isdigit(*spos))
-		*id++ = isalpha(card->shortname[0]) ? card->shortname[0] : 'D';
+		*id++ = isalpha(src[0]) ? src[0] : 'D';
 	while (*spos != '\0' && (size_t)(id - card->id) < sizeof(card->id) - 1) {
 		if (isalnum(*spos))
 			*id++ = *spos;
@@ -513,7 +512,7 @@
 
 	while (1) {
 	      	if (loops-- == 0) {
-      			snd_printk(KERN_ERR "unable to choose default card id (%s)\n", id);
+			snd_printk(KERN_ERR "unable to set card id (%s)\n", id);
       			strcpy(card->id, card->proc_root->name);
       			return;
       		}
@@ -539,14 +538,33 @@
 			spos = id + len - 2;
 			if ((size_t)len <= sizeof(card->id) - 2)
 				spos++;
-			*spos++ = '_';
-			*spos++ = '1';
-			*spos++ = '\0';
+			*(char *)spos++ = '_';
+			*(char *)spos++ = '1';
+			*(char *)spos++ = '\0';
 			idx_flag++;
 		}
 	}
 }
 
+/**
+ *  snd_card_set_id - set card identification name
+ *  @card: soundcard structure
+ *  @nid: new identification string
+ *
+ *  This function sets the card identification and checks for name
+ *  collisions.
+ */
+void snd_card_set_id(struct snd_card *card, const char *nid)
+{
+	/* check if user specified own card->id */
+	if (card->id[0] != '\0')
+		return;
+	mutex_lock(&snd_card_mutex);
+	snd_card_set_id_no_lock(card, nid);
+	mutex_unlock(&snd_card_mutex);
+}
+EXPORT_SYMBOL(snd_card_set_id);
+
 #ifndef CONFIG_SYSFS_DEPRECATED
 static ssize_t
 card_id_show_attr(struct device *dev,
@@ -640,8 +658,7 @@
 		mutex_unlock(&snd_card_mutex);
 		return 0;
 	}
-	if (card->id[0] == '\0')
-		choose_default_id(card);
+	snd_card_set_id_no_lock(card, card->id[0] == '\0' ? NULL : card->id);
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
 	init_info_for_card(card);
diff --git a/sound/core/jack.c b/sound/core/jack.c
index d54d1a0..f705eec 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -63,7 +63,7 @@
 
 	/* Default to the sound card device. */
 	if (!jack->input_dev->dev.parent)
-		jack->input_dev->dev.parent = card->dev;
+		jack->input_dev->dev.parent = snd_card_get_device_link(card);
 
 	err = input_register_device(jack->input_dev);
 	if (err == 0)
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index dda000b..dbe406b 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -31,6 +31,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 #include <linux/string.h>
 #include <sound/core.h>
 #include <sound/minors.h>
@@ -617,9 +618,7 @@
 #else
 	{
 		u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes;
-		u32 rem;
-		div64_32(&bsize, buffer_size, &rem);
-		return (long)bsize;
+		return div_u64(bsize, buffer_size);
 	}
 #endif
 }
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d659995..333e4dd 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -22,6 +22,7 @@
 
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/math64.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
@@ -126,24 +127,37 @@
 }
 
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
-#define xrun_debug(substream)	((substream)->pstr->xrun_debug)
+#define xrun_debug(substream, mask)	((substream)->pstr->xrun_debug & (mask))
 #else
-#define xrun_debug(substream)	0
+#define xrun_debug(substream, mask)	0
 #endif
 
-#define dump_stack_on_xrun(substream) do {	\
-		if (xrun_debug(substream) > 1)	\
-			dump_stack();		\
+#define dump_stack_on_xrun(substream) do {		\
+		if (xrun_debug(substream, 2))		\
+			dump_stack();			\
 	} while (0)
 
+static void pcm_debug_name(struct snd_pcm_substream *substream,
+			   char *name, size_t len)
+{
+	snprintf(name, len, "pcmC%dD%d%c:%d",
+		 substream->pcm->card->number,
+		 substream->pcm->device,
+		 substream->stream ? 'c' : 'p',
+		 substream->number);
+}
+
 static void xrun(struct snd_pcm_substream *substream)
 {
+	struct snd_pcm_runtime *runtime = substream->runtime;
+
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 	snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
-	if (xrun_debug(substream)) {
-		snd_printd(KERN_DEBUG "XRUN: pcmC%dD%d%c\n",
-			   substream->pcm->card->number,
-			   substream->pcm->device,
-			   substream->stream ? 'c' : 'p');
+	if (xrun_debug(substream, 1)) {
+		char name[16];
+		pcm_debug_name(substream, name, sizeof(name));
+		snd_printd(KERN_DEBUG "XRUN: %s\n", name);
 		dump_stack_on_xrun(substream);
 	}
 }
@@ -154,16 +168,16 @@
 {
 	snd_pcm_uframes_t pos;
 
-	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
-		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 	pos = substream->ops->pointer(substream);
 	if (pos == SNDRV_PCM_POS_XRUN)
 		return pos; /* XRUN */
 	if (pos >= runtime->buffer_size) {
 		if (printk_ratelimit()) {
-			snd_printd(KERN_ERR  "BUG: stream = %i, pos = 0x%lx, "
+			char name[16];
+			pcm_debug_name(substream, name, sizeof(name));
+			snd_printd(KERN_ERR  "BUG: %s, pos = 0x%lx, "
 				   "buffer size = 0x%lx, period size = 0x%lx\n",
-				   substream->stream, pos, runtime->buffer_size,
+				   name, pos, runtime->buffer_size,
 				   runtime->period_size);
 		}
 		pos = 0;
@@ -197,7 +211,7 @@
 
 #define hw_ptr_error(substream, fmt, args...)				\
 	do {								\
-		if (xrun_debug(substream)) {				\
+		if (xrun_debug(substream, 1)) {				\
 			if (printk_ratelimit()) {			\
 				snd_printd("PCM: " fmt, ##args);	\
 			}						\
@@ -251,7 +265,7 @@
 	}
 
 	/* Do jiffies check only in xrun_debug mode */
-	if (!xrun_debug(substream))
+	if (!xrun_debug(substream, 4))
 		goto no_jiffies_check;
 
 	/* Skip the jiffies check for hardwares with BATCH flag.
@@ -261,6 +275,9 @@
 	if (runtime->hw.info & SNDRV_PCM_INFO_BATCH)
 		goto no_jiffies_check;
 	hdelta = new_hw_ptr - old_hw_ptr;
+	if (hdelta < runtime->delay)
+		goto no_jiffies_check;
+	hdelta -= runtime->delay;
 	jdelta = jiffies - runtime->hw_ptr_jiffies;
 	if (((hdelta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		delta = jdelta /
@@ -294,14 +311,20 @@
 		hw_ptr_interrupt =
 			new_hw_ptr - new_hw_ptr % runtime->period_size;
 	}
+	runtime->hw_ptr_interrupt = hw_ptr_interrupt;
+
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
 
+	if (runtime->status->hw_ptr == new_hw_ptr)
+		return 0;
+
 	runtime->hw_ptr_base = hw_base;
 	runtime->status->hw_ptr = new_hw_ptr;
 	runtime->hw_ptr_jiffies = jiffies;
-	runtime->hw_ptr_interrupt = hw_ptr_interrupt;
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 
 	return snd_pcm_update_hw_ptr_post(substream, runtime);
 }
@@ -342,8 +365,12 @@
 		new_hw_ptr = hw_base + pos;
 	}
 	/* Do jiffies check only in xrun_debug mode */
-	if (xrun_debug(substream) &&
-	    ((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
+	if (!xrun_debug(substream, 4))
+		goto no_jiffies_check;
+	if (delta < runtime->delay)
+		goto no_jiffies_check;
+	delta -= runtime->delay;
+	if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		hw_ptr_error(substream,
 			     "hw_ptr skipping! "
 			     "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n",
@@ -352,13 +379,19 @@
 			     ((delta * HZ) / runtime->rate));
 		return 0;
 	}
+ no_jiffies_check:
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
 
+	if (runtime->status->hw_ptr == new_hw_ptr)
+		return 0;
+
 	runtime->hw_ptr_base = hw_base;
 	runtime->status->hw_ptr = new_hw_ptr;
 	runtime->hw_ptr_jiffies = jiffies;
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 
 	return snd_pcm_update_hw_ptr_post(substream, runtime);
 }
@@ -452,7 +485,7 @@
 		*r = 0;
 		return UINT_MAX;
 	}
-	div64_32(&n, c, r);
+	n = div_u64_rem(n, c, r);
 	if (n >= UINT_MAX) {
 		*r = 0;
 		return UINT_MAX;
@@ -1524,6 +1557,23 @@
 	return 0;
 }
 
+static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream,
+				       void *arg)
+{
+	struct snd_pcm_hw_params *params = arg;
+	snd_pcm_format_t format;
+	int channels, width;
+
+	params->fifo_size = substream->runtime->hw.fifo_size;
+	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) {
+		format = params_format(params);
+		channels = params_channels(params);
+		width = snd_pcm_format_physical_width(format);
+		params->fifo_size /= width * channels;
+	}
+	return 0;
+}
+
 /**
  * snd_pcm_lib_ioctl - a generic PCM ioctl callback
  * @substream: the pcm substream instance
@@ -1545,6 +1595,8 @@
 		return snd_pcm_lib_ioctl_reset(substream, arg);
 	case SNDRV_PCM_IOCTL1_CHANNEL_INFO:
 		return snd_pcm_lib_ioctl_channel_info(substream, arg);
+	case SNDRV_PCM_IOCTL1_FIFO_SIZE:
+		return snd_pcm_lib_ioctl_fifo_size(substream, arg);
 	}
 	return -ENXIO;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b5da656..84da3ba 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -312,9 +312,18 @@
 
 	hw = &substream->runtime->hw;
 	if (!params->info)
-		params->info = hw->info;
-	if (!params->fifo_size)
-		params->fifo_size = hw->fifo_size;
+		params->info = hw->info & ~SNDRV_PCM_INFO_FIFO_IN_FRAMES;
+	if (!params->fifo_size) {
+		if (snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) ==
+		    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) &&
+                    snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS]) ==
+                    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS])) {
+			changed = substream->ops->ioctl(substream,
+					SNDRV_PCM_IOCTL1_FIFO_SIZE, params);
+			if (params < 0)
+				return changed;
+		}
+	}
 	params->rmask = 0;
 	return 0;
 }
@@ -587,14 +596,15 @@
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		status->avail = snd_pcm_playback_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING ||
-		    runtime->status->state == SNDRV_PCM_STATE_DRAINING)
+		    runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
 			status->delay = runtime->buffer_size - status->avail;
-		else
+			status->delay += runtime->delay;
+		} else
 			status->delay = 0;
 	} else {
 		status->avail = snd_pcm_capture_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING)
-			status->delay = status->avail;
+			status->delay = status->avail + runtime->delay;
 		else
 			status->delay = 0;
 	}
@@ -2410,6 +2420,7 @@
 			n = snd_pcm_playback_hw_avail(runtime);
 		else
 			n = snd_pcm_capture_avail(runtime);
+		n += runtime->delay;
 		break;
 	case SNDRV_PCM_STATE_XRUN:
 		err = -EPIPE;
diff --git a/sound/core/seq/Kconfig b/sound/core/seq/Kconfig
new file mode 100644
index 0000000..b851fd8
--- /dev/null
+++ b/sound/core/seq/Kconfig
@@ -0,0 +1,16 @@
+# define SND_XXX_SEQ to min(SND_SEQUENCER,SND_XXX)
+
+config SND_RAWMIDI_SEQ
+	def_tristate SND_SEQUENCER && SND_RAWMIDI
+
+config SND_OPL3_LIB_SEQ
+	def_tristate SND_SEQUENCER && SND_OPL3_LIB
+
+config SND_OPL4_LIB_SEQ
+	def_tristate SND_SEQUENCER && SND_OPL4_LIB
+
+config SND_SBAWE_SEQ
+	def_tristate SND_SEQUENCER && SND_SBAWE
+
+config SND_EMU10K1_SEQ
+	def_tristate SND_SEQUENCER && SND_EMU10K1
diff --git a/sound/core/seq/Makefile b/sound/core/seq/Makefile
index 0695937..1bcb360 100644
--- a/sound/core/seq/Makefile
+++ b/sound/core/seq/Makefile
@@ -17,14 +17,6 @@
 snd-seq-dummy-objs := seq_dummy.o
 snd-seq-virmidi-objs := seq_virmidi.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_SEQUENCER) += snd-seq.o snd-seq-device.o
 ifeq ($(CONFIG_SND_SEQUENCER_OSS),y)
 obj-$(CONFIG_SND_SEQUENCER) += snd-seq-midi-event.o
@@ -33,8 +25,8 @@
 
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_VIRMIDI) += snd-seq-virmidi.o snd-seq-midi-event.o
-obj-$(call sequencer,$(CONFIG_SND_RAWMIDI)) += snd-seq-midi.o snd-seq-midi-event.o
-obj-$(call sequencer,$(CONFIG_SND_OPL3_LIB)) += snd-seq-midi-event.o snd-seq-midi-emul.o
-obj-$(call sequencer,$(CONFIG_SND_OPL4_LIB)) += snd-seq-midi-event.o snd-seq-midi-emul.o
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-seq-midi-emul.o snd-seq-virmidi.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-seq-midi-emul.o snd-seq-virmidi.o
+obj-$(CONFIG_SND_RAWMIDI_SEQ) += snd-seq-midi.o snd-seq-midi-event.o
+obj-$(CONFIG_SND_OPL3_LIB_SEQ) += snd-seq-midi-event.o snd-seq-midi-emul.o
+obj-$(CONFIG_SND_OPL4_LIB_SEQ) += snd-seq-midi-event.o snd-seq-midi-emul.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-seq-midi-emul.o snd-seq-virmidi.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-seq-midi-emul.o snd-seq-virmidi.o
diff --git a/sound/drivers/opl3/Makefile b/sound/drivers/opl3/Makefile
index 19767a6..7f2c2a1 100644
--- a/sound/drivers/opl3/Makefile
+++ b/sound/drivers/opl3/Makefile
@@ -7,14 +7,6 @@
 snd-opl3-synth-y := opl3_seq.o opl3_midi.o opl3_drums.o
 snd-opl3-synth-$(CONFIG_SND_SEQUENCER_OSS) += opl3_oss.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_OPL3_LIB) += snd-opl3-lib.o
 obj-$(CONFIG_SND_OPL4_LIB) += snd-opl3-lib.o
-obj-$(call sequencer,$(CONFIG_SND_OPL3_LIB)) += snd-opl3-synth.o
+obj-$(CONFIG_SND_OPL3_LIB_SEQ) += snd-opl3-synth.o
diff --git a/sound/drivers/opl4/Makefile b/sound/drivers/opl4/Makefile
index d178b39..b94009b 100644
--- a/sound/drivers/opl4/Makefile
+++ b/sound/drivers/opl4/Makefile
@@ -6,13 +6,5 @@
 snd-opl4-lib-objs := opl4_lib.o opl4_mixer.o opl4_proc.o
 snd-opl4-synth-objs := opl4_seq.o opl4_synth.o yrw801.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_OPL4_LIB) += snd-opl4-lib.o
-obj-$(call sequencer,$(CONFIG_SND_OPL4_LIB)) += snd-opl4-synth.o
+obj-$(CONFIG_SND_OPL4_LIB_SEQ) += snd-opl4-synth.o
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index c6942a4..51a7e37 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -177,15 +177,18 @@
 	  will be called snd-es18xx.
 
 config SND_SC6000
-	tristate "Gallant SC-6000, Audio Excel DSP 16"
+	tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16"
 	depends on HAS_IOPORT
 	select SND_WSS_LIB
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	help
-	  Say Y here to include support for Gallant SC-6000 card and clones:
+	  Say Y here to include support for Gallant SC-6000, SC-6600, SC-7000
+	  cards and clones:
 	  Audio Excel DSP 16 and Zoltrix AV302.
 
+	  These cards are based on CompuMedia ASC-9308 or ASC-9408 chips.
+
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-sc6000.
 
diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c
index 442b081..07df201 100644
--- a/sound/isa/es1688/es1688.c
+++ b/sound/isa/es1688/es1688.c
@@ -193,7 +193,7 @@
 static struct isa_driver snd_es1688_driver = {
 	.match		= snd_es1688_match,
 	.probe		= snd_es1688_probe,
-	.remove		= snd_es1688_remove,
+	.remove		= __devexit_p(snd_es1688_remove),
 #if 0	/* FIXME */
 	.suspend	= snd_es1688_suspend,
 	.resume		= snd_es1688_resume,
diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c
index 180a8de..65e4b18 100644
--- a/sound/isa/gus/gusextreme.c
+++ b/sound/isa/gus/gusextreme.c
@@ -348,7 +348,7 @@
 static struct isa_driver snd_gusextreme_driver = {
 	.match		= snd_gusextreme_match,
 	.probe		= snd_gusextreme_probe,
-	.remove		= snd_gusextreme_remove,
+	.remove		= __devexit_p(snd_gusextreme_remove),
 #if 0	/* FIXME */
 	.suspend	= snd_gusextreme_suspend,
 	.resume		= snd_gusextreme_resume,
diff --git a/sound/isa/sb/Makefile b/sound/isa/sb/Makefile
index 1098a56..faeffceb 100644
--- a/sound/isa/sb/Makefile
+++ b/sound/isa/sb/Makefile
@@ -13,14 +13,6 @@
 snd-emu8000-synth-objs := emu8000_synth.o emu8000_callback.o emu8000_patch.o emu8000_pcm.o
 snd-es968-objs := es968.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_SB_COMMON) += snd-sb-common.o
 obj-$(CONFIG_SND_SB16_DSP) += snd-sb16-dsp.o
@@ -33,4 +25,4 @@
   obj-$(CONFIG_SND_SB16) += snd-sb16-csp.o
   obj-$(CONFIG_SND_SBAWE) += snd-sb16-csp.o
 endif
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-emu8000-synth.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-emu8000-synth.o
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index 7820106..9a8bbf6 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -2,6 +2,8 @@
  *  Driver for Gallant SC-6000 soundcard. This card is also known as
  *  Audio Excel DSP 16 or Zoltrix AV302.
  *  These cards use CompuMedia ASC-9308 chip + AD1848 codec.
+ *  SC-6600 and SC-7000 cards are also supported. They are based on
+ *  CompuMedia ASC-9408 chip and CS4231 codec.
  *
  *  Copyright (C) 2007 Krzysztof Helt <krzysztof.h1@wp.pl>
  *
@@ -54,6 +56,7 @@
 						/* 0x300, 0x310, 0x320, 0x330 */
 static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ;	/* 5, 7, 9, 10, 0 */
 static int dma[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;	/* 0, 1, 3 */
+static bool joystick[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = false };
 
 module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for sc-6000 based soundcard.");
@@ -73,6 +76,8 @@
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for sc-6000 driver.");
 module_param_array(dma, int, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for sc-6000 driver.");
+module_param_array(joystick, bool, NULL, 0444);
+MODULE_PARM_DESC(joystick, "Enable gameport.");
 
 /*
  * Commands of SC6000's DSP (SBPRO+special).
@@ -191,7 +196,7 @@
 	return val;
 }
 
-static __devinit int sc6000_wait_data(char __iomem *vport)
+static int sc6000_wait_data(char __iomem *vport)
 {
 	int loop = 1000;
 	unsigned char val = 0;
@@ -206,7 +211,7 @@
 	return -EAGAIN;
 }
 
-static __devinit int sc6000_read(char __iomem *vport)
+static int sc6000_read(char __iomem *vport)
 {
 	if (sc6000_wait_data(vport))
 		return -EBUSY;
@@ -215,7 +220,7 @@
 
 }
 
-static __devinit int sc6000_write(char __iomem *vport, int cmd)
+static int sc6000_write(char __iomem *vport, int cmd)
 {
 	unsigned char val;
 	int loop = 500000;
@@ -276,8 +281,33 @@
 }
 
 /* detection and initialization */
-static int __devinit sc6000_cfg_write(char __iomem *vport,
-				      unsigned char softcfg)
+static int __devinit sc6000_hw_cfg_write(char __iomem *vport, const int *cfg)
+{
+	if (sc6000_write(vport, COMMAND_6C) < 0) {
+		snd_printk(KERN_WARNING "CMD 0x%x: failed!\n", COMMAND_6C);
+		return -EIO;
+	}
+	if (sc6000_write(vport, COMMAND_5C) < 0) {
+		snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_5C);
+		return -EIO;
+	}
+	if (sc6000_write(vport, cfg[0]) < 0) {
+		snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[0]);
+		return -EIO;
+	}
+	if (sc6000_write(vport, cfg[1]) < 0) {
+		snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[1]);
+		return -EIO;
+	}
+	if (sc6000_write(vport, COMMAND_C5) < 0) {
+		snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_C5);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int sc6000_cfg_write(char __iomem *vport, unsigned char softcfg)
 {
 
 	if (sc6000_write(vport, WRITE_MDIRQ_CFG)) {
@@ -291,7 +321,7 @@
 	return 0;
 }
 
-static int __devinit sc6000_setup_board(char __iomem *vport, int config)
+static int sc6000_setup_board(char __iomem *vport, int config)
 {
 	int loop = 10;
 
@@ -334,16 +364,39 @@
 	return 0;
 }
 
-static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma,
-					char __iomem *vmss_port, int mpu_irq)
+static void __devinit sc6000_hw_cfg_encode(char __iomem *vport, int *cfg,
+					   long xport, long xmpu,
+					   long xmss_port, int joystick)
+{
+	cfg[0] = 0;
+	cfg[1] = 0;
+	if (xport == 0x240)
+		cfg[0] |= 1;
+	if (xmpu != SNDRV_AUTO_PORT) {
+		cfg[0] |= (xmpu & 0x30) >> 2;
+		cfg[1] |= 0x20;
+	}
+	if (xmss_port == 0xe80)
+		cfg[0] |= 0x10;
+	cfg[0] |= 0x40;		/* always set */
+	if (!joystick)
+		cfg[0] |= 0x02;
+	cfg[1] |= 0x80;		/* enable WSS system */
+	cfg[1] &= ~0x40;	/* disable IDE */
+	snd_printd("hw cfg %x, %x\n", cfg[0], cfg[1]);
+}
+
+static int __devinit sc6000_init_board(char __iomem *vport,
+					char __iomem *vmss_port, int dev)
 {
 	char answer[15];
 	char version[2];
-	int mss_config = sc6000_irq_to_softcfg(irq) |
-			 sc6000_dma_to_softcfg(dma);
+	int mss_config = sc6000_irq_to_softcfg(irq[dev]) |
+			 sc6000_dma_to_softcfg(dma[dev]);
 	int config = mss_config |
-		     sc6000_mpu_irq_to_softcfg(mpu_irq);
+		     sc6000_mpu_irq_to_softcfg(mpu_irq[dev]);
 	int err;
+	int old = 0;
 
 	err = sc6000_dsp_reset(vport);
 	if (err < 0) {
@@ -360,7 +413,6 @@
 	/*
 	 * My SC-6000 card return "SC-6000" in DSPCopyright, so
 	 * if we have something different, we have to be warned.
-	 * Mine returns "SC-6000A " - KH
 	 */
 	if (strncmp("SC-6000", answer, 7))
 		snd_printk(KERN_WARNING "Warning: non SC-6000 audio card!\n");
@@ -372,13 +424,32 @@
 	printk(KERN_INFO PFX "Detected model: %s, DSP version %d.%d\n",
 		answer, version[0], version[1]);
 
-	/*
-	 * 0x0A == (IRQ 7, DMA 1, MIRQ 0)
-	 */
-	err = sc6000_cfg_write(vport, 0x0a);
+	/* set configuration */
+	sc6000_write(vport, COMMAND_5C);
+	if (sc6000_read(vport) < 0)
+		old = 1;
+
+	if (!old) {
+		int cfg[2];
+		sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev],
+				     mss_port[dev], joystick[dev]);
+		if (sc6000_hw_cfg_write(vport, cfg) < 0) {
+			snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n");
+			return -EIO;
+		}
+	}
+	err = sc6000_setup_board(vport, config);
 	if (err < 0) {
-		snd_printk(KERN_ERR "sc6000_cfg_write: failed!\n");
-		return -EFAULT;
+		snd_printk(KERN_ERR "sc6000_setup_board: failed!\n");
+		return -ENODEV;
+	}
+
+	sc6000_dsp_reset(vport);
+
+	if (!old) {
+		sc6000_write(vport, COMMAND_60);
+		sc6000_write(vport, 0x02);
+		sc6000_dsp_reset(vport);
 	}
 
 	err = sc6000_setup_board(vport, config);
@@ -386,10 +457,9 @@
 		snd_printk(KERN_ERR "sc6000_setup_board: failed!\n");
 		return -ENODEV;
 	}
-
 	err = sc6000_init_mss(vport, config, vmss_port, mss_config);
 	if (err < 0) {
-		snd_printk(KERN_ERR "Can not initialize "
+		snd_printk(KERN_ERR "Cannot initialize "
 			   "Microsoft Sound System mode.\n");
 		return -ENODEV;
 	}
@@ -485,14 +555,16 @@
 	struct snd_card *card;
 	struct snd_wss *chip;
 	struct snd_opl3 *opl3;
-	char __iomem *vport;
+	char __iomem **vport;
 	char __iomem *vmss_port;
 
 
-	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, sizeof(vport),
+				&card);
 	if (err < 0)
 		return err;
 
+	vport = card->private_data;
 	if (xirq == SNDRV_AUTO_IRQ) {
 		xirq = snd_legacy_find_free_irq(possible_irqs);
 		if (xirq < 0) {
@@ -517,8 +589,8 @@
 		err = -EBUSY;
 		goto err_exit;
 	}
-	vport = devm_ioport_map(devptr, port[dev], 0x10);
-	if (!vport) {
+	*vport = devm_ioport_map(devptr, port[dev], 0x10);
+	if (*vport == NULL) {
 		snd_printk(KERN_ERR PFX
 			   "I/O port cannot be iomaped.\n");
 		err = -EBUSY;
@@ -533,7 +605,7 @@
 		goto err_unmap1;
 	}
 	vmss_port = devm_ioport_map(devptr, mss_port[dev], 4);
-	if (!vport) {
+	if (!vmss_port) {
 		snd_printk(KERN_ERR PFX
 			   "MSS port I/O cannot be iomaped.\n");
 		err = -EBUSY;
@@ -544,7 +616,7 @@
 		   port[dev], xirq, xdma,
 		   mpu_irq[dev] == SNDRV_AUTO_IRQ ? 0 : mpu_irq[dev]);
 
-	err = sc6000_init_board(vport, xirq, xdma, vmss_port, mpu_irq[dev]);
+	err = sc6000_init_board(*vport, vmss_port, dev);
 	if (err < 0)
 		goto err_unmap2;
 
@@ -552,7 +624,6 @@
 			     WSS_HW_DETECT, 0, &chip);
 	if (err < 0)
 		goto err_unmap2;
-	card->private_data = chip;
 
 	err = snd_wss_pcm(chip, 0, NULL);
 	if (err < 0) {
@@ -608,6 +679,7 @@
 	return 0;
 
 err_unmap2:
+	sc6000_setup_board(*vport, 0);
 	release_region(mss_port[dev], 4);
 err_unmap1:
 	release_region(port[dev], 0x10);
@@ -618,11 +690,17 @@
 
 static int __devexit snd_sc6000_remove(struct device *devptr, unsigned int dev)
 {
+	struct snd_card *card = dev_get_drvdata(devptr);
+	char __iomem **vport = card->private_data;
+
+	if (sc6000_setup_board(*vport, 0) < 0)
+		snd_printk(KERN_WARNING "sc6000_setup_board failed on exit!\n");
+
 	release_region(port[dev], 0x10);
 	release_region(mss_port[dev], 4);
 
-	snd_card_free(dev_get_drvdata(devptr));
 	dev_set_drvdata(devptr, NULL);
+	snd_card_free(card);
 	return 0;
 }
 
diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c
index 66f3b48..e497525 100644
--- a/sound/mips/sgio2audio.c
+++ b/sound/mips/sgio2audio.c
@@ -619,8 +619,7 @@
 /* hw_free callback */
 static int snd_sgio2audio_pcm_hw_free(struct snd_pcm_substream *substream)
 {
-	if (substream->runtime->dma_area)
-		vfree(substream->runtime->dma_area);
+	vfree(substream->runtime->dma_area);
 	substream->runtime->dma_area = NULL;
 	return 0;
 }
diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig
index 1ca7427..bcf2a06 100644
--- a/sound/oss/Kconfig
+++ b/sound/oss/Kconfig
@@ -561,7 +561,7 @@
 
 config SOUND_SH_DAC_AUDIO
 	tristate "SuperH DAC audio support"
-	depends on CPU_SH3
+	depends on CPU_SH3 && HIGH_RES_TIMERS
 
 config SOUND_SH_DAC_AUDIO_CHANNEL
 	int "DAC channel"
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index 78cfb66..b2ed875 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -18,47 +18,36 @@
 #include <linux/sound.h>
 #include <linux/soundcard.h>
 #include <linux/interrupt.h>
+#include <linux/hrtimer.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/irq.h>
 #include <asm/delay.h>
 #include <asm/clock.h>
-#include <asm/cpu/dac.h>
-#include <asm/cpu/timer.h>
+#include <cpu/dac.h>
 #include <asm/machvec.h>
 #include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define MODNAME "sh_dac_audio"
 
-#define TMU_TOCR_INIT	0x00
-
-#define TMU1_TCR_INIT	0x0020	/* Clock/4, rising edge; interrupt on */
-#define TMU1_TSTR_INIT  0x02	/* Bit to turn on TMU1 */
-
 #define BUFFER_SIZE 48000
 
 static int rate;
 static int empty;
 static char *data_buffer, *buffer_begin, *buffer_end;
 static int in_use, device_major;
+static struct hrtimer hrtimer;
+static ktime_t wakeups_per_second;
 
 static void dac_audio_start_timer(void)
 {
-	u8 tstr;
-
-	tstr = ctrl_inb(TMU_TSTR);
-	tstr |= TMU1_TSTR_INIT;
-	ctrl_outb(tstr, TMU_TSTR);
+	hrtimer_start(&hrtimer, wakeups_per_second, HRTIMER_MODE_REL);
 }
 
 static void dac_audio_stop_timer(void)
 {
-	u8 tstr;
-
-	tstr = ctrl_inb(TMU_TSTR);
-	tstr &= ~TMU1_TSTR_INIT;
-	ctrl_outb(tstr, TMU_TSTR);
+	hrtimer_cancel(&hrtimer);
 }
 
 static void dac_audio_reset(void)
@@ -77,38 +66,30 @@
 static void dac_audio_start(void)
 {
 	if (mach_is_hp6xx()) {
-		u16 v = inw(HD64461_GPADR);
+		u16 v = __raw_readw(HD64461_GPADR);
 		v &= ~HD64461_GPADR_SPEAKER;
-		outw(v, HD64461_GPADR);
+		__raw_writew(v, HD64461_GPADR);
 	}
 
 	sh_dac_enable(CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
-	ctrl_outw(TMU1_TCR_INIT, TMU1_TCR);
 }
 static void dac_audio_stop(void)
 {
 	dac_audio_stop_timer();
 
 	if (mach_is_hp6xx()) {
-		u16 v = inw(HD64461_GPADR);
+		u16 v = __raw_readw(HD64461_GPADR);
 		v |= HD64461_GPADR_SPEAKER;
-		outw(v, HD64461_GPADR);
+		__raw_writew(v, HD64461_GPADR);
 	}
 
- 	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
+	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 	sh_dac_disable(CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 }
 
 static void dac_audio_set_rate(void)
 {
-	unsigned long interval;
- 	struct clk *clk;
-
- 	clk = clk_get(NULL, "module_clk");
- 	interval = (clk_get_rate(clk) / 4) / rate;
- 	clk_put(clk);
-	ctrl_outl(interval, TMU1_TCOR);
-	ctrl_outl(interval, TMU1_TCNT);
+	wakeups_per_second = ktime_set(0, 1000000000 / rate);
 }
 
 static int dac_audio_ioctl(struct inode *inode, struct file *file,
@@ -265,32 +246,26 @@
       .release =	dac_audio_release,
 };
 
-static irqreturn_t timer1_interrupt(int irq, void *dev)
+static enum hrtimer_restart sh_dac_audio_timer(struct hrtimer *handle)
 {
-	unsigned long timer_status;
-
-	timer_status = ctrl_inw(TMU1_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU1_TCR);
-
 	if (!empty) {
 		sh_dac_output(*buffer_begin, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 		buffer_begin++;
 
 		if (buffer_begin == data_buffer + BUFFER_SIZE)
 			buffer_begin = data_buffer;
-		if (buffer_begin == buffer_end) {
+		if (buffer_begin == buffer_end)
 			empty = 1;
-			dac_audio_stop_timer();
-		}
 	}
-	return IRQ_HANDLED;
+
+	if (!empty)
+		hrtimer_start(&hrtimer, wakeups_per_second, HRTIMER_MODE_REL);
+
+	return HRTIMER_NORESTART;
 }
 
 static int __init dac_audio_init(void)
 {
-	int retval;
-
 	if ((device_major = register_sound_dsp(&dac_audio_fops, -1)) < 0) {
 		printk(KERN_ERR "Cannot register dsp device");
 		return device_major;
@@ -306,21 +281,25 @@
 	rate = 8000;
 	dac_audio_set_rate();
 
-	retval =
-	    request_irq(TIMER1_IRQ, timer1_interrupt, IRQF_DISABLED, MODNAME, 0);
-	if (retval < 0) {
-		printk(KERN_ERR "sh_dac_audio: IRQ %d request failed\n",
-		       TIMER1_IRQ);
-		return retval;
-	}
+	/* Today: High Resolution Timer driven DAC playback.
+	 * The timer callback gets called once per sample. Ouch.
+	 *
+	 * Future: A much better approach would be to use the
+	 * SH7720 CMT+DMAC+DAC hardware combination like this:
+	 * - Program sample rate using CMT0 or CMT1
+	 * - Program DMAC to use CMT for timing and output to DAC
+	 * - Play sound using DMAC, let CPU sleep.
+	 * - While at it, rewrite this driver to use ALSA.
+	 */
+
+	hrtimer_init(&hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer.function = sh_dac_audio_timer;
 
 	return 0;
 }
 
 static void __exit dac_audio_exit(void)
 {
-	free_irq(TIMER1_IRQ, 0);
-
 	unregister_sound_dsp(device_major);
 	kfree((void *)data_buffer);
 }
diff --git a/sound/parisc/harmony.c b/sound/parisc/harmony.c
index 6055fd6..e924492 100644
--- a/sound/parisc/harmony.c
+++ b/sound/parisc/harmony.c
@@ -935,7 +935,7 @@
 	h->iobase = ioremap_nocache(padev->hpa.start, HARMONY_SIZE);
 	if (h->iobase == NULL) {
 		printk(KERN_ERR PFX "unable to remap hpa 0x%lx\n",
-		       padev->hpa.start);
+		       (unsigned long)padev->hpa.start);
 		err = -EBUSY;
 		goto free_and_ret;
 	}
@@ -1020,7 +1020,7 @@
 	.name = "harmony",
 	.id_table = snd_harmony_devtable,
 	.probe = snd_harmony_probe,
-	.remove = snd_harmony_remove,
+	.remove = __devexit_p(snd_harmony_remove),
 };
 
 static int __init 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3..748f6b7 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -275,6 +275,16 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-cs5535audio.
 
+config SND_CTXFI
+	tristate "Creative Sound Blaster X-Fi"
+	select SND_PCM
+	help
+	  If you want to use soundcards based on Creative Sound Blastr X-Fi
+	  boards with 20k1 or 20k2 chips, say Y here.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-ctxfi.
+
 config SND_DARLA20
 	tristate "(Echoaudio) Darla20"
 	select FW_LOADER
@@ -532,6 +542,9 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-hdsp.
 
+comment "Don't forget to add built-in firmwares for HDSP driver"
+	depends on SND_HDSP=y
+
 config SND_HDSPM
 	tristate "RME Hammerfall DSP MADI"
 	select SND_HWDEP
@@ -622,6 +635,16 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-korg1212.
 
+config SND_LX6464ES
+	tristate "Digigram LX6464ES"
+	select SND_PCM
+	help
+	  Say Y here to include support for Digigram LX6464ES boards.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-lx6464es.
+
+
 config SND_MAESTRO3
 	tristate "ESS Allegro/Maestro3"
 	select SND_AC97_CODEC
@@ -764,8 +787,8 @@
 	select SND_OXYGEN_LIB
 	help
 	  Say Y here to include support for sound cards based on the
-	  Asus AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, and
-	  Essence STX.
+	  Asus AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X,
+	  Essence ST (Deluxe), and Essence STX.
 	  Support for the HDAV1.3 (Deluxe) is very experimental.
 
 	  To compile this driver as a module, choose M here: the module
diff --git a/sound/pci/Makefile b/sound/pci/Makefile
index 65b25d2..ecfc609 100644
--- a/sound/pci/Makefile
+++ b/sound/pci/Makefile
@@ -59,9 +59,11 @@
 	ali5451/ \
 	au88x0/ \
 	aw2/ \
+	ctxfi/ \
 	ca0106/ \
 	cs46xx/ \
 	cs5535audio/ \
+	lx6464es/ \
 	echoaudio/ \
 	emu10k1/ \
 	hda/ \
diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 3906f5a..23f49f3 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -1255,8 +1255,8 @@
 	int temp;
 
 	temp = hwread(vortex->mmio, VORTEX_ADBDMA_STAT + (adbdma << 2));
-	temp = (dma->period_virt * dma->period_bytes) + (temp & POS_MASK);
-	return (temp);
+	temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1));
+	return temp;
 }
 
 static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma)
@@ -1504,8 +1504,7 @@
 	int temp;
 
 	temp = hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2));
-	//temp = (temp & POS_MASK) + (((temp>>WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK)*(dma->cfg0&POS_MASK));
-	temp = (temp & POS_MASK) + ((dma->period_virt) * (dma->period_bytes));
+	temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1));
 	return temp;
 }
 
@@ -2441,7 +2440,8 @@
 		spin_lock(&vortex->lock);
 		for (i = 0; i < NR_ADB; i++) {
 			if (vortex->dma_adb[i].fifo_status == FIFO_START) {
-				if (vortex_adbdma_bufshift(vortex, i)) ;
+				if (!vortex_adbdma_bufshift(vortex, i))
+					continue;
 				spin_unlock(&vortex->lock);
 				snd_pcm_period_elapsed(vortex->dma_adb[i].
 						       substream);
diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index ce3f2e9..24585c6 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -810,6 +810,8 @@
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x107d, 0x6606, GENERIC),
 	/* Voodoo TV 200 */
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x121a, 0x3000, GENERIC),
+	/* Askey Computer Corp. MagicTView'99 */
+	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x144f, 0x3000, GENERIC),
 	/* AVerMedia Studio No. 103, 203, ...? */
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x1461, 0x0003, AVPHONE98),
 	/* Prolink PixelView PV-M4900 */
diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index bfac30f..57b992a 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -1319,7 +1319,6 @@
         }
 
 	pcm->info_flags = 0;
-	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
 	strcpy(pcm->name, "CA0106");
 
 	for(substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; 
diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index c111efe..c8c6f43 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -739,7 +739,7 @@
 	} while (0)
 
 static __devinitdata
-DECLARE_TLV_DB_SCALE(snd_ca0106_master_db_scale, -6375, 50, 1);
+DECLARE_TLV_DB_SCALE(snd_ca0106_master_db_scale, -6375, 25, 1);
 
 static char *slave_vols[] __devinitdata = {
 	"Analog Front Playback Volume",
@@ -841,6 +841,9 @@
 					      snd_ca0106_master_db_scale);
 	if (!vmaster)
 		return -ENOMEM;
+	err = snd_ctl_add(card, vmaster);
+	if (err < 0)
+		return err;
 	add_slaves(card, vmaster, slave_vols);
 
 	if (emu->details->spi_dac == 1) {
@@ -848,8 +851,13 @@
 						      NULL);
 		if (!vmaster)
 			return -ENOMEM;
+		err = snd_ctl_add(card, vmaster);
+		if (err < 0)
+			return err;
 		add_slaves(card, vmaster, slave_sws);
 	}
+
+	strcpy(card->mixername, "CA0106");
         return 0;
 }
 
diff --git a/sound/pci/ctxfi/Makefile b/sound/pci/ctxfi/Makefile
new file mode 100644
index 0000000..15075f8
--- /dev/null
+++ b/sound/pci/ctxfi/Makefile
@@ -0,0 +1,5 @@
+snd-ctxfi-objs := xfi.o ctatc.o ctvmem.o ctpcm.o ctmixer.o ctresource.o \
+	ctsrc.o ctamixer.o ctdaio.o ctimap.o cthardware.o cttimer.o \
+	cthw20k2.o cthw20k1.o
+
+obj-$(CONFIG_SND_CTXFI) += snd-ctxfi.o
diff --git a/sound/pci/ctxfi/ct20k1reg.h b/sound/pci/ctxfi/ct20k1reg.h
new file mode 100644
index 0000000..f2e34e3
--- /dev/null
+++ b/sound/pci/ctxfi/ct20k1reg.h
@@ -0,0 +1,636 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#ifndef CT20K1REG_H
+#define CT20k1REG_H
+
+/* 20k1 registers */
+#define 	DSPXRAM_START 			0x000000
+#define 	DSPXRAM_END 			0x013FFC
+#define 	DSPAXRAM_START 			0x020000
+#define 	DSPAXRAM_END 			0x023FFC
+#define 	DSPYRAM_START 			0x040000
+#define 	DSPYRAM_END 			0x04FFFC
+#define 	DSPAYRAM_START 			0x020000
+#define 	DSPAYRAM_END 			0x063FFC
+#define 	DSPMICRO_START 			0x080000
+#define 	DSPMICRO_END 			0x0B3FFC
+#define 	DSP0IO_START 			0x100000
+#define 	DSP0IO_END	 		0x101FFC
+#define 	AUDIORINGIPDSP0_START 		0x100000
+#define 	AUDIORINGIPDSP0_END 		0x1003FC
+#define 	AUDIORINGOPDSP0_START 		0x100400
+#define 	AUDIORINGOPDSP0_END 		0x1007FC
+#define 	AUDPARARINGIODSP0_START 	0x100800
+#define 	AUDPARARINGIODSP0_END	 	0x100BFC
+#define 	DSP0LOCALHWREG_START 		0x100C00
+#define 	DSP0LOCALHWREG_END	 	0x100C3C
+#define 	DSP0XYRAMAGINDEX_START 		0x100C40
+#define 	DSP0XYRAMAGINDEX_END	 	0x100C5C
+#define 	DSP0XYRAMAGMDFR_START 		0x100C60
+#define 	DSP0XYRAMAGMDFR_END	 	0x100C7C
+#define 	DSP0INTCONTLVEC_START 		0x100C80
+#define 	DSP0INTCONTLVEC_END	 	0x100CD8
+#define 	INTCONTLGLOBALREG_START 	0x100D1C
+#define 	INTCONTLGLOBALREG_END	 	0x100D3C
+#define		HOSTINTFPORTADDRCONTDSP0	0x100D40
+#define		HOSTINTFPORTDATADSP0		0x100D44
+#define		TIME0PERENBDSP0			0x100D60
+#define		TIME0COUNTERDSP0		0x100D64
+#define		TIME1PERENBDSP0			0x100D68
+#define		TIME1COUNTERDSP0		0x100D6C
+#define		TIME2PERENBDSP0			0x100D70
+#define		TIME2COUNTERDSP0		0x100D74
+#define		TIME3PERENBDSP0			0x100D78
+#define		TIME3COUNTERDSP0		0x100D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP0 	0x100D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP0	0x100D9C
+#define 	XRAMINDOPERREFUP_STARTDSP0	0x100DA0
+#define 	XRAMINDOPERREFUP_ENDDSP0	0x100DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP0 	0x100DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP0 	0x100DDC
+#define 	YRAMINDOPERREFUP_STARTDSP0	0x100DE0
+#define 	YRAMINDOPERREFUP_ENDDSP0 	0x100DFC
+#define 	DSP0CONDCODE 			0x100E00
+#define 	DSP0STACKFLAG 			0x100E04
+#define 	DSP0PROGCOUNTSTACKPTREG 	0x100E08
+#define 	DSP0PROGCOUNTSTACKDATAREG 	0x100E0C
+#define 	DSP0CURLOOPADDRREG 		0x100E10
+#define 	DSP0CURLOOPCOUNT 		0x100E14
+#define 	DSP0TOPLOOPCOUNTSTACK 		0x100E18
+#define 	DSP0TOPLOOPADDRSTACK 		0x100E1C
+#define 	DSP0LOOPSTACKPTR 		0x100E20
+#define 	DSP0STASSTACKDATAREG 		0x100E24
+#define 	DSP0STASSTACKPTR 		0x100E28
+#define 	DSP0PROGCOUNT 			0x100E2C
+#define  	GLOBDSPDEBGREG			0x100E30
+#define  	GLOBDSPBREPTRREG		0x100E30
+#define 	DSP0XYRAMBASE_START 		0x100EA0
+#define 	DSP0XYRAMBASE_END 		0x100EBC
+#define 	DSP0XYRAMLENG_START 		0x100EC0
+#define 	DSP0XYRAMLENG_END 		0x100EDC
+#define		SEMAPHOREREGDSP0		0x100EE0
+#define		DSP0INTCONTMASKREG		0x100EE4
+#define		DSP0INTCONTPENDREG		0x100EE8
+#define		DSP0INTCONTSERVINT		0x100EEC
+#define		DSPINTCONTEXTINTMODREG		0x100EEC
+#define		GPIODSP0			0x100EFC
+#define 	DMADSPBASEADDRREG_STARTDSP0	0x100F00
+#define 	DMADSPBASEADDRREG_ENDDSP0	0x100F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP0	0x100F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP0	0x100F3C
+#define 	DMADSPCURADDRREG_STARTDSP0	0x100F40
+#define 	DMADSPCURADDRREG_ENDDSP0	0x100F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP0	0x100F60
+#define 	DMAHOSTCURADDRREG_ENDDSP0	0x100F7C
+#define 	DMATANXCOUNTREG_STARTDSP0	0x100F80
+#define 	DMATANXCOUNTREG_ENDDSP0		0x100F9C
+#define 	DMATIMEBUGREG_STARTDSP0		0x100FA0
+#define 	DMATIMEBUGREG_ENDDSP0		0x100FAC
+#define 	DMACNTLMODFREG_STARTDSP0	0x100FA0
+#define 	DMACNTLMODFREG_ENDDSP0		0x100FAC
+
+#define 	DMAGLOBSTATSREGDSP0		0x100FEC
+#define 	DSP0XGPRAM_START 		0x101000
+#define 	DSP0XGPRAM_END 			0x1017FC
+#define 	DSP0YGPRAM_START 		0x101800
+#define 	DSP0YGPRAM_END 			0x101FFC
+
+
+
+
+#define 	AUDIORINGIPDSP1_START 		0x102000
+#define 	AUDIORINGIPDSP1_END	 	0x1023FC
+#define 	AUDIORINGOPDSP1_START 		0x102400
+#define 	AUDIORINGOPDSP1_END	 	0x1027FC
+#define 	AUDPARARINGIODSP1_START 	0x102800
+#define 	AUDPARARINGIODSP1_END	 	0x102BFC
+#define 	DSP1LOCALHWREG_START 		0x102C00
+#define 	DSP1LOCALHWREG_END	 	0x102C3C
+#define 	DSP1XYRAMAGINDEX_START 		0x102C40
+#define 	DSP1XYRAMAGINDEX_END	 	0x102C5C
+#define 	DSP1XYRAMAGMDFR_START 		0x102C60
+#define 	DSP1XYRAMAGMDFR_END	 	0x102C7C
+#define 	DSP1INTCONTLVEC_START 		0x102C80
+#define 	DSP1INTCONTLVEC_END	 	0x102CD8
+#define		HOSTINTFPORTADDRCONTDSP1	0x102D40
+#define		HOSTINTFPORTDATADSP1		0x102D44
+#define		TIME0PERENBDSP1			0x102D60
+#define		TIME0COUNTERDSP1		0x102D64
+#define		TIME1PERENBDSP1			0x102D68
+#define		TIME1COUNTERDSP1		0x102D6C
+#define		TIME2PERENBDSP1			0x102D70
+#define		TIME2COUNTERDSP1		0x102D74
+#define		TIME3PERENBDSP1			0x102D78
+#define		TIME3COUNTERDSP1		0x102D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP1 	0x102D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP1	0x102D9C
+#define 	XRAMINDOPERREFUP_STARTDSP1	0x102DA0
+#define 	XRAMINDOPERREFUP_ENDDSP1	0x102DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP1 	0x102DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP1	0x102DDC
+#define 	YRAMINDOPERREFUP_STARTDSP1	0x102DE0
+#define 	YRAMINDOPERREFUP_ENDDSP1	0x102DFC
+
+#define 	DSP1CONDCODE 			0x102E00
+#define 	DSP1STACKFLAG 			0x102E04
+#define 	DSP1PROGCOUNTSTACKPTREG 	0x102E08
+#define 	DSP1PROGCOUNTSTACKDATAREG 	0x102E0C
+#define 	DSP1CURLOOPADDRREG 		0x102E10
+#define 	DSP1CURLOOPCOUNT 		0x102E14
+#define 	DSP1TOPLOOPCOUNTSTACK 		0x102E18
+#define 	DSP1TOPLOOPADDRSTACK 		0x102E1C
+#define 	DSP1LOOPSTACKPTR 		0x102E20
+#define 	DSP1STASSTACKDATAREG 		0x102E24
+#define 	DSP1STASSTACKPTR 		0x102E28
+#define 	DSP1PROGCOUNT 			0x102E2C
+#define 	DSP1XYRAMBASE_START 		0x102EA0
+#define 	DSP1XYRAMBASE_END 		0x102EBC
+#define 	DSP1XYRAMLENG_START 		0x102EC0
+#define 	DSP1XYRAMLENG_END 		0x102EDC
+#define		SEMAPHOREREGDSP1		0x102EE0
+#define		DSP1INTCONTMASKREG		0x102EE4
+#define		DSP1INTCONTPENDREG		0x102EE8
+#define		DSP1INTCONTSERVINT		0x102EEC
+#define		GPIODSP1			0x102EFC
+#define 	DMADSPBASEADDRREG_STARTDSP1	0x102F00
+#define 	DMADSPBASEADDRREG_ENDDSP1	0x102F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP1	0x102F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP1	0x102F3C
+#define 	DMADSPCURADDRREG_STARTDSP1	0x102F40
+#define 	DMADSPCURADDRREG_ENDDSP1	0x102F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP1	0x102F60
+#define 	DMAHOSTCURADDRREG_ENDDSP1	0x102F7C
+#define 	DMATANXCOUNTREG_STARTDSP1	0x102F80
+#define 	DMATANXCOUNTREG_ENDDSP1		0x102F9C
+#define 	DMATIMEBUGREG_STARTDSP1		0x102FA0
+#define 	DMATIMEBUGREG_ENDDSP1		0x102FAC
+#define 	DMACNTLMODFREG_STARTDSP1	0x102FA0
+#define 	DMACNTLMODFREG_ENDDSP1		0x102FAC
+
+#define 	DMAGLOBSTATSREGDSP1		0x102FEC
+#define 	DSP1XGPRAM_START 		0x103000
+#define 	DSP1XGPRAM_END 			0x1033FC
+#define 	DSP1YGPRAM_START 		0x103400
+#define 	DSP1YGPRAM_END 			0x1037FC
+
+
+
+#define 	AUDIORINGIPDSP2_START 		0x104000
+#define 	AUDIORINGIPDSP2_END	 	0x1043FC
+#define 	AUDIORINGOPDSP2_START 		0x104400
+#define 	AUDIORINGOPDSP2_END	 	0x1047FC
+#define 	AUDPARARINGIODSP2_START 	0x104800
+#define 	AUDPARARINGIODSP2_END	 	0x104BFC
+#define 	DSP2LOCALHWREG_START 		0x104C00
+#define 	DSP2LOCALHWREG_END	 	0x104C3C
+#define 	DSP2XYRAMAGINDEX_START 		0x104C40
+#define 	DSP2XYRAMAGINDEX_END	 	0x104C5C
+#define 	DSP2XYRAMAGMDFR_START 		0x104C60
+#define 	DSP2XYRAMAGMDFR_END		0x104C7C
+#define 	DSP2INTCONTLVEC_START 		0x104C80
+#define 	DSP2INTCONTLVEC_END	 	0x104CD8
+#define		HOSTINTFPORTADDRCONTDSP2	0x104D40
+#define		HOSTINTFPORTDATADSP2		0x104D44
+#define		TIME0PERENBDSP2			0x104D60
+#define		TIME0COUNTERDSP2		0x104D64
+#define		TIME1PERENBDSP2			0x104D68
+#define		TIME1COUNTERDSP2		0x104D6C
+#define		TIME2PERENBDSP2			0x104D70
+#define		TIME2COUNTERDSP2		0x104D74
+#define		TIME3PERENBDSP2			0x104D78
+#define		TIME3COUNTERDSP2		0x104D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP2 	0x104D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP2	0x104D9C
+#define 	XRAMINDOPERREFUP_STARTDSP2	0x104DA0
+#define 	XRAMINDOPERREFUP_ENDDSP2	0x104DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP2 	0x104DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP2	0x104DDC
+#define 	YRAMINDOPERREFUP_STARTDSP2	0x104DE0
+#define 	YRAMINDOPERREFUP_ENDDSP2 	0x104DFC
+#define 	DSP2CONDCODE 			0x104E00
+#define 	DSP2STACKFLAG 			0x104E04
+#define 	DSP2PROGCOUNTSTACKPTREG 	0x104E08
+#define 	DSP2PROGCOUNTSTACKDATAREG 	0x104E0C
+#define 	DSP2CURLOOPADDRREG 		0x104E10
+#define 	DSP2CURLOOPCOUNT 		0x104E14
+#define 	DSP2TOPLOOPCOUNTSTACK 		0x104E18
+#define 	DSP2TOPLOOPADDRSTACK 		0x104E1C
+#define 	DSP2LOOPSTACKPTR 		0x104E20
+#define 	DSP2STASSTACKDATAREG 		0x104E24
+#define 	DSP2STASSTACKPTR 		0x104E28
+#define 	DSP2PROGCOUNT 			0x104E2C
+#define 	DSP2XYRAMBASE_START 		0x104EA0
+#define 	DSP2XYRAMBASE_END 		0x104EBC
+#define 	DSP2XYRAMLENG_START 		0x104EC0
+#define 	DSP2XYRAMLENG_END 		0x104EDC
+#define		SEMAPHOREREGDSP2		0x104EE0
+#define		DSP2INTCONTMASKREG		0x104EE4
+#define		DSP2INTCONTPENDREG		0x104EE8
+#define		DSP2INTCONTSERVINT		0x104EEC
+#define		GPIODSP2			0x104EFC
+#define 	DMADSPBASEADDRREG_STARTDSP2	0x104F00
+#define 	DMADSPBASEADDRREG_ENDDSP2	0x104F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP2	0x104F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP2	0x104F3C
+#define 	DMADSPCURADDRREG_STARTDSP2	0x104F40
+#define 	DMADSPCURADDRREG_ENDDSP2	0x104F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP2	0x104F60
+#define 	DMAHOSTCURADDRREG_ENDDSP2	0x104F7C
+#define 	DMATANXCOUNTREG_STARTDSP2	0x104F80
+#define 	DMATANXCOUNTREG_ENDDSP2		0x104F9C
+#define 	DMATIMEBUGREG_STARTDSP2		0x104FA0
+#define 	DMATIMEBUGREG_ENDDSP2		0x104FAC
+#define 	DMACNTLMODFREG_STARTDSP2	0x104FA0
+#define 	DMACNTLMODFREG_ENDDSP2		0x104FAC
+
+#define 	DMAGLOBSTATSREGDSP2		0x104FEC
+#define 	DSP2XGPRAM_START 		0x105000
+#define 	DSP2XGPRAM_END 			0x1051FC
+#define 	DSP2YGPRAM_START 		0x105800
+#define 	DSP2YGPRAM_END 			0x1059FC
+
+
+
+#define 	AUDIORINGIPDSP3_START 		0x106000
+#define 	AUDIORINGIPDSP3_END	 	0x1063FC
+#define 	AUDIORINGOPDSP3_START 		0x106400
+#define 	AUDIORINGOPDSP3_END	 	0x1067FC
+#define 	AUDPARARINGIODSP3_START 	0x106800
+#define 	AUDPARARINGIODSP3_END	 	0x106BFC
+#define 	DSP3LOCALHWREG_START 		0x106C00
+#define 	DSP3LOCALHWREG_END	 	0x106C3C
+#define 	DSP3XYRAMAGINDEX_START 		0x106C40
+#define 	DSP3XYRAMAGINDEX_END	 	0x106C5C
+#define 	DSP3XYRAMAGMDFR_START 		0x106C60
+#define 	DSP3XYRAMAGMDFR_END		0x106C7C
+#define 	DSP3INTCONTLVEC_START 		0x106C80
+#define 	DSP3INTCONTLVEC_END	 	0x106CD8
+#define		HOSTINTFPORTADDRCONTDSP3	0x106D40
+#define		HOSTINTFPORTDATADSP3		0x106D44
+#define		TIME0PERENBDSP3			0x106D60
+#define		TIME0COUNTERDSP3		0x106D64
+#define		TIME1PERENBDSP3			0x106D68
+#define		TIME1COUNTERDSP3		0x106D6C
+#define		TIME2PERENBDSP3			0x106D70
+#define		TIME2COUNTERDSP3		0x106D74
+#define		TIME3PERENBDSP3			0x106D78
+#define		TIME3COUNTERDSP3		0x106D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP3 	0x106D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP3	0x106D9C
+#define 	XRAMINDOPERREFUP_STARTDSP3	0x106DA0
+#define 	XRAMINDOPERREFUP_ENDDSP3	0x106DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP3 	0x106DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP3	0x106DDC
+#define 	YRAMINDOPERREFUP_STARTDSP3	0x106DE0
+#define 	YRAMINDOPERREFUP_ENDDSP3	0x100DFC
+
+#define 	DSP3CONDCODE 			0x106E00
+#define 	DSP3STACKFLAG 			0x106E04
+#define 	DSP3PROGCOUNTSTACKPTREG 	0x106E08
+#define 	DSP3PROGCOUNTSTACKDATAREG 	0x106E0C
+#define 	DSP3CURLOOPADDRREG 		0x106E10
+#define 	DSP3CURLOOPCOUNT 		0x106E14
+#define 	DSP3TOPLOOPCOUNTSTACK 		0x106E18
+#define 	DSP3TOPLOOPADDRSTACK 		0x106E1C
+#define 	DSP3LOOPSTACKPTR 		0x106E20
+#define 	DSP3STASSTACKDATAREG 		0x106E24
+#define 	DSP3STASSTACKPTR 		0x106E28
+#define 	DSP3PROGCOUNT 			0x106E2C
+#define 	DSP3XYRAMBASE_START 		0x106EA0
+#define 	DSP3XYRAMBASE_END 		0x106EBC
+#define 	DSP3XYRAMLENG_START 		0x106EC0
+#define 	DSP3XYRAMLENG_END 		0x106EDC
+#define		SEMAPHOREREGDSP3		0x106EE0
+#define		DSP3INTCONTMASKREG		0x106EE4
+#define		DSP3INTCONTPENDREG		0x106EE8
+#define		DSP3INTCONTSERVINT		0x106EEC
+#define		GPIODSP3			0x106EFC
+#define 	DMADSPBASEADDRREG_STARTDSP3	0x106F00
+#define 	DMADSPBASEADDRREG_ENDDSP3	0x106F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP3	0x106F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP3	0x106F3C
+#define 	DMADSPCURADDRREG_STARTDSP3	0x106F40
+#define 	DMADSPCURADDRREG_ENDDSP3	0x106F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP3	0x106F60
+#define 	DMAHOSTCURADDRREG_ENDDSP3	0x106F7C
+#define 	DMATANXCOUNTREG_STARTDSP3	0x106F80
+#define 	DMATANXCOUNTREG_ENDDSP3		0x106F9C
+#define 	DMATIMEBUGREG_STARTDSP3		0x106FA0
+#define 	DMATIMEBUGREG_ENDDSP3		0x106FAC
+#define 	DMACNTLMODFREG_STARTDSP3	0x106FA0
+#define 	DMACNTLMODFREG_ENDDSP3		0x106FAC
+
+#define 	DMAGLOBSTATSREGDSP3		0x106FEC
+#define 	DSP3XGPRAM_START 		0x107000
+#define 	DSP3XGPRAM_END 			0x1071FC
+#define 	DSP3YGPRAM_START 		0x107800
+#define 	DSP3YGPRAM_END 			0x1079FC
+
+/* end of DSP reg definitions */
+
+#define  	DSPAIMAP_START			0x108000
+#define  	DSPAIMAP_END			0x1083FC
+#define  	DSPPIMAP_START			0x108400
+#define  	DSPPIMAP_END			0x1087FC
+#define  	DSPPOMAP_START			0x108800
+#define  	DSPPOMAP_END			0x108BFC
+#define  	DSPPOCTL			0x108C00
+#define 	TKCTL_START			0x110000
+#define 	TKCTL_END			0x110FFC
+#define 	TKCC_START			0x111000
+#define 	TKCC_END			0x111FFC
+#define 	TKIMAP_START			0x112000
+#define 	TKIMAP_END			0x112FFC
+#define		TKDCTR16			0x113000
+#define		TKPB16				0x113004
+#define		TKBS16				0x113008
+#define		TKDCTR32			0x11300C
+#define		TKPB32				0x113010
+#define		TKBS32				0x113014
+#define		ICDCTR16			0x113018
+#define		ITBS16				0x11301C
+#define		ICDCTR32			0x113020
+#define		ITBS32				0x113024
+#define		ITSTART				0x113028
+#define		TKSQ				0x11302C
+
+#define		TKSCCTL_START			0x114000
+#define		TKSCCTL_END			0x11403C
+#define		TKSCADR_START			0x114100
+#define		TKSCADR_END			0x11413C
+#define		TKSCDATAX_START			0x114800
+#define		TKSCDATAX_END			0x1149FC
+#define		TKPCDATAX_START			0x120000
+#define		TKPCDATAX_END			0x12FFFC
+
+#define		MALSA				0x130000
+#define		MAPPHA				0x130004
+#define		MAPPLA				0x130008
+#define		MALSB				0x130010
+#define		MAPPHB				0x130014
+#define		MAPPLB				0x130018
+
+#define 	TANSPORTMAPABREGS_START		0x130020
+#define 	TANSPORTMAPABREGS_END		0x13A2FC
+
+#define		PTPAHX				0x13B000
+#define		PTPALX				0x13B004
+
+#define		TANSPPAGETABLEPHYADDR015_START	0x13B008
+#define		TANSPPAGETABLEPHYADDR015_END	0x13B07C
+#define		TRNQADRX_START			0x13B100
+#define		TRNQADRX_END			0x13B13C
+#define		TRNQTIMX_START			0x13B200
+#define		TRNQTIMX_END			0x13B23C
+#define		TRNQAPARMX_START		0x13B300
+#define		TRNQAPARMX_END			0x13B33C
+
+#define		TRNQCNT				0x13B400
+#define		TRNCTL				0x13B404
+#define		TRNIS				0x13B408
+#define		TRNCURTS			0x13B40C
+
+#define		AMOP_START			0x140000
+#define		AMOPLO				0x140000
+#define		AMOPHI				0x140004
+#define		AMOP_END			0x147FFC
+#define		PMOP_START			0x148000
+#define		PMOPLO				0x148000
+#define		PMOPHI				0x148004
+#define		PMOP_END			0x14FFFC
+#define		PCURR_START			0x150000
+#define		PCURR_END			0x153FFC
+#define		PTRAG_START			0x154000
+#define		PTRAG_END			0x157FFC
+#define		PSR_START			0x158000
+#define		PSR_END				0x15BFFC
+
+#define		PFSTAT4SEG_START		0x160000
+#define		PFSTAT4SEG_END			0x160BFC
+#define		PFSTAT2SEG_START		0x160C00
+#define		PFSTAT2SEG_END			0x1617FC
+#define		PFTARG4SEG_START		0x164000
+#define		PFTARG4SEG_END			0x164BFC
+#define		PFTARG2SEG_START		0x164C00
+#define		PFTARG2SEG_END			0x1657FC
+#define		PFSR4SEG_START			0x168000
+#define		PFSR4SEG_END			0x168BFC
+#define		PFSR2SEG_START			0x168C00
+#define		PFSR2SEG_END			0x1697FC
+#define		PCURRMS4SEG_START		0x16C000
+#define		PCURRMS4SEG_END			0x16CCFC
+#define		PCURRMS2SEG_START		0x16CC00
+#define		PCURRMS2SEG_END			0x16D7FC
+#define		PTARGMS4SEG_START		0x170000
+#define		PTARGMS4SEG_END			0x172FFC
+#define		PTARGMS2SEG_START		0x173000
+#define		PTARGMS2SEG_END			0x1747FC
+#define		PSRMS4SEG_START			0x170000
+#define		PSRMS4SEG_END			0x172FFC
+#define		PSRMS2SEG_START			0x173000
+#define		PSRMS2SEG_END			0x1747FC
+
+#define		PRING_LO_START			0x190000
+#define		PRING_LO_END			0x193FFC
+#define		PRING_HI_START			0x194000
+#define		PRING_HI_END			0x197FFC
+#define		PRING_LO_HI_START		0x198000
+#define		PRING_LO_HI			0x198000
+#define		PRING_LO_HI_END			0x19BFFC
+
+#define		PINTFIFO			0x1A0000
+#define		SRCCTL				0x1B0000
+#define		SRCCCR				0x1B0004
+#define		SRCIMAP				0x1B0008
+#define		SRCODDC				0x1B000C
+#define		SRCCA				0x1B0010
+#define		SRCCF				0x1B0014
+#define		SRCSA				0x1B0018
+#define		SRCLA				0x1B001C
+#define		SRCCTLSWR			0x1B0020
+
+/* SRC HERE */
+#define		SRCALBA				0x1B002C
+#define		SRCMCTL				0x1B012C
+#define		SRCCERR				0x1B022C
+#define		SRCITB				0x1B032C
+#define		SRCIPM				0x1B082C
+#define		SRCIP				0x1B102C
+#define		SRCENBSTAT			0x1B202C
+#define		SRCENBLO			0x1B212C
+#define		SRCENBHI			0x1B222C
+#define		SRCENBS				0x1B232C
+#define		SRCENB				0x1B282C
+#define		SRCENB07			0x1B282C
+#define		SRCENBS07			0x1B302C
+
+#define		SRCDN0Z				0x1B0030
+#define		SRCDN0Z0			0x1B0030
+#define		SRCDN0Z1			0x1B0034
+#define		SRCDN0Z2			0x1B0038
+#define		SRCDN0Z3			0x1B003C
+#define		SRCDN1Z				0x1B0040
+#define		SRCDN1Z0			0x1B0040
+#define		SRCDN1Z1			0x1B0044
+#define		SRCDN1Z2			0x1B0048
+#define		SRCDN1Z3			0x1B004C
+#define		SRCDN1Z4			0x1B0050
+#define		SRCDN1Z5			0x1B0054
+#define		SRCDN1Z6			0x1B0058
+#define		SRCDN1Z7			0x1B005C
+#define		SRCUPZ				0x1B0060
+#define		SRCUPZ0				0x1B0060
+#define		SRCUPZ1				0x1B0064
+#define		SRCUPZ2				0x1B0068
+#define		SRCUPZ3				0x1B006C
+#define		SRCUPZ4				0x1B0070
+#define		SRCUPZ5				0x1B0074
+#define		SRCUPZ6				0x1B0078
+#define		SRCUPZ7				0x1B007C
+#define		SRCCD0				0x1B0080
+#define		SRCCD1				0x1B0084
+#define		SRCCD2				0x1B0088
+#define		SRCCD3				0x1B008C
+#define		SRCCD4				0x1B0090
+#define		SRCCD5				0x1B0094
+#define		SRCCD6				0x1B0098
+#define		SRCCD7				0x1B009C
+#define		SRCCD8				0x1B00A0
+#define		SRCCD9				0x1B00A4
+#define		SRCCDA				0x1B00A8
+#define		SRCCDB				0x1B00AC
+#define		SRCCDC				0x1B00B0
+#define		SRCCDD				0x1B00B4
+#define		SRCCDE				0x1B00B8
+#define		SRCCDF				0x1B00BC
+#define		SRCCD10				0x1B00C0
+#define		SRCCD11				0x1B00C4
+#define		SRCCD12				0x1B00C8
+#define		SRCCD13				0x1B00CC
+#define		SRCCD14				0x1B00D0
+#define		SRCCD15				0x1B00D4
+#define		SRCCD16				0x1B00D8
+#define		SRCCD17				0x1B00DC
+#define		SRCCD18				0x1B00E0
+#define		SRCCD19				0x1B00E4
+#define		SRCCD1A				0x1B00E8
+#define		SRCCD1B				0x1B00EC
+#define		SRCCD1C				0x1B00F0
+#define		SRCCD1D				0x1B00F4
+#define		SRCCD1E				0x1B00F8
+#define		SRCCD1F				0x1B00FC
+
+#define		SRCCONTRBLOCK_START		0x1B0100
+#define		SRCCONTRBLOCK_END		0x1BFFFC
+#define		FILTOP_START	0x1C0000
+#define		FILTOP_END	0x1C05FC
+#define		FILTIMAP_START	0x1C0800
+#define		FILTIMAP_END	0x1C0DFC
+#define		FILTZ1_START	0x1C1000
+#define		FILTZ1_END	0x1C15FC
+#define		FILTZ2_START	0x1C1800
+#define		FILTZ2_END	0x1C1DFC
+#define		DAOIMAP_START	0x1C5000
+#define		DAOIMAP		0x1C5000
+#define		DAOIMAP_END	0x1C5124
+
+#define		AC97D		0x1C5400
+#define		AC97A		0x1C5404
+#define		AC97CTL		0x1C5408
+#define		I2SCTL		0x1C5420
+
+#define		SPOS		0x1C5440
+#define		SPOSA		0x1C5440
+#define		SPOSB		0x1C5444
+#define		SPOSC		0x1C5448
+#define		SPOSD		0x1C544C
+
+#define		SPISA		0x1C5450
+#define		SPISB		0x1C5454
+#define		SPISC		0x1C5458
+#define		SPISD		0x1C545C
+
+#define		SPFSCTL		0x1C5460
+
+#define		SPFS0		0x1C5468
+#define		SPFS1		0x1C546C
+#define		SPFS2		0x1C5470
+#define		SPFS3		0x1C5474
+#define		SPFS4		0x1C5478
+#define		SPFS5		0x1C547C
+
+#define		SPOCTL		0x1C5480
+#define		SPICTL		0x1C5484
+#define		SPISTS		0x1C5488
+#define		SPINTP		0x1C548C
+#define		SPINTE		0x1C5490
+#define		SPUTCTLAB	0x1C5494
+#define		SPUTCTLCD	0x1C5498
+
+#define		SRTSPA		0x1C54C0
+#define		SRTSPB		0x1C54C4
+#define		SRTSPC		0x1C54C8
+#define		SRTSPD		0x1C54CC
+
+#define		SRTSCTL		0x1C54D0
+#define		SRTSCTLA	0x1C54D0
+#define		SRTSCTLB	0x1C54D4
+#define		SRTSCTLC	0x1C54D8
+#define		SRTSCTLD	0x1C54DC
+
+#define		SRTI2S		0x1C54E0
+#define		SRTICTL		0x1C54F0
+
+#define		WC		0x1C6000
+#define		TIMR		0x1C6004
+# define	TIMR_IE		(1<<15)
+# define	TIMR_IP		(1<<14)
+
+#define		GIP		0x1C6010
+#define		GIE		0x1C6014
+#define		DIE		0x1C6018
+#define		DIC		0x1C601C
+#define		GPIO		0x1C6020
+#define		GPIOCTL		0x1C6024
+#define		GPIP		0x1C6028
+#define		GPIE		0x1C602C
+#define		DSPINT0		0x1C6030
+#define		DSPEIOC		0x1C6034
+#define		MUADAT		0x1C6040
+#define		MUACMD		0x1C6044
+#define 	MUASTAT		0x1C6044
+#define		MUBDAT		0x1C6048
+#define		MUBCMD		0x1C604C
+#define		MUBSTAT		0x1C604C
+#define		UARTCMA		0x1C6050
+#define		UARTCMB		0x1C6054
+#define		UARTIP		0x1C6058
+#define		UARTIE		0x1C605C
+#define		PLLCTL		0x1C6060
+#define		PLLDCD		0x1C6064
+#define		GCTL		0x1C6070
+#define		ID0		0x1C6080
+#define		ID1		0x1C6084
+#define		ID2		0x1C6088
+#define		ID3		0x1C608C
+#define		SDRCTL		0x1C7000
+
+
+#define I2SA_L    0x0L
+#define I2SA_R    0x1L
+#define I2SB_L    0x8L
+#define I2SB_R    0x9L
+#define I2SC_L    0x10L
+#define I2SC_R    0x11L
+#define I2SD_L    0x18L
+#define I2SD_R    0x19L
+
+#endif /* CT20K1REG_H */
+
+
diff --git a/sound/pci/ctxfi/ct20k2reg.h b/sound/pci/ctxfi/ct20k2reg.h
new file mode 100644
index 0000000..2d07986
--- /dev/null
+++ b/sound/pci/ctxfi/ct20k2reg.h
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#ifndef _20K2REGISTERS_H_
+#define _20K2REGISTERS_H_
+
+
+/* Timer Registers */
+#define TIMER_TIMR          0x1B7004
+#define INTERRUPT_GIP       0x1B7010
+#define INTERRUPT_GIE       0x1B7014
+
+/* I2C Registers */
+#define I2C_IF_ADDRESS   0x1B9000
+#define I2C_IF_WDATA     0x1B9004
+#define I2C_IF_RDATA     0x1B9008
+#define I2C_IF_STATUS    0x1B900C
+#define I2C_IF_WLOCK     0x1B9010
+
+/* Global Control Registers */
+#define GLOBAL_CNTL_GCTL    0x1B7090
+
+/* PLL Registers */
+#define PLL_CTL 		0x1B7080
+#define PLL_STAT		0x1B7084
+#define PLL_ENB			0x1B7088
+
+/* SRC Registers */
+#define SRC_CTL             0x1A0000 /* 0x1A0000 + (256 * Chn) */
+#define SRC_CCR             0x1A0004 /* 0x1A0004 + (256 * Chn) */
+#define SRC_IMAP            0x1A0008 /* 0x1A0008 + (256 * Chn) */
+#define SRC_CA              0x1A0010 /* 0x1A0010 + (256 * Chn) */
+#define SRC_CF              0x1A0014 /* 0x1A0014 + (256 * Chn) */
+#define SRC_SA              0x1A0018 /* 0x1A0018 + (256 * Chn) */
+#define SRC_LA              0x1A001C /* 0x1A001C + (256 * Chn) */
+#define SRC_CTLSWR	    0x1A0020 /* 0x1A0020 + (256 * Chn) */
+#define SRC_CD		    0x1A0080 /* 0x1A0080 + (256 * Chn) + (4 * Regn) */
+#define SRC_MCTL		0x1A012C
+#define SRC_IP			0x1A102C /* 0x1A102C + (256 * Regn) */
+#define SRC_ENB			0x1A282C /* 0x1A282C + (256 * Regn) */
+#define SRC_ENBSTAT		0x1A202C
+#define SRC_ENBSA		0x1A232C
+#define SRC_DN0Z		0x1A0030
+#define SRC_DN1Z		0x1A0040
+#define SRC_UPZ			0x1A0060
+
+/* GPIO Registers */
+#define GPIO_DATA           0x1B7020
+#define GPIO_CTRL           0x1B7024
+
+/* Virtual memory registers */
+#define VMEM_PTPAL          0x1C6300 /* 0x1C6300 + (16 * Chn) */
+#define VMEM_PTPAH          0x1C6304 /* 0x1C6304 + (16 * Chn) */
+#define VMEM_CTL            0x1C7000
+
+/* Transport Registers */
+#define TRANSPORT_ENB       0x1B6000
+#define TRANSPORT_CTL       0x1B6004
+#define TRANSPORT_INT       0x1B6008
+
+/* Audio IO */
+#define AUDIO_IO_AIM        0x1B5000 /* 0x1B5000 + (0x04 * Chn) */
+#define AUDIO_IO_TX_CTL     0x1B5400 /* 0x1B5400 + (0x40 * Chn) */
+#define AUDIO_IO_TX_CSTAT_L 0x1B5408 /* 0x1B5408 + (0x40 * Chn) */
+#define AUDIO_IO_TX_CSTAT_H 0x1B540C /* 0x1B540C + (0x40 * Chn) */
+#define AUDIO_IO_RX_CTL     0x1B5410 /* 0x1B5410 + (0x40 * Chn) */
+#define AUDIO_IO_RX_SRT_CTL 0x1B5420 /* 0x1B5420 + (0x40 * Chn) */
+#define AUDIO_IO_MCLK       0x1B5600
+#define AUDIO_IO_TX_BLRCLK  0x1B5604
+#define AUDIO_IO_RX_BLRCLK  0x1B5608
+
+/* Mixer */
+#define MIXER_AMOPLO		0x130000 /* 0x130000 + (8 * Chn) [4095 : 0] */
+#define MIXER_AMOPHI		0x130004 /* 0x130004 + (8 * Chn) [4095 : 0] */
+#define MIXER_PRING_LO_HI	0x188000 /* 0x188000 + (4 * Chn) [4095 : 0] */
+#define MIXER_PMOPLO		0x138000 /* 0x138000 + (8 * Chn) [4095 : 0] */
+#define MIXER_PMOPHI		0x138004 /* 0x138004 + (8 * Chn) [4095 : 0] */
+#define MIXER_AR_ENABLE		0x19000C
+
+#endif
diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
new file mode 100644
index 0000000..a1db51b3
--- /dev/null
+++ b/sound/pci/ctxfi/ctamixer.c
@@ -0,0 +1,488 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctamixer.c
+ *
+ * @Brief
+ * This file contains the implementation of the Audio Mixer
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 21 2008
+ *
+ */
+
+#include "ctamixer.h"
+#include "cthardware.h"
+#include <linux/slab.h>
+
+#define AMIXER_RESOURCE_NUM	256
+#define SUM_RESOURCE_NUM	256
+
+#define AMIXER_Y_IMMEDIATE	1
+
+#define BLANK_SLOT		4094
+
+static int amixer_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
+}
+
+static int amixer_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
+}
+
+static int amixer_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
+}
+
+static int amixer_output_slot(const struct rsc *rsc)
+{
+	return (amixer_index(rsc) << 4) + 0x4;
+}
+
+static struct rsc_ops amixer_basic_rsc_ops = {
+	.master		= amixer_master,
+	.next_conj	= amixer_next_conj,
+	.index		= amixer_index,
+	.output_slot	= amixer_output_slot,
+};
+
+static int amixer_set_input(struct amixer *amixer, struct rsc *rsc)
+{
+	struct hw *hw;
+
+	hw = amixer->rsc.hw;
+	hw->amixer_set_mode(amixer->rsc.ctrl_blk, AMIXER_Y_IMMEDIATE);
+	amixer->input = rsc;
+	if (NULL == rsc)
+		hw->amixer_set_x(amixer->rsc.ctrl_blk, BLANK_SLOT);
+	else
+		hw->amixer_set_x(amixer->rsc.ctrl_blk,
+					rsc->ops->output_slot(rsc));
+
+	return 0;
+}
+
+/* y is a 14-bit immediate constant */
+static int amixer_set_y(struct amixer *amixer, unsigned int y)
+{
+	struct hw *hw;
+
+	hw = amixer->rsc.hw;
+	hw->amixer_set_y(amixer->rsc.ctrl_blk, y);
+
+	return 0;
+}
+
+static int amixer_set_invalid_squash(struct amixer *amixer, unsigned int iv)
+{
+	struct hw *hw;
+
+	hw = amixer->rsc.hw;
+	hw->amixer_set_iv(amixer->rsc.ctrl_blk, iv);
+
+	return 0;
+}
+
+static int amixer_set_sum(struct amixer *amixer, struct sum *sum)
+{
+	struct hw *hw;
+
+	hw = amixer->rsc.hw;
+	amixer->sum = sum;
+	if (NULL == sum) {
+		hw->amixer_set_se(amixer->rsc.ctrl_blk, 0);
+	} else {
+		hw->amixer_set_se(amixer->rsc.ctrl_blk, 1);
+		hw->amixer_set_sadr(amixer->rsc.ctrl_blk,
+					sum->rsc.ops->index(&sum->rsc));
+	}
+
+	return 0;
+}
+
+static int amixer_commit_write(struct amixer *amixer)
+{
+	struct hw *hw;
+	unsigned int index;
+	int i;
+	struct rsc *input;
+	struct sum *sum;
+
+	hw = amixer->rsc.hw;
+	input = amixer->input;
+	sum = amixer->sum;
+
+	/* Program master and conjugate resources */
+	amixer->rsc.ops->master(&amixer->rsc);
+	if (NULL != input)
+		input->ops->master(input);
+
+	if (NULL != sum)
+		sum->rsc.ops->master(&sum->rsc);
+
+	for (i = 0; i < amixer->rsc.msr; i++) {
+		hw->amixer_set_dirty_all(amixer->rsc.ctrl_blk);
+		if (NULL != input) {
+			hw->amixer_set_x(amixer->rsc.ctrl_blk,
+						input->ops->output_slot(input));
+			input->ops->next_conj(input);
+		}
+		if (NULL != sum) {
+			hw->amixer_set_sadr(amixer->rsc.ctrl_blk,
+						sum->rsc.ops->index(&sum->rsc));
+			sum->rsc.ops->next_conj(&sum->rsc);
+		}
+		index = amixer->rsc.ops->output_slot(&amixer->rsc);
+		hw->amixer_commit_write(hw, index, amixer->rsc.ctrl_blk);
+		amixer->rsc.ops->next_conj(&amixer->rsc);
+	}
+	amixer->rsc.ops->master(&amixer->rsc);
+	if (NULL != input)
+		input->ops->master(input);
+
+	if (NULL != sum)
+		sum->rsc.ops->master(&sum->rsc);
+
+	return 0;
+}
+
+static int amixer_commit_raw_write(struct amixer *amixer)
+{
+	struct hw *hw;
+	unsigned int index;
+
+	hw = amixer->rsc.hw;
+	index = amixer->rsc.ops->output_slot(&amixer->rsc);
+	hw->amixer_commit_write(hw, index, amixer->rsc.ctrl_blk);
+
+	return 0;
+}
+
+static int amixer_get_y(struct amixer *amixer)
+{
+	struct hw *hw;
+
+	hw = amixer->rsc.hw;
+	return hw->amixer_get_y(amixer->rsc.ctrl_blk);
+}
+
+static int amixer_setup(struct amixer *amixer, struct rsc *input,
+			unsigned int scale, struct sum *sum)
+{
+	amixer_set_input(amixer, input);
+	amixer_set_y(amixer, scale);
+	amixer_set_sum(amixer, sum);
+	amixer_commit_write(amixer);
+	return 0;
+}
+
+static struct amixer_rsc_ops amixer_ops = {
+	.set_input		= amixer_set_input,
+	.set_invalid_squash	= amixer_set_invalid_squash,
+	.set_scale		= amixer_set_y,
+	.set_sum		= amixer_set_sum,
+	.commit_write		= amixer_commit_write,
+	.commit_raw_write	= amixer_commit_raw_write,
+	.setup			= amixer_setup,
+	.get_scale		= amixer_get_y,
+};
+
+static int amixer_rsc_init(struct amixer *amixer,
+			   const struct amixer_desc *desc,
+			   struct amixer_mgr *mgr)
+{
+	int err;
+
+	err = rsc_init(&amixer->rsc, amixer->idx[0],
+			AMIXER, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	/* Set amixer specific operations */
+	amixer->rsc.ops = &amixer_basic_rsc_ops;
+	amixer->ops = &amixer_ops;
+	amixer->input = NULL;
+	amixer->sum = NULL;
+
+	amixer_setup(amixer, NULL, 0, NULL);
+
+	return 0;
+}
+
+static int amixer_rsc_uninit(struct amixer *amixer)
+{
+	amixer_setup(amixer, NULL, 0, NULL);
+	rsc_uninit(&amixer->rsc);
+	amixer->ops = NULL;
+	amixer->input = NULL;
+	amixer->sum = NULL;
+	return 0;
+}
+
+static int get_amixer_rsc(struct amixer_mgr *mgr,
+			  const struct amixer_desc *desc,
+			  struct amixer **ramixer)
+{
+	int err, i;
+	unsigned int idx;
+	struct amixer *amixer;
+	unsigned long flags;
+
+	*ramixer = NULL;
+
+	/* Allocate mem for amixer resource */
+	amixer = kzalloc(sizeof(*amixer), GFP_KERNEL);
+	if (NULL == amixer) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient
+	 * amixer resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		amixer->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Can't meet AMIXER resource request!\n");
+		goto error;
+	}
+
+	err = amixer_rsc_init(amixer, desc, mgr);
+	if (err)
+		goto error;
+
+	*ramixer = amixer;
+
+	return 0;
+
+error:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, amixer->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(amixer);
+	return err;
+}
+
+static int put_amixer_rsc(struct amixer_mgr *mgr, struct amixer *amixer)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < amixer->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, amixer->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	amixer_rsc_uninit(amixer);
+	kfree(amixer);
+
+	return 0;
+}
+
+int amixer_mgr_create(void *hw, struct amixer_mgr **ramixer_mgr)
+{
+	int err;
+	struct amixer_mgr *amixer_mgr;
+
+	*ramixer_mgr = NULL;
+	amixer_mgr = kzalloc(sizeof(*amixer_mgr), GFP_KERNEL);
+	if (NULL == amixer_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&amixer_mgr->mgr, AMIXER, AMIXER_RESOURCE_NUM, hw);
+	if (err)
+		goto error;
+
+	spin_lock_init(&amixer_mgr->mgr_lock);
+
+	amixer_mgr->get_amixer = get_amixer_rsc;
+	amixer_mgr->put_amixer = put_amixer_rsc;
+
+	*ramixer_mgr = amixer_mgr;
+
+	return 0;
+
+error:
+	kfree(amixer_mgr);
+	return err;
+}
+
+int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr)
+{
+	rsc_mgr_uninit(&amixer_mgr->mgr);
+	kfree(amixer_mgr);
+	return 0;
+}
+
+/* SUM resource management */
+
+static int sum_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
+}
+
+static int sum_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
+}
+
+static int sum_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
+}
+
+static int sum_output_slot(const struct rsc *rsc)
+{
+	return (sum_index(rsc) << 4) + 0xc;
+}
+
+static struct rsc_ops sum_basic_rsc_ops = {
+	.master		= sum_master,
+	.next_conj	= sum_next_conj,
+	.index		= sum_index,
+	.output_slot	= sum_output_slot,
+};
+
+static int sum_rsc_init(struct sum *sum,
+			const struct sum_desc *desc,
+			struct sum_mgr *mgr)
+{
+	int err;
+
+	err = rsc_init(&sum->rsc, sum->idx[0], SUM, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	sum->rsc.ops = &sum_basic_rsc_ops;
+
+	return 0;
+}
+
+static int sum_rsc_uninit(struct sum *sum)
+{
+	rsc_uninit(&sum->rsc);
+	return 0;
+}
+
+static int get_sum_rsc(struct sum_mgr *mgr,
+		       const struct sum_desc *desc,
+		       struct sum **rsum)
+{
+	int err, i;
+	unsigned int idx;
+	struct sum *sum;
+	unsigned long flags;
+
+	*rsum = NULL;
+
+	/* Allocate mem for sum resource */
+	sum = kzalloc(sizeof(*sum), GFP_KERNEL);
+	if (NULL == sum) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient sum resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		sum->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Can't meet SUM resource request!\n");
+		goto error;
+	}
+
+	err = sum_rsc_init(sum, desc, mgr);
+	if (err)
+		goto error;
+
+	*rsum = sum;
+
+	return 0;
+
+error:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, sum->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(sum);
+	return err;
+}
+
+static int put_sum_rsc(struct sum_mgr *mgr, struct sum *sum)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < sum->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, sum->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	sum_rsc_uninit(sum);
+	kfree(sum);
+
+	return 0;
+}
+
+int sum_mgr_create(void *hw, struct sum_mgr **rsum_mgr)
+{
+	int err;
+	struct sum_mgr *sum_mgr;
+
+	*rsum_mgr = NULL;
+	sum_mgr = kzalloc(sizeof(*sum_mgr), GFP_KERNEL);
+	if (NULL == sum_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&sum_mgr->mgr, SUM, SUM_RESOURCE_NUM, hw);
+	if (err)
+		goto error;
+
+	spin_lock_init(&sum_mgr->mgr_lock);
+
+	sum_mgr->get_sum = get_sum_rsc;
+	sum_mgr->put_sum = put_sum_rsc;
+
+	*rsum_mgr = sum_mgr;
+
+	return 0;
+
+error:
+	kfree(sum_mgr);
+	return err;
+}
+
+int sum_mgr_destroy(struct sum_mgr *sum_mgr)
+{
+	rsc_mgr_uninit(&sum_mgr->mgr);
+	kfree(sum_mgr);
+	return 0;
+}
+
diff --git a/sound/pci/ctxfi/ctamixer.h b/sound/pci/ctxfi/ctamixer.h
new file mode 100644
index 0000000..cc49e5a
--- /dev/null
+++ b/sound/pci/ctxfi/ctamixer.h
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctamixer.h
+ *
+ * @Brief
+ * This file contains the definition of the Audio Mixer
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 21 2008
+ *
+ */
+
+#ifndef CTAMIXER_H
+#define CTAMIXER_H
+
+#include "ctresource.h"
+#include <linux/spinlock.h>
+
+/* Define the descriptor of a summation node resource */
+struct sum {
+	struct rsc rsc;		/* Basic resource info */
+	unsigned char idx[8];
+};
+
+/* Define sum resource request description info */
+struct sum_desc {
+	unsigned int msr;
+};
+
+struct sum_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request one sum resource */
+	int (*get_sum)(struct sum_mgr *mgr,
+			const struct sum_desc *desc, struct sum **rsum);
+	/* return one sum resource */
+	int (*put_sum)(struct sum_mgr *mgr, struct sum *sum);
+};
+
+/* Constructor and destructor of daio resource manager */
+int sum_mgr_create(void *hw, struct sum_mgr **rsum_mgr);
+int sum_mgr_destroy(struct sum_mgr *sum_mgr);
+
+/* Define the descriptor of a amixer resource */
+struct amixer_rsc_ops;
+
+struct amixer {
+	struct rsc rsc;		/* Basic resource info */
+	unsigned char idx[8];
+	struct rsc *input;	/* pointer to a resource acting as source */
+	struct sum *sum;	/* Put amixer output to this summation node */
+	struct amixer_rsc_ops *ops;	/* AMixer specific operations */
+};
+
+struct amixer_rsc_ops {
+	int (*set_input)(struct amixer *amixer, struct rsc *rsc);
+	int (*set_scale)(struct amixer *amixer, unsigned int scale);
+	int (*set_invalid_squash)(struct amixer *amixer, unsigned int iv);
+	int (*set_sum)(struct amixer *amixer, struct sum *sum);
+	int (*commit_write)(struct amixer *amixer);
+	/* Only for interleaved recording */
+	int (*commit_raw_write)(struct amixer *amixer);
+	int (*setup)(struct amixer *amixer, struct rsc *input,
+			unsigned int scale, struct sum *sum);
+	int (*get_scale)(struct amixer *amixer);
+};
+
+/* Define amixer resource request description info */
+struct amixer_desc {
+	unsigned int msr;
+};
+
+struct amixer_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request one amixer resource */
+	int (*get_amixer)(struct amixer_mgr *mgr,
+			  const struct amixer_desc *desc,
+			  struct amixer **ramixer);
+	/* return one amixer resource */
+	int (*put_amixer)(struct amixer_mgr *mgr, struct amixer *amixer);
+};
+
+/* Constructor and destructor of amixer resource manager */
+int amixer_mgr_create(void *hw, struct amixer_mgr **ramixer_mgr);
+int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr);
+
+#endif /* CTAMIXER_H */
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
new file mode 100644
index 0000000..80fb2ba
--- /dev/null
+++ b/sound/pci/ctxfi/ctatc.c
@@ -0,0 +1,1619 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctatc.c
+ *
+ * @Brief
+ * This file contains the implementation of the device resource management
+ * object.
+ *
+ * @Author Liu Chun
+ * @Date Mar 28 2008
+ */
+
+#include "ctatc.h"
+#include "ctpcm.h"
+#include "ctmixer.h"
+#include "cthardware.h"
+#include "ctsrc.h"
+#include "ctamixer.h"
+#include "ctdaio.h"
+#include "cttimer.h"
+#include <linux/delay.h>
+#include <sound/pcm.h>
+#include <sound/control.h>
+#include <sound/asoundef.h>
+
+#define MONO_SUM_SCALE	0x19a8	/* 2^(-0.5) in 14-bit floating format */
+#define DAIONUM		7
+#define MAX_MULTI_CHN	8
+
+#define IEC958_DEFAULT_CON ((IEC958_AES0_NONAUDIO \
+			    | IEC958_AES0_CON_NOT_COPYRIGHT) \
+			    | ((IEC958_AES1_CON_MIXER \
+			    | IEC958_AES1_CON_ORIGINAL) << 8) \
+			    | (0x10 << 16) \
+			    | ((IEC958_AES3_CON_FS_48000) << 24))
+
+static struct snd_pci_quirk __devinitdata subsys_20k1_list[] = {
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0022, "SB055x", CTSB055X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0031, "SB073x", CTSB073X),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0xf000, 0x6000,
+			   "UAA", CTUAA),
+	SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_CREATIVE,
+			     "Unknown", CT20K1_UNKNOWN),
+	{ } /* terminator */
+};
+
+static struct snd_pci_quirk __devinitdata subsys_20k2_list[] = {
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB0760,
+		      "SB0760", CTSB0760),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08801,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08802,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08803,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0xf000,
+			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "HENDRIX",
+			   CTHENDRIX),
+	{ } /* terminator */
+};
+
+static const char *ct_subsys_name[NUM_CTCARDS] = {
+	[CTSB055X]	= "SB055x",
+	[CTSB073X]	= "SB073x",
+	[CTSB0760]	= "SB076x",
+	[CTUAA]		= "UAA",
+	[CT20K1_UNKNOWN] = "Unknown",
+	[CTHENDRIX]	= "Hendrix",
+	[CTSB0880]	= "SB0880",
+};
+
+static struct {
+	int (*create)(struct ct_atc *atc,
+			enum CTALSADEVS device, const char *device_name);
+	int (*destroy)(void *alsa_dev);
+	const char *public_name;
+} alsa_dev_funcs[NUM_CTALSADEVS] = {
+	[FRONT]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Front/WaveIn"},
+	[SURROUND]	= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Surround"},
+	[CLFE]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Center/LFE"},
+	[SIDE]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Side"},
+	[IEC958]	= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "IEC958 Non-audio"},
+
+	[MIXER]		= { .create = ct_alsa_mix_create,
+			    .destroy = NULL,
+			    .public_name = "Mixer"}
+};
+
+typedef int (*create_t)(void *, void **);
+typedef int (*destroy_t)(void *);
+
+static struct {
+	int (*create)(void *hw, void **rmgr);
+	int (*destroy)(void *mgr);
+} rsc_mgr_funcs[NUM_RSCTYP] = {
+	[SRC] 		= { .create 	= (create_t)src_mgr_create,
+			    .destroy 	= (destroy_t)src_mgr_destroy	},
+	[SRCIMP] 	= { .create 	= (create_t)srcimp_mgr_create,
+			    .destroy 	= (destroy_t)srcimp_mgr_destroy	},
+	[AMIXER]	= { .create	= (create_t)amixer_mgr_create,
+			    .destroy	= (destroy_t)amixer_mgr_destroy	},
+	[SUM]		= { .create	= (create_t)sum_mgr_create,
+			    .destroy	= (destroy_t)sum_mgr_destroy	},
+	[DAIO]		= { .create	= (create_t)daio_mgr_create,
+			    .destroy	= (destroy_t)daio_mgr_destroy	}
+};
+
+static int
+atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+
+/* *
+ * Only mono and interleaved modes are supported now.
+ * Always allocates a contiguous channel block.
+ * */
+
+static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct snd_pcm_runtime *runtime;
+	struct ct_vm *vm;
+
+	if (NULL == apcm->substream)
+		return 0;
+
+	runtime = apcm->substream->runtime;
+	vm = atc->vm;
+
+	apcm->vm_block = vm->map(vm, apcm->substream, runtime->dma_bytes);
+
+	if (NULL == apcm->vm_block)
+		return -ENOENT;
+
+	return 0;
+}
+
+static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct ct_vm *vm;
+
+	if (NULL == apcm->vm_block)
+		return;
+
+	vm = atc->vm;
+
+	vm->unmap(vm, apcm->vm_block);
+
+	apcm->vm_block = NULL;
+}
+
+static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
+{
+	struct ct_vm *vm;
+	void *kvirt_addr;
+	unsigned long phys_addr;
+
+	vm = atc->vm;
+	kvirt_addr = vm->get_ptp_virt(vm, index);
+	if (kvirt_addr == NULL)
+		phys_addr = (~0UL);
+	else
+		phys_addr = virt_to_phys(kvirt_addr);
+
+	return phys_addr;
+}
+
+static unsigned int convert_format(snd_pcm_format_t snd_format)
+{
+	switch (snd_format) {
+	case SNDRV_PCM_FORMAT_U8:
+		return SRC_SF_U8;
+	case SNDRV_PCM_FORMAT_S16_LE:
+		return SRC_SF_S16;
+	case SNDRV_PCM_FORMAT_S24_3LE:
+		return SRC_SF_S24;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		return SRC_SF_S32;
+	case SNDRV_PCM_FORMAT_FLOAT_LE:
+		return SRC_SF_F32;
+	default:
+		printk(KERN_ERR "ctxfi: not recognized snd format is %d \n",
+			snd_format);
+		return SRC_SF_S16;
+	}
+}
+
+static unsigned int
+atc_get_pitch(unsigned int input_rate, unsigned int output_rate)
+{
+	unsigned int pitch;
+	int b;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = (input_rate / output_rate) << 24;
+	input_rate %= output_rate;
+	input_rate /= 100;
+	output_rate /= 100;
+	for (b = 31; ((b >= 0) && !(input_rate >> b)); )
+		b--;
+
+	if (b >= 0) {
+		input_rate <<= (31 - b);
+		input_rate /= output_rate;
+		b = 24 - (31 - b);
+		if (b >= 0)
+			input_rate <<= b;
+		else
+			input_rate >>= -b;
+
+		pitch |= input_rate;
+	}
+
+	return pitch;
+}
+
+static int select_rom(unsigned int pitch)
+{
+	if ((pitch > 0x00428f5c) && (pitch < 0x01b851ec)) {
+		/* 0.26 <= pitch <= 1.72 */
+		return 1;
+	} else if ((0x01d66666 == pitch) || (0x01d66667 == pitch)) {
+		/* pitch == 1.8375 */
+		return 2;
+	} else if (0x02000000 == pitch) {
+		/* pitch == 2 */
+		return 3;
+	} else if ((pitch >= 0x0) && (pitch <= 0x08000000)) {
+		/* 0 <= pitch <= 8 */
+		return 0;
+	} else {
+		return -ENOENT;
+	}
+}
+
+static int atc_pcm_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct src_desc desc = {0};
+	struct amixer_desc mix_dsc = {0};
+	struct src *src;
+	struct amixer *amixer;
+	int err;
+	int n_amixer = apcm->substream->runtime->channels, i = 0;
+	int device = apcm->substream->pcm->device;
+	unsigned int pitch;
+	unsigned long flags;
+
+	if (NULL != apcm->src) {
+		/* Prepared pcm playback */
+		return 0;
+	}
+
+	/* first release old resources */
+	atc->pcm_release_resources(atc, apcm);
+
+	/* Get SRC resource */
+	desc.multi = apcm->substream->runtime->channels;
+	desc.msr = atc->msr;
+	desc.mode = MEMRD;
+	err = src_mgr->get_src(src_mgr, &desc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch(apcm->substream->runtime->rate,
+						(atc->rsr * atc->msr));
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+	src->ops->set_rom(src, select_rom(pitch));
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_pm(src, (src->ops->next_interleave(src) != NULL));
+
+	/* Get AMIXER resource */
+	n_amixer = (n_amixer < 2) ? 2 : n_amixer;
+	apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+	if (NULL == apcm->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mix_dsc.msr = atc->msr;
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	/* Connect resources */
+	src = apcm->src;
+	for (i = 0; i < n_amixer; i++) {
+		amixer = apcm->amixers[i];
+		spin_lock_irqsave(&atc->atc_lock, flags);
+		amixer->ops->setup(amixer, &src->rsc,
+					INIT_VOL, atc->pcm[i+device*2]);
+		spin_unlock_irqrestore(&atc->atc_lock, flags);
+		src = src->ops->next_interleave(src);
+		if (NULL == src)
+			src = apcm->src;
+	}
+
+	ct_timer_prepare(apcm->timer);
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int
+atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct srcimp_mgr *srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
+	struct srcimp *srcimp;
+	int i;
+
+	if (NULL != apcm->srcimps) {
+		for (i = 0; i < apcm->n_srcimp; i++) {
+			srcimp = apcm->srcimps[i];
+			srcimp->ops->unmap(srcimp);
+			srcimp_mgr->put_srcimp(srcimp_mgr, srcimp);
+			apcm->srcimps[i] = NULL;
+		}
+		kfree(apcm->srcimps);
+		apcm->srcimps = NULL;
+	}
+
+	if (NULL != apcm->srccs) {
+		for (i = 0; i < apcm->n_srcc; i++) {
+			src_mgr->put_src(src_mgr, apcm->srccs[i]);
+			apcm->srccs[i] = NULL;
+		}
+		kfree(apcm->srccs);
+		apcm->srccs = NULL;
+	}
+
+	if (NULL != apcm->amixers) {
+		for (i = 0; i < apcm->n_amixer; i++) {
+			amixer_mgr->put_amixer(amixer_mgr, apcm->amixers[i]);
+			apcm->amixers[i] = NULL;
+		}
+		kfree(apcm->amixers);
+		apcm->amixers = NULL;
+	}
+
+	if (NULL != apcm->mono) {
+		sum_mgr->put_sum(sum_mgr, apcm->mono);
+		apcm->mono = NULL;
+	}
+
+	if (NULL != apcm->src) {
+		src_mgr->put_src(src_mgr, apcm->src);
+		apcm->src = NULL;
+	}
+
+	if (NULL != apcm->vm_block) {
+		/* Undo device virtual mem map */
+		ct_unmap_audio_buffer(atc, apcm);
+		apcm->vm_block = NULL;
+	}
+
+	return 0;
+}
+
+static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	unsigned int max_cisz;
+	struct src *src = apcm->src;
+
+	if (apcm->started)
+		return 0;
+	apcm->started = 1;
+
+	max_cisz = src->multi * src->rsc.msr;
+	max_cisz = 0x80 * (max_cisz < 8 ? max_cisz : 8);
+
+	src->ops->set_sa(src, apcm->vm_block->addr);
+	src->ops->set_la(src, apcm->vm_block->addr + apcm->vm_block->size);
+	src->ops->set_ca(src, apcm->vm_block->addr + max_cisz);
+	src->ops->set_cisz(src, max_cisz);
+
+	src->ops->set_bm(src, 1);
+	src->ops->set_state(src, SRC_STATE_INIT);
+	src->ops->commit_write(src);
+
+	ct_timer_start(apcm->timer);
+	return 0;
+}
+
+static int atc_pcm_stop(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src;
+	int i;
+
+	ct_timer_stop(apcm->timer);
+
+	src = apcm->src;
+	src->ops->set_bm(src, 0);
+	src->ops->set_state(src, SRC_STATE_OFF);
+	src->ops->commit_write(src);
+
+	if (NULL != apcm->srccs) {
+		for (i = 0; i < apcm->n_srcc; i++) {
+			src = apcm->srccs[i];
+			src->ops->set_bm(src, 0);
+			src->ops->set_state(src, SRC_STATE_OFF);
+			src->ops->commit_write(src);
+		}
+	}
+
+	apcm->started = 0;
+
+	return 0;
+}
+
+static int
+atc_pcm_playback_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = apcm->src;
+	u32 size, max_cisz;
+	int position;
+
+	if (!src)
+		return 0;
+	position = src->ops->get_ca(src);
+
+	size = apcm->vm_block->size;
+	max_cisz = src->multi * src->rsc.msr;
+	max_cisz = 128 * (max_cisz < 8 ? max_cisz : 8);
+
+	return (position + size - max_cisz - apcm->vm_block->addr) % size;
+}
+
+struct src_node_conf_t {
+	unsigned int pitch;
+	unsigned int msr:8;
+	unsigned int mix_msr:8;
+	unsigned int imp_msr:8;
+	unsigned int vo:1;
+};
+
+static void setup_src_node_conf(struct ct_atc *atc, struct ct_atc_pcm *apcm,
+				struct src_node_conf_t *conf, int *n_srcc)
+{
+	unsigned int pitch;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+	*n_srcc = 0;
+
+	if (1 == atc->msr) {
+		*n_srcc = apcm->substream->runtime->channels;
+		conf[0].pitch = pitch;
+		conf[0].mix_msr = conf[0].imp_msr = conf[0].msr = 1;
+		conf[0].vo = 1;
+	} else if (2 == atc->msr) {
+		if (0x8000000 < pitch) {
+			/* Need two-stage SRCs, SRCIMPs and
+			 * AMIXERs for converting format */
+			conf[0].pitch = (atc->msr << 24);
+			conf[0].msr = conf[0].mix_msr = 1;
+			conf[0].imp_msr = atc->msr;
+			conf[0].vo = 0;
+			conf[1].pitch = atc_get_pitch(atc->rsr,
+					apcm->substream->runtime->rate);
+			conf[1].msr = conf[1].mix_msr = conf[1].imp_msr = 1;
+			conf[1].vo = 1;
+			*n_srcc = apcm->substream->runtime->channels * 2;
+		} else if (0x1000000 < pitch) {
+			/* Need one-stage SRCs, SRCIMPs and
+			 * AMIXERs for converting format */
+			conf[0].pitch = pitch;
+			conf[0].msr = conf[0].mix_msr
+				    = conf[0].imp_msr = atc->msr;
+			conf[0].vo = 1;
+			*n_srcc = apcm->substream->runtime->channels;
+		}
+	}
+}
+
+static int
+atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct srcimp_mgr *srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
+	struct src_desc src_dsc = {0};
+	struct src *src;
+	struct srcimp_desc srcimp_dsc = {0};
+	struct srcimp *srcimp;
+	struct amixer_desc mix_dsc = {0};
+	struct sum_desc sum_dsc = {0};
+	unsigned int pitch;
+	int multi, err, i;
+	int n_srcimp, n_amixer, n_srcc, n_sum;
+	struct src_node_conf_t src_node_conf[2] = {{0} };
+
+	/* first release old resources */
+	atc_pcm_release_resources(atc, apcm);
+
+	/* The numbers of converting SRCs and SRCIMPs should be determined
+	 * by pitch value. */
+
+	multi = apcm->substream->runtime->channels;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+
+	setup_src_node_conf(atc, apcm, src_node_conf, &n_srcc);
+	n_sum = (1 == multi) ? 1 : 0;
+	n_amixer = n_sum * 2 + n_srcc;
+	n_srcimp = n_srcc;
+	if ((multi > 1) && (0x8000000 >= pitch)) {
+		/* Need extra AMIXERs and SRCIMPs for special treatment
+		 * of interleaved recording of conjugate channels */
+		n_amixer += multi * atc->msr;
+		n_srcimp += multi * atc->msr;
+	} else {
+		n_srcimp += multi;
+	}
+
+	if (n_srcc) {
+		apcm->srccs = kzalloc(sizeof(void *)*n_srcc, GFP_KERNEL);
+		if (NULL == apcm->srccs)
+			return -ENOMEM;
+	}
+	if (n_amixer) {
+		apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+		if (NULL == apcm->amixers) {
+			err = -ENOMEM;
+			goto error1;
+		}
+	}
+	apcm->srcimps = kzalloc(sizeof(void *)*n_srcimp, GFP_KERNEL);
+	if (NULL == apcm->srcimps) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	/* Allocate SRCs for sample rate conversion if needed */
+	src_dsc.multi = 1;
+	src_dsc.mode = ARCRW;
+	for (i = 0, apcm->n_srcc = 0; i < n_srcc; i++) {
+		src_dsc.msr = src_node_conf[i/multi].msr;
+		err = src_mgr->get_src(src_mgr, &src_dsc,
+					(struct src **)&apcm->srccs[i]);
+		if (err)
+			goto error1;
+
+		src = apcm->srccs[i];
+		pitch = src_node_conf[i/multi].pitch;
+		src->ops->set_pitch(src, pitch);
+		src->ops->set_rom(src, select_rom(pitch));
+		src->ops->set_vo(src, src_node_conf[i/multi].vo);
+
+		apcm->n_srcc++;
+	}
+
+	/* Allocate AMIXERs for routing SRCs of conversion if needed */
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		if (i < (n_sum*2))
+			mix_dsc.msr = atc->msr;
+		else if (i < (n_sum*2+n_srcc))
+			mix_dsc.msr = src_node_conf[(i-n_sum*2)/multi].mix_msr;
+		else
+			mix_dsc.msr = 1;
+
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Allocate a SUM resource to mix all input channels together */
+	sum_dsc.msr = atc->msr;
+	err = sum_mgr->get_sum(sum_mgr, &sum_dsc, (struct sum **)&apcm->mono);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+	/* Allocate SRCIMP resources */
+	for (i = 0, apcm->n_srcimp = 0; i < n_srcimp; i++) {
+		if (i < (n_srcc))
+			srcimp_dsc.msr = src_node_conf[i/multi].imp_msr;
+		else if (1 == multi)
+			srcimp_dsc.msr = (pitch <= 0x8000000) ? atc->msr : 1;
+		else
+			srcimp_dsc.msr = 1;
+
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc, &srcimp);
+		if (err)
+			goto error1;
+
+		apcm->srcimps[i] = srcimp;
+		apcm->n_srcimp++;
+	}
+
+	/* Allocate a SRC for writing data to host memory */
+	src_dsc.multi = apcm->substream->runtime->channels;
+	src_dsc.msr = 1;
+	src_dsc.mode = MEMWR;
+	err = src_mgr->get_src(src_mgr, &src_dsc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int atc_pcm_capture_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src;
+	struct amixer *amixer;
+	struct srcimp *srcimp;
+	struct ct_mixer *mixer = atc->mixer;
+	struct sum *mono;
+	struct rsc *out_ports[8] = {NULL};
+	int err, i, j, n_sum, multi;
+	unsigned int pitch;
+	int mix_base = 0, imp_base = 0;
+
+	if (NULL != apcm->src) {
+		/* Prepared pcm capture */
+		return 0;
+	}
+
+	/* Get needed resources. */
+	err = atc_pcm_capture_get_resources(atc, apcm);
+	if (err)
+		return err;
+
+	/* Connect resources */
+	mixer->get_output_ports(mixer, MIX_PCMO_FRONT,
+				&out_ports[0], &out_ports[1]);
+
+	multi = apcm->substream->runtime->channels;
+	if (1 == multi) {
+		mono = apcm->mono;
+		for (i = 0; i < 2; i++) {
+			amixer = apcm->amixers[i];
+			amixer->ops->setup(amixer, out_ports[i],
+						MONO_SUM_SCALE, mono);
+		}
+		out_ports[0] = &mono->rsc;
+		n_sum = 1;
+		mix_base = n_sum * 2;
+	}
+
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		srcimp = apcm->srcimps[imp_base+i];
+		amixer = apcm->amixers[mix_base+i];
+		srcimp->ops->map(srcimp, src, out_ports[i%multi]);
+		amixer->ops->setup(amixer, &src->rsc, INIT_VOL, NULL);
+		out_ports[i%multi] = &amixer->rsc;
+	}
+
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+
+	if ((multi > 1) && (pitch <= 0x8000000)) {
+		/* Special connection for interleaved
+		 * recording with conjugate channels */
+		for (i = 0; i < multi; i++) {
+			out_ports[i]->ops->master(out_ports[i]);
+			for (j = 0; j < atc->msr; j++) {
+				amixer = apcm->amixers[apcm->n_srcc+j*multi+i];
+				amixer->ops->set_input(amixer, out_ports[i]);
+				amixer->ops->set_scale(amixer, INIT_VOL);
+				amixer->ops->set_sum(amixer, NULL);
+				amixer->ops->commit_raw_write(amixer);
+				out_ports[i]->ops->next_conj(out_ports[i]);
+
+				srcimp = apcm->srcimps[apcm->n_srcc+j*multi+i];
+				srcimp->ops->map(srcimp, apcm->src,
+							&amixer->rsc);
+			}
+		}
+	} else {
+		for (i = 0; i < multi; i++) {
+			srcimp = apcm->srcimps[apcm->n_srcc+i];
+			srcimp->ops->map(srcimp, apcm->src, out_ports[i]);
+		}
+	}
+
+	ct_timer_prepare(apcm->timer);
+
+	return 0;
+}
+
+static int atc_pcm_capture_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src;
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	int i, multi;
+
+	if (apcm->started)
+		return 0;
+
+	apcm->started = 1;
+	multi = apcm->substream->runtime->channels;
+	/* Set up converting SRCs */
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		src->ops->set_pm(src, ((i%multi) != (multi-1)));
+		src_mgr->src_disable(src_mgr, src);
+	}
+
+	/*  Set up recording SRC */
+	src = apcm->src;
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_sa(src, apcm->vm_block->addr);
+	src->ops->set_la(src, apcm->vm_block->addr + apcm->vm_block->size);
+	src->ops->set_ca(src, apcm->vm_block->addr);
+	src_mgr->src_disable(src_mgr, src);
+
+	/* Disable relevant SRCs firstly */
+	src_mgr->commit_write(src_mgr);
+
+	/* Enable SRCs respectively */
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		src->ops->set_state(src, SRC_STATE_RUN);
+		src->ops->commit_write(src);
+		src_mgr->src_enable_s(src_mgr, src);
+	}
+	src = apcm->src;
+	src->ops->set_bm(src, 1);
+	src->ops->set_state(src, SRC_STATE_RUN);
+	src->ops->commit_write(src);
+	src_mgr->src_enable_s(src_mgr, src);
+
+	/* Enable relevant SRCs synchronously */
+	src_mgr->commit_write(src_mgr);
+
+	ct_timer_start(apcm->timer);
+	return 0;
+}
+
+static int
+atc_pcm_capture_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = apcm->src;
+
+	if (!src)
+		return 0;
+	return src->ops->get_ca(src) - apcm->vm_block->addr;
+}
+
+static int spdif_passthru_playback_get_resources(struct ct_atc *atc,
+						 struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct src_desc desc = {0};
+	struct amixer_desc mix_dsc = {0};
+	struct src *src;
+	int err;
+	int n_amixer = apcm->substream->runtime->channels, i;
+	unsigned int pitch, rsr = atc->pll_rate;
+
+	/* first release old resources */
+	atc_pcm_release_resources(atc, apcm);
+
+	/* Get SRC resource */
+	desc.multi = apcm->substream->runtime->channels;
+	desc.msr = 1;
+	while (apcm->substream->runtime->rate > (rsr * desc.msr))
+		desc.msr <<= 1;
+
+	desc.mode = MEMRD;
+	err = src_mgr->get_src(src_mgr, &desc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch(apcm->substream->runtime->rate, (rsr * desc.msr));
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+	src->ops->set_rom(src, select_rom(pitch));
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_pm(src, (src->ops->next_interleave(src) != NULL));
+	src->ops->set_bp(src, 1);
+
+	/* Get AMIXER resource */
+	n_amixer = (n_amixer < 2) ? 2 : n_amixer;
+	apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+	if (NULL == apcm->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mix_dsc.msr = desc.msr;
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int atc_pll_init(struct ct_atc *atc, int rate)
+{
+	struct hw *hw = atc->hw;
+	int err;
+	err = hw->pll_init(hw, rate);
+	atc->pll_rate = err ? 0 : rate;
+	return err;
+}
+
+static int
+spdif_passthru_playback_setup(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct dao *dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	unsigned long flags;
+	unsigned int rate = apcm->substream->runtime->rate;
+	unsigned int status;
+	int err;
+	unsigned char iec958_con_fs;
+
+	switch (rate) {
+	case 48000:
+		iec958_con_fs = IEC958_AES3_CON_FS_48000;
+		break;
+	case 44100:
+		iec958_con_fs = IEC958_AES3_CON_FS_44100;
+		break;
+	case 32000:
+		iec958_con_fs = IEC958_AES3_CON_FS_32000;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao->ops->get_spos(dao, &status);
+	if (((status >> 24) & IEC958_AES3_CON_FS) != iec958_con_fs) {
+		status &= ((~IEC958_AES3_CON_FS) << 24);
+		status |= (iec958_con_fs << 24);
+		dao->ops->set_spos(dao, status);
+		dao->ops->commit_write(dao);
+	}
+	if ((rate != atc->pll_rate) && (32000 != rate))
+		err = atc_pll_init(atc, rate);
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	return err;
+}
+
+static int
+spdif_passthru_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src;
+	struct amixer *amixer;
+	struct dao *dao;
+	int err;
+	int i;
+	unsigned long flags;
+
+	if (NULL != apcm->src)
+		return 0;
+
+	/* Configure SPDIFOO and PLL to passthrough mode;
+	 * determine pll_rate. */
+	err = spdif_passthru_playback_setup(atc, apcm);
+	if (err)
+		return err;
+
+	/* Get needed resources. */
+	err = spdif_passthru_playback_get_resources(atc, apcm);
+	if (err)
+		return err;
+
+	/* Connect resources */
+	src = apcm->src;
+	for (i = 0; i < apcm->n_amixer; i++) {
+		amixer = apcm->amixers[i];
+		amixer->ops->setup(amixer, &src->rsc, INIT_VOL, NULL);
+		src = src->ops->next_interleave(src);
+		if (NULL == src)
+			src = apcm->src;
+	}
+	/* Connect to SPDIFOO */
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	amixer = apcm->amixers[0];
+	dao->ops->set_left_input(dao, &amixer->rsc);
+	amixer = apcm->amixers[1];
+	dao->ops->set_right_input(dao, &amixer->rsc);
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	ct_timer_prepare(apcm->timer);
+
+	return 0;
+}
+
+static int atc_select_line_in(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+	struct ct_mixer *mixer = atc->mixer;
+	struct src *src;
+
+	if (hw->is_adc_source_selected(hw, ADC_LINEIN))
+		return 0;
+
+	mixer->set_input_left(mixer, MIX_MIC_IN, NULL);
+	mixer->set_input_right(mixer, MIX_MIC_IN, NULL);
+
+	hw->select_adc_source(hw, ADC_LINEIN);
+
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_LINE_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_LINE_IN, &src->rsc);
+
+	return 0;
+}
+
+static int atc_select_mic_in(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+	struct ct_mixer *mixer = atc->mixer;
+	struct src *src;
+
+	if (hw->is_adc_source_selected(hw, ADC_MICIN))
+		return 0;
+
+	mixer->set_input_left(mixer, MIX_LINE_IN, NULL);
+	mixer->set_input_right(mixer, MIX_LINE_IN, NULL);
+
+	hw->select_adc_source(hw, ADC_MICIN);
+
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_MIC_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_MIC_IN, &src->rsc);
+
+	return 0;
+}
+
+static int atc_have_digit_io_switch(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+
+	return hw->have_digit_io_switch(hw);
+}
+
+static int atc_select_digit_io(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+
+	if (hw->is_adc_source_selected(hw, ADC_NONE))
+		return 0;
+
+	hw->select_adc_source(hw, ADC_NONE);
+
+	return 0;
+}
+
+static int atc_daio_unmute(struct ct_atc *atc, unsigned char state, int type)
+{
+	struct daio_mgr *daio_mgr = atc->rsc_mgrs[DAIO];
+
+	if (state)
+		daio_mgr->daio_enable(daio_mgr, atc->daios[type]);
+	else
+		daio_mgr->daio_disable(daio_mgr, atc->daios[type]);
+
+	daio_mgr->commit_write(daio_mgr);
+
+	return 0;
+}
+
+static int
+atc_dao_get_status(struct ct_atc *atc, unsigned int *status, int type)
+{
+	struct dao *dao = container_of(atc->daios[type], struct dao, daio);
+	return dao->ops->get_spos(dao, status);
+}
+
+static int
+atc_dao_set_status(struct ct_atc *atc, unsigned int status, int type)
+{
+	struct dao *dao = container_of(atc->daios[type], struct dao, daio);
+
+	dao->ops->set_spos(dao, status);
+	dao->ops->commit_write(dao);
+	return 0;
+}
+
+static int atc_line_front_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO1);
+}
+
+static int atc_line_surround_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO4);
+}
+
+static int atc_line_clfe_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO3);
+}
+
+static int atc_line_rear_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO2);
+}
+
+static int atc_line_in_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEIM);
+}
+
+static int atc_spdif_out_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, SPDIFOO);
+}
+
+static int atc_spdif_in_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, SPDIFIO);
+}
+
+static int atc_spdif_out_get_status(struct ct_atc *atc, unsigned int *status)
+{
+	return atc_dao_get_status(atc, status, SPDIFOO);
+}
+
+static int atc_spdif_out_set_status(struct ct_atc *atc, unsigned int status)
+{
+	return atc_dao_set_status(atc, status, SPDIFOO);
+}
+
+static int atc_spdif_out_passthru(struct ct_atc *atc, unsigned char state)
+{
+	unsigned long flags;
+	struct dao_desc da_dsc = {0};
+	struct dao *dao;
+	int err;
+	struct ct_mixer *mixer = atc->mixer;
+	struct rsc *rscs[2] = {NULL};
+	unsigned int spos = 0;
+
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	da_dsc.msr = state ? 1 : atc->msr;
+	da_dsc.passthru = state ? 1 : 0;
+	err = dao->ops->reinit(dao, &da_dsc);
+	if (state) {
+		spos = IEC958_DEFAULT_CON;
+	} else {
+		mixer->get_output_ports(mixer, MIX_SPDIF_OUT,
+					&rscs[0], &rscs[1]);
+		dao->ops->set_left_input(dao, rscs[0]);
+		dao->ops->set_right_input(dao, rscs[1]);
+		/* Restore PLL to atc->rsr if needed. */
+		if (atc->pll_rate != atc->rsr)
+			err = atc_pll_init(atc, atc->rsr);
+	}
+	dao->ops->set_spos(dao, spos);
+	dao->ops->commit_write(dao);
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	return err;
+}
+
+static int ct_atc_destroy(struct ct_atc *atc)
+{
+	struct daio_mgr *daio_mgr;
+	struct dao *dao;
+	struct dai *dai;
+	struct daio *daio;
+	struct sum_mgr *sum_mgr;
+	struct src_mgr *src_mgr;
+	struct srcimp_mgr *srcimp_mgr;
+	struct srcimp *srcimp;
+	struct ct_mixer *mixer;
+	int i = 0;
+
+	if (NULL == atc)
+		return 0;
+
+	if (atc->timer) {
+		ct_timer_free(atc->timer);
+		atc->timer = NULL;
+	}
+
+	/* Stop hardware and disable all interrupts */
+	if (NULL != atc->hw)
+		((struct hw *)atc->hw)->card_stop(atc->hw);
+
+	/* Destroy internal mixer objects */
+	if (NULL != atc->mixer) {
+		mixer = atc->mixer;
+		mixer->set_input_left(mixer, MIX_LINE_IN, NULL);
+		mixer->set_input_right(mixer, MIX_LINE_IN, NULL);
+		mixer->set_input_left(mixer, MIX_MIC_IN, NULL);
+		mixer->set_input_right(mixer, MIX_MIC_IN, NULL);
+		mixer->set_input_left(mixer, MIX_SPDIF_IN, NULL);
+		mixer->set_input_right(mixer, MIX_SPDIF_IN, NULL);
+		ct_mixer_destroy(atc->mixer);
+	}
+
+	if (NULL != atc->daios) {
+		daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO];
+		for (i = 0; i < atc->n_daio; i++) {
+			daio = atc->daios[i];
+			if (daio->type < LINEIM) {
+				dao = container_of(daio, struct dao, daio);
+				dao->ops->clear_left_input(dao);
+				dao->ops->clear_right_input(dao);
+			} else {
+				dai = container_of(daio, struct dai, daio);
+				/* some thing to do for dai ... */
+			}
+			daio_mgr->put_daio(daio_mgr, daio);
+		}
+		kfree(atc->daios);
+	}
+
+	if (NULL != atc->pcm) {
+		sum_mgr = atc->rsc_mgrs[SUM];
+		for (i = 0; i < atc->n_pcm; i++)
+			sum_mgr->put_sum(sum_mgr, atc->pcm[i]);
+
+		kfree(atc->pcm);
+	}
+
+	if (NULL != atc->srcs) {
+		src_mgr = atc->rsc_mgrs[SRC];
+		for (i = 0; i < atc->n_src; i++)
+			src_mgr->put_src(src_mgr, atc->srcs[i]);
+
+		kfree(atc->srcs);
+	}
+
+	if (NULL != atc->srcimps) {
+		srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+		for (i = 0; i < atc->n_srcimp; i++) {
+			srcimp = atc->srcimps[i];
+			srcimp->ops->unmap(srcimp);
+			srcimp_mgr->put_srcimp(srcimp_mgr, atc->srcimps[i]);
+		}
+		kfree(atc->srcimps);
+	}
+
+	for (i = 0; i < NUM_RSCTYP; i++) {
+		if ((NULL != rsc_mgr_funcs[i].destroy) &&
+		    (NULL != atc->rsc_mgrs[i]))
+			rsc_mgr_funcs[i].destroy(atc->rsc_mgrs[i]);
+
+	}
+
+	if (NULL != atc->hw)
+		destroy_hw_obj((struct hw *)atc->hw);
+
+	/* Destroy device virtual memory manager object */
+	if (NULL != atc->vm) {
+		ct_vm_destroy(atc->vm);
+		atc->vm = NULL;
+	}
+
+	kfree(atc);
+
+	return 0;
+}
+
+static int atc_dev_free(struct snd_device *dev)
+{
+	struct ct_atc *atc = dev->device_data;
+	return ct_atc_destroy(atc);
+}
+
+static int __devinit atc_identify_card(struct ct_atc *atc)
+{
+	const struct snd_pci_quirk *p;
+	const struct snd_pci_quirk *list;
+
+	switch (atc->chip_type) {
+	case ATC20K1:
+		atc->chip_name = "20K1";
+		list = subsys_20k1_list;
+		break;
+	case ATC20K2:
+		atc->chip_name = "20K2";
+		list = subsys_20k2_list;
+		break;
+	default:
+		return -ENOENT;
+	}
+	p = snd_pci_quirk_lookup(atc->pci, list);
+	if (!p)
+		return -ENOENT;
+	atc->model = p->value;
+	atc->model_name = ct_subsys_name[atc->model];
+	snd_printd("ctxfi: chip %s model %s (%04x:%04x) is found\n",
+		   atc->chip_name, atc->model_name,
+		   atc->pci->subsystem_vendor,
+		   atc->pci->subsystem_device);
+	return 0;
+}
+
+int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc)
+{
+	enum CTALSADEVS i;
+	int err;
+
+	alsa_dev_funcs[MIXER].public_name = atc->chip_name;
+
+	for (i = 0; i < NUM_CTALSADEVS; i++) {
+		if (NULL == alsa_dev_funcs[i].create)
+			continue;
+
+		err = alsa_dev_funcs[i].create(atc, i,
+				alsa_dev_funcs[i].public_name);
+		if (err) {
+			printk(KERN_ERR "ctxfi: "
+			       "Creating alsa device %d failed!\n", i);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int __devinit atc_create_hw_devs(struct ct_atc *atc)
+{
+	struct hw *hw;
+	struct card_conf info = {0};
+	int i, err;
+
+	err = create_hw_obj(atc->pci, atc->chip_type, atc->model, &hw);
+	if (err) {
+		printk(KERN_ERR "Failed to create hw obj!!!\n");
+		return err;
+	}
+	atc->hw = hw;
+
+	/* Initialize card hardware. */
+	info.rsr = atc->rsr;
+	info.msr = atc->msr;
+	info.vm_pgt_phys = atc_get_ptp_phys(atc, 0);
+	err = hw->card_init(hw, &info);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < NUM_RSCTYP; i++) {
+		if (NULL == rsc_mgr_funcs[i].create)
+			continue;
+
+		err = rsc_mgr_funcs[i].create(atc->hw, &atc->rsc_mgrs[i]);
+		if (err) {
+			printk(KERN_ERR "ctxfi: "
+			       "Failed to create rsc_mgr %d!!!\n", i);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int __devinit atc_get_resources(struct ct_atc *atc)
+{
+	struct daio_desc da_desc = {0};
+	struct daio_mgr *daio_mgr;
+	struct src_desc src_dsc = {0};
+	struct src_mgr *src_mgr;
+	struct srcimp_desc srcimp_dsc = {0};
+	struct srcimp_mgr *srcimp_mgr;
+	struct sum_desc sum_dsc = {0};
+	struct sum_mgr *sum_mgr;
+	int err, i;
+
+	atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL);
+	if (NULL == atc->daios)
+		return -ENOMEM;
+
+	atc->srcs = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL);
+	if (NULL == atc->srcs)
+		return -ENOMEM;
+
+	atc->srcimps = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL);
+	if (NULL == atc->srcimps)
+		return -ENOMEM;
+
+	atc->pcm = kzalloc(sizeof(void *)*(2*4), GFP_KERNEL);
+	if (NULL == atc->pcm)
+		return -ENOMEM;
+
+	daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO];
+	da_desc.msr = atc->msr;
+	for (i = 0, atc->n_daio = 0; i < DAIONUM-1; i++) {
+		da_desc.type = i;
+		err = daio_mgr->get_daio(daio_mgr, &da_desc,
+					(struct daio **)&atc->daios[i]);
+		if (err) {
+			printk(KERN_ERR "ctxfi: Failed to get DAIO "
+					"resource %d!!!\n", i);
+			return err;
+		}
+		atc->n_daio++;
+	}
+	if (atc->model == CTSB073X)
+		da_desc.type = SPDIFI1;
+	else
+		da_desc.type = SPDIFIO;
+	err = daio_mgr->get_daio(daio_mgr, &da_desc,
+				(struct daio **)&atc->daios[i]);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Failed to get S/PDIF-in resource!!!\n");
+		return err;
+	}
+	atc->n_daio++;
+
+	src_mgr = atc->rsc_mgrs[SRC];
+	src_dsc.multi = 1;
+	src_dsc.msr = atc->msr;
+	src_dsc.mode = ARCRW;
+	for (i = 0, atc->n_src = 0; i < (2*2); i++) {
+		err = src_mgr->get_src(src_mgr, &src_dsc,
+					(struct src **)&atc->srcs[i]);
+		if (err)
+			return err;
+
+		atc->n_src++;
+	}
+
+	srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	srcimp_dsc.msr = 8; /* SRCIMPs for S/PDIFIn SRT */
+	for (i = 0, atc->n_srcimp = 0; i < (2*1); i++) {
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc,
+					(struct srcimp **)&atc->srcimps[i]);
+		if (err)
+			return err;
+
+		atc->n_srcimp++;
+	}
+	srcimp_dsc.msr = 8; /* SRCIMPs for LINE/MICIn SRT */
+	for (i = 0; i < (2*1); i++) {
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc,
+				(struct srcimp **)&atc->srcimps[2*1+i]);
+		if (err)
+			return err;
+
+		atc->n_srcimp++;
+	}
+
+	sum_mgr = atc->rsc_mgrs[SUM];
+	sum_dsc.msr = atc->msr;
+	for (i = 0, atc->n_pcm = 0; i < (2*4); i++) {
+		err = sum_mgr->get_sum(sum_mgr, &sum_dsc,
+					(struct sum **)&atc->pcm[i]);
+		if (err)
+			return err;
+
+		atc->n_pcm++;
+	}
+
+	err = ct_mixer_create(atc, (struct ct_mixer **)&atc->mixer);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Failed to create mixer obj!!!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void __devinit
+atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
+		struct src **srcs, struct srcimp **srcimps)
+{
+	struct rsc *rscs[2] = {NULL};
+	struct src *src;
+	struct srcimp *srcimp;
+	int i = 0;
+
+	rscs[0] = &dai->daio.rscl;
+	rscs[1] = &dai->daio.rscr;
+	for (i = 0; i < 2; i++) {
+		src = srcs[i];
+		srcimp = srcimps[i];
+		srcimp->ops->map(srcimp, src, rscs[i]);
+		src_mgr->src_disable(src_mgr, src);
+	}
+
+	src_mgr->commit_write(src_mgr); /* Actually disable SRCs */
+
+	src = srcs[0];
+	src->ops->set_pm(src, 1);
+	for (i = 0; i < 2; i++) {
+		src = srcs[i];
+		src->ops->set_state(src, SRC_STATE_RUN);
+		src->ops->commit_write(src);
+		src_mgr->src_enable_s(src_mgr, src);
+	}
+
+	dai->ops->set_srt_srcl(dai, &(srcs[0]->rsc));
+	dai->ops->set_srt_srcr(dai, &(srcs[1]->rsc));
+
+	dai->ops->set_enb_src(dai, 1);
+	dai->ops->set_enb_srt(dai, 1);
+	dai->ops->commit_write(dai);
+
+	src_mgr->commit_write(src_mgr); /* Synchronously enable SRCs */
+}
+
+static void __devinit atc_connect_resources(struct ct_atc *atc)
+{
+	struct dai *dai;
+	struct dao *dao;
+	struct src *src;
+	struct sum *sum;
+	struct ct_mixer *mixer;
+	struct rsc *rscs[2] = {NULL};
+	int i, j;
+
+	mixer = atc->mixer;
+
+	for (i = MIX_WAVE_FRONT, j = LINEO1; i <= MIX_SPDIF_OUT; i++, j++) {
+		mixer->get_output_ports(mixer, i, &rscs[0], &rscs[1]);
+		dao = container_of(atc->daios[j], struct dao, daio);
+		dao->ops->set_left_input(dao, rscs[0]);
+		dao->ops->set_right_input(dao, rscs[1]);
+	}
+
+	dai = container_of(atc->daios[LINEIM], struct dai, daio);
+	atc_connect_dai(atc->rsc_mgrs[SRC], dai,
+			(struct src **)&atc->srcs[2],
+			(struct srcimp **)&atc->srcimps[2]);
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_LINE_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_LINE_IN, &src->rsc);
+
+	dai = container_of(atc->daios[SPDIFIO], struct dai, daio);
+	atc_connect_dai(atc->rsc_mgrs[SRC], dai,
+			(struct src **)&atc->srcs[0],
+			(struct srcimp **)&atc->srcimps[0]);
+
+	src = atc->srcs[0];
+	mixer->set_input_left(mixer, MIX_SPDIF_IN, &src->rsc);
+	src = atc->srcs[1];
+	mixer->set_input_right(mixer, MIX_SPDIF_IN, &src->rsc);
+
+	for (i = MIX_PCMI_FRONT, j = 0; i <= MIX_PCMI_SURROUND; i++, j += 2) {
+		sum = atc->pcm[j];
+		mixer->set_input_left(mixer, i, &sum->rsc);
+		sum = atc->pcm[j+1];
+		mixer->set_input_right(mixer, i, &sum->rsc);
+	}
+}
+
+static struct ct_atc atc_preset __devinitdata = {
+	.map_audio_buffer = ct_map_audio_buffer,
+	.unmap_audio_buffer = ct_unmap_audio_buffer,
+	.pcm_playback_prepare = atc_pcm_playback_prepare,
+	.pcm_release_resources = atc_pcm_release_resources,
+	.pcm_playback_start = atc_pcm_playback_start,
+	.pcm_playback_stop = atc_pcm_stop,
+	.pcm_playback_position = atc_pcm_playback_position,
+	.pcm_capture_prepare = atc_pcm_capture_prepare,
+	.pcm_capture_start = atc_pcm_capture_start,
+	.pcm_capture_stop = atc_pcm_stop,
+	.pcm_capture_position = atc_pcm_capture_position,
+	.spdif_passthru_playback_prepare = spdif_passthru_playback_prepare,
+	.get_ptp_phys = atc_get_ptp_phys,
+	.select_line_in = atc_select_line_in,
+	.select_mic_in = atc_select_mic_in,
+	.select_digit_io = atc_select_digit_io,
+	.line_front_unmute = atc_line_front_unmute,
+	.line_surround_unmute = atc_line_surround_unmute,
+	.line_clfe_unmute = atc_line_clfe_unmute,
+	.line_rear_unmute = atc_line_rear_unmute,
+	.line_in_unmute = atc_line_in_unmute,
+	.spdif_out_unmute = atc_spdif_out_unmute,
+	.spdif_in_unmute = atc_spdif_in_unmute,
+	.spdif_out_get_status = atc_spdif_out_get_status,
+	.spdif_out_set_status = atc_spdif_out_set_status,
+	.spdif_out_passthru = atc_spdif_out_passthru,
+	.have_digit_io_switch = atc_have_digit_io_switch,
+};
+
+/**
+ *  ct_atc_create - create and initialize a hardware manager
+ *  @card: corresponding alsa card object
+ *  @pci: corresponding kernel pci device object
+ *  @ratc: return created object address in it
+ *
+ *  Creates and initializes a hardware manager.
+ *
+ *  Creates kmallocated ct_atc structure. Initializes hardware.
+ *  Returns 0 if suceeds, or negative error code if fails.
+ */
+
+int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
+			    unsigned int rsr, unsigned int msr,
+			    int chip_type, struct ct_atc **ratc)
+{
+	struct ct_atc *atc;
+	static struct snd_device_ops ops = {
+		.dev_free = atc_dev_free,
+	};
+	int err;
+
+	*ratc = NULL;
+
+	atc = kzalloc(sizeof(*atc), GFP_KERNEL);
+	if (NULL == atc)
+		return -ENOMEM;
+
+	/* Set operations */
+	*atc = atc_preset;
+
+	atc->card = card;
+	atc->pci = pci;
+	atc->rsr = rsr;
+	atc->msr = msr;
+	atc->chip_type = chip_type;
+
+	spin_lock_init(&atc->atc_lock);
+
+	/* Find card model */
+	err = atc_identify_card(atc);
+	if (err < 0) {
+		printk(KERN_ERR "ctatc: Card not recognised\n");
+		goto error1;
+	}
+
+	/* Set up device virtual memory management object */
+	err = ct_vm_create(&atc->vm);
+	if (err < 0)
+		goto error1;
+
+	/* Create all atc hw devices */
+	err = atc_create_hw_devs(atc);
+	if (err < 0)
+		goto error1;
+
+	/* Get resources */
+	err = atc_get_resources(atc);
+	if (err < 0)
+		goto error1;
+
+	/* Build topology */
+	atc_connect_resources(atc);
+
+	atc->timer = ct_timer_new(atc);
+	if (!atc->timer)
+		goto error1;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, atc, &ops);
+	if (err < 0)
+		goto error1;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*ratc = atc;
+	return 0;
+
+error1:
+	ct_atc_destroy(atc);
+	printk(KERN_ERR "ctxfi: Something wrong!!!\n");
+	return err;
+}
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
new file mode 100644
index 0000000..a033472
--- /dev/null
+++ b/sound/pci/ctxfi/ctatc.h
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctatc.h
+ *
+ * @Brief
+ * This file contains the definition of the device resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTATC_H
+#define CTATC_H
+
+#include <linux/types.h>
+#include <linux/spinlock_types.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+#include <sound/core.h>
+
+#include "ctvmem.h"
+#include "ctresource.h"
+
+enum CTALSADEVS {		/* Types of alsa devices */
+	FRONT,
+	SURROUND,
+	CLFE,
+	SIDE,
+	IEC958,
+	MIXER,
+	NUM_CTALSADEVS		/* This should always be the last */
+};
+
+struct ct_atc_chip_sub_details {
+	u16 subsys;
+	const char *nm_model;
+};
+
+struct ct_atc_chip_details {
+	u16 vendor;
+	u16 device;
+	const struct ct_atc_chip_sub_details *sub_details;
+	const char *nm_card;
+};
+
+struct ct_atc;
+struct ct_timer;
+struct ct_timer_instance;
+
+/* alsa pcm stream descriptor */
+struct ct_atc_pcm {
+	struct snd_pcm_substream *substream;
+	void (*interrupt)(struct ct_atc_pcm *apcm);
+	struct ct_timer_instance *timer;
+	unsigned int started:1;
+
+	/* Only mono and interleaved modes are supported now. */
+	struct ct_vm_block *vm_block;
+	void *src;		/* SRC for interacting with host memory */
+	void **srccs;		/* SRCs for sample rate conversion */
+	void **srcimps;		/* SRC Input Mappers */
+	void **amixers;		/* AMIXERs for routing converted data */
+	void *mono;		/* A SUM resource for mixing chs to one */
+	unsigned char n_srcc;	/* Number of converting SRCs */
+	unsigned char n_srcimp;	/* Number of SRC Input Mappers */
+	unsigned char n_amixer;	/* Number of AMIXERs */
+};
+
+/* Chip resource management object */
+struct ct_atc {
+	struct pci_dev *pci;
+	struct snd_card *card;
+	unsigned int rsr; /* reference sample rate in Hz */
+	unsigned int msr; /* master sample rate in rsr */
+	unsigned int pll_rate; /* current rate of Phase Lock Loop */
+
+	int chip_type;
+	int model;
+	const char *chip_name;
+	const char *model_name;
+
+	struct ct_vm *vm; /* device virtual memory manager for this card */
+	int (*map_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	void (*unmap_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	unsigned long (*get_ptp_phys)(struct ct_atc *atc, int index);
+
+	spinlock_t atc_lock;
+
+	int (*pcm_playback_prepare)(struct ct_atc *atc,
+				    struct ct_atc_pcm *apcm);
+	int (*pcm_playback_start)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_playback_stop)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_playback_position)(struct ct_atc *atc,
+				     struct ct_atc_pcm *apcm);
+	int (*spdif_passthru_playback_prepare)(struct ct_atc *atc,
+					       struct ct_atc_pcm *apcm);
+	int (*pcm_capture_prepare)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_start)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_stop)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_position)(struct ct_atc *atc,
+				    struct ct_atc_pcm *apcm);
+	int (*pcm_release_resources)(struct ct_atc *atc,
+				     struct ct_atc_pcm *apcm);
+	int (*select_line_in)(struct ct_atc *atc);
+	int (*select_mic_in)(struct ct_atc *atc);
+	int (*select_digit_io)(struct ct_atc *atc);
+	int (*line_front_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_surround_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_clfe_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_rear_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_in_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_out_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_in_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_out_get_status)(struct ct_atc *atc, unsigned int *status);
+	int (*spdif_out_set_status)(struct ct_atc *atc, unsigned int status);
+	int (*spdif_out_passthru)(struct ct_atc *atc, unsigned char state);
+	int (*have_digit_io_switch)(struct ct_atc *atc);
+
+	/* Don't touch! Used for internal object. */
+	void *rsc_mgrs[NUM_RSCTYP]; /* chip resource managers */
+	void *mixer;		/* internal mixer object */
+	void *hw;		/* chip specific hardware access object */
+	void **daios;		/* digital audio io resources */
+	void **pcm;		/* SUMs for collecting all pcm stream */
+	void **srcs;		/* Sample Rate Converters for input signal */
+	void **srcimps;		/* input mappers for SRCs */
+	unsigned char n_daio;
+	unsigned char n_src;
+	unsigned char n_srcimp;
+	unsigned char n_pcm;
+
+	struct ct_timer *timer;
+};
+
+
+int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
+			    unsigned int rsr, unsigned int msr, int chip_type,
+			    struct ct_atc **ratc);
+int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc);
+
+#endif /* CTATC_H */
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
new file mode 100644
index 0000000..082e35c
--- /dev/null
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -0,0 +1,769 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctdaio.c
+ *
+ * @Brief
+ * This file contains the implementation of Digital Audio Input Output
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#include "ctdaio.h"
+#include "cthardware.h"
+#include "ctimap.h"
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#define DAIO_RESOURCE_NUM	NUM_DAIOTYP
+#define DAIO_OUT_MAX		SPDIFOO
+
+union daio_usage {
+	struct {
+		unsigned short lineo1:1;
+		unsigned short lineo2:1;
+		unsigned short lineo3:1;
+		unsigned short lineo4:1;
+		unsigned short spdifoo:1;
+		unsigned short lineim:1;
+		unsigned short spdifio:1;
+		unsigned short spdifi1:1;
+	} bf;
+	unsigned short data;
+};
+
+struct daio_rsc_idx {
+	unsigned short left;
+	unsigned short right;
+};
+
+struct daio_rsc_idx idx_20k1[NUM_DAIOTYP] = {
+	[LINEO1] = {.left = 0x00, .right = 0x01},
+	[LINEO2] = {.left = 0x18, .right = 0x19},
+	[LINEO3] = {.left = 0x08, .right = 0x09},
+	[LINEO4] = {.left = 0x10, .right = 0x11},
+	[LINEIM] = {.left = 0x1b5, .right = 0x1bd},
+	[SPDIFOO] = {.left = 0x20, .right = 0x21},
+	[SPDIFIO] = {.left = 0x15, .right = 0x1d},
+	[SPDIFI1] = {.left = 0x95, .right = 0x9d},
+};
+
+struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = {
+	[LINEO1] = {.left = 0x40, .right = 0x41},
+	[LINEO2] = {.left = 0x70, .right = 0x71},
+	[LINEO3] = {.left = 0x50, .right = 0x51},
+	[LINEO4] = {.left = 0x60, .right = 0x61},
+	[LINEIM] = {.left = 0x45, .right = 0xc5},
+	[SPDIFOO] = {.left = 0x00, .right = 0x01},
+	[SPDIFIO] = {.left = 0x05, .right = 0x85},
+};
+
+static int daio_master(struct rsc *rsc)
+{
+	/* Actually, this is not the resource index of DAIO.
+	 * For DAO, it is the input mapper index. And, for DAI,
+	 * it is the output time-slot index. */
+	return rsc->conj = rsc->idx;
+}
+
+static int daio_index(const struct rsc *rsc)
+{
+	return rsc->conj;
+}
+
+static int daio_out_next_conj(struct rsc *rsc)
+{
+	return rsc->conj += 2;
+}
+
+static int daio_in_next_conj_20k1(struct rsc *rsc)
+{
+	return rsc->conj += 0x200;
+}
+
+static int daio_in_next_conj_20k2(struct rsc *rsc)
+{
+	return rsc->conj += 0x100;
+}
+
+static struct rsc_ops daio_out_rsc_ops = {
+	.master		= daio_master,
+	.next_conj	= daio_out_next_conj,
+	.index		= daio_index,
+	.output_slot	= NULL,
+};
+
+static struct rsc_ops daio_in_rsc_ops_20k1 = {
+	.master		= daio_master,
+	.next_conj	= daio_in_next_conj_20k1,
+	.index		= NULL,
+	.output_slot	= daio_index,
+};
+
+static struct rsc_ops daio_in_rsc_ops_20k2 = {
+	.master		= daio_master,
+	.next_conj	= daio_in_next_conj_20k2,
+	.index		= NULL,
+	.output_slot	= daio_index,
+};
+
+static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
+{
+	switch (hw->chip_type) {
+	case ATC20K1:
+		switch (type) {
+		case SPDIFOO:	return 0;
+		case SPDIFIO:	return 0;
+		case SPDIFI1:	return 1;
+		case LINEO1:	return 4;
+		case LINEO2:	return 7;
+		case LINEO3:	return 5;
+		case LINEO4:	return 6;
+		case LINEIM:	return 7;
+		default:	return -EINVAL;
+		}
+	case ATC20K2:
+		switch (type) {
+		case SPDIFOO:	return 0;
+		case SPDIFIO:	return 0;
+		case LINEO1:	return 4;
+		case LINEO2:	return 7;
+		case LINEO3:	return 5;
+		case LINEO4:	return 6;
+		case LINEIM:	return 4;
+		default:	return -EINVAL;
+		}
+	default:
+		return -EINVAL;
+	}
+}
+
+static int dao_rsc_reinit(struct dao *dao, const struct dao_desc *desc);
+
+static int dao_spdif_get_spos(struct dao *dao, unsigned int *spos)
+{
+	((struct hw *)dao->hw)->dao_get_spos(dao->ctrl_blk, spos);
+	return 0;
+}
+
+static int dao_spdif_set_spos(struct dao *dao, unsigned int spos)
+{
+	((struct hw *)dao->hw)->dao_set_spos(dao->ctrl_blk, spos);
+	return 0;
+}
+
+static int dao_commit_write(struct dao *dao)
+{
+	((struct hw *)dao->hw)->dao_commit_write(dao->hw,
+		daio_device_index(dao->daio.type, dao->hw), dao->ctrl_blk);
+	return 0;
+}
+
+static int dao_set_left_input(struct dao *dao, struct rsc *input)
+{
+	struct imapper *entry;
+	struct daio *daio = &dao->daio;
+	int i;
+
+	entry = kzalloc((sizeof(*entry) * daio->rscl.msr), GFP_KERNEL);
+	if (NULL == entry)
+		return -ENOMEM;
+
+	/* Program master and conjugate resources */
+	input->ops->master(input);
+	daio->rscl.ops->master(&daio->rscl);
+	for (i = 0; i < daio->rscl.msr; i++, entry++) {
+		entry->slot = input->ops->output_slot(input);
+		entry->user = entry->addr = daio->rscl.ops->index(&daio->rscl);
+		dao->mgr->imap_add(dao->mgr, entry);
+		dao->imappers[i] = entry;
+
+		input->ops->next_conj(input);
+		daio->rscl.ops->next_conj(&daio->rscl);
+	}
+	input->ops->master(input);
+	daio->rscl.ops->master(&daio->rscl);
+
+	return 0;
+}
+
+static int dao_set_right_input(struct dao *dao, struct rsc *input)
+{
+	struct imapper *entry;
+	struct daio *daio = &dao->daio;
+	int i;
+
+	entry = kzalloc((sizeof(*entry) * daio->rscr.msr), GFP_KERNEL);
+	if (NULL == entry)
+		return -ENOMEM;
+
+	/* Program master and conjugate resources */
+	input->ops->master(input);
+	daio->rscr.ops->master(&daio->rscr);
+	for (i = 0; i < daio->rscr.msr; i++, entry++) {
+		entry->slot = input->ops->output_slot(input);
+		entry->user = entry->addr = daio->rscr.ops->index(&daio->rscr);
+		dao->mgr->imap_add(dao->mgr, entry);
+		dao->imappers[daio->rscl.msr + i] = entry;
+
+		input->ops->next_conj(input);
+		daio->rscr.ops->next_conj(&daio->rscr);
+	}
+	input->ops->master(input);
+	daio->rscr.ops->master(&daio->rscr);
+
+	return 0;
+}
+
+static int dao_clear_left_input(struct dao *dao)
+{
+	struct imapper *entry;
+	struct daio *daio = &dao->daio;
+	int i;
+
+	if (NULL == dao->imappers[0])
+		return 0;
+
+	entry = dao->imappers[0];
+	dao->mgr->imap_delete(dao->mgr, entry);
+	/* Program conjugate resources */
+	for (i = 1; i < daio->rscl.msr; i++) {
+		entry = dao->imappers[i];
+		dao->mgr->imap_delete(dao->mgr, entry);
+		dao->imappers[i] = NULL;
+	}
+
+	kfree(dao->imappers[0]);
+	dao->imappers[0] = NULL;
+
+	return 0;
+}
+
+static int dao_clear_right_input(struct dao *dao)
+{
+	struct imapper *entry;
+	struct daio *daio = &dao->daio;
+	int i;
+
+	if (NULL == dao->imappers[daio->rscl.msr])
+		return 0;
+
+	entry = dao->imappers[daio->rscl.msr];
+	dao->mgr->imap_delete(dao->mgr, entry);
+	/* Program conjugate resources */
+	for (i = 1; i < daio->rscr.msr; i++) {
+		entry = dao->imappers[daio->rscl.msr + i];
+		dao->mgr->imap_delete(dao->mgr, entry);
+		dao->imappers[daio->rscl.msr + i] = NULL;
+	}
+
+	kfree(dao->imappers[daio->rscl.msr]);
+	dao->imappers[daio->rscl.msr] = NULL;
+
+	return 0;
+}
+
+static struct dao_rsc_ops dao_ops = {
+	.set_spos		= dao_spdif_set_spos,
+	.commit_write		= dao_commit_write,
+	.get_spos		= dao_spdif_get_spos,
+	.reinit			= dao_rsc_reinit,
+	.set_left_input		= dao_set_left_input,
+	.set_right_input	= dao_set_right_input,
+	.clear_left_input	= dao_clear_left_input,
+	.clear_right_input	= dao_clear_right_input,
+};
+
+static int dai_set_srt_srcl(struct dai *dai, struct rsc *src)
+{
+	src->ops->master(src);
+	((struct hw *)dai->hw)->dai_srt_set_srcm(dai->ctrl_blk,
+						src->ops->index(src));
+	return 0;
+}
+
+static int dai_set_srt_srcr(struct dai *dai, struct rsc *src)
+{
+	src->ops->master(src);
+	((struct hw *)dai->hw)->dai_srt_set_srco(dai->ctrl_blk,
+						src->ops->index(src));
+	return 0;
+}
+
+static int dai_set_srt_msr(struct dai *dai, unsigned int msr)
+{
+	unsigned int rsr;
+
+	for (rsr = 0; msr > 1; msr >>= 1)
+		rsr++;
+
+	((struct hw *)dai->hw)->dai_srt_set_rsr(dai->ctrl_blk, rsr);
+	return 0;
+}
+
+static int dai_set_enb_src(struct dai *dai, unsigned int enb)
+{
+	((struct hw *)dai->hw)->dai_srt_set_ec(dai->ctrl_blk, enb);
+	return 0;
+}
+
+static int dai_set_enb_srt(struct dai *dai, unsigned int enb)
+{
+	((struct hw *)dai->hw)->dai_srt_set_et(dai->ctrl_blk, enb);
+	return 0;
+}
+
+static int dai_commit_write(struct dai *dai)
+{
+	((struct hw *)dai->hw)->dai_commit_write(dai->hw,
+		daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+	return 0;
+}
+
+static struct dai_rsc_ops dai_ops = {
+	.set_srt_srcl		= dai_set_srt_srcl,
+	.set_srt_srcr		= dai_set_srt_srcr,
+	.set_srt_msr		= dai_set_srt_msr,
+	.set_enb_src		= dai_set_enb_src,
+	.set_enb_srt		= dai_set_enb_srt,
+	.commit_write		= dai_commit_write,
+};
+
+static int daio_rsc_init(struct daio *daio,
+			 const struct daio_desc *desc,
+			 void *hw)
+{
+	int err;
+	unsigned int idx_l, idx_r;
+
+	switch (((struct hw *)hw)->chip_type) {
+	case ATC20K1:
+		idx_l = idx_20k1[desc->type].left;
+		idx_r = idx_20k1[desc->type].right;
+		break;
+	case ATC20K2:
+		idx_l = idx_20k2[desc->type].left;
+		idx_r = idx_20k2[desc->type].right;
+		break;
+	default:
+		return -EINVAL;
+	}
+	err = rsc_init(&daio->rscl, idx_l, DAIO, desc->msr, hw);
+	if (err)
+		return err;
+
+	err = rsc_init(&daio->rscr, idx_r, DAIO, desc->msr, hw);
+	if (err)
+		goto error1;
+
+	/* Set daio->rscl/r->ops to daio specific ones */
+	if (desc->type <= DAIO_OUT_MAX) {
+		daio->rscl.ops = daio->rscr.ops = &daio_out_rsc_ops;
+	} else {
+		switch (((struct hw *)hw)->chip_type) {
+		case ATC20K1:
+			daio->rscl.ops = daio->rscr.ops = &daio_in_rsc_ops_20k1;
+			break;
+		case ATC20K2:
+			daio->rscl.ops = daio->rscr.ops = &daio_in_rsc_ops_20k2;
+			break;
+		default:
+			break;
+		}
+	}
+	daio->type = desc->type;
+
+	return 0;
+
+error1:
+	rsc_uninit(&daio->rscl);
+	return err;
+}
+
+static int daio_rsc_uninit(struct daio *daio)
+{
+	rsc_uninit(&daio->rscl);
+	rsc_uninit(&daio->rscr);
+
+	return 0;
+}
+
+static int dao_rsc_init(struct dao *dao,
+			const struct daio_desc *desc,
+			struct daio_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+	unsigned int conf;
+	int err;
+
+	err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	dao->imappers = kzalloc(sizeof(void *)*desc->msr*2, GFP_KERNEL);
+	if (NULL == dao->imappers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	dao->ops = &dao_ops;
+	dao->mgr = mgr;
+	dao->hw = hw;
+	err = hw->dao_get_ctrl_blk(&dao->ctrl_blk);
+	if (err)
+		goto error2;
+
+	hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw));
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	conf = (desc->msr & 0x7) | (desc->passthru << 3);
+	hw->daio_mgr_dao_init(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw), conf);
+	hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw));
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	return 0;
+
+error2:
+	kfree(dao->imappers);
+	dao->imappers = NULL;
+error1:
+	daio_rsc_uninit(&dao->daio);
+	return err;
+}
+
+static int dao_rsc_uninit(struct dao *dao)
+{
+	if (NULL != dao->imappers) {
+		if (NULL != dao->imappers[0])
+			dao_clear_left_input(dao);
+
+		if (NULL != dao->imappers[dao->daio.rscl.msr])
+			dao_clear_right_input(dao);
+
+		kfree(dao->imappers);
+		dao->imappers = NULL;
+	}
+	((struct hw *)dao->hw)->dao_put_ctrl_blk(dao->ctrl_blk);
+	dao->hw = dao->ctrl_blk = NULL;
+	daio_rsc_uninit(&dao->daio);
+
+	return 0;
+}
+
+static int dao_rsc_reinit(struct dao *dao, const struct dao_desc *desc)
+{
+	struct daio_mgr *mgr = dao->mgr;
+	struct daio_desc dsc = {0};
+
+	dsc.type = dao->daio.type;
+	dsc.msr = desc->msr;
+	dsc.passthru = desc->passthru;
+	dao_rsc_uninit(dao);
+	return dao_rsc_init(dao, &dsc, mgr);
+}
+
+static int dai_rsc_init(struct dai *dai,
+			const struct daio_desc *desc,
+			struct daio_mgr *mgr)
+{
+	int err;
+	struct hw *hw = mgr->mgr.hw;
+	unsigned int rsr, msr;
+
+	err = daio_rsc_init(&dai->daio, desc, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	dai->ops = &dai_ops;
+	dai->hw = mgr->mgr.hw;
+	err = hw->dai_get_ctrl_blk(&dai->ctrl_blk);
+	if (err)
+		goto error1;
+
+	for (rsr = 0, msr = desc->msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->dai_srt_set_rsr(dai->ctrl_blk, rsr);
+	hw->dai_srt_set_drat(dai->ctrl_blk, 0);
+	/* default to disabling control of a SRC */
+	hw->dai_srt_set_ec(dai->ctrl_blk, 0);
+	hw->dai_srt_set_et(dai->ctrl_blk, 0); /* default to disabling SRT */
+	hw->dai_commit_write(hw,
+		daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+
+	return 0;
+
+error1:
+	daio_rsc_uninit(&dai->daio);
+	return err;
+}
+
+static int dai_rsc_uninit(struct dai *dai)
+{
+	((struct hw *)dai->hw)->dai_put_ctrl_blk(dai->ctrl_blk);
+	dai->hw = dai->ctrl_blk = NULL;
+	daio_rsc_uninit(&dai->daio);
+	return 0;
+}
+
+static int daio_mgr_get_rsc(struct rsc_mgr *mgr, enum DAIOTYP type)
+{
+	if (((union daio_usage *)mgr->rscs)->data & (0x1 << type))
+		return -ENOENT;
+
+	((union daio_usage *)mgr->rscs)->data |= (0x1 << type);
+
+	return 0;
+}
+
+static int daio_mgr_put_rsc(struct rsc_mgr *mgr, enum DAIOTYP type)
+{
+	((union daio_usage *)mgr->rscs)->data &= ~(0x1 << type);
+
+	return 0;
+}
+
+static int get_daio_rsc(struct daio_mgr *mgr,
+			const struct daio_desc *desc,
+			struct daio **rdaio)
+{
+	int err;
+	struct dai *dai = NULL;
+	struct dao *dao = NULL;
+	unsigned long flags;
+
+	*rdaio = NULL;
+
+	/* Check whether there are sufficient daio resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	err = daio_mgr_get_rsc(&mgr->mgr, desc->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet DAIO resource request!\n");
+		return err;
+	}
+
+	/* Allocate mem for daio resource */
+	if (desc->type <= DAIO_OUT_MAX) {
+		dao = kzalloc(sizeof(*dao), GFP_KERNEL);
+		if (NULL == dao) {
+			err = -ENOMEM;
+			goto error;
+		}
+		err = dao_rsc_init(dao, desc, mgr);
+		if (err)
+			goto error;
+
+		*rdaio = &dao->daio;
+	} else {
+		dai = kzalloc(sizeof(*dai), GFP_KERNEL);
+		if (NULL == dai) {
+			err = -ENOMEM;
+			goto error;
+		}
+		err = dai_rsc_init(dai, desc, mgr);
+		if (err)
+			goto error;
+
+		*rdaio = &dai->daio;
+	}
+
+	mgr->daio_enable(mgr, *rdaio);
+	mgr->commit_write(mgr);
+
+	return 0;
+
+error:
+	if (NULL != dao)
+		kfree(dao);
+	else if (NULL != dai)
+		kfree(dai);
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	daio_mgr_put_rsc(&mgr->mgr, desc->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	return err;
+}
+
+static int put_daio_rsc(struct daio_mgr *mgr, struct daio *daio)
+{
+	unsigned long flags;
+
+	mgr->daio_disable(mgr, daio);
+	mgr->commit_write(mgr);
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	daio_mgr_put_rsc(&mgr->mgr, daio->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+
+	if (daio->type <= DAIO_OUT_MAX) {
+		dao_rsc_uninit(container_of(daio, struct dao, daio));
+		kfree(container_of(daio, struct dao, daio));
+	} else {
+		dai_rsc_uninit(container_of(daio, struct dai, daio));
+		kfree(container_of(daio, struct dai, daio));
+	}
+
+	return 0;
+}
+
+static int daio_mgr_enb_daio(struct daio_mgr *mgr, struct daio *daio)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	if (DAIO_OUT_MAX >= daio->type) {
+		hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	} else {
+		hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_daio(struct daio_mgr *mgr, struct daio *daio)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	if (DAIO_OUT_MAX >= daio->type) {
+		hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	} else {
+		hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	}
+	return 0;
+}
+
+static int daio_map_op(void *data, struct imapper *entry)
+{
+	struct rsc_mgr *mgr = &((struct daio_mgr *)data)->mgr;
+	struct hw *hw = mgr->hw;
+
+	hw->daio_mgr_set_imaparc(mgr->ctrl_blk, entry->slot);
+	hw->daio_mgr_set_imapnxt(mgr->ctrl_blk, entry->next);
+	hw->daio_mgr_set_imapaddr(mgr->ctrl_blk, entry->addr);
+	hw->daio_mgr_commit_write(mgr->hw, mgr->ctrl_blk);
+
+	return 0;
+}
+
+static int daio_imap_add(struct daio_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	if ((0 == entry->addr) && (mgr->init_imap_added)) {
+		input_mapper_delete(&mgr->imappers, mgr->init_imap,
+							daio_map_op, mgr);
+		mgr->init_imap_added = 0;
+	}
+	err = input_mapper_add(&mgr->imappers, entry, daio_map_op, mgr);
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int daio_imap_delete(struct daio_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	err = input_mapper_delete(&mgr->imappers, entry, daio_map_op, mgr);
+	if (list_empty(&mgr->imappers)) {
+		input_mapper_add(&mgr->imappers, mgr->init_imap,
+							daio_map_op, mgr);
+		mgr->init_imap_added = 1;
+	}
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int daio_mgr_commit_write(struct daio_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+	return 0;
+}
+
+int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr)
+{
+	int err, i;
+	struct daio_mgr *daio_mgr;
+	struct imapper *entry;
+
+	*rdaio_mgr = NULL;
+	daio_mgr = kzalloc(sizeof(*daio_mgr), GFP_KERNEL);
+	if (NULL == daio_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&daio_mgr->mgr, DAIO, DAIO_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&daio_mgr->mgr_lock);
+	spin_lock_init(&daio_mgr->imap_lock);
+	INIT_LIST_HEAD(&daio_mgr->imappers);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (NULL == entry) {
+		err = -ENOMEM;
+		goto error2;
+	}
+	entry->slot = entry->addr = entry->next = entry->user = 0;
+	list_add(&entry->list, &daio_mgr->imappers);
+	daio_mgr->init_imap = entry;
+	daio_mgr->init_imap_added = 1;
+
+	daio_mgr->get_daio = get_daio_rsc;
+	daio_mgr->put_daio = put_daio_rsc;
+	daio_mgr->daio_enable = daio_mgr_enb_daio;
+	daio_mgr->daio_disable = daio_mgr_dsb_daio;
+	daio_mgr->imap_add = daio_imap_add;
+	daio_mgr->imap_delete = daio_imap_delete;
+	daio_mgr->commit_write = daio_mgr_commit_write;
+
+	for (i = 0; i < 8; i++) {
+		((struct hw *)hw)->daio_mgr_dsb_dao(daio_mgr->mgr.ctrl_blk, i);
+		((struct hw *)hw)->daio_mgr_dsb_dai(daio_mgr->mgr.ctrl_blk, i);
+	}
+	((struct hw *)hw)->daio_mgr_commit_write(hw, daio_mgr->mgr.ctrl_blk);
+
+	*rdaio_mgr = daio_mgr;
+
+	return 0;
+
+error2:
+	rsc_mgr_uninit(&daio_mgr->mgr);
+error1:
+	kfree(daio_mgr);
+	return err;
+}
+
+int daio_mgr_destroy(struct daio_mgr *daio_mgr)
+{
+	unsigned long flags;
+
+	/* free daio input mapper list */
+	spin_lock_irqsave(&daio_mgr->imap_lock, flags);
+	free_input_mapper_list(&daio_mgr->imappers);
+	spin_unlock_irqrestore(&daio_mgr->imap_lock, flags);
+
+	rsc_mgr_uninit(&daio_mgr->mgr);
+	kfree(daio_mgr);
+
+	return 0;
+}
+
diff --git a/sound/pci/ctxfi/ctdaio.h b/sound/pci/ctxfi/ctdaio.h
new file mode 100644
index 0000000..0f52ce5
--- /dev/null
+++ b/sound/pci/ctxfi/ctdaio.h
@@ -0,0 +1,122 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctdaio.h
+ *
+ * @Brief
+ * This file contains the definition of Digital Audio Input Output
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#ifndef CTDAIO_H
+#define CTDAIO_H
+
+#include "ctresource.h"
+#include "ctimap.h"
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+/* Define the descriptor of a daio resource */
+enum DAIOTYP {
+	LINEO1,
+	LINEO2,
+	LINEO3,
+	LINEO4,
+	SPDIFOO,	/* S/PDIF Out (Flexijack/Optical) */
+	LINEIM,
+	SPDIFIO,	/* S/PDIF In (Flexijack/Optical) on the card */
+	SPDIFI1,	/* S/PDIF In on internal Drive Bay */
+	NUM_DAIOTYP
+};
+
+struct dao_rsc_ops;
+struct dai_rsc_ops;
+struct daio_mgr;
+
+struct daio {
+	struct rsc rscl;	/* Basic resource info for left TX/RX */
+	struct rsc rscr;	/* Basic resource info for right TX/RX */
+	enum DAIOTYP type;
+};
+
+struct dao {
+	struct daio daio;
+	struct dao_rsc_ops *ops;	/* DAO specific operations */
+	struct imapper **imappers;
+	struct daio_mgr *mgr;
+	void *hw;
+	void *ctrl_blk;
+};
+
+struct dai {
+	struct daio daio;
+	struct dai_rsc_ops *ops;	/* DAI specific operations */
+	void *hw;
+	void *ctrl_blk;
+};
+
+struct dao_desc {
+	unsigned int msr:4;
+	unsigned int passthru:1;
+};
+
+struct dao_rsc_ops {
+	int (*set_spos)(struct dao *dao, unsigned int spos);
+	int (*commit_write)(struct dao *dao);
+	int (*get_spos)(struct dao *dao, unsigned int *spos);
+	int (*reinit)(struct dao *dao, const struct dao_desc *desc);
+	int (*set_left_input)(struct dao *dao, struct rsc *input);
+	int (*set_right_input)(struct dao *dao, struct rsc *input);
+	int (*clear_left_input)(struct dao *dao);
+	int (*clear_right_input)(struct dao *dao);
+};
+
+struct dai_rsc_ops {
+	int (*set_srt_srcl)(struct dai *dai, struct rsc *src);
+	int (*set_srt_srcr)(struct dai *dai, struct rsc *src);
+	int (*set_srt_msr)(struct dai *dai, unsigned int msr);
+	int (*set_enb_src)(struct dai *dai, unsigned int enb);
+	int (*set_enb_srt)(struct dai *dai, unsigned int enb);
+	int (*commit_write)(struct dai *dai);
+};
+
+/* Define daio resource request description info */
+struct daio_desc {
+	unsigned int type:4;
+	unsigned int msr:4;
+	unsigned int passthru:1;
+};
+
+struct daio_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+	spinlock_t imap_lock;
+	struct list_head imappers;
+	struct imapper *init_imap;
+	unsigned int init_imap_added;
+
+	 /* request one daio resource */
+	int (*get_daio)(struct daio_mgr *mgr,
+			const struct daio_desc *desc, struct daio **rdaio);
+	/* return one daio resource */
+	int (*put_daio)(struct daio_mgr *mgr, struct daio *daio);
+	int (*daio_enable)(struct daio_mgr *mgr, struct daio *daio);
+	int (*daio_disable)(struct daio_mgr *mgr, struct daio *daio);
+	int (*imap_add)(struct daio_mgr *mgr, struct imapper *entry);
+	int (*imap_delete)(struct daio_mgr *mgr, struct imapper *entry);
+	int (*commit_write)(struct daio_mgr *mgr);
+};
+
+/* Constructor and destructor of daio resource manager */
+int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr);
+int daio_mgr_destroy(struct daio_mgr *daio_mgr);
+
+#endif /* CTDAIO_H */
diff --git a/sound/pci/ctxfi/cthardware.c b/sound/pci/ctxfi/cthardware.c
new file mode 100644
index 0000000..8e64f48
--- /dev/null
+++ b/sound/pci/ctxfi/cthardware.c
@@ -0,0 +1,91 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthardware.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	Jun 26 2008
+ *
+ */
+
+#include "cthardware.h"
+#include "cthw20k1.h"
+#include "cthw20k2.h"
+#include <linux/bug.h>
+
+int __devinit create_hw_obj(struct pci_dev *pci, enum CHIPTYP chip_type,
+			    enum CTCARDS model, struct hw **rhw)
+{
+	int err;
+
+	switch (chip_type) {
+	case ATC20K1:
+		err = create_20k1_hw_obj(rhw);
+		break;
+	case ATC20K2:
+		err = create_20k2_hw_obj(rhw);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+	if (err)
+		return err;
+
+	(*rhw)->pci = pci;
+	(*rhw)->chip_type = chip_type;
+	(*rhw)->model = model;
+
+	return 0;
+}
+
+int destroy_hw_obj(struct hw *hw)
+{
+	int err;
+
+	switch (hw->pci->device) {
+	case 0x0005:	/* 20k1 device */
+		err = destroy_20k1_hw_obj(hw);
+		break;
+	case 0x000B:	/* 20k2 device */
+		err = destroy_20k2_hw_obj(hw);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+
+	return err;
+}
+
+unsigned int get_field(unsigned int data, unsigned int field)
+{
+	int i;
+
+	BUG_ON(!field);
+	/* @field should always be greater than 0 */
+	for (i = 0; !(field & (1 << i)); )
+		i++;
+
+	return (data & field) >> i;
+}
+
+void set_field(unsigned int *data, unsigned int field, unsigned int value)
+{
+	int i;
+
+	BUG_ON(!field);
+	/* @field should always be greater than 0 */
+	for (i = 0; !(field & (1 << i)); )
+		i++;
+
+	*data = (*data & (~field)) | ((value << i) & field);
+}
+
diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
new file mode 100644
index 0000000..4a8e04f
--- /dev/null
+++ b/sound/pci/ctxfi/cthardware.h
@@ -0,0 +1,196 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthardware.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHARDWARE_H
+#define CTHARDWARE_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+enum CHIPTYP {
+	ATC20K1,
+	ATC20K2,
+	ATCNONE
+};
+
+enum CTCARDS {
+	/* 20k1 models */
+	CTSB055X,
+	CTSB073X,
+	CTUAA,
+	CT20K1_UNKNOWN,
+	/* 20k2 models */
+	CTSB0760,
+	CTHENDRIX,
+	CTSB0880,
+	NUM_CTCARDS		/* This should always be the last */
+};
+
+/* Type of input source for ADC */
+enum ADCSRC{
+	ADC_MICIN,
+	ADC_LINEIN,
+	ADC_VIDEO,
+	ADC_AUX,
+	ADC_NONE	/* Switch to digital input */
+};
+
+struct card_conf {
+	/* device virtual mem page table page physical addr
+	 * (supporting one page table page now) */
+	unsigned long vm_pgt_phys;
+	unsigned int rsr;	/* reference sample rate in Hzs*/
+	unsigned int msr;	/* master sample rate in rsrs */
+};
+
+struct hw {
+	int (*card_init)(struct hw *hw, struct card_conf *info);
+	int (*card_stop)(struct hw *hw);
+	int (*pll_init)(struct hw *hw, unsigned int rsr);
+	int (*is_adc_source_selected)(struct hw *hw, enum ADCSRC source);
+	int (*select_adc_source)(struct hw *hw, enum ADCSRC source);
+	int (*have_digit_io_switch)(struct hw *hw);
+
+	/* SRC operations */
+	int (*src_rsc_get_ctrl_blk)(void **rblk);
+	int (*src_rsc_put_ctrl_blk)(void *blk);
+	int (*src_set_state)(void *blk, unsigned int state);
+	int (*src_set_bm)(void *blk, unsigned int bm);
+	int (*src_set_rsr)(void *blk, unsigned int rsr);
+	int (*src_set_sf)(void *blk, unsigned int sf);
+	int (*src_set_wr)(void *blk, unsigned int wr);
+	int (*src_set_pm)(void *blk, unsigned int pm);
+	int (*src_set_rom)(void *blk, unsigned int rom);
+	int (*src_set_vo)(void *blk, unsigned int vo);
+	int (*src_set_st)(void *blk, unsigned int st);
+	int (*src_set_ie)(void *blk, unsigned int ie);
+	int (*src_set_ilsz)(void *blk, unsigned int ilsz);
+	int (*src_set_bp)(void *blk, unsigned int bp);
+	int (*src_set_cisz)(void *blk, unsigned int cisz);
+	int (*src_set_ca)(void *blk, unsigned int ca);
+	int (*src_set_sa)(void *blk, unsigned int sa);
+	int (*src_set_la)(void *blk, unsigned int la);
+	int (*src_set_pitch)(void *blk, unsigned int pitch);
+	int (*src_set_clear_zbufs)(void *blk, unsigned int clear);
+	int (*src_set_dirty)(void *blk, unsigned int flags);
+	int (*src_set_dirty_all)(void *blk);
+	int (*src_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*src_get_ca)(struct hw *hw, unsigned int idx, void *blk);
+	unsigned int (*src_get_dirty)(void *blk);
+	unsigned int (*src_dirty_conj_mask)(void);
+	int (*src_mgr_get_ctrl_blk)(void **rblk);
+	int (*src_mgr_put_ctrl_blk)(void *blk);
+	/* syncly enable src @idx */
+	int (*src_mgr_enbs_src)(void *blk, unsigned int idx);
+	/* enable src @idx */
+	int (*src_mgr_enb_src)(void *blk, unsigned int idx);
+	/* disable src @idx */
+	int (*src_mgr_dsb_src)(void *blk, unsigned int idx);
+	int (*src_mgr_commit_write)(struct hw *hw, void *blk);
+
+	/* SRC Input Mapper operations */
+	int (*srcimp_mgr_get_ctrl_blk)(void **rblk);
+	int (*srcimp_mgr_put_ctrl_blk)(void *blk);
+	int (*srcimp_mgr_set_imaparc)(void *blk, unsigned int slot);
+	int (*srcimp_mgr_set_imapuser)(void *blk, unsigned int user);
+	int (*srcimp_mgr_set_imapnxt)(void *blk, unsigned int next);
+	int (*srcimp_mgr_set_imapaddr)(void *blk, unsigned int addr);
+	int (*srcimp_mgr_commit_write)(struct hw *hw, void *blk);
+
+	/* AMIXER operations */
+	int (*amixer_rsc_get_ctrl_blk)(void **rblk);
+	int (*amixer_rsc_put_ctrl_blk)(void *blk);
+	int (*amixer_mgr_get_ctrl_blk)(void **rblk);
+	int (*amixer_mgr_put_ctrl_blk)(void *blk);
+	int (*amixer_set_mode)(void *blk, unsigned int mode);
+	int (*amixer_set_iv)(void *blk, unsigned int iv);
+	int (*amixer_set_x)(void *blk, unsigned int x);
+	int (*amixer_set_y)(void *blk, unsigned int y);
+	int (*amixer_set_sadr)(void *blk, unsigned int sadr);
+	int (*amixer_set_se)(void *blk, unsigned int se);
+	int (*amixer_set_dirty)(void *blk, unsigned int flags);
+	int (*amixer_set_dirty_all)(void *blk);
+	int (*amixer_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*amixer_get_y)(void *blk);
+	unsigned int (*amixer_get_dirty)(void *blk);
+
+	/* DAIO operations */
+	int (*dai_get_ctrl_blk)(void **rblk);
+	int (*dai_put_ctrl_blk)(void *blk);
+	int (*dai_srt_set_srco)(void *blk, unsigned int src);
+	int (*dai_srt_set_srcm)(void *blk, unsigned int src);
+	int (*dai_srt_set_rsr)(void *blk, unsigned int rsr);
+	int (*dai_srt_set_drat)(void *blk, unsigned int drat);
+	int (*dai_srt_set_ec)(void *blk, unsigned int ec);
+	int (*dai_srt_set_et)(void *blk, unsigned int et);
+	int (*dai_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*dao_get_ctrl_blk)(void **rblk);
+	int (*dao_put_ctrl_blk)(void *blk);
+	int (*dao_set_spos)(void *blk, unsigned int spos);
+	int (*dao_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*dao_get_spos)(void *blk, unsigned int *spos);
+
+	int (*daio_mgr_get_ctrl_blk)(struct hw *hw, void **rblk);
+	int (*daio_mgr_put_ctrl_blk)(void *blk);
+	int (*daio_mgr_enb_dai)(void *blk, unsigned int idx);
+	int (*daio_mgr_dsb_dai)(void *blk, unsigned int idx);
+	int (*daio_mgr_enb_dao)(void *blk, unsigned int idx);
+	int (*daio_mgr_dsb_dao)(void *blk, unsigned int idx);
+	int (*daio_mgr_dao_init)(void *blk, unsigned int idx,
+						unsigned int conf);
+	int (*daio_mgr_set_imaparc)(void *blk, unsigned int slot);
+	int (*daio_mgr_set_imapnxt)(void *blk, unsigned int next);
+	int (*daio_mgr_set_imapaddr)(void *blk, unsigned int addr);
+	int (*daio_mgr_commit_write)(struct hw *hw, void *blk);
+
+	int (*set_timer_irq)(struct hw *hw, int enable);
+	int (*set_timer_tick)(struct hw *hw, unsigned int tick);
+	unsigned int (*get_wc)(struct hw *hw);
+
+	void (*irq_callback)(void *data, unsigned int bit);
+	void *irq_callback_data;
+
+	struct pci_dev *pci;	/* the pci kernel structure of this card */
+	int irq;
+	unsigned long io_base;
+	unsigned long mem_base;
+
+	enum CHIPTYP chip_type;
+	enum CTCARDS model;
+};
+
+int create_hw_obj(struct pci_dev *pci, enum CHIPTYP chip_type,
+		  enum CTCARDS model, struct hw **rhw);
+int destroy_hw_obj(struct hw *hw);
+
+unsigned int get_field(unsigned int data, unsigned int field);
+void set_field(unsigned int *data, unsigned int field, unsigned int value);
+
+/* IRQ bits */
+#define	PLL_INT		(1 << 10) /* PLL input-clock out-of-range */
+#define FI_INT		(1 << 9)  /* forced interrupt */
+#define IT_INT		(1 << 8)  /* timer interrupt */
+#define PCI_INT		(1 << 7)  /* PCI bus error pending */
+#define URT_INT		(1 << 6)  /* UART Tx/Rx */
+#define GPI_INT		(1 << 5)  /* GPI pin */
+#define MIX_INT		(1 << 4)  /* mixer parameter segment FIFO channels */
+#define DAI_INT		(1 << 3)  /* DAI (SR-tracker or SPDIF-receiver) */
+#define TP_INT		(1 << 2)  /* transport priority queue */
+#define DSP_INT		(1 << 1)  /* DSP */
+#define SRC_INT		(1 << 0)  /* SRC channels */
+
+#endif /* CTHARDWARE_H */
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
new file mode 100644
index 0000000..cb69d9d
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -0,0 +1,2248 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k1.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord for 20k1.
+ *
+ * @Author	Liu Chun
+ * @Date 	Jun 24 2008
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include "cthw20k1.h"
+#include "ct20k1reg.h"
+
+#if BITS_PER_LONG == 32
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bit PTE */
+#else
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(64) /* 64 bit PTE */
+#endif
+
+struct hw20k1 {
+	struct hw hw;
+	spinlock_t reg_20k1_lock;
+	spinlock_t reg_pci_lock;
+};
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg);
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
+static u32 hw_read_pci(struct hw *hw, u32 reg);
+static void hw_write_pci(struct hw *hw, u32 reg, u32 data);
+
+/*
+ * Type definition block.
+ * The layout of control structures can be directly applied on 20k2 chip.
+ */
+
+/*
+ * SRC control block definitions.
+ */
+
+/* SRC resource control block */
+#define SRCCTL_STATE	0x00000007
+#define SRCCTL_BM	0x00000008
+#define SRCCTL_RSR	0x00000030
+#define SRCCTL_SF	0x000001C0
+#define SRCCTL_WR	0x00000200
+#define SRCCTL_PM	0x00000400
+#define SRCCTL_ROM	0x00001800
+#define SRCCTL_VO	0x00002000
+#define SRCCTL_ST	0x00004000
+#define SRCCTL_IE	0x00008000
+#define SRCCTL_ILSZ	0x000F0000
+#define SRCCTL_BP	0x00100000
+
+#define SRCCCR_CISZ	0x000007FF
+#define SRCCCR_CWA	0x001FF800
+#define SRCCCR_D	0x00200000
+#define SRCCCR_RS	0x01C00000
+#define SRCCCR_NAL	0x3E000000
+#define SRCCCR_RA	0xC0000000
+
+#define SRCCA_CA	0x03FFFFFF
+#define SRCCA_RS	0x1C000000
+#define SRCCA_NAL	0xE0000000
+
+#define SRCSA_SA	0x03FFFFFF
+
+#define SRCLA_LA	0x03FFFFFF
+
+/* Mixer Parameter Ring ram Low and Hight register.
+ * Fixed-point value in 8.24 format for parameter channel */
+#define MPRLH_PITCH	0xFFFFFFFF
+
+/* SRC resource register dirty flags */
+union src_dirty {
+	struct {
+		u16 ctl:1;
+		u16 ccr:1;
+		u16 sa:1;
+		u16 la:1;
+		u16 ca:1;
+		u16 mpr:1;
+		u16 czbfs:1;	/* Clear Z-Buffers */
+		u16 rsv:9;
+	} bf;
+	u16 data;
+};
+
+struct src_rsc_ctrl_blk {
+	unsigned int	ctl;
+	unsigned int 	ccr;
+	unsigned int	ca;
+	unsigned int	sa;
+	unsigned int	la;
+	unsigned int	mpr;
+	union src_dirty	dirty;
+};
+
+/* SRC manager control block */
+union src_mgr_dirty {
+	struct {
+		u16 enb0:1;
+		u16 enb1:1;
+		u16 enb2:1;
+		u16 enb3:1;
+		u16 enb4:1;
+		u16 enb5:1;
+		u16 enb6:1;
+		u16 enb7:1;
+		u16 enbsa:1;
+		u16 rsv:7;
+	} bf;
+	u16 data;
+};
+
+struct src_mgr_ctrl_blk {
+	unsigned int		enbsa;
+	unsigned int		enb[8];
+	union src_mgr_dirty	dirty;
+};
+
+/* SRCIMP manager control block */
+#define SRCAIM_ARC	0x00000FFF
+#define SRCAIM_NXT	0x00FF0000
+#define SRCAIM_SRC	0xFF000000
+
+struct srcimap {
+	unsigned int srcaim;
+	unsigned int idx;
+};
+
+/* SRCIMP manager register dirty flags */
+union srcimp_mgr_dirty {
+	struct {
+		u16 srcimap:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+struct srcimp_mgr_ctrl_blk {
+	struct srcimap		srcimap;
+	union srcimp_mgr_dirty	dirty;
+};
+
+/*
+ * Function implementation block.
+ */
+
+static int src_get_rsc_ctrl_blk(void **rblk)
+{
+	struct src_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_put_rsc_ctrl_blk(void *blk)
+{
+	kfree((struct src_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int src_set_state(void *blk, unsigned int state)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_STATE, state);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bm(void *blk, unsigned int bm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BM, bm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rsr(void *blk, unsigned int rsr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_RSR, rsr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_sf(void *blk, unsigned int sf)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_SF, sf);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_wr(void *blk, unsigned int wr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_WR, wr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_pm(void *blk, unsigned int pm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_PM, pm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rom(void *blk, unsigned int rom)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ROM, rom);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_vo(void *blk, unsigned int vo)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_VO, vo);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_st(void *blk, unsigned int st)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ST, st);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ie(void *blk, unsigned int ie)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_IE, ie);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ilsz(void *blk, unsigned int ilsz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ILSZ, ilsz);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bp(void *blk, unsigned int bp)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BP, bp);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_cisz(void *blk, unsigned int cisz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ccr, SRCCCR_CISZ, cisz);
+	ctl->dirty.bf.ccr = 1;
+	return 0;
+}
+
+static int src_set_ca(void *blk, unsigned int ca)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ca, SRCCA_CA, ca);
+	ctl->dirty.bf.ca = 1;
+	return 0;
+}
+
+static int src_set_sa(void *blk, unsigned int sa)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->sa, SRCSA_SA, sa);
+	ctl->dirty.bf.sa = 1;
+	return 0;
+}
+
+static int src_set_la(void *blk, unsigned int la)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->la, SRCLA_LA, la);
+	ctl->dirty.bf.la = 1;
+	return 0;
+}
+
+static int src_set_pitch(void *blk, unsigned int pitch)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->mpr, MPRLH_PITCH, pitch);
+	ctl->dirty.bf.mpr = 1;
+	return 0;
+}
+
+static int src_set_clear_zbufs(void *blk, unsigned int clear)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.bf.czbfs = (clear ? 1 : 0);
+	return 0;
+}
+
+static int src_set_dirty(void *blk, unsigned int flags)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int src_set_dirty_all(void *blk)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+#define AR_SLOT_SIZE		4096
+#define AR_SLOT_BLOCK_SIZE	16
+#define AR_PTS_PITCH		6
+#define AR_PARAM_SRC_OFFSET	0x60
+
+static unsigned int src_param_pitch_mixer(unsigned int src_idx)
+{
+	return ((src_idx << 4) + AR_PTS_PITCH + AR_SLOT_SIZE
+			- AR_PARAM_SRC_OFFSET) % AR_SLOT_SIZE;
+
+}
+
+static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+	int i;
+
+	if (ctl->dirty.bf.czbfs) {
+		/* Clear Z-Buffer registers */
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRCUPZ+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 4; i++)
+			hw_write_20kx(hw, SRCDN0Z+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRCDN1Z+idx*0x100+i*0x4, 0);
+
+		ctl->dirty.bf.czbfs = 0;
+	}
+	if (ctl->dirty.bf.mpr) {
+		/* Take the parameter mixer resource in the same group as that
+		 * the idx src is in for simplicity. Unlike src, all conjugate
+		 * parameter mixer resources must be programmed for
+		 * corresponding conjugate src resources. */
+		unsigned int pm_idx = src_param_pitch_mixer(idx);
+		hw_write_20kx(hw, PRING_LO_HI+4*pm_idx, ctl->mpr);
+		hw_write_20kx(hw, PMOPLO+8*pm_idx, 0x3);
+		hw_write_20kx(hw, PMOPHI+8*pm_idx, 0x0);
+		ctl->dirty.bf.mpr = 0;
+	}
+	if (ctl->dirty.bf.sa) {
+		hw_write_20kx(hw, SRCSA+idx*0x100, ctl->sa);
+		ctl->dirty.bf.sa = 0;
+	}
+	if (ctl->dirty.bf.la) {
+		hw_write_20kx(hw, SRCLA+idx*0x100, ctl->la);
+		ctl->dirty.bf.la = 0;
+	}
+	if (ctl->dirty.bf.ca) {
+		hw_write_20kx(hw, SRCCA+idx*0x100, ctl->ca);
+		ctl->dirty.bf.ca = 0;
+	}
+
+	/* Write srccf register */
+	hw_write_20kx(hw, SRCCF+idx*0x100, 0x0);
+
+	if (ctl->dirty.bf.ccr) {
+		hw_write_20kx(hw, SRCCCR+idx*0x100, ctl->ccr);
+		ctl->dirty.bf.ccr = 0;
+	}
+	if (ctl->dirty.bf.ctl) {
+		hw_write_20kx(hw, SRCCTL+idx*0x100, ctl->ctl);
+		ctl->dirty.bf.ctl = 0;
+	}
+
+	return 0;
+}
+
+static int src_get_ca(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	ctl->ca = hw_read_20kx(hw, SRCCA+idx*0x100);
+	ctl->dirty.bf.ca = 0;
+
+	return get_field(ctl->ca, SRCCA_CA);
+}
+
+static unsigned int src_get_dirty(void *blk)
+{
+	return ((struct src_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static unsigned int src_dirty_conj_mask(void)
+{
+	return 0x20;
+}
+
+static int src_mgr_enbs_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enbsa = ~(0x0);
+	((struct src_mgr_ctrl_blk *)blk)->dirty.bf.enbsa = 1;
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	return 0;
+}
+
+static int src_mgr_enb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_dsb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] &= ~(0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct src_mgr_ctrl_blk *ctl = blk;
+	int i;
+	unsigned int ret;
+
+	if (ctl->dirty.bf.enbsa) {
+		do {
+			ret = hw_read_20kx(hw, SRCENBSTAT);
+		} while (ret & 0x1);
+		hw_write_20kx(hw, SRCENBS, ctl->enbsa);
+		ctl->dirty.bf.enbsa = 0;
+	}
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.data & (0x1 << i))) {
+			hw_write_20kx(hw, SRCENB+(i*0x100), ctl->enb[i]);
+			ctl->dirty.data &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static int src_mgr_get_ctrl_blk(void **rblk)
+{
+	struct src_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct src_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_get_ctrl_blk(void **rblk)
+{
+	struct srcimp_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int srcimp_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct srcimp_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_ARC, slot);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapuser(void *blk, unsigned int user)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_SRC, user);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_NXT, next);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	ctl->srcimap.idx = addr;
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srcimap) {
+		hw_write_20kx(hw, SRCIMAP+ctl->srcimap.idx*0x100,
+						ctl->srcimap.srcaim);
+		ctl->dirty.bf.srcimap = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * AMIXER control block definitions.
+ */
+
+#define AMOPLO_M	0x00000003
+#define AMOPLO_X	0x0003FFF0
+#define AMOPLO_Y	0xFFFC0000
+
+#define AMOPHI_SADR	0x000000FF
+#define AMOPHI_SE	0x80000000
+
+/* AMIXER resource register dirty flags */
+union amixer_dirty {
+	struct {
+		u16 amoplo:1;
+		u16 amophi:1;
+		u16 rsv:14;
+	} bf;
+	u16 data;
+};
+
+/* AMIXER resource control block */
+struct amixer_rsc_ctrl_blk {
+	unsigned int		amoplo;
+	unsigned int		amophi;
+	union amixer_dirty	dirty;
+};
+
+static int amixer_set_mode(void *blk, unsigned int mode)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_M, mode);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_iv(void *blk, unsigned int iv)
+{
+	/* 20k1 amixer does not have this field */
+	return 0;
+}
+
+static int amixer_set_x(void *blk, unsigned int x)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_X, x);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_y(void *blk, unsigned int y)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_Y, y);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_sadr(void *blk, unsigned int sadr)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SADR, sadr);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_se(void *blk, unsigned int se)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SE, se);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_dirty(void *blk, unsigned int flags)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int amixer_set_dirty_all(void *blk)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+static int amixer_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.amoplo || ctl->dirty.bf.amophi) {
+		hw_write_20kx(hw, AMOPLO+idx*8, ctl->amoplo);
+		ctl->dirty.bf.amoplo = 0;
+		hw_write_20kx(hw, AMOPHI+idx*8, ctl->amophi);
+		ctl->dirty.bf.amophi = 0;
+	}
+
+	return 0;
+}
+
+static int amixer_get_y(void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	return get_field(ctl->amoplo, AMOPLO_Y);
+}
+
+static unsigned int amixer_get_dirty(void *blk)
+{
+	return ((struct amixer_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static int amixer_rsc_get_ctrl_blk(void **rblk)
+{
+	struct amixer_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int amixer_rsc_put_ctrl_blk(void *blk)
+{
+	kfree((struct amixer_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int amixer_mgr_get_ctrl_blk(void **rblk)
+{
+	/*amixer_mgr_ctrl_blk_t *blk;*/
+
+	*rblk = NULL;
+	/*blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;*/
+
+	return 0;
+}
+
+static int amixer_mgr_put_ctrl_blk(void *blk)
+{
+	/*kfree((amixer_mgr_ctrl_blk_t *)blk);*/
+
+	return 0;
+}
+
+/*
+ * DAIO control block definitions.
+ */
+
+/* Receiver Sample Rate Tracker Control register */
+#define SRTCTL_SRCR	0x000000FF
+#define SRTCTL_SRCL	0x0000FF00
+#define SRTCTL_RSR	0x00030000
+#define SRTCTL_DRAT	0x000C0000
+#define SRTCTL_RLE	0x10000000
+#define SRTCTL_RLP	0x20000000
+#define SRTCTL_EC	0x40000000
+#define SRTCTL_ET	0x80000000
+
+/* DAIO Receiver register dirty flags */
+union dai_dirty {
+	struct {
+		u16 srtctl:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* DAIO Receiver control block */
+struct dai_ctrl_blk {
+	unsigned int	srtctl;
+	union dai_dirty	dirty;
+};
+
+/* S/PDIF Transmitter register dirty flags */
+union dao_dirty {
+	struct {
+		u16 spos:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* S/PDIF Transmitter control block */
+struct dao_ctrl_blk {
+	unsigned int 	spos; /* S/PDIF Output Channel Status Register */
+	union dao_dirty	dirty;
+};
+
+/* Audio Input Mapper RAM */
+#define AIM_ARC		0x00000FFF
+#define AIM_NXT		0x007F0000
+
+struct daoimap {
+	unsigned int aim;
+	unsigned int idx;
+};
+
+/* I2S Transmitter/Receiver Control register */
+#define I2SCTL_EA	0x00000004
+#define I2SCTL_EI	0x00000010
+
+/* S/PDIF Transmitter Control register */
+#define SPOCTL_OE	0x00000001
+#define SPOCTL_OS	0x0000000E
+#define SPOCTL_RIV	0x00000010
+#define SPOCTL_LIV	0x00000020
+#define SPOCTL_SR	0x000000C0
+
+/* S/PDIF Receiver Control register */
+#define SPICTL_EN	0x00000001
+#define SPICTL_I24	0x00000002
+#define SPICTL_IB	0x00000004
+#define SPICTL_SM	0x00000008
+#define SPICTL_VM	0x00000010
+
+/* DAIO manager register dirty flags */
+union daio_mgr_dirty {
+	struct {
+		u32 i2soctl:4;
+		u32 i2sictl:4;
+		u32 spoctl:4;
+		u32 spictl:4;
+		u32 daoimap:1;
+		u32 rsv:15;
+	} bf;
+	u32 data;
+};
+
+/* DAIO manager control block */
+struct daio_mgr_ctrl_blk {
+	unsigned int		i2sctl;
+	unsigned int		spoctl;
+	unsigned int		spictl;
+	struct daoimap		daoimap;
+	union daio_mgr_dirty	dirty;
+};
+
+static int dai_srt_set_srcr(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_SRCR, src);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_srcl(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_SRCL, src);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_rsr(void *blk, unsigned int rsr)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_RSR, rsr);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_drat(void *blk, unsigned int drat)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_DRAT, drat);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_ec(void *blk, unsigned int ec)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_EC, ec ? 1 : 0);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_et(void *blk, unsigned int et)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_ET, et ? 1 : 0);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srtctl) {
+		if (idx < 4) {
+			/* S/PDIF SRTs */
+			hw_write_20kx(hw, SRTSCTL+0x4*idx, ctl->srtctl);
+		} else {
+			/* I2S SRT */
+			hw_write_20kx(hw, SRTICTL, ctl->srtctl);
+		}
+		ctl->dirty.bf.srtctl = 0;
+	}
+
+	return 0;
+}
+
+static int dai_get_ctrl_blk(void **rblk)
+{
+	struct dai_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dai_put_ctrl_blk(void *blk)
+{
+	kfree((struct dai_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int dao_set_spos(void *blk, unsigned int spos)
+{
+	((struct dao_ctrl_blk *)blk)->spos = spos;
+	((struct dao_ctrl_blk *)blk)->dirty.bf.spos = 1;
+	return 0;
+}
+
+static int dao_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dao_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.spos) {
+		if (idx < 4) {
+			/* S/PDIF SPOSx */
+			hw_write_20kx(hw, SPOS+0x4*idx, ctl->spos);
+		}
+		ctl->dirty.bf.spos = 0;
+	}
+
+	return 0;
+}
+
+static int dao_get_spos(void *blk, unsigned int *spos)
+{
+	*spos = ((struct dao_ctrl_blk *)blk)->spos;
+	return 0;
+}
+
+static int dao_get_ctrl_blk(void **rblk)
+{
+	struct dao_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dao_put_ctrl_blk(void *blk)
+{
+	kfree((struct dao_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int daio_mgr_enb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF input */
+		set_field(&ctl->spictl, SPICTL_EN << (idx*8), 1);
+		ctl->dirty.bf.spictl |= (0x1 << idx);
+	} else {
+		/* I2S input */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EI << (idx*8), 1);
+		ctl->dirty.bf.i2sictl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF input */
+		set_field(&ctl->spictl, SPICTL_EN << (idx*8), 0);
+		ctl->dirty.bf.spictl |= (0x1 << idx);
+	} else {
+		/* I2S input */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EI << (idx*8), 0);
+		ctl->dirty.bf.i2sictl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_enb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		set_field(&ctl->spoctl, SPOCTL_OE << (idx*8), 1);
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EA << (idx*8), 1);
+		ctl->dirty.bf.i2soctl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		set_field(&ctl->spoctl, SPOCTL_OE << (idx*8), 0);
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EA << (idx*8), 0);
+		ctl->dirty.bf.i2soctl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		switch ((conf & 0x7)) {
+		case 0:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 3);
+			break; /* CDIF */
+		case 1:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 0);
+			break;
+		case 2:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 1);
+			break;
+		case 4:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 2);
+			break;
+		default:
+			break;
+		}
+		set_field(&ctl->spoctl, SPOCTL_LIV << (idx*8),
+			  (conf >> 4) & 0x1); /* Non-audio */
+		set_field(&ctl->spoctl, SPOCTL_RIV << (idx*8),
+			  (conf >> 4) & 0x1); /* Non-audio */
+		set_field(&ctl->spoctl, SPOCTL_OS << (idx*8),
+			  ((conf >> 3) & 0x1) ? 2 : 2); /* Raw */
+
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		/*idx %= 4; */
+	}
+	return 0;
+}
+
+static int daio_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_ARC, slot);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_NXT, next);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	ctl->daoimap.idx = addr;
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+	int i;
+
+	if (ctl->dirty.bf.i2sictl || ctl->dirty.bf.i2soctl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.i2sictl & (0x1 << i)))
+				ctl->dirty.bf.i2sictl &= ~(0x1 << i);
+
+			if ((ctl->dirty.bf.i2soctl & (0x1 << i)))
+				ctl->dirty.bf.i2soctl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, I2SCTL, ctl->i2sctl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.spoctl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.spoctl & (0x1 << i)))
+				ctl->dirty.bf.spoctl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, SPOCTL, ctl->spoctl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.spictl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.spictl & (0x1 << i)))
+				ctl->dirty.bf.spictl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, SPICTL, ctl->spictl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.daoimap) {
+		hw_write_20kx(hw, DAOIMAP+ctl->daoimap.idx*4,
+					ctl->daoimap.aim);
+		ctl->dirty.bf.daoimap = 0;
+	}
+
+	return 0;
+}
+
+static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
+{
+	struct daio_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	blk->i2sctl = hw_read_20kx(hw, I2SCTL);
+	blk->spoctl = hw_read_20kx(hw, SPOCTL);
+	blk->spictl = hw_read_20kx(hw, SPICTL);
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int daio_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct daio_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+/* Timer interrupt */
+static int set_timer_irq(struct hw *hw, int enable)
+{
+	hw_write_20kx(hw, GIE, enable ? IT_INT : 0);
+	return 0;
+}
+
+static int set_timer_tick(struct hw *hw, unsigned int ticks)
+{
+	if (ticks)
+		ticks |= TIMR_IE | TIMR_IP;
+	hw_write_20kx(hw, TIMR, ticks);
+	return 0;
+}
+
+static unsigned int get_wc(struct hw *hw)
+{
+	return hw_read_20kx(hw, WC);
+}
+
+/* Card hardware initialization block */
+struct dac_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct adc_conf {
+	unsigned int msr; 	/* master sample rate in rsrs */
+	unsigned char input; 	/* the input source of ADC */
+	unsigned char mic20db; 	/* boost mic by 20db if input is microphone */
+};
+
+struct daio_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct trn_conf {
+	unsigned long vm_pgt_phys;
+};
+
+static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
+{
+	u32 i2sorg;
+	u32 spdorg;
+
+	/* Read I2S CTL.  Keep original value. */
+	/*i2sorg = hw_read_20kx(hw, I2SCTL);*/
+	i2sorg = 0x94040404; /* enable all audio out and I2S-D input */
+	/* Program I2S with proper master sample rate and enable
+	 * the correct I2S channel. */
+	i2sorg &= 0xfffffffc;
+
+	/* Enable S/PDIF-out-A in fixed 24-bit data
+	 * format and default to 48kHz. */
+	/* Disable all before doing any changes. */
+	hw_write_20kx(hw, SPOCTL, 0x0);
+	spdorg = 0x05;
+
+	switch (info->msr) {
+	case 1:
+		i2sorg |= 1;
+		spdorg |= (0x0 << 6);
+		break;
+	case 2:
+		i2sorg |= 2;
+		spdorg |= (0x1 << 6);
+		break;
+	case 4:
+		i2sorg |= 3;
+		spdorg |= (0x2 << 6);
+		break;
+	default:
+		i2sorg |= 1;
+		break;
+	}
+
+	hw_write_20kx(hw, I2SCTL, i2sorg);
+	hw_write_20kx(hw, SPOCTL, spdorg);
+
+	/* Enable S/PDIF-in-A in fixed 24-bit data format. */
+	/* Disable all before doing any changes. */
+	hw_write_20kx(hw, SPICTL, 0x0);
+	mdelay(1);
+	spdorg = 0x0a0a0a0a;
+	hw_write_20kx(hw, SPICTL, spdorg);
+	mdelay(1);
+
+	return 0;
+}
+
+/* TRANSPORT operations */
+static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
+{
+	u32 trnctl;
+	u32 ptp_phys_low, ptp_phys_high;
+
+	/* Set up device page table */
+	if ((~0UL) == info->vm_pgt_phys) {
+		printk(KERN_ERR "Wrong device page table page address!\n");
+		return -1;
+	}
+
+	trnctl = 0x13;  /* 32-bit, 4k-size page */
+	ptp_phys_low = (u32)info->vm_pgt_phys;
+	ptp_phys_high = upper_32_bits(info->vm_pgt_phys);
+	if (sizeof(void *) == 8) /* 64bit address */
+		trnctl |= (1 << 2);
+#if 0 /* Only 4k h/w pages for simplicitiy */
+#if PAGE_SIZE == 8192
+	trnctl |= (1<<5);
+#endif
+#endif
+	hw_write_20kx(hw, PTPALX, ptp_phys_low);
+	hw_write_20kx(hw, PTPAHX, ptp_phys_high);
+	hw_write_20kx(hw, TRNCTL, trnctl);
+	hw_write_20kx(hw, TRNIS, 0x200c01); /* realy needed? */
+
+	return 0;
+}
+
+/* Card initialization */
+#define GCTL_EAC	0x00000001
+#define GCTL_EAI	0x00000002
+#define GCTL_BEP	0x00000004
+#define GCTL_BES	0x00000008
+#define GCTL_DSP	0x00000010
+#define GCTL_DBP	0x00000020
+#define GCTL_ABP	0x00000040
+#define GCTL_TBP	0x00000080
+#define GCTL_SBP	0x00000100
+#define GCTL_FBP	0x00000200
+#define GCTL_XA		0x00000400
+#define GCTL_ET		0x00000800
+#define GCTL_PR		0x00001000
+#define GCTL_MRL	0x00002000
+#define GCTL_SDE	0x00004000
+#define GCTL_SDI	0x00008000
+#define GCTL_SM		0x00010000
+#define GCTL_SR		0x00020000
+#define GCTL_SD		0x00040000
+#define GCTL_SE		0x00080000
+#define GCTL_AID	0x00100000
+
+static int hw_pll_init(struct hw *hw, unsigned int rsr)
+{
+	unsigned int pllctl;
+	int i;
+
+	pllctl = (48000 == rsr) ? 0x1480a001 : 0x1480a731;
+	for (i = 0; i < 3; i++) {
+		if (hw_read_20kx(hw, PLLCTL) == pllctl)
+			break;
+
+		hw_write_20kx(hw, PLLCTL, pllctl);
+		mdelay(40);
+	}
+	if (i >= 3) {
+		printk(KERN_ALERT "PLL initialization failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int hw_auto_init(struct hw *hw)
+{
+	unsigned int gctl;
+	int i;
+
+	gctl = hw_read_20kx(hw, GCTL);
+	set_field(&gctl, GCTL_EAI, 0);
+	hw_write_20kx(hw, GCTL, gctl);
+	set_field(&gctl, GCTL_EAI, 1);
+	hw_write_20kx(hw, GCTL, gctl);
+	mdelay(10);
+	for (i = 0; i < 400000; i++) {
+		gctl = hw_read_20kx(hw, GCTL);
+		if (get_field(gctl, GCTL_AID))
+			break;
+	}
+	if (!get_field(gctl, GCTL_AID)) {
+		printk(KERN_ALERT "Card Auto-init failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int i2c_unlock(struct hw *hw)
+{
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	hw_write_pci(hw, 0xcc, 0x8c);
+	hw_write_pci(hw, 0xcc, 0x0e);
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	hw_write_pci(hw, 0xcc, 0xee);
+	hw_write_pci(hw, 0xcc, 0xaa);
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	return -1;
+}
+
+static void i2c_lock(struct hw *hw)
+{
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		hw_write_pci(hw, 0xcc, 0x00);
+}
+
+static void i2c_write(struct hw *hw, u32 device, u32 addr, u32 data)
+{
+	unsigned int ret;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+	hw_write_pci(hw, 0xE0, device);
+	hw_write_pci(hw, 0xE4, (data << 8) | (addr & 0xff));
+}
+
+/* DAC operations */
+
+static int hw_reset_dac(struct hw *hw)
+{
+	u32 i;
+	u16 gpioorg;
+	unsigned int ret;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+
+	/* To be effective, need to reset the DAC twice. */
+	for (i = 0; i < 2;  i++) {
+		/* set gpio */
+		mdelay(100);
+		gpioorg = (u16)hw_read_20kx(hw, GPIO);
+		gpioorg &= 0xfffd;
+		hw_write_20kx(hw, GPIO, gpioorg);
+		mdelay(1);
+		hw_write_20kx(hw, GPIO, gpioorg | 0x2);
+	}
+
+	i2c_write(hw, 0x00180080, 0x01, 0x80);
+	i2c_write(hw, 0x00180080, 0x02, 0x10);
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
+{
+	u32 data;
+	u16 gpioorg;
+	unsigned int ret;
+
+	if (hw->model == CTSB055X) {
+		/* SB055x, unmute outputs */
+		gpioorg = (u16)hw_read_20kx(hw, GPIO);
+		gpioorg &= 0xffbf;	/* set GPIO6 to low */
+		gpioorg |= 2;		/* set GPIO1 to high */
+		hw_write_20kx(hw, GPIO, gpioorg);
+		return 0;
+	}
+
+	/* mute outputs */
+	gpioorg = (u16)hw_read_20kx(hw, GPIO);
+	gpioorg &= 0xffbf;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	hw_reset_dac(hw);
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+
+	switch (info->msr) {
+	case 1:
+		data = 0x24;
+		break;
+	case 2:
+		data = 0x25;
+		break;
+	case 4:
+		data = 0x26;
+		break;
+	default:
+		data = 0x24;
+		break;
+	}
+
+	i2c_write(hw, 0x00180080, 0x06, data);
+	i2c_write(hw, 0x00180080, 0x09, data);
+	i2c_write(hw, 0x00180080, 0x0c, data);
+	i2c_write(hw, 0x00180080, 0x0f, data);
+
+	i2c_lock(hw);
+
+	/* unmute outputs */
+	gpioorg = (u16)hw_read_20kx(hw, GPIO);
+	gpioorg = gpioorg | 0x40;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	return 0;
+}
+
+/* ADC operations */
+
+static int is_adc_input_selected_SB055x(struct hw *hw, enum ADCSRC type)
+{
+	return 0;
+}
+
+static int is_adc_input_selected_SBx(struct hw *hw, enum ADCSRC type)
+{
+	u32 data;
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data = ((data & (0x1<<7)) && (data & (0x1<<8)));
+		break;
+	case ADC_LINEIN:
+		data = (!(data & (0x1<<7)) && (data & (0x1<<8)));
+		break;
+	case ADC_NONE: /* Digital I/O */
+		data = (!(data & (0x1<<8)));
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int is_adc_input_selected_hendrix(struct hw *hw, enum ADCSRC type)
+{
+	u32 data;
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data = (data & (0x1 << 7)) ? 1 : 0;
+		break;
+	case ADC_LINEIN:
+		data = (data & (0x1 << 7)) ? 0 : 1;
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
+{
+	switch (hw->model) {
+	case CTSB055X:
+		return is_adc_input_selected_SB055x(hw, type);
+	case CTSB073X:
+		return is_adc_input_selected_hendrix(hw, type);
+	case CTUAA:
+		return is_adc_input_selected_hendrix(hw, type);
+	default:
+		return is_adc_input_selected_SBx(hw, type);
+	}
+}
+
+static int
+adc_input_select_SB055x(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data;
+
+	/*
+	 * check and set the following GPIO bits accordingly
+	 * ADC_Gain		= GPIO2
+	 * DRM_off		= GPIO3
+	 * Mic_Pwr_on		= GPIO7
+	 * Digital_IO_Sel	= GPIO8
+	 * Mic_Sw		= GPIO9
+	 * Aux/MicLine_Sw	= GPIO12
+	 */
+	data = hw_read_20kx(hw, GPIO);
+	data &= 0xec73;
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1<<7) | (0x1<<8) | (0x1<<9) ;
+		data |= boost ? (0x1<<2) : 0;
+		break;
+	case ADC_LINEIN:
+		data |= (0x1<<8);
+		break;
+	case ADC_AUX:
+		data |= (0x1<<8) | (0x1<<12);
+		break;
+	case ADC_NONE:
+		data |= (0x1<<12);  /* set to digital */
+		break;
+	default:
+		return -1;
+	}
+
+	hw_write_20kx(hw, GPIO, data);
+
+	return 0;
+}
+
+
+static int
+adc_input_select_SBx(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data;
+	u32 i2c_data;
+	unsigned int ret;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	/* set i2c access mode as Direct Control */
+	hw_write_pci(hw, 0xEC, 0x05);
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data |= ((0x1 << 7) | (0x1 << 8));
+		i2c_data = 0x1;  /* Mic-in */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 7);
+		data |= (0x1 << 8);
+		i2c_data = 0x2; /* Line-in */
+		break;
+	case ADC_NONE:
+		data &= ~(0x1 << 8);
+		i2c_data = 0x0; /* set to Digital */
+		break;
+	default:
+		i2c_lock(hw);
+		return -1;
+	}
+	hw_write_20kx(hw, GPIO, data);
+	i2c_write(hw, 0x001a0080, 0x2a, i2c_data);
+	if (boost) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xe7); /* +12dB boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xe7); /* +12dB boost */
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf); /* No boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf); /* No boost */
+	}
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int
+adc_input_select_hendrix(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data;
+	u32 i2c_data;
+	unsigned int ret;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	/* set i2c access mode as Direct Control */
+	hw_write_pci(hw, 0xEC, 0x05);
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1 << 7);
+		i2c_data = 0x1;  /* Mic-in */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 7);
+		i2c_data = 0x2; /* Line-in */
+		break;
+	default:
+		i2c_lock(hw);
+		return -1;
+	}
+	hw_write_20kx(hw, GPIO, data);
+	i2c_write(hw, 0x001a0080, 0x2a, i2c_data);
+	if (boost) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xe7); /* +12dB boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xe7); /* +12dB boost */
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf); /* No boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf); /* No boost */
+	}
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
+{
+	int state = type == ADC_MICIN;
+
+	switch (hw->model) {
+	case CTSB055X:
+		return adc_input_select_SB055x(hw, type, state);
+	case CTSB073X:
+		return adc_input_select_hendrix(hw, type, state);
+	case CTUAA:
+		return adc_input_select_hendrix(hw, type, state);
+	default:
+		return adc_input_select_SBx(hw, type, state);
+	}
+}
+
+static int adc_init_SB055x(struct hw *hw, int input, int mic20db)
+{
+	return adc_input_select_SB055x(hw, input, mic20db);
+}
+
+static int adc_init_SBx(struct hw *hw, int input, int mic20db)
+{
+	u16 gpioorg;
+	u16 input_source;
+	u32 adcdata;
+	unsigned int ret;
+
+	input_source = 0x100;  /* default to analog */
+	switch (input) {
+	case ADC_MICIN:
+		adcdata = 0x1;
+		input_source = 0x180;  /* set GPIO7 to select Mic */
+		break;
+	case ADC_LINEIN:
+		adcdata = 0x2;
+		break;
+	case ADC_VIDEO:
+		adcdata = 0x4;
+		break;
+	case ADC_AUX:
+		adcdata = 0x8;
+		break;
+	case ADC_NONE:
+		adcdata = 0x0;
+		input_source = 0x0;  /* set to Digital */
+		break;
+	default:
+		adcdata = 0x0;
+		break;
+	}
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+
+	i2c_write(hw, 0x001a0080, 0x0e, 0x08);
+	i2c_write(hw, 0x001a0080, 0x18, 0x0a);
+	i2c_write(hw, 0x001a0080, 0x28, 0x86);
+	i2c_write(hw, 0x001a0080, 0x2a, adcdata);
+
+	if (mic20db) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xf7);
+		i2c_write(hw, 0x001a0080, 0x1e, 0xf7);
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf);
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf);
+	}
+
+	if (!(hw_read_20kx(hw, ID0) & 0x100))
+		i2c_write(hw, 0x001a0080, 0x16, 0x26);
+
+	i2c_lock(hw);
+
+	gpioorg = (u16)hw_read_20kx(hw,  GPIO);
+	gpioorg &= 0xfe7f;
+	gpioorg |= input_source;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	return 0;
+}
+
+static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
+{
+	if (hw->model == CTSB055X)
+		return adc_init_SB055x(hw, info->input, info->mic20db);
+	else
+		return adc_init_SBx(hw, info->input, info->mic20db);
+}
+
+static int hw_have_digit_io_switch(struct hw *hw)
+{
+	/* SB073x and Vista compatible cards have no digit IO switch */
+	return !(hw->model == CTSB073X || hw->model == CTUAA);
+}
+
+#define CTLBITS(a, b, c, d)	(((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+
+#define UAA_CFG_PWRSTATUS	0x44
+#define UAA_CFG_SPACE_FLAG	0xA0
+#define UAA_CORE_CHANGE		0x3FFC
+static int uaa_to_xfi(struct pci_dev *pci)
+{
+	unsigned int bar0, bar1, bar2, bar3, bar4, bar5;
+	unsigned int cmd, irq, cl_size, l_timer, pwr;
+	unsigned int is_uaa;
+	unsigned int data[4] = {0};
+	unsigned int io_base;
+	void *mem_base;
+	int i;
+	const u32 CTLX = CTLBITS('C', 'T', 'L', 'X');
+	const u32 CTL_ = CTLBITS('C', 'T', 'L', '-');
+	const u32 CTLF = CTLBITS('C', 'T', 'L', 'F');
+	const u32 CTLi = CTLBITS('C', 'T', 'L', 'i');
+	const u32 CTLA = CTLBITS('C', 'T', 'L', 'A');
+	const u32 CTLZ = CTLBITS('C', 'T', 'L', 'Z');
+	const u32 CTLL = CTLBITS('C', 'T', 'L', 'L');
+
+	/* By default, Hendrix card UAA Bar0 should be using memory... */
+	io_base = pci_resource_start(pci, 0);
+	mem_base = ioremap(io_base, pci_resource_len(pci, 0));
+	if (NULL == mem_base)
+		return -ENOENT;
+
+	/* Read current mode from Mode Change Register */
+	for (i = 0; i < 4; i++)
+		data[i] = readl(mem_base + UAA_CORE_CHANGE);
+
+	/* Determine current mode... */
+	if (data[0] == CTLA) {
+		is_uaa = ((data[1] == CTLZ && data[2] == CTLL
+			  && data[3] == CTLA) || (data[1] == CTLA
+			  && data[2] == CTLZ && data[3] == CTLL));
+	} else if (data[0] == CTLZ) {
+		is_uaa = (data[1] == CTLL
+				&& data[2] == CTLA && data[3] == CTLA);
+	} else if (data[0] == CTLL) {
+		is_uaa = (data[1] == CTLA
+				&& data[2] == CTLA && data[3] == CTLZ);
+	} else {
+		is_uaa = 0;
+	}
+
+	if (!is_uaa) {
+		/* Not in UAA mode currently. Return directly. */
+		iounmap(mem_base);
+		return 0;
+	}
+
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_0, &bar0);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_1, &bar1);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_2, &bar2);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_3, &bar3);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_4, &bar4);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_5, &bar5);
+	pci_read_config_dword(pci, PCI_INTERRUPT_LINE, &irq);
+	pci_read_config_dword(pci, PCI_CACHE_LINE_SIZE, &cl_size);
+	pci_read_config_dword(pci, PCI_LATENCY_TIMER, &l_timer);
+	pci_read_config_dword(pci, UAA_CFG_PWRSTATUS, &pwr);
+	pci_read_config_dword(pci, PCI_COMMAND, &cmd);
+
+	/* Set up X-Fi core PCI configuration space. */
+	/* Switch to X-Fi config space with BAR0 exposed. */
+	pci_write_config_dword(pci, UAA_CFG_SPACE_FLAG, 0x87654321);
+	/* Copy UAA's BAR5 into X-Fi BAR0 */
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_0, bar5);
+	/* Switch to X-Fi config space without BAR0 exposed. */
+	pci_write_config_dword(pci, UAA_CFG_SPACE_FLAG, 0x12345678);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_1, bar1);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_2, bar2);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_3, bar3);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_4, bar4);
+	pci_write_config_dword(pci, PCI_INTERRUPT_LINE, irq);
+	pci_write_config_dword(pci, PCI_CACHE_LINE_SIZE, cl_size);
+	pci_write_config_dword(pci, PCI_LATENCY_TIMER, l_timer);
+	pci_write_config_dword(pci, UAA_CFG_PWRSTATUS, pwr);
+	pci_write_config_dword(pci, PCI_COMMAND, cmd);
+
+	/* Switch to X-Fi mode */
+	writel(CTLX, (mem_base + UAA_CORE_CHANGE));
+	writel(CTL_, (mem_base + UAA_CORE_CHANGE));
+	writel(CTLF, (mem_base + UAA_CORE_CHANGE));
+	writel(CTLi, (mem_base + UAA_CORE_CHANGE));
+
+	iounmap(mem_base);
+
+	return 0;
+}
+
+static irqreturn_t ct_20k1_interrupt(int irq, void *dev_id)
+{
+	struct hw *hw = dev_id;
+	unsigned int status;
+
+	status = hw_read_20kx(hw, GIP);
+	if (!status)
+		return IRQ_NONE;
+
+	if (hw->irq_callback)
+		hw->irq_callback(hw->irq_callback_data, status);
+
+	hw_write_20kx(hw, GIP, status);
+	return IRQ_HANDLED;
+}
+
+static int hw_card_start(struct hw *hw)
+{
+	int err;
+	struct pci_dev *pci = hw->pci;
+
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	/* Set DMA transfer mask */
+	if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) {
+		printk(KERN_ERR "architecture does not support PCI "
+				"busmaster DMA with mask 0x%llx\n",
+		       CT_XFI_DMA_MASK);
+		err = -ENXIO;
+		goto error1;
+	}
+
+	err = pci_request_regions(pci, "XFi");
+	if (err < 0)
+		goto error1;
+
+	/* Switch to X-Fi mode from UAA mode if neeeded */
+	if (hw->model == CTUAA) {
+		err = uaa_to_xfi(pci);
+		if (err)
+			goto error2;
+
+		hw->io_base = pci_resource_start(pci, 5);
+	} else {
+		hw->io_base = pci_resource_start(pci, 0);
+	}
+
+	err = request_irq(pci->irq, ct_20k1_interrupt, IRQF_SHARED,
+			  "ctxfi", hw);
+	if (err < 0) {
+		printk(KERN_ERR "XFi: Cannot get irq %d\n", pci->irq);
+		goto error2;
+	}
+	hw->irq = pci->irq;
+
+	pci_set_master(pci);
+
+	return 0;
+
+error2:
+	pci_release_regions(pci);
+	hw->io_base = 0;
+error1:
+	pci_disable_device(pci);
+	return err;
+}
+
+static int hw_card_stop(struct hw *hw)
+{
+	/* TODO: Disable interrupt and so on... */
+	if (hw->irq >= 0)
+		synchronize_irq(hw->irq);
+	return 0;
+}
+
+static int hw_card_shutdown(struct hw *hw)
+{
+	if (hw->irq >= 0)
+		free_irq(hw->irq, hw);
+
+	hw->irq	= -1;
+
+	if (NULL != ((void *)hw->mem_base))
+		iounmap((void *)hw->mem_base);
+
+	hw->mem_base = (unsigned long)NULL;
+
+	if (hw->io_base)
+		pci_release_regions(hw->pci);
+
+	hw->io_base = 0;
+
+	pci_disable_device(hw->pci);
+
+	return 0;
+}
+
+static int hw_card_init(struct hw *hw, struct card_conf *info)
+{
+	int err;
+	unsigned int gctl;
+	u32 data;
+	struct dac_conf dac_info = {0};
+	struct adc_conf adc_info = {0};
+	struct daio_conf daio_info = {0};
+	struct trn_conf trn_info = {0};
+
+	/* Get PCI io port base address and do Hendrix switch if needed. */
+	if (!hw->io_base) {
+		err = hw_card_start(hw);
+		if (err)
+			return err;
+	}
+
+	/* PLL init */
+	err = hw_pll_init(hw, info->rsr);
+	if (err < 0)
+		return err;
+
+	/* kick off auto-init */
+	err = hw_auto_init(hw);
+	if (err < 0)
+		return err;
+
+	/* Enable audio ring */
+	gctl = hw_read_20kx(hw, GCTL);
+	set_field(&gctl, GCTL_EAC, 1);
+	set_field(&gctl, GCTL_DBP, 1);
+	set_field(&gctl, GCTL_TBP, 1);
+	set_field(&gctl, GCTL_FBP, 1);
+	set_field(&gctl, GCTL_ET, 1);
+	hw_write_20kx(hw, GCTL, gctl);
+	mdelay(10);
+
+	/* Reset all global pending interrupts */
+	hw_write_20kx(hw, GIE, 0);
+	/* Reset all SRC pending interrupts */
+	hw_write_20kx(hw, SRCIP, 0);
+	mdelay(30);
+
+	/* Detect the card ID and configure GPIO accordingly. */
+	switch (hw->model) {
+	case CTSB055X:
+		hw_write_20kx(hw, GPIOCTL, 0x13fe);
+		break;
+	case CTSB073X:
+		hw_write_20kx(hw, GPIOCTL, 0x00e6);
+		break;
+	case CTUAA:
+		hw_write_20kx(hw, GPIOCTL, 0x00c2);
+		break;
+	default:
+		hw_write_20kx(hw, GPIOCTL, 0x01e6);
+		break;
+	}
+
+	trn_info.vm_pgt_phys = info->vm_pgt_phys;
+	err = hw_trn_init(hw, &trn_info);
+	if (err < 0)
+		return err;
+
+	daio_info.msr = info->msr;
+	err = hw_daio_init(hw, &daio_info);
+	if (err < 0)
+		return err;
+
+	dac_info.msr = info->msr;
+	err = hw_dac_init(hw, &dac_info);
+	if (err < 0)
+		return err;
+
+	adc_info.msr = info->msr;
+	adc_info.input = ADC_LINEIN;
+	adc_info.mic20db = 0;
+	err = hw_adc_init(hw, &adc_info);
+	if (err < 0)
+		return err;
+
+	data = hw_read_20kx(hw, SRCMCTL);
+	data |= 0x1; /* Enables input from the audio ring */
+	hw_write_20kx(hw, SRCMCTL, data);
+
+	return 0;
+}
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg)
+{
+	u32 value;
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+	outl(reg, hw->io_base + 0x0);
+	value = inl(hw->io_base + 0x4);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+
+	return value;
+}
+
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+	outl(reg, hw->io_base + 0x0);
+	outl(data, hw->io_base + 0x4);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+
+}
+
+static u32 hw_read_pci(struct hw *hw, u32 reg)
+{
+	u32 value;
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+	outl(reg, hw->io_base + 0x10);
+	value = inl(hw->io_base + 0x14);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+
+	return value;
+}
+
+static void hw_write_pci(struct hw *hw, u32 reg, u32 data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+	outl(reg, hw->io_base + 0x10);
+	outl(data, hw->io_base + 0x14);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+}
+
+static struct hw ct20k1_preset __devinitdata = {
+	.irq = -1,
+
+	.card_init = hw_card_init,
+	.card_stop = hw_card_stop,
+	.pll_init = hw_pll_init,
+	.is_adc_source_selected = hw_is_adc_input_selected,
+	.select_adc_source = hw_adc_input_select,
+	.have_digit_io_switch = hw_have_digit_io_switch,
+
+	.src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk,
+	.src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk,
+	.src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk,
+	.src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk,
+	.src_set_state = src_set_state,
+	.src_set_bm = src_set_bm,
+	.src_set_rsr = src_set_rsr,
+	.src_set_sf = src_set_sf,
+	.src_set_wr = src_set_wr,
+	.src_set_pm = src_set_pm,
+	.src_set_rom = src_set_rom,
+	.src_set_vo = src_set_vo,
+	.src_set_st = src_set_st,
+	.src_set_ie = src_set_ie,
+	.src_set_ilsz = src_set_ilsz,
+	.src_set_bp = src_set_bp,
+	.src_set_cisz = src_set_cisz,
+	.src_set_ca = src_set_ca,
+	.src_set_sa = src_set_sa,
+	.src_set_la = src_set_la,
+	.src_set_pitch = src_set_pitch,
+	.src_set_dirty = src_set_dirty,
+	.src_set_clear_zbufs = src_set_clear_zbufs,
+	.src_set_dirty_all = src_set_dirty_all,
+	.src_commit_write = src_commit_write,
+	.src_get_ca = src_get_ca,
+	.src_get_dirty = src_get_dirty,
+	.src_dirty_conj_mask = src_dirty_conj_mask,
+	.src_mgr_enbs_src = src_mgr_enbs_src,
+	.src_mgr_enb_src = src_mgr_enb_src,
+	.src_mgr_dsb_src = src_mgr_dsb_src,
+	.src_mgr_commit_write = src_mgr_commit_write,
+
+	.srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk,
+	.srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk,
+	.srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc,
+	.srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser,
+	.srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt,
+	.srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr,
+	.srcimp_mgr_commit_write = srcimp_mgr_commit_write,
+
+	.amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk,
+	.amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk,
+	.amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk,
+	.amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk,
+	.amixer_set_mode = amixer_set_mode,
+	.amixer_set_iv = amixer_set_iv,
+	.amixer_set_x = amixer_set_x,
+	.amixer_set_y = amixer_set_y,
+	.amixer_set_sadr = amixer_set_sadr,
+	.amixer_set_se = amixer_set_se,
+	.amixer_set_dirty = amixer_set_dirty,
+	.amixer_set_dirty_all = amixer_set_dirty_all,
+	.amixer_commit_write = amixer_commit_write,
+	.amixer_get_y = amixer_get_y,
+	.amixer_get_dirty = amixer_get_dirty,
+
+	.dai_get_ctrl_blk = dai_get_ctrl_blk,
+	.dai_put_ctrl_blk = dai_put_ctrl_blk,
+	.dai_srt_set_srco = dai_srt_set_srcr,
+	.dai_srt_set_srcm = dai_srt_set_srcl,
+	.dai_srt_set_rsr = dai_srt_set_rsr,
+	.dai_srt_set_drat = dai_srt_set_drat,
+	.dai_srt_set_ec = dai_srt_set_ec,
+	.dai_srt_set_et = dai_srt_set_et,
+	.dai_commit_write = dai_commit_write,
+
+	.dao_get_ctrl_blk = dao_get_ctrl_blk,
+	.dao_put_ctrl_blk = dao_put_ctrl_blk,
+	.dao_set_spos = dao_set_spos,
+	.dao_commit_write = dao_commit_write,
+	.dao_get_spos = dao_get_spos,
+
+	.daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk,
+	.daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk,
+	.daio_mgr_enb_dai = daio_mgr_enb_dai,
+	.daio_mgr_dsb_dai = daio_mgr_dsb_dai,
+	.daio_mgr_enb_dao = daio_mgr_enb_dao,
+	.daio_mgr_dsb_dao = daio_mgr_dsb_dao,
+	.daio_mgr_dao_init = daio_mgr_dao_init,
+	.daio_mgr_set_imaparc = daio_mgr_set_imaparc,
+	.daio_mgr_set_imapnxt = daio_mgr_set_imapnxt,
+	.daio_mgr_set_imapaddr = daio_mgr_set_imapaddr,
+	.daio_mgr_commit_write = daio_mgr_commit_write,
+
+	.set_timer_irq = set_timer_irq,
+	.set_timer_tick = set_timer_tick,
+	.get_wc = get_wc,
+};
+
+int __devinit create_20k1_hw_obj(struct hw **rhw)
+{
+	struct hw20k1 *hw20k1;
+
+	*rhw = NULL;
+	hw20k1 = kzalloc(sizeof(*hw20k1), GFP_KERNEL);
+	if (NULL == hw20k1)
+		return -ENOMEM;
+
+	spin_lock_init(&hw20k1->reg_20k1_lock);
+	spin_lock_init(&hw20k1->reg_pci_lock);
+
+	hw20k1->hw = ct20k1_preset;
+
+	*rhw = &hw20k1->hw;
+
+	return 0;
+}
+
+int destroy_20k1_hw_obj(struct hw *hw)
+{
+	if (hw->io_base)
+		hw_card_shutdown(hw);
+
+	kfree(container_of(hw, struct hw20k1, hw));
+	return 0;
+}
diff --git a/sound/pci/ctxfi/cthw20k1.h b/sound/pci/ctxfi/cthw20k1.h
new file mode 100644
index 0000000..02f72fb
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k1.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k1.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHW20K1_H
+#define CTHW20K1_H
+
+#include "cthardware.h"
+
+int create_20k1_hw_obj(struct hw **rhw);
+int destroy_20k1_hw_obj(struct hw *hw);
+
+#endif /* CTHW20K1_H */
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
new file mode 100644
index 0000000..4493a51
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -0,0 +1,2137 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k2.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord for 20k2.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 14 2008
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include "cthw20k2.h"
+#include "ct20k2reg.h"
+
+#if BITS_PER_LONG == 32
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bit PTE */
+#else
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(64) /* 64 bit PTE */
+#endif
+
+struct hw20k2 {
+	struct hw hw;
+	/* for i2c */
+	unsigned char dev_id;
+	unsigned char addr_size;
+	unsigned char data_size;
+};
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg);
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
+
+/*
+ * Type definition block.
+ * The layout of control structures can be directly applied on 20k2 chip.
+ */
+
+/*
+ * SRC control block definitions.
+ */
+
+/* SRC resource control block */
+#define SRCCTL_STATE	0x00000007
+#define SRCCTL_BM	0x00000008
+#define SRCCTL_RSR	0x00000030
+#define SRCCTL_SF	0x000001C0
+#define SRCCTL_WR	0x00000200
+#define SRCCTL_PM	0x00000400
+#define SRCCTL_ROM	0x00001800
+#define SRCCTL_VO	0x00002000
+#define SRCCTL_ST	0x00004000
+#define SRCCTL_IE	0x00008000
+#define SRCCTL_ILSZ	0x000F0000
+#define SRCCTL_BP	0x00100000
+
+#define SRCCCR_CISZ	0x000007FF
+#define SRCCCR_CWA	0x001FF800
+#define SRCCCR_D	0x00200000
+#define SRCCCR_RS	0x01C00000
+#define SRCCCR_NAL	0x3E000000
+#define SRCCCR_RA	0xC0000000
+
+#define SRCCA_CA	0x0FFFFFFF
+#define SRCCA_RS	0xE0000000
+
+#define SRCSA_SA	0x0FFFFFFF
+
+#define SRCLA_LA	0x0FFFFFFF
+
+/* Mixer Parameter Ring ram Low and Hight register.
+ * Fixed-point value in 8.24 format for parameter channel */
+#define MPRLH_PITCH	0xFFFFFFFF
+
+/* SRC resource register dirty flags */
+union src_dirty {
+	struct {
+		u16 ctl:1;
+		u16 ccr:1;
+		u16 sa:1;
+		u16 la:1;
+		u16 ca:1;
+		u16 mpr:1;
+		u16 czbfs:1;	/* Clear Z-Buffers */
+		u16 rsv:9;
+	} bf;
+	u16 data;
+};
+
+struct src_rsc_ctrl_blk {
+	unsigned int	ctl;
+	unsigned int 	ccr;
+	unsigned int	ca;
+	unsigned int	sa;
+	unsigned int	la;
+	unsigned int	mpr;
+	union src_dirty	dirty;
+};
+
+/* SRC manager control block */
+union src_mgr_dirty {
+	struct {
+		u16 enb0:1;
+		u16 enb1:1;
+		u16 enb2:1;
+		u16 enb3:1;
+		u16 enb4:1;
+		u16 enb5:1;
+		u16 enb6:1;
+		u16 enb7:1;
+		u16 enbsa:1;
+		u16 rsv:7;
+	} bf;
+	u16 data;
+};
+
+struct src_mgr_ctrl_blk {
+	unsigned int		enbsa;
+	unsigned int		enb[8];
+	union src_mgr_dirty	dirty;
+};
+
+/* SRCIMP manager control block */
+#define SRCAIM_ARC	0x00000FFF
+#define SRCAIM_NXT	0x00FF0000
+#define SRCAIM_SRC	0xFF000000
+
+struct srcimap {
+	unsigned int srcaim;
+	unsigned int idx;
+};
+
+/* SRCIMP manager register dirty flags */
+union srcimp_mgr_dirty {
+	struct {
+		u16 srcimap:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+struct srcimp_mgr_ctrl_blk {
+	struct srcimap		srcimap;
+	union srcimp_mgr_dirty	dirty;
+};
+
+/*
+ * Function implementation block.
+ */
+
+static int src_get_rsc_ctrl_blk(void **rblk)
+{
+	struct src_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_put_rsc_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int src_set_state(void *blk, unsigned int state)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_STATE, state);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bm(void *blk, unsigned int bm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BM, bm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rsr(void *blk, unsigned int rsr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_RSR, rsr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_sf(void *blk, unsigned int sf)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_SF, sf);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_wr(void *blk, unsigned int wr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_WR, wr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_pm(void *blk, unsigned int pm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_PM, pm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rom(void *blk, unsigned int rom)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ROM, rom);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_vo(void *blk, unsigned int vo)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_VO, vo);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_st(void *blk, unsigned int st)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ST, st);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ie(void *blk, unsigned int ie)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_IE, ie);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ilsz(void *blk, unsigned int ilsz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ILSZ, ilsz);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bp(void *blk, unsigned int bp)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BP, bp);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_cisz(void *blk, unsigned int cisz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ccr, SRCCCR_CISZ, cisz);
+	ctl->dirty.bf.ccr = 1;
+	return 0;
+}
+
+static int src_set_ca(void *blk, unsigned int ca)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ca, SRCCA_CA, ca);
+	ctl->dirty.bf.ca = 1;
+	return 0;
+}
+
+static int src_set_sa(void *blk, unsigned int sa)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->sa, SRCSA_SA, sa);
+	ctl->dirty.bf.sa = 1;
+	return 0;
+}
+
+static int src_set_la(void *blk, unsigned int la)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->la, SRCLA_LA, la);
+	ctl->dirty.bf.la = 1;
+	return 0;
+}
+
+static int src_set_pitch(void *blk, unsigned int pitch)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->mpr, MPRLH_PITCH, pitch);
+	ctl->dirty.bf.mpr = 1;
+	return 0;
+}
+
+static int src_set_clear_zbufs(void *blk, unsigned int clear)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.bf.czbfs = (clear ? 1 : 0);
+	return 0;
+}
+
+static int src_set_dirty(void *blk, unsigned int flags)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int src_set_dirty_all(void *blk)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+#define AR_SLOT_SIZE		4096
+#define AR_SLOT_BLOCK_SIZE	16
+#define AR_PTS_PITCH		6
+#define AR_PARAM_SRC_OFFSET	0x60
+
+static unsigned int src_param_pitch_mixer(unsigned int src_idx)
+{
+	return ((src_idx << 4) + AR_PTS_PITCH + AR_SLOT_SIZE
+			- AR_PARAM_SRC_OFFSET) % AR_SLOT_SIZE;
+
+}
+
+static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+	int i;
+
+	if (ctl->dirty.bf.czbfs) {
+		/* Clear Z-Buffer registers */
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRC_UPZ+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 4; i++)
+			hw_write_20kx(hw, SRC_DN0Z+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRC_DN1Z+idx*0x100+i*0x4, 0);
+
+		ctl->dirty.bf.czbfs = 0;
+	}
+	if (ctl->dirty.bf.mpr) {
+		/* Take the parameter mixer resource in the same group as that
+		 * the idx src is in for simplicity. Unlike src, all conjugate
+		 * parameter mixer resources must be programmed for
+		 * corresponding conjugate src resources. */
+		unsigned int pm_idx = src_param_pitch_mixer(idx);
+		hw_write_20kx(hw, MIXER_PRING_LO_HI+4*pm_idx, ctl->mpr);
+		hw_write_20kx(hw, MIXER_PMOPLO+8*pm_idx, 0x3);
+		hw_write_20kx(hw, MIXER_PMOPHI+8*pm_idx, 0x0);
+		ctl->dirty.bf.mpr = 0;
+	}
+	if (ctl->dirty.bf.sa) {
+		hw_write_20kx(hw, SRC_SA+idx*0x100, ctl->sa);
+		ctl->dirty.bf.sa = 0;
+	}
+	if (ctl->dirty.bf.la) {
+		hw_write_20kx(hw, SRC_LA+idx*0x100, ctl->la);
+		ctl->dirty.bf.la = 0;
+	}
+	if (ctl->dirty.bf.ca) {
+		hw_write_20kx(hw, SRC_CA+idx*0x100, ctl->ca);
+		ctl->dirty.bf.ca = 0;
+	}
+
+	/* Write srccf register */
+	hw_write_20kx(hw, SRC_CF+idx*0x100, 0x0);
+
+	if (ctl->dirty.bf.ccr) {
+		hw_write_20kx(hw, SRC_CCR+idx*0x100, ctl->ccr);
+		ctl->dirty.bf.ccr = 0;
+	}
+	if (ctl->dirty.bf.ctl) {
+		hw_write_20kx(hw, SRC_CTL+idx*0x100, ctl->ctl);
+		ctl->dirty.bf.ctl = 0;
+	}
+
+	return 0;
+}
+
+static int src_get_ca(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	ctl->ca = hw_read_20kx(hw, SRC_CA+idx*0x100);
+	ctl->dirty.bf.ca = 0;
+
+	return get_field(ctl->ca, SRCCA_CA);
+}
+
+static unsigned int src_get_dirty(void *blk)
+{
+	return ((struct src_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static unsigned int src_dirty_conj_mask(void)
+{
+	return 0x20;
+}
+
+static int src_mgr_enbs_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enbsa |= (0x1 << ((idx%128)/4));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.bf.enbsa = 1;
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	return 0;
+}
+
+static int src_mgr_enb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_dsb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] &= ~(0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct src_mgr_ctrl_blk *ctl = blk;
+	int i;
+	unsigned int ret;
+
+	if (ctl->dirty.bf.enbsa) {
+		do {
+			ret = hw_read_20kx(hw, SRC_ENBSTAT);
+		} while (ret & 0x1);
+		hw_write_20kx(hw, SRC_ENBSA, ctl->enbsa);
+		ctl->dirty.bf.enbsa = 0;
+	}
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.data & (0x1 << i))) {
+			hw_write_20kx(hw, SRC_ENB+(i*0x100), ctl->enb[i]);
+			ctl->dirty.data &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static int src_mgr_get_ctrl_blk(void **rblk)
+{
+	struct src_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_mgr_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_get_ctrl_blk(void **rblk)
+{
+	struct srcimp_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int srcimp_mgr_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_ARC, slot);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapuser(void *blk, unsigned int user)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_SRC, user);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_NXT, next);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	((struct srcimp_mgr_ctrl_blk *)blk)->srcimap.idx = addr;
+	((struct srcimp_mgr_ctrl_blk *)blk)->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srcimap) {
+		hw_write_20kx(hw, SRC_IMAP+ctl->srcimap.idx*0x100,
+						ctl->srcimap.srcaim);
+		ctl->dirty.bf.srcimap = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * AMIXER control block definitions.
+ */
+
+#define AMOPLO_M	0x00000003
+#define AMOPLO_IV	0x00000004
+#define AMOPLO_X	0x0003FFF0
+#define AMOPLO_Y	0xFFFC0000
+
+#define AMOPHI_SADR	0x000000FF
+#define AMOPHI_SE	0x80000000
+
+/* AMIXER resource register dirty flags */
+union amixer_dirty {
+	struct {
+		u16 amoplo:1;
+		u16 amophi:1;
+		u16 rsv:14;
+	} bf;
+	u16 data;
+};
+
+/* AMIXER resource control block */
+struct amixer_rsc_ctrl_blk {
+	unsigned int		amoplo;
+	unsigned int		amophi;
+	union amixer_dirty	dirty;
+};
+
+static int amixer_set_mode(void *blk, unsigned int mode)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_M, mode);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_iv(void *blk, unsigned int iv)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_IV, iv);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_x(void *blk, unsigned int x)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_X, x);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_y(void *blk, unsigned int y)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_Y, y);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_sadr(void *blk, unsigned int sadr)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SADR, sadr);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_se(void *blk, unsigned int se)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SE, se);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_dirty(void *blk, unsigned int flags)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int amixer_set_dirty_all(void *blk)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+static int amixer_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.amoplo || ctl->dirty.bf.amophi) {
+		hw_write_20kx(hw, MIXER_AMOPLO+idx*8, ctl->amoplo);
+		ctl->dirty.bf.amoplo = 0;
+		hw_write_20kx(hw, MIXER_AMOPHI+idx*8, ctl->amophi);
+		ctl->dirty.bf.amophi = 0;
+	}
+
+	return 0;
+}
+
+static int amixer_get_y(void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	return get_field(ctl->amoplo, AMOPLO_Y);
+}
+
+static unsigned int amixer_get_dirty(void *blk)
+{
+	return ((struct amixer_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static int amixer_rsc_get_ctrl_blk(void **rblk)
+{
+	struct amixer_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int amixer_rsc_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int amixer_mgr_get_ctrl_blk(void **rblk)
+{
+	*rblk = NULL;
+
+	return 0;
+}
+
+static int amixer_mgr_put_ctrl_blk(void *blk)
+{
+	return 0;
+}
+
+/*
+ * DAIO control block definitions.
+ */
+
+/* Receiver Sample Rate Tracker Control register */
+#define SRTCTL_SRCO	0x000000FF
+#define SRTCTL_SRCM	0x0000FF00
+#define SRTCTL_RSR	0x00030000
+#define SRTCTL_DRAT	0x00300000
+#define SRTCTL_EC	0x01000000
+#define SRTCTL_ET	0x10000000
+
+/* DAIO Receiver register dirty flags */
+union dai_dirty {
+	struct {
+		u16 srt:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* DAIO Receiver control block */
+struct dai_ctrl_blk {
+	unsigned int	srt;
+	union dai_dirty	dirty;
+};
+
+/* Audio Input Mapper RAM */
+#define AIM_ARC		0x00000FFF
+#define AIM_NXT		0x007F0000
+
+struct daoimap {
+	unsigned int aim;
+	unsigned int idx;
+};
+
+/* Audio Transmitter Control and Status register */
+#define ATXCTL_EN	0x00000001
+#define ATXCTL_MODE	0x00000010
+#define ATXCTL_CD	0x00000020
+#define ATXCTL_RAW	0x00000100
+#define ATXCTL_MT	0x00000200
+#define ATXCTL_NUC	0x00003000
+#define ATXCTL_BEN	0x00010000
+#define ATXCTL_BMUX	0x00700000
+#define ATXCTL_B24	0x01000000
+#define ATXCTL_CPF	0x02000000
+#define ATXCTL_RIV	0x10000000
+#define ATXCTL_LIV	0x20000000
+#define ATXCTL_RSAT	0x40000000
+#define ATXCTL_LSAT	0x80000000
+
+/* XDIF Transmitter register dirty flags */
+union dao_dirty {
+	struct {
+		u16 atxcsl:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* XDIF Transmitter control block */
+struct dao_ctrl_blk {
+	/* XDIF Transmitter Channel Status Low Register */
+	unsigned int	atxcsl;
+	union dao_dirty	dirty;
+};
+
+/* Audio Receiver Control register */
+#define ARXCTL_EN	0x00000001
+
+/* DAIO manager register dirty flags */
+union daio_mgr_dirty {
+	struct {
+		u32 atxctl:8;
+		u32 arxctl:8;
+		u32 daoimap:1;
+		u32 rsv:15;
+	} bf;
+	u32 data;
+};
+
+/* DAIO manager control block */
+struct daio_mgr_ctrl_blk {
+	struct daoimap		daoimap;
+	unsigned int		txctl[8];
+	unsigned int		rxctl[8];
+	union daio_mgr_dirty	dirty;
+};
+
+static int dai_srt_set_srco(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_SRCO, src);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_srcm(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_SRCM, src);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_rsr(void *blk, unsigned int rsr)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_RSR, rsr);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_drat(void *blk, unsigned int drat)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_DRAT, drat);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_ec(void *blk, unsigned int ec)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_EC, ec ? 1 : 0);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_et(void *blk, unsigned int et)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_ET, et ? 1 : 0);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srt) {
+		hw_write_20kx(hw, AUDIO_IO_RX_SRT_CTL+0x40*idx, ctl->srt);
+		ctl->dirty.bf.srt = 0;
+	}
+
+	return 0;
+}
+
+static int dai_get_ctrl_blk(void **rblk)
+{
+	struct dai_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dai_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int dao_set_spos(void *blk, unsigned int spos)
+{
+	((struct dao_ctrl_blk *)blk)->atxcsl = spos;
+	((struct dao_ctrl_blk *)blk)->dirty.bf.atxcsl = 1;
+	return 0;
+}
+
+static int dao_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dao_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.atxcsl) {
+		if (idx < 4) {
+			/* S/PDIF SPOSx */
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_L+0x40*idx,
+							ctl->atxcsl);
+		}
+		ctl->dirty.bf.atxcsl = 0;
+	}
+
+	return 0;
+}
+
+static int dao_get_spos(void *blk, unsigned int *spos)
+{
+	*spos = ((struct dao_ctrl_blk *)blk)->atxcsl;
+	return 0;
+}
+
+static int dao_get_ctrl_blk(void **rblk)
+{
+	struct dao_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dao_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+static int daio_mgr_enb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->rxctl[idx], ARXCTL_EN, 1);
+	ctl->dirty.bf.arxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dsb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->rxctl[idx], ARXCTL_EN, 0);
+
+	ctl->dirty.bf.arxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_enb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->txctl[idx], ATXCTL_EN, 1);
+	ctl->dirty.bf.atxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dsb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->txctl[idx], ATXCTL_EN, 0);
+	ctl->dirty.bf.atxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		switch ((conf & 0x7)) {
+		case 1:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 0);
+			break;
+		case 2:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 1);
+			break;
+		case 4:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 2);
+			break;
+		case 8:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 3);
+			break;
+		default:
+			break;
+		}
+		/* CDIF */
+		set_field(&ctl->txctl[idx], ATXCTL_CD, (!(conf & 0x7)));
+		/* Non-audio */
+		set_field(&ctl->txctl[idx], ATXCTL_LIV, (conf >> 4) & 0x1);
+		/* Non-audio */
+		set_field(&ctl->txctl[idx], ATXCTL_RIV, (conf >> 4) & 0x1);
+		set_field(&ctl->txctl[idx], ATXCTL_RAW,
+			  ((conf >> 3) & 0x1) ? 0 : 0);
+		ctl->dirty.bf.atxctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		/*idx %= 4; */
+	}
+	return 0;
+}
+
+static int daio_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_ARC, slot);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_NXT, next);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	((struct daio_mgr_ctrl_blk *)blk)->daoimap.idx = addr;
+	((struct daio_mgr_ctrl_blk *)blk)->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+	unsigned int data;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.bf.atxctl & (0x1 << i))) {
+			data = ctl->txctl[i];
+			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), data);
+			ctl->dirty.bf.atxctl &= ~(0x1 << i);
+			mdelay(1);
+		}
+		if ((ctl->dirty.bf.arxctl & (0x1 << i))) {
+			data = ctl->rxctl[i];
+			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), data);
+			ctl->dirty.bf.arxctl &= ~(0x1 << i);
+			mdelay(1);
+		}
+	}
+	if (ctl->dirty.bf.daoimap) {
+		hw_write_20kx(hw, AUDIO_IO_AIM+ctl->daoimap.idx*4,
+						ctl->daoimap.aim);
+		ctl->dirty.bf.daoimap = 0;
+	}
+
+	return 0;
+}
+
+static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
+{
+	struct daio_mgr_ctrl_blk *blk;
+	int i;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	for (i = 0; i < 8; i++) {
+		blk->txctl[i] = hw_read_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i));
+		blk->rxctl[i] = hw_read_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i));
+	}
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int daio_mgr_put_ctrl_blk(void *blk)
+{
+	kfree(blk);
+
+	return 0;
+}
+
+/* Card hardware initialization block */
+struct dac_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct adc_conf {
+	unsigned int msr; 	/* master sample rate in rsrs */
+	unsigned char input; 	/* the input source of ADC */
+	unsigned char mic20db; 	/* boost mic by 20db if input is microphone */
+};
+
+struct daio_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct trn_conf {
+	unsigned long vm_pgt_phys;
+};
+
+static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
+{
+	u32 data;
+	int i;
+
+	/* Program I2S with proper sample rate and enable the correct I2S
+	 * channel. ED(0/8/16/24): Enable all I2S/I2X master clock output */
+	if (1 == info->msr) {
+		hw_write_20kx(hw, AUDIO_IO_MCLK, 0x01010101);
+		hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x01010101);
+		hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0);
+	} else if (2 == info->msr) {
+		hw_write_20kx(hw, AUDIO_IO_MCLK, 0x11111111);
+		/* Specify all playing 96khz
+		 * EA [0]	- Enabled
+		 * RTA [4:5]	- 96kHz
+		 * EB [8]	- Enabled
+		 * RTB [12:13]	- 96kHz
+		 * EC [16]	- Enabled
+		 * RTC [20:21]	- 96kHz
+		 * ED [24]	- Enabled
+		 * RTD [28:29]	- 96kHz */
+		hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x11111111);
+		hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0);
+	} else {
+		printk(KERN_ALERT "ctxfi: ERROR!!! Invalid sampling rate!!!\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (i <= 3) {
+			/* 1st 3 channels are SPDIFs (SB0960) */
+			if (i == 3)
+				data = 0x1001001;
+			else
+				data = 0x1000001;
+
+			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), data);
+			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), data);
+
+			/* Initialize the SPDIF Out Channel status registers.
+			 * The value specified here is based on the typical
+			 * values provided in the specification, namely: Clock
+			 * Accuracy of 1000ppm, Sample Rate of 48KHz,
+			 * unspecified source number, Generation status = 1,
+			 * Category code = 0x12 (Digital Signal Mixer),
+			 * Mode = 0, Emph = 0, Copy Permitted, AN = 0
+			 * (indicating that we're transmitting digital audio,
+			 * and the Professional Use bit is 0. */
+
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_L+(0x40*i),
+					0x02109204); /* Default to 48kHz */
+
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_H+(0x40*i), 0x0B);
+		} else {
+			/* Next 5 channels are I2S (SB0960) */
+			data = 0x11;
+			hw_write_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i), data);
+			if (2 == info->msr) {
+				/* Four channels per sample period */
+				data |= 0x1000;
+			}
+			hw_write_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i), data);
+		}
+	}
+
+	return 0;
+}
+
+/* TRANSPORT operations */
+static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
+{
+	u32 vmctl, data;
+	u32 ptp_phys_low, ptp_phys_high;
+	int i;
+
+	/* Set up device page table */
+	if ((~0UL) == info->vm_pgt_phys) {
+		printk(KERN_ALERT "ctxfi: "
+		       "Wrong device page table page address!!!\n");
+		return -1;
+	}
+
+	vmctl = 0x80000C0F;  /* 32-bit, 4k-size page */
+	ptp_phys_low = (u32)info->vm_pgt_phys;
+	ptp_phys_high = upper_32_bits(info->vm_pgt_phys);
+	if (sizeof(void *) == 8) /* 64bit address */
+		vmctl |= (3 << 8);
+	/* Write page table physical address to all PTPAL registers */
+	for (i = 0; i < 64; i++) {
+		hw_write_20kx(hw, VMEM_PTPAL+(16*i), ptp_phys_low);
+		hw_write_20kx(hw, VMEM_PTPAH+(16*i), ptp_phys_high);
+	}
+	/* Enable virtual memory transfer */
+	hw_write_20kx(hw, VMEM_CTL, vmctl);
+	/* Enable transport bus master and queueing of request */
+	hw_write_20kx(hw, TRANSPORT_CTL, 0x03);
+	hw_write_20kx(hw, TRANSPORT_INT, 0x200c01);
+	/* Enable transport ring */
+	data = hw_read_20kx(hw, TRANSPORT_ENB);
+	hw_write_20kx(hw, TRANSPORT_ENB, (data | 0x03));
+
+	return 0;
+}
+
+/* Card initialization */
+#define GCTL_AIE	0x00000001
+#define GCTL_UAA	0x00000002
+#define GCTL_DPC	0x00000004
+#define GCTL_DBP	0x00000008
+#define GCTL_ABP	0x00000010
+#define GCTL_TBP	0x00000020
+#define GCTL_SBP	0x00000040
+#define GCTL_FBP	0x00000080
+#define GCTL_ME		0x00000100
+#define GCTL_AID	0x00001000
+
+#define PLLCTL_SRC	0x00000007
+#define PLLCTL_SPE	0x00000008
+#define PLLCTL_RD	0x000000F0
+#define PLLCTL_FD	0x0001FF00
+#define PLLCTL_OD	0x00060000
+#define PLLCTL_B	0x00080000
+#define PLLCTL_AS	0x00100000
+#define PLLCTL_LF	0x03E00000
+#define PLLCTL_SPS	0x1C000000
+#define PLLCTL_AD	0x60000000
+
+#define PLLSTAT_CCS	0x00000007
+#define PLLSTAT_SPL	0x00000008
+#define PLLSTAT_CRD	0x000000F0
+#define PLLSTAT_CFD	0x0001FF00
+#define PLLSTAT_SL	0x00020000
+#define PLLSTAT_FAS	0x00040000
+#define PLLSTAT_B	0x00080000
+#define PLLSTAT_PD	0x00100000
+#define PLLSTAT_OCA	0x00200000
+#define PLLSTAT_NCA	0x00400000
+
+static int hw_pll_init(struct hw *hw, unsigned int rsr)
+{
+	unsigned int pllenb;
+	unsigned int pllctl;
+	unsigned int pllstat;
+	int i;
+
+	pllenb = 0xB;
+	hw_write_20kx(hw, PLL_ENB, pllenb);
+	pllctl = 0x20D00000;
+	set_field(&pllctl, PLLCTL_FD, 16 - 4);
+	hw_write_20kx(hw, PLL_CTL, pllctl);
+	mdelay(40);
+	pllctl = hw_read_20kx(hw, PLL_CTL);
+	set_field(&pllctl, PLLCTL_B, 0);
+	if (48000 == rsr) {
+		set_field(&pllctl, PLLCTL_FD, 16 - 2);
+		set_field(&pllctl, PLLCTL_RD, 1 - 1);
+	} else { /* 44100 */
+		set_field(&pllctl, PLLCTL_FD, 147 - 2);
+		set_field(&pllctl, PLLCTL_RD, 10 - 1);
+	}
+	hw_write_20kx(hw, PLL_CTL, pllctl);
+	mdelay(40);
+	for (i = 0; i < 1000; i++) {
+		pllstat = hw_read_20kx(hw, PLL_STAT);
+		if (get_field(pllstat, PLLSTAT_PD))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_B) !=
+					get_field(pllctl, PLLCTL_B))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CCS) !=
+					get_field(pllctl, PLLCTL_SRC))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CRD) !=
+					get_field(pllctl, PLLCTL_RD))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CFD) !=
+					get_field(pllctl, PLLCTL_FD))
+			continue;
+
+		break;
+	}
+	if (i >= 1000) {
+		printk(KERN_ALERT "ctxfi: PLL initialization failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int hw_auto_init(struct hw *hw)
+{
+	unsigned int gctl;
+	int i;
+
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_AIE, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+	set_field(&gctl, GCTL_AIE, 1);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+	mdelay(10);
+	for (i = 0; i < 400000; i++) {
+		gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+		if (get_field(gctl, GCTL_AID))
+			break;
+	}
+	if (!get_field(gctl, GCTL_AID)) {
+		printk(KERN_ALERT "ctxfi: Card Auto-init failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* DAC operations */
+
+#define CS4382_MC1 		0x1
+#define CS4382_MC2 		0x2
+#define CS4382_MC3		0x3
+#define CS4382_FC		0x4
+#define CS4382_IC		0x5
+#define CS4382_XC1		0x6
+#define CS4382_VCA1 		0x7
+#define CS4382_VCB1 		0x8
+#define CS4382_XC2		0x9
+#define CS4382_VCA2 		0xA
+#define CS4382_VCB2 		0xB
+#define CS4382_XC3		0xC
+#define CS4382_VCA3		0xD
+#define CS4382_VCB3		0xE
+#define CS4382_XC4 		0xF
+#define CS4382_VCA4 		0x10
+#define CS4382_VCB4 		0x11
+#define CS4382_CREV 		0x12
+
+/* I2C status */
+#define STATE_LOCKED		0x00
+#define STATE_UNLOCKED		0xAA
+#define DATA_READY		0x800000    /* Used with I2C_IF_STATUS */
+#define DATA_ABORT		0x10000     /* Used with I2C_IF_STATUS */
+
+#define I2C_STATUS_DCM	0x00000001
+#define I2C_STATUS_BC	0x00000006
+#define I2C_STATUS_APD	0x00000008
+#define I2C_STATUS_AB	0x00010000
+#define I2C_STATUS_DR	0x00800000
+
+#define I2C_ADDRESS_PTAD	0x0000FFFF
+#define I2C_ADDRESS_SLAD	0x007F0000
+
+struct regs_cs4382 {
+	u32 mode_control_1;
+	u32 mode_control_2;
+	u32 mode_control_3;
+
+	u32 filter_control;
+	u32 invert_control;
+
+	u32 mix_control_P1;
+	u32 vol_control_A1;
+	u32 vol_control_B1;
+
+	u32 mix_control_P2;
+	u32 vol_control_A2;
+	u32 vol_control_B2;
+
+	u32 mix_control_P3;
+	u32 vol_control_A3;
+	u32 vol_control_B3;
+
+	u32 mix_control_P4;
+	u32 vol_control_A4;
+	u32 vol_control_B4;
+};
+
+static int hw20k2_i2c_unlock_full_access(struct hw *hw)
+{
+	u8 UnlockKeySequence_FLASH_FULLACCESS_MODE[2] =  {0xB3, 0xD4};
+
+	/* Send keys for forced BIOS mode */
+	hw_write_20kx(hw, I2C_IF_WLOCK,
+			UnlockKeySequence_FLASH_FULLACCESS_MODE[0]);
+	hw_write_20kx(hw, I2C_IF_WLOCK,
+			UnlockKeySequence_FLASH_FULLACCESS_MODE[1]);
+	/* Check whether the chip is unlocked */
+	if (hw_read_20kx(hw, I2C_IF_WLOCK) == STATE_UNLOCKED)
+		return 0;
+
+	return -1;
+}
+
+static int hw20k2_i2c_lock_chip(struct hw *hw)
+{
+	/* Write twice */
+	hw_write_20kx(hw, I2C_IF_WLOCK, STATE_LOCKED);
+	hw_write_20kx(hw, I2C_IF_WLOCK, STATE_LOCKED);
+	if (hw_read_20kx(hw, I2C_IF_WLOCK) == STATE_LOCKED)
+		return 0;
+
+	return -1;
+}
+
+static int hw20k2_i2c_init(struct hw *hw, u8 dev_id, u8 addr_size, u8 data_size)
+{
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
+	int err;
+	unsigned int i2c_status;
+	unsigned int i2c_addr;
+
+	err = hw20k2_i2c_unlock_full_access(hw);
+	if (err < 0)
+		return err;
+
+	hw20k2->addr_size = addr_size;
+	hw20k2->data_size = data_size;
+	hw20k2->dev_id = dev_id;
+
+	i2c_addr = 0;
+	set_field(&i2c_addr, I2C_ADDRESS_SLAD, dev_id);
+
+	hw_write_20kx(hw, I2C_IF_ADDRESS, i2c_addr);
+
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&i2c_status, I2C_STATUS_DCM, 1); /* Direct control mode */
+
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+
+	return 0;
+}
+
+static int hw20k2_i2c_uninit(struct hw *hw)
+{
+	unsigned int i2c_status;
+	unsigned int i2c_addr;
+
+	i2c_addr = 0;
+	set_field(&i2c_addr, I2C_ADDRESS_SLAD, 0x57); /* I2C id */
+
+	hw_write_20kx(hw, I2C_IF_ADDRESS, i2c_addr);
+
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&i2c_status, I2C_STATUS_DCM, 0); /* I2C mode */
+
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+
+	return hw20k2_i2c_lock_chip(hw);
+}
+
+static int hw20k2_i2c_wait_data_ready(struct hw *hw)
+{
+	int i = 0x400000;
+	unsigned int ret;
+
+	do {
+		ret = hw_read_20kx(hw, I2C_IF_STATUS);
+	} while ((!(ret & DATA_READY)) && --i);
+
+	return i;
+}
+
+static int hw20k2_i2c_read(struct hw *hw, u16 addr, u32 *datap)
+{
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
+	unsigned int i2c_status;
+
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
+	set_field(&i2c_status, I2C_STATUS_BC,
+		  (4 == hw20k2->addr_size) ? 0 : hw20k2->addr_size);
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+	if (!hw20k2_i2c_wait_data_ready(hw))
+		return -1;
+
+	hw_write_20kx(hw, I2C_IF_WDATA, addr);
+	if (!hw20k2_i2c_wait_data_ready(hw))
+		return -1;
+
+	/* Force a read operation */
+	hw_write_20kx(hw, I2C_IF_RDATA, 0);
+	if (!hw20k2_i2c_wait_data_ready(hw))
+		return -1;
+
+	*datap = hw_read_20kx(hw, I2C_IF_RDATA);
+
+	return 0;
+}
+
+static int hw20k2_i2c_write(struct hw *hw, u16 addr, u32 data)
+{
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
+	unsigned int i2c_data = (data << (hw20k2->addr_size * 8)) | addr;
+	unsigned int i2c_status;
+
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&i2c_status, I2C_STATUS_BC,
+		  (4 == (hw20k2->addr_size + hw20k2->data_size)) ?
+		  0 : (hw20k2->addr_size + hw20k2->data_size));
+
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+	hw20k2_i2c_wait_data_ready(hw);
+	/* Dummy write to trigger the write oprtation */
+	hw_write_20kx(hw, I2C_IF_WDATA, 0);
+	hw20k2_i2c_wait_data_ready(hw);
+
+	/* This is the real data */
+	hw_write_20kx(hw, I2C_IF_WDATA, i2c_data);
+	hw20k2_i2c_wait_data_ready(hw);
+
+	return 0;
+}
+
+static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
+{
+	int err;
+	u32 data;
+	int i;
+	struct regs_cs4382 cs_read = {0};
+	struct regs_cs4382 cs_def = {
+				   0x00000001,  /* Mode Control 1 */
+				   0x00000000,  /* Mode Control 2 */
+				   0x00000084,  /* Mode Control 3 */
+				   0x00000000,  /* Filter Control */
+				   0x00000000,  /* Invert Control */
+				   0x00000024,  /* Mixing Control Pair 1 */
+				   0x00000000,  /* Vol Control A1 */
+				   0x00000000,  /* Vol Control B1 */
+				   0x00000024,  /* Mixing Control Pair 2 */
+				   0x00000000,  /* Vol Control A2 */
+				   0x00000000,  /* Vol Control B2 */
+				   0x00000024,  /* Mixing Control Pair 3 */
+				   0x00000000,  /* Vol Control A3 */
+				   0x00000000,  /* Vol Control B3 */
+				   0x00000024,  /* Mixing Control Pair 4 */
+				   0x00000000,  /* Vol Control A4 */
+				   0x00000000   /* Vol Control B4 */
+				 };
+
+	/* Set DAC reset bit as output */
+	data = hw_read_20kx(hw, GPIO_CTRL);
+	data |= 0x02;
+	hw_write_20kx(hw, GPIO_CTRL, data);
+
+	err = hw20k2_i2c_init(hw, 0x18, 1, 1);
+	if (err < 0)
+		goto End;
+
+	for (i = 0; i < 2; i++) {
+		/* Reset DAC twice just in-case the chip
+		 * didn't initialized properly */
+		data = hw_read_20kx(hw, GPIO_DATA);
+		/* GPIO data bit 1 */
+		data &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, data);
+		mdelay(10);
+		data |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, data);
+		mdelay(50);
+
+		/* Reset the 2nd time */
+		data &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, data);
+		mdelay(10);
+		data |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, data);
+		mdelay(50);
+
+		if (hw20k2_i2c_read(hw, CS4382_MC1,  &cs_read.mode_control_1))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_MC2,  &cs_read.mode_control_2))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_MC3,  &cs_read.mode_control_3))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_FC,   &cs_read.filter_control))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_IC,   &cs_read.invert_control))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_XC1,  &cs_read.mix_control_P1))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCA1, &cs_read.vol_control_A1))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCB1, &cs_read.vol_control_B1))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_XC2,  &cs_read.mix_control_P2))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCA2, &cs_read.vol_control_A2))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCB2, &cs_read.vol_control_B2))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_XC3,  &cs_read.mix_control_P3))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCA3, &cs_read.vol_control_A3))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCB3, &cs_read.vol_control_B3))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_XC4,  &cs_read.mix_control_P4))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCA4, &cs_read.vol_control_A4))
+			continue;
+
+		if (hw20k2_i2c_read(hw, CS4382_VCB4, &cs_read.vol_control_B4))
+			continue;
+
+		if (memcmp(&cs_read, &cs_def, sizeof(cs_read)))
+			continue;
+		else
+			break;
+	}
+
+	if (i >= 2)
+		goto End;
+
+	/* Note: Every I2C write must have some delay.
+	 * This is not a requirement but the delay works here... */
+	hw20k2_i2c_write(hw, CS4382_MC1, 0x80);
+	hw20k2_i2c_write(hw, CS4382_MC2, 0x10);
+	if (1 == info->msr) {
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x24);
+	} else if (2 == info->msr) {
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x25);
+	} else {
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x26);
+	}
+
+	return 0;
+End:
+
+	hw20k2_i2c_uninit(hw);
+	return -1;
+}
+
+/* ADC operations */
+#define MAKE_WM8775_ADDR(addr, data)	(u32)(((addr<<1)&0xFE)|((data>>8)&0x1))
+#define MAKE_WM8775_DATA(data)	(u32)(data&0xFF)
+
+#define WM8775_IC       0x0B
+#define WM8775_MMC      0x0C
+#define WM8775_AADCL    0x0E
+#define WM8775_AADCR    0x0F
+#define WM8775_ADCMC    0x15
+#define WM8775_RESET    0x17
+
+static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
+{
+	u32 data;
+
+	data = hw_read_20kx(hw, GPIO_DATA);
+	switch (type) {
+	case ADC_MICIN:
+		data = (data & (0x1 << 14)) ? 1 : 0;
+		break;
+	case ADC_LINEIN:
+		data = (data & (0x1 << 14)) ? 0 : 1;
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
+{
+	u32 data;
+
+	data = hw_read_20kx(hw, GPIO_DATA);
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+				MAKE_WM8775_DATA(0x101)); /* Mic-in */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+				MAKE_WM8775_DATA(0x102)); /* Line-in */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
+{
+	int err;
+	u32 mux = 2, data, ctl;
+
+	/*  Set ADC reset bit as output */
+	data = hw_read_20kx(hw, GPIO_CTRL);
+	data |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_CTRL, data);
+
+	/* Initialize I2C */
+	err = hw20k2_i2c_init(hw, 0x1A, 1, 1);
+	if (err < 0) {
+		printk(KERN_ALERT "ctxfi: Failure to acquire I2C!!!\n");
+		goto error;
+	}
+
+	/* Make ADC in normal operation */
+	data = hw_read_20kx(hw, GPIO_DATA);
+	data &= ~(0x1 << 15);
+	mdelay(10);
+	data |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_DATA, data);
+	mdelay(50);
+
+	/* Set the master mode (256fs) */
+	if (1 == info->msr) {
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x02),
+						MAKE_WM8775_DATA(0x02));
+	} else if (2 == info->msr) {
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A),
+						MAKE_WM8775_DATA(0x0A));
+	} else {
+		printk(KERN_ALERT "ctxfi: Invalid master sampling "
+				  "rate (msr %d)!!!\n", info->msr);
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Configure GPIO bit 14 change to line-in/mic-in */
+	ctl = hw_read_20kx(hw, GPIO_CTRL);
+	ctl |= 0x1 << 14;
+	hw_write_20kx(hw, GPIO_CTRL, ctl);
+
+	/* Check using Mic-in or Line-in */
+	data = hw_read_20kx(hw, GPIO_DATA);
+
+	if (mux == 1) {
+		/* Configures GPIO data to select Mic-in */
+		data |= 0x1 << 14;
+		hw_write_20kx(hw, GPIO_DATA, data);
+
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+				MAKE_WM8775_DATA(0x101)); /* Mic-in */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+	} else if (mux == 2) {
+		/* Configures GPIO data to select Line-in */
+		data &= ~(0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
+
+		/* Setup ADC */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+				MAKE_WM8775_DATA(0x102)); /* Line-in */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+	} else {
+		printk(KERN_ALERT "ctxfi: ERROR!!! Invalid input mux!!!\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	hw20k2_i2c_uninit(hw);
+	return err;
+}
+
+static int hw_have_digit_io_switch(struct hw *hw)
+{
+	return 0;
+}
+
+static int hw_card_start(struct hw *hw)
+{
+	int err = 0;
+	struct pci_dev *pci = hw->pci;
+	unsigned int gctl;
+
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	/* Set DMA transfer mask */
+	if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) {
+		printk(KERN_ERR "ctxfi: architecture does not support PCI "
+		"busmaster DMA with mask 0x%llx\n", CT_XFI_DMA_MASK);
+		err = -ENXIO;
+		goto error1;
+	}
+
+	err = pci_request_regions(pci, "XFi");
+	if (err < 0)
+		goto error1;
+
+	hw->io_base = pci_resource_start(hw->pci, 2);
+	hw->mem_base = (unsigned long)ioremap(hw->io_base,
+					pci_resource_len(hw->pci, 2));
+	if (NULL == (void *)hw->mem_base) {
+		err = -ENOENT;
+		goto error2;
+	}
+
+	/* Switch to 20k2 mode from UAA mode. */
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_UAA, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+
+	/*if ((err = request_irq(pci->irq, ct_atc_interrupt, IRQF_SHARED,
+				atc->chip_details->nm_card, hw))) {
+		goto error3;
+	}
+	hw->irq = pci->irq;
+	*/
+
+	pci_set_master(pci);
+
+	return 0;
+
+/*error3:
+	iounmap((void *)hw->mem_base);
+	hw->mem_base = (unsigned long)NULL;*/
+error2:
+	pci_release_regions(pci);
+	hw->io_base = 0;
+error1:
+	pci_disable_device(pci);
+	return err;
+}
+
+static int hw_card_stop(struct hw *hw)
+{
+	/* TODO: Disable interrupt and so on... */
+	return 0;
+}
+
+static int hw_card_shutdown(struct hw *hw)
+{
+	if (hw->irq >= 0)
+		free_irq(hw->irq, hw);
+
+	hw->irq	= -1;
+
+	if (NULL != ((void *)hw->mem_base))
+		iounmap((void *)hw->mem_base);
+
+	hw->mem_base = (unsigned long)NULL;
+
+	if (hw->io_base)
+		pci_release_regions(hw->pci);
+
+	hw->io_base = 0;
+
+	pci_disable_device(hw->pci);
+
+	return 0;
+}
+
+static int hw_card_init(struct hw *hw, struct card_conf *info)
+{
+	int err;
+	unsigned int gctl;
+	u32 data = 0;
+	struct dac_conf dac_info = {0};
+	struct adc_conf adc_info = {0};
+	struct daio_conf daio_info = {0};
+	struct trn_conf trn_info = {0};
+
+	/* Get PCI io port/memory base address and
+	 * do 20kx core switch if needed. */
+	if (!hw->io_base) {
+		err = hw_card_start(hw);
+		if (err)
+			return err;
+	}
+
+	/* PLL init */
+	err = hw_pll_init(hw, info->rsr);
+	if (err < 0)
+		return err;
+
+	/* kick off auto-init */
+	err = hw_auto_init(hw);
+	if (err < 0)
+		return err;
+
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_DBP, 1);
+	set_field(&gctl, GCTL_TBP, 1);
+	set_field(&gctl, GCTL_FBP, 1);
+	set_field(&gctl, GCTL_DPC, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+
+	/* Reset all global pending interrupts */
+	hw_write_20kx(hw, INTERRUPT_GIE, 0);
+	/* Reset all SRC pending interrupts */
+	hw_write_20kx(hw, SRC_IP, 0);
+
+	/* TODO: detect the card ID and configure GPIO accordingly. */
+	/* Configures GPIO (0xD802 0x98028) */
+	/*hw_write_20kx(hw, GPIO_CTRL, 0x7F07);*/
+	/* Configures GPIO (SB0880) */
+	/*hw_write_20kx(hw, GPIO_CTRL, 0xFF07);*/
+	hw_write_20kx(hw, GPIO_CTRL, 0xD802);
+
+	/* Enable audio ring */
+	hw_write_20kx(hw, MIXER_AR_ENABLE, 0x01);
+
+	trn_info.vm_pgt_phys = info->vm_pgt_phys;
+	err = hw_trn_init(hw, &trn_info);
+	if (err < 0)
+		return err;
+
+	daio_info.msr = info->msr;
+	err = hw_daio_init(hw, &daio_info);
+	if (err < 0)
+		return err;
+
+	dac_info.msr = info->msr;
+	err = hw_dac_init(hw, &dac_info);
+	if (err < 0)
+		return err;
+
+	adc_info.msr = info->msr;
+	adc_info.input = ADC_LINEIN;
+	adc_info.mic20db = 0;
+	err = hw_adc_init(hw, &adc_info);
+	if (err < 0)
+		return err;
+
+	data = hw_read_20kx(hw, SRC_MCTL);
+	data |= 0x1; /* Enables input from the audio ring */
+	hw_write_20kx(hw, SRC_MCTL, data);
+
+	return 0;
+}
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg)
+{
+	return readl((void *)(hw->mem_base + reg));
+}
+
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data)
+{
+	writel(data, (void *)(hw->mem_base + reg));
+}
+
+static struct hw ct20k2_preset __devinitdata = {
+	.irq = -1,
+
+	.card_init = hw_card_init,
+	.card_stop = hw_card_stop,
+	.pll_init = hw_pll_init,
+	.is_adc_source_selected = hw_is_adc_input_selected,
+	.select_adc_source = hw_adc_input_select,
+	.have_digit_io_switch = hw_have_digit_io_switch,
+
+	.src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk,
+	.src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk,
+	.src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk,
+	.src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk,
+	.src_set_state = src_set_state,
+	.src_set_bm = src_set_bm,
+	.src_set_rsr = src_set_rsr,
+	.src_set_sf = src_set_sf,
+	.src_set_wr = src_set_wr,
+	.src_set_pm = src_set_pm,
+	.src_set_rom = src_set_rom,
+	.src_set_vo = src_set_vo,
+	.src_set_st = src_set_st,
+	.src_set_ie = src_set_ie,
+	.src_set_ilsz = src_set_ilsz,
+	.src_set_bp = src_set_bp,
+	.src_set_cisz = src_set_cisz,
+	.src_set_ca = src_set_ca,
+	.src_set_sa = src_set_sa,
+	.src_set_la = src_set_la,
+	.src_set_pitch = src_set_pitch,
+	.src_set_dirty = src_set_dirty,
+	.src_set_clear_zbufs = src_set_clear_zbufs,
+	.src_set_dirty_all = src_set_dirty_all,
+	.src_commit_write = src_commit_write,
+	.src_get_ca = src_get_ca,
+	.src_get_dirty = src_get_dirty,
+	.src_dirty_conj_mask = src_dirty_conj_mask,
+	.src_mgr_enbs_src = src_mgr_enbs_src,
+	.src_mgr_enb_src = src_mgr_enb_src,
+	.src_mgr_dsb_src = src_mgr_dsb_src,
+	.src_mgr_commit_write = src_mgr_commit_write,
+
+	.srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk,
+	.srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk,
+	.srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc,
+	.srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser,
+	.srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt,
+	.srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr,
+	.srcimp_mgr_commit_write = srcimp_mgr_commit_write,
+
+	.amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk,
+	.amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk,
+	.amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk,
+	.amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk,
+	.amixer_set_mode = amixer_set_mode,
+	.amixer_set_iv = amixer_set_iv,
+	.amixer_set_x = amixer_set_x,
+	.amixer_set_y = amixer_set_y,
+	.amixer_set_sadr = amixer_set_sadr,
+	.amixer_set_se = amixer_set_se,
+	.amixer_set_dirty = amixer_set_dirty,
+	.amixer_set_dirty_all = amixer_set_dirty_all,
+	.amixer_commit_write = amixer_commit_write,
+	.amixer_get_y = amixer_get_y,
+	.amixer_get_dirty = amixer_get_dirty,
+
+	.dai_get_ctrl_blk = dai_get_ctrl_blk,
+	.dai_put_ctrl_blk = dai_put_ctrl_blk,
+	.dai_srt_set_srco = dai_srt_set_srco,
+	.dai_srt_set_srcm = dai_srt_set_srcm,
+	.dai_srt_set_rsr = dai_srt_set_rsr,
+	.dai_srt_set_drat = dai_srt_set_drat,
+	.dai_srt_set_ec = dai_srt_set_ec,
+	.dai_srt_set_et = dai_srt_set_et,
+	.dai_commit_write = dai_commit_write,
+
+	.dao_get_ctrl_blk = dao_get_ctrl_blk,
+	.dao_put_ctrl_blk = dao_put_ctrl_blk,
+	.dao_set_spos = dao_set_spos,
+	.dao_commit_write = dao_commit_write,
+	.dao_get_spos = dao_get_spos,
+
+	.daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk,
+	.daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk,
+	.daio_mgr_enb_dai = daio_mgr_enb_dai,
+	.daio_mgr_dsb_dai = daio_mgr_dsb_dai,
+	.daio_mgr_enb_dao = daio_mgr_enb_dao,
+	.daio_mgr_dsb_dao = daio_mgr_dsb_dao,
+	.daio_mgr_dao_init = daio_mgr_dao_init,
+	.daio_mgr_set_imaparc = daio_mgr_set_imaparc,
+	.daio_mgr_set_imapnxt = daio_mgr_set_imapnxt,
+	.daio_mgr_set_imapaddr = daio_mgr_set_imapaddr,
+	.daio_mgr_commit_write = daio_mgr_commit_write,
+};
+
+int __devinit create_20k2_hw_obj(struct hw **rhw)
+{
+	struct hw20k2 *hw20k2;
+
+	*rhw = NULL;
+	hw20k2 = kzalloc(sizeof(*hw20k2), GFP_KERNEL);
+	if (!hw20k2)
+		return -ENOMEM;
+
+	hw20k2->hw = ct20k2_preset;
+	*rhw = &hw20k2->hw;
+
+	return 0;
+}
+
+int destroy_20k2_hw_obj(struct hw *hw)
+{
+	if (hw->io_base)
+		hw_card_shutdown(hw);
+
+	kfree(hw);
+	return 0;
+}
diff --git a/sound/pci/ctxfi/cthw20k2.h b/sound/pci/ctxfi/cthw20k2.h
new file mode 100644
index 0000000..d2b7daa
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k2.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k2.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHW20K2_H
+#define CTHW20K2_H
+
+#include "cthardware.h"
+
+int create_20k2_hw_obj(struct hw **rhw);
+int destroy_20k2_hw_obj(struct hw *hw);
+
+#endif /* CTHW20K2_H */
diff --git a/sound/pci/ctxfi/ctimap.c b/sound/pci/ctxfi/ctimap.c
new file mode 100644
index 0000000..0b73368
--- /dev/null
+++ b/sound/pci/ctxfi/ctimap.c
@@ -0,0 +1,112 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctimap.c
+ *
+ * @Brief
+ * This file contains the implementation of generic input mapper operations
+ * for input mapper management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#include "ctimap.h"
+#include <linux/slab.h>
+
+int input_mapper_add(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data)
+{
+	struct list_head *pos, *pre, *head;
+	struct imapper *pre_ent, *pos_ent;
+
+	head = mappers;
+
+	if (list_empty(head)) {
+		entry->next = entry->addr;
+		map_op(data, entry);
+		list_add(&entry->list, head);
+		return 0;
+	}
+
+	list_for_each(pos, head) {
+		pos_ent = list_entry(pos, struct imapper, list);
+		if (pos_ent->slot > entry->slot) {
+			/* found a position in list */
+			break;
+		}
+	}
+
+	if (pos != head) {
+		pre = pos->prev;
+		if (pre == head)
+			pre = head->prev;
+
+		__list_add(&entry->list, pos->prev, pos);
+	} else {
+		pre = head->prev;
+		pos = head->next;
+		list_add_tail(&entry->list, head);
+	}
+
+	pre_ent = list_entry(pre, struct imapper, list);
+	pos_ent = list_entry(pos, struct imapper, list);
+
+	entry->next = pos_ent->addr;
+	map_op(data, entry);
+	pre_ent->next = entry->addr;
+	map_op(data, pre_ent);
+
+	return 0;
+}
+
+int input_mapper_delete(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data)
+{
+	struct list_head *next, *pre, *head;
+	struct imapper *pre_ent, *next_ent;
+
+	head = mappers;
+
+	if (list_empty(head))
+		return 0;
+
+	pre = (entry->list.prev == head) ? head->prev : entry->list.prev;
+	next = (entry->list.next == head) ? head->next : entry->list.next;
+
+	if (pre == &entry->list) {
+		/* entry is the only one node in mappers list */
+		entry->next = entry->addr = entry->user = entry->slot = 0;
+		map_op(data, entry);
+		list_del(&entry->list);
+		return 0;
+	}
+
+	pre_ent = list_entry(pre, struct imapper, list);
+	next_ent = list_entry(next, struct imapper, list);
+
+	pre_ent->next = next_ent->addr;
+	map_op(data, pre_ent);
+	list_del(&entry->list);
+
+	return 0;
+}
+
+void free_input_mapper_list(struct list_head *head)
+{
+	struct imapper *entry;
+	struct list_head *pos;
+
+	while (!list_empty(head)) {
+		pos = head->next;
+		list_del(pos);
+		entry = list_entry(pos, struct imapper, list);
+		kfree(entry);
+	}
+}
+
diff --git a/sound/pci/ctxfi/ctimap.h b/sound/pci/ctxfi/ctimap.h
new file mode 100644
index 0000000..53ccf9b
--- /dev/null
+++ b/sound/pci/ctxfi/ctimap.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctimap.h
+ *
+ * @Brief
+ * This file contains the definition of generic input mapper operations
+ * for input mapper management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#ifndef CTIMAP_H
+#define CTIMAP_H
+
+#include <linux/list.h>
+
+struct imapper {
+	unsigned short slot; /* the id of the slot containing input data */
+	unsigned short user; /* the id of the user resource consuming data */
+	unsigned short addr; /* the input mapper ram id */
+	unsigned short next; /* the next input mapper ram id */
+	struct list_head	list;
+};
+
+int input_mapper_add(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data);
+
+int input_mapper_delete(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data);
+
+void free_input_mapper_list(struct list_head *mappers);
+
+#endif /* CTIMAP_H */
diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
new file mode 100644
index 0000000..666722d
--- /dev/null
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -0,0 +1,1123 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctmixer.c
+ *
+ * @Brief
+ * This file contains the implementation of alsa mixer device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 28 2008
+ *
+ */
+
+
+#include "ctmixer.h"
+#include "ctamixer.h"
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/asoundef.h>
+#include <sound/pcm.h>
+#include <sound/tlv.h>
+
+enum CT_SUM_CTL {
+	SUM_IN_F,
+	SUM_IN_R,
+	SUM_IN_C,
+	SUM_IN_S,
+	SUM_IN_F_C,
+
+	NUM_CT_SUMS
+};
+
+enum CT_AMIXER_CTL {
+	/* volume control mixers */
+	AMIXER_MASTER_F,
+	AMIXER_MASTER_R,
+	AMIXER_MASTER_C,
+	AMIXER_MASTER_S,
+	AMIXER_PCM_F,
+	AMIXER_PCM_R,
+	AMIXER_PCM_C,
+	AMIXER_PCM_S,
+	AMIXER_SPDIFI,
+	AMIXER_LINEIN,
+	AMIXER_MIC,
+	AMIXER_SPDIFO,
+	AMIXER_WAVE_F,
+	AMIXER_WAVE_R,
+	AMIXER_WAVE_C,
+	AMIXER_WAVE_S,
+	AMIXER_MASTER_F_C,
+	AMIXER_PCM_F_C,
+	AMIXER_SPDIFI_C,
+	AMIXER_LINEIN_C,
+	AMIXER_MIC_C,
+
+	/* this should always be the last one */
+	NUM_CT_AMIXERS
+};
+
+enum CTALSA_MIXER_CTL {
+	/* volume control mixers */
+	MIXER_MASTER_P,
+	MIXER_PCM_P,
+	MIXER_LINEIN_P,
+	MIXER_MIC_P,
+	MIXER_SPDIFI_P,
+	MIXER_SPDIFO_P,
+	MIXER_WAVEF_P,
+	MIXER_WAVER_P,
+	MIXER_WAVEC_P,
+	MIXER_WAVES_P,
+	MIXER_MASTER_C,
+	MIXER_PCM_C,
+	MIXER_LINEIN_C,
+	MIXER_MIC_C,
+	MIXER_SPDIFI_C,
+
+	/* switch control mixers */
+	MIXER_PCM_C_S,
+	MIXER_LINEIN_C_S,
+	MIXER_MIC_C_S,
+	MIXER_SPDIFI_C_S,
+	MIXER_LINEIN_P_S,
+	MIXER_SPDIFO_P_S,
+	MIXER_SPDIFI_P_S,
+	MIXER_WAVEF_P_S,
+	MIXER_WAVER_P_S,
+	MIXER_WAVEC_P_S,
+	MIXER_WAVES_P_S,
+	MIXER_DIGITAL_IO_S,
+	MIXER_IEC958_MASK,
+	MIXER_IEC958_DEFAULT,
+	MIXER_IEC958_STREAM,
+
+	/* this should always be the last one */
+	NUM_CTALSA_MIXERS
+};
+
+#define VOL_MIXER_START		MIXER_MASTER_P
+#define VOL_MIXER_END		MIXER_SPDIFI_C
+#define VOL_MIXER_NUM		(VOL_MIXER_END - VOL_MIXER_START + 1)
+#define SWH_MIXER_START		MIXER_PCM_C_S
+#define SWH_MIXER_END		MIXER_DIGITAL_IO_S
+#define SWH_CAPTURE_START	MIXER_PCM_C_S
+#define SWH_CAPTURE_END		MIXER_SPDIFI_C_S
+
+#define CHN_NUM		2
+
+struct ct_kcontrol_init {
+	unsigned char ctl;
+	char *name;
+};
+
+static struct ct_kcontrol_init
+ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
+	[MIXER_MASTER_P] = {
+		.ctl = 1,
+		.name = "Master Playback Volume",
+	},
+	[MIXER_MASTER_C] = {
+		.ctl = 1,
+		.name = "Master Capture Volume",
+	},
+	[MIXER_PCM_P] = {
+		.ctl = 1,
+		.name = "PCM Playback Volume",
+	},
+	[MIXER_PCM_C] = {
+		.ctl = 1,
+		.name = "PCM Capture Volume",
+	},
+	[MIXER_LINEIN_P] = {
+		.ctl = 1,
+		.name = "Line-in Playback Volume",
+	},
+	[MIXER_LINEIN_C] = {
+		.ctl = 1,
+		.name = "Line-in Capture Volume",
+	},
+	[MIXER_MIC_P] = {
+		.ctl = 1,
+		.name = "Mic Playback Volume",
+	},
+	[MIXER_MIC_C] = {
+		.ctl = 1,
+		.name = "Mic Capture Volume",
+	},
+	[MIXER_SPDIFI_P] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Playback Volume",
+	},
+	[MIXER_SPDIFI_C] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Capture Volume",
+	},
+	[MIXER_SPDIFO_P] = {
+		.ctl = 1,
+		.name = "S/PDIF-out Playback Volume",
+	},
+	[MIXER_WAVEF_P] = {
+		.ctl = 1,
+		.name = "Front Playback Volume",
+	},
+	[MIXER_WAVES_P] = {
+		.ctl = 1,
+		.name = "Side Playback Volume",
+	},
+	[MIXER_WAVEC_P] = {
+		.ctl = 1,
+		.name = "Center/LFE Playback Volume",
+	},
+	[MIXER_WAVER_P] = {
+		.ctl = 1,
+		.name = "Surround Playback Volume",
+	},
+
+	[MIXER_PCM_C_S] = {
+		.ctl = 1,
+		.name = "PCM Capture Switch",
+	},
+	[MIXER_LINEIN_C_S] = {
+		.ctl = 1,
+		.name = "Line-in Capture Switch",
+	},
+	[MIXER_MIC_C_S] = {
+		.ctl = 1,
+		.name = "Mic Capture Switch",
+	},
+	[MIXER_SPDIFI_C_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Capture Switch",
+	},
+	[MIXER_LINEIN_P_S] = {
+		.ctl = 1,
+		.name = "Line-in Playback Switch",
+	},
+	[MIXER_SPDIFO_P_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-out Playback Switch",
+	},
+	[MIXER_SPDIFI_P_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Playback Switch",
+	},
+	[MIXER_WAVEF_P_S] = {
+		.ctl = 1,
+		.name = "Front Playback Switch",
+	},
+	[MIXER_WAVES_P_S] = {
+		.ctl = 1,
+		.name = "Side Playback Switch",
+	},
+	[MIXER_WAVEC_P_S] = {
+		.ctl = 1,
+		.name = "Center/LFE Playback Switch",
+	},
+	[MIXER_WAVER_P_S] = {
+		.ctl = 1,
+		.name = "Surround Playback Switch",
+	},
+	[MIXER_DIGITAL_IO_S] = {
+		.ctl = 0,
+		.name = "Digit-IO Playback Switch",
+	},
+};
+
+static void
+ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type);
+
+static void
+ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type);
+
+static struct snd_kcontrol *kctls[2] = {NULL};
+
+static enum CT_AMIXER_CTL get_amixer_index(enum CTALSA_MIXER_CTL alsa_index)
+{
+	switch (alsa_index) {
+	case MIXER_MASTER_P:	return AMIXER_MASTER_F;
+	case MIXER_MASTER_C:	return AMIXER_MASTER_F_C;
+	case MIXER_PCM_P:	return AMIXER_PCM_F;
+	case MIXER_PCM_C:
+	case MIXER_PCM_C_S:	return AMIXER_PCM_F_C;
+	case MIXER_LINEIN_P:	return AMIXER_LINEIN;
+	case MIXER_LINEIN_C:
+	case MIXER_LINEIN_C_S:	return AMIXER_LINEIN_C;
+	case MIXER_MIC_P:	return AMIXER_MIC;
+	case MIXER_MIC_C:
+	case MIXER_MIC_C_S:	return AMIXER_MIC_C;
+	case MIXER_SPDIFI_P:	return AMIXER_SPDIFI;
+	case MIXER_SPDIFI_C:
+	case MIXER_SPDIFI_C_S:	return AMIXER_SPDIFI_C;
+	case MIXER_SPDIFO_P:	return AMIXER_SPDIFO;
+	case MIXER_WAVEF_P:	return AMIXER_WAVE_F;
+	case MIXER_WAVES_P:	return AMIXER_WAVE_S;
+	case MIXER_WAVEC_P:	return AMIXER_WAVE_C;
+	case MIXER_WAVER_P:	return AMIXER_WAVE_R;
+	default:		return NUM_CT_AMIXERS;
+	}
+}
+
+static enum CT_AMIXER_CTL get_recording_amixer(enum CT_AMIXER_CTL index)
+{
+	switch (index) {
+	case AMIXER_MASTER_F:	return AMIXER_MASTER_F_C;
+	case AMIXER_PCM_F:	return AMIXER_PCM_F_C;
+	case AMIXER_SPDIFI:	return AMIXER_SPDIFI_C;
+	case AMIXER_LINEIN:	return AMIXER_LINEIN_C;
+	case AMIXER_MIC:	return AMIXER_MIC_C;
+	default:		return NUM_CT_AMIXERS;
+	}
+}
+
+static unsigned char
+get_switch_state(struct ct_mixer *mixer, enum CTALSA_MIXER_CTL type)
+{
+	return (mixer->switch_state & (0x1 << (type - SWH_MIXER_START)))
+		? 1 : 0;
+}
+
+static void
+set_switch_state(struct ct_mixer *mixer,
+		 enum CTALSA_MIXER_CTL type, unsigned char state)
+{
+	if (state)
+		mixer->switch_state |= (0x1 << (type - SWH_MIXER_START));
+	else
+		mixer->switch_state &= ~(0x1 << (type - SWH_MIXER_START));
+}
+
+#if 0 /* not used */
+/* Map integer value ranging from 0 to 65535 to 14-bit float value ranging
+ * from 2^-6 to (1+1023/1024) */
+static unsigned int uint16_to_float14(unsigned int x)
+{
+	unsigned int i;
+
+	if (x < 17)
+		return 0;
+
+	x *= 2031;
+	x /= 65535;
+	x += 16;
+
+	/* i <= 6 */
+	for (i = 0; !(x & 0x400); i++)
+		x <<= 1;
+
+	x = (((7 - i) & 0x7) << 10) | (x & 0x3ff);
+
+	return x;
+}
+
+static unsigned int float14_to_uint16(unsigned int x)
+{
+	unsigned int e;
+
+	if (!x)
+		return x;
+
+	e = (x >> 10) & 0x7;
+	x &= 0x3ff;
+	x += 1024;
+	x >>= (7 - e);
+	x -= 16;
+	x *= 65535;
+	x /= 2031;
+
+	return x;
+}
+#endif /* not used */
+
+#define VOL_SCALE	0x1c
+#define VOL_MAX		0x100
+
+static const DECLARE_TLV_DB_SCALE(ct_vol_db_scale, -6400, 25, 1);
+
+static int ct_alsa_mix_volume_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 2;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = VOL_MAX;
+
+	return 0;
+}
+
+static int ct_alsa_mix_volume_get(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
+	struct amixer *amixer;
+	int i, val;
+
+	for (i = 0; i < 2; i++) {
+		amixer = ((struct ct_mixer *)atc->mixer)->
+						amixers[type*CHN_NUM+i];
+		val = amixer->ops->get_scale(amixer) / VOL_SCALE;
+		if (val < 0)
+			val = 0;
+		else if (val > VOL_MAX)
+			val = VOL_MAX;
+		ucontrol->value.integer.value[i] = val;
+	}
+
+	return 0;
+}
+
+static int ct_alsa_mix_volume_put(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	struct ct_mixer *mixer = atc->mixer;
+	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
+	struct amixer *amixer;
+	int i, j, val, oval, change = 0;
+
+	for (i = 0; i < 2; i++) {
+		val = ucontrol->value.integer.value[i];
+		if (val < 0)
+			val = 0;
+		else if (val > VOL_MAX)
+			val = VOL_MAX;
+		val *= VOL_SCALE;
+		amixer = mixer->amixers[type*CHN_NUM+i];
+		oval = amixer->ops->get_scale(amixer);
+		if (val != oval) {
+			amixer->ops->set_scale(amixer, val);
+			amixer->ops->commit_write(amixer);
+			change = 1;
+			/* Synchronize Master/PCM playback AMIXERs. */
+			if (AMIXER_MASTER_F == type || AMIXER_PCM_F == type) {
+				for (j = 1; j < 4; j++) {
+					amixer = mixer->
+						amixers[(type+j)*CHN_NUM+i];
+					amixer->ops->set_scale(amixer, val);
+					amixer->ops->commit_write(amixer);
+				}
+			}
+		}
+	}
+
+	return change;
+}
+
+static struct snd_kcontrol_new vol_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+	.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
+	.info		= ct_alsa_mix_volume_info,
+	.get		= ct_alsa_mix_volume_get,
+	.put		= ct_alsa_mix_volume_put,
+	.tlv		= { .p =  ct_vol_db_scale },
+};
+
+static void
+do_line_mic_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type)
+{
+
+	if (MIXER_LINEIN_C_S == type) {
+		atc->select_line_in(atc);
+		set_switch_state(atc->mixer, MIXER_MIC_C_S, 0);
+		snd_ctl_notify(atc->card, SNDRV_CTL_EVENT_MASK_VALUE,
+							&kctls[1]->id);
+	} else if (MIXER_MIC_C_S == type) {
+		atc->select_mic_in(atc);
+		set_switch_state(atc->mixer, MIXER_LINEIN_C_S, 0);
+		snd_ctl_notify(atc->card, SNDRV_CTL_EVENT_MASK_VALUE,
+							&kctls[0]->id);
+	}
+}
+
+static void
+do_digit_io_switch(struct ct_atc *atc, int state)
+{
+	struct ct_mixer *mixer = atc->mixer;
+
+	if (state) {
+		atc->select_digit_io(atc);
+		atc->spdif_out_unmute(atc,
+				get_switch_state(mixer, MIXER_SPDIFO_P_S));
+		atc->spdif_in_unmute(atc, 1);
+		atc->line_in_unmute(atc, 0);
+		return;
+	}
+
+	if (get_switch_state(mixer, MIXER_LINEIN_C_S))
+		atc->select_line_in(atc);
+	else if (get_switch_state(mixer, MIXER_MIC_C_S))
+		atc->select_mic_in(atc);
+
+	atc->spdif_out_unmute(atc, 0);
+	atc->spdif_in_unmute(atc, 0);
+	atc->line_in_unmute(atc, 1);
+	return;
+}
+
+static int ct_alsa_mix_switch_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	uinfo->value.integer.step = 1;
+
+	return 0;
+}
+
+static int ct_alsa_mix_switch_get(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_mixer *mixer =
+		((struct ct_atc *)snd_kcontrol_chip(kcontrol))->mixer;
+	enum CTALSA_MIXER_CTL type = kcontrol->private_value;
+
+	ucontrol->value.integer.value[0] = get_switch_state(mixer, type);
+	return 0;
+}
+
+static int ct_alsa_mix_switch_put(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	struct ct_mixer *mixer = atc->mixer;
+	enum CTALSA_MIXER_CTL type = kcontrol->private_value;
+	int state;
+
+	state = ucontrol->value.integer.value[0];
+	if (get_switch_state(mixer, type) == state)
+		return 0;
+
+	set_switch_state(mixer, type, state);
+	/* Do changes in mixer. */
+	if ((SWH_CAPTURE_START <= type) && (SWH_CAPTURE_END >= type)) {
+		if (state) {
+			ct_mixer_recording_select(mixer,
+						  get_amixer_index(type));
+		} else {
+			ct_mixer_recording_unselect(mixer,
+						    get_amixer_index(type));
+		}
+	}
+	/* Do changes out of mixer. */
+	if (state && (MIXER_LINEIN_C_S == type || MIXER_MIC_C_S == type))
+		do_line_mic_switch(atc, type);
+	else if (MIXER_WAVEF_P_S == type)
+		atc->line_front_unmute(atc, state);
+	else if (MIXER_WAVES_P_S == type)
+		atc->line_surround_unmute(atc, state);
+	else if (MIXER_WAVEC_P_S == type)
+		atc->line_clfe_unmute(atc, state);
+	else if (MIXER_WAVER_P_S == type)
+		atc->line_rear_unmute(atc, state);
+	else if (MIXER_LINEIN_P_S == type)
+		atc->line_in_unmute(atc, state);
+	else if (MIXER_SPDIFO_P_S == type)
+		atc->spdif_out_unmute(atc, state);
+	else if (MIXER_SPDIFI_P_S == type)
+		atc->spdif_in_unmute(atc, state);
+	else if (MIXER_DIGITAL_IO_S == type)
+		do_digit_io_switch(atc, state);
+
+	return 1;
+}
+
+static struct snd_kcontrol_new swh_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
+	.info		= ct_alsa_mix_switch_info,
+	.get		= ct_alsa_mix_switch_get,
+	.put		= ct_alsa_mix_switch_put
+};
+
+static int ct_spdif_info(struct snd_kcontrol *kcontrol,
+			 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+	uinfo->count = 1;
+	return 0;
+}
+
+static int ct_spdif_get_mask(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	ucontrol->value.iec958.status[0] = 0xff;
+	ucontrol->value.iec958.status[1] = 0xff;
+	ucontrol->value.iec958.status[2] = 0xff;
+	ucontrol->value.iec958.status[3] = 0xff;
+	return 0;
+}
+
+static int ct_spdif_default_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	unsigned int status = SNDRV_PCM_DEFAULT_CON_SPDIF;
+
+	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
+	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
+	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
+	ucontrol->value.iec958.status[3] = (status >> 24) & 0xff;
+
+	return 0;
+}
+
+static int ct_spdif_get(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	unsigned int status;
+
+	atc->spdif_out_get_status(atc, &status);
+	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
+	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
+	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
+	ucontrol->value.iec958.status[3] = (status >> 24) & 0xff;
+
+	return 0;
+}
+
+static int ct_spdif_put(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	int change;
+	unsigned int status, old_status;
+
+	status = (ucontrol->value.iec958.status[0] << 0) |
+		 (ucontrol->value.iec958.status[1] << 8) |
+		 (ucontrol->value.iec958.status[2] << 16) |
+		 (ucontrol->value.iec958.status[3] << 24);
+
+	atc->spdif_out_get_status(atc, &old_status);
+	change = (old_status != status);
+	if (change)
+		atc->spdif_out_set_status(atc, status);
+
+	return change;
+}
+
+static struct snd_kcontrol_new iec958_mask_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READ,
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, MASK),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_get_mask,
+	.private_value	= MIXER_IEC958_MASK
+};
+
+static struct snd_kcontrol_new iec958_default_ctl = {
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_default_get,
+	.put		= ct_spdif_put,
+	.private_value	= MIXER_IEC958_DEFAULT
+};
+
+static struct snd_kcontrol_new iec958_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, PCM_STREAM),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_get,
+	.put		= ct_spdif_put,
+	.private_value	= MIXER_IEC958_STREAM
+};
+
+#define NUM_IEC958_CTL 3
+
+static int
+ct_mixer_kcontrol_new(struct ct_mixer *mixer, struct snd_kcontrol_new *new)
+{
+	struct snd_kcontrol *kctl;
+	int err;
+
+	kctl = snd_ctl_new1(new, mixer->atc);
+	if (NULL == kctl)
+		return -ENOMEM;
+
+	if (SNDRV_CTL_ELEM_IFACE_PCM == kctl->id.iface)
+		kctl->id.device = IEC958;
+
+	err = snd_ctl_add(mixer->atc->card, kctl);
+	if (err)
+		return err;
+
+	switch (new->private_value) {
+	case MIXER_LINEIN_C_S:
+		kctls[0] = kctl; break;
+	case MIXER_MIC_C_S:
+		kctls[1] = kctl; break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ct_mixer_kcontrols_create(struct ct_mixer *mixer)
+{
+	enum CTALSA_MIXER_CTL type;
+	struct ct_atc *atc = mixer->atc;
+	int err;
+
+	/* Create snd kcontrol instances on demand */
+	for (type = VOL_MIXER_START; type <= VOL_MIXER_END; type++) {
+		if (ct_kcontrol_init_table[type].ctl) {
+			vol_ctl.name = ct_kcontrol_init_table[type].name;
+			vol_ctl.private_value = (unsigned long)type;
+			err = ct_mixer_kcontrol_new(mixer, &vol_ctl);
+			if (err)
+				return err;
+		}
+	}
+
+	ct_kcontrol_init_table[MIXER_DIGITAL_IO_S].ctl =
+					atc->have_digit_io_switch(atc);
+	for (type = SWH_MIXER_START; type <= SWH_MIXER_END; type++) {
+		if (ct_kcontrol_init_table[type].ctl) {
+			swh_ctl.name = ct_kcontrol_init_table[type].name;
+			swh_ctl.private_value = (unsigned long)type;
+			err = ct_mixer_kcontrol_new(mixer, &swh_ctl);
+			if (err)
+				return err;
+		}
+	}
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_mask_ctl);
+	if (err)
+		return err;
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_default_ctl);
+	if (err)
+		return err;
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_ctl);
+	if (err)
+		return err;
+
+	atc->line_front_unmute(atc, 1);
+	set_switch_state(mixer, MIXER_WAVEF_P_S, 1);
+	atc->line_surround_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVES_P_S, 0);
+	atc->line_clfe_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVEC_P_S, 0);
+	atc->line_rear_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVER_P_S, 0);
+	atc->spdif_out_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_SPDIFO_P_S, 0);
+	atc->line_in_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_LINEIN_P_S, 0);
+	atc->spdif_in_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_SPDIFI_P_S, 0);
+
+	set_switch_state(mixer, MIXER_PCM_C_S, 1);
+	set_switch_state(mixer, MIXER_LINEIN_C_S, 1);
+	set_switch_state(mixer, MIXER_SPDIFI_C_S, 1);
+
+	return 0;
+}
+
+static void
+ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
+{
+	struct amixer *amix_d;
+	struct sum *sum_c;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		amix_d = mixer->amixers[type*CHN_NUM+i];
+		sum_c = mixer->sums[SUM_IN_F_C*CHN_NUM+i];
+		amix_d->ops->set_sum(amix_d, sum_c);
+		amix_d->ops->commit_write(amix_d);
+	}
+}
+
+static void
+ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
+{
+	struct amixer *amix_d;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		amix_d = mixer->amixers[type*CHN_NUM+i];
+		amix_d->ops->set_sum(amix_d, NULL);
+		amix_d->ops->commit_write(amix_d);
+	}
+}
+
+static int ct_mixer_get_resources(struct ct_mixer *mixer)
+{
+	struct sum_mgr *sum_mgr;
+	struct sum *sum;
+	struct sum_desc sum_desc = {0};
+	struct amixer_mgr *amixer_mgr;
+	struct amixer *amixer;
+	struct amixer_desc am_desc = {0};
+	int err;
+	int i;
+
+	/* Allocate sum resources for mixer obj */
+	sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
+	sum_desc.msr = mixer->atc->msr;
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		err = sum_mgr->get_sum(sum_mgr, &sum_desc, &sum);
+		if (err) {
+			printk(KERN_ERR "ctxfi:Failed to get sum resources for "
+					  "front output!\n");
+			break;
+		}
+		mixer->sums[i] = sum;
+	}
+	if (err)
+		goto error1;
+
+	/* Allocate amixer resources for mixer obj */
+	amixer_mgr = (struct amixer_mgr *)mixer->atc->rsc_mgrs[AMIXER];
+	am_desc.msr = mixer->atc->msr;
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &am_desc, &amixer);
+		if (err) {
+			printk(KERN_ERR "ctxfi:Failed to get amixer resources "
+			       "for mixer obj!\n");
+			break;
+		}
+		mixer->amixers[i] = amixer;
+	}
+	if (err)
+		goto error2;
+
+	return 0;
+
+error2:
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		if (NULL != mixer->amixers[i]) {
+			amixer = mixer->amixers[i];
+			amixer_mgr->put_amixer(amixer_mgr, amixer);
+			mixer->amixers[i] = NULL;
+		}
+	}
+error1:
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		if (NULL != mixer->sums[i]) {
+			sum_mgr->put_sum(sum_mgr, (struct sum *)mixer->sums[i]);
+			mixer->sums[i] = NULL;
+		}
+	}
+
+	return err;
+}
+
+static int ct_mixer_get_mem(struct ct_mixer **rmixer)
+{
+	struct ct_mixer *mixer;
+	int err;
+
+	*rmixer = NULL;
+	/* Allocate mem for mixer obj */
+	mixer = kzalloc(sizeof(*mixer), GFP_KERNEL);
+	if (NULL == mixer)
+		return -ENOMEM;
+
+	mixer->amixers = kzalloc(sizeof(void *)*(NUM_CT_AMIXERS*CHN_NUM),
+				 GFP_KERNEL);
+	if (NULL == mixer->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mixer->sums = kzalloc(sizeof(void *)*(NUM_CT_SUMS*CHN_NUM), GFP_KERNEL);
+	if (NULL == mixer->sums) {
+		err = -ENOMEM;
+		goto error2;
+	}
+
+	*rmixer = mixer;
+	return 0;
+
+error2:
+	kfree(mixer->amixers);
+error1:
+	kfree(mixer);
+	return err;
+}
+
+static int ct_mixer_topology_build(struct ct_mixer *mixer)
+{
+	struct sum *sum;
+	struct amixer *amix_d, *amix_s;
+	enum CT_AMIXER_CTL i, j;
+
+	/* Build topology from destination to source */
+
+	/* Set up Master mixer */
+	for (i = AMIXER_MASTER_F, j = SUM_IN_F;
+					i <= AMIXER_MASTER_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		sum = mixer->sums[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		sum = mixer->sums[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+	}
+
+	/* Set up Wave-out mixer */
+	for (i = AMIXER_WAVE_F, j = AMIXER_MASTER_F;
+					i <= AMIXER_WAVE_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		amix_s = mixer->amixers[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		amix_s = mixer->amixers[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+	}
+
+	/* Set up S/PDIF-out mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFO*CHN_NUM];
+	amix_s = mixer->amixers[AMIXER_MASTER_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+	amix_d = mixer->amixers[AMIXER_SPDIFO*CHN_NUM+1];
+	amix_s = mixer->amixers[AMIXER_MASTER_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+
+	/* Set up PCM-in mixer */
+	for (i = AMIXER_PCM_F, j = SUM_IN_F; i <= AMIXER_PCM_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		sum = mixer->sums[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		sum = mixer->sums[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	}
+
+	/* Set up Line-in mixer */
+	amix_d = mixer->amixers[AMIXER_LINEIN*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_LINEIN*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Mic-in mixer */
+	amix_d = mixer->amixers[AMIXER_MIC*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_MIC*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up S/PDIF-in mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFI*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_SPDIFI*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Master recording mixer */
+	amix_d = mixer->amixers[AMIXER_MASTER_F_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+	amix_d = mixer->amixers[AMIXER_MASTER_F_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+
+	/* Set up PCM-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_PCM_F_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_PCM_F_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Line-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_LINEIN_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_LINEIN_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Mic-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_MIC_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_MIC_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up S/PDIF-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFI_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_SPDIFI_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	return 0;
+}
+
+static int mixer_set_input_port(struct amixer *amixer, struct rsc *rsc)
+{
+	amixer->ops->set_input(amixer, rsc);
+	amixer->ops->commit_write(amixer);
+
+	return 0;
+}
+
+static enum CT_AMIXER_CTL port_to_amixer(enum MIXER_PORT_T type)
+{
+	switch (type) {
+	case MIX_WAVE_FRONT:	return AMIXER_WAVE_F;
+	case MIX_WAVE_SURROUND:	return AMIXER_WAVE_S;
+	case MIX_WAVE_CENTLFE:	return AMIXER_WAVE_C;
+	case MIX_WAVE_REAR:	return AMIXER_WAVE_R;
+	case MIX_PCMO_FRONT:	return AMIXER_MASTER_F_C;
+	case MIX_SPDIF_OUT:	return AMIXER_SPDIFO;
+	case MIX_LINE_IN:	return AMIXER_LINEIN;
+	case MIX_MIC_IN:	return AMIXER_MIC;
+	case MIX_SPDIF_IN:	return AMIXER_SPDIFI;
+	case MIX_PCMI_FRONT:	return AMIXER_PCM_F;
+	case MIX_PCMI_SURROUND:	return AMIXER_PCM_S;
+	case MIX_PCMI_CENTLFE:	return AMIXER_PCM_C;
+	case MIX_PCMI_REAR:	return AMIXER_PCM_R;
+	default: 		return 0;
+	}
+}
+
+static int mixer_get_output_ports(struct ct_mixer *mixer,
+				  enum MIXER_PORT_T type,
+				  struct rsc **rleft, struct rsc **rright)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	if (NULL != rleft)
+		*rleft = &((struct amixer *)mixer->amixers[amix*CHN_NUM])->rsc;
+
+	if (NULL != rright)
+		*rright =
+			&((struct amixer *)mixer->amixers[amix*CHN_NUM+1])->rsc;
+
+	return 0;
+}
+
+static int mixer_set_input_left(struct ct_mixer *mixer,
+				enum MIXER_PORT_T type, struct rsc *rsc)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	mixer_set_input_port(mixer->amixers[amix*CHN_NUM], rsc);
+	amix = get_recording_amixer(amix);
+	if (amix < NUM_CT_AMIXERS)
+		mixer_set_input_port(mixer->amixers[amix*CHN_NUM], rsc);
+
+	return 0;
+}
+
+static int
+mixer_set_input_right(struct ct_mixer *mixer,
+		      enum MIXER_PORT_T type, struct rsc *rsc)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	mixer_set_input_port(mixer->amixers[amix*CHN_NUM+1], rsc);
+	amix = get_recording_amixer(amix);
+	if (amix < NUM_CT_AMIXERS)
+		mixer_set_input_port(mixer->amixers[amix*CHN_NUM+1], rsc);
+
+	return 0;
+}
+
+int ct_mixer_destroy(struct ct_mixer *mixer)
+{
+	struct sum_mgr *sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
+	struct amixer_mgr *amixer_mgr =
+			(struct amixer_mgr *)mixer->atc->rsc_mgrs[AMIXER];
+	struct amixer *amixer;
+	int i = 0;
+
+	/* Release amixer resources */
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		if (NULL != mixer->amixers[i]) {
+			amixer = mixer->amixers[i];
+			amixer_mgr->put_amixer(amixer_mgr, amixer);
+		}
+	}
+
+	/* Release sum resources */
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		if (NULL != mixer->sums[i])
+			sum_mgr->put_sum(sum_mgr, (struct sum *)mixer->sums[i]);
+	}
+
+	/* Release mem assigned to mixer object */
+	kfree(mixer->sums);
+	kfree(mixer->amixers);
+	kfree(mixer);
+
+	return 0;
+}
+
+int ct_mixer_create(struct ct_atc *atc, struct ct_mixer **rmixer)
+{
+	struct ct_mixer *mixer;
+	int err;
+
+	*rmixer = NULL;
+
+	/* Allocate mem for mixer obj */
+	err = ct_mixer_get_mem(&mixer);
+	if (err)
+		return err;
+
+	mixer->switch_state = 0;
+	mixer->atc = atc;
+	/* Set operations */
+	mixer->get_output_ports = mixer_get_output_ports;
+	mixer->set_input_left = mixer_set_input_left;
+	mixer->set_input_right = mixer_set_input_right;
+
+	/* Allocate chip resources for mixer obj */
+	err = ct_mixer_get_resources(mixer);
+	if (err)
+		goto error;
+
+	/* Build internal mixer topology */
+	ct_mixer_topology_build(mixer);
+
+	*rmixer = mixer;
+
+	return 0;
+
+error:
+	ct_mixer_destroy(mixer);
+	return err;
+}
+
+int ct_alsa_mix_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name)
+{
+	int err;
+
+	/* Create snd kcontrol instances on demand */
+	/* vol_ctl.device = swh_ctl.device = device; */ /* better w/ device 0 */
+	err = ct_mixer_kcontrols_create((struct ct_mixer *)atc->mixer);
+	if (err)
+		return err;
+
+	strcpy(atc->card->mixername, device_name);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctmixer.h b/sound/pci/ctxfi/ctmixer.h
new file mode 100644
index 0000000..e2d96eb
--- /dev/null
+++ b/sound/pci/ctxfi/ctmixer.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctmixer.h
+ *
+ * @Brief
+ * This file contains the definition of the mixer device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTMIXER_H
+#define CTMIXER_H
+
+#include "ctatc.h"
+#include "ctresource.h"
+
+#define INIT_VOL	0x1c00
+
+enum MIXER_PORT_T {
+	MIX_WAVE_FRONT,
+	MIX_WAVE_REAR,
+	MIX_WAVE_CENTLFE,
+	MIX_WAVE_SURROUND,
+	MIX_SPDIF_OUT,
+	MIX_PCMO_FRONT,
+	MIX_MIC_IN,
+	MIX_LINE_IN,
+	MIX_SPDIF_IN,
+	MIX_PCMI_FRONT,
+	MIX_PCMI_REAR,
+	MIX_PCMI_CENTLFE,
+	MIX_PCMI_SURROUND,
+
+	NUM_MIX_PORTS
+};
+
+/* alsa mixer descriptor */
+struct ct_mixer {
+	struct ct_atc *atc;
+
+	void **amixers;		/* amixer resources for volume control */
+	void **sums;		/* sum resources for signal collection */
+	unsigned int switch_state; /* A bit-map to indicate state of switches */
+
+	int (*get_output_ports)(struct ct_mixer *mixer, enum MIXER_PORT_T type,
+				  struct rsc **rleft, struct rsc **rright);
+
+	int (*set_input_left)(struct ct_mixer *mixer,
+			      enum MIXER_PORT_T type, struct rsc *rsc);
+	int (*set_input_right)(struct ct_mixer *mixer,
+			       enum MIXER_PORT_T type, struct rsc *rsc);
+};
+
+int ct_alsa_mix_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name);
+int ct_mixer_create(struct ct_atc *atc, struct ct_mixer **rmixer);
+int ct_mixer_destroy(struct ct_mixer *mixer);
+
+#endif /* CTMIXER_H */
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
new file mode 100644
index 0000000..9e5c0c4
--- /dev/null
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -0,0 +1,426 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctpcm.c
+ *
+ * @Brief
+ * This file contains the definition of the pcm device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Apr 2 2008
+ *
+ */
+
+#include "ctpcm.h"
+#include "cttimer.h"
+#include <sound/pcm.h>
+
+/* Hardware descriptions for playback */
+static struct snd_pcm_hardware ct_pcm_playback_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_MMAP_VALID |
+				   SNDRV_PCM_INFO_PAUSE),
+	.formats		= (SNDRV_PCM_FMTBIT_U8 |
+				   SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_3LE |
+				   SNDRV_PCM_FMTBIT_S32_LE |
+				   SNDRV_PCM_FMTBIT_FLOAT_LE),
+	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
+				   SNDRV_PCM_RATE_8000_192000),
+	.rate_min		= 8000,
+	.rate_max		= 192000,
+	.channels_min		= 1,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (64),
+	.period_bytes_max	= (128*1024),
+	.periods_min		= 2,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+static struct snd_pcm_hardware ct_spdif_passthru_playback_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_MMAP_VALID |
+				   SNDRV_PCM_INFO_PAUSE),
+	.formats		= SNDRV_PCM_FMTBIT_S16_LE,
+	.rates			= (SNDRV_PCM_RATE_48000 |
+				   SNDRV_PCM_RATE_44100 |
+				   SNDRV_PCM_RATE_32000),
+	.rate_min		= 32000,
+	.rate_max		= 48000,
+	.channels_min		= 2,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (64),
+	.period_bytes_max	= (128*1024),
+	.periods_min		= 2,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+/* Hardware descriptions for capture */
+static struct snd_pcm_hardware ct_pcm_capture_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_PAUSE |
+				   SNDRV_PCM_INFO_MMAP_VALID),
+	.formats		= (SNDRV_PCM_FMTBIT_U8 |
+				   SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_3LE |
+				   SNDRV_PCM_FMTBIT_S32_LE |
+				   SNDRV_PCM_FMTBIT_FLOAT_LE),
+	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
+				   SNDRV_PCM_RATE_8000_96000),
+	.rate_min		= 8000,
+	.rate_max		= 96000,
+	.channels_min		= 1,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (384),
+	.period_bytes_max	= (64*1024),
+	.periods_min		= 2,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+static void ct_atc_pcm_interrupt(struct ct_atc_pcm *atc_pcm)
+{
+	struct ct_atc_pcm *apcm = atc_pcm;
+
+	if (NULL == apcm->substream)
+		return;
+
+	snd_pcm_period_elapsed(apcm->substream);
+}
+
+static void ct_atc_pcm_free_substream(struct snd_pcm_runtime *runtime)
+{
+	struct ct_atc_pcm *apcm = runtime->private_data;
+	struct ct_atc *atc = snd_pcm_substream_chip(apcm->substream);
+
+	atc->pcm_release_resources(atc, apcm);
+	ct_timer_instance_free(apcm->timer);
+	kfree(apcm);
+	runtime->private_data = NULL;
+}
+
+/* pcm playback operations */
+static int ct_pcm_playback_open(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm;
+	int err;
+
+	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
+	if (NULL == apcm)
+		return -ENOMEM;
+
+	apcm->substream = substream;
+	apcm->interrupt = ct_atc_pcm_interrupt;
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
+	if (IEC958 == substream->pcm->device) {
+		runtime->hw = ct_spdif_passthru_playback_hw;
+		atc->spdif_out_passthru(atc, 1);
+	} else {
+		runtime->hw = ct_pcm_playback_hw;
+		if (FRONT == substream->pcm->device)
+			runtime->hw.channels_max = 8;
+	}
+
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+					   1024, UINT_MAX);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+
+	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
+	if (!apcm->timer)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int ct_pcm_playback_close(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+
+	/* TODO: Notify mixer inactive. */
+	if (IEC958 == substream->pcm->device)
+		atc->spdif_out_passthru(atc, 0);
+
+	/* The ct_atc_pcm object will be freed by runtime->private_free */
+
+	return 0;
+}
+
+static int ct_pcm_hw_params(struct snd_pcm_substream *substream,
+				     struct snd_pcm_hw_params *hw_params)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct ct_atc_pcm *apcm = substream->runtime->private_data;
+	int err;
+
+	err = snd_pcm_lib_malloc_pages(substream,
+					params_buffer_bytes(hw_params));
+	if (err < 0)
+		return err;
+	/* clear previous resources */
+	atc->pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int ct_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct ct_atc_pcm *apcm = substream->runtime->private_data;
+
+	/* clear previous resources */
+	atc->pcm_release_resources(atc, apcm);
+	/* Free snd-allocated pages */
+	return snd_pcm_lib_free_pages(substream);
+}
+
+
+static int ct_pcm_playback_prepare(struct snd_pcm_substream *substream)
+{
+	int err;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	if (IEC958 == substream->pcm->device)
+		err = atc->spdif_passthru_playback_prepare(atc, apcm);
+	else
+		err = atc->pcm_playback_prepare(atc, apcm);
+
+	if (err < 0) {
+		printk(KERN_ERR "ctxfi: Preparing pcm playback failed!!!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int
+ct_pcm_playback_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		atc->pcm_playback_start(atc, apcm);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		atc->pcm_playback_stop(atc, apcm);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static snd_pcm_uframes_t
+ct_pcm_playback_pointer(struct snd_pcm_substream *substream)
+{
+	unsigned long position;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	/* Read out playback position */
+	position = atc->pcm_playback_position(atc, apcm);
+	position = bytes_to_frames(runtime, position);
+	if (position >= runtime->buffer_size)
+		position = 0;
+	return position;
+}
+
+/* pcm capture operations */
+static int ct_pcm_capture_open(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm;
+	int err;
+
+	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
+	if (NULL == apcm)
+		return -ENOMEM;
+
+	apcm->started = 0;
+	apcm->substream = substream;
+	apcm->interrupt = ct_atc_pcm_interrupt;
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
+	runtime->hw = ct_pcm_capture_hw;
+	runtime->hw.rate_max = atc->rsr * atc->msr;
+
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+					   1024, UINT_MAX);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+
+	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
+	if (!apcm->timer)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int ct_pcm_capture_close(struct snd_pcm_substream *substream)
+{
+	/* The ct_atc_pcm object will be freed by runtime->private_free */
+	/* TODO: Notify mixer inactive. */
+	return 0;
+}
+
+static int ct_pcm_capture_prepare(struct snd_pcm_substream *substream)
+{
+	int err;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	err = atc->pcm_capture_prepare(atc, apcm);
+	if (err < 0) {
+		printk(KERN_ERR "ctxfi: Preparing pcm capture failed!!!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int
+ct_pcm_capture_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		atc->pcm_capture_start(atc, apcm);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+		atc->pcm_capture_stop(atc, apcm);
+		break;
+	default:
+		atc->pcm_capture_stop(atc, apcm);
+		break;
+	}
+
+	return 0;
+}
+
+static snd_pcm_uframes_t
+ct_pcm_capture_pointer(struct snd_pcm_substream *substream)
+{
+	unsigned long position;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	/* Read out playback position */
+	position = atc->pcm_capture_position(atc, apcm);
+	position = bytes_to_frames(runtime, position);
+	if (position >= runtime->buffer_size)
+		position = 0;
+	return position;
+}
+
+/* PCM operators for playback */
+static struct snd_pcm_ops ct_pcm_playback_ops = {
+	.open	 	= ct_pcm_playback_open,
+	.close		= ct_pcm_playback_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= ct_pcm_hw_params,
+	.hw_free	= ct_pcm_hw_free,
+	.prepare	= ct_pcm_playback_prepare,
+	.trigger	= ct_pcm_playback_trigger,
+	.pointer	= ct_pcm_playback_pointer,
+	.page		= snd_pcm_sgbuf_ops_page,
+};
+
+/* PCM operators for capture */
+static struct snd_pcm_ops ct_pcm_capture_ops = {
+	.open	 	= ct_pcm_capture_open,
+	.close		= ct_pcm_capture_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= ct_pcm_hw_params,
+	.hw_free	= ct_pcm_hw_free,
+	.prepare	= ct_pcm_capture_prepare,
+	.trigger	= ct_pcm_capture_trigger,
+	.pointer	= ct_pcm_capture_pointer,
+	.page		= snd_pcm_sgbuf_ops_page,
+};
+
+/* Create ALSA pcm device */
+int ct_alsa_pcm_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name)
+{
+	struct snd_pcm *pcm;
+	int err;
+	int playback_count, capture_count;
+
+	playback_count = (IEC958 == device) ? 1 : 8;
+	capture_count = (FRONT == device) ? 1 : 0;
+	err = snd_pcm_new(atc->card, "ctxfi", device,
+			  playback_count, capture_count, &pcm);
+	if (err < 0) {
+		printk(KERN_ERR "ctxfi: snd_pcm_new failed!! Err=%d\n", err);
+		return err;
+	}
+
+	pcm->private_data = atc;
+	pcm->info_flags = 0;
+	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
+	strlcpy(pcm->name, device_name, sizeof(pcm->name));
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &ct_pcm_playback_ops);
+
+	if (FRONT == device)
+		snd_pcm_set_ops(pcm,
+				SNDRV_PCM_STREAM_CAPTURE, &ct_pcm_capture_ops);
+
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
+			snd_dma_pci_data(atc->pci), 128*1024, 128*1024);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctpcm.h b/sound/pci/ctxfi/ctpcm.h
new file mode 100644
index 0000000..178da0d
--- /dev/null
+++ b/sound/pci/ctxfi/ctpcm.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctpcm.h
+ *
+ * @Brief
+ * This file contains the definition of the pcm device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTPCM_H
+#define CTPCM_H
+
+#include "ctatc.h"
+
+int ct_alsa_pcm_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name);
+
+#endif /* CTPCM_H */
diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
new file mode 100644
index 0000000..889c495
--- /dev/null
+++ b/sound/pci/ctxfi/ctresource.c
@@ -0,0 +1,301 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctresource.c
+ *
+ * @Brief
+ * This file contains the implementation of some generic helper functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 15 2008
+ *
+ */
+
+#include "ctresource.h"
+#include "cthardware.h"
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#define AUDIO_SLOT_BLOCK_NUM 	256
+
+/* Resource allocation based on bit-map management mechanism */
+static int
+get_resource(u8 *rscs, unsigned int amount,
+	     unsigned int multi, unsigned int *ridx)
+{
+	int i, j, k, n;
+
+	/* Check whether there are sufficient resources to meet request. */
+	for (i = 0, n = multi; i < amount; i++) {
+		j = i / 8;
+		k = i % 8;
+		if (rscs[j] & ((u8)1 << k)) {
+			n = multi;
+			continue;
+		}
+		if (!(--n))
+			break; /* found sufficient contiguous resources */
+	}
+
+	if (i >= amount) {
+		/* Can not find sufficient contiguous resources */
+		return -ENOENT;
+	}
+
+	/* Mark the contiguous bits in resource bit-map as used */
+	for (n = multi; n > 0; n--) {
+		j = i / 8;
+		k = i % 8;
+		rscs[j] |= ((u8)1 << k);
+		i--;
+	}
+
+	*ridx = i + 1;
+
+	return 0;
+}
+
+static int put_resource(u8 *rscs, unsigned int multi, unsigned int idx)
+{
+	unsigned int i, j, k, n;
+
+	/* Mark the contiguous bits in resource bit-map as used */
+	for (n = multi, i = idx; n > 0; n--) {
+		j = i / 8;
+		k = i % 8;
+		rscs[j] &= ~((u8)1 << k);
+		i++;
+	}
+
+	return 0;
+}
+
+int mgr_get_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int *ridx)
+{
+	int err;
+
+	if (n > mgr->avail)
+		return -ENOENT;
+
+	err = get_resource(mgr->rscs, mgr->amount, n, ridx);
+	if (!err)
+		mgr->avail -= n;
+
+	return err;
+}
+
+int mgr_put_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int idx)
+{
+	put_resource(mgr->rscs, n, idx);
+	mgr->avail += n;
+
+	return 0;
+}
+
+static unsigned char offset_in_audio_slot_block[NUM_RSCTYP] = {
+	/* SRC channel is at Audio Ring slot 1 every 16 slots. */
+	[SRC]		= 0x1,
+	[AMIXER]	= 0x4,
+	[SUM]		= 0xc,
+};
+
+static int rsc_index(const struct rsc *rsc)
+{
+    return rsc->conj;
+}
+
+static int audio_ring_slot(const struct rsc *rsc)
+{
+    return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type];
+}
+
+static int rsc_next_conj(struct rsc *rsc)
+{
+	unsigned int i;
+	for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); )
+		i++;
+	rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i);
+	return rsc->conj;
+}
+
+static int rsc_master(struct rsc *rsc)
+{
+	return rsc->conj = rsc->idx;
+}
+
+static struct rsc_ops rsc_generic_ops = {
+	.index		= rsc_index,
+	.output_slot	= audio_ring_slot,
+	.master		= rsc_master,
+	.next_conj	= rsc_next_conj,
+};
+
+int rsc_init(struct rsc *rsc, u32 idx, enum RSCTYP type, u32 msr, void *hw)
+{
+	int err = 0;
+
+	rsc->idx = idx;
+	rsc->conj = idx;
+	rsc->type = type;
+	rsc->msr = msr;
+	rsc->hw = hw;
+	rsc->ops = &rsc_generic_ops;
+	if (NULL == hw) {
+		rsc->ctrl_blk = NULL;
+		return 0;
+	}
+
+	switch (type) {
+	case SRC:
+		err = ((struct hw *)hw)->src_rsc_get_ctrl_blk(&rsc->ctrl_blk);
+		break;
+	case AMIXER:
+		err = ((struct hw *)hw)->
+				amixer_rsc_get_ctrl_blk(&rsc->ctrl_blk);
+		break;
+	case SRCIMP:
+	case SUM:
+	case DAIO:
+		break;
+	default:
+		printk(KERN_ERR
+		       "ctxfi: Invalid resource type value %d!\n", type);
+		return -EINVAL;
+	}
+
+	if (err) {
+		printk(KERN_ERR
+		       "ctxfi: Failed to get resource control block!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+int rsc_uninit(struct rsc *rsc)
+{
+	if ((NULL != rsc->hw) && (NULL != rsc->ctrl_blk)) {
+		switch (rsc->type) {
+		case SRC:
+			((struct hw *)rsc->hw)->
+				src_rsc_put_ctrl_blk(rsc->ctrl_blk);
+			break;
+		case AMIXER:
+			((struct hw *)rsc->hw)->
+				amixer_rsc_put_ctrl_blk(rsc->ctrl_blk);
+			break;
+		case SUM:
+		case DAIO:
+			break;
+		default:
+			printk(KERN_ERR "ctxfi: "
+			       "Invalid resource type value %d!\n", rsc->type);
+			break;
+		}
+
+		rsc->hw = rsc->ctrl_blk = NULL;
+	}
+
+	rsc->idx = rsc->conj = 0;
+	rsc->type = NUM_RSCTYP;
+	rsc->msr = 0;
+
+	return 0;
+}
+
+int rsc_mgr_init(struct rsc_mgr *mgr, enum RSCTYP type,
+		 unsigned int amount, void *hw_obj)
+{
+	int err = 0;
+	struct hw *hw = hw_obj;
+
+	mgr->type = NUM_RSCTYP;
+
+	mgr->rscs = kzalloc(((amount + 8 - 1) / 8), GFP_KERNEL);
+	if (NULL == mgr->rscs)
+		return -ENOMEM;
+
+	switch (type) {
+	case SRC:
+		err = hw->src_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case SRCIMP:
+		err = hw->srcimp_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case AMIXER:
+		err = hw->amixer_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case DAIO:
+		err = hw->daio_mgr_get_ctrl_blk(hw, &mgr->ctrl_blk);
+		break;
+	case SUM:
+		break;
+	default:
+		printk(KERN_ERR
+		       "ctxfi: Invalid resource type value %d!\n", type);
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (err) {
+		printk(KERN_ERR
+		       "ctxfi: Failed to get manager control block!\n");
+		goto error;
+	}
+
+	mgr->type = type;
+	mgr->avail = mgr->amount = amount;
+	mgr->hw = hw;
+
+	return 0;
+
+error:
+	kfree(mgr->rscs);
+	return err;
+}
+
+int rsc_mgr_uninit(struct rsc_mgr *mgr)
+{
+	if (NULL != mgr->rscs) {
+		kfree(mgr->rscs);
+		mgr->rscs = NULL;
+	}
+
+	if ((NULL != mgr->hw) && (NULL != mgr->ctrl_blk)) {
+		switch (mgr->type) {
+		case SRC:
+			((struct hw *)mgr->hw)->
+				src_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case SRCIMP:
+			((struct hw *)mgr->hw)->
+				srcimp_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case AMIXER:
+			((struct hw *)mgr->hw)->
+				amixer_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case DAIO:
+			((struct hw *)mgr->hw)->
+				daio_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case SUM:
+			break;
+		default:
+			printk(KERN_ERR "ctxfi: "
+			       "Invalid resource type value %d!\n", mgr->type);
+			break;
+		}
+
+		mgr->hw = mgr->ctrl_blk = NULL;
+	}
+
+	mgr->type = NUM_RSCTYP;
+	mgr->avail = mgr->amount = 0;
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h
new file mode 100644
index 0000000..0838c2e
--- /dev/null
+++ b/sound/pci/ctxfi/ctresource.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctresource.h
+ *
+ * @Brief
+ * This file contains the definition of generic hardware resources for
+ * resource management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTRESOURCE_H
+#define CTRESOURCE_H
+
+#include <linux/types.h>
+
+enum RSCTYP {
+	SRC,
+	SRCIMP,
+	AMIXER,
+	SUM,
+	DAIO,
+	NUM_RSCTYP	/* This must be the last one and less than 16 */
+};
+
+struct rsc_ops;
+
+struct rsc {
+	u32 idx:12;	/* The index of a resource */
+	u32 type:4;	/* The type (RSCTYP) of a resource */
+	u32 conj:12;	/* Current conjugate index */
+	u32 msr:4;	/* The Master Sample Rate a resource working on */
+	void *ctrl_blk;	/* Chip specific control info block for a resource */
+	void *hw;	/* Chip specific object for hardware access means */
+	struct rsc_ops *ops;	/* Generic resource operations */
+};
+
+struct rsc_ops {
+	int (*master)(struct rsc *rsc);	/* Move to master resource */
+	int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
+	int (*index)(const struct rsc *rsc); /* Return the index of resource */
+	/* Return the output slot number */
+	int (*output_slot)(const struct rsc *rsc);
+};
+
+int rsc_init(struct rsc *rsc, u32 idx, enum RSCTYP type, u32 msr, void *hw);
+int rsc_uninit(struct rsc *rsc);
+
+struct rsc_mgr {
+	enum RSCTYP type; /* The type (RSCTYP) of resource to manage */
+	unsigned int amount; /* The total amount of a kind of resource */
+	unsigned int avail; /* The amount of currently available resources */
+	unsigned char *rscs; /* The bit-map for resource allocation */
+	void *ctrl_blk; /* Chip specific control info block */
+	void *hw; /* Chip specific object for hardware access */
+};
+
+/* Resource management is based on bit-map mechanism */
+int rsc_mgr_init(struct rsc_mgr *mgr, enum RSCTYP type,
+		 unsigned int amount, void *hw);
+int rsc_mgr_uninit(struct rsc_mgr *mgr);
+int mgr_get_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int *ridx);
+int mgr_put_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int idx);
+
+#endif /* CTRESOURCE_H */
diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
new file mode 100644
index 0000000..e1c145d
--- /dev/null
+++ b/sound/pci/ctxfi/ctsrc.c
@@ -0,0 +1,886 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctsrc.c
+ *
+ * @Brief
+ * This file contains the implementation of the Sample Rate Convertor
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#include "ctsrc.h"
+#include "cthardware.h"
+#include <linux/slab.h>
+
+#define SRC_RESOURCE_NUM	64
+#define SRCIMP_RESOURCE_NUM	256
+
+static unsigned int conj_mask;
+
+static int src_default_config_memrd(struct src *src);
+static int src_default_config_memwr(struct src *src);
+static int src_default_config_arcrw(struct src *src);
+
+static int (*src_default_config[3])(struct src *) = {
+	[MEMRD] = src_default_config_memrd,
+	[MEMWR] = src_default_config_memwr,
+	[ARCRW] = src_default_config_arcrw
+};
+
+static int src_set_state(struct src *src, unsigned int state)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_state(src->rsc.ctrl_blk, state);
+
+	return 0;
+}
+
+static int src_set_bm(struct src *src, unsigned int bm)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_bm(src->rsc.ctrl_blk, bm);
+
+	return 0;
+}
+
+static int src_set_sf(struct src *src, unsigned int sf)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_sf(src->rsc.ctrl_blk, sf);
+
+	return 0;
+}
+
+static int src_set_pm(struct src *src, unsigned int pm)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_pm(src->rsc.ctrl_blk, pm);
+
+	return 0;
+}
+
+static int src_set_rom(struct src *src, unsigned int rom)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_rom(src->rsc.ctrl_blk, rom);
+
+	return 0;
+}
+
+static int src_set_vo(struct src *src, unsigned int vo)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_vo(src->rsc.ctrl_blk, vo);
+
+	return 0;
+}
+
+static int src_set_st(struct src *src, unsigned int st)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_st(src->rsc.ctrl_blk, st);
+
+	return 0;
+}
+
+static int src_set_bp(struct src *src, unsigned int bp)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_bp(src->rsc.ctrl_blk, bp);
+
+	return 0;
+}
+
+static int src_set_cisz(struct src *src, unsigned int cisz)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_cisz(src->rsc.ctrl_blk, cisz);
+
+	return 0;
+}
+
+static int src_set_ca(struct src *src, unsigned int ca)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_ca(src->rsc.ctrl_blk, ca);
+
+	return 0;
+}
+
+static int src_set_sa(struct src *src, unsigned int sa)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_sa(src->rsc.ctrl_blk, sa);
+
+	return 0;
+}
+
+static int src_set_la(struct src *src, unsigned int la)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_la(src->rsc.ctrl_blk, la);
+
+	return 0;
+}
+
+static int src_set_pitch(struct src *src, unsigned int pitch)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_pitch(src->rsc.ctrl_blk, pitch);
+
+	return 0;
+}
+
+static int src_set_clear_zbufs(struct src *src)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	return 0;
+}
+
+static int src_commit_write(struct src *src)
+{
+	struct hw *hw;
+	int i;
+	unsigned int dirty = 0;
+
+	hw = src->rsc.hw;
+	src->rsc.ops->master(&src->rsc);
+	if (src->rsc.msr > 1) {
+		/* Save dirty flags for conjugate resource programming */
+		dirty = hw->src_get_dirty(src->rsc.ctrl_blk) & conj_mask;
+	}
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	/* Program conjugate parameter mixer resources */
+	if (MEMWR == src->mode)
+		return 0;
+
+	for (i = 1; i < src->rsc.msr; i++) {
+		src->rsc.ops->next_conj(&src->rsc);
+		hw->src_set_dirty(src->rsc.ctrl_blk, dirty);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_get_ca(struct src *src)
+{
+	struct hw *hw;
+
+	hw = src->rsc.hw;
+	return hw->src_get_ca(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+}
+
+static int src_init(struct src *src)
+{
+	src_default_config[src->mode](src);
+
+	return 0;
+}
+
+static struct src *src_next_interleave(struct src *src)
+{
+	return src->intlv;
+}
+
+static int src_default_config_memrd(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+	unsigned int rsr, msr;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 1);
+	for (rsr = 0, msr = src->rsc.msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->src_set_rsr(src->rsc.ctrl_blk, rsr);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_S16);
+	hw->src_set_wr(src->rsc.ctrl_blk, 0);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, src->multi - 1);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	src->rsc.ops->master(&src->rsc);
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	for (msr = 1; msr < src->rsc.msr; msr++) {
+		src->rsc.ops->next_conj(&src->rsc);
+		hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_default_config_memwr(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 1);
+	hw->src_set_rsr(src->rsc.ctrl_blk, 0);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_S16);
+	hw->src_set_wr(src->rsc.ctrl_blk, 1);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, 0);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	src->rsc.ops->master(&src->rsc);
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	return 0;
+}
+
+static int src_default_config_arcrw(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+	unsigned int rsr, msr;
+	unsigned int dirty;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 0);
+	for (rsr = 0, msr = src->rsc.msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->src_set_rsr(src->rsc.ctrl_blk, rsr);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_F32);
+	hw->src_set_wr(src->rsc.ctrl_blk, 0);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, 0);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	/*hw->src_set_sa(src->rsc.ctrl_blk, 0x100);*/
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	/*hw->src_set_la(src->rsc.ctrl_blk, 0x03ffffe0);*/
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	dirty = hw->src_get_dirty(src->rsc.ctrl_blk);
+	src->rsc.ops->master(&src->rsc);
+	for (msr = 0; msr < src->rsc.msr; msr++) {
+		hw->src_set_dirty(src->rsc.ctrl_blk, dirty);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static struct src_rsc_ops src_rsc_ops = {
+	.set_state		= src_set_state,
+	.set_bm			= src_set_bm,
+	.set_sf			= src_set_sf,
+	.set_pm			= src_set_pm,
+	.set_rom		= src_set_rom,
+	.set_vo			= src_set_vo,
+	.set_st			= src_set_st,
+	.set_bp			= src_set_bp,
+	.set_cisz		= src_set_cisz,
+	.set_ca			= src_set_ca,
+	.set_sa			= src_set_sa,
+	.set_la			= src_set_la,
+	.set_pitch		= src_set_pitch,
+	.set_clr_zbufs		= src_set_clear_zbufs,
+	.commit_write		= src_commit_write,
+	.get_ca			= src_get_ca,
+	.init			= src_init,
+	.next_interleave	= src_next_interleave,
+};
+
+static int
+src_rsc_init(struct src *src, u32 idx,
+	     const struct src_desc *desc, struct src_mgr *mgr)
+{
+	int err;
+	int i, n;
+	struct src *p;
+
+	n = (MEMRD == desc->mode) ? desc->multi : 1;
+	for (i = 0, p = src; i < n; i++, p++) {
+		err = rsc_init(&p->rsc, idx + i, SRC, desc->msr, mgr->mgr.hw);
+		if (err)
+			goto error1;
+
+		/* Initialize src specific rsc operations */
+		p->ops = &src_rsc_ops;
+		p->multi = (0 == i) ? desc->multi : 1;
+		p->mode = desc->mode;
+		src_default_config[desc->mode](p);
+		mgr->src_enable(mgr, p);
+		p->intlv = p + 1;
+	}
+	(--p)->intlv = NULL;	/* Set @intlv of the last SRC to NULL */
+
+	mgr->commit_write(mgr);
+
+	return 0;
+
+error1:
+	for (i--, p--; i >= 0; i--, p--) {
+		mgr->src_disable(mgr, p);
+		rsc_uninit(&p->rsc);
+	}
+	mgr->commit_write(mgr);
+	return err;
+}
+
+static int src_rsc_uninit(struct src *src, struct src_mgr *mgr)
+{
+	int i, n;
+	struct src *p;
+
+	n = (MEMRD == src->mode) ? src->multi : 1;
+	for (i = 0, p = src; i < n; i++, p++) {
+		mgr->src_disable(mgr, p);
+		rsc_uninit(&p->rsc);
+		p->multi = 0;
+		p->ops = NULL;
+		p->mode = NUM_SRCMODES;
+		p->intlv = NULL;
+	}
+	mgr->commit_write(mgr);
+
+	return 0;
+}
+
+static int
+get_src_rsc(struct src_mgr *mgr, const struct src_desc *desc, struct src **rsrc)
+{
+	unsigned int idx = SRC_RESOURCE_NUM;
+	int err;
+	struct src *src;
+	unsigned long flags;
+
+	*rsrc = NULL;
+
+	/* Check whether there are sufficient src resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	if (MEMRD == desc->mode)
+		err = mgr_get_resource(&mgr->mgr, desc->multi, &idx);
+	else
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Can't meet SRC resource request!\n");
+		return err;
+	}
+
+	/* Allocate mem for master src resource */
+	if (MEMRD == desc->mode)
+		src = kzalloc(sizeof(*src)*desc->multi, GFP_KERNEL);
+	else
+		src = kzalloc(sizeof(*src), GFP_KERNEL);
+
+	if (NULL == src) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	err = src_rsc_init(src, idx, desc, mgr);
+	if (err)
+		goto error2;
+
+	*rsrc = src;
+
+	return 0;
+
+error2:
+	kfree(src);
+error1:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	if (MEMRD == desc->mode)
+		mgr_put_resource(&mgr->mgr, desc->multi, idx);
+	else
+		mgr_put_resource(&mgr->mgr, 1, idx);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	return err;
+}
+
+static int put_src_rsc(struct src_mgr *mgr, struct src *src)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	src->rsc.ops->master(&src->rsc);
+	if (MEMRD == src->mode)
+		mgr_put_resource(&mgr->mgr, src->multi,
+				 src->rsc.ops->index(&src->rsc));
+	else
+		mgr_put_resource(&mgr->mgr, 1, src->rsc.ops->index(&src->rsc));
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	src_rsc_uninit(src, mgr);
+	kfree(src);
+
+	return 0;
+}
+
+static int src_enable_s(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_enbs_src(mgr->mgr.ctrl_blk,
+				     src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_enable(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_enb_src(mgr->mgr.ctrl_blk,
+				    src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_disable(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_dsb_src(mgr->mgr.ctrl_blk,
+				    src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_mgr_commit_write(struct src_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	hw->src_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	return 0;
+}
+
+int src_mgr_create(void *hw, struct src_mgr **rsrc_mgr)
+{
+	int err, i;
+	struct src_mgr *src_mgr;
+
+	*rsrc_mgr = NULL;
+	src_mgr = kzalloc(sizeof(*src_mgr), GFP_KERNEL);
+	if (NULL == src_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&src_mgr->mgr, SRC, SRC_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&src_mgr->mgr_lock);
+	conj_mask = ((struct hw *)hw)->src_dirty_conj_mask();
+
+	src_mgr->get_src = get_src_rsc;
+	src_mgr->put_src = put_src_rsc;
+	src_mgr->src_enable_s = src_enable_s;
+	src_mgr->src_enable = src_enable;
+	src_mgr->src_disable = src_disable;
+	src_mgr->commit_write = src_mgr_commit_write;
+
+	/* Disable all SRC resources. */
+	for (i = 0; i < 256; i++)
+		((struct hw *)hw)->src_mgr_dsb_src(src_mgr->mgr.ctrl_blk, i);
+
+	((struct hw *)hw)->src_mgr_commit_write(hw, src_mgr->mgr.ctrl_blk);
+
+	*rsrc_mgr = src_mgr;
+
+	return 0;
+
+error1:
+	kfree(src_mgr);
+	return err;
+}
+
+int src_mgr_destroy(struct src_mgr *src_mgr)
+{
+	rsc_mgr_uninit(&src_mgr->mgr);
+	kfree(src_mgr);
+
+	return 0;
+}
+
+/* SRCIMP resource manager operations */
+
+static int srcimp_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
+}
+
+static int srcimp_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
+}
+
+static int srcimp_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
+}
+
+static struct rsc_ops srcimp_basic_rsc_ops = {
+	.master		= srcimp_master,
+	.next_conj	= srcimp_next_conj,
+	.index		= srcimp_index,
+	.output_slot	= NULL,
+};
+
+static int srcimp_map(struct srcimp *srcimp, struct src *src, struct rsc *input)
+{
+	struct imapper *entry;
+	int i;
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+	src->rsc.ops->master(&src->rsc);
+	input->ops->master(input);
+
+	/* Program master and conjugate resources */
+	for (i = 0; i < srcimp->rsc.msr; i++) {
+		entry = &srcimp->imappers[i];
+		entry->slot = input->ops->output_slot(input);
+		entry->user = src->rsc.ops->index(&src->rsc);
+		entry->addr = srcimp->rsc.ops->index(&srcimp->rsc);
+		srcimp->mgr->imap_add(srcimp->mgr, entry);
+		srcimp->mapped |= (0x1 << i);
+
+		srcimp->rsc.ops->next_conj(&srcimp->rsc);
+		input->ops->next_conj(input);
+	}
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+	input->ops->master(input);
+
+	return 0;
+}
+
+static int srcimp_unmap(struct srcimp *srcimp)
+{
+	int i;
+
+	/* Program master and conjugate resources */
+	for (i = 0; i < srcimp->rsc.msr; i++) {
+		if (srcimp->mapped & (0x1 << i)) {
+			srcimp->mgr->imap_delete(srcimp->mgr,
+						 &srcimp->imappers[i]);
+			srcimp->mapped &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static struct srcimp_rsc_ops srcimp_ops = {
+	.map = srcimp_map,
+	.unmap = srcimp_unmap
+};
+
+static int srcimp_rsc_init(struct srcimp *srcimp,
+			   const struct srcimp_desc *desc,
+			   struct srcimp_mgr *mgr)
+{
+	int err;
+
+	err = rsc_init(&srcimp->rsc, srcimp->idx[0],
+		       SRCIMP, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	/* Reserve memory for imapper nodes */
+	srcimp->imappers = kzalloc(sizeof(struct imapper)*desc->msr,
+				   GFP_KERNEL);
+	if (NULL == srcimp->imappers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	/* Set srcimp specific operations */
+	srcimp->rsc.ops = &srcimp_basic_rsc_ops;
+	srcimp->ops = &srcimp_ops;
+	srcimp->mgr = mgr;
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+
+	return 0;
+
+error1:
+	rsc_uninit(&srcimp->rsc);
+	return err;
+}
+
+static int srcimp_rsc_uninit(struct srcimp *srcimp)
+{
+	if (NULL != srcimp->imappers) {
+		kfree(srcimp->imappers);
+		srcimp->imappers = NULL;
+	}
+	srcimp->ops = NULL;
+	srcimp->mgr = NULL;
+	rsc_uninit(&srcimp->rsc);
+
+	return 0;
+}
+
+static int get_srcimp_rsc(struct srcimp_mgr *mgr,
+			  const struct srcimp_desc *desc,
+			  struct srcimp **rsrcimp)
+{
+	int err, i;
+	unsigned int idx;
+	struct srcimp *srcimp;
+	unsigned long flags;
+
+	*rsrcimp = NULL;
+
+	/* Allocate mem for SRCIMP resource */
+	srcimp = kzalloc(sizeof(*srcimp), GFP_KERNEL);
+	if (NULL == srcimp) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient SRCIMP resources. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		srcimp->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "ctxfi: Can't meet SRCIMP resource request!\n");
+		goto error1;
+	}
+
+	err = srcimp_rsc_init(srcimp, desc, mgr);
+	if (err)
+		goto error1;
+
+	*rsrcimp = srcimp;
+
+	return 0;
+
+error1:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, srcimp->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(srcimp);
+	return err;
+}
+
+static int put_srcimp_rsc(struct srcimp_mgr *mgr, struct srcimp *srcimp)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < srcimp->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, srcimp->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	srcimp_rsc_uninit(srcimp);
+	kfree(srcimp);
+
+	return 0;
+}
+
+static int srcimp_map_op(void *data, struct imapper *entry)
+{
+	struct rsc_mgr *mgr = &((struct srcimp_mgr *)data)->mgr;
+	struct hw *hw = mgr->hw;
+
+	hw->srcimp_mgr_set_imaparc(mgr->ctrl_blk, entry->slot);
+	hw->srcimp_mgr_set_imapuser(mgr->ctrl_blk, entry->user);
+	hw->srcimp_mgr_set_imapnxt(mgr->ctrl_blk, entry->next);
+	hw->srcimp_mgr_set_imapaddr(mgr->ctrl_blk, entry->addr);
+	hw->srcimp_mgr_commit_write(mgr->hw, mgr->ctrl_blk);
+
+	return 0;
+}
+
+static int srcimp_imap_add(struct srcimp_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	if ((0 == entry->addr) && (mgr->init_imap_added)) {
+		input_mapper_delete(&mgr->imappers,
+				    mgr->init_imap, srcimp_map_op, mgr);
+		mgr->init_imap_added = 0;
+	}
+	err = input_mapper_add(&mgr->imappers, entry, srcimp_map_op, mgr);
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int srcimp_imap_delete(struct srcimp_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	err = input_mapper_delete(&mgr->imappers, entry, srcimp_map_op, mgr);
+	if (list_empty(&mgr->imappers)) {
+		input_mapper_add(&mgr->imappers, mgr->init_imap,
+				 srcimp_map_op, mgr);
+		mgr->init_imap_added = 1;
+	}
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+int srcimp_mgr_create(void *hw, struct srcimp_mgr **rsrcimp_mgr)
+{
+	int err;
+	struct srcimp_mgr *srcimp_mgr;
+	struct imapper *entry;
+
+	*rsrcimp_mgr = NULL;
+	srcimp_mgr = kzalloc(sizeof(*srcimp_mgr), GFP_KERNEL);
+	if (NULL == srcimp_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&srcimp_mgr->mgr, SRCIMP, SRCIMP_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&srcimp_mgr->mgr_lock);
+	spin_lock_init(&srcimp_mgr->imap_lock);
+	INIT_LIST_HEAD(&srcimp_mgr->imappers);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (NULL == entry) {
+		err = -ENOMEM;
+		goto error2;
+	}
+	entry->slot = entry->addr = entry->next = entry->user = 0;
+	list_add(&entry->list, &srcimp_mgr->imappers);
+	srcimp_mgr->init_imap = entry;
+	srcimp_mgr->init_imap_added = 1;
+
+	srcimp_mgr->get_srcimp = get_srcimp_rsc;
+	srcimp_mgr->put_srcimp = put_srcimp_rsc;
+	srcimp_mgr->imap_add = srcimp_imap_add;
+	srcimp_mgr->imap_delete = srcimp_imap_delete;
+
+	*rsrcimp_mgr = srcimp_mgr;
+
+	return 0;
+
+error2:
+	rsc_mgr_uninit(&srcimp_mgr->mgr);
+error1:
+	kfree(srcimp_mgr);
+	return err;
+}
+
+int srcimp_mgr_destroy(struct srcimp_mgr *srcimp_mgr)
+{
+	unsigned long flags;
+
+	/* free src input mapper list */
+	spin_lock_irqsave(&srcimp_mgr->imap_lock, flags);
+	free_input_mapper_list(&srcimp_mgr->imappers);
+	spin_unlock_irqrestore(&srcimp_mgr->imap_lock, flags);
+
+	rsc_mgr_uninit(&srcimp_mgr->mgr);
+	kfree(srcimp_mgr);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctsrc.h b/sound/pci/ctxfi/ctsrc.h
new file mode 100644
index 0000000..259366a
--- /dev/null
+++ b/sound/pci/ctxfi/ctsrc.h
@@ -0,0 +1,149 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctsrc.h
+ *
+ * @Brief
+ * This file contains the definition of the Sample Rate Convertor
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTSRC_H
+#define CTSRC_H
+
+#include "ctresource.h"
+#include "ctimap.h"
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#define SRC_STATE_OFF	0x0
+#define SRC_STATE_INIT	0x4
+#define SRC_STATE_RUN	0x5
+
+#define SRC_SF_U8	0x0
+#define SRC_SF_S16	0x1
+#define SRC_SF_S24	0x2
+#define SRC_SF_S32	0x3
+#define SRC_SF_F32	0x4
+
+/* Define the descriptor of a src resource */
+enum SRCMODE {
+	MEMRD,		/* Read data from host memory */
+	MEMWR,		/* Write data to host memory */
+	ARCRW,		/* Read from and write to audio ring channel */
+	NUM_SRCMODES
+};
+
+struct src_rsc_ops;
+
+struct src {
+	struct rsc rsc; /* Basic resource info */
+	struct src *intlv; /* Pointer to next interleaved SRC in a series */
+	struct src_rsc_ops *ops; /* SRC specific operations */
+	/* Number of contiguous srcs for interleaved usage */
+	unsigned char multi;
+	unsigned char mode; /* Working mode of this SRC resource */
+};
+
+struct src_rsc_ops {
+	int (*set_state)(struct src *src, unsigned int state);
+	int (*set_bm)(struct src *src, unsigned int bm);
+	int (*set_sf)(struct src *src, unsigned int sf);
+	int (*set_pm)(struct src *src, unsigned int pm);
+	int (*set_rom)(struct src *src, unsigned int rom);
+	int (*set_vo)(struct src *src, unsigned int vo);
+	int (*set_st)(struct src *src, unsigned int st);
+	int (*set_bp)(struct src *src, unsigned int bp);
+	int (*set_cisz)(struct src *src, unsigned int cisz);
+	int (*set_ca)(struct src *src, unsigned int ca);
+	int (*set_sa)(struct src *src, unsigned int sa);
+	int (*set_la)(struct src *src, unsigned int la);
+	int (*set_pitch)(struct src *src, unsigned int pitch);
+	int (*set_clr_zbufs)(struct src *src);
+	int (*commit_write)(struct src *src);
+	int (*get_ca)(struct src *src);
+	int (*init)(struct src *src);
+	struct src* (*next_interleave)(struct src *src);
+};
+
+/* Define src resource request description info */
+struct src_desc {
+	/* Number of contiguous master srcs for interleaved usage */
+	unsigned char multi;
+	unsigned char msr;
+	unsigned char mode; /* Working mode of the requested srcs */
+};
+
+/* Define src manager object */
+struct src_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request src resource */
+	int (*get_src)(struct src_mgr *mgr,
+		       const struct src_desc *desc, struct src **rsrc);
+	/* return src resource */
+	int (*put_src)(struct src_mgr *mgr, struct src *src);
+	int (*src_enable_s)(struct src_mgr *mgr, struct src *src);
+	int (*src_enable)(struct src_mgr *mgr, struct src *src);
+	int (*src_disable)(struct src_mgr *mgr, struct src *src);
+	int (*commit_write)(struct src_mgr *mgr);
+};
+
+/* Define the descriptor of a SRC Input Mapper resource */
+struct srcimp_mgr;
+struct srcimp_rsc_ops;
+
+struct srcimp {
+	struct rsc rsc;
+	unsigned char idx[8];
+	struct imapper *imappers;
+	unsigned int mapped; /* A bit-map indicating which conj rsc is mapped */
+	struct srcimp_mgr *mgr;
+	struct srcimp_rsc_ops *ops;
+};
+
+struct srcimp_rsc_ops {
+	int (*map)(struct srcimp *srcimp, struct src *user, struct rsc *input);
+	int (*unmap)(struct srcimp *srcimp);
+};
+
+/* Define SRCIMP resource request description info */
+struct srcimp_desc {
+	unsigned int msr;
+};
+
+struct srcimp_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+	spinlock_t imap_lock;
+	struct list_head imappers;
+	struct imapper *init_imap;
+	unsigned int init_imap_added;
+
+	 /* request srcimp resource */
+	int (*get_srcimp)(struct srcimp_mgr *mgr,
+			  const struct srcimp_desc *desc,
+			  struct srcimp **rsrcimp);
+	/* return srcimp resource */
+	int (*put_srcimp)(struct srcimp_mgr *mgr, struct srcimp *srcimp);
+	int (*imap_add)(struct srcimp_mgr *mgr, struct imapper *entry);
+	int (*imap_delete)(struct srcimp_mgr *mgr, struct imapper *entry);
+};
+
+/* Constructor and destructor of SRC resource manager */
+int src_mgr_create(void *hw, struct src_mgr **rsrc_mgr);
+int src_mgr_destroy(struct src_mgr *src_mgr);
+/* Constructor and destructor of SRCIMP resource manager */
+int srcimp_mgr_create(void *hw, struct srcimp_mgr **rsrc_mgr);
+int srcimp_mgr_destroy(struct srcimp_mgr *srcimp_mgr);
+
+#endif /* CTSRC_H */
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
new file mode 100644
index 0000000..779c6c3
--- /dev/null
+++ b/sound/pci/ctxfi/cttimer.c
@@ -0,0 +1,441 @@
+/*
+ * PCM timer handling on ctxfi
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#include <linux/slab.h>
+#include <linux/math64.h>
+#include <linux/moduleparam.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include "ctatc.h"
+#include "cthardware.h"
+#include "cttimer.h"
+
+static int use_system_timer;
+MODULE_PARM_DESC(use_system_timer, "Foce to use system-timer");
+module_param(use_system_timer, bool, S_IRUGO);
+
+struct ct_timer_ops {
+	void (*init)(struct ct_timer_instance *);
+	void (*prepare)(struct ct_timer_instance *);
+	void (*start)(struct ct_timer_instance *);
+	void (*stop)(struct ct_timer_instance *);
+	void (*free_instance)(struct ct_timer_instance *);
+	void (*interrupt)(struct ct_timer *);
+	void (*free_global)(struct ct_timer *);
+};
+
+/* timer instance -- assigned to each PCM stream */
+struct ct_timer_instance {
+	spinlock_t lock;
+	struct ct_timer *timer_base;
+	struct ct_atc_pcm *apcm;
+	struct snd_pcm_substream *substream;
+	struct timer_list timer;
+	struct list_head instance_list;
+	struct list_head running_list;
+	unsigned int position;
+	unsigned int frag_count;
+	unsigned int running:1;
+	unsigned int need_update:1;
+};
+
+/* timer instance manager */
+struct ct_timer {
+	spinlock_t lock;		/* global timer lock (for xfitimer) */
+	spinlock_t list_lock;		/* lock for instance list */
+	struct ct_atc *atc;
+	struct ct_timer_ops *ops;
+	struct list_head instance_head;
+	struct list_head running_head;
+	unsigned int wc;		/* current wallclock */
+	unsigned int irq_handling:1;	/* in IRQ handling */
+	unsigned int reprogram:1;	/* need to reprogram the internval */
+	unsigned int running:1;		/* global timer running */
+};
+
+
+/*
+ * system-timer-based updates
+ */
+
+static void ct_systimer_callback(unsigned long data)
+{
+	struct ct_timer_instance *ti = (struct ct_timer_instance *)data;
+	struct snd_pcm_substream *substream = ti->substream;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = ti->apcm;
+	unsigned int period_size = runtime->period_size;
+	unsigned int buffer_size = runtime->buffer_size;
+	unsigned long flags;
+	unsigned int position, dist, interval;
+
+	position = substream->ops->pointer(substream);
+	dist = (position + buffer_size - ti->position) % buffer_size;
+	if (dist >= period_size ||
+	    position / period_size != ti->position / period_size) {
+		apcm->interrupt(apcm);
+		ti->position = position;
+	}
+	/* Add extra HZ*5/1000 to avoid overrun issue when recording
+	 * at 8kHz in 8-bit format or at 88kHz in 24-bit format. */
+	interval = ((period_size - (position % period_size))
+		   * HZ + (runtime->rate - 1)) / runtime->rate + HZ * 5 / 1000;
+	spin_lock_irqsave(&ti->lock, flags);
+	if (ti->running)
+		mod_timer(&ti->timer, jiffies + interval);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_init(struct ct_timer_instance *ti)
+{
+	setup_timer(&ti->timer, ct_systimer_callback,
+		    (unsigned long)ti);
+}
+
+static void ct_systimer_start(struct ct_timer_instance *ti)
+{
+	struct snd_pcm_runtime *runtime = ti->substream->runtime;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ti->lock, flags);
+	ti->running = 1;
+	mod_timer(&ti->timer,
+		  jiffies + (runtime->period_size * HZ +
+			     (runtime->rate - 1)) / runtime->rate);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_stop(struct ct_timer_instance *ti)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ti->lock, flags);
+	ti->running = 0;
+	del_timer(&ti->timer);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_prepare(struct ct_timer_instance *ti)
+{
+	ct_systimer_stop(ti);
+	try_to_del_timer_sync(&ti->timer);
+}
+
+#define ct_systimer_free	ct_systimer_prepare
+
+static struct ct_timer_ops ct_systimer_ops = {
+	.init = ct_systimer_init,
+	.free_instance = ct_systimer_free,
+	.prepare = ct_systimer_prepare,
+	.start = ct_systimer_start,
+	.stop = ct_systimer_stop,
+};
+
+
+/*
+ * Handling multiple streams using a global emu20k1 timer irq
+ */
+
+#define CT_TIMER_FREQ	48000
+#define MIN_TICKS	1
+#define MAX_TICKS	((1 << 13) - 1)
+
+static void ct_xfitimer_irq_rearm(struct ct_timer *atimer, int ticks)
+{
+	struct hw *hw = atimer->atc->hw;
+	if (ticks > MAX_TICKS)
+		ticks = MAX_TICKS;
+	hw->set_timer_tick(hw, ticks);
+	if (!atimer->running)
+		hw->set_timer_irq(hw, 1);
+	atimer->running = 1;
+}
+
+static void ct_xfitimer_irq_stop(struct ct_timer *atimer)
+{
+	if (atimer->running) {
+		struct hw *hw = atimer->atc->hw;
+		hw->set_timer_irq(hw, 0);
+		hw->set_timer_tick(hw, 0);
+		atimer->running = 0;
+	}
+}
+
+static inline unsigned int ct_xfitimer_get_wc(struct ct_timer *atimer)
+{
+	struct hw *hw = atimer->atc->hw;
+	return hw->get_wc(hw);
+}
+
+/*
+ * reprogram the timer interval;
+ * checks the running instance list and determines the next timer interval.
+ * also updates the each stream position, returns the number of streams
+ * to call snd_pcm_period_elapsed() appropriately
+ *
+ * call this inside the lock and irq disabled
+ */
+static int ct_xfitimer_reprogram(struct ct_timer *atimer)
+{
+	struct ct_timer_instance *ti;
+	unsigned int min_intr = (unsigned int)-1;
+	int updates = 0;
+	unsigned int wc, diff;
+
+	if (list_empty(&atimer->running_head)) {
+		ct_xfitimer_irq_stop(atimer);
+		atimer->reprogram = 0; /* clear flag */
+		return 0;
+	}
+
+	wc = ct_xfitimer_get_wc(atimer);
+	diff = wc - atimer->wc;
+	atimer->wc = wc;
+	list_for_each_entry(ti, &atimer->running_head, running_list) {
+		if (ti->frag_count > diff)
+			ti->frag_count -= diff;
+		else {
+			unsigned int pos;
+			unsigned int period_size, rate;
+
+			period_size = ti->substream->runtime->period_size;
+			rate = ti->substream->runtime->rate;
+			pos = ti->substream->ops->pointer(ti->substream);
+			if (pos / period_size != ti->position / period_size) {
+				ti->need_update = 1;
+				ti->position = pos;
+				updates++;
+			}
+			pos %= period_size;
+			pos = period_size - pos;
+			ti->frag_count = div_u64((u64)pos * CT_TIMER_FREQ +
+						 rate - 1, rate);
+		}
+		if (ti->frag_count < min_intr)
+			min_intr = ti->frag_count;
+	}
+
+	if (min_intr < MIN_TICKS)
+		min_intr = MIN_TICKS;
+	ct_xfitimer_irq_rearm(atimer, min_intr);
+	atimer->reprogram = 0; /* clear flag */
+	return updates;
+}
+
+/* look through the instance list and call period_elapsed if needed */
+static void ct_xfitimer_check_period(struct ct_timer *atimer)
+{
+	struct ct_timer_instance *ti;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->list_lock, flags);
+	list_for_each_entry(ti, &atimer->instance_head, instance_list) {
+		if (ti->need_update) {
+			ti->need_update = 0;
+			ti->apcm->interrupt(ti->apcm);
+		}
+	}
+	spin_unlock_irqrestore(&atimer->list_lock, flags);
+}
+
+/* Handle timer-interrupt */
+static void ct_xfitimer_callback(struct ct_timer *atimer)
+{
+	int update;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	atimer->irq_handling = 1;
+	do {
+		update = ct_xfitimer_reprogram(atimer);
+		spin_unlock(&atimer->lock);
+		if (update)
+			ct_xfitimer_check_period(atimer);
+		spin_lock(&atimer->lock);
+	} while (atimer->reprogram);
+	atimer->irq_handling = 0;
+	spin_unlock_irqrestore(&atimer->lock, flags);
+}
+
+static void ct_xfitimer_prepare(struct ct_timer_instance *ti)
+{
+	ti->frag_count = ti->substream->runtime->period_size;
+	ti->need_update = 0;
+}
+
+
+/* start/stop the timer */
+static void ct_xfitimer_update(struct ct_timer *atimer)
+{
+	unsigned long flags;
+	int update;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	if (atimer->irq_handling) {
+		/* reached from IRQ handler; let it handle later */
+		atimer->reprogram = 1;
+		spin_unlock_irqrestore(&atimer->lock, flags);
+		return;
+	}
+
+	ct_xfitimer_irq_stop(atimer);
+	update = ct_xfitimer_reprogram(atimer);
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	if (update)
+		ct_xfitimer_check_period(atimer);
+}
+
+static void ct_xfitimer_start(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	if (list_empty(&ti->running_list))
+		atimer->wc = ct_xfitimer_get_wc(atimer);
+	list_add(&ti->running_list, &atimer->running_head);
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	ct_xfitimer_update(atimer);
+}
+
+static void ct_xfitimer_stop(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	list_del_init(&ti->running_list);
+	ti->need_update = 0;
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	ct_xfitimer_update(atimer);
+}
+
+static void ct_xfitimer_free_global(struct ct_timer *atimer)
+{
+	ct_xfitimer_irq_stop(atimer);
+}
+
+static struct ct_timer_ops ct_xfitimer_ops = {
+	.prepare = ct_xfitimer_prepare,
+	.start = ct_xfitimer_start,
+	.stop = ct_xfitimer_stop,
+	.interrupt = ct_xfitimer_callback,
+	.free_global = ct_xfitimer_free_global,
+};
+
+/*
+ * timer instance
+ */
+
+struct ct_timer_instance *
+ct_timer_instance_new(struct ct_timer *atimer, struct ct_atc_pcm *apcm)
+{
+	struct ct_timer_instance *ti;
+
+	ti = kzalloc(sizeof(*ti), GFP_KERNEL);
+	if (!ti)
+		return NULL;
+	spin_lock_init(&ti->lock);
+	INIT_LIST_HEAD(&ti->instance_list);
+	INIT_LIST_HEAD(&ti->running_list);
+	ti->timer_base = atimer;
+	ti->apcm = apcm;
+	ti->substream = apcm->substream;
+	if (atimer->ops->init)
+		atimer->ops->init(ti);
+
+	spin_lock_irq(&atimer->list_lock);
+	list_add(&ti->instance_list, &atimer->instance_head);
+	spin_unlock_irq(&atimer->list_lock);
+
+	return ti;
+}
+
+void ct_timer_prepare(struct ct_timer_instance *ti)
+{
+	if (ti->timer_base->ops->prepare)
+		ti->timer_base->ops->prepare(ti);
+	ti->position = 0;
+	ti->running = 0;
+}
+
+void ct_timer_start(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	atimer->ops->start(ti);
+}
+
+void ct_timer_stop(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	atimer->ops->stop(ti);
+}
+
+void ct_timer_instance_free(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+
+	atimer->ops->stop(ti); /* to be sure */
+	if (atimer->ops->free_instance)
+		atimer->ops->free_instance(ti);
+
+	spin_lock_irq(&atimer->list_lock);
+	list_del(&ti->instance_list);
+	spin_unlock_irq(&atimer->list_lock);
+
+	kfree(ti);
+}
+
+/*
+ * timer manager
+ */
+
+static void ct_timer_interrupt(void *data, unsigned int status)
+{
+	struct ct_timer *timer = data;
+
+	/* Interval timer interrupt */
+	if ((status & IT_INT) && timer->ops->interrupt)
+		timer->ops->interrupt(timer);
+}
+
+struct ct_timer *ct_timer_new(struct ct_atc *atc)
+{
+	struct ct_timer *atimer;
+	struct hw *hw;
+
+	atimer = kzalloc(sizeof(*atimer), GFP_KERNEL);
+	if (!atimer)
+		return NULL;
+	spin_lock_init(&atimer->lock);
+	spin_lock_init(&atimer->list_lock);
+	INIT_LIST_HEAD(&atimer->instance_head);
+	INIT_LIST_HEAD(&atimer->running_head);
+	atimer->atc = atc;
+	hw = atc->hw;
+	if (!use_system_timer && hw->set_timer_irq) {
+		snd_printd(KERN_INFO "ctxfi: Use xfi-native timer\n");
+		atimer->ops = &ct_xfitimer_ops;
+		hw->irq_callback_data = atimer;
+		hw->irq_callback = ct_timer_interrupt;
+	} else {
+		snd_printd(KERN_INFO "ctxfi: Use system timer\n");
+		atimer->ops = &ct_systimer_ops;
+	}
+	return atimer;
+}
+
+void ct_timer_free(struct ct_timer *atimer)
+{
+	struct hw *hw = atimer->atc->hw;
+	hw->irq_callback = NULL;
+	if (atimer->ops->free_global)
+		atimer->ops->free_global(atimer);
+	kfree(atimer);
+}
+
diff --git a/sound/pci/ctxfi/cttimer.h b/sound/pci/ctxfi/cttimer.h
new file mode 100644
index 0000000..9793482
--- /dev/null
+++ b/sound/pci/ctxfi/cttimer.h
@@ -0,0 +1,29 @@
+/*
+ * Timer handling
+ */
+
+#ifndef __CTTIMER_H
+#define __CTTIMER_H
+
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+struct snd_pcm_substream;
+struct ct_atc;
+struct ct_atc_pcm;
+
+struct ct_timer;
+struct ct_timer_instance;
+
+struct ct_timer *ct_timer_new(struct ct_atc *atc);
+void ct_timer_free(struct ct_timer *atimer);
+
+struct ct_timer_instance *
+ct_timer_instance_new(struct ct_timer *atimer, struct ct_atc_pcm *apcm);
+void ct_timer_instance_free(struct ct_timer_instance *ti);
+void ct_timer_start(struct ct_timer_instance *ti);
+void ct_timer_stop(struct ct_timer_instance *ti);
+void ct_timer_prepare(struct ct_timer_instance *ti);
+
+#endif /* __CTTIMER_H */
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
new file mode 100644
index 0000000..67665a7
--- /dev/null
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -0,0 +1,250 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctvmem.c
+ *
+ * @Brief
+ * This file contains the implementation of virtual memory management object
+ * for card device.
+ *
+ * @Author Liu Chun
+ * @Date Apr 1 2008
+ */
+
+#include "ctvmem.h"
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <sound/pcm.h>
+
+#define CT_PTES_PER_PAGE (CT_PAGE_SIZE / sizeof(void *))
+#define CT_ADDRS_PER_PAGE (CT_PTES_PER_PAGE * CT_PAGE_SIZE)
+
+/* *
+ * Find or create vm block based on requested @size.
+ * @size must be page aligned.
+ * */
+static struct ct_vm_block *
+get_vm_block(struct ct_vm *vm, unsigned int size)
+{
+	struct ct_vm_block *block = NULL, *entry;
+	struct list_head *pos;
+
+	size = CT_PAGE_ALIGN(size);
+	if (size > vm->size) {
+		printk(KERN_ERR "ctxfi: Fail! No sufficient device virtural "
+				  "memory space available!\n");
+		return NULL;
+	}
+
+	mutex_lock(&vm->lock);
+	list_for_each(pos, &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		if (entry->size >= size)
+			break; /* found a block that is big enough */
+	}
+	if (pos == &vm->unused)
+		goto out;
+
+	if (entry->size == size) {
+		/* Move the vm node from unused list to used list directly */
+		list_del(&entry->list);
+		list_add(&entry->list, &vm->used);
+		vm->size -= size;
+		block = entry;
+		goto out;
+	}
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (NULL == block)
+		goto out;
+
+	block->addr = entry->addr;
+	block->size = size;
+	list_add(&block->list, &vm->used);
+	entry->addr += size;
+	entry->size -= size;
+	vm->size -= size;
+
+ out:
+	mutex_unlock(&vm->lock);
+	return block;
+}
+
+static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
+{
+	struct ct_vm_block *entry, *pre_ent;
+	struct list_head *pos, *pre;
+
+	block->size = CT_PAGE_ALIGN(block->size);
+
+	mutex_lock(&vm->lock);
+	list_del(&block->list);
+	vm->size += block->size;
+
+	list_for_each(pos, &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		if (entry->addr >= (block->addr + block->size))
+			break; /* found a position */
+	}
+	if (pos == &vm->unused) {
+		list_add_tail(&block->list, &vm->unused);
+		entry = block;
+	} else {
+		if ((block->addr + block->size) == entry->addr) {
+			entry->addr = block->addr;
+			entry->size += block->size;
+			kfree(block);
+		} else {
+			__list_add(&block->list, pos->prev, pos);
+			entry = block;
+		}
+	}
+
+	pos = &entry->list;
+	pre = pos->prev;
+	while (pre != &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		pre_ent = list_entry(pre, struct ct_vm_block, list);
+		if ((pre_ent->addr + pre_ent->size) > entry->addr)
+			break;
+
+		pre_ent->size += entry->size;
+		list_del(pos);
+		kfree(entry);
+		pos = pre;
+		pre = pos->prev;
+	}
+	mutex_unlock(&vm->lock);
+}
+
+/* Map host addr (kmalloced/vmalloced) to device logical addr. */
+static struct ct_vm_block *
+ct_vm_map(struct ct_vm *vm, struct snd_pcm_substream *substream, int size)
+{
+	struct ct_vm_block *block;
+	unsigned int pte_start;
+	unsigned i, pages;
+	unsigned long *ptp;
+
+	block = get_vm_block(vm, size);
+	if (block == NULL) {
+		printk(KERN_ERR "ctxfi: No virtual memory block that is big "
+				  "enough to allocate!\n");
+		return NULL;
+	}
+
+	ptp = vm->ptp[0];
+	pte_start = (block->addr >> CT_PAGE_SHIFT);
+	pages = block->size >> CT_PAGE_SHIFT;
+	for (i = 0; i < pages; i++) {
+		unsigned long addr;
+		addr = snd_pcm_sgbuf_get_addr(substream, i << CT_PAGE_SHIFT);
+		ptp[pte_start + i] = addr;
+	}
+
+	block->size = size;
+	return block;
+}
+
+static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block)
+{
+	/* do unmapping */
+	put_vm_block(vm, block);
+}
+
+/* *
+ * return the host (kmalloced) addr of the @index-th device
+ * page talbe page on success, or NULL on failure.
+ * The first returned NULL indicates the termination.
+ * */
+static void *
+ct_get_ptp_virt(struct ct_vm *vm, int index)
+{
+	void *addr;
+
+	addr = (index >= CT_PTP_NUM) ? NULL : vm->ptp[index];
+
+	return addr;
+}
+
+int ct_vm_create(struct ct_vm **rvm)
+{
+	struct ct_vm *vm;
+	struct ct_vm_block *block;
+	int i;
+
+	*rvm = NULL;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (NULL == vm)
+		return -ENOMEM;
+
+	mutex_init(&vm->lock);
+
+	/* Allocate page table pages */
+	for (i = 0; i < CT_PTP_NUM; i++) {
+		vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (NULL == vm->ptp[i])
+			break;
+	}
+	if (!i) {
+		/* no page table pages are allocated */
+		kfree(vm);
+		return -ENOMEM;
+	}
+	vm->size = CT_ADDRS_PER_PAGE * i;
+	/* Initialise remaining ptps */
+	for (; i < CT_PTP_NUM; i++)
+		vm->ptp[i] = NULL;
+
+	vm->map = ct_vm_map;
+	vm->unmap = ct_vm_unmap;
+	vm->get_ptp_virt = ct_get_ptp_virt;
+	INIT_LIST_HEAD(&vm->unused);
+	INIT_LIST_HEAD(&vm->used);
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (NULL != block) {
+		block->addr = 0;
+		block->size = vm->size;
+		list_add(&block->list, &vm->unused);
+	}
+
+	*rvm = vm;
+	return 0;
+}
+
+/* The caller must ensure no mapping pages are being used
+ * by hardware before calling this function */
+void ct_vm_destroy(struct ct_vm *vm)
+{
+	int i;
+	struct list_head *pos;
+	struct ct_vm_block *entry;
+
+	/* free used and unused list nodes */
+	while (!list_empty(&vm->used)) {
+		pos = vm->used.next;
+		list_del(pos);
+		entry = list_entry(pos, struct ct_vm_block, list);
+		kfree(entry);
+	}
+	while (!list_empty(&vm->unused)) {
+		pos = vm->unused.next;
+		list_del(pos);
+		entry = list_entry(pos, struct ct_vm_block, list);
+		kfree(entry);
+	}
+
+	/* free allocated page table pages */
+	for (i = 0; i < CT_PTP_NUM; i++)
+		kfree(vm->ptp[i]);
+
+	vm->size = 0;
+
+	kfree(vm);
+}
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
new file mode 100644
index 0000000..01e4fd0
--- /dev/null
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctvmem.h
+ *
+ * @Brief
+ * This file contains the definition of virtual memory management object
+ * for card device.
+ *
+ * @Author Liu Chun
+ * @Date Mar 28 2008
+ */
+
+#ifndef CTVMEM_H
+#define CTVMEM_H
+
+#define CT_PTP_NUM	1	/* num of device page table pages */
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+
+/* The chip can handle the page table of 4k pages
+ * (emu20k1 can handle even 8k pages, but we don't use it right now)
+ */
+#define CT_PAGE_SIZE	4096
+#define CT_PAGE_SHIFT	12
+#define CT_PAGE_MASK	(~(PAGE_SIZE - 1))
+#define CT_PAGE_ALIGN(addr)	ALIGN(addr, CT_PAGE_SIZE)
+
+struct ct_vm_block {
+	unsigned int addr;	/* starting logical addr of this block */
+	unsigned int size;	/* size of this device virtual mem block */
+	struct list_head list;
+};
+
+struct snd_pcm_substream;
+
+/* Virtual memory management object for card device */
+struct ct_vm {
+	void *ptp[CT_PTP_NUM];		/* Device page table pages */
+	unsigned int size;		/* Available addr space in bytes */
+	struct list_head unused;	/* List of unused blocks */
+	struct list_head used;		/* List of used blocks */
+	struct mutex lock;
+
+	/* Map host addr (kmalloced/vmalloced) to device logical addr. */
+	struct ct_vm_block *(*map)(struct ct_vm *, struct snd_pcm_substream *,
+				   int size);
+	/* Unmap device logical addr area. */
+	void (*unmap)(struct ct_vm *, struct ct_vm_block *block);
+	void *(*get_ptp_virt)(struct ct_vm *vm, int index);
+};
+
+int ct_vm_create(struct ct_vm **rvm);
+void ct_vm_destroy(struct ct_vm *vm);
+
+#endif /* CTVMEM_H */
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
new file mode 100644
index 0000000..2d3dd89
--- /dev/null
+++ b/sound/pci/ctxfi/xfi.c
@@ -0,0 +1,142 @@
+/*
+ * xfi linux driver.
+ *
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/moduleparam.h>
+#include <linux/pci_ids.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include "ctatc.h"
+#include "cthardware.h"
+
+MODULE_AUTHOR("Creative Technology Ltd");
+MODULE_DESCRIPTION("X-Fi driver version 1.03");
+MODULE_LICENSE("GPL v2");
+MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}");
+
+static unsigned int reference_rate = 48000;
+static unsigned int multiple = 2;
+MODULE_PARM_DESC(reference_rate, "Reference rate (default=48000)");
+module_param(reference_rate, uint, S_IRUGO);
+MODULE_PARM_DESC(multiple, "Rate multiplier (default=2)");
+module_param(multiple, uint, S_IRUGO);
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+module_param_array(index, int, NULL, 0444);
+MODULE_PARM_DESC(index, "Index value for Creative X-Fi driver");
+module_param_array(id, charp, NULL, 0444);
+MODULE_PARM_DESC(id, "ID string for Creative X-Fi driver");
+module_param_array(enable, bool, NULL, 0444);
+MODULE_PARM_DESC(enable, "Enable Creative X-Fi driver");
+
+static struct pci_device_id ct_pci_dev_ids[] = {
+	/* only X-Fi is supported, so... */
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1),
+	  .driver_data = ATC20K1,
+	},
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2),
+	  .driver_data = ATC20K2,
+	},
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids);
+
+static int __devinit
+ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+	static int dev;
+	struct snd_card *card;
+	struct ct_atc *atc;
+	int err;
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	if (err)
+		return err;
+	if ((reference_rate != 48000) && (reference_rate != 44100)) {
+		printk(KERN_ERR "ctxfi: Invalid reference_rate value %u!!!\n",
+		       reference_rate);
+		printk(KERN_ERR "ctxfi: The valid values for reference_rate "
+		       "are 48000 and 44100, Value 48000 is assumed.\n");
+		reference_rate = 48000;
+	}
+	if ((multiple != 1) && (multiple != 2)) {
+		printk(KERN_ERR "ctxfi: Invalid multiple value %u!!!\n",
+		       multiple);
+		printk(KERN_ERR "ctxfi: The valid values for multiple are "
+		       "1 and 2, Value 2 is assumed.\n");
+		multiple = 2;
+	}
+	err = ct_atc_create(card, pci, reference_rate, multiple,
+			    pci_id->driver_data, &atc);
+	if (err < 0)
+		goto error;
+
+	card->private_data = atc;
+
+	/* Create alsa devices supported by this card */
+	err = ct_atc_create_alsa_devs(atc);
+	if (err < 0)
+		goto error;
+
+	strcpy(card->driver, "SB-XFi");
+	strcpy(card->shortname, "Creative X-Fi");
+	snprintf(card->longname, sizeof(card->longname), "%s %s %s",
+		 card->shortname, atc->chip_name, atc->model_name);
+
+	err = snd_card_register(card);
+	if (err < 0)
+		goto error;
+
+	pci_set_drvdata(pci, card);
+	dev++;
+
+	return 0;
+
+error:
+	snd_card_free(card);
+	return err;
+}
+
+static void __devexit ct_card_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+static struct pci_driver ct_driver = {
+	.name = "SB-XFi",
+	.id_table = ct_pci_dev_ids,
+	.probe = ct_card_probe,
+	.remove = __devexit_p(ct_card_remove),
+};
+
+static int __init ct_card_init(void)
+{
+	return pci_register_driver(&ct_driver);
+}
+
+static void __exit ct_card_exit(void)
+{
+	pci_unregister_driver(&ct_driver);
+}
+
+module_init(ct_card_init)
+module_exit(ct_card_exit)
diff --git a/sound/pci/emu10k1/Makefile b/sound/pci/emu10k1/Makefile
index cf2d563..fc5591e 100644
--- a/sound/pci/emu10k1/Makefile
+++ b/sound/pci/emu10k1/Makefile
@@ -9,15 +9,7 @@
 snd-emu10k1-synth-objs := emu10k1_synth.o emu10k1_callback.o emu10k1_patch.o
 snd-emu10k1x-objs := emu10k1x.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_EMU10K1) += snd-emu10k1.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-emu10k1-synth.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-emu10k1-synth.o
 obj-$(CONFIG_SND_EMU10K1X) += snd-emu10k1x.o
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 1970f0e..4d3ad79 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -858,7 +858,6 @@
 	}
 
 	pcm->info_flags = 0;
-	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
 	switch(device) {
 	case 0:
 		strcpy(pcm->name, "EMU10K1X Front");
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 78f62fd..55b83ef 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1736,7 +1736,7 @@
 	.buffer_bytes_max =	(128*1024),
 	.period_bytes_min =	1024,
 	.period_bytes_max =	(128*1024),
-	.periods_min =		1,
+	.periods_min =		2,
 	.periods_max =		1024,
 	.fifo_size =		0,
 };
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index eb2a19b..c710150 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -139,6 +139,19 @@
 	  snd-hda-codec-conexant.
 	  This module is automatically loaded at probing.
 
+config SND_HDA_CODEC_CA0110
+	bool "Build Creative CA0110-IBG codec support"
+	depends on SND_HDA_INTEL
+	default y
+	help
+	  Say Y here to include Creative CA0110-IBG codec support in
+	  snd-hda-intel driver, found on some Creative X-Fi cards.
+
+	  When the HD-audio driver is built as a module, the codec
+	  support code is also built as another module,
+	  snd-hda-codec-ca0110.
+	  This module is automatically loaded at probing.
+
 config SND_HDA_CODEC_CMEDIA
 	bool "Build C-Media HD-audio codec support"
 	default y
diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile
index 50f9d09..e3081d4 100644
--- a/sound/pci/hda/Makefile
+++ b/sound/pci/hda/Makefile
@@ -13,6 +13,7 @@
 snd-hda-codec-idt-objs :=	patch_sigmatel.o
 snd-hda-codec-si3054-objs :=	patch_si3054.o
 snd-hda-codec-atihdmi-objs :=	patch_atihdmi.o
+snd-hda-codec-ca0110-objs :=	patch_ca0110.o
 snd-hda-codec-conexant-objs :=	patch_conexant.o
 snd-hda-codec-via-objs :=	patch_via.o
 snd-hda-codec-nvhdmi-objs :=	patch_nvhdmi.o
@@ -40,6 +41,9 @@
 ifdef CONFIG_SND_HDA_CODEC_ATIHDMI
 obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-atihdmi.o
 endif
+ifdef CONFIG_SND_HDA_CODEC_CA0110
+obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-ca0110.o
+endif
 ifdef CONFIG_SND_HDA_CODEC_CONEXANT
 obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-conexant.o
 endif
diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c
index 4de5bac..29272f2 100644
--- a/sound/pci/hda/hda_beep.c
+++ b/sound/pci/hda/hda_beep.c
@@ -45,6 +45,46 @@
 			AC_VERB_SET_BEEP_CONTROL, beep->tone);
 }
 
+/* (non-standard) Linear beep tone calculation for IDT/STAC codecs 
+ *
+ * The tone frequency of beep generator on IDT/STAC codecs is
+ * defined from the 8bit tone parameter, in Hz,
+ *    freq = 48000 * (257 - tone) / 1024
+ * that is from 12kHz to 93.75kHz in step of 46.875 hz
+ */
+static int beep_linear_tone(struct hda_beep *beep, int hz)
+{
+	hz *= 1000; /* fixed point */
+	hz = hz - DIGBEEP_HZ_MIN;
+	if (hz < 0)
+		hz = 0; /* turn off PC beep*/
+	else if (hz >= (DIGBEEP_HZ_MAX - DIGBEEP_HZ_MIN))
+		hz = 0xff;
+	else {
+		hz /= DIGBEEP_HZ_STEP;
+		hz++;
+	}
+	return hz;
+}
+
+/* HD-audio standard beep tone parameter calculation
+ *
+ * The tone frequency in Hz is calculated as
+ *   freq = 48000 / (tone * 4)
+ * from 47Hz to 12kHz
+ */
+static int beep_standard_tone(struct hda_beep *beep, int hz)
+{
+	if (hz <= 0)
+		return 0; /* disabled */
+	hz = 12000 / hz;
+	if (hz > 0xff)
+		return 0xff;
+	if (hz <= 0)
+		return 1;
+	return hz;
+}
+
 static int snd_hda_beep_event(struct input_dev *dev, unsigned int type,
 				unsigned int code, int hz)
 {
@@ -55,21 +95,14 @@
 		if (hz)
 			hz = 1000;
 	case SND_TONE:
-		hz *= 1000; /* fixed point */
-		hz = hz - DIGBEEP_HZ_MIN;
-		if (hz < 0)
-			hz = 0; /* turn off PC beep*/
-		else if (hz >= (DIGBEEP_HZ_MAX - DIGBEEP_HZ_MIN))
-			hz = 0xff;
-		else {
-			hz /= DIGBEEP_HZ_STEP;
-			hz++;
-		}
+		if (beep->linear_tone)
+			beep->tone = beep_linear_tone(beep, hz);
+		else
+			beep->tone = beep_standard_tone(beep, hz);
 		break;
 	default:
 		return -1;
 	}
-	beep->tone = hz;
 
 	/* schedule beep event */
 	schedule_work(&beep->beep_work);
diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h
index 51bf6a5..0c3de78 100644
--- a/sound/pci/hda/hda_beep.h
+++ b/sound/pci/hda/hda_beep.h
@@ -30,8 +30,9 @@
 	struct hda_codec *codec;
 	char phys[32];
 	int tone;
-	int nid;
-	int enabled;
+	hda_nid_t nid;
+	unsigned int enabled:1;
+	unsigned int linear_tone:1;	/* linear tone for IDT/STAC codec */
 	struct work_struct beep_work; /* scheduled task for beep event */
 };
 
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 8820faf..562403a 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -48,6 +48,7 @@
 	{ 0x1095, "Silicon Image" },
 	{ 0x10de, "Nvidia" },
 	{ 0x10ec, "Realtek" },
+	{ 0x1102, "Creative" },
 	{ 0x1106, "VIA" },
 	{ 0x111d, "IDT" },
 	{ 0x11c1, "LSI" },
@@ -157,6 +158,39 @@
 	return val;
 }
 
+/*
+ * Send and receive a verb
+ */
+static int codec_exec_verb(struct hda_codec *codec, unsigned int cmd,
+			   unsigned int *res)
+{
+	struct hda_bus *bus = codec->bus;
+	int err;
+
+	if (res)
+		*res = -1;
+ again:
+	snd_hda_power_up(codec);
+	mutex_lock(&bus->cmd_mutex);
+	err = bus->ops.command(bus, cmd);
+	if (!err && res)
+		*res = bus->ops.get_response(bus);
+	mutex_unlock(&bus->cmd_mutex);
+	snd_hda_power_down(codec);
+	if (res && *res == -1 && bus->rirb_error) {
+		if (bus->response_reset) {
+			snd_printd("hda_codec: resetting BUS due to "
+				   "fatal communication error\n");
+			bus->ops.bus_reset(bus);
+		}
+		goto again;
+	}
+	/* clear reset-flag when the communication gets recovered */
+	if (!err)
+		bus->response_reset = 0;
+	return err;
+}
+
 /**
  * snd_hda_codec_read - send a command and get the response
  * @codec: the HDA codec
@@ -173,18 +207,9 @@
 				int direct,
 				unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
+	unsigned cmd = make_codec_cmd(codec, nid, direct, verb, parm);
 	unsigned int res;
-
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
-	if (!bus->ops.command(bus, res))
-		res = bus->ops.get_response(bus);
-	else
-		res = (unsigned int)-1;
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
+	codec_exec_verb(codec, cmd, &res);
 	return res;
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_read);
@@ -204,17 +229,10 @@
 int snd_hda_codec_write(struct hda_codec *codec, hda_nid_t nid, int direct,
 			 unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
+	unsigned int cmd = make_codec_cmd(codec, nid, direct, verb, parm);
 	unsigned int res;
-	int err;
-
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
-	err = bus->ops.command(bus, res);
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
-	return err;
+	return codec_exec_verb(codec, cmd,
+			       codec->bus->sync_write ? &res : NULL);
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_write);
 
@@ -613,7 +631,10 @@
 	const struct hda_vendor_id *c;
 	const char *vendor = NULL;
 	u16 vendor_id = codec->vendor_id >> 16;
-	char tmp[16], name[32];
+	char tmp[16];
+
+	if (codec->vendor_name)
+		goto get_chip_name;
 
 	for (c = hda_vendor_ids; c->id; c++) {
 		if (c->id == vendor_id) {
@@ -625,14 +646,21 @@
 		sprintf(tmp, "Generic %04x", vendor_id);
 		vendor = tmp;
 	}
+	codec->vendor_name = kstrdup(vendor, GFP_KERNEL);
+	if (!codec->vendor_name)
+		return -ENOMEM;
+
+ get_chip_name:
+	if (codec->chip_name)
+		return 0;
+
 	if (codec->preset && codec->preset->name)
-		snprintf(name, sizeof(name), "%s %s", vendor,
-			 codec->preset->name);
-	else
-		snprintf(name, sizeof(name), "%s ID %x", vendor,
-			 codec->vendor_id & 0xffff);
-	codec->name = kstrdup(name, GFP_KERNEL);
-	if (!codec->name)
+		codec->chip_name = kstrdup(codec->preset->name, GFP_KERNEL);
+	else {
+		sprintf(tmp, "ID %x", codec->vendor_id & 0xffff);
+		codec->chip_name = kstrdup(tmp, GFP_KERNEL);
+	}
+	if (!codec->chip_name)
 		return -ENOMEM;
 	return 0;
 }
@@ -838,7 +866,8 @@
 	module_put(codec->owner);
 	free_hda_cache(&codec->amp_cache);
 	free_hda_cache(&codec->cmd_cache);
-	kfree(codec->name);
+	kfree(codec->vendor_name);
+	kfree(codec->chip_name);
 	kfree(codec->modelname);
 	kfree(codec->wcaps);
 	kfree(codec);
@@ -979,15 +1008,16 @@
 	int err;
 
 	codec->preset = find_codec_preset(codec);
-	if (!codec->name) {
+	if (!codec->vendor_name || !codec->chip_name) {
 		err = get_codec_name(codec);
 		if (err < 0)
 			return err;
 	}
 	/* audio codec should override the mixer name */
 	if (codec->afg || !*codec->bus->card->mixername)
-		strlcpy(codec->bus->card->mixername, codec->name,
-			sizeof(codec->bus->card->mixername));
+		snprintf(codec->bus->card->mixername,
+			 sizeof(codec->bus->card->mixername),
+			 "%s %s", codec->vendor_name, codec->chip_name);
 
 	if (is_generic_config(codec)) {
 		err = snd_hda_parse_generic_codec(codec);
@@ -1055,6 +1085,8 @@
 /* FIXME: more better hash key? */
 #define HDA_HASH_KEY(nid,dir,idx) (u32)((nid) + ((idx) << 16) + ((dir) << 24))
 #define HDA_HASH_PINCAP_KEY(nid) (u32)((nid) + (0x02 << 24))
+#define HDA_HASH_PARPCM_KEY(nid) (u32)((nid) + (0x03 << 24))
+#define HDA_HASH_PARSTR_KEY(nid) (u32)((nid) + (0x04 << 24))
 #define INFO_AMP_CAPS	(1<<0)
 #define INFO_AMP_VOL(ch)	(1 << (1 + (ch)))
 
@@ -1145,19 +1177,32 @@
 }
 EXPORT_SYMBOL_HDA(snd_hda_override_amp_caps);
 
-u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid)
+static unsigned int
+query_caps_hash(struct hda_codec *codec, hda_nid_t nid, u32 key,
+		unsigned int (*func)(struct hda_codec *, hda_nid_t))
 {
 	struct hda_amp_info *info;
 
-	info = get_alloc_amp_hash(codec, HDA_HASH_PINCAP_KEY(nid));
+	info = get_alloc_amp_hash(codec, key);
 	if (!info)
 		return 0;
 	if (!info->head.val) {
-		info->amp_caps = snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP);
 		info->head.val |= INFO_AMP_CAPS;
+		info->amp_caps = func(codec, nid);
 	}
 	return info->amp_caps;
 }
+
+static unsigned int read_pin_cap(struct hda_codec *codec, hda_nid_t nid)
+{
+	return snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP);
+}
+
+u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PINCAP_KEY(nid),
+			       read_pin_cap);
+}
 EXPORT_SYMBOL_HDA(snd_hda_query_pin_caps);
 
 /*
@@ -1432,6 +1477,8 @@
 	memset(&id, 0, sizeof(id));
 	id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
 	id.index = idx;
+	if (snd_BUG_ON(strlen(name) >= sizeof(id.name)))
+		return NULL;
 	strcpy(id.name, name);
 	return snd_ctl_find_id(codec->bus->card, &id);
 }
@@ -2242,28 +2289,22 @@
 int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid,
 			      int direct, unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
-	unsigned int res;
-	int err;
+	int err = snd_hda_codec_write(codec, nid, direct, verb, parm);
+	struct hda_cache_head *c;
+	u32 key;
 
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
-	err = bus->ops.command(bus, res);
-	if (!err) {
-		struct hda_cache_head *c;
-		u32 key;
-		/* parm may contain the verb stuff for get/set amp */
-		verb = verb | (parm >> 8);
-		parm &= 0xff;
-		key = build_cmd_cache_key(nid, verb);
-		c = get_alloc_hash(&codec->cmd_cache, key);
-		if (c)
-			c->val = parm;
-	}
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
-	return err;
+	if (err < 0)
+		return err;
+	/* parm may contain the verb stuff for get/set amp */
+	verb = verb | (parm >> 8);
+	parm &= 0xff;
+	key = build_cmd_cache_key(nid, verb);
+	mutex_lock(&codec->bus->cmd_mutex);
+	c = get_alloc_hash(&codec->cmd_cache, key);
+	if (c)
+		c->val = parm;
+	mutex_unlock(&codec->bus->cmd_mutex);
+	return 0;
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_write_cache);
 
@@ -2321,7 +2362,8 @@
 		if (wcaps & AC_WCAP_POWER) {
 			unsigned int wid_type = (wcaps & AC_WCAP_TYPE) >>
 				AC_WCAP_TYPE_SHIFT;
-			if (wid_type == AC_WID_PIN) {
+			if (power_state == AC_PWRST_D3 &&
+			    wid_type == AC_WID_PIN) {
 				unsigned int pincap;
 				/*
 				 * don't power down the widget if it controls
@@ -2333,7 +2375,7 @@
 						nid, 0,
 						AC_VERB_GET_EAPD_BTLENABLE, 0);
 					eapd &= 0x02;
-					if (power_state == AC_PWRST_D3 && eapd)
+					if (eapd)
 						continue;
 				}
 			}
@@ -2544,6 +2586,41 @@
 }
 EXPORT_SYMBOL_HDA(snd_hda_calc_stream_format);
 
+static unsigned int get_pcm_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int val = 0;
+	if (nid != codec->afg &&
+	    (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD))
+		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
+	if (!val || val == -1)
+		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
+	if (!val || val == -1)
+		return 0;
+	return val;
+}
+
+static unsigned int query_pcm_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PARPCM_KEY(nid),
+			       get_pcm_param);
+}
+
+static unsigned int get_stream_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
+	if (!streams || streams == -1)
+		streams = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM);
+	if (!streams || streams == -1)
+		return 0;
+	return streams;
+}
+
+static unsigned int query_stream_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PARSTR_KEY(nid),
+			       get_stream_param);
+}
+
 /**
  * snd_hda_query_supported_pcm - query the supported PCM rates and formats
  * @codec: the HDA codec
@@ -2562,15 +2639,8 @@
 {
 	unsigned int i, val, wcaps;
 
-	val = 0;
 	wcaps = get_wcaps(codec, nid);
-	if (nid != codec->afg && (wcaps & AC_WCAP_FORMAT_OVRD)) {
-		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
-		if (val == -1)
-			return -EIO;
-	}
-	if (!val)
-		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
+	val = query_pcm_param(codec, nid);
 
 	if (ratesp) {
 		u32 rates = 0;
@@ -2592,15 +2662,9 @@
 		u64 formats = 0;
 		unsigned int streams, bps;
 
-		streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
-		if (streams == -1)
+		streams = query_stream_param(codec, nid);
+		if (!streams)
 			return -EIO;
-		if (!streams) {
-			streams = snd_hda_param_read(codec, codec->afg,
-						     AC_PAR_STREAM);
-			if (streams == -1)
-				return -EIO;
-		}
 
 		bps = 0;
 		if (streams & AC_SUPFMT_PCM) {
@@ -2674,17 +2738,9 @@
 	int i;
 	unsigned int val = 0, rate, stream;
 
-	if (nid != codec->afg &&
-	    (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD)) {
-		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
-		if (val == -1)
-			return 0;
-	}
-	if (!val) {
-		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
-		if (val == -1)
-			return 0;
-	}
+	val = query_pcm_param(codec, nid);
+	if (!val)
+		return 0;
 
 	rate = format & 0xff00;
 	for (i = 0; i < AC_PAR_PCM_RATE_BITS; i++)
@@ -2696,12 +2752,8 @@
 	if (i >= AC_PAR_PCM_RATE_BITS)
 		return 0;
 
-	stream = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
-	if (stream == -1)
-		return 0;
-	if (!stream && nid != codec->afg)
-		stream = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM);
-	if (!stream || stream == -1)
+	stream = query_stream_param(codec, nid);
+	if (!stream)
 		return 0;
 
 	if (stream & AC_SUPFMT_PCM) {
@@ -3835,11 +3887,10 @@
 /**
  * snd_hda_suspend - suspend the codecs
  * @bus: the HDA bus
- * @state: suspsend state
  *
  * Returns 0 if successful.
  */
-int snd_hda_suspend(struct hda_bus *bus, pm_message_t state)
+int snd_hda_suspend(struct hda_bus *bus)
 {
 	struct hda_codec *codec;
 
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 2fdecf4..cad79ef 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -574,6 +574,8 @@
 	/* attach a PCM stream */
 	int (*attach_pcm)(struct hda_bus *bus, struct hda_codec *codec,
 			  struct hda_pcm *pcm);
+	/* reset bus for retry verb */
+	void (*bus_reset)(struct hda_bus *bus);
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	/* notify power-up/down from codec to controller */
 	void (*pm_notify)(struct hda_bus *bus);
@@ -622,7 +624,13 @@
 
 	/* misc op flags */
 	unsigned int needs_damn_long_delay :1;
+	unsigned int allow_bus_reset:1;	/* allow bus reset at fatal error */
+	unsigned int sync_write:1;	/* sync after verb write */
+	/* status for codec/controller */
 	unsigned int shutdown :1;	/* being unloaded */
+	unsigned int rirb_error:1;	/* error in codec communication */
+	unsigned int response_reset:1;	/* controller was reset */
+	unsigned int in_reset:1;	/* during reset operation */
 };
 
 /*
@@ -747,7 +755,8 @@
 	/* detected preset */
 	const struct hda_codec_preset *preset;
 	struct module *owner;
-	const char *name;	/* codec name */
+	const char *vendor_name;	/* codec vendor name */
+	const char *chip_name;		/* codec chip name */
 	const char *modelname;	/* model name for preset */
 
 	/* set by patch */
@@ -905,7 +914,7 @@
  * power management
  */
 #ifdef CONFIG_PM
-int snd_hda_suspend(struct hda_bus *bus, pm_message_t state);
+int snd_hda_suspend(struct hda_bus *bus);
 int snd_hda_resume(struct hda_bus *bus);
 #endif
 
diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c
index 1c57505..6812fbe 100644
--- a/sound/pci/hda/hda_hwdep.c
+++ b/sound/pci/hda/hda_hwdep.c
@@ -242,7 +242,8 @@
 CODEC_INFO_SHOW(revision_id);
 CODEC_INFO_SHOW(afg);
 CODEC_INFO_SHOW(mfg);
-CODEC_INFO_STR_SHOW(name);
+CODEC_INFO_STR_SHOW(vendor_name);
+CODEC_INFO_STR_SHOW(chip_name);
 CODEC_INFO_STR_SHOW(modelname);
 
 #define CODEC_INFO_STORE(type)					\
@@ -275,7 +276,8 @@
 CODEC_INFO_STORE(vendor_id);
 CODEC_INFO_STORE(subsystem_id);
 CODEC_INFO_STORE(revision_id);
-CODEC_INFO_STR_STORE(name);
+CODEC_INFO_STR_STORE(vendor_name);
+CODEC_INFO_STR_STORE(chip_name);
 CODEC_INFO_STR_STORE(modelname);
 
 #define CODEC_ACTION_STORE(type)				\
@@ -499,7 +501,8 @@
 	CODEC_ATTR_RW(revision_id),
 	CODEC_ATTR_RO(afg),
 	CODEC_ATTR_RO(mfg),
-	CODEC_ATTR_RW(name),
+	CODEC_ATTR_RW(vendor_name),
+	CODEC_ATTR_RW(chip_name),
 	CODEC_ATTR_RW(modelname),
 	CODEC_ATTR_RW(init_verbs),
 	CODEC_ATTR_RW(hints),
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 3128e1a..4e9ea70 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -128,21 +128,33 @@
 			 "{ULI, M5461}}");
 MODULE_DESCRIPTION("Intel HDA driver");
 
+#ifdef CONFIG_SND_VERBOSE_PRINTK
+#define SFX	/* nop */
+#else
 #define SFX	"hda-intel: "
-
+#endif
 
 /*
  * registers
  */
 #define ICH6_REG_GCAP			0x00
+#define   ICH6_GCAP_64OK	(1 << 0)   /* 64bit address support */
+#define   ICH6_GCAP_NSDO	(3 << 1)   /* # of serial data out signals */
+#define   ICH6_GCAP_BSS		(31 << 3)  /* # of bidirectional streams */
+#define   ICH6_GCAP_ISS		(15 << 8)  /* # of input streams */
+#define   ICH6_GCAP_OSS		(15 << 12) /* # of output streams */
 #define ICH6_REG_VMIN			0x02
 #define ICH6_REG_VMAJ			0x03
 #define ICH6_REG_OUTPAY			0x04
 #define ICH6_REG_INPAY			0x06
 #define ICH6_REG_GCTL			0x08
+#define   ICH6_GCTL_RESET	(1 << 0)   /* controller reset */
+#define   ICH6_GCTL_FCNTRL	(1 << 1)   /* flush control */
+#define   ICH6_GCTL_UNSOL	(1 << 8)   /* accept unsol. response enable */
 #define ICH6_REG_WAKEEN			0x0c
 #define ICH6_REG_STATESTS		0x0e
 #define ICH6_REG_GSTS			0x10
+#define   ICH6_GSTS_FSTS	(1 << 1)   /* flush status */
 #define ICH6_REG_INTCTL			0x20
 #define ICH6_REG_INTSTS			0x24
 #define ICH6_REG_WALCLK			0x30
@@ -150,17 +162,27 @@
 #define ICH6_REG_CORBLBASE		0x40
 #define ICH6_REG_CORBUBASE		0x44
 #define ICH6_REG_CORBWP			0x48
-#define ICH6_REG_CORBRP			0x4A
+#define ICH6_REG_CORBRP			0x4a
+#define   ICH6_CORBRP_RST	(1 << 15)  /* read pointer reset */
 #define ICH6_REG_CORBCTL		0x4c
+#define   ICH6_CORBCTL_RUN	(1 << 1)   /* enable DMA */
+#define   ICH6_CORBCTL_CMEIE	(1 << 0)   /* enable memory error irq */
 #define ICH6_REG_CORBSTS		0x4d
+#define   ICH6_CORBSTS_CMEI	(1 << 0)   /* memory error indication */
 #define ICH6_REG_CORBSIZE		0x4e
 
 #define ICH6_REG_RIRBLBASE		0x50
 #define ICH6_REG_RIRBUBASE		0x54
 #define ICH6_REG_RIRBWP			0x58
+#define   ICH6_RIRBWP_RST	(1 << 15)  /* write pointer reset */
 #define ICH6_REG_RINTCNT		0x5a
 #define ICH6_REG_RIRBCTL		0x5c
+#define   ICH6_RBCTL_IRQ_EN	(1 << 0)   /* enable IRQ */
+#define   ICH6_RBCTL_DMA_EN	(1 << 1)   /* enable DMA */
+#define   ICH6_RBCTL_OVERRUN_EN	(1 << 2)   /* enable overrun irq */
 #define ICH6_REG_RIRBSTS		0x5d
+#define   ICH6_RBSTS_IRQ	(1 << 0)   /* response irq */
+#define   ICH6_RBSTS_OVERRUN	(1 << 2)   /* overrun irq */
 #define ICH6_REG_RIRBSIZE		0x5e
 
 #define ICH6_REG_IC			0x60
@@ -257,16 +279,6 @@
 #define ICH6_INT_CTRL_EN	0x40000000 /* controller interrupt enable bit */
 #define ICH6_INT_GLOBAL_EN	0x80000000 /* global interrupt enable bit */
 
-/* GCTL unsolicited response enable bit */
-#define ICH6_GCTL_UREN		(1<<8)
-
-/* GCTL reset bit */
-#define ICH6_GCTL_RESET		(1<<0)
-
-/* CORB/RIRB control, read/write pointer */
-#define ICH6_RBCTL_DMA_EN	0x02	/* enable DMA */
-#define ICH6_RBCTL_IRQ_EN	0x01	/* enable IRQ */
-#define ICH6_RBRWP_CLR		0x8000	/* read/write pointer clear */
 /* below are so far hardcoded - should read registers in future */
 #define ICH6_MAX_CORB_ENTRIES	256
 #define ICH6_MAX_RIRB_ENTRIES	256
@@ -512,25 +524,25 @@
 	/* set the corb write pointer to 0 */
 	azx_writew(chip, CORBWP, 0);
 	/* reset the corb hw read pointer */
-	azx_writew(chip, CORBRP, ICH6_RBRWP_CLR);
+	azx_writew(chip, CORBRP, ICH6_CORBRP_RST);
 	/* enable corb dma */
-	azx_writeb(chip, CORBCTL, ICH6_RBCTL_DMA_EN);
+	azx_writeb(chip, CORBCTL, ICH6_CORBCTL_RUN);
 
 	/* RIRB set up */
 	chip->rirb.addr = chip->rb.addr + 2048;
 	chip->rirb.buf = (u32 *)(chip->rb.area + 2048);
+	chip->rirb.wp = chip->rirb.rp = chip->rirb.cmds = 0;
 	azx_writel(chip, RIRBLBASE, (u32)chip->rirb.addr);
 	azx_writel(chip, RIRBUBASE, upper_32_bits(chip->rirb.addr));
 
 	/* set the rirb size to 256 entries (ULI requires explicitly) */
 	azx_writeb(chip, RIRBSIZE, 0x02);
 	/* reset the rirb hw write pointer */
-	azx_writew(chip, RIRBWP, ICH6_RBRWP_CLR);
+	azx_writew(chip, RIRBWP, ICH6_RIRBWP_RST);
 	/* set N=1, get RIRB response interrupt for new entry */
 	azx_writew(chip, RINTCNT, 1);
 	/* enable rirb dma and response irq */
 	azx_writeb(chip, RIRBCTL, ICH6_RBCTL_DMA_EN | ICH6_RBCTL_IRQ_EN);
-	chip->rirb.rp = chip->rirb.cmds = 0;
 }
 
 static void azx_free_cmd_io(struct azx *chip)
@@ -606,6 +618,7 @@
 		}
 		if (!chip->rirb.cmds) {
 			smp_rmb();
+			bus->rirb_error = 0;
 			return chip->rirb.res; /* the last value */
 		}
 		if (time_after(jiffies, timeout))
@@ -619,19 +632,21 @@
 	}
 
 	if (chip->msi) {
-		snd_printk(KERN_WARNING "hda_intel: No response from codec, "
+		snd_printk(KERN_WARNING SFX "No response from codec, "
 			   "disabling MSI: last cmd=0x%08x\n", chip->last_cmd);
 		free_irq(chip->irq, chip);
 		chip->irq = -1;
 		pci_disable_msi(chip->pci);
 		chip->msi = 0;
-		if (azx_acquire_irq(chip, 1) < 0)
+		if (azx_acquire_irq(chip, 1) < 0) {
+			bus->rirb_error = 1;
 			return -1;
+		}
 		goto again;
 	}
 
 	if (!chip->polling_mode) {
-		snd_printk(KERN_WARNING "hda_intel: azx_get_response timeout, "
+		snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
 			   "switching to polling mode: last cmd=0x%08x\n",
 			   chip->last_cmd);
 		chip->polling_mode = 1;
@@ -646,14 +661,23 @@
 		return -1;
 	}
 
+	/* a fatal communication error; need either to reset or to fallback
+	 * to the single_cmd mode
+	 */
+	bus->rirb_error = 1;
+	if (bus->allow_bus_reset && !bus->response_reset && !bus->in_reset) {
+		bus->response_reset = 1;
+		return -1; /* give a chance to retry */
+	}
+
 	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, "
 		   "switching to single_cmd mode: last cmd=0x%08x\n",
 		   chip->last_cmd);
-	chip->rirb.rp = azx_readb(chip, RIRBWP);
-	chip->rirb.cmds = 0;
-	/* switch to single_cmd mode */
 	chip->single_cmd = 1;
+	bus->response_reset = 0;
+	/* re-initialize CORB/RIRB */
 	azx_free_cmd_io(chip);
+	azx_init_cmd_io(chip);
 	return -1;
 }
 
@@ -667,12 +691,34 @@
  *       I left the codes, however, for debugging/testing purposes.
  */
 
+/* receive a response */
+static int azx_single_wait_for_response(struct azx *chip)
+{
+	int timeout = 50;
+
+	while (timeout--) {
+		/* check IRV busy bit */
+		if (azx_readw(chip, IRS) & ICH6_IRS_VALID) {
+			/* reuse rirb.res as the response return value */
+			chip->rirb.res = azx_readl(chip, IR);
+			return 0;
+		}
+		udelay(1);
+	}
+	if (printk_ratelimit())
+		snd_printd(SFX "get_response timeout: IRS=0x%x\n",
+			   azx_readw(chip, IRS));
+	chip->rirb.res = -1;
+	return -EIO;
+}
+
 /* send a command */
 static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
 {
 	struct azx *chip = bus->private_data;
 	int timeout = 50;
 
+	bus->rirb_error = 0;
 	while (timeout--) {
 		/* check ICB busy bit */
 		if (!((azx_readw(chip, IRS) & ICH6_IRS_BUSY))) {
@@ -682,7 +728,7 @@
 			azx_writel(chip, IC, val);
 			azx_writew(chip, IRS, azx_readw(chip, IRS) |
 				   ICH6_IRS_BUSY);
-			return 0;
+			return azx_single_wait_for_response(chip);
 		}
 		udelay(1);
 	}
@@ -696,18 +742,7 @@
 static unsigned int azx_single_get_response(struct hda_bus *bus)
 {
 	struct azx *chip = bus->private_data;
-	int timeout = 50;
-
-	while (timeout--) {
-		/* check IRV busy bit */
-		if (azx_readw(chip, IRS) & ICH6_IRS_VALID)
-			return azx_readl(chip, IR);
-		udelay(1);
-	}
-	if (printk_ratelimit())
-		snd_printd(SFX "get_response timeout: IRS=0x%x\n",
-			   azx_readw(chip, IRS));
-	return (unsigned int)-1;
+	return chip->rirb.res;
 }
 
 /*
@@ -775,17 +810,17 @@
 
 	/* check to see if controller is ready */
 	if (!azx_readb(chip, GCTL)) {
-		snd_printd("azx_reset: controller not ready!\n");
+		snd_printd(SFX "azx_reset: controller not ready!\n");
 		return -EBUSY;
 	}
 
 	/* Accept unsolicited responses */
-	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN);
+	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UNSOL);
 
 	/* detect codecs */
 	if (!chip->codec_mask) {
 		chip->codec_mask = azx_readw(chip, STATESTS);
-		snd_printdd("codec_mask = 0x%x\n", chip->codec_mask);
+		snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask);
 	}
 
 	return 0;
@@ -895,8 +930,7 @@
 	azx_int_enable(chip);
 
 	/* initialize the codec command I/O */
-	if (!chip->single_cmd)
-		azx_init_cmd_io(chip);
+	azx_init_cmd_io(chip);
 
 	/* program the position buffer */
 	azx_writel(chip, DPLBASE, (u32)chip->posbuf.addr);
@@ -953,12 +987,12 @@
 	case AZX_DRIVER_SCH:
 		pci_read_config_word(chip->pci, INTEL_SCH_HDA_DEVC, &snoop);
 		if (snoop & INTEL_SCH_HDA_DEVC_NOSNOOP) {
-			pci_write_config_word(chip->pci, INTEL_SCH_HDA_DEVC, \
+			pci_write_config_word(chip->pci, INTEL_SCH_HDA_DEVC,
 				snoop & (~INTEL_SCH_HDA_DEVC_NOSNOOP));
 			pci_read_config_word(chip->pci,
 				INTEL_SCH_HDA_DEVC, &snoop);
-			snd_printdd("HDA snoop disabled, enabling ... %s\n",\
-				(snoop & INTEL_SCH_HDA_DEVC_NOSNOOP) \
+			snd_printdd(SFX "HDA snoop disabled, enabling ... %s\n",
+				(snoop & INTEL_SCH_HDA_DEVC_NOSNOOP)
 				? "Failed" : "OK");
 		}
 		break;
@@ -1012,7 +1046,7 @@
 	/* clear rirb int */
 	status = azx_readb(chip, RIRBSTS);
 	if (status & RIRB_INT_MASK) {
-		if (!chip->single_cmd && (status & RIRB_INT_RESPONSE))
+		if (status & RIRB_INT_RESPONSE)
 			azx_update_rirb(chip);
 		azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
 	}
@@ -1098,7 +1132,7 @@
 				pos_align;
 		pos_adj = frames_to_bytes(runtime, pos_adj);
 		if (pos_adj >= period_bytes) {
-			snd_printk(KERN_WARNING "Too big adjustment %d\n",
+			snd_printk(KERN_WARNING SFX "Too big adjustment %d\n",
 				   bdl_pos_adj[chip->dev_index]);
 			pos_adj = 0;
 		} else {
@@ -1122,7 +1156,7 @@
 	return 0;
 
  error:
-	snd_printk(KERN_ERR "Too many BDL entries: buffer=%d, period=%d\n",
+	snd_printk(KERN_ERR SFX "Too many BDL entries: buffer=%d, period=%d\n",
 		   azx_dev->bufsize, period_bytes);
 	return -EINVAL;
 }
@@ -1215,7 +1249,7 @@
 	chip->probing = 0;
 	if (res == -1)
 		return -EIO;
-	snd_printdd("hda_intel: codec #%d probed OK\n", addr);
+	snd_printdd(SFX "codec #%d probed OK\n", addr);
 	return 0;
 }
 
@@ -1223,6 +1257,26 @@
 				 struct hda_pcm *cpcm);
 static void azx_stop_chip(struct azx *chip);
 
+static void azx_bus_reset(struct hda_bus *bus)
+{
+	struct azx *chip = bus->private_data;
+
+	bus->in_reset = 1;
+	azx_stop_chip(chip);
+	azx_init_chip(chip);
+#ifdef CONFIG_PM
+	if (chip->initialized) {
+		int i;
+
+		for (i = 0; i < AZX_MAX_PCMS; i++)
+			snd_pcm_suspend_all(chip->pcm[i]);
+		snd_hda_suspend(chip->bus);
+		snd_hda_resume(chip->bus);
+	}
+#endif
+	bus->in_reset = 0;
+}
+
 /*
  * Codec initialization
  */
@@ -1246,6 +1300,7 @@
 	bus_temp.ops.command = azx_send_cmd;
 	bus_temp.ops.get_response = azx_get_response;
 	bus_temp.ops.attach_pcm = azx_attach_pcm_stream;
+	bus_temp.ops.bus_reset = azx_bus_reset;
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	bus_temp.power_save = &power_save;
 	bus_temp.ops.pm_notify = azx_power_notify;
@@ -1270,8 +1325,8 @@
 				/* Some BIOSen give you wrong codec addresses
 				 * that don't exist
 				 */
-				snd_printk(KERN_WARNING
-					   "hda_intel: Codec #%d probe error; "
+				snd_printk(KERN_WARNING SFX
+					   "Codec #%d probe error; "
 					   "disabling it...\n", c);
 				chip->codec_mask &= ~(1 << c);
 				/* More badly, accessing to a non-existing
@@ -1487,7 +1542,7 @@
 	bufsize = snd_pcm_lib_buffer_bytes(substream);
 	period_bytes = snd_pcm_lib_period_bytes(substream);
 
-	snd_printdd("azx_pcm_prepare: bufsize=0x%x, format=0x%x\n",
+	snd_printdd(SFX "azx_pcm_prepare: bufsize=0x%x, format=0x%x\n",
 		    bufsize, format_val);
 
 	if (bufsize != azx_dev->bufsize ||
@@ -1830,7 +1885,7 @@
 			  &pcm);
 	if (err < 0)
 		return err;
-	strcpy(pcm->name, cpcm->name);
+	strlcpy(pcm->name, cpcm->name, sizeof(pcm->name));
 	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
 	if (apcm == NULL)
 		return -ENOMEM;
@@ -1973,7 +2028,7 @@
 	for (i = 0; i < AZX_MAX_PCMS; i++)
 		snd_pcm_suspend_all(chip->pcm[i]);
 	if (chip->initialized)
-		snd_hda_suspend(chip->bus, state);
+		snd_hda_suspend(chip->bus);
 	azx_stop_chip(chip);
 	if (chip->irq >= 0) {
 		free_irq(chip->irq, chip);
@@ -2265,14 +2320,14 @@
 	synchronize_irq(chip->irq);
 
 	gcap = azx_readw(chip, GCAP);
-	snd_printdd("chipset global capabilities = 0x%x\n", gcap);
+	snd_printdd(SFX "chipset global capabilities = 0x%x\n", gcap);
 
 	/* ATI chips seems buggy about 64bit DMA addresses */
 	if (chip->driver_type == AZX_DRIVER_ATI)
-		gcap &= ~0x01;
+		gcap &= ~ICH6_GCAP_64OK;
 
 	/* allow 64bit DMA address if supported by H/W */
-	if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
+	if ((gcap & ICH6_GCAP_64OK) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
 		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
 	else {
 		pci_set_dma_mask(pci, DMA_BIT_MASK(32));
@@ -2309,7 +2364,7 @@
 	chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev),
 				GFP_KERNEL);
 	if (!chip->azx_dev) {
-		snd_printk(KERN_ERR "cannot malloc azx_dev\n");
+		snd_printk(KERN_ERR SFX "cannot malloc azx_dev\n");
 		goto errout;
 	}
 
@@ -2332,11 +2387,9 @@
 		goto errout;
 	}
 	/* allocate CORB/RIRB */
-	if (!chip->single_cmd) {
-		err = azx_alloc_cmd_io(chip);
-		if (err < 0)
-			goto errout;
-	}
+	err = azx_alloc_cmd_io(chip);
+	if (err < 0)
+		goto errout;
 
 	/* initialize streams */
 	azx_init_stream(chip);
@@ -2359,9 +2412,11 @@
 	}
 
 	strcpy(card->driver, "HDA-Intel");
-	strcpy(card->shortname, driver_short_names[chip->driver_type]);
-	sprintf(card->longname, "%s at 0x%lx irq %i",
-		card->shortname, chip->addr, chip->irq);
+	strlcpy(card->shortname, driver_short_names[chip->driver_type],
+		sizeof(card->shortname));
+	snprintf(card->longname, sizeof(card->longname),
+		 "%s at 0x%lx irq %i",
+		 card->shortname, chip->addr, chip->irq);
 
 	*rchip = chip;
 	return 0;
@@ -2514,6 +2569,20 @@
 	{ PCI_DEVICE(0x10de, 0x0d97), .driver_data = AZX_DRIVER_NVIDIA },
 	/* Teradici */
 	{ PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA },
+	/* Creative X-Fi (CA0110-IBG) */
+#if !defined(CONFIG_SND_CTXFI) && !defined(CONFIG_SND_CTXFI_MODULE)
+	/* the following entry conflicts with snd-ctxfi driver,
+	 * as ctxfi driver mutates from HD-audio to native mode with
+	 * a special command sequence.
+	 */
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_ANY_ID),
+	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
+	  .class_mask = 0xffffff,
+	  .driver_data = AZX_DRIVER_GENERIC },
+#else
+	/* this entry seems still valid -- i.e. without emu20kx chip */
+	{ PCI_DEVICE(0x1102, 0x0009), .driver_data = AZX_DRIVER_GENERIC },
+#endif
 	/* AMD Generic, PCI class code and Vendor ID for HD Audio */
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_ANY_ID),
 	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c
index 93d7499..418c5d1 100644
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -466,8 +466,12 @@
 	hda_nid_t nid;
 	int i, nodes;
 
-	snd_iprintf(buffer, "Codec: %s\n",
-		    codec->name ? codec->name : "Not Set");
+	snd_iprintf(buffer, "Codec: ");
+	if (codec->vendor_name && codec->chip_name)
+		snd_iprintf(buffer, "%s %s\n",
+			    codec->vendor_name, codec->chip_name);
+	else
+		snd_iprintf(buffer, "Not Set\n");
 	snd_iprintf(buffer, "Address: %d\n", codec->addr);
 	snd_iprintf(buffer, "Function Id: 0x%x\n", codec->function_id);
 	snd_iprintf(buffer, "Vendor Id: 0x%08x\n", codec->vendor_id);
diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c
new file mode 100644
index 0000000..392d108
--- /dev/null
+++ b/sound/pci/hda/patch_ca0110.c
@@ -0,0 +1,573 @@
+/*
+ * HD audio interface patch for Creative X-Fi CA0110-IBG chip
+ *
+ * Copyright (c) 2008 Takashi Iwai <tiwai@suse.de>
+ *
+ *  This driver is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This driver is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <sound/core.h>
+#include "hda_codec.h"
+#include "hda_local.h"
+
+/*
+ */
+
+struct ca0110_spec {
+	struct auto_pin_cfg autocfg;
+	struct hda_multi_out multiout;
+	hda_nid_t out_pins[AUTO_CFG_MAX_OUTS];
+	hda_nid_t dacs[AUTO_CFG_MAX_OUTS];
+	hda_nid_t hp_dac;
+	hda_nid_t input_pins[AUTO_PIN_LAST];
+	hda_nid_t adcs[AUTO_PIN_LAST];
+	hda_nid_t dig_out;
+	hda_nid_t dig_in;
+	unsigned int num_inputs;
+	const char *input_labels[AUTO_PIN_LAST];
+	struct hda_pcm pcm_rec[2];	/* PCM information */
+};
+
+/*
+ * PCM callbacks
+ */
+static int ca0110_playback_pcm_open(struct hda_pcm_stream *hinfo,
+				    struct hda_codec *codec,
+				    struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream,
+					     hinfo);
+}
+
+static int ca0110_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
+				       struct hda_codec *codec,
+				       unsigned int stream_tag,
+				       unsigned int format,
+				       struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_prepare(codec, &spec->multiout,
+						stream_tag, format, substream);
+}
+
+static int ca0110_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
+				       struct hda_codec *codec,
+				       struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout);
+}
+
+/*
+ * Digital out
+ */
+static int ca0110_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_open(codec, &spec->multiout);
+}
+
+static int ca0110_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
+					 struct hda_codec *codec,
+					 struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+}
+
+static int ca0110_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
+					   struct hda_codec *codec,
+					   unsigned int stream_tag,
+					   unsigned int format,
+					   struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout, stream_tag,
+					     format, substream);
+}
+
+/*
+ * Analog capture
+ */
+static int ca0110_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
+				      struct hda_codec *codec,
+				      unsigned int stream_tag,
+				      unsigned int format,
+				      struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+
+	snd_hda_codec_setup_stream(codec, spec->adcs[substream->number],
+				   stream_tag, 0, format);
+	return 0;
+}
+
+static int ca0110_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
+				      struct hda_codec *codec,
+				      struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+
+	snd_hda_codec_cleanup_stream(codec, spec->adcs[substream->number]);
+	return 0;
+}
+
+/*
+ */
+
+static char *dirstr[2] = { "Playback", "Capture" };
+
+static int _add_switch(struct hda_codec *codec, hda_nid_t nid, const char *pfx,
+		       int chan, int dir)
+{
+	char namestr[44];
+	int type = dir ? HDA_INPUT : HDA_OUTPUT;
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO(namestr, nid, chan, 0, type);
+	sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]);
+	return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec));
+}
+
+static int _add_volume(struct hda_codec *codec, hda_nid_t nid, const char *pfx,
+		       int chan, int dir)
+{
+	char namestr[44];
+	int type = dir ? HDA_INPUT : HDA_OUTPUT;
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_VOLUME_MONO(namestr, nid, chan, 0, type);
+	sprintf(namestr, "%s %s Volume", pfx, dirstr[dir]);
+	return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec));
+}
+
+#define add_out_switch(codec, nid, pfx)	_add_switch(codec, nid, pfx, 3, 0)
+#define add_out_volume(codec, nid, pfx)	_add_volume(codec, nid, pfx, 3, 0)
+#define add_in_switch(codec, nid, pfx)	_add_switch(codec, nid, pfx, 3, 1)
+#define add_in_volume(codec, nid, pfx)	_add_volume(codec, nid, pfx, 3, 1)
+#define add_mono_switch(codec, nid, pfx, chan) \
+	_add_switch(codec, nid, pfx, chan, 0)
+#define add_mono_volume(codec, nid, pfx, chan) \
+	_add_volume(codec, nid, pfx, chan, 0)
+
+static int ca0110_build_controls(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	static char *prefix[AUTO_CFG_MAX_OUTS] = {
+		"Front", "Surround", NULL, "Side", "Multi"
+	};
+	hda_nid_t mutenid;
+	int i, err;
+
+	for (i = 0; i < spec->multiout.num_dacs; i++) {
+		if (get_wcaps(codec, spec->out_pins[i]) & AC_WCAP_OUT_AMP)
+			mutenid = spec->out_pins[i];
+		else
+			mutenid = spec->multiout.dac_nids[i];
+		if (!prefix[i]) {
+			err = add_mono_switch(codec, mutenid,
+					      "Center", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_switch(codec, mutenid,
+					      "LFE", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_volume(codec, spec->multiout.dac_nids[i],
+					      "Center", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_volume(codec, spec->multiout.dac_nids[i],
+					      "LFE", 1);
+			if (err < 0)
+				return err;
+		} else {
+			err = add_out_switch(codec, mutenid,
+					     prefix[i]);
+			if (err < 0)
+				return err;
+			err = add_out_volume(codec, spec->multiout.dac_nids[i],
+					 prefix[i]);
+			if (err < 0)
+				return err;
+		}
+	}
+	if (cfg->hp_outs) {
+		if (get_wcaps(codec, cfg->hp_pins[0]) & AC_WCAP_OUT_AMP)
+			mutenid = cfg->hp_pins[0];
+		else
+			mutenid = spec->multiout.dac_nids[i];
+
+		err = add_out_switch(codec, mutenid, "Headphone");
+		if (err < 0)
+			return err;
+		if (spec->hp_dac) {
+			err = add_out_volume(codec, spec->hp_dac, "Headphone");
+			if (err < 0)
+				return err;
+		}
+	}
+	for (i = 0; i < spec->num_inputs; i++) {
+		const char *label = spec->input_labels[i];
+		if (get_wcaps(codec, spec->input_pins[i]) & AC_WCAP_IN_AMP)
+			mutenid = spec->input_pins[i];
+		else
+			mutenid = spec->adcs[i];
+		err = add_in_switch(codec, mutenid, label);
+		if (err < 0)
+			return err;
+		err = add_in_volume(codec, spec->adcs[i], label);
+		if (err < 0)
+			return err;
+	}
+
+	if (spec->dig_out) {
+		err = snd_hda_create_spdif_out_ctls(codec, spec->dig_out);
+		if (err < 0)
+			return err;
+		err = snd_hda_create_spdif_share_sw(codec, &spec->multiout);
+		if (err < 0)
+			return err;
+		spec->multiout.share_spdif = 1;
+	}
+	if (spec->dig_in) {
+		err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in);
+		if (err < 0)
+			return err;
+		err = add_in_volume(codec, spec->dig_in, "IEC958");
+	}
+	return 0;
+}
+
+/*
+ */
+static struct hda_pcm_stream ca0110_pcm_analog_playback = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 8,
+	.ops = {
+		.open = ca0110_playback_pcm_open,
+		.prepare = ca0110_playback_pcm_prepare,
+		.cleanup = ca0110_playback_pcm_cleanup
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_analog_capture = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+	.ops = {
+		.prepare = ca0110_capture_pcm_prepare,
+		.cleanup = ca0110_capture_pcm_cleanup
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_digital_playback = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+	.ops = {
+		.open = ca0110_dig_playback_pcm_open,
+		.close = ca0110_dig_playback_pcm_close,
+		.prepare = ca0110_dig_playback_pcm_prepare
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_digital_capture = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+};
+
+static int ca0110_build_pcms(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct hda_pcm *info = spec->pcm_rec;
+
+	codec->pcm_info = info;
+	codec->num_pcms = 0;
+
+	info->name = "CA0110 Analog";
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0110_pcm_analog_playback;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0];
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max =
+		spec->multiout.max_channels;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0110_pcm_analog_capture;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_inputs;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0];
+	codec->num_pcms++;
+
+	if (!spec->dig_out && !spec->dig_in)
+		return 0;
+
+	info++;
+	info->name = "CA0110 Digital";
+	info->pcm_type = HDA_PCM_TYPE_SPDIF;
+	if (spec->dig_out) {
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK] =
+			ca0110_pcm_digital_playback;
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dig_out;
+	}
+	if (spec->dig_in) {
+		info->stream[SNDRV_PCM_STREAM_CAPTURE] =
+			ca0110_pcm_digital_capture;
+		info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->dig_in;
+	}
+	codec->num_pcms++;
+
+	return 0;
+}
+
+static void init_output(struct hda_codec *codec, hda_nid_t pin, hda_nid_t dac)
+{
+	if (pin) {
+		snd_hda_codec_write(codec, pin, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP);
+		if (get_wcaps(codec, pin) & AC_WCAP_OUT_AMP)
+			snd_hda_codec_write(codec, pin, 0,
+					    AC_VERB_SET_AMP_GAIN_MUTE,
+					    AMP_OUT_UNMUTE);
+	}
+	if (dac)
+		snd_hda_codec_write(codec, dac, 0,
+				    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO);
+}
+
+static void init_input(struct hda_codec *codec, hda_nid_t pin, hda_nid_t adc)
+{
+	if (pin) {
+		snd_hda_codec_write(codec, pin, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80);
+		if (get_wcaps(codec, pin) & AC_WCAP_IN_AMP)
+			snd_hda_codec_write(codec, pin, 0,
+					    AC_VERB_SET_AMP_GAIN_MUTE,
+					    AMP_IN_UNMUTE(0));
+	}
+	if (adc)
+		snd_hda_codec_write(codec, adc, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+				    AMP_IN_UNMUTE(0));
+}
+
+static int ca0110_init(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+
+	for (i = 0; i < spec->multiout.num_dacs; i++)
+		init_output(codec, spec->out_pins[i],
+			    spec->multiout.dac_nids[i]);
+	init_output(codec, cfg->hp_pins[0], spec->hp_dac);
+	init_output(codec, cfg->dig_out_pins[0], spec->dig_out);
+
+	for (i = 0; i < spec->num_inputs; i++)
+		init_input(codec, spec->input_pins[i], spec->adcs[i]);
+	init_input(codec, cfg->dig_in_pin, spec->dig_in);
+	return 0;
+}
+
+static void ca0110_free(struct hda_codec *codec)
+{
+	kfree(codec->spec);
+}
+
+static struct hda_codec_ops ca0110_patch_ops = {
+	.build_controls = ca0110_build_controls,
+	.build_pcms = ca0110_build_pcms,
+	.init = ca0110_init,
+	.free = ca0110_free,
+};
+
+
+static void parse_line_outs(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i, n;
+	unsigned int def_conf;
+	hda_nid_t nid;
+
+	n = 0;
+	for (i = 0; i < cfg->line_outs; i++) {
+		nid = cfg->line_out_pins[i];
+		def_conf = snd_hda_codec_get_pincfg(codec, nid);
+		if (!def_conf)
+			continue; /* invalid pin */
+		if (snd_hda_get_connections(codec, nid, &spec->dacs[i], 1) != 1)
+			continue;
+		spec->out_pins[n++] = nid;
+	}
+	spec->multiout.dac_nids = spec->dacs;
+	spec->multiout.num_dacs = n;
+	spec->multiout.max_channels = n * 2;
+}
+
+static void parse_hp_out(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+	unsigned int def_conf;
+	hda_nid_t nid, dac;
+
+	if (!cfg->hp_outs)
+		return;
+	nid = cfg->hp_pins[0];
+	def_conf = snd_hda_codec_get_pincfg(codec, nid);
+	if (!def_conf) {
+		cfg->hp_outs = 0;
+		return;
+	}
+	if (snd_hda_get_connections(codec, nid, &dac, 1) != 1)
+		return;
+
+	for (i = 0; i < cfg->line_outs; i++)
+		if (dac == spec->dacs[i])
+			break;
+	if (i >= cfg->line_outs) {
+		spec->hp_dac = dac;
+		spec->multiout.hp_nid = dac;
+	}
+}
+
+static void parse_input(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	hda_nid_t nid, pin;
+	int n, i, j;
+
+	n = 0;
+	nid = codec->start_nid;
+	for (i = 0; i < codec->num_nodes; i++, nid++) {
+		unsigned int wcaps = get_wcaps(codec, nid);
+		unsigned int type = (wcaps & AC_WCAP_TYPE) >>
+			AC_WCAP_TYPE_SHIFT;
+		if (type != AC_WID_AUD_IN)
+			continue;
+		if (snd_hda_get_connections(codec, nid, &pin, 1) != 1)
+			continue;
+		if (pin == cfg->dig_in_pin) {
+			spec->dig_in = nid;
+			continue;
+		}
+		for (j = 0; j < AUTO_PIN_LAST; j++)
+			if (cfg->input_pins[j] == pin)
+				break;
+		if (j >= AUTO_PIN_LAST)
+			continue;
+		spec->input_pins[n] = pin;
+		spec->input_labels[n] = auto_pin_cfg_labels[j];
+		spec->adcs[n] = nid;
+		n++;
+	}
+	spec->num_inputs = n;
+}
+
+static void parse_digital(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+
+	if (cfg->dig_outs &&
+	    snd_hda_get_connections(codec, cfg->dig_out_pins[0],
+				    &spec->dig_out, 1) == 1)
+		spec->multiout.dig_out_nid = cfg->dig_out_pins[0];
+}
+
+static int ca0110_parse_auto_config(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	int err;
+
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
+	if (err < 0)
+		return err;
+
+	parse_line_outs(codec);
+	parse_hp_out(codec);
+	parse_digital(codec);
+	parse_input(codec);
+	return 0;
+}
+
+
+int patch_ca0110(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec;
+	int err;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+	codec->spec = spec;
+
+	codec->bus->needs_damn_long_delay = 1;
+
+	err = ca0110_parse_auto_config(codec);
+	if (err < 0)
+		goto error;
+
+	codec->patch_ops = ca0110_patch_ops;
+
+	return 0;
+
+ error:
+	kfree(codec->spec);
+	codec->spec = NULL;
+	return err;
+}
+
+
+/*
+ * patch entries
+ */
+static struct hda_codec_preset snd_hda_preset_ca0110[] = {
+	{ .id = 0x1102000a, .name = "CA0110-IBG", .patch = patch_ca0110 },
+	{ .id = 0x1102000b, .name = "CA0110-IBG", .patch = patch_ca0110 },
+	{ .id = 0x1102000d, .name = "SB0880 X-Fi", .patch = patch_ca0110 },
+	{} /* terminator */
+};
+
+MODULE_ALIAS("snd-hda-codec-id:1102000a");
+MODULE_ALIAS("snd-hda-codec-id:1102000b");
+MODULE_ALIAS("snd-hda-codec-id:1102000d");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Creative CA0110-IBG HD-audio codec");
+
+static struct hda_codec_preset_list ca0110_list = {
+	.preset = snd_hda_preset_ca0110,
+	.owner = THIS_MODULE,
+};
+
+static int __init patch_ca0110_init(void)
+{
+	return snd_hda_add_codec_preset(&ca0110_list);
+}
+
+static void __exit patch_ca0110_exit(void)
+{
+	snd_hda_delete_codec_preset(&ca0110_list);
+}
+
+module_init(patch_ca0110_init)
+module_exit(patch_ca0110_exit)
diff --git a/sound/pci/hda/patch_nvhdmi.c b/sound/pci/hda/patch_nvhdmi.c
index d57d813..f5792e2 100644
--- a/sound/pci/hda/patch_nvhdmi.c
+++ b/sound/pci/hda/patch_nvhdmi.c
@@ -35,9 +35,28 @@
 	struct hda_pcm pcm_rec;
 };
 
+#define Nv_VERB_SET_Channel_Allocation          0xF79
+#define Nv_VERB_SET_Info_Frame_Checksum         0xF7A
+#define Nv_VERB_SET_Audio_Protection_On         0xF98
+#define Nv_VERB_SET_Audio_Protection_Off        0xF99
+
+#define Nv_Master_Convert_nid   0x04
+#define Nv_Master_Pin_nid       0x05
+
+static hda_nid_t nvhdmi_convert_nids[4] = {
+	/*front, rear, clfe, rear_surr */
+	0x6, 0x8, 0xa, 0xc,
+};
+
 static struct hda_verb nvhdmi_basic_init[] = {
+	/* set audio protect on */
+	{ 0x1, Nv_VERB_SET_Audio_Protection_On, 0x1},
 	/* enable digital output on pin widget */
-	{ 0x05, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x5, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0x7, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0x9, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0xb, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0xd, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
 	{} /* terminator */
 };
 
@@ -66,48 +85,205 @@
  * Digital out
  */
 static int nvhdmi_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
-				     struct hda_codec *codec,
-				     struct snd_pcm_substream *substream)
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	return snd_hda_multi_out_dig_open(codec, &spec->multiout);
 }
 
-static int nvhdmi_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
-				      struct hda_codec *codec,
-				      struct snd_pcm_substream *substream)
+static int nvhdmi_dig_playback_pcm_close_8ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct nvhdmi_spec *spec = codec->spec;
+	int i;
+
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid,
+			0, AC_VERB_SET_CHANNEL_STREAMID, 0);
+	for (i = 0; i < 4; i++) {
+		/* set the stream id */
+		snd_hda_codec_write(codec, nvhdmi_convert_nids[i], 0,
+				AC_VERB_SET_CHANNEL_STREAMID, 0);
+		/* set the stream format */
+		snd_hda_codec_write(codec, nvhdmi_convert_nids[i], 0,
+				AC_VERB_SET_STREAM_FORMAT, 0);
+	}
+
+	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+}
+
+static int nvhdmi_dig_playback_pcm_close_2ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
 }
 
-static int nvhdmi_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
-					    struct hda_codec *codec,
-					    unsigned int stream_tag,
-					    unsigned int format,
-					    struct snd_pcm_substream *substream)
+static int nvhdmi_dig_playback_pcm_prepare_8ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					unsigned int stream_tag,
+					unsigned int format,
+					struct snd_pcm_substream *substream)
+{
+	int chs;
+	unsigned int dataDCC1, dataDCC2, chan, chanmask, channel_id;
+	int i;
+
+	mutex_lock(&codec->spdif_mutex);
+
+	chs = substream->runtime->channels;
+	chan = chs ? (chs - 1) : 1;
+
+	switch (chs) {
+	default:
+	case 0:
+	case 2:
+		chanmask = 0x00;
+		break;
+	case 4:
+		chanmask = 0x08;
+		break;
+	case 6:
+		chanmask = 0x0b;
+		break;
+	case 8:
+		chanmask = 0x13;
+		break;
+	}
+	dataDCC1 = AC_DIG1_ENABLE | AC_DIG1_COPYRIGHT;
+	dataDCC2 = 0x2;
+
+	/* set the Audio InforFrame Channel Allocation */
+	snd_hda_codec_write(codec, 0x1, 0,
+			Nv_VERB_SET_Channel_Allocation, chanmask);
+
+	/* turn off SPDIF once; otherwise the IEC958 bits won't be updated */
+	if (codec->spdif_status_reset && (codec->spdif_ctls & AC_DIG1_ENABLE))
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
+
+	/* set the stream id */
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid, 0,
+			AC_VERB_SET_CHANNEL_STREAMID, (stream_tag << 4) | 0x0);
+
+	/* set the stream format */
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid, 0,
+			AC_VERB_SET_STREAM_FORMAT, format);
+
+	/* turn on again (if needed) */
+	/* enable and set the channel status audio/data flag */
+	if (codec->spdif_status_reset && (codec->spdif_ctls & AC_DIG1_ENABLE)) {
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & 0xff);
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_2, dataDCC2);
+	}
+
+	for (i = 0; i < 4; i++) {
+		if (chs == 2)
+			channel_id = 0;
+		else
+			channel_id = i * 2;
+
+		/* turn off SPDIF once;
+		 *otherwise the IEC958 bits won't be updated
+		 */
+		if (codec->spdif_status_reset &&
+		(codec->spdif_ctls & AC_DIG1_ENABLE))
+			snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
+		/* set the stream id */
+		snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_CHANNEL_STREAMID,
+				(stream_tag << 4) | channel_id);
+		/* set the stream format */
+		snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_STREAM_FORMAT,
+				format);
+		/* turn on again (if needed) */
+		/* enable and set the channel status audio/data flag */
+		if (codec->spdif_status_reset &&
+		(codec->spdif_ctls & AC_DIG1_ENABLE)) {
+			snd_hda_codec_write(codec,
+					nvhdmi_convert_nids[i],
+					0,
+					AC_VERB_SET_DIGI_CONVERT_1,
+					codec->spdif_ctls & 0xff);
+			snd_hda_codec_write(codec,
+					nvhdmi_convert_nids[i],
+					0,
+					AC_VERB_SET_DIGI_CONVERT_2, dataDCC2);
+		}
+	}
+
+	/* set the Audio Info Frame Checksum */
+	snd_hda_codec_write(codec, 0x1, 0,
+			Nv_VERB_SET_Info_Frame_Checksum,
+			(0x71 - chan - chanmask));
+
+	mutex_unlock(&codec->spdif_mutex);
+	return 0;
+}
+
+static int nvhdmi_dig_playback_pcm_prepare_2ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					unsigned int stream_tag,
+					unsigned int format,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout, stream_tag,
-					     format, substream);
+					format, substream);
 }
 
-static struct hda_pcm_stream nvhdmi_pcm_digital_playback = {
+static struct hda_pcm_stream nvhdmi_pcm_digital_playback_8ch = {
 	.substreams = 1,
 	.channels_min = 2,
-	.channels_max = 2,
-	.nid = 0x4, /* NID to query formats and rates and setup streams */
+	.channels_max = 8,
+	.nid = Nv_Master_Convert_nid,
 	.rates = SNDRV_PCM_RATE_48000,
 	.maxbps = 16,
 	.formats = SNDRV_PCM_FMTBIT_S16_LE,
 	.ops = {
 		.open = nvhdmi_dig_playback_pcm_open,
-		.close = nvhdmi_dig_playback_pcm_close,
-		.prepare = nvhdmi_dig_playback_pcm_prepare
+		.close = nvhdmi_dig_playback_pcm_close_8ch,
+		.prepare = nvhdmi_dig_playback_pcm_prepare_8ch
 	},
 };
 
-static int nvhdmi_build_pcms(struct hda_codec *codec)
+static struct hda_pcm_stream nvhdmi_pcm_digital_playback_2ch = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+	.nid = Nv_Master_Convert_nid,
+	.rates = SNDRV_PCM_RATE_48000,
+	.maxbps = 16,
+	.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	.ops = {
+		.open = nvhdmi_dig_playback_pcm_open,
+		.close = nvhdmi_dig_playback_pcm_close_2ch,
+		.prepare = nvhdmi_dig_playback_pcm_prepare_2ch
+	},
+};
+
+static int nvhdmi_build_pcms_8ch(struct hda_codec *codec)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	struct hda_pcm *info = &spec->pcm_rec;
@@ -117,7 +293,24 @@
 
 	info->name = "NVIDIA HDMI";
 	info->pcm_type = HDA_PCM_TYPE_HDMI;
-	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = nvhdmi_pcm_digital_playback;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK]
+					= nvhdmi_pcm_digital_playback_8ch;
+
+	return 0;
+}
+
+static int nvhdmi_build_pcms_2ch(struct hda_codec *codec)
+{
+	struct nvhdmi_spec *spec = codec->spec;
+	struct hda_pcm *info = &spec->pcm_rec;
+
+	codec->num_pcms = 1;
+	codec->pcm_info = info;
+
+	info->name = "NVIDIA HDMI";
+	info->pcm_type = HDA_PCM_TYPE_HDMI;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK]
+					= nvhdmi_pcm_digital_playback_2ch;
 
 	return 0;
 }
@@ -127,14 +320,21 @@
 	kfree(codec->spec);
 }
 
-static struct hda_codec_ops nvhdmi_patch_ops = {
+static struct hda_codec_ops nvhdmi_patch_ops_8ch = {
 	.build_controls = nvhdmi_build_controls,
-	.build_pcms = nvhdmi_build_pcms,
+	.build_pcms = nvhdmi_build_pcms_8ch,
 	.init = nvhdmi_init,
 	.free = nvhdmi_free,
 };
 
-static int patch_nvhdmi(struct hda_codec *codec)
+static struct hda_codec_ops nvhdmi_patch_ops_2ch = {
+	.build_controls = nvhdmi_build_controls,
+	.build_pcms = nvhdmi_build_pcms_2ch,
+	.init = nvhdmi_init,
+	.free = nvhdmi_free,
+};
+
+static int patch_nvhdmi_8ch(struct hda_codec *codec)
 {
 	struct nvhdmi_spec *spec;
 
@@ -144,13 +344,30 @@
 
 	codec->spec = spec;
 
-	spec->multiout.num_dacs = 0;	  /* no analog */
-	spec->multiout.max_channels = 2;
-	spec->multiout.dig_out_nid = 0x4; /* NID for copying analog to digital,
-					   * seems to be unused in pure-digital
-					   * case. */
+	spec->multiout.num_dacs = 0;  /* no analog */
+	spec->multiout.max_channels = 8;
+	spec->multiout.dig_out_nid = Nv_Master_Convert_nid;
 
-	codec->patch_ops = nvhdmi_patch_ops;
+	codec->patch_ops = nvhdmi_patch_ops_8ch;
+
+	return 0;
+}
+
+static int patch_nvhdmi_2ch(struct hda_codec *codec)
+{
+	struct nvhdmi_spec *spec;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (spec == NULL)
+		return -ENOMEM;
+
+	codec->spec = spec;
+
+	spec->multiout.num_dacs = 0;  /* no analog */
+	spec->multiout.max_channels = 2;
+	spec->multiout.dig_out_nid = Nv_Master_Convert_nid;
+
+	codec->patch_ops = nvhdmi_patch_ops_2ch;
 
 	return 0;
 }
@@ -159,11 +376,11 @@
  * patch entries
  */
 static struct hda_codec_preset snd_hda_preset_nvhdmi[] = {
-	{ .id = 0x10de0002, .name = "MCP78 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0006, .name = "MCP78 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0007, .name = "MCP7A HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi },
+	{ .id = 0x10de0002, .name = "MCP78 HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0006, .name = "MCP78 HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0007, .name = "MCP7A HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch },
+	{ .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch },
 	{} /* terminator */
 };
 
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0fd258e..337d2a5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -190,6 +190,7 @@
 	ALC663_ASUS_MODE6,
 	ALC272_DELL,
 	ALC272_DELL_ZM1,
+	ALC272_SAMSUNG_NC10,
 	ALC662_AUTO,
 	ALC662_MODEL_LAST,
 };
@@ -205,6 +206,7 @@
 	ALC882_ASUS_A7M,
 	ALC885_MACPRO,
 	ALC885_MBP3,
+	ALC885_MB5,
 	ALC885_IMAC24,
 	ALC882_AUTO,
 	ALC882_MODEL_LAST,
@@ -218,9 +220,11 @@
 	ALC883_6ST_DIG,
 	ALC883_TARGA_DIG,
 	ALC883_TARGA_2ch_DIG,
+	ALC883_TARGA_8ch_DIG,
 	ALC883_ACER,
 	ALC883_ACER_ASPIRE,
 	ALC888_ACER_ASPIRE_4930G,
+	ALC888_ACER_ASPIRE_8930G,
 	ALC883_MEDION,
 	ALC883_MEDION_MD2,
 	ALC883_LAPTOP_EAPD,
@@ -238,7 +242,9 @@
 	ALC883_3ST_6ch_INTEL,
 	ALC888_ASUS_M90V,
 	ALC888_ASUS_EEE1601,
+	ALC889A_MB31,
 	ALC1200_ASUS_P5Q,
+	ALC883_SONY_VAIO_TT,
 	ALC883_AUTO,
 	ALC883_MODEL_LAST,
 };
@@ -253,6 +259,15 @@
 /* for GPIO Poll */
 #define GPIO_MASK	0x03
 
+/* extra amp-initialization sequence types */
+enum {
+	ALC_INIT_NONE,
+	ALC_INIT_DEFAULT,
+	ALC_INIT_GPIO1,
+	ALC_INIT_GPIO2,
+	ALC_INIT_GPIO3,
+};
+
 struct alc_spec {
 	/* codec parameterization */
 	struct snd_kcontrol_new *mixers[5];	/* mixer arrays */
@@ -266,13 +281,13 @@
 						 */
 	unsigned int num_init_verbs;
 
-	char *stream_name_analog;	/* analog PCM stream */
+	char stream_name_analog[16];	/* analog PCM stream */
 	struct hda_pcm_stream *stream_analog_playback;
 	struct hda_pcm_stream *stream_analog_capture;
 	struct hda_pcm_stream *stream_analog_alt_playback;
 	struct hda_pcm_stream *stream_analog_alt_capture;
 
-	char *stream_name_digital;	/* digital PCM stream */
+	char stream_name_digital[16];	/* digital PCM stream */
 	struct hda_pcm_stream *stream_digital_playback;
 	struct hda_pcm_stream *stream_digital_capture;
 
@@ -301,6 +316,8 @@
 	const struct hda_channel_mode *channel_mode;
 	int num_channel_mode;
 	int need_dac_fix;
+	int const_channel_count;
+	int ext_channel_count;
 
 	/* PCM information */
 	struct hda_pcm pcm_rec[3];	/* used in alc_build_pcms() */
@@ -322,6 +339,7 @@
 
 	/* other flags */
 	unsigned int no_analog :1; /* digital I/O only */
+	int init_amp;
 
 	/* for virtual master */
 	hda_nid_t vmaster_nid;
@@ -355,6 +373,7 @@
 	unsigned int num_channel_mode;
 	const struct hda_channel_mode *channel_mode;
 	int need_dac_fix;
+	int const_channel_count;
 	unsigned int num_mux_defs;
 	const struct hda_input_mux *input_mux;
 	void (*unsol_event)(struct hda_codec *, unsigned int);
@@ -449,7 +468,7 @@
 	struct alc_spec *spec = codec->spec;
 	return snd_hda_ch_mode_get(codec, ucontrol, spec->channel_mode,
 				   spec->num_channel_mode,
-				   spec->multiout.max_channels);
+				   spec->ext_channel_count);
 }
 
 static int alc_ch_mode_put(struct snd_kcontrol *kcontrol,
@@ -459,9 +478,12 @@
 	struct alc_spec *spec = codec->spec;
 	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
 				      spec->num_channel_mode,
-				      &spec->multiout.max_channels);
-	if (err >= 0 && spec->need_dac_fix)
-		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+				      &spec->ext_channel_count);
+	if (err >= 0 && !spec->const_channel_count) {
+		spec->multiout.max_channels = spec->ext_channel_count;
+		if (spec->need_dac_fix)
+			spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+	}
 	return err;
 }
 
@@ -841,8 +863,13 @@
 	spec->channel_mode = preset->channel_mode;
 	spec->num_channel_mode = preset->num_channel_mode;
 	spec->need_dac_fix = preset->need_dac_fix;
+	spec->const_channel_count = preset->const_channel_count;
 
-	spec->multiout.max_channels = spec->channel_mode[0].channels;
+	if (preset->const_channel_count)
+		spec->multiout.max_channels = preset->const_channel_count;
+	else
+		spec->multiout.max_channels = spec->channel_mode[0].channels;
+	spec->ext_channel_count = spec->channel_mode[0].channels;
 
 	spec->multiout.num_dacs = preset->num_dacs;
 	spec->multiout.dac_nids = preset->dac_nids;
@@ -921,20 +948,26 @@
 	alc_fix_pll(codec);
 }
 
-static void alc_sku_automute(struct hda_codec *codec)
+static void alc_automute_pin(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 	unsigned int present;
-	unsigned int hp_nid = spec->autocfg.hp_pins[0];
-	unsigned int sp_nid = spec->autocfg.speaker_pins[0];
+	unsigned int nid = spec->autocfg.hp_pins[0];
+	int i;
 
 	/* need to execute and sync at first */
-	snd_hda_codec_read(codec, hp_nid, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, hp_nid, 0,
+	snd_hda_codec_read(codec, nid, 0, AC_VERB_SET_PIN_SENSE, 0);
+	present = snd_hda_codec_read(codec, nid, 0,
 				     AC_VERB_GET_PIN_SENSE, 0);
-	spec->jack_present = (present & 0x80000000) != 0;
-	snd_hda_codec_write(codec, sp_nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    spec->jack_present ? 0 : PIN_OUT);
+	spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.speaker_pins); i++) {
+		nid = spec->autocfg.speaker_pins[i];
+		if (!nid)
+			break;
+		snd_hda_codec_write(codec, nid, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL,
+				    spec->jack_present ? 0 : PIN_OUT);
+	}
 }
 
 #if 0 /* it's broken in some acses -- temporarily disabled */
@@ -969,16 +1002,19 @@
 		res >>= 28;
 	else
 		res >>= 26;
-	if (res == ALC880_HP_EVENT)
-		alc_sku_automute(codec);
-
-	if (res == ALC880_MIC_EVENT)
+	switch (res) {
+	case ALC880_HP_EVENT:
+		alc_automute_pin(codec);
+		break;
+	case ALC880_MIC_EVENT:
 		alc_mic_automute(codec);
+		break;
+	}
 }
 
 static void alc_inithook(struct hda_codec *codec)
 {
-	alc_sku_automute(codec);
+	alc_automute_pin(codec);
 	alc_mic_automute(codec);
 }
 
@@ -1000,69 +1036,21 @@
 				    AC_VERB_SET_PROC_COEF, 0x3030);
 }
 
-/* 32-bit subsystem ID for BIOS loading in HD Audio codec.
- *	31 ~ 16 :	Manufacture ID
- *	15 ~ 8	:	SKU ID
- *	7  ~ 0	:	Assembly ID
- *	port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36
- */
-static void alc_subsystem_id(struct hda_codec *codec,
-			     unsigned int porta, unsigned int porte,
-			     unsigned int portd)
+static void alc_auto_init_amp(struct hda_codec *codec, int type)
 {
-	unsigned int ass, tmp, i;
-	unsigned nid;
-	struct alc_spec *spec = codec->spec;
+	unsigned int tmp;
 
-	ass = codec->subsystem_id & 0xffff;
-	if ((ass != codec->bus->pci->subsystem_device) && (ass & 1))
-		goto do_sku;
-
-	/*
-	 * 31~30	: port conetcivity
-	 * 29~21	: reserve
-	 * 20		: PCBEEP input
-	 * 19~16	: Check sum (15:1)
-	 * 15~1		: Custom
-	 * 0		: override
-	*/
-	nid = 0x1d;
-	if (codec->vendor_id == 0x10ec0260)
-		nid = 0x17;
-	ass = snd_hda_codec_get_pincfg(codec, nid);
-	if (!(ass & 1) && !(ass & 0x100000))
-		return;
-	if ((ass >> 30) != 1)	/* no physical connection */
-		return;
-
-	/* check sum */
-	tmp = 0;
-	for (i = 1; i < 16; i++) {
-		if ((ass >> i) & 1)
-			tmp++;
-	}
-	if (((ass >> 16) & 0xf) != tmp)
-		return;
-do_sku:
-	/*
-	 * 0 : override
-	 * 1 :	Swap Jack
-	 * 2 : 0 --> Desktop, 1 --> Laptop
-	 * 3~5 : External Amplifier control
-	 * 7~6 : Reserved
-	*/
-	tmp = (ass & 0x38) >> 3;	/* external Amp control */
-	switch (tmp) {
-	case 1:
+	switch (type) {
+	case ALC_INIT_GPIO1:
 		snd_hda_sequence_write(codec, alc_gpio1_init_verbs);
 		break;
-	case 3:
+	case ALC_INIT_GPIO2:
 		snd_hda_sequence_write(codec, alc_gpio2_init_verbs);
 		break;
-	case 7:
+	case ALC_INIT_GPIO3:
 		snd_hda_sequence_write(codec, alc_gpio3_init_verbs);
 		break;
-	case 5:	/* set EAPD output high */
+	case ALC_INIT_DEFAULT:
 		switch (codec->vendor_id) {
 		case 0x10ec0260:
 			snd_hda_codec_write(codec, 0x0f, 0,
@@ -1116,7 +1104,7 @@
 					    tmp | 0x2010);
 			break;
 		case 0x10ec0888:
-			/*alc888_coef_init(codec);*/ /* called in alc_init() */
+			alc888_coef_init(codec);
 			break;
 		case 0x10ec0267:
 		case 0x10ec0268:
@@ -1131,7 +1119,107 @@
 					    tmp | 0x3000);
 			break;
 		}
-	default:
+		break;
+	}
+}
+
+static void alc_init_auto_hp(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	if (!spec->autocfg.hp_pins[0])
+		return;
+
+	if (!spec->autocfg.speaker_pins[0]) {
+		if (spec->autocfg.line_out_pins[0] &&
+		    spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT)
+			spec->autocfg.speaker_pins[0] =
+				spec->autocfg.line_out_pins[0];
+		else
+			return;
+	}
+
+	snd_printdd("realtek: Enable HP auto-muting on NID 0x%x\n",
+		    spec->autocfg.hp_pins[0]);
+	snd_hda_codec_write_cache(codec, spec->autocfg.hp_pins[0], 0,
+				  AC_VERB_SET_UNSOLICITED_ENABLE,
+				  AC_USRSP_EN | ALC880_HP_EVENT);
+	spec->unsol_event = alc_sku_unsol_event;
+}
+
+/* check subsystem ID and set up device-specific initialization;
+ * return 1 if initialized, 0 if invalid SSID
+ */
+/* 32-bit subsystem ID for BIOS loading in HD Audio codec.
+ *	31 ~ 16 :	Manufacture ID
+ *	15 ~ 8	:	SKU ID
+ *	7  ~ 0	:	Assembly ID
+ *	port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36
+ */
+static int alc_subsystem_id(struct hda_codec *codec,
+			    hda_nid_t porta, hda_nid_t porte,
+			    hda_nid_t portd)
+{
+	unsigned int ass, tmp, i;
+	unsigned nid;
+	struct alc_spec *spec = codec->spec;
+
+	ass = codec->subsystem_id & 0xffff;
+	if ((ass != codec->bus->pci->subsystem_device) && (ass & 1))
+		goto do_sku;
+
+	/* invalid SSID, check the special NID pin defcfg instead */
+	/*
+	 * 31~30	: port conetcivity
+	 * 29~21	: reserve
+	 * 20		: PCBEEP input
+	 * 19~16	: Check sum (15:1)
+	 * 15~1		: Custom
+	 * 0		: override
+	*/
+	nid = 0x1d;
+	if (codec->vendor_id == 0x10ec0260)
+		nid = 0x17;
+	ass = snd_hda_codec_get_pincfg(codec, nid);
+	snd_printd("realtek: No valid SSID, "
+		   "checking pincfg 0x%08x for NID 0x%x\n",
+		   ass, nid);
+	if (!(ass & 1) && !(ass & 0x100000))
+		return 0;
+	if ((ass >> 30) != 1)	/* no physical connection */
+		return 0;
+
+	/* check sum */
+	tmp = 0;
+	for (i = 1; i < 16; i++) {
+		if ((ass >> i) & 1)
+			tmp++;
+	}
+	if (((ass >> 16) & 0xf) != tmp)
+		return 0;
+do_sku:
+	snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n",
+		   ass & 0xffff, codec->vendor_id);
+	/*
+	 * 0 : override
+	 * 1 :	Swap Jack
+	 * 2 : 0 --> Desktop, 1 --> Laptop
+	 * 3~5 : External Amplifier control
+	 * 7~6 : Reserved
+	*/
+	tmp = (ass & 0x38) >> 3;	/* external Amp control */
+	switch (tmp) {
+	case 1:
+		spec->init_amp = ALC_INIT_GPIO1;
+		break;
+	case 3:
+		spec->init_amp = ALC_INIT_GPIO2;
+		break;
+	case 7:
+		spec->init_amp = ALC_INIT_GPIO3;
+		break;
+	case 5:
+		spec->init_amp = ALC_INIT_DEFAULT;
 		break;
 	}
 
@@ -1139,7 +1227,7 @@
 	 * when the external headphone out jack is plugged"
 	 */
 	if (!(ass & 0x8000))
-		return;
+		return 1;
 	/*
 	 * 10~8 : Jack location
 	 * 12~11: Headphone out -> 00: PortA, 01: PortE, 02: PortD, 03: Resvered
@@ -1147,14 +1235,6 @@
 	 * 15   : 1 --> enable the function "Mute internal speaker
 	 *	        when the external headphone out jack is plugged"
 	 */
-	if (!spec->autocfg.speaker_pins[0]) {
-		if (spec->autocfg.line_out_pins[0])
-			spec->autocfg.speaker_pins[0] =
-				spec->autocfg.line_out_pins[0];
-		else
-			return;
-	}
-
 	if (!spec->autocfg.hp_pins[0]) {
 		tmp = (ass >> 11) & 0x3;	/* HP to chassis */
 		if (tmp == 0)
@@ -1164,23 +1244,23 @@
 		else if (tmp == 2)
 			spec->autocfg.hp_pins[0] = portd;
 		else
-			return;
+			return 1;
 	}
-	if (spec->autocfg.hp_pins[0])
-		snd_hda_codec_write(codec, spec->autocfg.hp_pins[0], 0,
-			AC_VERB_SET_UNSOLICITED_ENABLE,
-			AC_USRSP_EN | ALC880_HP_EVENT);
 
-#if 0 /* it's broken in some acses -- temporarily disabled */
-	if (spec->autocfg.input_pins[AUTO_PIN_MIC] &&
-		spec->autocfg.input_pins[AUTO_PIN_FRONT_MIC])
-		snd_hda_codec_write(codec,
-			spec->autocfg.input_pins[AUTO_PIN_MIC], 0,
-			AC_VERB_SET_UNSOLICITED_ENABLE,
-			AC_USRSP_EN | ALC880_MIC_EVENT);
-#endif /* disabled */
+	alc_init_auto_hp(codec);
+	return 1;
+}
 
-	spec->unsol_event = alc_sku_unsol_event;
+static void alc_ssid_check(struct hda_codec *codec,
+			   hda_nid_t porta, hda_nid_t porte, hda_nid_t portd)
+{
+	if (!alc_subsystem_id(codec, porta, porte, portd)) {
+		struct alc_spec *spec = codec->spec;
+		snd_printd("realtek: "
+			   "Enable default setup for auto mode as fallback\n");
+		spec->init_amp = ALC_INIT_DEFAULT;
+		alc_init_auto_hp(codec);
+	}
 }
 
 /*
@@ -1315,32 +1395,58 @@
 	{}
 };
 
-static void alc888_fujitsu_xa3530_automute(struct hda_codec *codec)
+static void alc_automute_amp(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int bits;
-	/* Line out presence */
-	present = snd_hda_codec_read(codec, 0x17, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	/* HP out presence */
-	present = present || snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
+	struct alc_spec *spec = codec->spec;
+	unsigned int val, mute;
+	hda_nid_t nid;
+	int i;
+
+	spec->jack_present = 0;
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.hp_pins); i++) {
+		nid = spec->autocfg.hp_pins[i];
+		if (!nid)
+			break;
+		val = snd_hda_codec_read(codec, nid, 0,
+					 AC_VERB_GET_PIN_SENSE, 0);
+		if (val & AC_PINSENSE_PRESENCE) {
+			spec->jack_present = 1;
+			break;
+		}
+	}
+
+	mute = spec->jack_present ? HDA_AMP_MUTE : 0;
 	/* Toggle internal speakers muting */
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	/* Toggle internal bass muting */
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.speaker_pins); i++) {
+		nid = spec->autocfg.speaker_pins[i];
+		if (!nid)
+			break;
+		snd_hda_codec_amp_stereo(codec, nid, HDA_OUTPUT, 0,
+					 HDA_AMP_MUTE, mute);
+	}
 }
 
-static void alc888_fujitsu_xa3530_unsol_event(struct hda_codec *codec,
-		unsigned int res)
+static void alc_automute_amp_unsol_event(struct hda_codec *codec,
+					 unsigned int res)
 {
-	if (res >> 26 == ALC880_HP_EVENT)
-		alc888_fujitsu_xa3530_automute(codec);
+	if (codec->vendor_id == 0x10ec0880)
+		res >>= 28;
+	else
+		res >>= 26;
+	if (res == ALC880_HP_EVENT)
+		alc_automute_amp(codec);
 }
 
+static void alc888_fujitsu_xa3530_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x17; /* line-out */
+	spec->autocfg.hp_pins[1] = 0x1b; /* hp */
+	spec->autocfg.speaker_pins[0] = 0x14; /* speaker */
+	spec->autocfg.speaker_pins[1] = 0x15; /* bass */
+	alc_automute_amp(codec);
+}
 
 /*
  * ALC888 Acer Aspire 4930G model
@@ -1364,6 +1470,59 @@
 	{ }
 };
 
+/*
+ * ALC889 Acer Aspire 8930G model
+ */
+
+static struct hda_verb alc889_acer_aspire_8930g_verbs[] = {
+/* Front Mic: set to PIN_IN (empty by default) */
+	{0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+/* Unselect Front Mic by default in input mixer 3 */
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0xb)},
+/* Enable unsolicited event for HP jack */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+/* Connect Internal Front to Front */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
+/* Connect Internal Rear to Rear */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x01},
+/* Connect Internal CLFE to CLFE */
+	{0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x16, AC_VERB_SET_CONNECT_SEL, 0x02},
+/* Connect HP out to Front */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | PIN_HP},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
+/* Enable all DACs */
+/*  DAC DISABLE/MUTE 1? */
+/*  setting bits 1-5 disables DAC nids 0x02-0x06 apparently. Init=0x38 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x03},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0000},
+/*  DAC DISABLE/MUTE 2? */
+/*  some bit here disables the other DACs. Init=0x4900 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x08},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0000},
+/* Enable amplifiers */
+	{0x14, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+	{0x15, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+/* DMIC fix
+ * This laptop has a stereo digital microphone. The mics are only 1cm apart
+ * which makes the stereo useless. However, either the mic or the ALC889
+ * makes the signal become a difference/sum signal instead of standard
+ * stereo, which is annoying. So instead we flip this bit which makes the
+ * codec replicate the sum signal to both channels, turning it into a
+ * normal mono mic.
+ */
+/*  DMIC_CONTROL? Init value = 0x0001 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x0b},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0003},
+	{ }
+};
+
 static struct hda_input_mux alc888_2_capture_sources[2] = {
 	/* Front mic only available on one ADC */
 	{
@@ -1385,6 +1544,38 @@
 	}
 };
 
+static struct hda_input_mux alc889_capture_sources[3] = {
+	/* Digital mic only available on first "ADC" */
+	{
+		.num_items = 5,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Front Mic", 0xb },
+			{ "Input Mix", 0xa },
+		},
+	},
+	{
+		.num_items = 4,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Input Mix", 0xa },
+		},
+	},
+	{
+		.num_items = 4,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Input Mix", 0xa },
+		},
+	}
+};
+
 static struct snd_kcontrol_new alc888_base_mixer[] = {
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
@@ -1407,22 +1598,24 @@
 	{ } /* end */
 };
 
-static void alc888_acer_aspire_4930g_automute(struct hda_codec *codec)
+static void alc888_acer_aspire_4930g_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int bits;
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
-static void alc888_acer_aspire_4930g_unsol_event(struct hda_codec *codec,
-		unsigned int res)
+static void alc889_acer_aspire_8930g_init_hook(struct hda_codec *codec)
 {
-	if (res >> 26 == ALC880_HP_EVENT)
-		alc888_acer_aspire_4930g_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	spec->autocfg.speaker_pins[2] = 0x1b;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -2390,21 +2583,6 @@
 	{ }
 };
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc880_uniwill_hp_automute(struct hda_codec *codec)
-{
- 	unsigned int present;
-	unsigned char bits;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
 /* auto-toggle front mic */
 static void alc880_uniwill_mic_automute(struct hda_codec *codec)
 {
@@ -2417,9 +2595,14 @@
 	snd_hda_codec_amp_stereo(codec, 0x0b, HDA_INPUT, 1, HDA_AMP_MUTE, bits);
 }
 
-static void alc880_uniwill_automute(struct hda_codec *codec)
+static void alc880_uniwill_init_hook(struct hda_codec *codec)
 {
-	alc880_uniwill_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x16;
+	alc_automute_amp(codec);
 	alc880_uniwill_mic_automute(codec);
 }
 
@@ -2430,24 +2613,22 @@
 	 * definition.  4bit tag is placed at 28 bit!
 	 */
 	switch (res >> 28) {
-	case ALC880_HP_EVENT:
-		alc880_uniwill_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc880_uniwill_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
-static void alc880_uniwill_p53_hp_automute(struct hda_codec *codec)
+static void alc880_uniwill_p53_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0, HDA_AMP_MUTE, bits);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
 static void alc880_uniwill_p53_dcvol_automute(struct hda_codec *codec)
@@ -2469,10 +2650,10 @@
 	/* Looks like the unsol event is incompatible with the standard
 	 * definition.  4bit tag is placed at 28 bit!
 	 */
-	if ((res >> 28) == ALC880_HP_EVENT)
-		alc880_uniwill_p53_hp_automute(codec);
 	if ((res >> 28) == ALC880_DCVOL_EVENT)
 		alc880_uniwill_p53_dcvol_automute(codec);
+	else
+		alc_automute_amp_unsol_event(codec, res);
 }
 
 /*
@@ -2542,6 +2723,7 @@
 /* Enable GPIO mask and set output */
 #define alc880_gpio1_init_verbs	alc_gpio1_init_verbs
 #define alc880_gpio2_init_verbs	alc_gpio2_init_verbs
+#define alc880_gpio3_init_verbs	alc_gpio3_init_verbs
 
 /* Clevo m520g init */
 static struct hda_verb alc880_pin_clevo_init_verbs[] = {
@@ -2704,30 +2886,18 @@
 	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	/* jack sense */
-	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
+	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT},
 	{ }
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc880_lg_automute(struct hda_codec *codec)
+static void alc880_lg_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
-static void alc880_lg_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 28 bit!
-	 */
-	if ((res >> 28) == 0x01)
-		alc880_lg_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x17;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -2801,30 +2971,18 @@
 	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
 	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	/* jack sense */
-	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
+	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT},
 	{ }
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc880_lg_lw_automute(struct hda_codec *codec)
+static void alc880_lg_lw_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
-static void alc880_lg_lw_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 28 bit!
-	 */
-	if ((res >> 28) == 0x01)
-		alc880_lg_lw_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 static struct snd_kcontrol_new alc880_medion_rim_mixer[] = {
@@ -2871,16 +3029,10 @@
 /* toggle speaker-output according to the hp-jack state */
 static void alc880_medion_rim_automute(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	if (present)
+	struct alc_spec *spec = codec->spec;
+	alc_automute_amp(codec);
+	/* toggle EAPD */
+	if (spec->jack_present)
 		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 0);
 	else
 		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 2);
@@ -2896,6 +3048,15 @@
 		alc880_medion_rim_automute(codec);
 }
 
+static void alc880_medion_rim_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc880_medion_rim_automute(codec);
+}
+
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 static struct hda_amp_list alc880_loopbacks[] = {
 	{ 0x0b, HDA_INPUT, 0 },
@@ -2924,8 +3085,7 @@
 	unsigned int i;
 
 	alc_fix_pll(codec);
-	if (codec->vendor_id == 0x10ec0888)
-		alc888_coef_init(codec);
+	alc_auto_init_amp(codec, spec->init_amp);
 
 	for (i = 0; i < spec->num_init_verbs; i++)
 		snd_hda_sequence_write(codec, spec->init_verbs[i]);
@@ -3127,7 +3287,10 @@
 	if (spec->no_analog)
 		goto skip_analog;
 
+	snprintf(spec->stream_name_analog, sizeof(spec->stream_name_analog),
+		 "%s Analog", codec->chip_name);
 	info->name = spec->stream_name_analog;
+	
 	if (spec->stream_analog_playback) {
 		if (snd_BUG_ON(!spec->multiout.dac_nids))
 			return -EINVAL;
@@ -3153,6 +3316,9 @@
  skip_analog:
 	/* SPDIF for stream index #1 */
 	if (spec->multiout.dig_out_nid || spec->dig_in_nid) {
+		snprintf(spec->stream_name_digital,
+			 sizeof(spec->stream_name_digital),
+			 "%s Digital", codec->chip_name);
 		codec->num_pcms = 2;
 	        codec->slave_dig_outs = spec->multiout.slave_dig_outs;
 		info = spec->pcm_rec + 1;
@@ -3755,7 +3921,7 @@
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_f1734_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_ASUS] = {
 		.mixers = { alc880_asus_mixer },
@@ -3832,7 +3998,7 @@
 		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_unsol_event,
-		.init_hook = alc880_uniwill_automute,
+		.init_hook = alc880_uniwill_init_hook,
 	},
 	[ALC880_UNIWILL_P53] = {
 		.mixers = { alc880_uniwill_p53_mixer },
@@ -3844,7 +4010,7 @@
 		.channel_mode = alc880_threestack_modes,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_FUJITSU] = {
 		.mixers = { alc880_fujitsu_mixer },
@@ -3858,7 +4024,7 @@
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_CLEVO] = {
 		.mixers = { alc880_three_stack_mixer },
@@ -3883,8 +4049,8 @@
 		.channel_mode = alc880_lg_ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc880_lg_capture_source,
-		.unsol_event = alc880_lg_unsol_event,
-		.init_hook = alc880_lg_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc880_lg_init_hook,
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 		.loopbacks = alc880_lg_loopbacks,
 #endif
@@ -3899,8 +4065,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc880_lg_lw_modes),
 		.channel_mode = alc880_lg_lw_modes,
 		.input_mux = &alc880_lg_lw_capture_source,
-		.unsol_event = alc880_lg_lw_unsol_event,
-		.init_hook = alc880_lg_lw_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc880_lg_lw_init_hook,
 	},
 	[ALC880_MEDION_RIM] = {
 		.mixers = { alc880_medion_rim_mixer },
@@ -3914,7 +4080,7 @@
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_medion_rim_capture_source,
 		.unsol_event = alc880_medion_rim_unsol_event,
-		.init_hook = alc880_medion_rim_automute,
+		.init_hook = alc880_medion_rim_init_hook,
 	},
 #ifdef CONFIG_SND_DEBUG
 	[ALC880_TEST] = {
@@ -4199,7 +4365,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i < spec->autocfg.line_outs; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -4304,6 +4469,8 @@
 	spec->num_mux_defs = 1;
 	spec->input_mux = &spec->private_imux[0];
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
@@ -4361,8 +4528,8 @@
 						  alc880_models,
 						  alc880_cfg_tbl);
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC880, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC880_AUTO;
 	}
 
@@ -4389,12 +4556,10 @@
 	if (board_config != ALC880_AUTO)
 		setup_preset(spec, &alc880_presets[board_config]);
 
-	spec->stream_name_analog = "ALC880 Analog";
 	spec->stream_analog_playback = &alc880_pcm_analog_playback;
 	spec->stream_analog_capture = &alc880_pcm_analog_capture;
 	spec->stream_analog_alt_capture = &alc880_pcm_analog_alt_capture;
 
-	spec->stream_name_digital = "ALC880 Digital";
 	spec->stream_digital_playback = &alc880_pcm_digital_playback;
 	spec->stream_digital_capture = &alc880_pcm_digital_capture;
 
@@ -5679,7 +5844,6 @@
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t nid;
 
-	alc_subsystem_id(codec, 0x10, 0x15, 0x0f);
 	nid = spec->autocfg.line_out_pins[0];
 	if (nid) {
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -5789,6 +5953,8 @@
 	spec->num_mux_defs = 1;
 	spec->input_mux = &spec->private_imux[0];
 
+	alc_ssid_check(codec, 0x10, 0x15, 0x0f);
+
 	return 1;
 }
 
@@ -6006,8 +6172,9 @@
 						  alc260_models,
 						  alc260_cfg_tbl);
 	if (board_config < 0) {
-		snd_printd(KERN_INFO "hda_codec: Unknown model for ALC260, "
-			   "trying auto-probe from BIOS...\n");
+		snd_printd(KERN_INFO "hda_codec: Unknown model for %s, "
+			   "trying auto-probe from BIOS...\n",
+			   codec->chip_name);
 		board_config = ALC260_AUTO;
 	}
 
@@ -6034,11 +6201,9 @@
 	if (board_config != ALC260_AUTO)
 		setup_preset(spec, &alc260_presets[board_config]);
 
-	spec->stream_name_analog = "ALC260 Analog";
 	spec->stream_analog_playback = &alc260_pcm_analog_playback;
 	spec->stream_analog_capture = &alc260_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC260 Digital";
 	spec->stream_digital_playback = &alc260_pcm_digital_playback;
 	spec->stream_digital_capture = &alc260_pcm_digital_capture;
 
@@ -6115,6 +6280,16 @@
 		{ "CD", 0x4 },
 	},
 };
+
+static struct hda_input_mux mb5_capture_source = {
+	.num_items = 3,
+	.items = {
+		{ "Mic", 0x1 },
+		{ "Line", 0x2 },
+		{ "CD", 0x4 },
+	},
+};
+
 /*
  * 2ch mode
  */
@@ -6202,6 +6377,34 @@
 	{ 6, alc885_mbp_ch6_init },
 };
 
+/*
+ * 2ch
+ * Speakers/Woofer/HP = Front
+ * LineIn = Input
+ */
+static struct hda_verb alc885_mb5_ch2_init[] = {
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+	{ } /* end */
+};
+
+/*
+ * 6ch mode
+ * Speakers/HP = Front
+ * Woofer = LFE
+ * LineIn = Surround
+ */
+static struct hda_verb alc885_mb5_ch6_init[] = {
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc885_mb5_6ch_modes[2] = {
+	{ 2, alc885_mb5_ch2_init },
+	{ 6, alc885_mb5_ch6_init },
+};
 
 /* Pin assignment: Front=0x14, Rear=0x15, CLFE=0x16, Side=0x17
  *                 Mic=0x18, Front Mic=0x19, Line-In=0x1a, HP=0x1b
@@ -6244,6 +6447,25 @@
 	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0x00, HDA_INPUT),
 	{ } /* end */
 };
+
+static struct snd_kcontrol_new alc885_mb5_mixer[] = {
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Front Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Surround Playback Switch", 0x0d, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("LFE Playback Volume", 0x0e, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("LFE Playback Switch", 0x0e, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("HP Playback Volume", 0x0f, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("HP Playback Switch", 0x0f, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE  ("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE  ("Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Boost", 0x15, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x19, 0x00, HDA_INPUT),
+	{ } /* end */
+};
+
 static struct snd_kcontrol_new alc882_w2jc_mixer[] = {
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
@@ -6471,6 +6693,55 @@
 	{ }
 };
 
+/* Macbook 5,1 */
+static struct hda_verb alc885_mb5_init_verbs[] = {
+	/* DACs */
+	{0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	/* Front mixer */
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* Surround mixer */
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* LFE mixer */
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* HP mixer */
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* Front Pin (0x0c) */
+	{0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x01},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x18, AC_VERB_SET_CONNECT_SEL, 0x00},
+	/* LFE Pin (0x0e) */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x01},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x02},
+	/* HP Pin (0x0f) */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x14, AC_VERB_SET_CONNECT_SEL, 0x03},
+	/* Front Mic pin: input vref at 80% */
+	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+	/* Line In pin */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+	{ }
+};
+
 /* Macbook Pro rev3 */
 static struct hda_verb alc885_mbp3_init_verbs[] = {
 	/* Front mixer: unmute input/output amp left and right (volume = 0) */
@@ -6560,45 +6831,23 @@
 };
 
 /* Toggle speaker-output according to the hp-jack state */
-static void alc885_imac24_automute(struct hda_codec *codec)
+static void alc885_imac24_automute_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x18, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x18;
+	spec->autocfg.speaker_pins[1] = 0x1a;
+	alc_automute_amp(codec);
 }
 
-/* Processes unsolicited events. */
-static void alc885_imac24_unsol_event(struct hda_codec *codec,
-				      unsigned int res)
+static void alc885_mbp3_init_hook(struct hda_codec *codec)
 {
-	/* Headphone insertion or removal. */
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc885_imac24_automute(codec);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc885_mbp3_automute(struct hda_codec *codec)
-{
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14,  HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? 0 : HDA_AMP_MUTE);
-
-}
-static void alc885_mbp3_unsol_event(struct hda_codec *codec,
-				    unsigned int res)
-{
-	/* Headphone insertion or removal. */
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc885_mbp3_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 
@@ -6623,24 +6872,25 @@
 /* toggle speaker-output according to the hp-jack state */
 static void alc882_targa_automute(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	struct alc_spec *spec = codec->spec;
+	alc_automute_amp(codec);
 	snd_hda_codec_write_cache(codec, 1, 0, AC_VERB_SET_GPIO_DATA,
-				  present ? 1 : 3);
+				  spec->jack_present ? 1 : 3);
+}
+
+static void alc882_targa_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc882_targa_automute(codec);
 }
 
 static void alc882_targa_unsol_event(struct hda_codec *codec, unsigned int res)
 {
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 26 bit!
-	 */
-	if (((res >> 26) == ALC880_HP_EVENT)) {
+	if ((res >> 26) == ALC880_HP_EVENT)
 		alc882_targa_automute(codec);
-	}
 }
 
 static struct hda_verb alc882_asus_a7j_verbs[] = {
@@ -6722,7 +6972,7 @@
 static void alc885_imac24_init_hook(struct hda_codec *codec)
 {
 	alc885_macpro_init_hook(codec);
-	alc885_imac24_automute(codec);
+	alc885_imac24_automute_init_hook(codec);
 }
 
 /*
@@ -6815,6 +7065,7 @@
 	[ALC882_ASUS_A7J]	= "asus-a7j",
 	[ALC882_ASUS_A7M]	= "asus-a7m",
 	[ALC885_MACPRO]		= "macpro",
+	[ALC885_MB5]		= "mb5",
 	[ALC885_MBP3]		= "mbp3",
 	[ALC885_IMAC24]		= "imac24",
 	[ALC882_AUTO]		= "auto",
@@ -6892,8 +7143,20 @@
 		.input_mux = &alc882_capture_source,
 		.dig_out_nid = ALC882_DIGOUT_NID,
 		.dig_in_nid = ALC882_DIGIN_NID,
-		.unsol_event = alc885_mbp3_unsol_event,
-		.init_hook = alc885_mbp3_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc885_mbp3_init_hook,
+	},
+	[ALC885_MB5] = {
+		.mixers = { alc885_mb5_mixer, alc882_chmode_mixer },
+		.init_verbs = { alc885_mb5_init_verbs,
+				alc880_gpio1_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc882_dac_nids),
+		.dac_nids = alc882_dac_nids,
+		.channel_mode = alc885_mb5_6ch_modes,
+		.num_channel_mode = ARRAY_SIZE(alc885_mb5_6ch_modes),
+		.input_mux = &mb5_capture_source,
+		.dig_out_nid = ALC882_DIGOUT_NID,
+		.dig_in_nid = ALC882_DIGIN_NID,
 	},
 	[ALC885_MACPRO] = {
 		.mixers = { alc882_macpro_mixer },
@@ -6917,7 +7180,7 @@
 		.num_channel_mode = ARRAY_SIZE(alc882_ch_modes),
 		.channel_mode = alc882_ch_modes,
 		.input_mux = &alc882_capture_source,
-		.unsol_event = alc885_imac24_unsol_event,
+		.unsol_event = alc_automute_amp_unsol_event,
 		.init_hook = alc885_imac24_init_hook,
 	},
 	[ALC882_TARGA] = {
@@ -6934,7 +7197,7 @@
 		.need_dac_fix = 1,
 		.input_mux = &alc882_capture_source,
 		.unsol_event = alc882_targa_unsol_event,
-		.init_hook = alc882_targa_automute,
+		.init_hook = alc882_targa_init_hook,
 	},
 	[ALC882_ASUS_A7J] = {
 		.mixers = { alc882_asus_a7j_mixer, alc882_chmode_mixer },
@@ -7014,7 +7277,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -7197,10 +7459,17 @@
 		case 0x106b00a1: /* Macbook (might be wrong - PCI SSID?) */
 		case 0x106b00a4: /* MacbookPro4,1 */
 		case 0x106b2c00: /* Macbook Pro rev3 */
-		case 0x106b3600: /* Macbook 3.1 */
+		/* Macbook 3.1 (0x106b3600) is handled by patch_alc883() */
 		case 0x106b3800: /* MacbookPro4,1 - latter revision */
 			board_config = ALC885_MBP3;
 			break;
+		case 0x106b3f00: /* Macbook 5,1 */
+		case 0x106b4000: /* Macbook Pro 5,1 - FIXME: HP jack sense
+				  *   seems not working, so apparently
+				  *   no perfect solution yet
+				  */
+			board_config = ALC885_MB5;
+			break;
 		default:
 			/* ALC889A is handled better as ALC888-compatible */
 			if (codec->revision_id == 0x100101 ||
@@ -7208,8 +7477,9 @@
 				alc_free(codec);
 				return patch_alc883(codec);
 			}
-			printk(KERN_INFO "hda_codec: Unknown model for ALC882, "
-		       			 "trying auto-probe from BIOS...\n");
+			printk(KERN_INFO "hda_codec: Unknown model for %s, "
+			       "trying auto-probe from BIOS...\n",
+			       codec->chip_name);
 			board_config = ALC882_AUTO;
 		}
 	}
@@ -7239,14 +7509,6 @@
 	if (board_config != ALC882_AUTO)
 		setup_preset(spec, &alc882_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0885) {
-		spec->stream_name_analog = "ALC885 Analog";
-		spec->stream_name_digital = "ALC885 Digital";
-	} else {
-		spec->stream_name_analog = "ALC882 Analog";
-		spec->stream_name_digital = "ALC882 Digital";
-	}
-
 	spec->stream_analog_playback = &alc882_pcm_analog_playback;
 	spec->stream_analog_capture = &alc882_pcm_analog_capture;
 	/* FIXME: setup DAC5 */
@@ -7399,6 +7661,17 @@
 	},
 };
 
+static struct hda_input_mux alc889A_mb31_capture_source = {
+	.num_items = 2,
+	.items = {
+		{ "Mic", 0x0 },
+		/* Front Mic (0x01) unused */
+		{ "Line", 0x2 },
+		/* Line 2 (0x03) unused */
+		/* CD (0x04) unsused? */
+	},
+};
+
 /*
  * 2ch mode
  */
@@ -7448,6 +7721,73 @@
 	{ 6, alc883_3ST_ch6_init },
 };
 
+
+/*
+ * 2ch mode
+ */
+static struct hda_verb alc883_4ST_ch2_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ } /* end */
+};
+
+/*
+ * 4ch mode
+ */
+static struct hda_verb alc883_4ST_ch4_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+/*
+ * 6ch mode
+ */
+static struct hda_verb alc883_4ST_ch6_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_CONNECT_SEL, 0x02 },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+/*
+ * 8ch mode
+ */
+static struct hda_verb alc883_4ST_ch8_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x17, AC_VERB_SET_CONNECT_SEL, 0x03 },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_CONNECT_SEL, 0x02 },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc883_4ST_8ch_modes[4] = {
+	{ 2, alc883_4ST_ch2_init },
+	{ 4, alc883_4ST_ch4_init },
+	{ 6, alc883_4ST_ch6_init },
+	{ 8, alc883_4ST_ch8_init },
+};
+
+
 /*
  * 2ch mode
  */
@@ -7517,6 +7857,49 @@
 	{ 8, alc883_sixstack_ch8_init },
 };
 
+/* 2ch mode (Speaker:front, Subwoofer:CLFE, Line:input, Headphones:front) */
+static struct hda_verb alc889A_mb31_ch2_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},    /* Line as input */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Line off */
+	{ } /* end */
+};
+
+/* 4ch mode (Speaker:front, Subwoofer:CLFE, Line:CLFE, Headphones:front) */
+static struct hda_verb alc889A_mb31_ch4_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},   /* Line as output */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Line on */
+	{ } /* end */
+};
+
+/* 5ch mode (Speaker:front, Subwoofer:CLFE, Line:input, Headphones:rear) */
+static struct hda_verb alc889A_mb31_ch5_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},             /* HP as rear */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},    /* Line as input */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Line off */
+	{ } /* end */
+};
+
+/* 6ch mode (Speaker:front, Subwoofer:off, Line:CLFE, Headphones:Rear) */
+static struct hda_verb alc889A_mb31_ch6_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Subwoofer off */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},   /* Line as output */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Line on */
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc889A_mb31_6ch_modes[4] = {
+	{ 2, alc889A_mb31_ch2_init },
+	{ 4, alc889A_mb31_ch4_init },
+	{ 5, alc889A_mb31_ch5_init },
+	{ 6, alc889A_mb31_ch6_init },
+};
+
 static struct hda_verb alc883_medion_eapd_verbs[] = {
         /* eanable EAPD on medion laptop */
 	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
@@ -7782,8 +8165,6 @@
 	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0d, 2, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME("Side Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Side Playback Switch", 0x0f, 2, HDA_INPUT),
-	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("iSpeaker Playback Switch", 0x1a, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
 	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
 	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
@@ -7797,6 +8178,42 @@
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new alc889A_mb31_mixer[] = {
+	/* Output mixers */
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x00,
+		HDA_OUTPUT),
+	HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 0x02, HDA_INPUT),
+	/* Output switches */
+	HDA_CODEC_MUTE("Enable Speaker", 0x14, 0x00, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Enable Headphones", 0x15, 0x00, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Enable LFE", 0x16, 2, 0x00, HDA_OUTPUT),
+	/* Boost mixers */
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Boost", 0x1a, 0x00, HDA_INPUT),
+	/* Input mixers */
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x00, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	{ } /* end */
+};
+
+static struct snd_kcontrol_new alc883_vaiott_mixer[] = {
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+	{ } /* end */
+};
+
 static struct hda_bind_ctls alc883_bind_cap_vol = {
 	.ops = &snd_hda_bind_vol,
 	.values = {
@@ -7932,16 +8349,14 @@
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_mitac_hp_automute(struct hda_codec *codec)
+static void alc883_mitac_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x17;
+	alc_automute_amp(codec);
 }
 
 /* auto-toggle front mic */
@@ -7958,25 +8373,6 @@
 }
 */
 
-static void alc883_mitac_automute(struct hda_codec *codec)
-{
-	alc883_mitac_hp_automute(codec);
-	/* alc883_mitac_mic_automute(codec); */
-}
-
-static void alc883_mitac_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
-{
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_mitac_hp_automute(codec);
-		break;
-	case ALC880_MIC_EVENT:
-		/* alc883_mitac_mic_automute(codec); */
-		break;
-	}
-}
-
 static struct hda_verb alc883_mitac_verbs[] = {
 	/* HP */
 	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
@@ -8028,14 +8424,24 @@
 	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
 
-	{0x18, AC_VERB_SET_CONNECT_SEL, 0x02}, /* mic/clfe */
-	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x01}, /* line/surround */
-	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */
+/* Connect Line-Out side jack (SPDIF) to Side */
+	{0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x17, AC_VERB_SET_CONNECT_SEL, 0x03},
+/* Connect Mic jack to CLFE */
+	{0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x18, AC_VERB_SET_CONNECT_SEL, 0x02},
+/* Connect Line-in jack to Surround */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x01},
+/* Connect HP out jack to Front */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x00},
 
 	{0x14, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
-	{0x01, AC_VERB_SET_GPIO_MASK, 0x03},
-	{0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03},
-	{0x01, AC_VERB_SET_GPIO_DATA, 0x03},
 
 	{ } /* end */
 };
@@ -8094,29 +8500,26 @@
 	{ }
 };
 
-static void alc888_3st_hp_front_automute(struct hda_codec *codec)
-{
-	unsigned int present, bits;
+static struct hda_verb alc883_vaiott_verbs[] = {
+	/* HP */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-			AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x18, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	/* enable unsolicited event */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
 
-static void alc888_3st_hp_unsol_event(struct hda_codec *codec,
-				      unsigned int res)
+	{ } /* end */
+};
+
+static void alc888_3st_hp_init_hook(struct hda_codec *codec)
 {
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc888_3st_hp_front_automute(codec);
-		break;
-	}
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	spec->autocfg.speaker_pins[2] = 0x18;
+	alc_automute_amp(codec);
 }
 
 static struct hda_verb alc888_3st_hp_verbs[] = {
@@ -8213,56 +8616,18 @@
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_medion_md2_automute(struct hda_codec *codec)
+static void alc883_medion_md2_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc883_medion_md2_unsol_event(struct hda_codec *codec,
-					  unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_medion_md2_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_tagra_automute(struct hda_codec *codec)
-{
- 	unsigned int present;
-	unsigned char bits;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_write_cache(codec, 1, 0, AC_VERB_SET_GPIO_DATA,
-				  present ? 1 : 3);
-}
-
-static void alc883_tagra_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_tagra_automute(codec);
-}
-
-/* toggle speaker-output according to the hp-jack state */
-static void alc883_clevo_m720_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+#define alc883_tagra_init_hook		alc882_targa_init_hook
+#define alc883_tagra_unsol_event	alc882_targa_unsol_event
 
 static void alc883_clevo_m720_mic_automute(struct hda_codec *codec)
 {
@@ -8274,9 +8639,13 @@
 				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
 }
 
-static void alc883_clevo_m720_automute(struct hda_codec *codec)
+static void alc883_clevo_m720_init_hook(struct hda_codec *codec)
 {
-	alc883_clevo_m720_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 	alc883_clevo_m720_mic_automute(codec);
 }
 
@@ -8284,52 +8653,32 @@
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_clevo_m720_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc883_clevo_m720_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_2ch_fujitsu_pi2515_automute(struct hda_codec *codec)
+static void alc883_2ch_fujitsu_pi2515_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
-static void alc883_2ch_fujitsu_pi2515_unsol_event(struct hda_codec *codec,
-						  unsigned int res)
+static void alc883_haier_w66_init_hook(struct hda_codec *codec)
 {
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_2ch_fujitsu_pi2515_automute(codec);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_haier_w66_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? 0x80 : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 0x80, bits);
-}
-
-static void alc883_haier_w66_unsol_event(struct hda_codec *codec,
-					 unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_haier_w66_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 static void alc883_lenovo_101e_ispeaker_automute(struct hda_codec *codec)
@@ -8337,8 +8686,8 @@
  	unsigned int present;
 	unsigned char bits;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+	present = snd_hda_codec_read(codec, 0x14, 0, AC_VERB_GET_PIN_SENSE, 0)
+		& AC_PINSENSE_PRESENCE;
 	bits = present ? HDA_AMP_MUTE : 0;
 	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
 				 HDA_AMP_MUTE, bits);
@@ -8368,23 +8717,14 @@
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_acer_aspire_automute(struct hda_codec *codec)
+static void alc883_acer_aspire_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc883_acer_aspire_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_acer_aspire_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	alc_automute_amp(codec);
 }
 
 static struct hda_verb alc883_acer_eapd_verbs[] = {
@@ -8405,75 +8745,39 @@
 	{ }
 };
 
-static void alc888_6st_dell_front_automute(struct hda_codec *codec)
+static void alc888_6st_dell_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x1b, 0,
-				AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	spec->autocfg.speaker_pins[3] = 0x17;
+	alc_automute_amp(codec);
 }
 
-static void alc888_6st_dell_unsol_event(struct hda_codec *codec,
-					     unsigned int res)
+static void alc888_lenovo_sky_init_hook(struct hda_codec *codec)
 {
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		/* printk(KERN_DEBUG "hp_event\n"); */
-		alc888_6st_dell_front_automute(codec);
-		break;
-	}
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	spec->autocfg.speaker_pins[3] = 0x17;
+	spec->autocfg.speaker_pins[4] = 0x1a;
+	alc_automute_amp(codec);
 }
 
-static void alc888_lenovo_sky_front_automute(struct hda_codec *codec)
+static void alc883_vaiott_init_hook(struct hda_codec *codec)
 {
-	unsigned int mute;
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
-
-static void alc883_lenovo_sky_unsol_event(struct hda_codec *codec,
-					     unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc888_lenovo_sky_front_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x17;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -8561,39 +8865,33 @@
 			    0x7000 | (0x01 << 8) | (present ? 0x80 : 0));
 }
 
-static void alc883_M90V_speaker_automute(struct hda_codec *codec)
+static void alc883_M90V_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
-	snd_hda_codec_write(codec, 0x15, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
-	snd_hda_codec_write(codec, 0x16, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	alc_automute_pin(codec);
 }
 
 static void alc883_mode2_unsol_event(struct hda_codec *codec,
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_M90V_speaker_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc883_nb_mic_automute(codec);
 		break;
+	default:
+		alc_sku_unsol_event(codec, res);
+		break;
 	}
 }
 
 static void alc883_mode2_inithook(struct hda_codec *codec)
 {
-	alc883_M90V_speaker_automute(codec);
+	alc883_M90V_init_hook(codec);
 	alc883_nb_mic_automute(codec);
 }
 
@@ -8610,32 +8908,49 @@
 	{ } /* end */
 };
 
-static void alc883_eee1601_speaker_automute(struct hda_codec *codec)
+static void alc883_eee1601_inithook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc_automute_pin(codec);
 }
 
-static void alc883_eee1601_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
+static struct hda_verb alc889A_mb31_verbs[] = {
+	/* Init rear pin (used as headphone output) */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc4},    /* Apple Headphones */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},           /* Connect to front */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+	/* Init line pin (used as output in 4ch and 6ch mode) */
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x02},           /* Connect to CLFE */
+	/* Init line 2 pin (used as headphone out by default) */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},  /* Use as input */
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* Mute output */
+	{ } /* end */
+};
+
+/* Mute speakers according to the headphone jack state */
+static void alc889A_mb31_automute(struct hda_codec *codec)
 {
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_eee1601_speaker_automute(codec);
-		break;
+	unsigned int present;
+
+	/* Mute only in 2ch or 4ch mode */
+	if (snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_CONNECT_SEL, 0)
+	    == 0x00) {
+		present = snd_hda_codec_read(codec, 0x15, 0,
+			AC_VERB_GET_PIN_SENSE, 0) & AC_PINSENSE_PRESENCE;
+		snd_hda_codec_amp_stereo(codec, 0x14,  HDA_OUTPUT, 0,
+			HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
+			HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
 	}
 }
 
-static void alc883_eee1601_inithook(struct hda_codec *codec)
+static void alc889A_mb31_unsol_event(struct hda_codec *codec, unsigned int res)
 {
-	alc883_eee1601_speaker_automute(codec);
+	if ((res >> 26) == ALC880_HP_EVENT)
+		alc889A_mb31_automute(codec);
 }
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -8659,9 +8974,11 @@
 	[ALC883_6ST_DIG]	= "6stack-dig",
 	[ALC883_TARGA_DIG]	= "targa-dig",
 	[ALC883_TARGA_2ch_DIG]	= "targa-2ch-dig",
+	[ALC883_TARGA_8ch_DIG]	= "targa-8ch-dig",
 	[ALC883_ACER]		= "acer",
 	[ALC883_ACER_ASPIRE]	= "acer-aspire",
 	[ALC888_ACER_ASPIRE_4930G]	= "acer-aspire-4930g",
+	[ALC888_ACER_ASPIRE_8930G]	= "acer-aspire-8930g",
 	[ALC883_MEDION]		= "medion",
 	[ALC883_MEDION_MD2]	= "medion-md2",
 	[ALC883_LAPTOP_EAPD]	= "laptop-eapd",
@@ -8678,6 +8995,8 @@
 	[ALC888_FUJITSU_XA3530] = "fujitsu-xa3530",
 	[ALC883_3ST_6ch_INTEL]	= "3stack-6ch-intel",
 	[ALC1200_ASUS_P5Q]	= "asus-p5q",
+	[ALC889A_MB31]		= "mb31",
+	[ALC883_SONY_VAIO_TT]	= "sony-vaio-tt",
 	[ALC883_AUTO]		= "auto",
 };
 
@@ -8693,14 +9012,18 @@
 		ALC888_ACER_ASPIRE_4930G),
 	SND_PCI_QUIRK(0x1025, 0x013f, "Acer Aspire 5930G",
 		ALC888_ACER_ASPIRE_4930G),
+	SND_PCI_QUIRK(0x1025, 0x0145, "Acer Aspire 8930G",
+		ALC888_ACER_ASPIRE_8930G),
 	SND_PCI_QUIRK(0x1025, 0x0157, "Acer X3200", ALC883_AUTO),
 	SND_PCI_QUIRK(0x1025, 0x0158, "Acer AX1700-U3700A", ALC883_AUTO),
 	SND_PCI_QUIRK(0x1025, 0x015e, "Acer Aspire 6930G",
 		ALC888_ACER_ASPIRE_4930G),
 	SND_PCI_QUIRK(0x1025, 0x0166, "Acer Aspire 6530G",
 		ALC888_ACER_ASPIRE_4930G),
-	/* default Acer */
-	SND_PCI_QUIRK_VENDOR(0x1025, "Acer laptop", ALC883_ACER),
+	/* default Acer -- disabled as it causes more problems.
+	 *    model=auto should work fine now
+	 */
+	/* SND_PCI_QUIRK_VENDOR(0x1025, "Acer laptop", ALC883_ACER), */
 	SND_PCI_QUIRK(0x1028, 0x020d, "Dell Inspiron 530", ALC888_6ST_DELL),
 	SND_PCI_QUIRK(0x103c, 0x2a3d, "HP Pavillion", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x103c, 0x2a4f, "HP Samba", ALC888_3ST_HP),
@@ -8736,6 +9059,7 @@
 	SND_PCI_QUIRK(0x1462, 0x4314, "MSI", ALC883_TARGA_DIG),
 	SND_PCI_QUIRK(0x1462, 0x4319, "MSI", ALC883_TARGA_DIG),
 	SND_PCI_QUIRK(0x1462, 0x4324, "MSI", ALC883_TARGA_DIG),
+	SND_PCI_QUIRK(0x1462, 0x6510, "MSI GX620", ALC883_TARGA_8ch_DIG),
 	SND_PCI_QUIRK(0x1462, 0x6668, "MSI", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x1462, 0x7187, "MSI", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x1462, 0x7250, "MSI", ALC883_6ST_DIG),
@@ -8768,6 +9092,7 @@
 	SND_PCI_QUIRK(0x8086, 0x2503, "82801H", ALC883_MITAC),
 	SND_PCI_QUIRK(0x8086, 0x0022, "DX58SO", ALC883_3ST_6ch_INTEL),
 	SND_PCI_QUIRK(0x8086, 0xd601, "D102GGC", ALC883_3ST_6ch),
+	SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC883_SONY_VAIO_TT),
 	{}
 };
 
@@ -8848,7 +9173,7 @@
 		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_tagra_unsol_event,
-		.init_hook = alc883_tagra_automute,
+		.init_hook = alc883_tagra_init_hook,
 	},
 	[ALC883_TARGA_2ch_DIG] = {
 		.mixers = { alc883_tagra_2ch_mixer},
@@ -8862,7 +9187,25 @@
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_tagra_unsol_event,
-		.init_hook = alc883_tagra_automute,
+		.init_hook = alc883_tagra_init_hook,
+	},
+	[ALC883_TARGA_8ch_DIG] = {
+		.mixers = { alc883_base_mixer, alc883_chmode_mixer },
+		.init_verbs = { alc883_init_verbs, alc880_gpio3_init_verbs,
+				alc883_tagra_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids_rev),
+		.adc_nids = alc883_adc_nids_rev,
+		.capsrc_nids = alc883_capsrc_nids_rev,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.dig_in_nid = ALC883_DIGIN_NID,
+		.num_channel_mode = ARRAY_SIZE(alc883_4ST_8ch_modes),
+		.channel_mode = alc883_4ST_8ch_modes,
+		.need_dac_fix = 1,
+		.input_mux = &alc883_capture_source,
+		.unsol_event = alc883_tagra_unsol_event,
+		.init_hook = alc883_tagra_init_hook,
 	},
 	[ALC883_ACER] = {
 		.mixers = { alc883_base_mixer },
@@ -8887,8 +9230,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_acer_aspire_unsol_event,
-		.init_hook = alc883_acer_aspire_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_acer_aspire_init_hook,
 	},
 	[ALC888_ACER_ASPIRE_4930G] = {
 		.mixers = { alc888_base_mixer,
@@ -8907,8 +9250,29 @@
 		.num_mux_defs =
 			ARRAY_SIZE(alc888_2_capture_sources),
 		.input_mux = alc888_2_capture_sources,
-		.unsol_event = alc888_acer_aspire_4930g_unsol_event,
-		.init_hook = alc888_acer_aspire_4930g_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_acer_aspire_4930g_init_hook,
+	},
+	[ALC888_ACER_ASPIRE_8930G] = {
+		.mixers = { alc888_base_mixer,
+				alc883_chmode_mixer },
+		.init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs,
+				alc889_acer_aspire_8930g_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc889_adc_nids),
+		.adc_nids = alc889_adc_nids,
+		.capsrc_nids = alc889_capsrc_nids,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
+		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
+		.const_channel_count = 6,
+		.num_mux_defs =
+			ARRAY_SIZE(alc889_capture_sources),
+		.input_mux = alc889_capture_sources,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc889_acer_aspire_8930g_init_hook,
 	},
 	[ALC883_MEDION] = {
 		.mixers = { alc883_fivestack_mixer,
@@ -8932,8 +9296,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_medion_md2_unsol_event,
-		.init_hook = alc883_medion_md2_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_medion_md2_init_hook,
 	},
 	[ALC883_LAPTOP_EAPD] = {
 		.mixers = { alc883_base_mixer },
@@ -8954,7 +9318,7 @@
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_clevo_m720_unsol_event,
-		.init_hook = alc883_clevo_m720_automute,
+		.init_hook = alc883_clevo_m720_init_hook,
 	},
 	[ALC883_LENOVO_101E_2ch] = {
 		.mixers = { alc883_lenovo_101e_2ch_mixer},
@@ -8978,8 +9342,8 @@
 		.channel_mode = alc883_3ST_2ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_lenovo_nb0763_capture_source,
-		.unsol_event = alc883_medion_md2_unsol_event,
-		.init_hook = alc883_medion_md2_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_medion_md2_init_hook,
 	},
 	[ALC888_LENOVO_MS7195_DIG] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9003,8 +9367,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_haier_w66_unsol_event,
-		.init_hook = alc883_haier_w66_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_haier_w66_init_hook,
 	},
 	[ALC888_3ST_HP] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9015,8 +9379,8 @@
 		.channel_mode = alc888_3st_hp_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc888_3st_hp_unsol_event,
-		.init_hook = alc888_3st_hp_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_3st_hp_init_hook,
 	},
 	[ALC888_6ST_DELL] = {
 		.mixers = { alc883_base_mixer, alc883_chmode_mixer },
@@ -9028,8 +9392,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_sixstack_modes),
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc888_6st_dell_unsol_event,
-		.init_hook = alc888_6st_dell_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_6st_dell_init_hook,
 	},
 	[ALC883_MITAC] = {
 		.mixers = { alc883_mitac_mixer },
@@ -9039,8 +9403,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_mitac_unsol_event,
-		.init_hook = alc883_mitac_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_mitac_init_hook,
 	},
 	[ALC883_FUJITSU_PI2515] = {
 		.mixers = { alc883_2ch_fujitsu_pi2515_mixer },
@@ -9052,8 +9416,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_fujitsu_pi2515_capture_source,
-		.unsol_event = alc883_2ch_fujitsu_pi2515_unsol_event,
-		.init_hook = alc883_2ch_fujitsu_pi2515_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_2ch_fujitsu_pi2515_init_hook,
 	},
 	[ALC888_FUJITSU_XA3530] = {
 		.mixers = { alc888_base_mixer, alc883_chmode_mixer },
@@ -9070,8 +9434,8 @@
 		.num_mux_defs =
 			ARRAY_SIZE(alc888_2_capture_sources),
 		.input_mux = alc888_2_capture_sources,
-		.unsol_event = alc888_fujitsu_xa3530_unsol_event,
-		.init_hook = alc888_fujitsu_xa3530_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_fujitsu_xa3530_init_hook,
 	},
 	[ALC888_LENOVO_SKY] = {
 		.mixers = { alc888_lenovo_sky_mixer, alc883_chmode_mixer },
@@ -9083,8 +9447,8 @@
 		.channel_mode = alc883_sixstack_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_lenovo_sky_capture_source,
-		.unsol_event = alc883_lenovo_sky_unsol_event,
-		.init_hook = alc888_lenovo_sky_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_lenovo_sky_init_hook,
 	},
 	[ALC888_ASUS_M90V] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9112,7 +9476,7 @@
 		.channel_mode = alc883_3ST_2ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_asus_eee1601_capture_source,
-		.unsol_event = alc883_eee1601_unsol_event,
+		.unsol_event = alc_sku_unsol_event,
 		.init_hook = alc883_eee1601_inithook,
 	},
 	[ALC1200_ASUS_P5Q] = {
@@ -9127,6 +9491,32 @@
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
 	},
+	[ALC889A_MB31] = {
+		.mixers = { alc889A_mb31_mixer, alc883_chmode_mixer},
+		.init_verbs = { alc883_init_verbs, alc889A_mb31_verbs,
+			alc880_gpio1_init_verbs },
+		.adc_nids = alc883_adc_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.channel_mode = alc889A_mb31_6ch_modes,
+		.num_channel_mode = ARRAY_SIZE(alc889A_mb31_6ch_modes),
+		.input_mux = &alc889A_mb31_capture_source,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.unsol_event = alc889A_mb31_unsol_event,
+		.init_hook = alc889A_mb31_automute,
+	},
+	[ALC883_SONY_VAIO_TT] = {
+		.mixers = { alc883_vaiott_mixer },
+		.init_verbs = { alc883_init_verbs, alc883_vaiott_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
+		.channel_mode = alc883_3ST_2ch_modes,
+		.input_mux = &alc883_capture_source,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_vaiott_init_hook,
+	},
 };
 
 
@@ -9155,7 +9545,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -9273,10 +9662,18 @@
 	board_config = snd_hda_check_board_config(codec, ALC883_MODEL_LAST,
 						  alc883_models,
 						  alc883_cfg_tbl);
-	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC883, "
-		       "trying auto-probe from BIOS...\n");
-		board_config = ALC883_AUTO;
+	if (board_config < 0 || board_config >= ALC883_MODEL_LAST) {
+		/* Pick up systems that don't supply PCI SSID */
+		switch (codec->subsystem_id) {
+		case 0x106b3600: /* Macbook 3.1 */
+			board_config = ALC889A_MB31;
+			break;
+		default:
+			printk(KERN_INFO
+				"hda_codec: Unknown model for %s, trying "
+				"auto-probe from BIOS...\n", codec->chip_name);
+			board_config = ALC883_AUTO;
+		}
 	}
 
 	if (board_config == ALC883_AUTO) {
@@ -9304,13 +9701,6 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0888:
-		if (codec->revision_id == 0x100101) {
-			spec->stream_name_analog = "ALC1200 Analog";
-			spec->stream_name_digital = "ALC1200 Digital";
-		} else {
-			spec->stream_name_analog = "ALC888 Analog";
-			spec->stream_name_digital = "ALC888 Digital";
-		}
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
 			spec->adc_nids = alc883_adc_nids;
@@ -9318,10 +9708,9 @@
 		if (!spec->capsrc_nids)
 			spec->capsrc_nids = alc883_capsrc_nids;
 		spec->capture_style = CAPT_MIX; /* matrix-style capture */
+		spec->init_amp = ALC_INIT_DEFAULT; /* always initialize */
 		break;
 	case 0x10ec0889:
-		spec->stream_name_analog = "ALC889 Analog";
-		spec->stream_name_digital = "ALC889 Digital";
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc889_adc_nids);
 			spec->adc_nids = alc889_adc_nids;
@@ -9332,8 +9721,6 @@
 							capture */
 		break;
 	default:
-		spec->stream_name_analog = "ALC883 Analog";
-		spec->stream_name_digital = "ALC883 Digital";
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
 			spec->adc_nids = alc883_adc_nids;
@@ -9413,24 +9800,6 @@
 	{ } /* end */
 };
 
-static struct snd_kcontrol_new alc262_hippo1_mixer[] = {
-	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
-	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
-	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
-	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
-	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
-	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
-	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
-	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
-	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
-	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
-	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
-	/*HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0D, 0x0, HDA_OUTPUT),*/
-	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
-	{ } /* end */
-};
-
 /* update HP, line and mono-out pins according to the master switch */
 static void alc262_hp_master_update(struct hda_codec *codec)
 {
@@ -9486,14 +9855,7 @@
 	alc262_hp_wildwest_automute(codec);
 }
 
-static int alc262_hp_master_sw_get(struct snd_kcontrol *kcontrol,
-				   struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	struct alc_spec *spec = codec->spec;
-	*ucontrol->value.integer.value = spec->master_sw;
-	return 0;
-}
+#define alc262_hp_master_sw_get		alc260_hp_master_sw_get
 
 static int alc262_hp_master_sw_put(struct snd_kcontrol *kcontrol,
 				   struct snd_ctl_elem_value *ucontrol)
@@ -9509,14 +9871,17 @@
 	return 1;
 }
 
+#define ALC262_HP_MASTER_SWITCH					\
+	{							\
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,		\
+		.name = "Master Playback Switch",		\
+		.info = snd_ctl_boolean_mono_info,		\
+		.get = alc262_hp_master_sw_get,			\
+		.put = alc262_hp_master_sw_put,			\
+	}
+
 static struct snd_kcontrol_new alc262_HP_BPC_mixer[] = {
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_ctl_boolean_mono_info,
-		.get = alc262_hp_master_sw_get,
-		.put = alc262_hp_master_sw_put,
-	},
+	ALC262_HP_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Front Playback Switch", 0x15, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
@@ -9540,13 +9905,7 @@
 };
 
 static struct snd_kcontrol_new alc262_HP_BPC_WildWest_mixer[] = {
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_ctl_boolean_mono_info,
-		.get = alc262_hp_master_sw_get,
-		.put = alc262_hp_master_sw_put,
-	},
+	ALC262_HP_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Front Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
@@ -9573,32 +9932,13 @@
 };
 
 /* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc262_hp_t5735_automute(struct hda_codec *codec, int force)
+static void alc262_hp_t5735_init_hook(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 
-	if (force || !spec->sense_updated) {
-		unsigned int present;
-		present = snd_hda_codec_read(codec, 0x15, 0,
-					     AC_VERB_GET_PIN_SENSE, 0);
-		spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
-		spec->sense_updated = 1;
-	}
-	snd_hda_codec_amp_stereo(codec, 0x0c, HDA_OUTPUT, 0, HDA_AMP_MUTE,
-				 spec->jack_present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc262_hp_t5735_unsol_event(struct hda_codec *codec,
-					unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hp_t5735_automute(codec, 1);
-}
-
-static void alc262_hp_t5735_init_hook(struct hda_codec *codec)
-{
-	alc262_hp_t5735_automute(codec, 1);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x0c; /* HACK: not actually a pin */
+	alc_automute_amp(codec);
 }
 
 static struct snd_kcontrol_new alc262_hp_t5735_mixer[] = {
@@ -9651,46 +9991,132 @@
 	},
 };
 
-/* bind hp and internal speaker mute (with plug check) */
-static int alc262_sony_master_sw_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
+/* bind hp and internal speaker mute (with plug check) as master switch */
+static void alc262_hippo_master_update(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t hp_nid = spec->autocfg.hp_pins[0];
+	hda_nid_t line_nid = spec->autocfg.line_out_pins[0];
+	hda_nid_t speaker_nid = spec->autocfg.speaker_pins[0];
+	unsigned int mute;
+
+	/* HP */
+	mute = spec->master_sw ? 0 : HDA_AMP_MUTE;
+	snd_hda_codec_amp_stereo(codec, hp_nid, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, mute);
+	/* mute internal speaker per jack sense */
+	if (spec->jack_present)
+		mute = HDA_AMP_MUTE;
+	if (line_nid)
+		snd_hda_codec_amp_stereo(codec, line_nid, HDA_OUTPUT, 0,
+					 HDA_AMP_MUTE, mute);
+	if (speaker_nid && speaker_nid != line_nid)
+		snd_hda_codec_amp_stereo(codec, speaker_nid, HDA_OUTPUT, 0,
+					 HDA_AMP_MUTE, mute);
+}
+
+#define alc262_hippo_master_sw_get	alc262_hp_master_sw_get
+
+static int alc262_hippo_master_sw_put(struct snd_kcontrol *kcontrol,
+				      struct snd_ctl_elem_value *ucontrol)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	long *valp = ucontrol->value.integer.value;
-	int change;
+	struct alc_spec *spec = codec->spec;
+	int val = !!*ucontrol->value.integer.value;
 
-	/* change hp mute */
-	change = snd_hda_codec_amp_update(codec, 0x15, 0, HDA_OUTPUT, 0,
-					  HDA_AMP_MUTE,
-					  valp[0] ? 0 : HDA_AMP_MUTE);
-	change |= snd_hda_codec_amp_update(codec, 0x15, 1, HDA_OUTPUT, 0,
-					   HDA_AMP_MUTE,
-					   valp[1] ? 0 : HDA_AMP_MUTE);
-	if (change) {
-		/* change speaker according to HP jack state */
-		struct alc_spec *spec = codec->spec;
-		unsigned int mute;
-		if (spec->jack_present)
-			mute = HDA_AMP_MUTE;
-		else
-			mute = snd_hda_codec_amp_read(codec, 0x15, 0,
-						      HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-	return change;
+	if (val == spec->master_sw)
+		return 0;
+	spec->master_sw = val;
+	alc262_hippo_master_update(codec);
+	return 1;
 }
 
+#define ALC262_HIPPO_MASTER_SWITCH				\
+	{							\
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,		\
+		.name = "Master Playback Switch",		\
+		.info = snd_ctl_boolean_mono_info,		\
+		.get = alc262_hippo_master_sw_get,		\
+		.put = alc262_hippo_master_sw_put,		\
+	}
+
+static struct snd_kcontrol_new alc262_hippo_mixer[] = {
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
+static struct snd_kcontrol_new alc262_hippo1_mixer[] = {
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	{ } /* end */
+};
+
+/* mute/unmute internal speaker according to the hp jack and mute state */
+static void alc262_hippo_automute(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t hp_nid = spec->autocfg.hp_pins[0];
+	unsigned int present;
+
+	/* need to execute and sync at first */
+	snd_hda_codec_read(codec, hp_nid, 0, AC_VERB_SET_PIN_SENSE, 0);
+	present = snd_hda_codec_read(codec, hp_nid, 0,
+				     AC_VERB_GET_PIN_SENSE, 0);
+	spec->jack_present = (present & 0x80000000) != 0;
+	alc262_hippo_master_update(codec);
+}
+
+static void alc262_hippo_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	if ((res >> 26) != ALC880_HP_EVENT)
+		return;
+	alc262_hippo_automute(codec);
+}
+
+static void alc262_hippo_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc262_hippo_automute(codec);
+}
+
+static void alc262_hippo1_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc262_hippo_automute(codec);
+}
+
+
 static struct snd_kcontrol_new alc262_sony_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc262_sony_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x15, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_VOLUME("ATAPI Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
@@ -9699,8 +10125,8 @@
 };
 
 static struct snd_kcontrol_new alc262_benq_t31_mixer[] = {
-	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
@@ -9741,34 +10167,15 @@
 };
 
 /* unsolicited event for HP jack sensing */
-static void alc262_tyan_automute(struct hda_codec *codec)
+static void alc262_tyan_init_hook(struct hda_codec *codec)
 {
-	unsigned int mute;
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute line output on ATX panel */
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute line output if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
-static void alc262_tyan_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_tyan_automute(codec);
-}
 
 #define alc262_capture_mixer		alc882_capture_mixer
 #define alc262_capture_alt_mixer	alc882_capture_alt_mixer
@@ -9923,99 +10330,25 @@
 				AC_VERB_SET_CONNECT_SEL, present ? 0x0 : 0x09);
 }
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc262_toshiba_s06_speaker_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x15, 0,
-					AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x14, 0,
-					AC_VERB_SET_PIN_WIDGET_CONTROL, bits);
-}
-
-
 
 /* unsolicited event for HP jack sensing */
 static void alc262_toshiba_s06_unsol_event(struct hda_codec *codec,
 				       unsigned int res)
 {
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc262_toshiba_s06_speaker_automute(codec);
 	if ((res >> 26) == ALC880_MIC_EVENT)
 		alc262_dmic_automute(codec);
-
+	else
+		alc_sku_unsol_event(codec, res);
 }
 
 static void alc262_toshiba_s06_init_hook(struct hda_codec *codec)
 {
-	alc262_toshiba_s06_speaker_automute(codec);
-	alc262_dmic_automute(codec);
-}
-
-/* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc262_hippo_automute(struct hda_codec *codec)
-{
 	struct alc_spec *spec = codec->spec;
-	unsigned int mute;
-	unsigned int present;
 
-	/* need to execute and sync at first */
-	snd_hda_codec_read(codec, 0x15, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	spec->jack_present = (present & 0x80000000) != 0;
-	if (spec->jack_present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x15, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc262_hippo_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hippo_automute(codec);
-}
-
-static void alc262_hippo1_automute(struct hda_codec *codec)
-{
-	unsigned int mute;
-	unsigned int present;
-
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc262_hippo1_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hippo1_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
+	alc262_dmic_automute(codec);
 }
 
 /*
@@ -10285,14 +10618,7 @@
 
 static struct snd_kcontrol_new alc262_toshiba_rx1_mixer[] = {
 	HDA_BIND_VOL("Master Playback Volume", &alc262_fujitsu_bind_master_vol),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc262_sony_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x15, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
@@ -10639,31 +10965,46 @@
 	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
 	{0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
 
-	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x7023 },
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
         {0x19, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
-	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x7023 },
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 
 
 	/* FIXME: use matrix-type input source selection */
-	/* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
-	/* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
+	/* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 0b, 12 */
+	/* Input mixer1: only unmute Mic */
 	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 	/* Input mixer2 */
 	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 	/* Input mixer3 */
 	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 
 	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
 
@@ -10843,6 +11184,8 @@
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x14, 0x1b);
+
 	return 1;
 }
 
@@ -10945,7 +11288,7 @@
 		.input_mux = &alc262_capture_source,
 	},
 	[ALC262_HIPPO] = {
-		.mixers = { alc262_base_mixer },
+		.mixers = { alc262_hippo_mixer },
 		.init_verbs = { alc262_init_verbs, alc262_hippo_unsol_verbs},
 		.num_dacs = ARRAY_SIZE(alc262_dac_nids),
 		.dac_nids = alc262_dac_nids,
@@ -10955,7 +11298,7 @@
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_HIPPO_1] = {
 		.mixers = { alc262_hippo1_mixer },
@@ -10967,8 +11310,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_hippo1_unsol_event,
-		.init_hook = alc262_hippo1_automute,
+		.unsol_event = alc262_hippo_unsol_event,
+		.init_hook = alc262_hippo1_init_hook,
 	},
 	[ALC262_FUJITSU] = {
 		.mixers = { alc262_fujitsu_mixer },
@@ -11030,7 +11373,7 @@
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_hp_t5735_unsol_event,
+		.unsol_event = alc_automute_amp_unsol_event,
 		.init_hook = alc262_hp_t5735_init_hook,
 	},
 	[ALC262_HP_RP5700] = {
@@ -11062,7 +11405,7 @@
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_BENQ_T31] = {
 		.mixers = { alc262_benq_t31_mixer },
@@ -11074,7 +11417,7 @@
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_ULTRA] = {
 		.mixers = { alc262_ultra_mixer },
@@ -11139,7 +11482,7 @@
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_TYAN] = {
 		.mixers = { alc262_tyan_mixer },
@@ -11151,8 +11494,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_tyan_unsol_event,
-		.init_hook = alc262_tyan_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc262_tyan_init_hook,
 	},
 };
 
@@ -11187,8 +11530,8 @@
 						  alc262_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC262, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC262_AUTO;
 	}
 
@@ -11217,11 +11560,9 @@
 	if (board_config != ALC262_AUTO)
 		setup_preset(spec, &alc262_presets[board_config]);
 
-	spec->stream_name_analog = "ALC262 Analog";
 	spec->stream_analog_playback = &alc262_pcm_analog_playback;
 	spec->stream_analog_capture = &alc262_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC262 Digital";
 	spec->stream_digital_playback = &alc262_pcm_digital_playback;
 	spec->stream_digital_capture = &alc262_pcm_digital_capture;
 
@@ -11296,6 +11637,17 @@
 	{ }
 };
 
+static struct snd_kcontrol_new alc268_toshiba_mixer[] = {
+	/* output mixer control */
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x2, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x3, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line In Boost", 0x1a, 0, HDA_INPUT),
+	{ }
+};
+
 /* bind Beep switches of both NID 0x0f and 0x10 */
 static struct hda_bind_ctls alc268_bind_beep_sw = {
 	.ops = &snd_hda_bind_sw,
@@ -11319,8 +11671,6 @@
 };
 
 /* Toshiba specific */
-#define alc268_toshiba_automute	alc262_hippo_automute
-
 static struct hda_verb alc268_toshiba_verbs[] = {
 	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
 	{ } /* end */
@@ -11456,13 +11806,8 @@
 };
 
 /* unsolicited event for HP jack sensing */
-static void alc268_toshiba_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc268_toshiba_automute(codec);
-}
+#define alc268_toshiba_unsol_event	alc262_hippo_unsol_event
+#define alc268_toshiba_init_hook	alc262_hippo_init_hook
 
 static void alc268_acer_unsol_event(struct hda_codec *codec,
 				       unsigned int res)
@@ -11537,30 +11882,15 @@
 };
 
 /* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc268_dell_automute(struct hda_codec *codec)
+static void alc268_dell_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int mute;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0);
-	if (present & 0x80000000)
-		mute = HDA_AMP_MUTE;
-	else
-		mute = snd_hda_codec_amp_read(codec, 0x15, 0, HDA_OUTPUT, 0);
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, mute);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
 }
 
-static void alc268_dell_unsol_event(struct hda_codec *codec,
-				    unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc268_dell_automute(codec);
-}
-
-#define alc268_dell_init_hook	alc268_dell_automute
-
 static struct snd_kcontrol_new alc267_quanta_il1_mixer[] = {
 	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x2, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
@@ -11579,16 +11909,6 @@
 	{ }
 };
 
-static void alc267_quanta_il1_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    present ? 0 : PIN_OUT);
-}
-
 static void alc267_quanta_il1_mic_automute(struct hda_codec *codec)
 {
 	unsigned int present;
@@ -11600,9 +11920,13 @@
 			    present ? 0x00 : 0x01);
 }
 
-static void alc267_quanta_il1_automute(struct hda_codec *codec)
+static void alc267_quanta_il1_init_hook(struct hda_codec *codec)
 {
-	alc267_quanta_il1_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
 	alc267_quanta_il1_mic_automute(codec);
 }
 
@@ -11610,12 +11934,12 @@
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc267_quanta_il1_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc267_quanta_il1_mic_automute(codec);
 		break;
+	default:
+		alc_sku_unsol_event(codec, res);
+		break;
 	}
 }
 
@@ -12063,16 +12387,16 @@
 						ALC268_ACER_ASPIRE_ONE),
 	SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
 	SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
-	SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
+	SND_PCI_QUIRK_MASK(0x103c, 0xff00, 0x3000, "HP TX25xx series",
+			   ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
-	SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x1179, 0xff64, "TOSHIBA L305", ALC268_TOSHIBA),
+	SND_PCI_QUIRK(0x1170, 0x0040, "ZEPTO", ALC268_ZEPTO),
+	SND_PCI_QUIRK_MASK(0x1179, 0xff00, 0xff00, "TOSHIBA A/Lx05",
+			   ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x14c0, 0x0025, "COMPAL IFL90/JFL-92", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x152d, 0x0763, "Diverse (CPR2000)", ALC268_ACER),
 	SND_PCI_QUIRK(0x152d, 0x0771, "Quanta IL1", ALC267_QUANTA_IL1),
-	SND_PCI_QUIRK(0x1170, 0x0040, "ZEPTO", ALC268_ZEPTO),
+	SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL),
 	{}
 };
 
@@ -12090,7 +12414,7 @@
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc267_quanta_il1_unsol_event,
-		.init_hook = alc267_quanta_il1_automute,
+		.init_hook = alc267_quanta_il1_init_hook,
 	},
 	[ALC268_3ST] = {
 		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
@@ -12108,7 +12432,7 @@
 		.input_mux = &alc268_capture_source,
 	},
 	[ALC268_TOSHIBA] = {
-		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
+		.mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer,
 			    alc268_beep_mixer },
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_toshiba_verbs },
@@ -12122,7 +12446,7 @@
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc268_toshiba_unsol_event,
-		.init_hook = alc268_toshiba_automute,
+		.init_hook = alc268_toshiba_init_hook,
 	},
 	[ALC268_ACER] = {
 		.mixers = { alc268_acer_mixer, alc268_capture_alt_mixer,
@@ -12185,7 +12509,7 @@
 		.hp_nid = 0x02,
 		.num_channel_mode = ARRAY_SIZE(alc268_modes),
 		.channel_mode = alc268_modes,
-		.unsol_event = alc268_dell_unsol_event,
+		.unsol_event = alc_sku_unsol_event,
 		.init_hook = alc268_dell_init_hook,
 		.input_mux = &alc268_capture_source,
 	},
@@ -12205,7 +12529,7 @@
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc268_toshiba_unsol_event,
-		.init_hook = alc268_toshiba_automute
+		.init_hook = alc268_toshiba_init_hook
 	},
 #ifdef CONFIG_SND_DEBUG
 	[ALC268_TEST] = {
@@ -12243,8 +12567,8 @@
 						  alc268_cfg_tbl);
 
 	if (board_config < 0 || board_config >= ALC268_MODEL_LAST) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC268, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC268_AUTO;
 	}
 
@@ -12265,14 +12589,6 @@
 	if (board_config != ALC268_AUTO)
 		setup_preset(spec, &alc268_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0267) {
-		spec->stream_name_analog = "ALC267 Analog";
-		spec->stream_name_digital = "ALC267 Digital";
-	} else {
-		spec->stream_name_analog = "ALC268 Analog";
-		spec->stream_name_digital = "ALC268 Digital";
-	}
-
 	spec->stream_analog_playback = &alc268_pcm_analog_playback;
 	spec->stream_analog_capture = &alc268_pcm_analog_capture;
 	spec->stream_analog_alt_capture = &alc268_pcm_analog_alt_capture;
@@ -13099,8 +13415,8 @@
 						  alc269_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC269, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC269_AUTO;
 	}
 
@@ -13127,7 +13443,6 @@
 	if (board_config != ALC269_AUTO)
 		setup_preset(spec, &alc269_presets[board_config]);
 
-	spec->stream_name_analog = "ALC269 Analog";
 	if (codec->subsystem_id == 0x17aa3bf8) {
 		/* Due to a hardware problem on Lenovo Ideadpad, we need to
 		 * fix the sample rate of analog I/O to 44.1kHz
@@ -13138,7 +13453,6 @@
 		spec->stream_analog_playback = &alc269_pcm_analog_playback;
 		spec->stream_analog_capture = &alc269_pcm_analog_capture;
 	}
-	spec->stream_name_digital = "ALC269 Digital";
 	spec->stream_digital_playback = &alc269_pcm_digital_playback;
 	spec->stream_digital_capture = &alc269_pcm_digital_capture;
 
@@ -13927,7 +14241,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x0e, 0x0f, 0x0b);
 	for (i = 0; i < spec->autocfg.line_outs; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -14010,6 +14323,8 @@
 	spec->num_adc_nids = ARRAY_SIZE(alc861_adc_nids);
 	set_capture_mixer(spec);
 
+	alc_ssid_check(codec, 0x0e, 0x0f, 0x0b);
+
 	return 1;
 }
 
@@ -14199,8 +14514,8 @@
 						  alc861_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC861, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC861_AUTO;
 	}
 
@@ -14227,11 +14542,9 @@
 	if (board_config != ALC861_AUTO)
 		setup_preset(spec, &alc861_presets[board_config]);
 
-	spec->stream_name_analog = "ALC861 Analog";
 	spec->stream_analog_playback = &alc861_pcm_analog_playback;
 	spec->stream_analog_capture = &alc861_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC861 Digital";
 	spec->stream_digital_playback = &alc861_pcm_digital_playback;
 	spec->stream_digital_capture = &alc861_pcm_digital_capture;
 
@@ -14618,19 +14931,6 @@
 	{}
 };
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc861vd_lenovo_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
 static void alc861vd_lenovo_mic_automute(struct hda_codec *codec)
 {
 	unsigned int present;
@@ -14643,9 +14943,13 @@
 				 HDA_AMP_MUTE, bits);
 }
 
-static void alc861vd_lenovo_automute(struct hda_codec *codec)
+static void alc861vd_lenovo_init_hook(struct hda_codec *codec)
 {
-	alc861vd_lenovo_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 	alc861vd_lenovo_mic_automute(codec);
 }
 
@@ -14653,12 +14957,12 @@
 					unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc861vd_lenovo_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc861vd_lenovo_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
@@ -14708,20 +15012,13 @@
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc861vd_dallas_automute(struct hda_codec *codec)
+static void alc861vd_dallas_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc861vd_dallas_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc861vd_dallas_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -14835,7 +15132,7 @@
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_capture_source,
 		.unsol_event = alc861vd_lenovo_unsol_event,
-		.init_hook = alc861vd_lenovo_automute,
+		.init_hook = alc861vd_lenovo_init_hook,
 	},
 	[ALC861VD_DALLAS] = {
 		.mixers = { alc861vd_dallas_mixer },
@@ -14845,8 +15142,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc861vd_3stack_2ch_modes),
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_dallas_capture_source,
-		.unsol_event = alc861vd_dallas_unsol_event,
-		.init_hook = alc861vd_dallas_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc861vd_dallas_init_hook,
 	},
 	[ALC861VD_HP] = {
 		.mixers = { alc861vd_hp_mixer },
@@ -14857,8 +15154,8 @@
 		.num_channel_mode = ARRAY_SIZE(alc861vd_3stack_2ch_modes),
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_hp_capture_source,
-		.unsol_event = alc861vd_dallas_unsol_event,
-		.init_hook = alc861vd_dallas_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc861vd_dallas_init_hook,
 	},
 	[ALC660VD_ASUS_V1S] = {
 		.mixers = { alc861vd_lenovo_mixer },
@@ -14873,7 +15170,7 @@
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_capture_source,
 		.unsol_event = alc861vd_lenovo_unsol_event,
-		.init_hook = alc861vd_lenovo_automute,
+		.init_hook = alc861vd_lenovo_init_hook,
 	},
 };
 
@@ -14891,7 +15188,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -15109,6 +15405,8 @@
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
@@ -15140,8 +15438,8 @@
 						  alc861vd_cfg_tbl);
 
 	if (board_config < 0 || board_config >= ALC861VD_MODEL_LAST) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC660VD/"
-			"ALC861VD, trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC861VD_AUTO;
 	}
 
@@ -15169,13 +15467,8 @@
 		setup_preset(spec, &alc861vd_presets[board_config]);
 
 	if (codec->vendor_id == 0x10ec0660) {
-		spec->stream_name_analog = "ALC660-VD Analog";
-		spec->stream_name_digital = "ALC660-VD Digital";
 		/* always turn on EAPD */
 		add_verb(spec, alc660vd_eapd_verbs);
-	} else {
-		spec->stream_name_analog = "ALC861VD Analog";
-		spec->stream_name_digital = "ALC861VD Digital";
 	}
 
 	spec->stream_analog_playback = &alc861vd_pcm_analog_playback;
@@ -15289,6 +15582,38 @@
 	},
 };
 
+#if 1 /* set to 0 for testing other input sources below */
+static struct hda_input_mux alc272_nc10_capture_source = {
+	.num_items = 2,
+	.items = {
+		{ "Autoselect Mic", 0x0 },
+		{ "Internal Mic", 0x1 },
+	},
+};
+#else
+static struct hda_input_mux alc272_nc10_capture_source = {
+	.num_items = 16,
+	.items = {
+		{ "Autoselect Mic", 0x0 },
+		{ "Internal Mic", 0x1 },
+		{ "In-0x02", 0x2 },
+		{ "In-0x03", 0x3 },
+		{ "In-0x04", 0x4 },
+		{ "In-0x05", 0x5 },
+		{ "In-0x06", 0x6 },
+		{ "In-0x07", 0x7 },
+		{ "In-0x08", 0x8 },
+		{ "In-0x09", 0x9 },
+		{ "In-0x0a", 0x0a },
+		{ "In-0x0b", 0x0b },
+		{ "In-0x0c", 0x0c },
+		{ "In-0x0d", 0x0d },
+		{ "In-0x0e", 0x0e },
+		{ "In-0x0f", 0x0f },
+	},
+};
+#endif
+
 /*
  * 2ch mode
  */
@@ -15428,10 +15753,8 @@
 };
 
 static struct snd_kcontrol_new alc662_eeepc_p701_mixer[] = {
-	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
-
-	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Line-Out Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x02, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
 
 	HDA_CODEC_VOLUME("e-Mic Boost", 0x18, 0, HDA_INPUT),
 	HDA_CODEC_VOLUME("e-Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
@@ -15444,15 +15767,11 @@
 };
 
 static struct snd_kcontrol_new alc662_eeepc_ep20_mixer[] = {
-	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Line-Out Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x02, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Surround Playback Volume", 0x03, 0x0, HDA_OUTPUT),
-	HDA_BIND_MUTE("Surround Playback Switch", 0x03, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x04, 1, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x04, 2, 0x0, HDA_OUTPUT),
-	HDA_BIND_MUTE_MONO("Center Playback Switch", 0x04, 1, 2, HDA_INPUT),
-	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x04, 2, 2, HDA_INPUT),
-	HDA_CODEC_MUTE("Speaker Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("MuteCtrl Playback Switch", 0x0c, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
 	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
@@ -15960,51 +16279,25 @@
 static void alc662_eeepc_unsol_event(struct hda_codec *codec,
 				     unsigned int res)
 {
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc262_hippo1_automute( codec );
-
 	if ((res >> 26) == ALC880_MIC_EVENT)
 		alc662_eeepc_mic_automute(codec);
+	else
+		alc262_hippo_unsol_event(codec, res);
 }
 
 static void alc662_eeepc_inithook(struct hda_codec *codec)
 {
-	alc262_hippo1_automute( codec );
+	alc262_hippo1_init_hook(codec);
 	alc662_eeepc_mic_automute(codec);
 }
 
-static void alc662_eeepc_ep20_automute(struct hda_codec *codec)
-{
-	unsigned int mute;
-	unsigned int present;
-
-	snd_hda_codec_read(codec, 0x14, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-					HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x14, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-					HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc662_eeepc_ep20_unsol_event(struct hda_codec *codec,
-					  unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc662_eeepc_ep20_automute(codec);
-}
-
 static void alc662_eeepc_ep20_inithook(struct hda_codec *codec)
 {
-	alc662_eeepc_ep20_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc262_hippo_master_update(codec);
 }
 
 static void alc663_m51va_speaker_automute(struct hda_codec *codec)
@@ -16338,35 +16631,9 @@
 	alc662_eeepc_mic_automute(codec);
 }
 
-/* bind hp and internal speaker mute (with plug check) */
-static int alc662_ecs_master_sw_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	long *valp = ucontrol->value.integer.value;
-	int change;
-
-	change = snd_hda_codec_amp_update(codec, 0x1b, 0, HDA_OUTPUT, 0,
-					  HDA_AMP_MUTE,
-					  valp[0] ? 0 : HDA_AMP_MUTE);
-	change |= snd_hda_codec_amp_update(codec, 0x1b, 1, HDA_OUTPUT, 0,
-					   HDA_AMP_MUTE,
-					   valp[1] ? 0 : HDA_AMP_MUTE);
-	if (change)
-		alc262_hippo1_automute(codec);
-	return change;
-}
-
 static struct snd_kcontrol_new alc662_ecs_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc662_ecs_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x1b, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 
 	HDA_CODEC_VOLUME("e-Mic/LineIn Boost", 0x18, 0, HDA_INPUT),
 	HDA_CODEC_VOLUME("e-Mic/LineIn Playback Volume", 0x0b, 0x0, HDA_INPUT),
@@ -16378,6 +16645,23 @@
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new alc272_nc10_mixer[] = {
+	/* Master Playback automatically created from Speaker and Headphone */
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x02, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x03, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+
+	HDA_CODEC_VOLUME("Ext Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Ext Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Ext Mic Boost", 0x18, 0, HDA_INPUT),
+
+	HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_VOLUME("Int Mic Boost", 0x19, 0, HDA_INPUT),
+	{ } /* end */
+};
+
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 #define alc662_loopbacks	alc880_loopbacks
 #endif
@@ -16411,6 +16695,9 @@
 	[ALC663_ASUS_MODE4] = "asus-mode4",
 	[ALC663_ASUS_MODE5] = "asus-mode5",
 	[ALC663_ASUS_MODE6] = "asus-mode6",
+	[ALC272_DELL]		= "dell",
+	[ALC272_DELL_ZM1]	= "dell-zm1",
+	[ALC272_SAMSUNG_NC10]	= "samsung-nc10",
 	[ALC662_AUTO]		= "auto",
 };
 
@@ -16468,6 +16755,7 @@
 	SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_ECS),
 	SND_PCI_QUIRK(0x105b, 0x0d47, "Foxconn 45CMX/45GMX/45CMX-K",
 		      ALC662_3ST_6ch_DIG),
+	SND_PCI_QUIRK(0x144d, 0xca00, "Samsung NC10", ALC272_SAMSUNG_NC10),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte 945GCM-S2L",
 		      ALC662_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x1565, 0x820f, "Biostar TA780G M2+", ALC662_3ST_6ch_DIG),
@@ -16558,7 +16846,7 @@
 		.num_channel_mode = ARRAY_SIZE(alc662_3ST_6ch_modes),
 		.channel_mode = alc662_3ST_6ch_modes,
 		.input_mux = &alc662_lenovo_101e_capture_source,
-		.unsol_event = alc662_eeepc_ep20_unsol_event,
+		.unsol_event = alc662_eeepc_unsol_event,
 		.init_hook = alc662_eeepc_ep20_inithook,
 	},
 	[ALC662_ECS] = {
@@ -16739,6 +17027,18 @@
 		.unsol_event = alc663_m51va_unsol_event,
 		.init_hook = alc663_m51va_inithook,
 	},
+	[ALC272_SAMSUNG_NC10] = {
+		.mixers = { alc272_nc10_mixer },
+		.init_verbs = { alc662_init_verbs,
+				alc663_21jd_amic_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc272_dac_nids),
+		.dac_nids = alc272_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc662_3ST_2ch_modes),
+		.channel_mode = alc662_3ST_2ch_modes,
+		.input_mux = &alc272_nc10_capture_source,
+		.unsol_event = alc663_mode4_unsol_event,
+		.init_hook = alc663_mode4_inithook,
+	},
 };
 
 
@@ -16933,7 +17233,6 @@
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -17030,6 +17329,8 @@
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
@@ -17062,8 +17363,8 @@
 						  alc662_models,
 			  	                  alc662_cfg_tbl);
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC662, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC662_AUTO;
 	}
 
@@ -17090,17 +17391,6 @@
 	if (board_config != ALC662_AUTO)
 		setup_preset(spec, &alc662_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0663) {
-		spec->stream_name_analog = "ALC663 Analog";
-		spec->stream_name_digital = "ALC663 Digital";
-	} else if (codec->vendor_id == 0x10ec0272) {
-		spec->stream_name_analog = "ALC272 Analog";
-		spec->stream_name_digital = "ALC272 Digital";
-	} else {
-		spec->stream_name_analog = "ALC662 Analog";
-		spec->stream_name_digital = "ALC662 Digital";
-	}
-
 	spec->stream_analog_playback = &alc662_pcm_analog_playback;
 	spec->stream_analog_capture = &alc662_pcm_analog_capture;
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index d2fd8ef..93e47c9 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -100,6 +100,7 @@
 	STAC_HP_M4,
 	STAC_HP_DV5,
 	STAC_HP_HDX,
+	STAC_HP_DV4_1222NR,
 	STAC_92HD71BXX_MODELS
 };
 
@@ -193,6 +194,7 @@
 	unsigned int gpio_dir;
 	unsigned int gpio_data;
 	unsigned int gpio_mute;
+	unsigned int gpio_led;
 
 	/* stream */
 	unsigned int stream_delay;
@@ -634,6 +636,40 @@
 	return 0;
 }
 
+static unsigned int stac92xx_vref_set(struct hda_codec *codec,
+					hda_nid_t nid, unsigned int new_vref)
+{
+	unsigned int error;
+	unsigned int pincfg;
+	pincfg = snd_hda_codec_read(codec, nid, 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+
+	pincfg &= 0xff;
+	pincfg &= ~(AC_PINCTL_VREFEN | AC_PINCTL_IN_EN | AC_PINCTL_OUT_EN);
+	pincfg |= new_vref;
+
+	if (new_vref == AC_PINCTL_VREF_HIZ)
+		pincfg |= AC_PINCTL_OUT_EN;
+	else
+		pincfg |= AC_PINCTL_IN_EN;
+
+	error = snd_hda_codec_write_cache(codec, nid, 0,
+					AC_VERB_SET_PIN_WIDGET_CONTROL, pincfg);
+	if (error < 0)
+		return error;
+	else
+		return 1;
+}
+
+static unsigned int stac92xx_vref_get(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int vref;
+	vref = snd_hda_codec_read(codec, nid, 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+	vref &= AC_PINCTL_VREFEN;
+	return vref;
+}
+
 static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
@@ -995,6 +1031,17 @@
 		.private_value = verb_read | (verb_write << 16), \
 	}
 
+#define DC_BIAS(xname, idx, nid) \
+	{ \
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+		.name = xname, \
+		.index = idx, \
+		.info = stac92xx_dc_bias_info, \
+		.get = stac92xx_dc_bias_get, \
+		.put = stac92xx_dc_bias_put, \
+		.private_value = nid, \
+	}
+
 static struct snd_kcontrol_new stac9200_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0xb, 0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Master Playback Switch", 0xb, 0, HDA_OUTPUT),
@@ -1543,6 +1590,8 @@
 	/* SigmaTel reference board */
 	SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668,
 		      "DFI LanParty", STAC_REF),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0xfb30,
+		      "SigmaTel",STAC_9205_REF),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
 		      "DFI LanParty", STAC_REF),
 	/* Dell laptops have BIOS problem */
@@ -1837,6 +1886,7 @@
 	[STAC_HP_M4]		= NULL,
 	[STAC_HP_DV5]		= NULL,
 	[STAC_HP_HDX]           = NULL,
+	[STAC_HP_DV4_1222NR]	= NULL,
 };
 
 static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = {
@@ -1848,6 +1898,7 @@
 	[STAC_HP_M4] = "hp-m4",
 	[STAC_HP_DV5] = "hp-dv5",
 	[STAC_HP_HDX] = "hp-hdx",
+	[STAC_HP_DV4_1222NR] = "hp-dv4-1222nr",
 };
 
 static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = {
@@ -1856,6 +1907,8 @@
 		      "DFI LanParty", STAC_92HD71BXX_REF),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
 		      "DFI LanParty", STAC_92HD71BXX_REF),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x30fb,
+		      "HP dv4-1222nr", STAC_HP_DV4_1222NR),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3080,
 		      "HP", STAC_HP_DV5),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x30f0,
@@ -2545,7 +2598,8 @@
 	return 0;
 }
 
-static unsigned int stac92xx_get_vref(struct hda_codec *codec, hda_nid_t nid)
+static unsigned int stac92xx_get_default_vref(struct hda_codec *codec,
+					hda_nid_t nid)
 {
 	unsigned int pincap = snd_hda_query_pin_caps(codec, nid);
 	pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT;
@@ -2599,15 +2653,108 @@
 	return 1;
 }
 
-#define stac92xx_io_switch_info		snd_ctl_boolean_mono_info
+static int stac92xx_dc_bias_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	int i;
+	static char *texts[] = {
+		"Mic In", "Line In", "Line Out"
+	};
+
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct sigmatel_spec *spec = codec->spec;
+	hda_nid_t nid = kcontrol->private_value;
+
+	if (nid == spec->mic_switch || nid == spec->line_switch)
+		i = 3;
+	else
+		i = 2;
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->value.enumerated.items = i;
+	uinfo->count = 1;
+	if (uinfo->value.enumerated.item >= i)
+		uinfo->value.enumerated.item = i-1;
+	strcpy(uinfo->value.enumerated.name,
+		texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	hda_nid_t nid = kcontrol->private_value;
+	unsigned int vref = stac92xx_vref_get(codec, nid);
+
+	if (vref == stac92xx_get_default_vref(codec, nid))
+		ucontrol->value.enumerated.item[0] = 0;
+	else if (vref == AC_PINCTL_VREF_GRD)
+		ucontrol->value.enumerated.item[0] = 1;
+	else if (vref == AC_PINCTL_VREF_HIZ)
+		ucontrol->value.enumerated.item[0] = 2;
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int new_vref = 0;
+	unsigned int error;
+	hda_nid_t nid = kcontrol->private_value;
+
+	if (ucontrol->value.enumerated.item[0] == 0)
+		new_vref = stac92xx_get_default_vref(codec, nid);
+	else if (ucontrol->value.enumerated.item[0] == 1)
+		new_vref = AC_PINCTL_VREF_GRD;
+	else if (ucontrol->value.enumerated.item[0] == 2)
+		new_vref = AC_PINCTL_VREF_HIZ;
+	else
+		return 0;
+
+	if (new_vref != stac92xx_vref_get(codec, nid)) {
+		error = stac92xx_vref_set(codec, nid, new_vref);
+		return error;
+	}
+
+	return 0;
+}
+
+static int stac92xx_io_switch_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[2];
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct sigmatel_spec *spec = codec->spec;
+
+	if (kcontrol->private_value == spec->line_switch)
+		texts[0] = "Line In";
+	else
+		texts[0] = "Mic In";
+	texts[1] = "Line Out";
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->value.enumerated.items = 2;
+	uinfo->count = 1;
+
+	if (uinfo->value.enumerated.item >= 2)
+		uinfo->value.enumerated.item = 1;
+	strcpy(uinfo->value.enumerated.name,
+		texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
 
 static int stac92xx_io_switch_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct sigmatel_spec *spec = codec->spec;
-	int io_idx = kcontrol-> private_value & 0xff;
+	hda_nid_t nid = kcontrol->private_value;
+	int io_idx = (nid == spec->mic_switch) ? 1 : 0;
 
-	ucontrol->value.integer.value[0] = spec->io_switch[io_idx];
+	ucontrol->value.enumerated.item[0] = spec->io_switch[io_idx];
 	return 0;
 }
 
@@ -2615,9 +2762,9 @@
 {
         struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct sigmatel_spec *spec = codec->spec;
-        hda_nid_t nid = kcontrol->private_value >> 8;
-	int io_idx = kcontrol-> private_value & 0xff;
-	unsigned short val = !!ucontrol->value.integer.value[0];
+	hda_nid_t nid = kcontrol->private_value;
+	int io_idx = (nid == spec->mic_switch) ? 1 : 0;
+	unsigned short val = !!ucontrol->value.enumerated.item[0];
 
 	spec->io_switch[io_idx] = val;
 
@@ -2626,7 +2773,7 @@
 	else {
 		unsigned int pinctl = AC_PINCTL_IN_EN;
 		if (io_idx) /* set VREF for mic */
-			pinctl |= stac92xx_get_vref(codec, nid);
+			pinctl |= stac92xx_get_default_vref(codec, nid);
 		stac92xx_auto_set_pinctl(codec, nid, pinctl);
 	}
 
@@ -2707,7 +2854,8 @@
 	STAC_CTL_WIDGET_AMP_VOL,
 	STAC_CTL_WIDGET_HP_SWITCH,
 	STAC_CTL_WIDGET_IO_SWITCH,
-	STAC_CTL_WIDGET_CLFE_SWITCH
+	STAC_CTL_WIDGET_CLFE_SWITCH,
+	STAC_CTL_WIDGET_DC_BIAS
 };
 
 static struct snd_kcontrol_new stac92xx_control_templates[] = {
@@ -2719,6 +2867,7 @@
 	STAC_CODEC_HP_SWITCH(NULL),
 	STAC_CODEC_IO_SWITCH(NULL, 0),
 	STAC_CODEC_CLFE_SWITCH(NULL, 0),
+	DC_BIAS(NULL, 0, 0),
 };
 
 /* add dynamic controls */
@@ -2782,6 +2931,34 @@
 	.put = stac92xx_mux_enum_put,
 };
 
+static inline int stac92xx_add_jack_mode_control(struct hda_codec *codec,
+						hda_nid_t nid, int idx)
+{
+	int def_conf = snd_hda_codec_get_pincfg(codec, nid);
+	int control = 0;
+	struct sigmatel_spec *spec = codec->spec;
+	char name[22];
+
+	if (!((get_defcfg_connect(def_conf)) & AC_JACK_PORT_FIXED)) {
+		if (stac92xx_get_default_vref(codec, nid) == AC_PINCTL_VREF_GRD
+			&& nid == spec->line_switch)
+			control = STAC_CTL_WIDGET_IO_SWITCH;
+		else if (snd_hda_query_pin_caps(codec, nid)
+			& (AC_PINCAP_VREF_GRD << AC_PINCAP_VREF_SHIFT))
+			control = STAC_CTL_WIDGET_DC_BIAS;
+		else if (nid == spec->mic_switch)
+			control = STAC_CTL_WIDGET_IO_SWITCH;
+	}
+
+	if (control) {
+		strcpy(name, auto_pin_cfg_labels[idx]);
+		return stac92xx_add_control(codec->spec, control,
+					strcat(name, " Jack Mode"), nid);
+	}
+
+	return 0;
+}
+
 static int stac92xx_add_input_source(struct sigmatel_spec *spec)
 {
 	struct snd_kcontrol_new *knew;
@@ -3144,7 +3321,9 @@
 					       const struct auto_pin_cfg *cfg)
 {
 	struct sigmatel_spec *spec = codec->spec;
+	hda_nid_t nid;
 	int err;
+	int idx;
 
 	err = create_multi_out_ctls(codec, cfg->line_outs, cfg->line_out_pins,
 				    spec->multiout.dac_nids,
@@ -3161,20 +3340,13 @@
 			return err;
 	}
 
-	if (spec->line_switch) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_IO_SWITCH,
-					   "Line In as Output Switch",
-					   spec->line_switch << 8);
-		if (err < 0)
-			return err;
-	}
-
-	if (spec->mic_switch) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_IO_SWITCH,
-					   "Mic as Output Switch",
-					   (spec->mic_switch << 8) | 1);
-		if (err < 0)
-			return err;
+	for (idx = AUTO_PIN_MIC; idx <= AUTO_PIN_FRONT_LINE; idx++) {
+		nid = cfg->input_pins[idx];
+		if (nid) {
+			err = stac92xx_add_jack_mode_control(codec, nid, idx);
+			if (err < 0)
+				return err;
+		}
 	}
 
 	return 0;
@@ -3639,6 +3811,8 @@
 		err = snd_hda_attach_beep_device(codec, nid);
 		if (err < 0)
 			return err;
+		/* IDT/STAC codecs have linear beep tone parameter */
+		codec->beep->linear_tone = 1;
 		/* if no beep switch is available, make its own one */
 		caps = query_amp_caps(codec, nid, HDA_OUTPUT);
 		if (codec->beep &&
@@ -4082,7 +4256,7 @@
 			unsigned int pinctl, conf;
 			if (i == AUTO_PIN_MIC || i == AUTO_PIN_FRONT_MIC) {
 				/* for mic pins, force to initialize */
-				pinctl = stac92xx_get_vref(codec, nid);
+				pinctl = stac92xx_get_default_vref(codec, nid);
 				pinctl |= AC_PINCTL_IN_EN;
 				stac92xx_auto_set_pinctl(codec, nid, pinctl);
 			} else {
@@ -4535,17 +4709,19 @@
 	return 0;
 }
 
-
 /*
- * using power check for controlling mute led of HP HDX notebooks
+ * using power check for controlling mute led of HP notebooks
  * check for mute state only on Speakers (nid = 0x10)
  *
  * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise
  * the LED is NOT working properly !
+ *
+ * Changed name to reflect that it now works for any designated
+ * model, not just HP HDX.
  */
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
-static int stac92xx_hp_hdx_check_power_status(struct hda_codec *codec,
+static int stac92xx_hp_check_power_status(struct hda_codec *codec,
 					      hda_nid_t nid)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -4553,9 +4729,9 @@
 	if (nid == 0x10) {
 		if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
 		    HDA_AMP_MUTE)
-			spec->gpio_data &= ~0x08;  /* orange */
+			spec->gpio_data &= ~spec->gpio_led; /* orange */
 		else
-			spec->gpio_data |= 0x08;   /* white */
+			spec->gpio_data |= spec->gpio_led; /* white */
 
 		stac_gpio_set(codec, spec->gpio_mask,
 			      spec->gpio_dir,
@@ -5201,6 +5377,15 @@
 	if (get_wcaps(codec, 0xa) & AC_WCAP_IN_AMP)
 		snd_hda_sequence_write_cache(codec, unmute_init);
 
+	/* Some HP machines seem to have unstable codec communications
+	 * especially with ATI fglrx driver.  For recovering from the
+	 * CORB/RIRB stall, allow the BUS reset and keep always sync
+	 */
+	if (spec->board_config == STAC_HP_DV5) {
+		codec->bus->sync_write = 1;
+		codec->bus->allow_bus_reset = 1;
+	}
+
 	spec->aloopback_ctl = stac92hd71bxx_loopback;
 	spec->aloopback_mask = 0x50;
 	spec->aloopback_shift = 0;
@@ -5234,6 +5419,15 @@
 		spec->num_smuxes = 0;
 		spec->num_dmuxes = 1;
 		break;
+	case STAC_HP_DV4_1222NR:
+		spec->num_dmics = 1;
+		/* I don't know if it needs 1 or 2 smuxes - will wait for
+		 * bug reports to fix if needed
+		 */
+		spec->num_smuxes = 1;
+		spec->num_dmuxes = 1;
+		spec->gpio_led = 0x01;
+		/* fallthrough */
 	case STAC_HP_DV5:
 		snd_hda_codec_set_pincfg(codec, 0x0d, 0x90170010);
 		stac92xx_auto_set_pinctl(codec, 0x0d, AC_PINCTL_OUT_EN);
@@ -5242,22 +5436,21 @@
 		spec->num_dmics = 1;
 		spec->num_dmuxes = 1;
 		spec->num_smuxes = 1;
-		/*
-		 * For controlling MUTE LED on HP HDX16/HDX18 notebooks,
-		 * the CONFIG_SND_HDA_POWER_SAVE is needed to be set.
-		 */
-#ifdef CONFIG_SND_HDA_POWER_SAVE
 		/* orange/white mute led on GPIO3, orange=0, white=1 */
-		spec->gpio_mask |= 0x08;
-		spec->gpio_dir  |= 0x08;
-		spec->gpio_data |= 0x08;  /* set to white */
+		spec->gpio_led = 0x08;
+		break;
+	}
 
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+	if (spec->gpio_led) {
+		spec->gpio_mask |= spec->gpio_led;
+		spec->gpio_dir |= spec->gpio_led;
+		spec->gpio_data |= spec->gpio_led;
 		/* register check_power_status callback. */
 		codec->patch_ops.check_power_status =
-		    stac92xx_hp_hdx_check_power_status;
+			stac92xx_hp_check_power_status;
+	}
 #endif	
-		break;
-	};
 
 	spec->multiout.dac_nids = spec->dac_nids;
 	if (spec->dinput_mux)
@@ -5282,7 +5475,7 @@
 	codec->proc_widget_hook = stac92hd7x_proc_hook;
 
 	return 0;
-};
+}
 
 static int patch_stac922x(struct hda_codec *codec)
 {
@@ -5437,7 +5630,7 @@
 			/* correct the device field to SPDIF out */
 			snd_hda_codec_set_pincfg(codec, 0x21, 0x01442070);
 			break;
-		};
+		}
 		/* configure the analog microphone on some laptops */
 		snd_hda_codec_set_pincfg(codec, 0x0c, 0x90a79130);
 		/* correct the front output jack as a hp out */
@@ -5747,6 +5940,7 @@
  	{ .id = 0x83847661, .name = "CXD9872RD/K", .patch = patch_stac9872 },
  	{ .id = 0x83847662, .name = "STAC9872AK", .patch = patch_stac9872 },
  	{ .id = 0x83847664, .name = "CXD9872AKD", .patch = patch_stac9872 },
+	{ .id = 0x83847698, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
  	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index b25a5cc..8e004fb 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -205,7 +205,7 @@
 
 	/* playback */
 	struct hda_multi_out multiout;
-	hda_nid_t extra_dig_out_nid;
+	hda_nid_t slave_dig_outs[2];
 
 	/* capture */
 	unsigned int num_adc_nids;
@@ -731,21 +731,6 @@
 	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
 }
 
-/* setup SPDIF output stream */
-static void setup_dig_playback_stream(struct hda_codec *codec, hda_nid_t nid,
-				 unsigned int stream_tag, unsigned int format)
-{
-	/* turn off SPDIF once; otherwise the IEC958 bits won't be updated */
-	if (codec->spdif_ctls & AC_DIG1_ENABLE)
-		snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_DIGI_CONVERT_1,
-				    codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
-	snd_hda_codec_setup_stream(codec, nid, stream_tag, 0, format);
-	/* turn on again (if needed) */
-	if (codec->spdif_ctls & AC_DIG1_ENABLE)
-		snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_DIGI_CONVERT_1,
-				    codec->spdif_ctls & 0xff);
-}
-
 static int via_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
 					struct hda_codec *codec,
 					unsigned int stream_tag,
@@ -753,19 +738,16 @@
 					struct snd_pcm_substream *substream)
 {
 	struct via_spec *spec = codec->spec;
-	hda_nid_t nid;
+	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout,
+					     stream_tag, format, substream);
+}
 
-	/* 1st or 2nd S/PDIF */
-	if (substream->number == 0)
-		nid = spec->multiout.dig_out_nid;
-	else if (substream->number == 1)
-		nid = spec->extra_dig_out_nid;
-	else
-		return -1;
-
-	mutex_lock(&codec->spdif_mutex);
-	setup_dig_playback_stream(codec, nid, stream_tag, format);
-	mutex_unlock(&codec->spdif_mutex);
+static int via_dig_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct via_spec *spec = codec->spec;
+	snd_hda_multi_out_dig_cleanup(codec, &spec->multiout);
 	return 0;
 }
 
@@ -842,7 +824,8 @@
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -874,13 +857,6 @@
 		if (err < 0)
 			return err;
 		spec->multiout.share_spdif = 1;
-
-		if (spec->extra_dig_out_nid) {
-			err = snd_hda_create_spdif_out_ctls(codec,
-						    spec->extra_dig_out_nid);
-			if (err < 0)
-				return err;
-		}
 	}
 	if (spec->dig_in_nid) {
 		err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in_nid);
@@ -1013,10 +989,6 @@
 		via_gpio_control(codec);
 }
 
-static hda_nid_t slave_dig_outs[] = {
-	0,
-};
-
 static int via_init(struct hda_codec *codec)
 {
 	struct via_spec *spec = codec->spec;
@@ -1051,8 +1023,9 @@
 		snd_hda_codec_write(codec, spec->autocfg.dig_in_pin, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN);
 
-	/* no slave outs */
-	codec->slave_dig_outs = slave_dig_outs;
+	/* assign slave outs */
+	if (spec->slave_dig_outs[0])
+		codec->slave_dig_outs = spec->slave_dig_outs;
 
  	return 0;
 }
@@ -2134,7 +2107,8 @@
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -2589,14 +2563,15 @@
 };
 
 static struct hda_pcm_stream vt1708S_pcm_digital_playback = {
-	.substreams = 2,
+	.substreams = 1,
 	.channels_min = 2,
 	.channels_max = 2,
 	/* NID is set in via_build_pcms */
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -2805,14 +2780,37 @@
 	return 0;
 }
 
+/* fill out digital output widgets; one for master and one for slave outputs */
+static void fill_dig_outs(struct hda_codec *codec)
+{
+	struct via_spec *spec = codec->spec;
+	int i;
+
+	for (i = 0; i < spec->autocfg.dig_outs; i++) {
+		hda_nid_t nid;
+		int conn;
+
+		nid = spec->autocfg.dig_out_pins[i];
+		if (!nid)
+			continue;
+		conn = snd_hda_get_connections(codec, nid, &nid, 1);
+		if (conn < 1)
+			continue;
+		if (!spec->multiout.dig_out_nid)
+			spec->multiout.dig_out_nid = nid;
+		else {
+			spec->slave_dig_outs[0] = nid;
+			break; /* at most two dig outs */
+		}
+	}
+}
+
 static int vt1708S_parse_auto_config(struct hda_codec *codec)
 {
 	struct via_spec *spec = codec->spec;
 	int err;
-	static hda_nid_t vt1708s_ignore[] = {0x21, 0};
 
-	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
-					   vt1708s_ignore);
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
 	if (err < 0)
 		return err;
 	err = vt1708S_auto_fill_dac_nids(spec, &spec->autocfg);
@@ -2833,10 +2831,7 @@
 
 	spec->multiout.max_channels = spec->multiout.num_dacs * 2;
 
-	if (spec->autocfg.dig_outs)
-		spec->multiout.dig_out_nid = VT1708S_DIGOUT_NID;
-
-	spec->extra_dig_out_nid = 0x15;
+	fill_dig_outs(codec);
 
 	if (spec->kctls.list)
 		spec->mixers[spec->num_mixers++] = spec->kctls.list;
@@ -3000,7 +2995,8 @@
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -3128,10 +3124,8 @@
 {
 	struct via_spec *spec = codec->spec;
 	int err;
-	static hda_nid_t vt1702_ignore[] = {0x1C, 0};
 
-	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
-					   vt1702_ignore);
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
 	if (err < 0)
 		return err;
 	err = vt1702_auto_fill_dac_nids(spec, &spec->autocfg);
@@ -3152,10 +3146,7 @@
 
 	spec->multiout.max_channels = spec->multiout.num_dacs * 2;
 
-	if (spec->autocfg.dig_outs)
-		spec->multiout.dig_out_nid = VT1702_DIGOUT_NID;
-
-	spec->extra_dig_out_nid = 0x1B;
+	fill_dig_outs(codec);
 
 	if (spec->kctls.list)
 		spec->mixers[spec->num_mixers++] = spec->kctls.list;
diff --git a/sound/pci/ice1712/Makefile b/sound/pci/ice1712/Makefile
index f99fe08..536eae2 100644
--- a/sound/pci/ice1712/Makefile
+++ b/sound/pci/ice1712/Makefile
@@ -5,7 +5,7 @@
 
 snd-ice17xx-ak4xxx-objs := ak4xxx.o
 snd-ice1712-objs := ice1712.o delta.o hoontech.o ews.o
-snd-ice1724-objs := ice1724.o amp.o revo.o aureon.o vt1720_mobo.o pontis.o prodigy192.o prodigy_hifi.o juli.o phase.o wtm.o se.o
+snd-ice1724-objs := ice1724.o amp.o revo.o aureon.o vt1720_mobo.o pontis.o prodigy192.o prodigy_hifi.o juli.o phase.o wtm.o se.o maya44.o
 
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_ICE1712) += snd-ice1712.o snd-ice17xx-ak4xxx.o
diff --git a/sound/pci/ice1712/ice1712.h b/sound/pci/ice1712/ice1712.h
index fdae6de..adc909e 100644
--- a/sound/pci/ice1712/ice1712.h
+++ b/sound/pci/ice1712/ice1712.h
@@ -335,6 +335,7 @@
 	unsigned int force_rdma1:1;	/* VT1720/4 - RDMA1 as non-spdif */
 	unsigned int midi_output:1;	/* VT1720/4: MIDI output triggered */
 	unsigned int midi_input:1;	/* VT1720/4: MIDI input triggered */
+	unsigned int own_routing:1;	/* VT1720/4: use own routing ctls */
 	unsigned int num_total_dacs;	/* total DACs */
 	unsigned int num_total_adcs;	/* total ADCs */
 	unsigned int cur_rate;		/* current rate */
@@ -458,10 +459,17 @@
 	return  snd_ice1712_gpio_read(ice) & mask;
 }
 
+/* route access functions */
+int snd_ice1724_get_route_val(struct snd_ice1712 *ice, int shift);
+int snd_ice1724_put_route_val(struct snd_ice1712 *ice, unsigned int val,
+								int shift);
+
 int snd_ice1712_spdif_build_controls(struct snd_ice1712 *ice);
 
-int snd_ice1712_akm4xxx_init(struct snd_akm4xxx *ak, const struct snd_akm4xxx *template,
-			     const struct snd_ak4xxx_private *priv, struct snd_ice1712 *ice);
+int snd_ice1712_akm4xxx_init(struct snd_akm4xxx *ak,
+			     const struct snd_akm4xxx *template,
+			     const struct snd_ak4xxx_private *priv,
+			     struct snd_ice1712 *ice);
 void snd_ice1712_akm4xxx_free(struct snd_ice1712 *ice);
 int snd_ice1712_akm4xxx_build_controls(struct snd_ice1712 *ice);
 
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 128510e7..36ade77 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -49,6 +49,7 @@
 #include "prodigy192.h"
 #include "prodigy_hifi.h"
 #include "juli.h"
+#include "maya44.h"
 #include "phase.h"
 #include "wtm.h"
 #include "se.h"
@@ -65,6 +66,7 @@
 	       PRODIGY192_DEVICE_DESC
 	       PRODIGY_HIFI_DEVICE_DESC
 	       JULI_DEVICE_DESC
+	       MAYA44_DEVICE_DESC
 	       PHASE_DEVICE_DESC
 	       WTM_DEVICE_DESC
 	       SE_DEVICE_DESC
@@ -626,7 +628,7 @@
 	return 0;
 }
 
-static void snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
+static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 				    int force)
 {
 	unsigned long flags;
@@ -634,17 +636,18 @@
 	unsigned int i, old_rate;
 
 	if (rate > ice->hw_rates->list[ice->hw_rates->count - 1])
-		return;
+		return -EINVAL;
+
 	spin_lock_irqsave(&ice->reg_lock, flags);
 	if ((inb(ICEMT1724(ice, DMA_CONTROL)) & DMA_STARTS) ||
 	    (inb(ICEMT1724(ice, DMA_PAUSE)) & DMA_PAUSES)) {
 		/* running? we cannot change the rate now... */
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return -EBUSY;
 	}
 	if (!force && is_pro_rate_locked(ice)) {
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return (rate == ice->cur_rate) ? 0 : -EBUSY;
 	}
 
 	old_rate = ice->get_rate(ice);
@@ -652,7 +655,7 @@
 		ice->set_rate(ice, rate);
 	else if (rate == ice->cur_rate) {
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return 0;
 	}
 
 	ice->cur_rate = rate;
@@ -674,13 +677,15 @@
 	}
 	if (ice->spdif.ops.setup_rate)
 		ice->spdif.ops.setup_rate(ice, rate);
+
+	return 0;
 }
 
 static int snd_vt1724_pcm_hw_params(struct snd_pcm_substream *substream,
 				    struct snd_pcm_hw_params *hw_params)
 {
 	struct snd_ice1712 *ice = snd_pcm_substream_chip(substream);
-	int i, chs;
+	int i, chs, err;
 
 	chs = params_channels(hw_params);
 	mutex_lock(&ice->open_mutex);
@@ -715,7 +720,11 @@
 		}
 	}
 	mutex_unlock(&ice->open_mutex);
-	snd_vt1724_set_pro_rate(ice, params_rate(hw_params), 0);
+
+	err = snd_vt1724_set_pro_rate(ice, params_rate(hw_params), 0);
+	if (err < 0)
+		return err;
+
 	return snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params));
 }
 
@@ -848,20 +857,39 @@
 #endif
 }
 
-static const struct vt1724_pcm_reg vt1724_playback_pro_reg = {
+static const struct vt1724_pcm_reg vt1724_pdma0_reg = {
 	.addr = VT1724_MT_PLAYBACK_ADDR,
 	.size = VT1724_MT_PLAYBACK_SIZE,
 	.count = VT1724_MT_PLAYBACK_COUNT,
 	.start = VT1724_PDMA0_START,
 };
 
-static const struct vt1724_pcm_reg vt1724_capture_pro_reg = {
+static const struct vt1724_pcm_reg vt1724_pdma4_reg = {
+	.addr = VT1724_MT_PDMA4_ADDR,
+	.size = VT1724_MT_PDMA4_SIZE,
+	.count = VT1724_MT_PDMA4_COUNT,
+	.start = VT1724_PDMA4_START,
+};
+
+static const struct vt1724_pcm_reg vt1724_rdma0_reg = {
 	.addr = VT1724_MT_CAPTURE_ADDR,
 	.size = VT1724_MT_CAPTURE_SIZE,
 	.count = VT1724_MT_CAPTURE_COUNT,
 	.start = VT1724_RDMA0_START,
 };
 
+static const struct vt1724_pcm_reg vt1724_rdma1_reg = {
+	.addr = VT1724_MT_RDMA1_ADDR,
+	.size = VT1724_MT_RDMA1_SIZE,
+	.count = VT1724_MT_RDMA1_COUNT,
+	.start = VT1724_RDMA1_START,
+};
+
+#define vt1724_playback_pro_reg vt1724_pdma0_reg
+#define vt1724_playback_spdif_reg vt1724_pdma4_reg
+#define vt1724_capture_pro_reg vt1724_rdma0_reg
+#define vt1724_capture_spdif_reg vt1724_rdma1_reg
+
 static const struct snd_pcm_hardware snd_vt1724_playback_pro = {
 	.info =			(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 				 SNDRV_PCM_INFO_BLOCK_TRANSFER |
@@ -1077,20 +1105,6 @@
  * SPDIF PCM
  */
 
-static const struct vt1724_pcm_reg vt1724_playback_spdif_reg = {
-	.addr = VT1724_MT_PDMA4_ADDR,
-	.size = VT1724_MT_PDMA4_SIZE,
-	.count = VT1724_MT_PDMA4_COUNT,
-	.start = VT1724_PDMA4_START,
-};
-
-static const struct vt1724_pcm_reg vt1724_capture_spdif_reg = {
-	.addr = VT1724_MT_RDMA1_ADDR,
-	.size = VT1724_MT_RDMA1_SIZE,
-	.count = VT1724_MT_RDMA1_COUNT,
-	.start = VT1724_RDMA1_START,
-};
-
 /* update spdif control bits; call with reg_lock */
 static void update_spdif_bits(struct snd_ice1712 *ice, unsigned int val)
 {
@@ -1963,7 +1977,7 @@
 	return idx * 3;
 }
 
-static int get_route_val(struct snd_ice1712 *ice, int shift)
+int snd_ice1724_get_route_val(struct snd_ice1712 *ice, int shift)
 {
 	unsigned long val;
 	unsigned char eitem;
@@ -1982,7 +1996,8 @@
 	return eitem;
 }
 
-static int put_route_val(struct snd_ice1712 *ice, unsigned int val, int shift)
+int snd_ice1724_put_route_val(struct snd_ice1712 *ice, unsigned int val,
+								int shift)
 {
 	unsigned int old_val, nval;
 	int change;
@@ -2010,7 +2025,7 @@
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
 	ucontrol->value.enumerated.item[0] =
-		get_route_val(ice, analog_route_shift(idx));
+		snd_ice1724_get_route_val(ice, analog_route_shift(idx));
 	return 0;
 }
 
@@ -2019,8 +2034,9 @@
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
-	return put_route_val(ice, ucontrol->value.enumerated.item[0],
-			     analog_route_shift(idx));
+	return snd_ice1724_put_route_val(ice,
+					 ucontrol->value.enumerated.item[0],
+					 analog_route_shift(idx));
 }
 
 static int snd_vt1724_pro_route_spdif_get(struct snd_kcontrol *kcontrol,
@@ -2029,7 +2045,7 @@
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
 	ucontrol->value.enumerated.item[0] =
-		get_route_val(ice, digital_route_shift(idx));
+		snd_ice1724_get_route_val(ice, digital_route_shift(idx));
 	return 0;
 }
 
@@ -2038,11 +2054,13 @@
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
-	return put_route_val(ice, ucontrol->value.enumerated.item[0],
-			     digital_route_shift(idx));
+	return snd_ice1724_put_route_val(ice,
+					 ucontrol->value.enumerated.item[0],
+					 digital_route_shift(idx));
 }
 
-static struct snd_kcontrol_new snd_vt1724_mixer_pro_analog_route __devinitdata = {
+static struct snd_kcontrol_new snd_vt1724_mixer_pro_analog_route __devinitdata =
+{
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "H/W Playback Route",
 	.info = snd_vt1724_pro_route_info,
@@ -2109,6 +2127,7 @@
 	snd_vt1724_prodigy_hifi_cards,
 	snd_vt1724_prodigy192_cards,
 	snd_vt1724_juli_cards,
+	snd_vt1724_maya44_cards,
 	snd_vt1724_phase_cards,
 	snd_vt1724_wtm_cards,
 	snd_vt1724_se_cards,
@@ -2246,8 +2265,10 @@
 static void __devinit snd_vt1724_chip_reset(struct snd_ice1712 *ice)
 {
 	outb(VT1724_RESET , ICEREG1724(ice, CONTROL));
+	inb(ICEREG1724(ice, CONTROL)); /* pci posting flush */
 	msleep(10);
 	outb(0, ICEREG1724(ice, CONTROL));
+	inb(ICEREG1724(ice, CONTROL)); /* pci posting flush */
 	msleep(10);
 }
 
@@ -2277,9 +2298,12 @@
 	if (snd_BUG_ON(!ice->pcm))
 		return -EIO;
 
-	err = snd_ctl_add(ice->card, snd_ctl_new1(&snd_vt1724_mixer_pro_spdif_route, ice));
-	if (err < 0)
-		return err;
+	if (!ice->own_routing) {
+		err = snd_ctl_add(ice->card,
+			snd_ctl_new1(&snd_vt1724_mixer_pro_spdif_route, ice));
+		if (err < 0)
+			return err;
+	}
 
 	err = snd_ctl_add(ice->card, snd_ctl_new1(&snd_vt1724_spdif_switch, ice));
 	if (err < 0)
@@ -2326,7 +2350,7 @@
 	if (err < 0)
 		return err;
 
-	if (ice->num_total_dacs > 0) {
+	if (!ice->own_routing && ice->num_total_dacs > 0) {
 		struct snd_kcontrol_new tmp = snd_vt1724_mixer_pro_analog_route;
 		tmp.count = ice->num_total_dacs;
 		if (ice->vt1720 && tmp.count > 2)
diff --git a/sound/pci/ice1712/maya44.c b/sound/pci/ice1712/maya44.c
new file mode 100644
index 0000000..3e1c20a
--- /dev/null
+++ b/sound/pci/ice1712/maya44.c
@@ -0,0 +1,779 @@
+/*
+ *   ALSA driver for ICEnsemble VT1724 (Envy24HT)
+ *
+ *   Lowlevel functions for ESI Maya44 cards
+ *
+ *	Copyright (c) 2009 Takashi Iwai <tiwai@suse.de>
+ *	Based on the patches by Rainer Zimmermann <mail@lightshed.de>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/tlv.h>
+
+#include "ice1712.h"
+#include "envy24ht.h"
+#include "maya44.h"
+
+/* WM8776 register indexes */
+#define WM8776_REG_HEADPHONE_L		0x00
+#define WM8776_REG_HEADPHONE_R		0x01
+#define WM8776_REG_HEADPHONE_MASTER	0x02
+#define WM8776_REG_DAC_ATTEN_L		0x03
+#define WM8776_REG_DAC_ATTEN_R		0x04
+#define WM8776_REG_DAC_ATTEN_MASTER	0x05
+#define WM8776_REG_DAC_PHASE		0x06
+#define WM8776_REG_DAC_CONTROL		0x07
+#define WM8776_REG_DAC_MUTE		0x08
+#define WM8776_REG_DAC_DEEMPH		0x09
+#define WM8776_REG_DAC_IF_CONTROL	0x0a
+#define WM8776_REG_ADC_IF_CONTROL	0x0b
+#define WM8776_REG_MASTER_MODE_CONTROL	0x0c
+#define WM8776_REG_POWERDOWN		0x0d
+#define WM8776_REG_ADC_ATTEN_L		0x0e
+#define WM8776_REG_ADC_ATTEN_R		0x0f
+#define WM8776_REG_ADC_ALC1		0x10
+#define WM8776_REG_ADC_ALC2		0x11
+#define WM8776_REG_ADC_ALC3		0x12
+#define WM8776_REG_ADC_NOISE_GATE	0x13
+#define WM8776_REG_ADC_LIMITER		0x14
+#define WM8776_REG_ADC_MUX		0x15
+#define WM8776_REG_OUTPUT_MUX		0x16
+#define WM8776_REG_RESET		0x17
+
+#define WM8776_NUM_REGS			0x18
+
+/* clock ratio identifiers for snd_wm8776_set_rate() */
+#define WM8776_CLOCK_RATIO_128FS	0
+#define WM8776_CLOCK_RATIO_192FS	1
+#define WM8776_CLOCK_RATIO_256FS	2
+#define WM8776_CLOCK_RATIO_384FS	3
+#define WM8776_CLOCK_RATIO_512FS	4
+#define WM8776_CLOCK_RATIO_768FS	5
+
+enum { WM_VOL_HP, WM_VOL_DAC, WM_VOL_ADC, WM_NUM_VOLS };
+enum { WM_SW_DAC, WM_SW_BYPASS, WM_NUM_SWITCHES };
+
+struct snd_wm8776 {
+	unsigned char addr;
+	unsigned short regs[WM8776_NUM_REGS];
+	unsigned char volumes[WM_NUM_VOLS][2];
+	unsigned int switch_bits;
+};
+
+struct snd_maya44 {
+	struct snd_ice1712 *ice;
+	struct snd_wm8776 wm[2];
+	struct mutex mutex;
+};
+
+
+/* write the given register and save the data to the cache */
+static void wm8776_write(struct snd_ice1712 *ice, struct snd_wm8776 *wm,
+			 unsigned char reg, unsigned short val)
+{
+	/*
+	 * WM8776 registers are up to 9 bits wide, bit 8 is placed in the LSB
+	 * of the address field
+	 */
+	snd_vt1724_write_i2c(ice, wm->addr,
+			     (reg << 1) | ((val >> 8) & 1),
+			     val & 0xff);
+	wm->regs[reg] = val;
+}
+
+/*
+ * update the given register with and/or mask and save the data to the cache
+ */
+static int wm8776_write_bits(struct snd_ice1712 *ice, struct snd_wm8776 *wm,
+			     unsigned char reg,
+			     unsigned short mask, unsigned short val)
+{
+	val |= wm->regs[reg] & ~mask;
+	if (val != wm->regs[reg]) {
+		wm8776_write(ice, wm, reg, val);
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * WM8776 volume controls
+ */
+
+struct maya_vol_info {
+	unsigned int maxval;		/* volume range: 0..maxval */
+	unsigned char regs[2];		/* left and right registers */
+	unsigned short mask;		/* value mask */
+	unsigned short offset;		/* zero-value offset */
+	unsigned short mute;		/* mute bit */
+	unsigned short update;		/* update bits */
+	unsigned char mux_bits[2];	/* extra bits for ADC mute */
+};
+
+static struct maya_vol_info vol_info[WM_NUM_VOLS] = {
+	[WM_VOL_HP] = {
+		.maxval = 80,
+		.regs = { WM8776_REG_HEADPHONE_L, WM8776_REG_HEADPHONE_R },
+		.mask = 0x7f,
+		.offset = 0x30,
+		.mute = 0x00,
+		.update = 0x180,	/* update and zero-cross enable */
+	},
+	[WM_VOL_DAC] = {
+		.maxval = 255,
+		.regs = { WM8776_REG_DAC_ATTEN_L, WM8776_REG_DAC_ATTEN_R },
+		.mask = 0xff,
+		.offset = 0x01,
+		.mute = 0x00,
+		.update = 0x100,	/* zero-cross enable */
+	},
+	[WM_VOL_ADC] = {
+		.maxval = 91,
+		.regs = { WM8776_REG_ADC_ATTEN_L, WM8776_REG_ADC_ATTEN_R },
+		.mask = 0xff,
+		.offset = 0xa5,
+		.mute = 0xa5,
+		.update = 0x100,	/* update */
+		.mux_bits = { 0x80, 0x40 }, /* ADCMUX bits */
+	},
+};
+
+/*
+ * dB tables
+ */
+/* headphone output: mute, -73..+6db (1db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_hp, -7400, 100, 1);
+/* DAC output: mute, -127..0db (0.5db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_dac, -12750, 50, 1);
+/* ADC gain: mute, -21..+24db (0.5db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_adc, -2100, 50, 1);
+
+static int maya_vol_info(struct snd_kcontrol *kcontrol,
+			 struct snd_ctl_elem_info *uinfo)
+{
+	unsigned int idx = kcontrol->private_value;
+	struct maya_vol_info *vol = &vol_info[idx];
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 2;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = vol->maxval;
+	return 0;
+}
+
+static int maya_vol_get(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = kcontrol->private_value;
+
+	mutex_lock(&chip->mutex);
+	ucontrol->value.integer.value[0] = wm->volumes[idx][0];
+	ucontrol->value.integer.value[1] = wm->volumes[idx][1];
+	mutex_unlock(&chip->mutex);
+	return 0;
+}
+
+static int maya_vol_put(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = kcontrol->private_value;
+	struct maya_vol_info *vol = &vol_info[idx];
+	unsigned int val, data;
+	int ch, changed = 0;
+
+	mutex_lock(&chip->mutex);
+	for (ch = 0; ch < 2; ch++) {
+		val = ucontrol->value.integer.value[ch];
+		if (val > vol->maxval)
+			val = vol->maxval;
+		if (val == wm->volumes[idx][ch])
+			continue;
+		if (!val)
+			data = vol->mute;
+		else
+			data = (val - 1) + vol->offset;
+		data |= vol->update;
+		changed |= wm8776_write_bits(chip->ice, wm, vol->regs[ch],
+					     vol->mask | vol->update, data);
+		if (vol->mux_bits[ch])
+			wm8776_write_bits(chip->ice, wm, WM8776_REG_ADC_MUX,
+					  vol->mux_bits[ch],
+					  val ? 0 : vol->mux_bits[ch]);
+		wm->volumes[idx][ch] = val;
+	}
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * WM8776 switch controls
+ */
+
+#define COMPOSE_SW_VAL(idx, reg, mask)	((idx) | ((reg) << 8) | ((mask) << 16))
+#define GET_SW_VAL_IDX(val)	((val) & 0xff)
+#define GET_SW_VAL_REG(val)	(((val) >> 8) & 0xff)
+#define GET_SW_VAL_MASK(val)	(((val) >> 16) & 0xff)
+
+#define maya_sw_info	snd_ctl_boolean_mono_info
+
+static int maya_sw_get(struct snd_kcontrol *kcontrol,
+		       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = GET_SW_VAL_IDX(kcontrol->private_value);
+
+	ucontrol->value.integer.value[0] = (wm->switch_bits >> idx) & 1;
+	return 0;
+}
+
+static int maya_sw_put(struct snd_kcontrol *kcontrol,
+		       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = GET_SW_VAL_IDX(kcontrol->private_value);
+	unsigned int mask, val;
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	mask = 1 << idx;
+	wm->switch_bits &= ~mask;
+	val = ucontrol->value.integer.value[0];
+	if (val)
+		wm->switch_bits |= mask;
+	mask = GET_SW_VAL_MASK(kcontrol->private_value);
+	changed = wm8776_write_bits(chip->ice, wm,
+				    GET_SW_VAL_REG(kcontrol->private_value),
+				    mask, val ? mask : 0);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * GPIO pins (known ones for maya44)
+ */
+#define GPIO_PHANTOM_OFF	2
+#define GPIO_MIC_RELAY		4
+#define GPIO_SPDIF_IN_INV	5
+#define GPIO_MUST_BE_0		7
+
+/*
+ * GPIO switch controls
+ */
+
+#define COMPOSE_GPIO_VAL(shift, inv)	((shift) | ((inv) << 8))
+#define GET_GPIO_VAL_SHIFT(val)		((val) & 0xff)
+#define GET_GPIO_VAL_INV(val)		(((val) >> 8) & 1)
+
+static int maya_set_gpio_bits(struct snd_ice1712 *ice, unsigned int mask,
+			      unsigned int bits)
+{
+	unsigned int data;
+	data = snd_ice1712_gpio_read(ice);
+	if ((data & mask) == bits)
+		return 0;
+	snd_ice1712_gpio_write(ice, (data & ~mask) | bits);
+	return 1;
+}
+
+#define maya_gpio_sw_info	snd_ctl_boolean_mono_info
+
+static int maya_gpio_sw_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	unsigned int shift = GET_GPIO_VAL_SHIFT(kcontrol->private_value);
+	unsigned int val;
+
+	val = (snd_ice1712_gpio_read(chip->ice) >> shift) & 1;
+	if (GET_GPIO_VAL_INV(kcontrol->private_value))
+		val = !val;
+	ucontrol->value.integer.value[0] = val;
+	return 0;
+}
+
+static int maya_gpio_sw_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	unsigned int shift = GET_GPIO_VAL_SHIFT(kcontrol->private_value);
+	unsigned int val, mask;
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	mask = 1 << shift;
+	val = ucontrol->value.integer.value[0];
+	if (GET_GPIO_VAL_INV(kcontrol->private_value))
+		val = !val;
+	val = val ? mask : 0;
+	changed = maya_set_gpio_bits(chip->ice, mask, val);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * capture source selection
+ */
+
+/* known working input slots (0-4) */
+#define MAYA_LINE_IN	1	/* in-2 */
+#define MAYA_MIC_IN	4	/* in-5 */
+
+static void wm8776_select_input(struct snd_maya44 *chip, int idx, int line)
+{
+	wm8776_write_bits(chip->ice, &chip->wm[idx], WM8776_REG_ADC_MUX,
+			  0x1f, 1 << line);
+}
+
+static int maya_rec_src_info(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = { "Line", "Mic" };
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = ARRAY_SIZE(texts);
+	if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+		uinfo->value.enumerated.item =
+			uinfo->value.enumerated.items - 1;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int maya_rec_src_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int sel;
+
+	if (snd_ice1712_gpio_read(chip->ice) & (1 << GPIO_MIC_RELAY))
+		sel = 1;
+	else
+		sel = 0;
+	ucontrol->value.enumerated.item[0] = sel;
+	return 0;
+}
+
+static int maya_rec_src_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int sel = ucontrol->value.enumerated.item[0];
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	changed = maya_set_gpio_bits(chip->ice, GPIO_MIC_RELAY,
+				     sel ? GPIO_MIC_RELAY : 0);
+	wm8776_select_input(chip, 0, sel ? MAYA_MIC_IN : MAYA_LINE_IN);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * Maya44 routing switch settings have different meanings than the standard
+ * ice1724 switches as defined in snd_vt1724_pro_route_info (ice1724.c).
+ */
+static int maya_pb_route_info(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = {
+		"PCM Out", /* 0 */
+		"Input 1", "Input 2", "Input 3", "Input 4"
+	};
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = ARRAY_SIZE(texts);
+	if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+		uinfo->value.enumerated.item =
+			uinfo->value.enumerated.items - 1;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int maya_pb_route_shift(int idx)
+{
+	static const unsigned char shift[10] =
+		{ 8, 20, 0, 3, 11, 23, 14, 26, 17, 29 };
+	return shift[idx % 10];
+}
+
+static int maya_pb_route_get(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+	ucontrol->value.enumerated.item[0] =
+		snd_ice1724_get_route_val(chip->ice, maya_pb_route_shift(idx));
+	return 0;
+}
+
+static int maya_pb_route_put(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+	return snd_ice1724_put_route_val(chip->ice,
+					 ucontrol->value.enumerated.item[0],
+					 maya_pb_route_shift(idx));
+}
+
+
+/*
+ * controls to be added
+ */
+
+static struct snd_kcontrol_new maya_controls[] __devinitdata = {
+	{
+		.name = "Crossmix Playback Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_hp },
+		.private_value = WM_VOL_HP,
+		.count = 2,
+	},
+	{
+		.name = "PCM Playback Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_dac },
+		.private_value = WM_VOL_DAC,
+		.count = 2,
+	},
+	{
+		.name = "Line Capture Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_adc },
+		.private_value = WM_VOL_ADC,
+		.count = 2,
+	},
+	{
+		.name = "PCM Playback Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_sw_info,
+		.get = maya_sw_get,
+		.put = maya_sw_put,
+		.private_value = COMPOSE_SW_VAL(WM_SW_DAC,
+						WM8776_REG_OUTPUT_MUX, 0x01),
+		.count = 2,
+	},
+	{
+		.name = "Bypass Playback Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_sw_info,
+		.get = maya_sw_get,
+		.put = maya_sw_put,
+		.private_value = COMPOSE_SW_VAL(WM_SW_BYPASS,
+						WM8776_REG_OUTPUT_MUX, 0x04),
+		.count = 2,
+	},
+	{
+		.name = "Capture Source",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_rec_src_info,
+		.get = maya_rec_src_get,
+		.put = maya_rec_src_put,
+	},
+	{
+		.name = "Mic Phantom Power Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_gpio_sw_info,
+		.get = maya_gpio_sw_get,
+		.put = maya_gpio_sw_put,
+		.private_value = COMPOSE_GPIO_VAL(GPIO_PHANTOM_OFF, 1),
+	},
+	{
+		.name = "SPDIF Capture Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_gpio_sw_info,
+		.get = maya_gpio_sw_get,
+		.put = maya_gpio_sw_put,
+		.private_value = COMPOSE_GPIO_VAL(GPIO_SPDIF_IN_INV, 1),
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "H/W Playback Route",
+		.info = maya_pb_route_info,
+		.get = maya_pb_route_get,
+		.put = maya_pb_route_put,
+		.count = 4,  /* FIXME: do controls 5-9 have any meaning? */
+	},
+};
+
+static int __devinit maya44_add_controls(struct snd_ice1712 *ice)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(maya_controls); i++) {
+		err = snd_ctl_add(ice->card, snd_ctl_new1(&maya_controls[i],
+							  ice->spec));
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+
+/*
+ * initialize a wm8776 chip
+ */
+static void __devinit wm8776_init(struct snd_ice1712 *ice,
+				  struct snd_wm8776 *wm, unsigned int addr)
+{
+	static const unsigned short inits_wm8776[] = {
+		0x02, 0x100, /* R2: headphone L+R muted + update */
+		0x05, 0x100, /* R5: DAC output L+R muted + update */
+		0x06, 0x000, /* R6: DAC output phase normal */
+		0x07, 0x091, /* R7: DAC enable zero cross detection,
+				normal output */
+		0x08, 0x000, /* R8: DAC soft mute off */
+		0x09, 0x000, /* R9: no deemph, DAC zero detect disabled */
+		0x0a, 0x022, /* R10: DAC I2C mode, std polarities, 24bit */
+		0x0b, 0x022, /* R11: ADC I2C mode, std polarities, 24bit,
+				highpass filter enabled */
+		0x0c, 0x042, /* R12: ADC+DAC slave, ADC+DAC 44,1kHz */
+		0x0d, 0x000, /* R13: all power up */
+		0x0e, 0x100, /* R14: ADC left muted,
+				enable zero cross detection */
+		0x0f, 0x100, /* R15: ADC right muted,
+				enable zero cross detection */
+			     /* R16: ALC...*/
+		0x11, 0x000, /* R17: disable ALC */
+			     /* R18: ALC...*/
+			     /* R19: noise gate...*/
+		0x15, 0x000, /* R21: ADC input mux init, mute all inputs */
+		0x16, 0x001, /* R22: output mux, select DAC */
+		0xff, 0xff
+	};
+
+	const unsigned short *ptr;
+	unsigned char reg;
+	unsigned short data;
+
+	wm->addr = addr;
+	/* enable DAC output; mute bypass, aux & all inputs */
+	wm->switch_bits = (1 << WM_SW_DAC);
+
+	ptr = inits_wm8776;
+	while (*ptr != 0xff) {
+		reg = *ptr++;
+		data = *ptr++;
+		wm8776_write(ice, wm, reg, data);
+	}
+}
+
+
+/*
+ * change the rate on the WM8776 codecs.
+ * this assumes that the VT17xx's rate is changed by the calling function.
+ * NOTE: even though the WM8776's are running in slave mode and rate
+ * selection is automatic, we need to call snd_wm8776_set_rate() here
+ * to make sure some flags are set correctly.
+ */
+static void set_rate(struct snd_ice1712 *ice, unsigned int rate)
+{
+	struct snd_maya44 *chip = ice->spec;
+	unsigned int ratio, adc_ratio, val;
+	int i;
+
+	switch (rate) {
+	case 192000:
+		ratio = WM8776_CLOCK_RATIO_128FS;
+		break;
+	case 176400:
+		ratio = WM8776_CLOCK_RATIO_128FS;
+		break;
+	case 96000:
+		ratio = WM8776_CLOCK_RATIO_256FS;
+		break;
+	case 88200:
+		ratio = WM8776_CLOCK_RATIO_384FS;
+		break;
+	case 48000:
+		ratio = WM8776_CLOCK_RATIO_512FS;
+		break;
+	case 44100:
+		ratio = WM8776_CLOCK_RATIO_512FS;
+		break;
+	case 32000:
+		ratio = WM8776_CLOCK_RATIO_768FS;
+		break;
+	case 0:
+		/* no hint - S/PDIF input is master, simply return */
+		return;
+	default:
+		snd_BUG();
+		return;
+	}
+
+	/*
+	 * this currently sets the same rate for ADC and DAC, but limits
+	 * ADC rate to 256X (96kHz). For 256X mode (96kHz), this sets ADC
+	 * oversampling to 64x, as recommended by WM8776 datasheet.
+	 * Setting the rate is not really necessary in slave mode.
+	 */
+	adc_ratio = ratio;
+	if (adc_ratio < WM8776_CLOCK_RATIO_256FS)
+		adc_ratio = WM8776_CLOCK_RATIO_256FS;
+
+	val = adc_ratio;
+	if (adc_ratio == WM8776_CLOCK_RATIO_256FS)
+		val |= 8;
+	val |= ratio << 4;
+
+	mutex_lock(&chip->mutex);
+	for (i = 0; i < 2; i++)
+		wm8776_write_bits(ice, &chip->wm[i],
+				  WM8776_REG_MASTER_MODE_CONTROL,
+				  0x180, val);
+	mutex_unlock(&chip->mutex);
+}
+
+/*
+ * supported sample rates (to override the default one)
+ */
+
+static unsigned int rates[] = {
+	32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000
+};
+
+/* playback rates: 32..192 kHz */
+static struct snd_pcm_hw_constraint_list dac_rates = {
+	.count = ARRAY_SIZE(rates),
+	.list = rates,
+	.mask = 0
+};
+
+
+/*
+ * chip addresses on I2C bus
+ */
+static unsigned char wm8776_addr[2] __devinitdata = {
+	0x34, 0x36, /* codec 0 & 1 */
+};
+
+/*
+ * initialize the chip
+ */
+static int __devinit maya44_init(struct snd_ice1712 *ice)
+{
+	int i;
+	struct snd_maya44 *chip;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+	mutex_init(&chip->mutex);
+	chip->ice = ice;
+	ice->spec = chip;
+
+	/* initialise codecs */
+	ice->num_total_dacs = 4;
+	ice->num_total_adcs = 4;
+	ice->akm_codecs = 0;
+
+	for (i = 0; i < 2; i++) {
+		wm8776_init(ice, &chip->wm[i], wm8776_addr[i]);
+		wm8776_select_input(chip, i, MAYA_LINE_IN);
+	}
+
+	/* set card specific rates */
+	ice->hw_rates = &dac_rates;
+
+	/* register change rate notifier */
+	ice->gpio.set_pro_rate = set_rate;
+
+	/* RDMA1 (2nd input channel) is used for ADC by default */
+	ice->force_rdma1 = 1;
+
+	/* have an own routing control */
+	ice->own_routing = 1;
+
+	return 0;
+}
+
+
+/*
+ * Maya44 boards don't provide the EEPROM data except for the vendor IDs.
+ * hence the driver needs to sets up it properly.
+ */
+
+static unsigned char maya44_eeprom[] __devinitdata = {
+	[ICE_EEP2_SYSCONF]     = 0x45,
+		/* clock xin1=49.152MHz, mpu401, 2 stereo ADCs+DACs */
+	[ICE_EEP2_ACLINK]      = 0x80,
+		/* I2S */
+	[ICE_EEP2_I2S]         = 0xf8,
+		/* vol, 96k, 24bit, 192k */
+	[ICE_EEP2_SPDIF]       = 0xc3,
+		/* enable spdif out, spdif out supp, spdif-in, ext spdif out */
+	[ICE_EEP2_GPIO_DIR]    = 0xff,
+	[ICE_EEP2_GPIO_DIR1]   = 0xff,
+	[ICE_EEP2_GPIO_DIR2]   = 0xff,
+	[ICE_EEP2_GPIO_MASK]   = 0/*0x9f*/,
+	[ICE_EEP2_GPIO_MASK1]  = 0/*0xff*/,
+	[ICE_EEP2_GPIO_MASK2]  = 0/*0x7f*/,
+	[ICE_EEP2_GPIO_STATE]  = (1 << GPIO_PHANTOM_OFF) |
+			(1 << GPIO_SPDIF_IN_INV),
+	[ICE_EEP2_GPIO_STATE1] = 0x00,
+	[ICE_EEP2_GPIO_STATE2] = 0x00,
+};
+
+/* entry point */
+struct snd_ice1712_card_info snd_vt1724_maya44_cards[] __devinitdata = {
+	{
+		.subvendor = VT1724_SUBDEVICE_MAYA44,
+		.name = "ESI Maya44",
+		.model = "maya44",
+		.chip_init = maya44_init,
+		.build_controls = maya44_add_controls,
+		.eeprom_size = sizeof(maya44_eeprom),
+		.eeprom_data = maya44_eeprom,
+	},
+	{ } /* terminator */
+};
diff --git a/sound/pci/ice1712/maya44.h b/sound/pci/ice1712/maya44.h
new file mode 100644
index 0000000..eafd03a
--- /dev/null
+++ b/sound/pci/ice1712/maya44.h
@@ -0,0 +1,10 @@
+#ifndef __SOUND_MAYA44_H
+#define __SOUND_MAYA44_H
+
+#define MAYA44_DEVICE_DESC		"{ESI,Maya44},"
+
+#define VT1724_SUBDEVICE_MAYA44		0x34315441	/* Maya44 */
+
+extern struct snd_ice1712_card_info  snd_vt1724_maya44_cards[];
+
+#endif	/* __SOUND_MAYA44_H */
diff --git a/sound/pci/lx6464es/Makefile b/sound/pci/lx6464es/Makefile
new file mode 100644
index 0000000..eb04a6c
--- /dev/null
+++ b/sound/pci/lx6464es/Makefile
@@ -0,0 +1,2 @@
+snd-lx6464es-objs := lx6464es.o lx_core.o
+obj-$(CONFIG_SND_LX6464ES) += snd-lx6464es.o
diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
new file mode 100644
index 0000000..ccf1b38
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -0,0 +1,1159 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2008, 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include <sound/initval.h>
+#include <sound/control.h>
+#include <sound/info.h>
+
+#include "lx6464es.h"
+
+MODULE_AUTHOR("Tim Blechmann");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("digigram lx6464es");
+MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}");
+
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+module_param_array(index, int, NULL, 0444);
+MODULE_PARM_DESC(index, "Index value for Digigram LX6464ES interface.");
+module_param_array(id, charp, NULL, 0444);
+MODULE_PARM_DESC(id, "ID string for  Digigram LX6464ES interface.");
+module_param_array(enable, bool, NULL, 0444);
+MODULE_PARM_DESC(enable, "Enable/disable specific Digigram LX6464ES soundcards.");
+
+static const char card_name[] = "LX6464ES";
+
+
+#define PCI_DEVICE_ID_PLX_LX6464ES		PCI_DEVICE_ID_PLX_9056
+
+static struct pci_device_id snd_lx6464es_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM
+	},			/* LX6464ES */
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM
+	},			/* LX6464ES-CAE */
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, snd_lx6464es_ids);
+
+
+
+/* PGO pour USERo dans le registre pci_0x06/loc_0xEC */
+#define CHIPSC_RESET_XILINX (1L<<16)
+
+
+/* alsa callbacks */
+static struct snd_pcm_hardware lx_caps = {
+	.info             = (SNDRV_PCM_INFO_MMAP |
+			     SNDRV_PCM_INFO_INTERLEAVED |
+			     SNDRV_PCM_INFO_MMAP_VALID |
+			     SNDRV_PCM_INFO_SYNC_START),
+	.formats	  = (SNDRV_PCM_FMTBIT_S16_LE |
+			     SNDRV_PCM_FMTBIT_S16_BE |
+			     SNDRV_PCM_FMTBIT_S24_3LE |
+			     SNDRV_PCM_FMTBIT_S24_3BE),
+	.rates            = (SNDRV_PCM_RATE_CONTINUOUS |
+			     SNDRV_PCM_RATE_8000_192000),
+	.rate_min         = 8000,
+	.rate_max         = 192000,
+	.channels_min     = 2,
+	.channels_max     = 64,
+	.buffer_bytes_max = 64*2*3*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER,
+	.period_bytes_min = (2*2*MICROBLAZE_IBL_MIN*2),
+	.period_bytes_max = (4*64*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER),
+	.periods_min      = 2,
+	.periods_max      = MAX_STREAM_BUFFER,
+};
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran);
+
+
+static int lx_hardware_open(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int channels = runtime->channels;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_pcm_uframes_t period_size = runtime->period_size;
+
+	snd_printd(LXP "allocating pipe for %d channels\n", channels);
+	err = lx_pipe_allocate(chip, 0, is_capture, channels);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "allocating pipe failed\n");
+		return err;
+	}
+
+	err = lx_set_granularity(chip, period_size);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting granularity to %ld failed\n",
+			   period_size);
+		return err;
+	}
+
+	return 0;
+}
+
+static int lx_hardware_start(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "setting stream format\n");
+	err = lx_stream_set_format(chip, runtime, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting stream format failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "starting pipe\n");
+	err = lx_pipe_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "starting pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to start\n");
+	err = lx_pipe_wait_for_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_stop(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "pausing pipe\n");
+	err = lx_pipe_pause(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "pausing pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to become idle\n");
+	err = lx_pipe_wait_for_idle(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "stopping pipe\n");
+	err = lx_pipe_stop(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "stopping pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_close(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "releasing pipe\n");
+	err = lx_pipe_release(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "releasing pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int err = 0;
+	int board_rate;
+
+	snd_printdd("->lx_pcm_open\n");
+	mutex_lock(&chip->setup_mutex);
+
+	/* copy the struct snd_pcm_hardware struct */
+	runtime->hw = lx_caps;
+
+#if 0
+	/* buffer-size should better be multiple of period-size */
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+#endif
+
+	/* the clock rate cannot be changed */
+	board_rate = chip->board_sample_rate;
+	err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE,
+					   board_rate, board_rate);
+
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+
+	/* constrain period size */
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+					   MICROBLAZE_IBL_MIN,
+					   MICROBLAZE_IBL_MAX);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP
+			   "could not constrain period size\n");
+		goto exit;
+	}
+
+	snd_pcm_hw_constraint_step(runtime, 0,
+				   SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 32);
+
+	snd_pcm_set_sync(substream);
+	err = 0;
+
+exit:
+	runtime->private_data = chip;
+
+	mutex_unlock(&chip->setup_mutex);
+	snd_printdd("<-lx_pcm_open, %d\n", err);
+	return err;
+}
+
+static int lx_pcm_close(struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	snd_printdd("->lx_pcm_close\n");
+	return err;
+}
+
+static snd_pcm_uframes_t lx_pcm_stream_pointer(struct snd_pcm_substream
+					       *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_uframes_t pos;
+	unsigned long flags;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	struct lx_stream *lx_stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_stream_pointer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	pos = lx_stream->frame_pos * substream->runtime->period_size;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	snd_printdd(LXP "stream_pointer at %ld\n", pos);
+	return pos;
+}
+
+static int lx_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_prepare\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+	}
+
+	snd_printd(LXP "opening hardware\n");
+	err = lx_hardware_open(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to open hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	err = lx_hardware_start(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to start hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	chip->hardware_running[is_capture] = 1;
+
+	if (chip->board_sample_rate != substream->runtime->rate) {
+		if (!err)
+			chip->board_sample_rate = substream->runtime->rate;
+	}
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *hw_params, int is_capture)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+
+	snd_printdd("->lx_pcm_hw_params\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	/* set dma buffer */
+	err = snd_pcm_lib_malloc_pages(substream,
+				       params_buffer_bytes(hw_params));
+
+	if (is_capture)
+		chip->capture_stream.stream = substream;
+	else
+		chip->playback_stream.stream = substream;
+
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params_playback(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 0);
+}
+
+static int lx_pcm_hw_params_capture(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 1);
+}
+
+static int lx_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_hw_free\n");
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		chip->hardware_running[is_capture] = 0;
+	}
+
+	err = snd_pcm_lib_free_pages(substream);
+
+	if (is_capture)
+		chip->capture_stream.stream = 0;
+	else
+		chip->playback_stream.stream = 0;
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static void lx_trigger_start(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	const int is_capture = lx_stream->is_capture;
+
+	int err;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 periods = substream->runtime->periods;
+	const u32 period_bytes = period_size * bytes_per_frame;
+
+	dma_addr_t buf = substream->dma_buffer.addr;
+	int i;
+
+	u32 needed, freed;
+	u32 size_array[5];
+
+	for (i = 0; i != periods; ++i) {
+		u32 buffer_index = 0;
+
+		err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed,
+				    size_array);
+		snd_printdd(LXP "starting: needed %d, freed %d\n",
+			    needed, freed);
+
+		err = lx_buffer_give(chip, 0, is_capture, period_bytes,
+				     lower_32_bits(buf), upper_32_bits(buf),
+				     &buffer_index);
+
+		snd_printdd(LXP "starting: buffer index %x on %p (%d bytes)\n",
+			    buffer_index, (void *)buf, period_bytes);
+		buf += period_bytes;
+	}
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "starting: needed %d, freed %d\n", needed, freed);
+
+	snd_printd(LXP "starting: starting stream\n");
+	err = lx_stream_start(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't start stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_RUNNING;
+
+	lx_stream->frame_pos = 0;
+}
+
+static void lx_trigger_stop(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	const int is_capture = lx_stream->is_capture;
+	int err;
+
+	snd_printd(LXP "stopping: stopping stream\n");
+	err = lx_stream_stop(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't stop stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_FREE;
+
+}
+
+static void lx_trigger_tasklet_dispatch_stream(struct lx6464es *chip,
+					       struct lx_stream *lx_stream)
+{
+	switch (lx_stream->status) {
+	case LX_STREAM_STATUS_SCHEDULE_RUN:
+		lx_trigger_start(chip, lx_stream);
+		break;
+
+	case LX_STREAM_STATUS_SCHEDULE_STOP:
+		lx_trigger_stop(chip, lx_stream);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void lx_trigger_tasklet(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	unsigned long flags;
+
+	snd_printdd("->lx_trigger_tasklet\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->capture_stream);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->playback_stream);
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int lx_pcm_trigger_dispatch(struct lx6464es *chip,
+				   struct lx_stream *lx_stream, int cmd)
+{
+	int err = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_RUN;
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_STOP;
+		break;
+
+	default:
+		err = -EINVAL;
+		goto exit;
+	}
+	tasklet_schedule(&chip->trigger_tasklet);
+
+exit:
+	return err;
+}
+
+
+static int lx_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+	struct lx_stream *stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_trigger\n");
+
+	return lx_pcm_trigger_dispatch(chip, stream, cmd);
+}
+
+static int snd_lx6464es_free(struct lx6464es *chip)
+{
+	snd_printdd("->snd_lx6464es_free\n");
+
+	lx_irq_disable(chip);
+
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
+
+	iounmap(chip->port_dsp_bar);
+	ioport_unmap(chip->port_plx_remapped);
+
+	pci_release_regions(chip->pci);
+	pci_disable_device(chip->pci);
+
+	kfree(chip);
+
+	return 0;
+}
+
+static int snd_lx6464es_dev_free(struct snd_device *device)
+{
+	return snd_lx6464es_free(device->device_data);
+}
+
+/* reset the dsp during initialization */
+static int __devinit lx_init_xilinx_reset(struct lx6464es *chip)
+{
+	int i;
+	u32 plx_reg = lx_plx_reg_read(chip, ePLX_CHIPSC);
+
+	snd_printdd("->lx_init_xilinx_reset\n");
+
+	/* activate reset of xilinx */
+	plx_reg &= ~CHIPSC_RESET_XILINX;
+
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+	msleep(1);
+
+	lx_plx_reg_write(chip, ePLX_MBOX3, 0);
+	msleep(1);
+
+	plx_reg |= CHIPSC_RESET_XILINX;
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+
+	/* deactivate reset of xilinx */
+	for (i = 0; i != 100; ++i) {
+		u32 reg_mbox3;
+		msleep(10);
+		reg_mbox3 = lx_plx_reg_read(chip, ePLX_MBOX3);
+		if (reg_mbox3) {
+			snd_printd(LXP "xilinx reset done\n");
+			snd_printdd(LXP "xilinx took %d loops\n", i);
+			break;
+		}
+	}
+
+	/* todo: add some error handling? */
+
+	/* clear mr */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	/* le xilinx ES peut ne pas etre encore pret, on attend. */
+	msleep(600);
+
+	return 0;
+}
+
+static int __devinit lx_init_xilinx_test(struct lx6464es *chip)
+{
+	u32 reg;
+
+	snd_printdd("->lx_init_xilinx_test\n");
+
+	/* TEST if we have access to Xilinx/MicroBlaze */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	reg = lx_dsp_reg_read(chip, eReg_CSM);
+
+	if (reg) {
+		snd_printk(KERN_ERR LXP "Problem: Reg_CSM %x.\n", reg);
+
+		/* PCI9056_SPACE0_REMAP */
+		lx_plx_reg_write(chip, ePLX_PCICR, 1);
+
+		reg = lx_dsp_reg_read(chip, eReg_CSM);
+		if (reg) {
+			snd_printk(KERN_ERR LXP "Error: Reg_CSM %x.\n", reg);
+			return -EAGAIN; /* seems to be appropriate */
+		}
+	}
+
+	snd_printd(LXP "Xilinx/MicroBlaze access test successful\n");
+
+	return 0;
+}
+
+/* initialize ethersound */
+static int __devinit lx_init_ethersound_config(struct lx6464es *chip)
+{
+	int i;
+	u32 orig_conf_es = lx_dsp_reg_read(chip, eReg_CONFES);
+
+	u32 default_conf_es = (64 << IOCR_OUTPUTS_OFFSET) |
+		(64 << IOCR_INPUTS_OFFSET) |
+		(FREQ_RATIO_SINGLE_MODE << FREQ_RATIO_OFFSET);
+
+	u32 conf_es = (orig_conf_es & CONFES_READ_PART_MASK)
+		| (default_conf_es & CONFES_WRITE_PART_MASK);
+
+	snd_printdd("->lx_init_ethersound\n");
+
+	chip->freq_ratio = FREQ_RATIO_SINGLE_MODE;
+
+	/*
+	 * write it to the card !
+	 * this actually kicks the ES xilinx, the first time since poweron.
+	 * the MAC address in the Reg_ADMACESMSB Reg_ADMACESLSB registers
+	 * is not ready before this is done, and the bit 2 in Reg_CSES is set.
+	 * */
+	lx_dsp_reg_write(chip, eReg_CONFES, conf_es);
+
+	for (i = 0; i != 1000; ++i) {
+		if (lx_dsp_reg_read(chip, eReg_CSES) & 4) {
+			snd_printd(LXP "ethersound initialized after %dms\n",
+				   i);
+			goto ethersound_initialized;
+		}
+		msleep(1);
+	}
+	snd_printk(KERN_WARNING LXP
+		   "ethersound could not be initialized after %dms\n", i);
+	return -ETIMEDOUT;
+
+ ethersound_initialized:
+	snd_printd(LXP "ethersound initialized\n");
+	return 0;
+}
+
+static int __devinit lx_init_get_version_features(struct lx6464es *chip)
+{
+	u32 dsp_version;
+
+	int err;
+
+	snd_printdd("->lx_init_get_version_features\n");
+
+	err = lx_dsp_get_version(chip, &dsp_version);
+
+	if (err == 0) {
+		u32 freq;
+
+		snd_printk(LXP "DSP version: V%02d.%02d #%d\n",
+			   (dsp_version>>16) & 0xff, (dsp_version>>8) & 0xff,
+			   dsp_version & 0xff);
+
+		/* later: what firmware version do we expect? */
+
+		/* retrieve Play/Rec features */
+		/* done here because we may have to handle alternate
+		 * DSP files. */
+		/* later */
+
+		/* init the EtherSound sample rate */
+		err = lx_dsp_get_clock_frequency(chip, &freq);
+		if (err == 0)
+			chip->board_sample_rate = freq;
+		snd_printd(LXP "actual clock frequency %d\n", freq);
+	} else {
+		snd_printk(KERN_ERR LXP "DSP corrupted \n");
+		err = -EAGAIN;
+	}
+
+	return err;
+}
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	int err = 0;
+	u32 snapped_gran = MICROBLAZE_IBL_MIN;
+
+	snd_printdd("->lx_set_granularity\n");
+
+	/* blocksize is a power of 2 */
+	while ((snapped_gran < gran) &&
+	       (snapped_gran < MICROBLAZE_IBL_MAX)) {
+		snapped_gran *= 2;
+	}
+
+	if (snapped_gran == chip->pcm_granularity)
+		return 0;
+
+	err = lx_dsp_set_granularity(chip, snapped_gran);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not set granularity\n");
+		err = -EAGAIN;
+	}
+
+	if (snapped_gran != gran)
+		snd_printk(LXP "snapped blocksize to %d\n", snapped_gran);
+
+	snd_printd(LXP "set blocksize on board %d\n", snapped_gran);
+	chip->pcm_granularity = snapped_gran;
+
+	return err;
+}
+
+/* initialize and test the xilinx dsp chip */
+static int __devinit lx_init_dsp(struct lx6464es *chip)
+{
+	int err;
+	u8 mac_address[6];
+	int i;
+
+	snd_printdd("->lx_init_dsp\n");
+
+	snd_printd(LXP "initialize board\n");
+	err = lx_init_xilinx_reset(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "testing board\n");
+	err = lx_init_xilinx_test(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "initialize ethersound configuration\n");
+	err = lx_init_ethersound_config(chip);
+	if (err)
+		return err;
+
+	lx_irq_enable(chip);
+
+	/** \todo the mac address should be ready by not, but it isn't,
+	 *  so we wait for it */
+	for (i = 0; i != 1000; ++i) {
+		err = lx_dsp_get_mac(chip, mac_address);
+		if (err)
+			return err;
+		if (mac_address[0] || mac_address[1] || mac_address[2] ||
+		    mac_address[3] || mac_address[4] || mac_address[5])
+			goto mac_ready;
+		msleep(1);
+	}
+	return -ETIMEDOUT;
+
+mac_ready:
+	snd_printd(LXP "mac address ready read after: %dms\n", i);
+	snd_printk(LXP "mac address: %02X.%02X.%02X.%02X.%02X.%02X\n",
+		   mac_address[0], mac_address[1], mac_address[2],
+		   mac_address[3], mac_address[4], mac_address[5]);
+
+	err = lx_init_get_version_features(chip);
+	if (err)
+		return err;
+
+	lx_set_granularity(chip, MICROBLAZE_IBL_DEFAULT);
+
+	chip->playback_mute = 0;
+
+	return err;
+}
+
+static struct snd_pcm_ops lx_ops_playback = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_playback,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static struct snd_pcm_ops lx_ops_capture = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_capture,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static int __devinit lx_pcm_create(struct lx6464es *chip)
+{
+	int err;
+	struct snd_pcm *pcm;
+
+	u32 size = 64 *		     /* channels */
+		3 *		     /* 24 bit samples */
+		MAX_STREAM_BUFFER *  /* periods */
+		MICROBLAZE_IBL_MAX * /* frames per period */
+		2;		     /* duplex */
+
+	size = PAGE_ALIGN(size);
+
+	/* hardcoded device name & channel count */
+	err = snd_pcm_new(chip->card, (char *)card_name, 0,
+			  1, 1, &pcm);
+
+	pcm->private_data = chip;
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &lx_ops_playback);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &lx_ops_capture);
+
+	pcm->info_flags = 0;
+	strcpy(pcm->name, card_name);
+
+	err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+						    snd_dma_pci_data(chip->pci),
+						    size, size);
+	if (err < 0)
+		return err;
+
+	chip->pcm = pcm;
+	chip->capture_stream.is_capture = 1;
+
+	return 0;
+}
+
+static int lx_control_playback_info(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int lx_control_playback_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	ucontrol->value.integer.value[0] = chip->playback_mute;
+	return 0;
+}
+
+static int lx_control_playback_put(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	int changed = 0;
+	int current_value = chip->playback_mute;
+
+	if (current_value != ucontrol->value.integer.value[0]) {
+		lx_level_unmute(chip, 0, !current_value);
+		chip->playback_mute = !current_value;
+		changed = 1;
+	}
+	return changed;
+}
+
+static struct snd_kcontrol_new lx_control_playback_switch __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "PCM Playback Switch",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info = lx_control_playback_info,
+	.get = lx_control_playback_get,
+	.put = lx_control_playback_put
+};
+
+
+
+static void lx_proc_levels_read(struct snd_info_entry *entry,
+				struct snd_info_buffer *buffer)
+{
+	u32 levels[64];
+	int err;
+	int i, j;
+	struct lx6464es *chip = entry->private_data;
+
+	snd_iprintf(buffer, "capture levels:\n");
+	err = lx_level_peaks(chip, 1, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\nplayback levels:\n");
+
+	err = lx_level_peaks(chip, 0, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\n");
+}
+
+static int __devinit lx_proc_create(struct snd_card *card, struct lx6464es *chip)
+{
+	struct snd_info_entry *entry;
+	int err = snd_card_proc_new(card, "levels", &entry);
+	if (err < 0)
+		return err;
+
+	snd_info_set_text_ops(entry, chip, lx_proc_levels_read);
+	return 0;
+}
+
+
+static int __devinit snd_lx6464es_create(struct snd_card *card,
+					 struct pci_dev *pci,
+					 struct lx6464es **rchip)
+{
+	struct lx6464es *chip;
+	int err;
+
+	static struct snd_device_ops ops = {
+		.dev_free = snd_lx6464es_dev_free,
+	};
+
+	snd_printdd("->snd_lx6464es_create\n");
+
+	*rchip = NULL;
+
+	/* enable PCI device */
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	pci_set_master(pci);
+
+	/* check if we can restrict PCI DMA transfers to 32 bits */
+	err = pci_set_dma_mask(pci, DMA_32BIT_MASK);
+	if (err < 0) {
+		snd_printk(KERN_ERR "architecture does not support "
+			   "32bit PCI busmaster DMA\n");
+		pci_disable_device(pci);
+		return -ENXIO;
+	}
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (chip == NULL) {
+		err = -ENOMEM;
+		goto alloc_failed;
+	}
+
+	chip->card = card;
+	chip->pci = pci;
+	chip->irq = -1;
+
+	/* initialize synchronization structs */
+	spin_lock_init(&chip->lock);
+	spin_lock_init(&chip->msg_lock);
+	mutex_init(&chip->setup_mutex);
+	tasklet_init(&chip->trigger_tasklet, lx_trigger_tasklet,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_capture, lx_tasklet_capture,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_playback, lx_tasklet_playback,
+		     (unsigned long)chip);
+
+	/* request resources */
+	err = pci_request_regions(pci, card_name);
+	if (err < 0)
+		goto request_regions_failed;
+
+	/* plx port */
+	chip->port_plx = pci_resource_start(pci, 1);
+	chip->port_plx_remapped = ioport_map(chip->port_plx,
+					     pci_resource_len(pci, 1));
+
+	/* dsp port */
+	chip->port_dsp_bar = pci_ioremap_bar(pci, 2);
+
+	err = request_irq(pci->irq, lx_interrupt, IRQF_SHARED,
+			  card_name, chip);
+	if (err) {
+		snd_printk(KERN_ERR LXP "unable to grab IRQ %d\n", pci->irq);
+		goto request_irq_failed;
+	}
+	chip->irq = pci->irq;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+	if (err < 0)
+		goto device_new_failed;
+
+	err = lx_init_dsp(chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during DSP initialization\n");
+		return err;
+	}
+
+	err = lx_pcm_create(chip);
+	if (err < 0)
+		return err;
+
+	err = lx_proc_create(card, chip);
+	if (err < 0)
+		return err;
+
+	err = snd_ctl_add(card, snd_ctl_new1(&lx_control_playback_switch,
+					     chip));
+	if (err < 0)
+		return err;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*rchip = chip;
+	return 0;
+
+device_new_failed:
+	free_irq(pci->irq, chip);
+
+request_irq_failed:
+	pci_release_regions(pci);
+
+request_regions_failed:
+	kfree(chip);
+
+alloc_failed:
+	pci_disable_device(pci);
+
+	return err;
+}
+
+static int __devinit snd_lx6464es_probe(struct pci_dev *pci,
+					const struct pci_device_id *pci_id)
+{
+	static int dev;
+	struct snd_card *card;
+	struct lx6464es *chip;
+	int err;
+
+	snd_printdd("->snd_lx6464es_probe\n");
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	if (err < 0)
+		return err;
+
+	err = snd_lx6464es_create(card, pci, &chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during snd_lx6464es_create\n");
+		goto out_free;
+	}
+
+	strcpy(card->driver, "lx6464es");
+	strcpy(card->shortname, "Digigram LX6464ES");
+	sprintf(card->longname, "%s at 0x%lx, 0x%p, irq %i",
+		card->shortname, chip->port_plx,
+		chip->port_dsp_bar, chip->irq);
+
+	err = snd_card_register(card);
+	if (err < 0)
+		goto out_free;
+
+	snd_printdd(LXP "initialization successful\n");
+	pci_set_drvdata(pci, card);
+	dev++;
+	return 0;
+
+out_free:
+	snd_card_free(card);
+	return err;
+
+}
+
+static void __devexit snd_lx6464es_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+
+static struct pci_driver driver = {
+	.name =     "Digigram LX6464ES",
+	.id_table = snd_lx6464es_ids,
+	.probe =    snd_lx6464es_probe,
+	.remove = __devexit_p(snd_lx6464es_remove),
+};
+
+
+/* module initialization */
+static int __init mod_init(void)
+{
+	return pci_register_driver(&driver);
+}
+
+static void __exit mod_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h
new file mode 100644
index 0000000..012c010
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.h
@@ -0,0 +1,114 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX6464ES_H
+#define LX6464ES_H
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+
+#include "lx_core.h"
+
+#define LXP "LX6464ES: "
+
+enum {
+    ES_cmd_free         = 0,    /* no command executing */
+    ES_cmd_processing   = 1,	/* execution of a read/write command */
+    ES_read_pending     = 2,    /* a asynchron read command is pending */
+    ES_read_finishing   = 3,    /* a read command has finished waiting (set by
+				 * Interrupt or CancelIrp) */
+};
+
+enum lx_stream_status {
+	LX_STREAM_STATUS_FREE,
+/* 	LX_STREAM_STATUS_OPEN, */
+	LX_STREAM_STATUS_SCHEDULE_RUN,
+/* 	LX_STREAM_STATUS_STARTED, */
+	LX_STREAM_STATUS_RUNNING,
+	LX_STREAM_STATUS_SCHEDULE_STOP,
+/* 	LX_STREAM_STATUS_STOPPED, */
+/* 	LX_STREAM_STATUS_PAUSED */
+};
+
+
+struct lx_stream {
+	struct snd_pcm_substream  *stream;
+	snd_pcm_uframes_t          frame_pos;
+	enum lx_stream_status      status; /* free, open, running, draining
+					    * pause */
+	int                        is_capture:1;
+};
+
+
+struct lx6464es {
+	struct snd_card        *card;
+	struct pci_dev         *pci;
+	int			irq;
+
+	spinlock_t		lock;        /* interrupt spinlock */
+	struct mutex            setup_mutex; /* mutex used in hw_params, open
+					      * and close */
+
+	struct tasklet_struct   trigger_tasklet; /* trigger tasklet */
+	struct tasklet_struct   tasklet_capture;
+	struct tasklet_struct   tasklet_playback;
+
+	/* ports */
+	unsigned long		port_plx;	   /* io port (size=256) */
+	void __iomem           *port_plx_remapped; /* remapped plx port */
+	void __iomem           *port_dsp_bar;      /* memory port (32-bit,
+						    * non-prefetchable,
+						    * size=8K) */
+
+	/* messaging */
+	spinlock_t		msg_lock;          /* message spinlock */
+	atomic_t	        send_message_locked;
+	struct lx_rmh           rmh;
+
+	/* configuration */
+	uint			freq_ratio : 2;
+	uint                    playback_mute : 1;
+	uint                    hardware_running[2];
+	u32                     board_sample_rate; /* sample rate read from
+						    * board */
+	u32                     sample_rate;	   /* our sample rate */
+	u16                     pcm_granularity;   /* board blocksize */
+
+	/* dma */
+	struct snd_dma_buffer   capture_dma_buf;
+	struct snd_dma_buffer   playback_dma_buf;
+
+	/* pcm */
+	struct snd_pcm         *pcm;
+
+	/* streams */
+	struct lx_stream        capture_stream;
+	struct lx_stream        playback_stream;
+};
+
+
+#endif /* LX6464ES_H */
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
new file mode 100644
index 0000000..5812780
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.c
@@ -0,0 +1,1444 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* #define RMH_DEBUG 1 */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "lx6464es.h"
+#include "lx_core.h"
+
+/* low-level register access */
+
+static const unsigned long dsp_port_offsets[] = {
+	0,
+	0x400,
+	0x401,
+	0x402,
+	0x403,
+	0x404,
+	0x405,
+	0x406,
+	0x407,
+	0x408,
+	0x409,
+	0x40a,
+	0x40b,
+	0x40c,
+
+	0x410,
+	0x411,
+	0x412,
+	0x413,
+	0x414,
+	0x415,
+	0x416,
+
+	0x420,
+	0x430,
+	0x431,
+	0x432,
+	0x433,
+	0x434,
+	0x440
+};
+
+static void __iomem *lx_dsp_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_dsp_bar;
+	return base_address + dsp_port_offsets[port]*4;
+}
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_fromio(data, address, len*sizeof(u32));
+}
+
+
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	iowrite32(data, address);
+}
+
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_toio(address, data, len*sizeof(u32));
+}
+
+
+static const unsigned long plx_port_offsets[] = {
+	0x04,
+	0x40,
+	0x44,
+	0x48,
+	0x4c,
+	0x50,
+	0x54,
+	0x58,
+	0x5c,
+	0x64,
+	0x68,
+	0x6C
+};
+
+static void __iomem *lx_plx_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_plx_remapped;
+	return base_address + plx_port_offsets[port];
+}
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	iowrite32(data, address);
+}
+
+u32 lx_plx_mbox_read(struct lx6464es *chip, int mbox_nr)
+{
+	int index;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 2:
+		index = ePLX_MBOX2;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+		snd_BUG();
+	default:
+		return 0xdeadbeef;
+	}
+
+	return lx_plx_reg_read(chip, index);
+}
+
+int lx_plx_mbox_write(struct lx6464es *chip, int mbox_nr, u32 value)
+{
+	int index = -1;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+	case 2:			/* reserved for Pipe States
+				 * the DSP keeps an image of it */
+		snd_BUG();
+		return -EBADRQC;
+	}
+
+	lx_plx_reg_write(chip, index, value);
+	return 0;
+}
+
+
+/* rmh */
+
+#ifdef CONFIG_SND_DEBUG
+#define CMD_NAME(a) a
+#else
+#define CMD_NAME(a) NULL
+#endif
+
+#define Reg_CSM_MR			0x00000002
+#define Reg_CSM_MC			0x00000001
+
+struct dsp_cmd_info {
+	u32    dcCodeOp;	/* Op Code of the command (usually 1st 24-bits
+				 * word).*/
+	u16    dcCmdLength;	/* Command length in words of 24 bits.*/
+	u16    dcStatusType;	/* Status type: 0 for fixed length, 1 for
+				 * random. */
+	u16    dcStatusLength;	/* Status length (if fixed).*/
+	char  *dcOpName;
+};
+
+/*
+  Initialization and control data for the Microblaze interface
+  - OpCode:
+    the opcode field of the command set at the proper offset
+  - CmdLength
+    the number of command words
+  - StatusType
+    offset in the status registers: 0 means that the return value may be
+    different from 0, and must be read
+  - StatusLength
+    the number of status words (in addition to the return value)
+*/
+
+static struct dsp_cmd_info dsp_commands[] =
+{
+	{ (CMD_00_INFO_DEBUG << OPCODE_OFFSET)			, 1 /*custom*/
+	  , 1	, 0 /**/		    , CMD_NAME("INFO_DEBUG") },
+	{ (CMD_01_GET_SYS_CFG << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_SYS_CFG") },
+	{ (CMD_02_SET_GRANULARITY << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_GRANULARITY") },
+	{ (CMD_03_SET_TIMER_IRQ << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_TIMER_IRQ") },
+	{ (CMD_04_GET_EVENT << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 0 /*up to 10*/     , CMD_NAME("GET_EVENT") },
+	{ (CMD_05_GET_PIPES << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 2 /*up to 4*/      , CMD_NAME("GET_PIPES") },
+	{ (CMD_06_ALLOCATE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("ALLOCATE_PIPE") },
+	{ (CMD_07_RELEASE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("RELEASE_PIPE") },
+	{ (CMD_08_ASK_BUFFERS << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , MAX_STREAM_BUFFER  , CMD_NAME("ASK_BUFFERS") },
+	{ (CMD_09_STOP_PIPE << OPCODE_OFFSET)			, 1 /**/
+	  , 0      , 0 /*up to 2*/      , CMD_NAME("STOP_PIPE") },
+	{ (CMD_0A_GET_PIPE_SPL_COUNT << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 1 /*up to 2*/      , CMD_NAME("GET_PIPE_SPL_COUNT") },
+	{ (CMD_0B_TOGGLE_PIPE_STATE << OPCODE_OFFSET)           , 1 /*up to 5*/
+	  , 1      , 0 /**/		    , CMD_NAME("TOGGLE_PIPE_STATE") },
+	{ (CMD_0C_DEF_STREAM << OPCODE_OFFSET)			, 1 /*up to 4*/
+	  , 1      , 0 /**/		    , CMD_NAME("DEF_STREAM") },
+	{ (CMD_0D_SET_MUTE  << OPCODE_OFFSET)			, 3 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_MUTE") },
+	{ (CMD_0E_GET_STREAM_SPL_COUNT << OPCODE_OFFSET)        , 1/**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_STREAM_SPL_COUNT") },
+	{ (CMD_0F_UPDATE_BUFFER << OPCODE_OFFSET)		, 3 /*up to 4*/
+	  , 0      , 1 /**/		    , CMD_NAME("UPDATE_BUFFER") },
+	{ (CMD_10_GET_BUFFER << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 4 /**/		    , CMD_NAME("GET_BUFFER") },
+	{ (CMD_11_CANCEL_BUFFER << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 1 /*up to 4*/      , CMD_NAME("CANCEL_BUFFER") },
+	{ (CMD_12_GET_PEAK << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 1 /**/		    , CMD_NAME("GET_PEAK") },
+	{ (CMD_13_SET_STREAM_STATE << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_STREAM_STATE") },
+};
+
+static void lx_message_init(struct lx_rmh *rmh, enum cmd_mb_opcodes cmd)
+{
+	snd_BUG_ON(cmd >= CMD_14_INVALID);
+
+	rmh->cmd[0] = dsp_commands[cmd].dcCodeOp;
+	rmh->cmd_len = dsp_commands[cmd].dcCmdLength;
+	rmh->stat_len = dsp_commands[cmd].dcStatusLength;
+	rmh->dsp_stat = dsp_commands[cmd].dcStatusType;
+	rmh->cmd_idx = cmd;
+	memset(&rmh->cmd[1], 0, (REG_CRM_NUMBER - 1) * sizeof(u32));
+
+#ifdef CONFIG_SND_DEBUG
+	memset(rmh->stat, 0, REG_CRM_NUMBER * sizeof(u32));
+#endif
+#ifdef RMH_DEBUG
+	rmh->cmd_idx = cmd;
+#endif
+}
+
+#ifdef RMH_DEBUG
+#define LXRMH "lx6464es rmh: "
+static void lx_message_dump(struct lx_rmh *rmh)
+{
+	u8 idx = rmh->cmd_idx;
+	int i;
+
+	snd_printk(LXRMH "command %s\n", dsp_commands[idx].dcOpName);
+
+	for (i = 0; i != rmh->cmd_len; ++i)
+		snd_printk(LXRMH "\tcmd[%d] %08x\n", i, rmh->cmd[i]);
+
+	for (i = 0; i != rmh->stat_len; ++i)
+		snd_printk(LXRMH "\tstat[%d]: %08x\n", i, rmh->stat[i]);
+	snd_printk("\n");
+}
+#else
+static inline void lx_message_dump(struct lx_rmh *rmh)
+{}
+#endif
+
+
+
+/* sleep 500 - 100 = 400 times 100us -> the timeout is >= 40 ms */
+#define XILINX_TIMEOUT_MS       40
+#define XILINX_POLL_NO_SLEEP    100
+#define XILINX_POLL_ITERATIONS  150
+
+#if 0 /* not used now */
+static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+	int answer_received;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0);
+	atomic_set(&chip->send_message_locked, 1);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) {
+		answer_received = atomic_read(&chip->send_message_locked);
+		if (answer_received == 0)
+			break;
+		msleep(1);
+	}
+
+	if (answer_received == 0) {
+		/* in Debug mode verify Reg_CSM_MR */
+		snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR));
+
+		/* command finished, read status */
+		if (rmh->dsp_stat == 0)
+			reg = lx_dsp_reg_read(chip, eReg_CRM1);
+		else
+			reg = 0;
+	} else {
+		int i;
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "Interrupts disabled?\n");
+
+		/* attente bit Reg_CSM_MR */
+		for (i = 0; i != XILINX_POLL_ITERATIONS; i++) {
+			if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) {
+				if (rmh->dsp_stat == 0)
+					reg = lx_dsp_reg_read(chip, eReg_CRM1);
+				else
+					reg = 0;
+				goto polling_successful;
+			}
+
+			if (i > XILINX_POLL_NO_SLEEP)
+				msleep(1);
+		}
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "polling failed\n");
+
+polling_successful:
+		atomic_set(&chip->send_message_locked, 0);
+	}
+
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n",
+			   reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+	return 0;
+}
+#endif /* not used now */
+
+static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) {
+		if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) {
+			if (rmh->dsp_stat == 0)
+				reg = lx_dsp_reg_read(chip, eReg_CRM1);
+			else
+				reg = 0;
+			goto polling_successful;
+		} else
+			udelay(1);
+	}
+	snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send_atomic! "
+		   "polling failed\n");
+
+polling_successful:
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(LXP "rmh error: %08x\n", reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+
+	return reg;
+}
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version)
+{
+	u16 ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rdsp_version = chip->rmh.stat[1];
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq)
+{
+	u16 ret = 0;
+	unsigned long flags;
+	u32 freq_raw = 0;
+	u32 freq = 0;
+	u32 frequency = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (ret == 0) {
+		freq_raw = chip->rmh.stat[0] >> FREQ_FIELD_OFFSET;
+		freq = freq_raw & XES_FREQ_COUNT8_MASK;
+
+		if ((freq < XES_FREQ_COUNT8_48_MAX) ||
+		    (freq > XES_FREQ_COUNT8_44_MIN))
+			frequency = 0; /* unknown */
+		else if (freq >= XES_FREQ_COUNT8_44_MAX)
+			frequency = 44100;
+		else
+			frequency = 48000;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	*rfreq = frequency * chip->freq_ratio;
+
+	return ret;
+}
+
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address)
+{
+	u32 macmsb, maclsb;
+
+	macmsb = lx_dsp_reg_read(chip, eReg_ADMACESMSB) & 0x00FFFFFF;
+	maclsb = lx_dsp_reg_read(chip, eReg_ADMACESLSB) & 0x00FFFFFF;
+
+	/* todo: endianess handling */
+	mac_address[5] = ((u8 *)(&maclsb))[0];
+	mac_address[4] = ((u8 *)(&maclsb))[1];
+	mac_address[3] = ((u8 *)(&maclsb))[2];
+	mac_address[2] = ((u8 *)(&macmsb))[0];
+	mac_address[1] = ((u8 *)(&macmsb))[1];
+	mac_address[0] = ((u8 *)(&macmsb))[2];
+
+	return 0;
+}
+
+
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_02_SET_GRANULARITY);
+	chip->rmh.cmd[0] |= gran;
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_04_GET_EVENT);
+	chip->rmh.stat_len = 9;	/* we don't necessarily need the full length */
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!ret)
+		memcpy(data, chip->rmh.stat, chip->rmh.stat_len * sizeof(u32));
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+#define CSES_TIMEOUT        100     /* microseconds */
+#define CSES_CE             0x0001
+#define CSES_BROADCAST      0x0002
+#define CSES_UPDATE_LDSV    0x0004
+
+int lx_dsp_es_check_pipeline(struct lx6464es *chip)
+{
+	int i;
+
+	for (i = 0; i != CSES_TIMEOUT; ++i) {
+		/*
+		 * le bit CSES_UPDATE_LDSV est à 1 dés que le macprog
+		 * est pret. il re-passe à 0 lorsque le premier read a
+		 * été fait. pour l'instant on retire le test car ce bit
+		 * passe a 1 environ 200 à 400 ms aprés que le registre
+		 * confES à été écrit (kick du xilinx ES).
+		 *
+		 * On ne teste que le bit CE.
+		 * */
+
+		u32 cses = lx_dsp_reg_read(chip, eReg_CSES);
+
+		if ((cses & CSES_CE) == 0)
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+
+#define PIPE_INFO_TO_CMD(capture, pipe)					\
+	((u32)((u32)(pipe) | ((capture) ? ID_IS_CAPTURE : 0L)) << ID_OFFSET)
+
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_06_ALLOCATE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= channels;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not allocate pipe\n");
+
+	return err;
+}
+
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_07_RELEASE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+#ifdef CONFIG_SND_DEBUG
+	if (size_array)
+		memset(size_array, 0, sizeof(u32)*MAX_STREAM_BUFFER);
+#endif
+
+	*r_needed = 0;
+	*r_freed = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_08_ASK_BUFFERS);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!err) {
+		int i;
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			u32 stat = chip->rmh.stat[i];
+			if (stat & (BF_EOB << BUFF_FLAGS_OFFSET)) {
+				/* finished */
+				*r_freed += 1;
+				if (size_array)
+					size_array[i] = stat & MASK_DATA_SIZE;
+			} else if ((stat & (BF_VALID << BUFF_FLAGS_OFFSET))
+				   == 0)
+				/* free */
+				*r_needed += 1;
+		}
+
+#if 0
+		snd_printdd(LXP "CMD_08_ASK_BUFFERS: needed %d, freed %d\n",
+			    *r_needed, *r_freed);
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			for (i = 0; i != chip->rmh.stat_len; ++i)
+				snd_printdd("  stat[%d]: %x, %x\n", i,
+					    chip->rmh.stat[i],
+					    chip->rmh.stat[i] & MASK_DATA_SIZE);
+		}
+#endif
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_09_STOP_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_toggle_state(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0B_TOGGLE_PIPE_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+
+	err = lx_pipe_wait_for_idle(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err = 0;
+
+	err = lx_pipe_wait_for_start(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.stat_len = 2;	/* need all words here! */
+
+	err = lx_message_send_atomic(chip, &chip->rmh); /* don't sleep! */
+
+	if (err != 0)
+		snd_printk(KERN_ERR
+			   "lx6464es: could not query pipe's sample count\n");
+	else {
+		*rsample_count = ((u64)(chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+				  << 24)     /* hi part */
+			+ chip->rmh.stat[1]; /* lo part */
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not query pipe's state\n");
+	else
+		*rstate = (chip->rmh.stat[0] >> PSTATE_OFFSET) & 0x0F;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_wait_for_state(struct lx6464es *chip, u32 pipe,
+				  int is_capture, u16 state)
+{
+	int i;
+
+	/* max 2*PCMOnlyGranularity = 2*1024 at 44100 = < 50 ms:
+	 * timeout 50 ms */
+	for (i = 0; i != 50; ++i) {
+		u16 current_state;
+		int err = lx_pipe_state(chip, pipe, is_capture, &current_state);
+
+		if (err < 0)
+			return err;
+
+		if (current_state == state)
+			return 0;
+
+		mdelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_RUN);
+}
+
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_IDLE);
+}
+
+/* low-level stream handling */
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			       int is_capture, enum stream_state_t state)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_13_SET_STREAM_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= state;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	u32 channels = runtime->channels;
+
+	if (runtime->channels != channels)
+		snd_printk(KERN_ERR LXP "channel count mismatch: %d vs %d",
+			   runtime->channels, channels);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0C_DEF_STREAM);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	if (runtime->sample_bits == 16)
+		/* 16 bit format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_16b << STREAM_FMT_OFFSET);
+
+	if (snd_pcm_format_little_endian(runtime->format))
+		/* little endian/intel format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_intel << STREAM_FMT_OFFSET);
+
+	chip->rmh.cmd[0] |= channels-1;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rstate = (chip->rmh.stat[0] & SF_START) ? START_STATE : PAUSE_STATE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*r_bytepos = ((u64) (chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+		      << 32)	     /* hi part */
+		+ chip->rmh.stat[1]; /* lo part */
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* low-level buffer handling */
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0F_UPDATE_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= BF_NOTIFY_EOB; /* request interrupt notification */
+
+	/* todo: pause request, circular buffer */
+
+	chip->rmh.cmd[1] = buffer_size & MASK_DATA_SIZE;
+	chip->rmh.cmd[2] = buf_address_lo;
+
+	if (buf_address_hi) {
+		chip->rmh.cmd_len = 4;
+		chip->rmh.cmd[3] = buf_address_hi;
+		chip->rmh.cmd[0] |= BF_64BITS_ADR;
+	}
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0) {
+		*r_buffer_index = chip->rmh.stat[0];
+		goto done;
+	}
+
+	if (err == EB_RBUFFERS_TABLE_OVERFLOW)
+		snd_printk(LXP "lx_buffer_give EB_RBUFFERS_TABLE_OVERFLOW\n");
+
+	if (err == EB_INVALID_STREAM)
+		snd_printk(LXP "lx_buffer_give EB_INVALID_STREAM\n");
+
+	if (err == EB_CMD_REFUSED)
+		snd_printk(LXP "lx_buffer_give EB_CMD_REFUSED\n");
+
+ done:
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= MASK_BUFFER_ID; /* ask for the current buffer: the
+					     * microblaze will seek for it */
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0)
+		*r_buffer_size = chip->rmh.stat[0]  & MASK_DATA_SIZE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= buffer_index;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+/* low-level gain/peak handling
+ *
+ * \todo: can we unmute capture/playback channels independently?
+ *
+ * */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute)
+{
+	int err;
+	unsigned long flags;
+
+	/* bit set to 1: channel muted */
+	u64 mute_mask = unmute ? 0 : 0xFFFFFFFFFFFFFFFFLLU;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0D_SET_MUTE);
+
+	chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, 0);
+
+	chip->rmh.cmd[1] = (u32)(mute_mask >> (u64)32);	       /* hi part */
+	chip->rmh.cmd[2] = (u32)(mute_mask & (u64)0xFFFFFFFF); /* lo part */
+
+	snd_printk("mute %x %x %x\n", chip->rmh.cmd[0], chip->rmh.cmd[1],
+		   chip->rmh.cmd[2]);
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static u32 peak_map[] = {
+	0x00000109, /* -90.308dB */
+	0x0000083B, /* -72.247dB */
+	0x000020C4, /* -60.205dB */
+	0x00008273, /* -48.030dB */
+	0x00020756, /* -36.005dB */
+	0x00040C37, /* -30.001dB */
+	0x00081385, /* -24.002dB */
+	0x00101D3F, /* -18.000dB */
+	0x0016C310, /* -15.000dB */
+	0x002026F2, /* -12.001dB */
+	0x002D6A86, /* -9.000dB */
+	0x004026E6, /* -6.004dB */
+	0x005A9DF6, /* -3.000dB */
+	0x0065AC8B, /* -2.000dB */
+	0x00721481, /* -1.000dB */
+	0x007FFFFF, /* FS */
+};
+
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels)
+{
+	int err = 0;
+	unsigned long flags;
+	int i;
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	for (i = 0; i < channels; i += 4) {
+		u32 s0, s1, s2, s3;
+
+		lx_message_init(&chip->rmh, CMD_12_GET_PEAK);
+		chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, i);
+
+		err = lx_message_send_atomic(chip, &chip->rmh);
+
+		if (err == 0) {
+			s0 = peak_map[chip->rmh.stat[0] & 0x0F];
+			s1 = peak_map[(chip->rmh.stat[0] >>  4) & 0xf];
+			s2 = peak_map[(chip->rmh.stat[0] >>  8) & 0xf];
+			s3 = peak_map[(chip->rmh.stat[0] >>  12) & 0xf];
+		} else
+			s0 = s1 = s2 = s3 = 0;
+
+		r_levels[0] = s0;
+		r_levels[1] = s1;
+		r_levels[2] = s2;
+		r_levels[3] = s3;
+
+		r_levels += 4;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* interrupt handling */
+#define PCX_IRQ_NONE 0
+#define IRQCS_ACTIVE_PCIDB  0x00002000L         /* Bit nø 13 */
+#define IRQCS_ENABLE_PCIIRQ 0x00000100L         /* Bit nø 08 */
+#define IRQCS_ENABLE_PCIDB  0x00000200L         /* Bit nø 09 */
+
+static u32 lx_interrupt_test_ack(struct lx6464es *chip)
+{
+	u32 irqcs = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* Test if PCI Doorbell interrupt is active */
+	if (irqcs & IRQCS_ACTIVE_PCIDB)	{
+		u32 temp;
+		irqcs = PCX_IRQ_NONE;
+
+		while ((temp = lx_plx_reg_read(chip, ePLX_L2PCIDB))) {
+			/* RAZ interrupt */
+			irqcs |= temp;
+			lx_plx_reg_write(chip, ePLX_L2PCIDB, temp);
+		}
+
+		return irqcs;
+	}
+	return PCX_IRQ_NONE;
+}
+
+static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc,
+			    int *r_async_pending, int *r_async_escmd)
+{
+	u32 irq_async;
+	u32 irqsrc = lx_interrupt_test_ack(chip);
+
+	if (irqsrc == PCX_IRQ_NONE)
+		return 0;
+
+	*r_irqsrc = irqsrc;
+
+	irq_async = irqsrc & MASK_SYS_ASYNC_EVENTS; /* + EtherSound response
+						     * (set by xilinx) + EOB */
+
+	if (irq_async & MASK_SYS_STATUS_ESA) {
+		irq_async &= ~MASK_SYS_STATUS_ESA;
+		*r_async_escmd = 1;
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		/* xilinx command notification */
+		atomic_set(&chip->send_message_locked, 0);
+
+	if (irq_async) {
+		/* snd_printd("interrupt: async event pending\n"); */
+		*r_async_pending = 1;
+	}
+
+	return 1;
+}
+
+static int lx_interrupt_handle_async_events(struct lx6464es *chip, u32 irqsrc,
+					    int *r_freq_changed,
+					    u64 *r_notified_in_pipe_mask,
+					    u64 *r_notified_out_pipe_mask)
+{
+	int err;
+	u32 stat[9];		/* answer from CMD_04_GET_EVENT */
+
+	/* On peut optimiser pour ne pas lire les evenements vides
+	 * les mots de rÃĩponse sont dans l'ordre suivant :
+	 * Stat[0]	mot de status gÃĩnÃĩral
+	 * Stat[1]	fin de buffer OUT pF
+	 * Stat[2]	fin de buffer OUT pf
+	 * Stat[3]	fin de buffer IN pF
+	 * Stat[4]	fin de buffer IN pf
+	 * Stat[5]	underrun poid fort
+	 * Stat[6]	underrun poid faible
+	 * Stat[7]	overrun poid fort
+	 * Stat[8]	overrun poid faible
+	 * */
+
+	u64 orun_mask;
+	u64 urun_mask;
+#if 0
+	int has_underrun   = (irqsrc & MASK_SYS_STATUS_URUN) ? 1 : 0;
+	int has_overrun    = (irqsrc & MASK_SYS_STATUS_ORUN) ? 1 : 0;
+#endif
+	int eb_pending_out = (irqsrc & MASK_SYS_STATUS_EOBO) ? 1 : 0;
+	int eb_pending_in  = (irqsrc & MASK_SYS_STATUS_EOBI) ? 1 : 0;
+
+	*r_freq_changed = (irqsrc & MASK_SYS_STATUS_FREQ) ? 1 : 0;
+
+	err = lx_dsp_read_async_events(chip, stat);
+	if (err < 0)
+		return err;
+
+	if (eb_pending_in) {
+		*r_notified_in_pipe_mask = ((u64)stat[3] << 32)
+			+ stat[4];
+		snd_printdd(LXP "interrupt: EOBI pending %llx\n",
+			    *r_notified_in_pipe_mask);
+	}
+	if (eb_pending_out) {
+		*r_notified_out_pipe_mask = ((u64)stat[1] << 32)
+			+ stat[2];
+		snd_printdd(LXP "interrupt: EOBO pending %llx\n",
+			    *r_notified_out_pipe_mask);
+	}
+
+	orun_mask = ((u64)stat[7] << 32) + stat[8];
+	urun_mask = ((u64)stat[5] << 32) + stat[6];
+
+	/* todo: handle xrun notification */
+
+	return err;
+}
+
+static int lx_interrupt_request_new_buffer(struct lx6464es *chip,
+					   struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	int is_capture = lx_stream->is_capture;
+	int err;
+	unsigned long flags;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 period_bytes = period_size * bytes_per_frame;
+	const u32 pos = lx_stream->frame_pos;
+	const u32 next_pos = ((pos+1) == substream->runtime->periods) ?
+		0 : pos + 1;
+
+	dma_addr_t buf = substream->dma_buffer.addr + pos * period_bytes;
+	u32 buf_hi = 0;
+	u32 buf_lo = 0;
+	u32 buffer_index = 0;
+
+	u32 needed, freed;
+	u32 size_array[MAX_STREAM_BUFFER];
+
+	snd_printdd("->lx_interrupt_request_new_buffer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "interrupt: needed %d, freed %d\n", needed, freed);
+
+	unpack_pointer(buf, &buf_lo, &buf_hi);
+	err = lx_buffer_give(chip, 0, is_capture, period_bytes, buf_lo, buf_hi,
+			     &buffer_index);
+	snd_printdd(LXP "interrupt: gave buffer index %x on %p (%d bytes)\n",
+		    buffer_index, (void *)buf, period_bytes);
+
+	lx_stream->frame_pos = next_pos;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return err;
+}
+
+void lx_tasklet_playback(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->playback_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_playback\n");
+
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for playback\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+void lx_tasklet_capture(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->capture_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_capture\n");
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for capture\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+
+
+static int lx_interrupt_handle_audio_transfer(struct lx6464es *chip,
+					      u64 notified_in_pipe_mask,
+					      u64 notified_out_pipe_mask)
+{
+	int err = 0;
+
+	if (notified_in_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for capture\n");
+		tasklet_hi_schedule(&chip->tasklet_capture);
+	}
+
+	if (notified_out_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for playback\n");
+		tasklet_hi_schedule(&chip->tasklet_playback);
+	}
+
+	return err;
+}
+
+
+irqreturn_t lx_interrupt(int irq, void *dev_id)
+{
+	struct lx6464es *chip = dev_id;
+	int async_pending, async_escmd;
+	u32 irqsrc;
+
+	spin_lock(&chip->lock);
+
+	snd_printdd("**************************************************\n");
+
+	if (!lx_interrupt_ack(chip, &irqsrc, &async_pending, &async_escmd)) {
+		spin_unlock(&chip->lock);
+		snd_printdd("IRQ_NONE\n");
+		return IRQ_NONE; /* this device did not cause the interrupt */
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		goto exit;
+
+#if 0
+	if (irqsrc & MASK_SYS_STATUS_EOBI)
+		snd_printdd(LXP "interrupt: EOBI\n");
+
+	if (irqsrc & MASK_SYS_STATUS_EOBO)
+		snd_printdd(LXP "interrupt: EOBO\n");
+
+	if (irqsrc & MASK_SYS_STATUS_URUN)
+		snd_printdd(LXP "interrupt: URUN\n");
+
+	if (irqsrc & MASK_SYS_STATUS_ORUN)
+		snd_printdd(LXP "interrupt: ORUN\n");
+#endif
+
+	if (async_pending) {
+		u64 notified_in_pipe_mask = 0;
+		u64 notified_out_pipe_mask = 0;
+		int freq_changed;
+		int err;
+
+		/* handle async events */
+		err = lx_interrupt_handle_async_events(chip, irqsrc,
+						       &freq_changed,
+						       &notified_in_pipe_mask,
+						       &notified_out_pipe_mask);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error handling async events\n");
+
+		err = lx_interrupt_handle_audio_transfer(chip,
+							 notified_in_pipe_mask,
+							 notified_out_pipe_mask
+			);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error during audio transfer\n");
+	}
+
+	if (async_escmd) {
+#if 0
+		/* backdoor for ethersound commands
+		 *
+		 * for now, we do not need this
+		 *
+		 * */
+
+		snd_printdd("lx6464es: interrupt requests escmd handling\n");
+#endif
+	}
+
+exit:
+	spin_unlock(&chip->lock);
+	return IRQ_HANDLED;	/* this device caused the interrupt */
+}
+
+
+static void lx_irq_set(struct lx6464es *chip, int enable)
+{
+	u32 reg = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* enable/disable interrupts
+	 *
+	 * Set the Doorbell and PCI interrupt enable bits
+	 *
+	 * */
+	if (enable)
+		reg |=  (IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	else
+		reg &= ~(IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	lx_plx_reg_write(chip, ePLX_IRQCS, reg);
+}
+
+void lx_irq_enable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_enable\n");
+	lx_irq_set(chip, 1);
+}
+
+void lx_irq_disable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_disable\n");
+	lx_irq_set(chip, 0);
+}
diff --git a/sound/pci/lx6464es/lx_core.h b/sound/pci/lx6464es/lx_core.h
new file mode 100644
index 0000000..6bd9cbb
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.h
@@ -0,0 +1,242 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_CORE_H
+#define LX_CORE_H
+
+#include <linux/interrupt.h>
+
+#include "lx_defs.h"
+
+#define REG_CRM_NUMBER		12
+
+struct lx6464es;
+
+/* low-level register access */
+
+/* dsp register access */
+enum {
+	eReg_BASE,
+	eReg_CSM,
+	eReg_CRM1,
+	eReg_CRM2,
+	eReg_CRM3,
+	eReg_CRM4,
+	eReg_CRM5,
+	eReg_CRM6,
+	eReg_CRM7,
+	eReg_CRM8,
+	eReg_CRM9,
+	eReg_CRM10,
+	eReg_CRM11,
+	eReg_CRM12,
+
+	eReg_ICR,
+	eReg_CVR,
+	eReg_ISR,
+	eReg_RXHTXH,
+	eReg_RXMTXM,
+	eReg_RHLTXL,
+	eReg_RESETDSP,
+
+	eReg_CSUF,
+	eReg_CSES,
+	eReg_CRESMSB,
+	eReg_CRESLSB,
+	eReg_ADMACESMSB,
+	eReg_ADMACESLSB,
+	eReg_CONFES,
+
+	eMaxPortLx
+};
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port);
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len);
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data);
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len);
+
+/* plx register access */
+enum {
+    ePLX_PCICR,
+
+    ePLX_MBOX0,
+    ePLX_MBOX1,
+    ePLX_MBOX2,
+    ePLX_MBOX3,
+    ePLX_MBOX4,
+    ePLX_MBOX5,
+    ePLX_MBOX6,
+    ePLX_MBOX7,
+
+    ePLX_L2PCIDB,
+    ePLX_IRQCS,
+    ePLX_CHIPSC,
+
+    eMaxPort
+};
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port);
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data);
+
+/* rhm */
+struct lx_rmh {
+	u16	cmd_len;	/* length of the command to send (WORDs) */
+	u16	stat_len;	/* length of the status received (WORDs) */
+	u16	dsp_stat;	/* status type, RMP_SSIZE_XXX */
+	u16	cmd_idx;	/* index of the command */
+	u32	cmd[REG_CRM_NUMBER];
+	u32	stat[REG_CRM_NUMBER];
+};
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version);
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq);
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran);
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data);
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address);
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels);
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count);
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate);
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture);
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture);
+
+/* low-level stream handling */
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture);
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate);
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos);
+
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			int is_capture, enum stream_state_t state);
+
+static inline int lx_stream_start(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_start\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_RUN);
+}
+
+static inline int lx_stream_pause(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_pause\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_PAUSE);
+}
+
+static inline int lx_stream_stop(struct lx6464es *chip, u32 pipe,
+				 int is_capture)
+{
+	snd_printdd("->lx_stream_stop\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_STOP);
+}
+
+/* low-level buffer handling */
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array);
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index);
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size);
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index);
+
+/* low-level gain/peak handling */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute);
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels);
+
+
+/* interrupt handling */
+irqreturn_t lx_interrupt(int irq, void *dev_id);
+void lx_irq_enable(struct lx6464es *chip);
+void lx_irq_disable(struct lx6464es *chip);
+
+void lx_tasklet_capture(unsigned long data);
+void lx_tasklet_playback(unsigned long data);
+
+
+/* Stream Format Header Defines (for LIN and IEEE754) */
+#define HEADER_FMT_BASE		HEADER_FMT_BASE_LIN
+#define HEADER_FMT_BASE_LIN	0xFED00000
+#define HEADER_FMT_BASE_FLOAT	0xFAD00000
+#define HEADER_FMT_MONO		0x00000080 /* bit 23 in header_lo. WARNING: old
+					    * bit 22 is ignored in float
+					    * format */
+#define HEADER_FMT_INTEL	0x00008000
+#define HEADER_FMT_16BITS	0x00002000
+#define HEADER_FMT_24BITS	0x00004000
+#define HEADER_FMT_UPTO11	0x00000200 /* frequency is less or equ. to 11k.
+					    * */
+#define HEADER_FMT_UPTO32	0x00000100 /* frequency is over 11k and less
+					    * then 32k.*/
+
+
+#define BIT_FMP_HEADER          23
+#define BIT_FMP_SD              22
+#define BIT_FMP_MULTICHANNEL    19
+
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+
+
+
+
+/* from PcxAll_e.h */
+/* Start/Pause condition for pipes (PCXStartPipe, PCXPausePipe) */
+#define START_PAUSE_IMMEDIATE           0
+#define START_PAUSE_ON_SYNCHRO          1
+#define START_PAUSE_ON_TIME_CODE        2
+
+
+/* Pipe / Stream state */
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+static inline void unpack_pointer(dma_addr_t ptr, u32 *r_low, u32 *r_high)
+{
+	*r_low = (u32)(ptr & 0xffffffff);
+#if BITS_PER_LONG == 32
+	*r_high = 0;
+#else
+	*r_high = (u32)((u64)ptr>>32);
+#endif
+}
+
+#endif /* LX_CORE_H */
diff --git a/sound/pci/lx6464es/lx_defs.h b/sound/pci/lx6464es/lx_defs.h
new file mode 100644
index 0000000..49d36bd
--- /dev/null
+++ b/sound/pci/lx6464es/lx_defs.h
@@ -0,0 +1,376 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * adapted upstream headers
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_DEFS_H
+#define LX_DEFS_H
+
+/* code adapted from ethersound.h */
+#define	XES_FREQ_COUNT8_MASK    0x00001FFF /* compteur 25MHz entre 8 ech. */
+#define	XES_FREQ_COUNT8_44_MIN  0x00001288 /* 25M /
+					    * [ 44k - ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_44_MAX	0x000010F0 /* 25M / [ ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_48_MAX	0x00000F08 /* 25M /
+					    * [ 48k + ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+
+/* code adapted from LXES_registers.h */
+
+#define IOCR_OUTPUTS_OFFSET 0	/* (rw) offset for the number of OUTs in the
+				 * ConfES register. */
+#define IOCR_INPUTS_OFFSET  8	/* (rw) offset for the number of INs in the
+				 * ConfES register. */
+#define FREQ_RATIO_OFFSET  19	/* (rw) offset for frequency ratio in the
+				 * ConfES register. */
+#define	FREQ_RATIO_SINGLE_MODE 0x01 /* value for single mode frequency ratio:
+				     * sample rate = frequency rate. */
+
+#define CONFES_READ_PART_MASK	0x00070000
+#define CONFES_WRITE_PART_MASK	0x00F80000
+
+/* code adapted from if_drv_mb.h */
+
+#define MASK_SYS_STATUS_ERROR	(1L << 31) /* events that lead to a PCI irq if
+					    * not yet pending */
+#define MASK_SYS_STATUS_URUN	(1L << 30)
+#define MASK_SYS_STATUS_ORUN	(1L << 29)
+#define MASK_SYS_STATUS_EOBO	(1L << 28)
+#define MASK_SYS_STATUS_EOBI	(1L << 27)
+#define MASK_SYS_STATUS_FREQ	(1L << 26)
+#define MASK_SYS_STATUS_ESA	(1L << 25) /* reserved, this is set by the
+					    * XES */
+#define MASK_SYS_STATUS_TIMER	(1L << 24)
+
+#define MASK_SYS_ASYNC_EVENTS	(MASK_SYS_STATUS_ERROR |		\
+				 MASK_SYS_STATUS_URUN  |		\
+				 MASK_SYS_STATUS_ORUN  |		\
+				 MASK_SYS_STATUS_EOBO  |		\
+				 MASK_SYS_STATUS_EOBI  |		\
+				 MASK_SYS_STATUS_FREQ  |		\
+				 MASK_SYS_STATUS_ESA)
+
+#define MASK_SYS_PCI_EVENTS		(MASK_SYS_ASYNC_EVENTS |	\
+					 MASK_SYS_STATUS_TIMER)
+
+#define MASK_SYS_TIMER_COUNT	0x0000FFFF
+
+#define MASK_SYS_STATUS_EOT_PLX		(1L << 22) /* event that remains
+						    * internal: reserved fo end
+						    * of plx dma */
+#define MASK_SYS_STATUS_XES		(1L << 21) /* event that remains
+						    * internal: pending XES
+						    * IRQ */
+#define MASK_SYS_STATUS_CMD_DONE	(1L << 20) /* alternate command
+						    * management: notify driver
+						    * instead of polling */
+
+
+#define MAX_STREAM_BUFFER 5	/* max amount of stream buffers. */
+
+#define MICROBLAZE_IBL_MIN		 32
+#define MICROBLAZE_IBL_DEFAULT	        128
+#define MICROBLAZE_IBL_MAX		512
+/* #define MASK_GRANULARITY		(2*MICROBLAZE_IBL_MAX-1) */
+
+
+
+/* command opcodes, see reference for details */
+
+/*
+ the capture bit position in the object_id field in driver commands
+ depends upon the number of managed channels. For now, 64 IN + 64 OUT are
+ supported. HOwever, the communication protocol forsees 1024 channels, hence
+ bit 10 indicates a capture (input) object).
+*/
+#define ID_IS_CAPTURE (1L << 10)
+#define ID_OFFSET	13	/* object ID is at the 13th bit in the
+				 * 1st command word.*/
+#define ID_CH_MASK    0x3F
+#define OPCODE_OFFSET	24	/* offset of the command opcode in the first
+				 * command word.*/
+
+enum cmd_mb_opcodes {
+	CMD_00_INFO_DEBUG	        = 0x00,
+	CMD_01_GET_SYS_CFG		= 0x01,
+	CMD_02_SET_GRANULARITY		= 0x02,
+	CMD_03_SET_TIMER_IRQ		= 0x03,
+	CMD_04_GET_EVENT		= 0x04,
+	CMD_05_GET_PIPES		= 0x05,
+
+	CMD_06_ALLOCATE_PIPE            = 0x06,
+	CMD_07_RELEASE_PIPE		= 0x07,
+	CMD_08_ASK_BUFFERS		= 0x08,
+	CMD_09_STOP_PIPE		= 0x09,
+	CMD_0A_GET_PIPE_SPL_COUNT	= 0x0a,
+	CMD_0B_TOGGLE_PIPE_STATE	= 0x0b,
+
+	CMD_0C_DEF_STREAM		= 0x0c,
+	CMD_0D_SET_MUTE			= 0x0d,
+	CMD_0E_GET_STREAM_SPL_COUNT     = 0x0e,
+	CMD_0F_UPDATE_BUFFER		= 0x0f,
+	CMD_10_GET_BUFFER		= 0x10,
+	CMD_11_CANCEL_BUFFER		= 0x11,
+	CMD_12_GET_PEAK			= 0x12,
+	CMD_13_SET_STREAM_STATE		= 0x13,
+	CMD_14_INVALID			= 0x14,
+};
+
+/* pipe states */
+enum pipe_state_t {
+	PSTATE_IDLE	= 0,	/* the pipe is not processed in the XES_IRQ
+				 * (free or stopped, or paused). */
+	PSTATE_RUN	= 1,	/* sustained play/record state. */
+	PSTATE_PURGE	= 2,	/* the ES channels are now off, render pipes do
+				 * not DMA, record pipe do a last DMA. */
+	PSTATE_ACQUIRE	= 3,	/* the ES channels are now on, render pipes do
+				 * not yet increase their sample count, record
+				 * pipes do not DMA. */
+	PSTATE_CLOSING	= 4,	/* the pipe is releasing, and may not yet
+				 * receive an "alloc" command. */
+};
+
+/* stream states */
+enum stream_state_t {
+	SSTATE_STOP	=  0x00,       /* setting to stop resets the stream spl
+					* count.*/
+	SSTATE_RUN	= (0x01 << 0), /* start DMA and spl count handling. */
+	SSTATE_PAUSE	= (0x01 << 1), /* pause DMA and spl count handling. */
+};
+
+/* buffer flags */
+enum buffer_flags {
+	BF_VALID	= 0x80,	/* set if the buffer is valid, clear if free.*/
+	BF_CURRENT	= 0x40,	/* set if this is the current buffer (there is
+				 * always a current buffer).*/
+	BF_NOTIFY_EOB	= 0x20,	/* set if this buffer must cause a PCI event
+				 * when finished.*/
+	BF_CIRCULAR	= 0x10,	/* set if buffer[1] must be copied to buffer[0]
+				 * by the end of this buffer.*/
+	BF_64BITS_ADR	= 0x08,	/* set if the hi part of the address is valid.*/
+	BF_xx		= 0x04,	/* future extension.*/
+	BF_EOB		= 0x02,	/* set if finished, but not yet free.*/
+	BF_PAUSE	= 0x01,	/* pause stream at buffer end.*/
+	BF_ZERO		= 0x00,	/* no flags (init).*/
+};
+
+/**
+*	Stream Flags definitions
+*/
+enum stream_flags {
+	SF_ZERO		= 0x00000000, /* no flags (stream invalid). */
+	SF_VALID	= 0x10000000, /* the stream has a valid DMA_conf
+				       * info (setstreamformat). */
+	SF_XRUN		= 0x20000000, /* the stream is un x-run state. */
+	SF_START	= 0x40000000, /* the DMA is running.*/
+	SF_ASIO		= 0x80000000, /* ASIO.*/
+};
+
+
+#define MASK_SPL_COUNT_HI 0x00FFFFFF /* 4 MSBits are status bits */
+#define PSTATE_OFFSET             28 /* 4 MSBits are status bits */
+
+
+#define MASK_STREAM_HAS_MAPPING	(1L << 12)
+#define MASK_STREAM_IS_ASIO	(1L <<  9)
+#define STREAM_FMT_OFFSET	10   /* the stream fmt bits start at the 10th
+				      * bit in the command word. */
+
+#define STREAM_FMT_16b          0x02
+#define STREAM_FMT_intel        0x01
+
+#define FREQ_FIELD_OFFSET	15  /* offset of the freq field in the response
+				     * word */
+
+#define BUFF_FLAGS_OFFSET	  24 /*  offset of the buffer flags in the
+				      *  response word. */
+#define MASK_DATA_SIZE	  0x00FFFFFF /* this must match the field size of
+				      * datasize in the buffer_t structure. */
+
+#define MASK_BUFFER_ID	        0xFF /* the cancel command awaits a buffer ID,
+				      * may be 0xFF for "current". */
+
+
+/* code adapted from PcxErr_e.h */
+
+/* Bits masks */
+
+#define ERROR_MASK              0x8000
+
+#define SOURCE_MASK             0x7800
+
+#define E_SOURCE_BOARD          0x4000 /* 8 >> 1 */
+#define E_SOURCE_DRV            0x2000 /* 4 >> 1 */
+#define E_SOURCE_API            0x1000 /* 2 >> 1 */
+/* Error tools */
+#define E_SOURCE_TOOLS          0x0800 /* 1 >> 1 */
+/* Error pcxaudio */
+#define E_SOURCE_AUDIO          0x1800 /* 3 >> 1 */
+/* Error virtual pcx */
+#define E_SOURCE_VPCX           0x2800 /* 5 >> 1 */
+/* Error dispatcher */
+#define E_SOURCE_DISPATCHER     0x3000 /* 6 >> 1 */
+/* Error from CobraNet firmware */
+#define E_SOURCE_COBRANET       0x3800 /* 7 >> 1 */
+
+#define E_SOURCE_USER           0x7800
+
+#define CLASS_MASK              0x0700
+
+#define CODE_MASK               0x00FF
+
+/* Bits values */
+
+/* Values for the error/warning bit */
+#define ERROR_VALUE             0x8000
+#define WARNING_VALUE           0x0000
+
+/* Class values */
+#define E_CLASS_GENERAL                  0x0000
+#define E_CLASS_INVALID_CMD              0x0100
+#define E_CLASS_INVALID_STD_OBJECT       0x0200
+#define E_CLASS_RSRC_IMPOSSIBLE          0x0300
+#define E_CLASS_WRONG_CONTEXT            0x0400
+#define E_CLASS_BAD_SPECIFIC_PARAMETER   0x0500
+#define E_CLASS_REAL_TIME_ERROR          0x0600
+#define E_CLASS_DIRECTSHOW               0x0700
+#define E_CLASS_FREE                     0x0700
+
+
+/* Complete DRV error code for the general class */
+#define ED_GN           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_GENERAL)
+#define ED_CONCURRENCY                  (ED_GN | 0x01)
+#define ED_DSP_CRASHED                  (ED_GN | 0x02)
+#define ED_UNKNOWN_BOARD                (ED_GN | 0x03)
+#define ED_NOT_INSTALLED                (ED_GN | 0x04)
+#define ED_CANNOT_OPEN_SVC_MANAGER      (ED_GN | 0x05)
+#define ED_CANNOT_READ_REGISTRY         (ED_GN | 0x06)
+#define ED_DSP_VERSION_MISMATCH         (ED_GN | 0x07)
+#define ED_UNAVAILABLE_FEATURE          (ED_GN | 0x08)
+#define ED_CANCELLED                    (ED_GN | 0x09)
+#define ED_NO_RESPONSE_AT_IRQA          (ED_GN | 0x10)
+#define ED_INVALID_ADDRESS              (ED_GN | 0x11)
+#define ED_DSP_CORRUPTED                (ED_GN | 0x12)
+#define ED_PENDING_OPERATION            (ED_GN | 0x13)
+#define ED_NET_ALLOCATE_MEMORY_IMPOSSIBLE   (ED_GN | 0x14)
+#define ED_NET_REGISTER_ERROR               (ED_GN | 0x15)
+#define ED_NET_THREAD_ERROR                 (ED_GN | 0x16)
+#define ED_NET_OPEN_ERROR                   (ED_GN | 0x17)
+#define ED_NET_CLOSE_ERROR                  (ED_GN | 0x18)
+#define ED_NET_NO_MORE_PACKET               (ED_GN | 0x19)
+#define ED_NET_NO_MORE_BUFFER               (ED_GN | 0x1A)
+#define ED_NET_SEND_ERROR                   (ED_GN | 0x1B)
+#define ED_NET_RECEIVE_ERROR                (ED_GN | 0x1C)
+#define ED_NET_WRONG_MSG_SIZE               (ED_GN | 0x1D)
+#define ED_NET_WAIT_ERROR                   (ED_GN | 0x1E)
+#define ED_NET_EEPROM_ERROR                 (ED_GN | 0x1F)
+#define ED_INVALID_RS232_COM_NUMBER         (ED_GN | 0x20)
+#define ED_INVALID_RS232_INIT               (ED_GN | 0x21)
+#define ED_FILE_ERROR                       (ED_GN | 0x22)
+#define ED_INVALID_GPIO_CMD                 (ED_GN | 0x23)
+#define ED_RS232_ALREADY_OPENED             (ED_GN | 0x24)
+#define ED_RS232_NOT_OPENED                 (ED_GN | 0x25)
+#define ED_GPIO_ALREADY_OPENED              (ED_GN | 0x26)
+#define ED_GPIO_NOT_OPENED                  (ED_GN | 0x27)
+#define ED_REGISTRY_ERROR                   (ED_GN | 0x28) /* <- NCX */
+#define ED_INVALID_SERVICE                  (ED_GN | 0x29) /* <- NCX */
+
+#define ED_READ_FILE_ALREADY_OPENED	    (ED_GN | 0x2a) /* <- Decalage
+							    * pour RCX
+							    * (old 0x28)
+							    * */
+#define ED_READ_FILE_INVALID_COMMAND	    (ED_GN | 0x2b) /* ~ */
+#define ED_READ_FILE_INVALID_PARAMETER	    (ED_GN | 0x2c) /* ~ */
+#define ED_READ_FILE_ALREADY_CLOSED	    (ED_GN | 0x2d) /* ~ */
+#define ED_READ_FILE_NO_INFORMATION	    (ED_GN | 0x2e) /* ~ */
+#define ED_READ_FILE_INVALID_HANDLE	    (ED_GN | 0x2f) /* ~ */
+#define ED_READ_FILE_END_OF_FILE	    (ED_GN | 0x30) /* ~ */
+#define ED_READ_FILE_ERROR	            (ED_GN | 0x31) /* ~ */
+
+#define ED_DSP_CRASHED_EXC_DSPSTACK_OVERFLOW (ED_GN | 0x32) /* <- Decalage pour
+							     * PCX (old 0x14) */
+#define ED_DSP_CRASHED_EXC_SYSSTACK_OVERFLOW (ED_GN | 0x33) /* ~ */
+#define ED_DSP_CRASHED_EXC_ILLEGAL           (ED_GN | 0x34) /* ~ */
+#define ED_DSP_CRASHED_EXC_TIMER_REENTRY     (ED_GN | 0x35) /* ~ */
+#define ED_DSP_CRASHED_EXC_FATAL_ERROR       (ED_GN | 0x36) /* ~ */
+
+#define ED_FLASH_PCCARD_NOT_PRESENT          (ED_GN | 0x37)
+
+#define ED_NO_CURRENT_CLOCK                  (ED_GN | 0x38)
+
+/* Complete DRV error code for real time class */
+#define ED_RT           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_REAL_TIME_ERROR)
+#define ED_DSP_TIMED_OUT                (ED_RT | 0x01)
+#define ED_DSP_CHK_TIMED_OUT            (ED_RT | 0x02)
+#define ED_STREAM_OVERRUN               (ED_RT | 0x03)
+#define ED_DSP_BUSY                     (ED_RT | 0x04)
+#define ED_DSP_SEMAPHORE_TIME_OUT       (ED_RT | 0x05)
+#define ED_BOARD_TIME_OUT               (ED_RT | 0x06)
+#define ED_XILINX_ERROR                 (ED_RT | 0x07)
+#define ED_COBRANET_ITF_NOT_RESPONDING  (ED_RT | 0x08)
+
+/* Complete BOARD error code for the invaid standard object class */
+#define EB_ISO          (ERROR_VALUE | E_SOURCE_BOARD | \
+			 E_CLASS_INVALID_STD_OBJECT)
+#define EB_INVALID_EFFECT               (EB_ISO | 0x00)
+#define EB_INVALID_PIPE                 (EB_ISO | 0x40)
+#define EB_INVALID_STREAM               (EB_ISO | 0x80)
+#define EB_INVALID_AUDIO                (EB_ISO | 0xC0)
+
+/* Complete BOARD error code for impossible resource allocation class */
+#define EB_RI           (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_RSRC_IMPOSSIBLE)
+#define EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE (EB_RI | 0x01)
+#define EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE           (EB_RI | 0x02)
+
+#define EB_ALLOCATE_MEM_STREAM_IMPOSSIBLE		\
+	EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE
+#define EB_ALLOCATE_MEM_PIPE_IMPOSSIBLE			\
+	EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE
+
+#define EB_ALLOCATE_DIFFERED_CMD_IMPOSSIBLE     (EB_RI | 0x03)
+#define EB_TOO_MANY_DIFFERED_CMD                (EB_RI | 0x04)
+#define EB_RBUFFERS_TABLE_OVERFLOW              (EB_RI | 0x05)
+#define EB_ALLOCATE_EFFECTS_IMPOSSIBLE          (EB_RI | 0x08)
+#define EB_ALLOCATE_EFFECT_POS_IMPOSSIBLE       (EB_RI | 0x09)
+#define EB_RBUFFER_NOT_AVAILABLE                (EB_RI | 0x0A)
+#define EB_ALLOCATE_CONTEXT_LIII_IMPOSSIBLE     (EB_RI | 0x0B)
+#define EB_STATUS_DIALOG_IMPOSSIBLE             (EB_RI | 0x1D)
+#define EB_CONTROL_CMD_IMPOSSIBLE               (EB_RI | 0x1E)
+#define EB_STATUS_SEND_IMPOSSIBLE               (EB_RI | 0x1F)
+#define EB_ALLOCATE_PIPE_IMPOSSIBLE             (EB_RI | 0x40)
+#define EB_ALLOCATE_STREAM_IMPOSSIBLE           (EB_RI | 0x80)
+#define EB_ALLOCATE_AUDIO_IMPOSSIBLE            (EB_RI | 0xC0)
+
+/* Complete BOARD error code for wrong call context class */
+#define EB_WCC          (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_WRONG_CONTEXT)
+#define EB_CMD_REFUSED                  (EB_WCC | 0x00)
+#define EB_START_STREAM_REFUSED         (EB_WCC | 0xFC)
+#define EB_SPC_REFUSED                  (EB_WCC | 0xFD)
+#define EB_CSN_REFUSED                  (EB_WCC | 0xFE)
+#define EB_CSE_REFUSED                  (EB_WCC | 0xFF)
+
+
+
+
+#endif /* LX_DEFS_H */
diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c
index c262049..3b5ca70 100644
--- a/sound/pci/oxygen/oxygen_pcm.c
+++ b/sound/pci/oxygen/oxygen_pcm.c
@@ -487,10 +487,14 @@
 {
 	struct oxygen *chip = snd_pcm_substream_chip(substream);
 	unsigned int channel = oxygen_substream_channel(substream);
+	unsigned int channel_mask = 1 << channel;
 
 	spin_lock_irq(&chip->reg_lock);
-	chip->interrupt_mask &= ~(1 << channel);
+	chip->interrupt_mask &= ~channel_mask;
 	oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, chip->interrupt_mask);
+
+	oxygen_set_bits8(chip, OXYGEN_DMA_FLUSH, channel_mask);
+	oxygen_clear_bits8(chip, OXYGEN_DMA_FLUSH, channel_mask);
 	spin_unlock_irq(&chip->reg_lock);
 
 	return snd_pcm_lib_free_pages(substream);
diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c
index bc5ce11..bf971f7 100644
--- a/sound/pci/oxygen/virtuoso.c
+++ b/sound/pci/oxygen/virtuoso.c
@@ -113,8 +113,8 @@
  */
 
 /*
- * Xonar Essence STX
- * -----------------
+ * Xonar Essence ST (Deluxe)/STX
+ * -----------------------------
  *
  * CMI8788:
  *
@@ -180,6 +180,8 @@
 	MODEL_DX,
 	MODEL_HDAV,	/* without daughterboard */
 	MODEL_HDAV_H6,	/* with H6 daughterboard */
+	MODEL_ST,
+	MODEL_ST_H6,
 	MODEL_STX,
 };
 
@@ -188,8 +190,10 @@
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8275), .driver_data = MODEL_DX },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x82b7), .driver_data = MODEL_D2X },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8314), .driver_data = MODEL_HDAV },
+	{ OXYGEN_PCI_SUBID(0x1043, 0x8327), .driver_data = MODEL_DX },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x834f), .driver_data = MODEL_D1 },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x835c), .driver_data = MODEL_STX },
+	{ OXYGEN_PCI_SUBID(0x1043, 0x835d), .driver_data = MODEL_ST },
 	{ OXYGEN_PCI_SUBID_BROKEN_EEPROM },
 	{ }
 };
@@ -210,9 +214,9 @@
 #define GPIO_DX_FRONT_PANEL	0x0002
 #define GPIO_DX_INPUT_ROUTE	0x0100
 
-#define GPIO_HDAV_DB_MASK	0x0030
-#define GPIO_HDAV_DB_H6		0x0000
-#define GPIO_HDAV_DB_XX		0x0020
+#define GPIO_DB_MASK		0x0030
+#define GPIO_DB_H6		0x0000
+#define GPIO_DB_XX		0x0020
 
 #define GPIO_ST_HP_REAR		0x0002
 #define GPIO_ST_HP		0x0080
@@ -530,7 +534,7 @@
 	snd_component_add(chip->card, "CS5381");
 }
 
-static void xonar_stx_init(struct oxygen *chip)
+static void xonar_st_init(struct oxygen *chip)
 {
 	struct xonar_data *data = chip->model_data;
 
@@ -539,12 +543,11 @@
 		       OXYGEN_2WIRE_INTERRUPT_MASK |
 		       OXYGEN_2WIRE_SPEED_FAST);
 
+	if (chip->model.private_data == MODEL_ST_H6)
+		chip->model.dac_channels = 8;
 	data->anti_pop_delay = 100;
-	data->dacs = 1;
+	data->dacs = chip->model.private_data == MODEL_ST_H6 ? 4 : 1;
 	data->output_enable_bit = GPIO_DX_OUTPUT_ENABLE;
-	data->ext_power_reg = OXYGEN_GPI_DATA;
-	data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK;
-	data->ext_power_bit = GPI_DX_EXT_POWER;
 	data->pcm1796_oversampling = PCM1796_OS_64;
 
 	pcm1796_init(chip);
@@ -560,6 +563,17 @@
 	snd_component_add(chip->card, "CS5381");
 }
 
+static void xonar_stx_init(struct oxygen *chip)
+{
+	struct xonar_data *data = chip->model_data;
+
+	data->ext_power_reg = OXYGEN_GPI_DATA;
+	data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK;
+	data->ext_power_bit = GPI_DX_EXT_POWER;
+
+	xonar_st_init(chip);
+}
+
 static void xonar_disable_output(struct oxygen *chip)
 {
 	struct xonar_data *data = chip->model_data;
@@ -1021,7 +1035,8 @@
 	.model_data_size = sizeof(struct xonar_data),
 	.device_config = PLAYBACK_0_TO_I2S |
 			 PLAYBACK_1_TO_SPDIF |
-			 CAPTURE_0_FROM_I2S_2,
+			 CAPTURE_0_FROM_I2S_2 |
+			 CAPTURE_1_FROM_SPDIF,
 	.dac_channels = 8,
 	.dac_volume_min = 255 - 2*60,
 	.dac_volume_max = 255,
@@ -1034,7 +1049,7 @@
 static const struct oxygen_model model_xonar_st = {
 	.longname = "Asus Virtuoso 100",
 	.chip = "AV200",
-	.init = xonar_stx_init,
+	.init = xonar_st_init,
 	.control_filter = xonar_st_control_filter,
 	.mixer_init = xonar_st_mixer_init,
 	.cleanup = xonar_st_cleanup,
@@ -1067,6 +1082,7 @@
 		[MODEL_D2]	= &model_xonar_d2,
 		[MODEL_D2X]	= &model_xonar_d2,
 		[MODEL_HDAV]	= &model_xonar_hdav,
+		[MODEL_ST]	= &model_xonar_st,
 		[MODEL_STX]	= &model_xonar_st,
 	};
 	static const char *const names[] = {
@@ -1076,6 +1092,8 @@
 		[MODEL_D2X]	= "Xonar D2X",
 		[MODEL_HDAV]	= "Xonar HDAV1.3",
 		[MODEL_HDAV_H6]	= "Xonar HDAV1.3+H6",
+		[MODEL_ST]	= "Xonar Essence ST",
+		[MODEL_ST_H6]	= "Xonar Essence ST+H6",
 		[MODEL_STX]	= "Xonar Essence STX",
 	};
 	unsigned int model = id->driver_data;
@@ -1092,21 +1110,27 @@
 		chip->model.init = xonar_dx_init;
 		break;
 	case MODEL_HDAV:
-		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL,
-				    GPIO_HDAV_DB_MASK);
-		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) &
-			GPIO_HDAV_DB_MASK) {
-		case GPIO_HDAV_DB_H6:
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
+		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) & GPIO_DB_MASK) {
+		case GPIO_DB_H6:
 			model = MODEL_HDAV_H6;
 			break;
-		case GPIO_HDAV_DB_XX:
+		case GPIO_DB_XX:
 			snd_printk(KERN_ERR "unknown daughterboard\n");
 			return -ENODEV;
 		}
 		break;
+	case MODEL_ST:
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
+		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) & GPIO_DB_MASK) {
+		case GPIO_DB_H6:
+			model = MODEL_ST_H6;
+			break;
+		}
+		break;
 	case MODEL_STX:
-		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL,
-				    GPIO_HDAV_DB_MASK);
+		chip->model.init = xonar_stx_init;
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
 		break;
 	}
 
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index e51a5ef..235a71e 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -507,41 +507,19 @@
  */
 
 static struct pci_device_id snd_riptide_ids[] = {
-	{
-	 .vendor = 0x127a,.device = 0x4310,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4320,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4330,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4340,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
+	{ PCI_DEVICE(0x127a, 0x4310) },
+	{ PCI_DEVICE(0x127a, 0x4320) },
+	{ PCI_DEVICE(0x127a, 0x4330) },
+	{ PCI_DEVICE(0x127a, 0x4340) },
 	{0,},
 };
 
 #ifdef SUPPORT_JOYSTICK
 static struct pci_device_id snd_riptide_joystick_ids[] __devinitdata = {
-	{
-	 .vendor = 0x127a,.device = 0x4312,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4322,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{.vendor = 0x127a,.device = 0x4332,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{.vendor = 0x127a,.device = 0x4342,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
+	{ PCI_DEVICE(0x127a, 0x4312) },
+	{ PCI_DEVICE(0x127a, 0x4322) },
+	{ PCI_DEVICE(0x127a, 0x4332) },
+	{ PCI_DEVICE(0x127a, 0x4342) },
 	{0,},
 };
 #endif
@@ -1209,12 +1187,79 @@
 }
 #endif
 
+static int try_to_load_firmware(struct cmdif *cif, struct snd_riptide *chip)
+{
+	union firmware_version firmware = { .ret = CMDRET_ZERO };
+	int i, timeout, err;
+
+	for (i = 0; i < 2; i++) {
+		WRITE_PORT_ULONG(cif->hwport->port[i].data1, 0);
+		WRITE_PORT_ULONG(cif->hwport->port[i].data2, 0);
+	}
+	SET_GRESET(cif->hwport);
+	udelay(100);
+	UNSET_GRESET(cif->hwport);
+	udelay(100);
+
+	for (timeout = 100000; --timeout; udelay(10)) {
+		if (IS_READY(cif->hwport) && !IS_GERR(cif->hwport))
+			break;
+	}
+	if (!timeout) {
+		snd_printk(KERN_ERR
+			   "Riptide: device not ready, audio status: 0x%x "
+			   "ready: %d gerr: %d\n",
+			   READ_AUDIO_STATUS(cif->hwport),
+			   IS_READY(cif->hwport), IS_GERR(cif->hwport));
+		return -EIO;
+	} else {
+		snd_printdd
+			("Riptide: audio status: 0x%x ready: %d gerr: %d\n",
+			 READ_AUDIO_STATUS(cif->hwport),
+			 IS_READY(cif->hwport), IS_GERR(cif->hwport));
+	}
+
+	SEND_GETV(cif, &firmware.ret);
+	snd_printdd("Firmware version: ASIC: %d CODEC %d AUXDSP %d PROG %d\n",
+		    firmware.firmware.ASIC, firmware.firmware.CODEC,
+		    firmware.firmware.AUXDSP, firmware.firmware.PROG);
+
+	for (i = 0; i < FIRMWARE_VERSIONS; i++) {
+		if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware)))
+			break;
+	}
+	if (i >= FIRMWARE_VERSIONS)
+		return 0; /* no match */
+
+	if (!chip)
+		return 1; /* OK */
+
+	snd_printdd("Writing Firmware\n");
+	if (!chip->fw_entry) {
+		err = request_firmware(&chip->fw_entry, "riptide.hex",
+				       &chip->pci->dev);
+		if (err) {
+			snd_printk(KERN_ERR
+				   "Riptide: Firmware not available %d\n", err);
+			return -EIO;
+		}
+	}
+	err = loadfirmware(cif, chip->fw_entry->data, chip->fw_entry->size);
+	if (err) {
+		snd_printk(KERN_ERR
+			   "Riptide: Could not load firmware %d\n", err);
+		return err;
+	}
+
+	chip->firmware = firmware;
+
+	return 1; /* OK */
+}
+
 static int riptide_reset(struct cmdif *cif, struct snd_riptide *chip)
 {
-	int timeout, tries;
 	union cmdret rptr = CMDRET_ZERO;
-	union firmware_version firmware;
-	int i, j, err, has_firmware;
+	int err, tries;
 
 	if (!cif)
 		return -EINVAL;
@@ -1227,75 +1272,11 @@
 	cif->is_reset = 0;
 
 	tries = RESET_TRIES;
-	has_firmware = 0;
-	while (has_firmware == 0 && tries-- > 0) {
-		for (i = 0; i < 2; i++) {
-			WRITE_PORT_ULONG(cif->hwport->port[i].data1, 0);
-			WRITE_PORT_ULONG(cif->hwport->port[i].data2, 0);
-		}
-		SET_GRESET(cif->hwport);
-		udelay(100);
-		UNSET_GRESET(cif->hwport);
-		udelay(100);
-
-		for (timeout = 100000; --timeout; udelay(10)) {
-			if (IS_READY(cif->hwport) && !IS_GERR(cif->hwport))
-				break;
-		}
-		if (timeout == 0) {
-			snd_printk(KERN_ERR
-				   "Riptide: device not ready, audio status: 0x%x ready: %d gerr: %d\n",
-				   READ_AUDIO_STATUS(cif->hwport),
-				   IS_READY(cif->hwport), IS_GERR(cif->hwport));
-			return -EIO;
-		} else {
-			snd_printdd
-			    ("Riptide: audio status: 0x%x ready: %d gerr: %d\n",
-			     READ_AUDIO_STATUS(cif->hwport),
-			     IS_READY(cif->hwport), IS_GERR(cif->hwport));
-		}
-
-		SEND_GETV(cif, &rptr);
-		for (i = 0; i < 4; i++)
-			firmware.ret.retwords[i] = rptr.retwords[i];
-
-		snd_printdd
-		    ("Firmware version: ASIC: %d CODEC %d AUXDSP %d PROG %d\n",
-		     firmware.firmware.ASIC, firmware.firmware.CODEC,
-		     firmware.firmware.AUXDSP, firmware.firmware.PROG);
-
-		for (j = 0; j < FIRMWARE_VERSIONS; j++) {
-			has_firmware = 1;
-			for (i = 0; i < 4; i++) {
-				if (firmware_versions[j].ret.retwords[i] !=
-				    firmware.ret.retwords[i])
-					has_firmware = 0;
-			}
-			if (has_firmware)
-				break;
-		}
-
-		if (chip != NULL && has_firmware == 0) {
-			snd_printdd("Writing Firmware\n");
-			if (!chip->fw_entry) {
-				if ((err =
-				     request_firmware(&chip->fw_entry,
-						      "riptide.hex",
-						      &chip->pci->dev)) != 0) {
-					snd_printk(KERN_ERR
-						   "Riptide: Firmware not available %d\n",
-						   err);
-					return -EIO;
-				}
-			}
-			err = loadfirmware(cif, chip->fw_entry->data,
-					   chip->fw_entry->size);
-			if (err)
-				snd_printk(KERN_ERR
-					   "Riptide: Could not load firmware %d\n",
-					   err);
-		}
-	}
+	do {
+		err = try_to_load_firmware(cif, chip);
+		if (err < 0)
+			return err;
+	} while (!err && --tries);
 
 	SEND_SACR(cif, 0, AC97_RESET);
 	SEND_RACR(cif, AC97_RESET, &rptr);
@@ -1337,11 +1318,6 @@
 	SET_AIE(cif->hwport);
 	SET_AIACK(cif->hwport);
 	cif->is_reset = 1;
-	if (chip) {
-		for (i = 0; i < 4; i++)
-			chip->firmware.ret.retwords[i] =
-			    firmware.ret.retwords[i];
-	}
 
 	return 0;
 }
@@ -2038,14 +2014,12 @@
 }
 
 #ifdef SUPPORT_JOYSTICK
-static int have_joystick;
-static struct pci_dev *riptide_gameport_pci;
-static struct gameport *riptide_gameport;
 
 static int __devinit
 snd_riptide_joystick_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	static int dev;
+	struct gameport *gameport;
 
 	if (dev >= SNDRV_CARDS)
 		return -ENODEV;
@@ -2054,36 +2028,33 @@
 		return -ENOENT;
 	}
 
-	if (joystick_port[dev]) {
-		riptide_gameport = gameport_allocate_port();
-		if (riptide_gameport) {
-			if (!request_region
-			    (joystick_port[dev], 8, "Riptide gameport")) {
-				snd_printk(KERN_WARNING
-					   "Riptide: cannot grab gameport 0x%x\n",
-					   joystick_port[dev]);
-				gameport_free_port(riptide_gameport);
-				riptide_gameport = NULL;
-			} else {
-				riptide_gameport_pci = pci;
-				riptide_gameport->io = joystick_port[dev];
-				gameport_register_port(riptide_gameport);
-			}
-		}
+	if (!joystick_port[dev++])
+		return 0;
+
+	gameport = gameport_allocate_port();
+	if (!gameport)
+		return -ENOMEM;
+	if (!request_region(joystick_port[dev], 8, "Riptide gameport")) {
+		snd_printk(KERN_WARNING
+			   "Riptide: cannot grab gameport 0x%x\n",
+			   joystick_port[dev]);
+		gameport_free_port(gameport);
+		return -EBUSY;
 	}
-	dev++;
+
+	gameport->io = joystick_port[dev];
+	gameport_register_port(gameport);
+	pci_set_drvdata(pci, gameport);
 	return 0;
 }
 
 static void __devexit snd_riptide_joystick_remove(struct pci_dev *pci)
 {
-	if (riptide_gameport) {
-		if (riptide_gameport_pci == pci) {
-			release_region(riptide_gameport->io, 8);
-			riptide_gameport_pci = NULL;
-			gameport_unregister_port(riptide_gameport);
-			riptide_gameport = NULL;
-		}
+	struct gameport *gameport = pci_get_drvdata(pci);
+	if (gameport) {
+		release_region(gameport->io, 8);
+		gameport_unregister_port(gameport);
+		pci_set_drvdata(pci, NULL);
 	}
 }
 #endif
@@ -2094,8 +2065,8 @@
 	static int dev;
 	struct snd_card *card;
 	struct snd_riptide *chip;
-	unsigned short addr;
-	int err = 0;
+	unsigned short val;
+	int err;
 
 	if (dev >= SNDRV_CARDS)
 		return -ENODEV;
@@ -2107,60 +2078,63 @@
 	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
 	if (err < 0)
 		return err;
-	if ((err = snd_riptide_create(card, pci, &chip)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
+	err = snd_riptide_create(card, pci, &chip);
+	if (err < 0)
+		goto error;
 	card->private_data = chip;
-	if ((err = snd_riptide_pcm(chip, 0, NULL)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
-	if ((err = snd_riptide_mixer(chip)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
-	pci_write_config_word(chip->pci, PCI_EXT_Legacy_Mask, LEGACY_ENABLE_ALL
-			      | (opl3_port[dev] ? LEGACY_ENABLE_FM : 0)
+	err = snd_riptide_pcm(chip, 0, NULL);
+	if (err < 0)
+		goto error;
+	err = snd_riptide_mixer(chip);
+	if (err < 0)
+		goto error;
+
+	val = LEGACY_ENABLE_ALL;
+	if (opl3_port[dev])
+		val |= LEGACY_ENABLE_FM;
 #ifdef SUPPORT_JOYSTICK
-			      | (joystick_port[dev] ? LEGACY_ENABLE_GAMEPORT :
-				 0)
+	if (joystick_port[dev])
+		val |= LEGACY_ENABLE_GAMEPORT;
 #endif
-			      | (mpu_port[dev]
-				 ? (LEGACY_ENABLE_MPU_INT | LEGACY_ENABLE_MPU) :
-				 0)
-			      | ((chip->irq << 4) & 0xF0));
-	if ((addr = mpu_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_MPU_Base, addr);
-		if ((err = snd_mpu401_uart_new(card, 0, MPU401_HW_RIPTIDE,
-					       addr, 0, chip->irq, 0,
-					       &chip->rmidi)) < 0)
+	if (mpu_port[dev])
+		val |= LEGACY_ENABLE_MPU_INT | LEGACY_ENABLE_MPU;
+	val |= (chip->irq << 4) & 0xf0;
+	pci_write_config_word(chip->pci, PCI_EXT_Legacy_Mask, val);
+	if (mpu_port[dev]) {
+		val = mpu_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_MPU_Base, val);
+		err = snd_mpu401_uart_new(card, 0, MPU401_HW_RIPTIDE,
+					  val, 0, chip->irq, 0,
+					  &chip->rmidi);
+		if (err < 0)
 			snd_printk(KERN_WARNING
 				   "Riptide: Can't Allocate MPU at 0x%x\n",
-				   addr);
+				   val);
 		else
-			chip->mpuaddr = addr;
+			chip->mpuaddr = val;
 	}
-	if ((addr = opl3_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_FM_Base, addr);
-		if ((err = snd_opl3_create(card, addr, addr + 2,
-					   OPL3_HW_RIPTIDE, 0,
-					   &chip->opl3)) < 0)
+	if (opl3_port[dev]) {
+		val = opl3_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_FM_Base, val);
+		err = snd_opl3_create(card, val, val + 2,
+				      OPL3_HW_RIPTIDE, 0, &chip->opl3);
+		if (err < 0)
 			snd_printk(KERN_WARNING
 				   "Riptide: Can't Allocate OPL3 at 0x%x\n",
-				   addr);
+				   val);
 		else {
-			chip->opladdr = addr;
-			if ((err =
-			     snd_opl3_hwdep_new(chip->opl3, 0, 1, NULL)) < 0)
+			chip->opladdr = val;
+			err = snd_opl3_hwdep_new(chip->opl3, 0, 1, NULL);
+			if (err < 0)
 				snd_printk(KERN_WARNING
 					   "Riptide: Can't Allocate OPL3-HWDEP\n");
 		}
 	}
 #ifdef SUPPORT_JOYSTICK
-	if ((addr = joystick_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_Game_Base, addr);
-		chip->gameaddr = addr;
+	if (joystick_port[dev]) {
+		val = joystick_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_Game_Base, val);
+		chip->gameaddr = val;
 	}
 #endif
 
@@ -2178,13 +2152,16 @@
 		 chip->opladdr);
 #endif
 	snd_riptide_proc_init(chip);
-	if ((err = snd_card_register(card)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
+	err = snd_card_register(card);
+	if (err < 0)
+		goto error;
 	pci_set_drvdata(pci, card);
 	dev++;
 	return 0;
+
+ error:
+	snd_card_free(card);
+	return err;
 }
 
 static void __devexit snd_card_riptide_remove(struct pci_dev *pci)
@@ -2216,14 +2193,11 @@
 static int __init alsa_card_riptide_init(void)
 {
 	int err;
-	if ((err = pci_register_driver(&driver)) < 0)
+	err = pci_register_driver(&driver);
+	if (err < 0)
 		return err;
 #if defined(SUPPORT_JOYSTICK)
-	if (pci_register_driver(&joystick_driver) < 0) {
-		have_joystick = 0;
-		snd_printk(KERN_INFO "no joystick found\n");
-	} else
-		have_joystick = 1;
+	pci_register_driver(&joystick_driver);
 #endif
 	return 0;
 }
@@ -2232,8 +2206,7 @@
 {
 	pci_unregister_driver(&driver);
 #if defined(SUPPORT_JOYSTICK)
-	if (have_joystick)
-		pci_unregister_driver(&joystick_driver);
+	pci_unregister_driver(&joystick_driver);
 #endif
 }
 
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 314e735..3da5c02 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/firmware.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -402,9 +403,9 @@
 #define HDSP_DMA_AREA_BYTES ((HDSP_MAX_CHANNELS+1) * HDSP_CHANNEL_BUFFER_BYTES)
 #define HDSP_DMA_AREA_KILOBYTES (HDSP_DMA_AREA_BYTES/1024)
 
-/* use hotplug firmeare loader? */
+/* use hotplug firmware loader? */
 #if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE)
-#if !defined(HDSP_USE_HWDEP_LOADER) && !defined(CONFIG_SND_HDSP)
+#if !defined(HDSP_USE_HWDEP_LOADER)
 #define HDSP_FW_LOADER
 #endif
 #endif
@@ -1047,7 +1048,6 @@
 static void hdsp_set_dds_value(struct hdsp *hdsp, int rate)
 {
 	u64 n;
-	u32 r;
 
 	if (rate >= 112000)
 		rate /= 4;
@@ -1055,7 +1055,7 @@
 		rate /= 2;
 
 	n = DDS_NUMERATOR;
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	/* HDSP_freqReg and HDSP_resetPointer are the same, so keep the DDS
@@ -3097,7 +3097,6 @@
 static int hdsp_dds_offset(struct hdsp *hdsp)
 {
 	u64 n;
-	u32 r;
 	unsigned int dds_value = hdsp->dds_value;
 	int system_sample_rate = hdsp->system_sample_rate;
 
@@ -3109,7 +3108,7 @@
 	 * dds_value = n / rate
 	 * rate = n / dds_value
 	 */
-	div64_32(&n, dds_value, &r);
+	n = div_u64(n, dds_value);
 	if (system_sample_rate >= 112000)
 		n *= 4;
 	else if (system_sample_rate >= 56000)
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index bac2dc0..0dce331 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/math64.h>
 #include <asm/io.h>
 
 #include <sound/core.h>
@@ -831,7 +832,6 @@
 static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
 {
 	u64 n;
-	u32 r;
 	
 	if (rate >= 112000)
 		rate /= 4;
@@ -844,7 +844,7 @@
         */	   
 	/* n = 104857600000000ULL; */ /*  =  2^20 * 10^8 */
 	n = 110100480000000ULL;    /* Value checked for AES32 and MADI */
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	hdspm_write(hdspm, HDSPM_freqReg, (u32)n);
diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c
index 80df9b1..2cc0eda 100644
--- a/sound/ppc/awacs.c
+++ b/sound/ppc/awacs.c
@@ -477,7 +477,7 @@
 #define AMP_CH_SPK	0
 #define AMP_CH_HD	1
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_vol[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_vol[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "PC Speaker Playback Volume",
 	  .info = snd_pmac_awacs_info_volume_amp,
@@ -514,7 +514,7 @@
 	},
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_hp_sw __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_hp_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Headphone Playback Switch",
 	.info = snd_pmac_boolean_stereo_info,
@@ -523,7 +523,7 @@
 	.private_value = AMP_CH_HD,
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_spk_sw __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_spk_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "PC Speaker Playback Switch",
 	.info = snd_pmac_boolean_stereo_info,
@@ -595,46 +595,46 @@
 /*
  * lists of mixer elements
  */
-static struct snd_kcontrol_new snd_pmac_awacs_mixers[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers[] __devinitdata = {
 	AWACS_SWITCH("Master Capture Switch", 1, SHIFT_LOOPTHRU, 0),
 	AWACS_VOLUME("Master Capture Volume", 0, 4, 0),
 /*	AWACS_SWITCH("Unknown Playback Switch", 6, SHIFT_PAROUT0, 0), */
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_beige[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_beige[] __devinitdata = {
 	AWACS_VOLUME("Master Playback Volume", 2, 6, 1),
 	AWACS_VOLUME("Play-through Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_LINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_lo[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_lo[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_imac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_imac[] __devinitdata = {
 	AWACS_VOLUME("Play-through Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_g4agp[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_g4agp[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 	AWACS_VOLUME("Master Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac7500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac7500[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac5500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac5500[] __devinitdata = {
 	AWACS_VOLUME("Headphone Playback Volume", 2, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac[] __devinitdata = {
 	AWACS_VOLUME("Master Playback Volume", 2, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
@@ -642,34 +642,34 @@
 /* FIXME: is this correct order?
  * screamer (powerbook G3 pismo) seems to have different bits...
  */
-static struct snd_kcontrol_new snd_pmac_awacs_mixers2[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers2[] __devinitdata = {
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_LINE, 0),
 	AWACS_SWITCH("Mic Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers2[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers2[] __devinitdata = {
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 	AWACS_SWITCH("Mic Capture Switch", 0, SHIFT_MUX_LINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers2_pmac5500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers2_pmac5500[] __devinitdata = {
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw __devinitdata =
 AWACS_SWITCH("Master Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw_imac __devinitdata =
 AWACS_SWITCH("Line out Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw_pmac5500 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw_pmac5500 __devinitdata =
 AWACS_SWITCH("Headphone Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_mic_boost[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mic_boost[] __devinitdata = {
 	AWACS_SWITCH("Mic Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Mic Boost Capture Volume",
 	  .info = snd_pmac_screamer_mic_boost_info,
@@ -678,34 +678,34 @@
 	},
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mic_boost_pmac7500[] __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_mic_boost_pmac7500[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_beige[] __initdata =
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_beige[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 	AWACS_SWITCH("CD Boost Capture Switch", 6, SHIFT_MIC_BOOST, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_imac[] __initdata =
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_imac[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 	AWACS_SWITCH("Mic Boost Capture Switch", 6, SHIFT_MIC_BOOST, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_vol[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_vol[] __devinitdata = {
 	AWACS_VOLUME("PC Speaker Playback Volume", 4, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_SPKMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac1 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac1 __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_PAROUT1, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac2 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac2 __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_PAROUT1, 0);
 
 
@@ -872,7 +872,7 @@
 /*
  * initialize chip
  */
-int __init
+int __devinit
 snd_pmac_awacs_init(struct snd_pmac *chip)
 {
 	int pm7500 = IS_PM7500;
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index 89f5c32..a9d3507 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -215,7 +215,7 @@
 };
 
 /* Initialize beep stuff */
-int __init snd_pmac_attach_beep(struct snd_pmac *chip)
+int __devinit snd_pmac_attach_beep(struct snd_pmac *chip)
 {
 	struct pmac_beep *beep;
 	struct input_dev *input_dev;
diff --git a/sound/ppc/burgundy.c b/sound/ppc/burgundy.c
index 45a7629..16ed240 100644
--- a/sound/ppc/burgundy.c
+++ b/sound/ppc/burgundy.c
@@ -46,12 +46,12 @@
 	timeout = 50;
 	while (!(in_le32(&chip->awacs->codec_stat) & MASK_EXTEND) && timeout--)
 		udelay(1);
-	if (! timeout)
+	if (timeout < 0)
 		printk(KERN_DEBUG "burgundy_extend_wait: timeout #1\n");
 	timeout = 50;
 	while ((in_le32(&chip->awacs->codec_stat) & MASK_EXTEND) && timeout--)
 		udelay(1);
-	if (! timeout)
+	if (timeout < 0)
 		printk(KERN_DEBUG "burgundy_extend_wait: timeout #2\n");
 }
 
@@ -468,7 +468,7 @@
 /*
  * Burgundy mixers
  */
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Master Playback Volume", 0,
 			MASK_ADDR_BURGUNDY_MASTER_VOLUME, 8),
 	BURGUNDY_VOLUME_W("CD Capture Volume", 0,
@@ -496,7 +496,7 @@
  */	BURGUNDY_SWITCH_B("PCM Capture Switch", 0,
 			MASK_ADDR_BURGUNDY_HOSTIFEH, 0x01, 0, 0)
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers_imac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers_imac[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Line in Capture Volume", 0,
 			MASK_ADDR_BURGUNDY_VOLLINE, 16),
 	BURGUNDY_VOLUME_W("Mic Capture Volume", 0,
@@ -522,7 +522,7 @@
 	BURGUNDY_SWITCH_B("Mic Boost Capture Switch", 0,
 			MASK_ADDR_BURGUNDY_INPBOOST, 0x40, 0x80, 1)
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers_pmac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers_pmac[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Line in Capture Volume", 0,
 			MASK_ADDR_BURGUNDY_VOLMIC, 16),
 	BURGUNDY_VOLUME_B("Line in Gain Capture Volume", 0,
@@ -538,33 +538,33 @@
 /*	BURGUNDY_SWITCH_B("Line in Boost Capture Switch", 0,
  *		MASK_ADDR_BURGUNDY_INPBOOST, 0x40, 0x80, 1) */
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Master Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT | BURGUNDY_LINEOUT_LEFT | BURGUNDY_HP_LEFT,
 	BURGUNDY_OUTPUT_RIGHT | BURGUNDY_LINEOUT_RIGHT | BURGUNDY_HP_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("Master Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_INTERN
 	| BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("PC Speaker Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("PC Speaker Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_INTERN, 0, 0);
-static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Line out Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_LINEOUT_LEFT, BURGUNDY_LINEOUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("Line out Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_hp_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_hp_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Headphone Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_HP_LEFT, BURGUNDY_HP_RIGHT, 1);
@@ -618,7 +618,7 @@
 /*
  * initialize burgundy
  */
-int __init snd_pmac_burgundy_init(struct snd_pmac *chip)
+int __devinit snd_pmac_burgundy_init(struct snd_pmac *chip)
 {
 	int imac = machine_is_compatible("iMac");
 	int i, err;
diff --git a/sound/ppc/daca.c b/sound/ppc/daca.c
index f8d478c..24200b7 100644
--- a/sound/ppc/daca.c
+++ b/sound/ppc/daca.c
@@ -244,7 +244,7 @@
 }
 
 /* exported */
-int __init snd_pmac_daca_init(struct snd_pmac *chip)
+int __devinit snd_pmac_daca_init(struct snd_pmac *chip)
 {
 	int i, err;
 	struct pmac_daca *mix;
diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index a5afb26..835fa19 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -33,10 +33,6 @@
 static struct pmac_keywest *keywest_ctx;
 
 
-#ifndef i2c_device_name
-#define i2c_device_name(x)	((x)->name)
-#endif
-
 static int keywest_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -56,7 +52,7 @@
 	if (! keywest_ctx)
 		return -EINVAL;
 
-	if (strncmp(i2c_device_name(adapter), "mac-io", 6))
+	if (strncmp(adapter->name, "mac-io", 6))
 		return 0; /* ignored */
 
 	memset(&info, 0, sizeof(struct i2c_board_info));
@@ -109,7 +105,7 @@
 	}
 }
 
-int __init snd_pmac_tumbler_post_init(void)
+int __devinit snd_pmac_tumbler_post_init(void)
 {
 	int err;
 	
@@ -124,7 +120,7 @@
 }
 
 /* exported */
-int __init snd_pmac_keywest_init(struct pmac_keywest *i2c)
+int __devinit snd_pmac_keywest_init(struct pmac_keywest *i2c)
 {
 	int err;
 
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 9b4e9c3..7bc492e 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -702,7 +702,7 @@
 	.pointer =	snd_pmac_capture_pointer,
 };
 
-int __init snd_pmac_pcm_new(struct snd_pmac *chip)
+int __devinit snd_pmac_pcm_new(struct snd_pmac *chip)
 {
 	struct snd_pcm *pcm;
 	int err;
@@ -908,7 +908,7 @@
  * check the machine support byteswap (little-endian)
  */
 
-static void __init detect_byte_swap(struct snd_pmac *chip)
+static void __devinit detect_byte_swap(struct snd_pmac *chip)
 {
 	struct device_node *mio;
 
@@ -934,7 +934,7 @@
 /*
  * detect a sound chip
  */
-static int __init snd_pmac_detect(struct snd_pmac *chip)
+static int __devinit snd_pmac_detect(struct snd_pmac *chip)
 {
 	struct device_node *sound;
 	struct device_node *dn;
@@ -1143,7 +1143,7 @@
 	return 0;
 }
 
-static struct snd_kcontrol_new auto_mute_controls[] __initdata = {
+static struct snd_kcontrol_new auto_mute_controls[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Auto Mute Switch",
 	  .info = snd_pmac_boolean_mono_info,
@@ -1158,7 +1158,7 @@
 	},
 };
 
-int __init snd_pmac_add_automute(struct snd_pmac *chip)
+int __devinit snd_pmac_add_automute(struct snd_pmac *chip)
 {
 	int err;
 	chip->auto_mute = 1;
@@ -1175,7 +1175,7 @@
 /*
  * create and detect a pmac chip record
  */
-int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
+int __devinit snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 {
 	struct snd_pmac *chip;
 	struct device_node *np;
diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index f361c26..53c81a5 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -18,80 +18,30 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#include <sound/asound.h>
+#include <sound/control.h>
 #include <sound/core.h>
 #include <sound/initval.h>
-#include <sound/pcm.h>
-#include <sound/asound.h>
 #include <sound/memalloc.h>
+#include <sound/pcm.h>
 #include <sound/pcm_params.h>
-#include <sound/control.h>
-#include <linux/dmapool.h>
-#include <linux/dma-mapping.h>
-#include <asm/firmware.h>
+
 #include <asm/dma.h>
+#include <asm/firmware.h>
 #include <asm/lv1call.h>
 #include <asm/ps3.h>
 #include <asm/ps3av.h>
 
-#include "snd_ps3_reg.h"
 #include "snd_ps3.h"
+#include "snd_ps3_reg.h"
 
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("PS3 sound driver");
-MODULE_AUTHOR("Sony Computer Entertainment Inc.");
-
-/* module  entries */
-static int __init snd_ps3_init(void);
-static void __exit snd_ps3_exit(void);
-
-/* ALSA snd driver ops */
-static int snd_ps3_pcm_open(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_close(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_prepare(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_trigger(struct snd_pcm_substream *substream,
-				 int cmd);
-static snd_pcm_uframes_t snd_ps3_pcm_pointer(struct snd_pcm_substream
-					     *substream);
-static int snd_ps3_pcm_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *hw_params);
-static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream);
-
-
-/* ps3_system_bus_driver entries */
-static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev);
-static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev);
-
-/* address setup */
-static int snd_ps3_map_mmio(void);
-static void snd_ps3_unmap_mmio(void);
-static int snd_ps3_allocate_irq(void);
-static void snd_ps3_free_irq(void);
-static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start);
-
-/* interrupt handler */
-static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id);
-
-
-/* set sampling rate/format */
-static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream);
-/* take effect parameter change */
-static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card);
-/* initialize avsetting and take it effect */
-static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card);
-/* setup dma */
-static int snd_ps3_program_dma(struct snd_ps3_card_info *card,
-			       enum snd_ps3_dma_filltype filltype);
-static void snd_ps3_wait_for_dma_stop(struct snd_ps3_card_info *card);
-
-static dma_addr_t v_to_bus(struct snd_ps3_card_info *, void  *vaddr, int ch);
-
-
-module_init(snd_ps3_init);
-module_exit(snd_ps3_exit);
 
 /*
  * global
@@ -165,25 +115,13 @@
 	.fifo_size = PS3_AUDIO_FIFO_SIZE
 };
 
-static struct snd_pcm_ops snd_ps3_pcm_spdif_ops =
-{
-	.open = snd_ps3_pcm_open,
-	.close = snd_ps3_pcm_close,
-	.prepare = snd_ps3_pcm_prepare,
-	.ioctl = snd_pcm_lib_ioctl,
-	.trigger = snd_ps3_pcm_trigger,
-	.pointer = snd_ps3_pcm_pointer,
-	.hw_params = snd_ps3_pcm_hw_params,
-	.hw_free = snd_ps3_pcm_hw_free
-};
-
 static int snd_ps3_verify_dma_stop(struct snd_ps3_card_info *card,
 				   int count, int force_stop)
 {
 	int dma_ch, done, retries, stop_forced = 0;
 	uint32_t status;
 
-	for (dma_ch = 0; dma_ch < 8; dma_ch ++) {
+	for (dma_ch = 0; dma_ch < 8; dma_ch++) {
 		retries = count;
 		do {
 			status = read_reg(PS3_AUDIO_KICK(dma_ch)) &
@@ -259,9 +197,7 @@
 /*
  * convert virtual addr to ioif bus addr.
  */
-static dma_addr_t v_to_bus(struct snd_ps3_card_info *card,
-			   void * paddr,
-			   int ch)
+static dma_addr_t v_to_bus(struct snd_ps3_card_info *card, void *paddr, int ch)
 {
 	return card->dma_start_bus_addr[ch] +
 		(paddr - card->dma_start_vaddr[ch]);
@@ -321,7 +257,7 @@
 	spin_lock_irqsave(&card->dma_lock, irqsave);
 	for (ch = 0; ch < 2; ch++) {
 		start_vaddr = card->dma_next_transfer_vaddr[0];
-		for (stage = 0; stage < fill_stages; stage ++) {
+		for (stage = 0; stage < fill_stages; stage++) {
 			dma_ch = stage * 2 + ch;
 			if (silent)
 				dma_addr = card->null_buffer_start_dma_addr;
@@ -372,6 +308,71 @@
 }
 
 /*
+ * Interrupt handler
+ */
+static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id)
+{
+
+	uint32_t port_intr;
+	int underflow_occured = 0;
+	struct snd_ps3_card_info *card = dev_id;
+
+	if (!card->running) {
+		update_reg(PS3_AUDIO_AX_IS, 0);
+		update_reg(PS3_AUDIO_INTR_0, 0);
+		return IRQ_HANDLED;
+	}
+
+	port_intr = read_reg(PS3_AUDIO_AX_IS);
+	/*
+	 *serial buffer empty detected (every 4 times),
+	 *program next dma and kick it
+	 */
+	if (port_intr & PS3_AUDIO_AX_IE_ASOBEIE(0)) {
+		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBEIE(0));
+		if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
+			write_reg(PS3_AUDIO_AX_IS, port_intr);
+			underflow_occured = 1;
+		}
+		if (card->silent) {
+			/* we are still in silent time */
+			snd_ps3_program_dma(card,
+				(underflow_occured) ?
+				SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL :
+				SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
+			snd_ps3_kick_dma(card);
+			card->silent--;
+		} else {
+			snd_ps3_program_dma(card,
+				(underflow_occured) ?
+				SND_PS3_DMA_FILLTYPE_FIRSTFILL :
+				SND_PS3_DMA_FILLTYPE_RUNNING);
+			snd_ps3_kick_dma(card);
+			snd_pcm_period_elapsed(card->substream);
+		}
+	} else if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
+		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBUIE(0));
+		/*
+		 * serial out underflow, but buffer empty not detected.
+		 * in this case, fill fifo with 0 to recover.  After
+		 * filling dummy data, serial automatically start to
+		 * consume them and then will generate normal buffer
+		 * empty interrupts.
+		 * If both buffer underflow and buffer empty are occured,
+		 * it is better to do nomal data transfer than empty one
+		 */
+		snd_ps3_program_dma(card,
+				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+		snd_ps3_kick_dma(card);
+		snd_ps3_program_dma(card,
+				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+		snd_ps3_kick_dma(card);
+	}
+	/* clear interrupt cause */
+	return IRQ_HANDLED;
+};
+
+/*
  * audio mute on/off
  * mute_on : 0 output enabled
  *           1 mute
@@ -382,6 +383,142 @@
 }
 
 /*
+ * av setting
+ * NOTE: calling this function may generate audio interrupt.
+ */
+static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card)
+{
+	int ret, retries, i;
+	pr_debug("%s: start\n", __func__);
+
+	ret = ps3av_set_audio_mode(card->avs.avs_audio_ch,
+				  card->avs.avs_audio_rate,
+				  card->avs.avs_audio_width,
+				  card->avs.avs_audio_format,
+				  card->avs.avs_audio_source);
+	/*
+	 * Reset the following unwanted settings:
+	 */
+
+	/* disable all 3wire buffers */
+	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
+			~(PS3_AUDIO_AO_3WMCTRL_ASOEN(0) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(1) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(2) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(3)),
+			0);
+	wmb();	/* ensure the hardware sees the change */
+	/* wait for actually stopped */
+	retries = 1000;
+	while ((read_reg(PS3_AUDIO_AO_3WMCTRL) &
+		(PS3_AUDIO_AO_3WMCTRL_ASORUN(0) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(1) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(2) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(3))) &&
+	       --retries) {
+		udelay(1);
+	}
+
+	/* reset buffer pointer */
+	for (i = 0; i < 4; i++) {
+		update_reg(PS3_AUDIO_AO_3WCTRL(i),
+			   PS3_AUDIO_AO_3WCTRL_ASOBRST_RESET);
+		udelay(10);
+	}
+	wmb(); /* ensure the hardware actually start resetting */
+
+	/* enable 3wire#0 buffer */
+	update_reg(PS3_AUDIO_AO_3WMCTRL, PS3_AUDIO_AO_3WMCTRL_ASOEN(0));
+
+
+	/* In 24bit mode,ALSA inserts a zero byte at first byte of per sample */
+	update_mask_reg(PS3_AUDIO_AO_3WCTRL(0),
+			~PS3_AUDIO_AO_3WCTRL_ASODF,
+			PS3_AUDIO_AO_3WCTRL_ASODF_LSB);
+	update_mask_reg(PS3_AUDIO_AO_SPDCTRL(0),
+			~PS3_AUDIO_AO_SPDCTRL_SPODF,
+			PS3_AUDIO_AO_SPDCTRL_SPODF_LSB);
+	/* ensure all the setting above is written back to register */
+	wmb();
+	/* avsetting driver altered AX_IE, caller must reset it if you want */
+	pr_debug("%s: end\n", __func__);
+	return ret;
+}
+
+/*
+ *  set sampling rate according to the substream
+ */
+static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
+{
+	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
+	struct snd_ps3_avsetting_info avs;
+	int ret;
+
+	avs = card->avs;
+
+	pr_debug("%s: called freq=%d width=%d\n", __func__,
+		 substream->runtime->rate,
+		 snd_pcm_format_width(substream->runtime->format));
+
+	pr_debug("%s: before freq=%d width=%d\n", __func__,
+		 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+
+	/* sample rate */
+	switch (substream->runtime->rate) {
+	case 44100:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_44K;
+		break;
+	case 48000:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
+		break;
+	case 88200:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_88K;
+		break;
+	case 96000:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_96K;
+		break;
+	default:
+		pr_info("%s: invalid rate %d\n", __func__,
+			substream->runtime->rate);
+		return 1;
+	}
+
+	/* width */
+	switch (snd_pcm_format_width(substream->runtime->format)) {
+	case 16:
+		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
+		break;
+	case 24:
+		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_24;
+		break;
+	default:
+		pr_info("%s: invalid width %d\n", __func__,
+			snd_pcm_format_width(substream->runtime->format));
+		return 1;
+	}
+
+	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
+
+	if (memcmp(&card->avs, &avs, sizeof(avs))) {
+		pr_debug("%s: after freq=%d width=%d\n", __func__,
+			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+
+		card->avs = avs;
+		snd_ps3_change_avsetting(card);
+		ret = 0;
+	} else
+		ret = 1;
+
+	/* check CS non-audio bit and mute accordingly */
+	if (avs.avs_cs_info[0] & 0x02)
+		ps3av_audio_mute_analog(1); /* mute if non-audio */
+	else
+		ps3av_audio_mute_analog(0);
+
+	return ret;
+}
+
+/*
  * PCM operators
  */
 static int snd_ps3_pcm_open(struct snd_pcm_substream *substream)
@@ -406,6 +543,13 @@
 	return 0;
 };
 
+static int snd_ps3_pcm_close(struct snd_pcm_substream *substream)
+{
+	/* mute on */
+	snd_ps3_mute(1);
+	return 0;
+};
+
 static int snd_ps3_pcm_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_pcm_hw_params *hw_params)
 {
@@ -417,6 +561,13 @@
 	return 0;
 };
 
+static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	int ret;
+	ret = snd_pcm_lib_free_pages(substream);
+	return ret;
+};
+
 static int snd_ps3_delay_to_bytes(struct snd_pcm_substream *substream,
 				  unsigned int delay_ms)
 {
@@ -556,202 +707,6 @@
 	return ret;
 };
 
-static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream)
-{
-	int ret;
-	ret = snd_pcm_lib_free_pages(substream);
-	return ret;
-};
-
-static int snd_ps3_pcm_close(struct snd_pcm_substream *substream)
-{
-	/* mute on */
-	snd_ps3_mute(1);
-	return 0;
-};
-
-static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
-{
-	/*
-	 * avsetting driver seems to never change the followings
-	 * so, init them here once
-	 */
-
-	/* no dma interrupt needed */
-	write_reg(PS3_AUDIO_INTR_EN_0, 0);
-
-	/* use every 4 buffer empty interrupt */
-	update_mask_reg(PS3_AUDIO_AX_IC,
-			PS3_AUDIO_AX_IC_AASOIMD_MASK,
-			PS3_AUDIO_AX_IC_AASOIMD_EVERY4);
-
-	/* enable 3wire clocks */
-	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
-			~(PS3_AUDIO_AO_3WMCTRL_ASOBCLKD_DISABLED |
-			  PS3_AUDIO_AO_3WMCTRL_ASOLRCKD_DISABLED),
-			0);
-	update_reg(PS3_AUDIO_AO_3WMCTRL,
-		   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
-}
-
-/*
- * av setting
- * NOTE: calling this function may generate audio interrupt.
- */
-static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card)
-{
-	int ret, retries, i;
-	pr_debug("%s: start\n", __func__);
-
-	ret = ps3av_set_audio_mode(card->avs.avs_audio_ch,
-				  card->avs.avs_audio_rate,
-				  card->avs.avs_audio_width,
-				  card->avs.avs_audio_format,
-				  card->avs.avs_audio_source);
-	/*
-	 * Reset the following unwanted settings:
-	 */
-
-	/* disable all 3wire buffers */
-	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
-			~(PS3_AUDIO_AO_3WMCTRL_ASOEN(0) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(1) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(2) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(3)),
-			0);
-	wmb(); 	/* ensure the hardware sees the change */
-	/* wait for actually stopped */
-	retries = 1000;
-	while ((read_reg(PS3_AUDIO_AO_3WMCTRL) &
-		(PS3_AUDIO_AO_3WMCTRL_ASORUN(0) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(1) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(2) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(3))) &&
-	       --retries) {
-		udelay(1);
-	}
-
-	/* reset buffer pointer */
-	for (i = 0; i < 4; i++) {
-		update_reg(PS3_AUDIO_AO_3WCTRL(i),
-			   PS3_AUDIO_AO_3WCTRL_ASOBRST_RESET);
-		udelay(10);
-	}
-	wmb(); /* ensure the hardware actually start resetting */
-
-	/* enable 3wire#0 buffer */
-	update_reg(PS3_AUDIO_AO_3WMCTRL, PS3_AUDIO_AO_3WMCTRL_ASOEN(0));
-
-
-	/* In 24bit mode,ALSA inserts a zero byte at first byte of per sample */
-	update_mask_reg(PS3_AUDIO_AO_3WCTRL(0),
-			~PS3_AUDIO_AO_3WCTRL_ASODF,
-			PS3_AUDIO_AO_3WCTRL_ASODF_LSB);
-	update_mask_reg(PS3_AUDIO_AO_SPDCTRL(0),
-			~PS3_AUDIO_AO_SPDCTRL_SPODF,
-			PS3_AUDIO_AO_SPDCTRL_SPODF_LSB);
-	/* ensure all the setting above is written back to register */
-	wmb();
-	/* avsetting driver altered AX_IE, caller must reset it if you want */
-	pr_debug("%s: end\n", __func__);
-	return ret;
-}
-
-static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
-{
-	int ret;
-	pr_debug("%s: start\n", __func__);
-	card->avs.avs_audio_ch = PS3AV_CMD_AUDIO_NUM_OF_CH_2;
-	card->avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
-	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
-	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
-	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
-	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
-
-	ret = snd_ps3_change_avsetting(card);
-
-	snd_ps3_audio_fixup(card);
-
-	/* to start to generate SPDIF signal, fill data */
-	snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-	snd_ps3_kick_dma(card);
-	pr_debug("%s: end\n", __func__);
-	return ret;
-}
-
-/*
- *  set sampling rate according to the substream
- */
-static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
-{
-	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
-	struct snd_ps3_avsetting_info avs;
-	int ret;
-
-	avs = card->avs;
-
-	pr_debug("%s: called freq=%d width=%d\n", __func__,
-		 substream->runtime->rate,
-		 snd_pcm_format_width(substream->runtime->format));
-
-	pr_debug("%s: before freq=%d width=%d\n", __func__,
-		 card->avs.avs_audio_rate, card->avs.avs_audio_width);
-
-	/* sample rate */
-	switch (substream->runtime->rate) {
-	case 44100:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_44K;
-		break;
-	case 48000:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
-		break;
-	case 88200:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_88K;
-		break;
-	case 96000:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_96K;
-		break;
-	default:
-		pr_info("%s: invalid rate %d\n", __func__,
-			substream->runtime->rate);
-		return 1;
-	}
-
-	/* width */
-	switch (snd_pcm_format_width(substream->runtime->format)) {
-	case 16:
-		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
-		break;
-	case 24:
-		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_24;
-		break;
-	default:
-		pr_info("%s: invalid width %d\n", __func__,
-			snd_pcm_format_width(substream->runtime->format));
-		return 1;
-	}
-
-	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
-
-	if (memcmp(&card->avs, &avs, sizeof(avs))) {
-		pr_debug("%s: after freq=%d width=%d\n", __func__,
-			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
-
-		card->avs = avs;
-		snd_ps3_change_avsetting(card);
-		ret = 0;
-	} else
-		ret = 1;
-
-	/* check CS non-audio bit and mute accordingly */
-	if (avs.avs_cs_info[0] & 0x02)
-		ps3av_audio_mute_analog(1); /* mute if non-audio */
-	else
-		ps3av_audio_mute_analog(0);
-
-	return ret;
-}
-
 /*
  * SPDIF status bits controls
  */
@@ -798,28 +753,39 @@
 	{
 		.access = SNDRV_CTL_ELEM_ACCESS_READ,
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, CON_MASK),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_cmask_get,
 	},
 	{
 		.access = SNDRV_CTL_ELEM_ACCESS_READ,
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, PRO_MASK),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_pmask_get,
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_default_get,
 		.put = snd_ps3_spdif_default_put,
 	},
 };
 
+static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
+	.open = snd_ps3_pcm_open,
+	.close = snd_ps3_pcm_close,
+	.ioctl = snd_pcm_lib_ioctl,
+	.hw_params = snd_ps3_pcm_hw_params,
+	.hw_free = snd_ps3_pcm_hw_free,
+	.prepare = snd_ps3_pcm_prepare,
+	.trigger = snd_ps3_pcm_trigger,
+	.pointer = snd_ps3_pcm_pointer,
+};
 
-static int snd_ps3_map_mmio(void)
+
+static int __devinit snd_ps3_map_mmio(void)
 {
 	the_card.mapped_mmio_vaddr =
 		ioremap(the_card.ps3_dev->m_region->bus_addr,
@@ -841,7 +807,7 @@
 	the_card.mapped_mmio_vaddr = NULL;
 }
 
-static int snd_ps3_allocate_irq(void)
+static int __devinit snd_ps3_allocate_irq(void)
 {
 	int ret;
 	u64 lpar_addr, lpar_size;
@@ -899,7 +865,7 @@
 	ps3_irq_plug_destroy(the_card.irq_no);
 }
 
-static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
+static void __devinit snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 {
 	uint64_t val;
 	int ret;
@@ -915,7 +881,53 @@
 			ret);
 }
 
-static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
+static void __devinit snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
+{
+	/*
+	 * avsetting driver seems to never change the followings
+	 * so, init them here once
+	 */
+
+	/* no dma interrupt needed */
+	write_reg(PS3_AUDIO_INTR_EN_0, 0);
+
+	/* use every 4 buffer empty interrupt */
+	update_mask_reg(PS3_AUDIO_AX_IC,
+			PS3_AUDIO_AX_IC_AASOIMD_MASK,
+			PS3_AUDIO_AX_IC_AASOIMD_EVERY4);
+
+	/* enable 3wire clocks */
+	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
+			~(PS3_AUDIO_AO_3WMCTRL_ASOBCLKD_DISABLED |
+			  PS3_AUDIO_AO_3WMCTRL_ASOLRCKD_DISABLED),
+			0);
+	update_reg(PS3_AUDIO_AO_3WMCTRL,
+		   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
+}
+
+static int __devinit snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
+{
+	int ret;
+	pr_debug("%s: start\n", __func__);
+	card->avs.avs_audio_ch = PS3AV_CMD_AUDIO_NUM_OF_CH_2;
+	card->avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
+	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
+	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
+	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
+
+	ret = snd_ps3_change_avsetting(card);
+
+	snd_ps3_audio_fixup(card);
+
+	/* to start to generate SPDIF signal, fill data */
+	snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+	snd_ps3_kick_dma(card);
+	pr_debug("%s: end\n", __func__);
+	return ret;
+}
+
+static int __devinit snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
 	int i, ret;
 	u64 lpar_addr, lpar_size;
@@ -1020,11 +1032,12 @@
 	 * its size should be lager than PS3_AUDIO_FIFO_STAGE_SIZE * 2
 	 * PAGE_SIZE is enogh
 	 */
-	if (!(the_card.null_buffer_start_vaddr =
-	      dma_alloc_coherent(&the_card.ps3_dev->core,
-				 PAGE_SIZE,
-				 &the_card.null_buffer_start_dma_addr,
-				 GFP_KERNEL))) {
+	the_card.null_buffer_start_vaddr =
+		dma_alloc_coherent(&the_card.ps3_dev->core,
+				   PAGE_SIZE,
+				   &the_card.null_buffer_start_dma_addr,
+				   GFP_KERNEL);
+	if (!the_card.null_buffer_start_vaddr) {
 		pr_info("%s: nullbuffer alloc failed\n", __func__);
 		goto clean_preallocate;
 	}
@@ -1115,71 +1128,6 @@
 
 
 /*
- * Interrupt handler
- */
-static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id)
-{
-
-	uint32_t port_intr;
-	int underflow_occured = 0;
-	struct snd_ps3_card_info *card = dev_id;
-
-	if (!card->running) {
-		update_reg(PS3_AUDIO_AX_IS, 0);
-		update_reg(PS3_AUDIO_INTR_0, 0);
-		return IRQ_HANDLED;
-	}
-
-	port_intr = read_reg(PS3_AUDIO_AX_IS);
-	/*
-	 *serial buffer empty detected (every 4 times),
-	 *program next dma and kick it
-	 */
-	if (port_intr & PS3_AUDIO_AX_IE_ASOBEIE(0)) {
-		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBEIE(0));
-		if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
-			write_reg(PS3_AUDIO_AX_IS, port_intr);
-			underflow_occured = 1;
-		}
-		if (card->silent) {
-			/* we are still in silent time */
-			snd_ps3_program_dma(card,
-				(underflow_occured) ?
-				SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL :
-				SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
-			snd_ps3_kick_dma(card);
-			card->silent --;
-		} else {
-			snd_ps3_program_dma(card,
-				(underflow_occured) ?
-				SND_PS3_DMA_FILLTYPE_FIRSTFILL :
-				SND_PS3_DMA_FILLTYPE_RUNNING);
-			snd_ps3_kick_dma(card);
-			snd_pcm_period_elapsed(card->substream);
-		}
-	} else if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
-		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBUIE(0));
-		/*
-		 * serial out underflow, but buffer empty not detected.
-		 * in this case, fill fifo with 0 to recover.  After
-		 * filling dummy data, serial automatically start to
-		 * consume them and then will generate normal buffer
-		 * empty interrupts.
-		 * If both buffer underflow and buffer empty are occured,
-		 * it is better to do nomal data transfer than empty one
-		 */
-		snd_ps3_program_dma(card,
-				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-		snd_ps3_kick_dma(card);
-		snd_ps3_program_dma(card,
-				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-		snd_ps3_kick_dma(card);
-	}
-	/* clear interrupt cause */
-	return IRQ_HANDLED;
-};
-
-/*
  * module/subsystem initialize/terminate
  */
 static int __init snd_ps3_init(void)
@@ -1197,10 +1145,15 @@
 
 	return ret;
 }
+module_init(snd_ps3_init);
 
 static void __exit snd_ps3_exit(void)
 {
 	ps3_system_bus_driver_unregister(&snd_ps3_bus_driver_info);
 }
+module_exit(snd_ps3_exit);
 
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("PS3 sound driver");
+MODULE_AUTHOR("Sony Computer Entertainment Inc.");
 MODULE_ALIAS(PS3_MODULE_ALIAS_SOUND);
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 40222fc..08e584d 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -838,7 +838,7 @@
 
 /*
  */
-static struct snd_kcontrol_new tumbler_mixers[] __initdata = {
+static struct snd_kcontrol_new tumbler_mixers[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Master Playback Volume",
 	  .info = tumbler_info_master_volume,
@@ -862,7 +862,7 @@
 	},
 };
 
-static struct snd_kcontrol_new snapper_mixers[] __initdata = {
+static struct snd_kcontrol_new snapper_mixers[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Master Playback Volume",
 	  .info = tumbler_info_master_volume,
@@ -895,7 +895,7 @@
 	},
 };
 
-static struct snd_kcontrol_new tumbler_hp_sw __initdata = {
+static struct snd_kcontrol_new tumbler_hp_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Headphone Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -903,7 +903,7 @@
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_HP,
 };
-static struct snd_kcontrol_new tumbler_speaker_sw __initdata = {
+static struct snd_kcontrol_new tumbler_speaker_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "PC Speaker Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -911,7 +911,7 @@
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_AMP,
 };
-static struct snd_kcontrol_new tumbler_lineout_sw __initdata = {
+static struct snd_kcontrol_new tumbler_lineout_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Line Out Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -919,7 +919,7 @@
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_LINE,
 };
-static struct snd_kcontrol_new tumbler_drc_sw __initdata = {
+static struct snd_kcontrol_new tumbler_drc_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "DRC Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -1269,7 +1269,7 @@
 #endif
 
 /* initialize tumbler */
-static int __init tumbler_init(struct snd_pmac *chip)
+static int __devinit tumbler_init(struct snd_pmac *chip)
 {
 	int irq;
 	struct pmac_tumbler *mix = chip->mixer_data;
@@ -1339,7 +1339,7 @@
 }
 
 /* exported */
-int __init snd_pmac_tumbler_init(struct snd_pmac *chip)
+int __devinit snd_pmac_tumbler_init(struct snd_pmac *chip)
 {
 	int i, err;
 	struct pmac_tumbler *mix;
diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig
index 3d2bb6f..d3e786a 100644
--- a/sound/soc/Kconfig
+++ b/sound/soc/Kconfig
@@ -32,7 +32,9 @@
 source "sound/soc/omap/Kconfig"
 source "sound/soc/pxa/Kconfig"
 source "sound/soc/s3c24xx/Kconfig"
+source "sound/soc/s6000/Kconfig"
 source "sound/soc/sh/Kconfig"
+source "sound/soc/txx9/Kconfig"
 
 # Supported codecs
 source "sound/soc/codecs/Kconfig"
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 0237879..6f1e28d 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -10,4 +10,6 @@
 obj-$(CONFIG_SND_SOC)	+= omap/
 obj-$(CONFIG_SND_SOC)	+= pxa/
 obj-$(CONFIG_SND_SOC)	+= s3c24xx/
+obj-$(CONFIG_SND_SOC)	+= s6000/
 obj-$(CONFIG_SND_SOC)	+= sh/
+obj-$(CONFIG_SND_SOC)	+= txx9/
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index a608d70..e720d5e 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -41,3 +41,11 @@
           and FRAME signals on the PlayPaq.  Unless you want to play
           with the AT32 as the SSC master, you probably want to say N here,
           as this will give you better sound quality.
+
+config SND_AT91_SOC_AFEB9260
+	tristate "SoC Audio support for AFEB9260 board"
+	depends on ARCH_AT91 && MACH_AFEB9260 && SND_ATMEL_SOC
+	select SND_ATMEL_SOC_SSC
+	select SND_SOC_TLV320AIC23
+	help
+	  Say Y here to support sound on AFEB9260 board.
diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile
index f54a7cc..e7ea56b 100644
--- a/sound/soc/atmel/Makefile
+++ b/sound/soc/atmel/Makefile
@@ -13,3 +13,4 @@
 
 obj-$(CONFIG_SND_AT91_SOC_SAM9G20_WM8731) += snd-soc-sam9g20-wm8731.o
 obj-$(CONFIG_SND_AT32_SOC_PLAYPAQ) += snd-soc-playpaq.o
+obj-$(CONFIG_SND_AT91_SOC_AFEB9260) += snd-soc-afeb9260.o
diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c
index 7065753..9eb610c 100644
--- a/sound/soc/atmel/playpaq_wm8510.c
+++ b/sound/soc/atmel/playpaq_wm8510.c
@@ -117,7 +117,7 @@
 	 * Find actual rate, compare to requested rate
 	 */
 	actual_rate = (cd.ssc_rate / (cd.cmr_div * 2)) / (2 * (cd.period + 1));
-	pr_debug("playpaq_wm8510: Request rate = %d, actual rate = %d\n",
+	pr_debug("playpaq_wm8510: Request rate = %u, actual rate = %u\n",
 		 rate, actual_rate);
 
 
diff --git a/sound/soc/atmel/snd-soc-afeb9260.c b/sound/soc/atmel/snd-soc-afeb9260.c
new file mode 100644
index 0000000..23349de
--- /dev/null
+++ b/sound/soc/atmel/snd-soc-afeb9260.c
@@ -0,0 +1,203 @@
+/*
+ * afeb9260.c  --  SoC audio for AFEB9260
+ *
+ * Copyright (C) 2009 Sergey Lapin <slapin@ossfans.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <linux/atmel-ssc.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <asm/mach-types.h>
+#include <mach/hardware.h>
+#include <linux/gpio.h>
+
+#include "../codecs/tlv320aic23.h"
+#include "atmel-pcm.h"
+#include "atmel_ssc_dai.h"
+
+#define CODEC_CLOCK 	12000000
+
+static int afeb9260_hw_params(struct snd_pcm_substream *substream,
+			 struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int err;
+
+	/* Set codec DAI configuration */
+	err = snd_soc_dai_set_fmt(codec_dai,
+				  SND_SOC_DAIFMT_I2S|
+				  SND_SOC_DAIFMT_NB_IF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (err < 0) {
+		printk(KERN_ERR "can't set codec DAI configuration\n");
+		return err;
+	}
+
+	/* Set cpu DAI configuration */
+	err = snd_soc_dai_set_fmt(cpu_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_IF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (err < 0) {
+		printk(KERN_ERR "can't set cpu DAI configuration\n");
+		return err;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	err =
+	    snd_soc_dai_set_sysclk(codec_dai, 0, CODEC_CLOCK, SND_SOC_CLOCK_IN);
+
+	if (err < 0) {
+		printk(KERN_ERR "can't set codec system clock\n");
+		return err;
+	}
+
+	return err;
+}
+
+static struct snd_soc_ops afeb9260_ops = {
+	.hw_params = afeb9260_hw_params,
+};
+
+static const struct snd_soc_dapm_widget tlv320aic23_dapm_widgets[] = {
+	SND_SOC_DAPM_HP("Headphone Jack", NULL),
+	SND_SOC_DAPM_LINE("Line In", NULL),
+	SND_SOC_DAPM_MIC("Mic Jack", NULL),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+	{"Headphone Jack", NULL, "LHPOUT"},
+	{"Headphone Jack", NULL, "RHPOUT"},
+
+	{"LLINEIN", NULL, "Line In"},
+	{"RLINEIN", NULL, "Line In"},
+
+	{"MICIN", NULL, "Mic Jack"},
+};
+
+static int afeb9260_tlv320aic23_init(struct snd_soc_codec *codec)
+{
+
+	/* Add afeb9260 specific widgets */
+	snd_soc_dapm_new_controls(codec, tlv320aic23_dapm_widgets,
+				  ARRAY_SIZE(tlv320aic23_dapm_widgets));
+
+	/* Set up afeb9260 specific audio path audio_map */
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+
+	snd_soc_dapm_enable_pin(codec, "Headphone Jack");
+	snd_soc_dapm_enable_pin(codec, "Line In");
+	snd_soc_dapm_enable_pin(codec, "Mic Jack");
+
+	snd_soc_dapm_sync(codec);
+
+	return 0;
+}
+
+/* Digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link afeb9260_dai = {
+	.name = "TLV320AIC23",
+	.stream_name = "AIC23",
+	.cpu_dai = &atmel_ssc_dai[0],
+	.codec_dai = &tlv320aic23_dai,
+	.init = afeb9260_tlv320aic23_init,
+	.ops = &afeb9260_ops,
+};
+
+/* Audio machine driver */
+static struct snd_soc_card snd_soc_machine_afeb9260 = {
+	.name = "AFEB9260",
+	.platform = &atmel_soc_platform,
+	.dai_link = &afeb9260_dai,
+	.num_links = 1,
+};
+
+/* Audio subsystem */
+static struct snd_soc_device afeb9260_snd_devdata = {
+	.card = &snd_soc_machine_afeb9260,
+	.codec_dev = &soc_codec_dev_tlv320aic23,
+};
+
+static struct platform_device *afeb9260_snd_device;
+
+static int __init afeb9260_soc_init(void)
+{
+	int err;
+	struct device *dev;
+	struct atmel_ssc_info *ssc_p = afeb9260_dai.cpu_dai->private_data;
+	struct ssc_device *ssc = NULL;
+
+	if (!(machine_is_afeb9260()))
+		return -ENODEV;
+
+	ssc = ssc_request(0);
+	if (IS_ERR(ssc)) {
+		printk(KERN_ERR "ASoC: Failed to request SSC 0\n");
+		err = PTR_ERR(ssc);
+		ssc = NULL;
+		goto err_ssc;
+	}
+	ssc_p->ssc = ssc;
+
+	afeb9260_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!afeb9260_snd_device) {
+		printk(KERN_ERR "ASoC: Platform device allocation failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(afeb9260_snd_device, &afeb9260_snd_devdata);
+	afeb9260_snd_devdata.dev = &afeb9260_snd_device->dev;
+	err = platform_device_add(afeb9260_snd_device);
+	if (err)
+		goto err1;
+
+	dev = &afeb9260_snd_device->dev;
+
+	return 0;
+err1:
+	platform_device_del(afeb9260_snd_device);
+	platform_device_put(afeb9260_snd_device);
+err_ssc:
+	return err;
+
+}
+
+static void __exit afeb9260_soc_exit(void)
+{
+	platform_device_unregister(afeb9260_snd_device);
+}
+
+module_init(afeb9260_soc_init);
+module_exit(afeb9260_soc_exit);
+
+MODULE_AUTHOR("Sergey Lapin <slapin@ossfans.org>");
+MODULE_DESCRIPTION("ALSA SoC for AFEB9260");
+MODULE_LICENSE("GPL");
+
diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index 8a935f2..b1ed423 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -31,6 +31,15 @@
 #include "bf5xx-sport.h"
 #include "bf5xx-ac97.h"
 
+/* Anomaly notes:
+ *  05000250 -	AD1980 is running in TDM mode and RFS/TFS are generated by SPORT
+ *		contrtoller. But, RFSDIV and TFSDIV are always set to 16*16-1,
+ *		while the max AC97 data size is 13*16. The DIV is always larger
+ *		than data size. AD73311 and ad2602 are not running in TDM mode.
+ *		AD1836 and AD73322 depend on external RFS/TFS only. So, this
+ *		anomaly does not affect blackfin sound drivers.
+*/
+
 static int *cmd_count;
 static int sport_num = CONFIG_SND_BF5XX_SPORT_NUM;
 
diff --git a/sound/soc/blackfin/bf5xx-sport.c b/sound/soc/blackfin/bf5xx-sport.c
index b7953c8..469ce7f 100644
--- a/sound/soc/blackfin/bf5xx-sport.c
+++ b/sound/soc/blackfin/bf5xx-sport.c
@@ -190,7 +190,7 @@
 	desc = get_dma_next_desc_ptr(sport->dma_rx_chan);
 	/* Copy the descriptor which will be damaged to backup */
 	temp_desc = *desc;
-	desc->x_count = 0xa;
+	desc->x_count = sport->dummy_count / 2;
 	desc->y_count = 0;
 	desc->next_desc_addr = sport->dummy_rx_desc;
 	local_irq_restore(flags);
@@ -309,7 +309,7 @@
 	desc = get_dma_next_desc_ptr(sport->dma_tx_chan);
 	/* Store the descriptor which will be damaged */
 	temp_desc = *desc;
-	desc->x_count = 0xa;
+	desc->x_count = sport->dummy_count / 2;
 	desc->y_count = 0;
 	desc->next_desc_addr = sport->dummy_tx_desc;
 	local_irq_restore(flags);
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index b6c7f7a..bbc97fd 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -18,7 +18,9 @@
 	select SND_SOC_AK4535 if I2C
 	select SND_SOC_CS4270 if I2C
 	select SND_SOC_PCM3008
+	select SND_SOC_SPDIF
 	select SND_SOC_SSM2602 if I2C
+	select SND_SOC_STAC9766 if SND_SOC_AC97_BUS
 	select SND_SOC_TLV320AIC23 if I2C
 	select SND_SOC_TLV320AIC26 if SPI_MASTER
 	select SND_SOC_TLV320AIC3X if I2C
@@ -35,8 +37,12 @@
 	select SND_SOC_WM8753 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8900 if I2C
 	select SND_SOC_WM8903 if I2C
+	select SND_SOC_WM8940 if I2C
+	select SND_SOC_WM8960 if I2C
 	select SND_SOC_WM8971 if I2C
+	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8990 if I2C
+	select SND_SOC_WM9081 if I2C
 	select SND_SOC_WM9705 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9712 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9713 if SND_SOC_AC97_BUS
@@ -86,9 +92,15 @@
 config SND_SOC_PCM3008
        tristate
 
+config SND_SOC_SPDIF
+	tristate
+
 config SND_SOC_SSM2602
 	tristate
 
+config SND_SOC_STAC9766
+	tristate
+
 config SND_SOC_TLV320AIC23
 	tristate
 
@@ -138,12 +150,24 @@
 config SND_SOC_WM8903
 	tristate
 
+config SND_SOC_WM8940
+        tristate
+
+config SND_SOC_WM8960
+	tristate
+
 config SND_SOC_WM8971
 	tristate
 
+config SND_SOC_WM8988
+	tristate
+
 config SND_SOC_WM8990
 	tristate
 
+config SND_SOC_WM9081
+	tristate
+
 config SND_SOC_WM9705
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index f265380..8b75305 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -6,7 +6,9 @@
 snd-soc-cs4270-objs := cs4270.o
 snd-soc-l3-objs := l3.o
 snd-soc-pcm3008-objs := pcm3008.o
+snd-soc-spdif-objs := spdif_transciever.o
 snd-soc-ssm2602-objs := ssm2602.o
+snd-soc-stac9766-objs := stac9766.o
 snd-soc-tlv320aic23-objs := tlv320aic23.o
 snd-soc-tlv320aic26-objs := tlv320aic26.o
 snd-soc-tlv320aic3x-objs := tlv320aic3x.o
@@ -23,8 +25,12 @@
 snd-soc-wm8753-objs := wm8753.o
 snd-soc-wm8900-objs := wm8900.o
 snd-soc-wm8903-objs := wm8903.o
+snd-soc-wm8940-objs := wm8940.o
+snd-soc-wm8960-objs := wm8960.o
 snd-soc-wm8971-objs := wm8971.o
+snd-soc-wm8988-objs := wm8988.o
 snd-soc-wm8990-objs := wm8990.o
+snd-soc-wm9081-objs := wm9081.o
 snd-soc-wm9705-objs := wm9705.o
 snd-soc-wm9712-objs := wm9712.o
 snd-soc-wm9713-objs := wm9713.o
@@ -37,7 +43,9 @@
 obj-$(CONFIG_SND_SOC_CS4270)	+= snd-soc-cs4270.o
 obj-$(CONFIG_SND_SOC_L3)	+= snd-soc-l3.o
 obj-$(CONFIG_SND_SOC_PCM3008)	+= snd-soc-pcm3008.o
+obj-$(CONFIG_SND_SOC_SPDIF)	+= snd-soc-spdif.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
+obj-$(CONFIG_SND_SOC_STAC9766)	+= snd-soc-stac9766.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23)	+= snd-soc-tlv320aic23.o
 obj-$(CONFIG_SND_SOC_TLV320AIC26)	+= snd-soc-tlv320aic26.o
 obj-$(CONFIG_SND_SOC_TLV320AIC3X)	+= snd-soc-tlv320aic3x.o
@@ -55,7 +63,11 @@
 obj-$(CONFIG_SND_SOC_WM8900)	+= snd-soc-wm8900.o
 obj-$(CONFIG_SND_SOC_WM8903)	+= snd-soc-wm8903.o
 obj-$(CONFIG_SND_SOC_WM8971)	+= snd-soc-wm8971.o
+obj-$(CONFIG_SND_SOC_WM8940)	+= snd-soc-wm8940.o
+obj-$(CONFIG_SND_SOC_WM8960)	+= snd-soc-wm8960.o
+obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
+obj-$(CONFIG_SND_SOC_WM9081)	+= snd-soc-wm9081.o
 obj-$(CONFIG_SND_SOC_WM9705)	+= snd-soc-wm9705.o
 obj-$(CONFIG_SND_SOC_WM9712)	+= snd-soc-wm9712.o
 obj-$(CONFIG_SND_SOC_WM9713)	+= snd-soc-wm9713.o
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index b0d4af1..932299b 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -53,13 +53,13 @@
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "AC97 Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &ac97_dai_ops,
 };
 EXPORT_SYMBOL_GPL(ac97_dai);
diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c
index ddb3b08..d7440a9 100644
--- a/sound/soc/codecs/ad1980.c
+++ b/sound/soc/codecs/ad1980.c
@@ -137,13 +137,13 @@
 		.channels_min = 2,
 		.channels_max = 6,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 2,
 		.channels_max = 2,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 };
 EXPORT_SYMBOL_GPL(ad1980_dai);
 
diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 7fa09a3..a32b822 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -18,7 +18,7 @@
  * - The machine driver's 'startup' function must call
  *   cs4270_set_dai_sysclk() with the value of MCLK.
  * - Only I2S and left-justified modes are supported
- * - Power management is not supported
+ * - Power management is supported
  */
 
 #include <linux/module.h>
@@ -27,6 +27,7 @@
 #include <sound/soc.h>
 #include <sound/initval.h>
 #include <linux/i2c.h>
+#include <linux/delay.h>
 
 #include "cs4270.h"
 
@@ -56,6 +57,7 @@
 #define CS4270_FIRSTREG	0x01
 #define CS4270_LASTREG	0x08
 #define CS4270_NUMREGS	(CS4270_LASTREG - CS4270_FIRSTREG + 1)
+#define CS4270_I2C_INCR	0x80
 
 /* Bit masks for the CS4270 registers */
 #define CS4270_CHIPID_ID	0xF0
@@ -64,6 +66,8 @@
 #define CS4270_PWRCTL_PDN_ADC	0x20
 #define CS4270_PWRCTL_PDN_DAC	0x02
 #define CS4270_PWRCTL_PDN	0x01
+#define CS4270_PWRCTL_PDN_ALL	\
+	(CS4270_PWRCTL_PDN_ADC | CS4270_PWRCTL_PDN_DAC | CS4270_PWRCTL_PDN)
 #define CS4270_MODE_SPEED_MASK	0x30
 #define CS4270_MODE_1X		0x00
 #define CS4270_MODE_2X		0x10
@@ -109,6 +113,7 @@
 	unsigned int mclk; /* Input frequency of the MCLK pin */
 	unsigned int mode; /* The mode (I2S or left-justified) */
 	unsigned int slave_mode;
+	unsigned int manual_mute;
 };
 
 /**
@@ -295,7 +300,7 @@
 	s32 length;
 
 	length = i2c_smbus_read_i2c_block_data(i2c_client,
-		CS4270_FIRSTREG | 0x80, CS4270_NUMREGS, cache);
+		CS4270_FIRSTREG | CS4270_I2C_INCR, CS4270_NUMREGS, cache);
 
 	if (length != CS4270_NUMREGS) {
 		dev_err(codec->dev, "i2c read failure, addr=0x%x\n",
@@ -453,7 +458,7 @@
 }
 
 /**
- * cs4270_mute - enable/disable the CS4270 external mute
+ * cs4270_dai_mute - enable/disable the CS4270 external mute
  * @dai: the SOC DAI
  * @mute: 0 = disable mute, 1 = enable mute
  *
@@ -462,21 +467,52 @@
  * board does not have the MUTEA or MUTEB pins connected to such circuitry,
  * then this function will do nothing.
  */
-static int cs4270_mute(struct snd_soc_dai *dai, int mute)
+static int cs4270_dai_mute(struct snd_soc_dai *dai, int mute)
 {
 	struct snd_soc_codec *codec = dai->codec;
+	struct cs4270_private *cs4270 = codec->private_data;
 	int reg6;
 
 	reg6 = snd_soc_read(codec, CS4270_MUTE);
 
 	if (mute)
 		reg6 |= CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B;
-	else
+	else {
 		reg6 &= ~(CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B);
+		reg6 |= cs4270->manual_mute;
+	}
 
 	return snd_soc_write(codec, CS4270_MUTE, reg6);
 }
 
+/**
+ * cs4270_soc_put_mute - put callback for the 'Master Playback switch'
+ * 			 alsa control.
+ * @kcontrol: mixer control
+ * @ucontrol: control element information
+ *
+ * This function basically passes the arguments on to the generic
+ * snd_soc_put_volsw() function and saves the mute information in
+ * our private data structure. This is because we want to prevent
+ * cs4270_dai_mute() neglecting the user's decision to manually
+ * mute the codec's output.
+ *
+ * Returns 0 for success.
+ */
+static int cs4270_soc_put_mute(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct cs4270_private *cs4270 = codec->private_data;
+	int left = !ucontrol->value.integer.value[0];
+	int right = !ucontrol->value.integer.value[1];
+
+	cs4270->manual_mute = (left ? CS4270_MUTE_DAC_A : 0) |
+			      (right ? CS4270_MUTE_DAC_B : 0);
+
+	return snd_soc_put_volsw(kcontrol, ucontrol);
+}
+
 /* A list of non-DAPM controls that the CS4270 supports */
 static const struct snd_kcontrol_new cs4270_snd_controls[] = {
 	SOC_DOUBLE_R("Master Playback Volume",
@@ -486,7 +522,9 @@
 	SOC_SINGLE("Zero Cross Switch", CS4270_TRANS, 5, 1, 0),
 	SOC_SINGLE("Popguard Switch", CS4270_MODE, 0, 1, 1),
 	SOC_SINGLE("Auto-Mute Switch", CS4270_MUTE, 5, 1, 0),
-	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 0)
+	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1),
+	SOC_DOUBLE_EXT("Master Playback Switch", CS4270_MUTE, 0, 1, 1, 1,
+		snd_soc_get_volsw, cs4270_soc_put_mute),
 };
 
 /*
@@ -506,7 +544,7 @@
 	.hw_params	= cs4270_hw_params,
 	.set_sysclk	= cs4270_set_dai_sysclk,
 	.set_fmt	= cs4270_set_dai_fmt,
-	.digital_mute	= cs4270_mute,
+	.digital_mute	= cs4270_dai_mute,
 };
 
 struct snd_soc_dai cs4270_dai = {
@@ -753,6 +791,57 @@
 };
 MODULE_DEVICE_TABLE(i2c, cs4270_id);
 
+#ifdef CONFIG_PM
+
+/* This suspend/resume implementation can handle both - a simple standby
+ * where the codec remains powered, and a full suspend, where the voltage
+ * domain the codec is connected to is teared down and/or any other hardware
+ * reset condition is asserted.
+ *
+ * The codec's own power saving features are enabled in the suspend callback,
+ * and all registers are written back to the hardware when resuming.
+ */
+
+static int cs4270_i2c_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct cs4270_private *cs4270 = i2c_get_clientdata(client);
+	struct snd_soc_codec *codec = &cs4270->codec;
+	int reg = snd_soc_read(codec, CS4270_PWRCTL) | CS4270_PWRCTL_PDN_ALL;
+
+	return snd_soc_write(codec, CS4270_PWRCTL, reg);
+}
+
+static int cs4270_i2c_resume(struct i2c_client *client)
+{
+	struct cs4270_private *cs4270 = i2c_get_clientdata(client);
+	struct snd_soc_codec *codec = &cs4270->codec;
+	int reg;
+
+	/* In case the device was put to hard reset during sleep, we need to
+	 * wait 500ns here before any I2C communication. */
+	ndelay(500);
+
+	/* first restore the entire register cache ... */
+	for (reg = CS4270_FIRSTREG; reg <= CS4270_LASTREG; reg++) {
+		u8 val = snd_soc_read(codec, reg);
+
+		if (i2c_smbus_write_byte_data(client, reg, val)) {
+			dev_err(codec->dev, "i2c write failed\n");
+			return -EIO;
+		}
+	}
+
+	/* ... then disable the power-down bits */
+	reg = snd_soc_read(codec, CS4270_PWRCTL);
+	reg &= ~CS4270_PWRCTL_PDN_ALL;
+
+	return snd_soc_write(codec, CS4270_PWRCTL, reg);
+}
+#else
+#define cs4270_i2c_suspend	NULL
+#define cs4270_i2c_resume	NULL
+#endif /* CONFIG_PM */
+
 /*
  * cs4270_i2c_driver - I2C device identification
  *
@@ -767,6 +856,8 @@
 	.id_table = cs4270_id,
 	.probe = cs4270_i2c_probe,
 	.remove = cs4270_i2c_remove,
+	.suspend = cs4270_i2c_suspend,
+	.resume = cs4270_i2c_resume,
 };
 
 /*
diff --git a/sound/soc/codecs/spdif_transciever.c b/sound/soc/codecs/spdif_transciever.c
new file mode 100644
index 0000000..218b33a
--- /dev/null
+++ b/sound/soc/codecs/spdif_transciever.c
@@ -0,0 +1,71 @@
+/*
+ * ALSA SoC SPDIF DIT driver
+ *
+ *  This driver is used by controllers which can operate in DIT (SPDI/F) where
+ *  no codec is needed.  This file provides stub codec that can be used
+ *  in these configurations. TI DaVinci Audio controller uses this driver.
+ *
+ * Author:      Steve Chen,  <schen@mvista.com>
+ * Copyright:   (C) 2009 MontaVista Software, Inc., <source@mvista.com>
+ * Copyright:   (C) 2009  Texas Instruments, India
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <sound/soc.h>
+#include <sound/pcm.h>
+
+#include "spdif_transciever.h"
+
+#define STUB_RATES	SNDRV_PCM_RATE_8000_96000
+#define STUB_FORMATS	SNDRV_PCM_FMTBIT_S16_LE
+
+struct snd_soc_dai dit_stub_dai = {
+	.name		= "DIT",
+	.playback 	= {
+		.stream_name	= "Playback",
+		.channels_min	= 1,
+		.channels_max	= 384,
+		.rates		= STUB_RATES,
+		.formats	= STUB_FORMATS,
+	},
+};
+
+static int spdif_dit_probe(struct platform_device *pdev)
+{
+	dit_stub_dai.dev = &pdev->dev;
+	return snd_soc_register_dai(&dit_stub_dai);
+}
+
+static int spdif_dit_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_dai(&dit_stub_dai);
+	return 0;
+}
+
+static struct platform_driver spdif_dit_driver = {
+	.probe		= spdif_dit_probe,
+	.remove		= spdif_dit_remove,
+	.driver		= {
+		.name	= "spdif-dit",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init dit_modinit(void)
+{
+	return platform_driver_register(&spdif_dit_driver);
+}
+
+static void __exit dit_exit(void)
+{
+	platform_driver_unregister(&spdif_dit_driver);
+}
+
+module_init(dit_modinit);
+module_exit(dit_exit);
+
diff --git a/sound/soc/codecs/spdif_transciever.h b/sound/soc/codecs/spdif_transciever.h
new file mode 100644
index 0000000..296f2eb
--- /dev/null
+++ b/sound/soc/codecs/spdif_transciever.h
@@ -0,0 +1,17 @@
+/*
+ * ALSA SoC DIT/DIR driver header
+ *
+ * Author:      Steve Chen,  <schen@mvista.com>
+ * Copyright:   (C) 2008 MontaVista Software, Inc., <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CODEC_STUBS_H
+#define CODEC_STUBS_H
+
+extern struct snd_soc_dai dit_stub_dai;
+
+#endif /* CODEC_STUBS_H */
diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 87f606c7..1fc4c8e 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -336,15 +336,17 @@
 			master_runtime->sample_bits,
 			master_runtime->rate);
 
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
+		if (master_runtime->rate != 0)
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						     SNDRV_PCM_HW_PARAM_RATE,
+						     master_runtime->rate,
+						     master_runtime->rate);
 
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
-					     master_runtime->sample_bits,
-					     master_runtime->sample_bits);
+		if (master_runtime->sample_bits != 0)
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
+						     master_runtime->sample_bits,
+						     master_runtime->sample_bits);
 
 		ssm2602->slave_substream = substream;
 	} else
@@ -372,6 +374,11 @@
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_codec *codec = socdev->card->codec;
 	struct ssm2602_priv *ssm2602 = codec->private_data;
+
+	if (ssm2602->master_substream == substream)
+		ssm2602->master_substream = ssm2602->slave_substream;
+
+	ssm2602->slave_substream = NULL;
 	/* deactivate */
 	if (!codec->active)
 		ssm2602_write(codec, SSM2602_ACTIVE, 0);
@@ -497,11 +504,9 @@
 	return 0;
 }
 
-#define SSM2602_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_11025 |\
-		SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_22050 |\
-		SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |\
-		SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |\
-		SNDRV_PCM_RATE_96000)
+#define SSM2602_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_32000 |\
+		SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |\
+		SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000)
 
 #define SSM2602_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
 		SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c
new file mode 100644
index 0000000..8ad4b7b
--- /dev/null
+++ b/sound/soc/codecs/stac9766.c
@@ -0,0 +1,463 @@
+/*
+ * stac9766.c  --  ALSA SoC STAC9766 codec support
+ *
+ * Copyright 2009 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  Features:-
+ *
+ *   o Support for AC97 Codec, S/PDIF
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/ac97_codec.h>
+#include <sound/initval.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+#include <sound/soc-of-simple.h>
+
+#include "stac9766.h"
+
+#define STAC9766_VERSION "0.10"
+
+/*
+ * STAC9766 register cache
+ */
+static const u16 stac9766_reg[] = {
+	0x6A90, 0x8000, 0x8000, 0x8000, /* 6 */
+	0x0000, 0x0000, 0x8008, 0x8008, /* e */
+	0x8808, 0x8808, 0x8808, 0x8808, /* 16 */
+	0x8808, 0x0000, 0x8000, 0x0000, /* 1e */
+	0x0000, 0x0000, 0x0000, 0x000f, /* 26 */
+	0x0a05, 0x0400, 0xbb80, 0x0000, /* 2e */
+	0x0000, 0xbb80, 0x0000, 0x0000, /* 36 */
+	0x0000, 0x2000, 0x0000, 0x0100, /* 3e */
+	0x0000, 0x0000, 0x0080, 0x0000, /* 46 */
+	0x0000, 0x0000, 0x0003, 0xffff, /* 4e */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 56 */
+	0x4000, 0x0000, 0x0000, 0x0000, /* 5e */
+	0x1201, 0xFFFF, 0xFFFF, 0x0000, /* 66 */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 6e */
+	0x0000, 0x0000, 0x0000, 0x0006, /* 76 */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 7e */
+};
+
+static const char *stac9766_record_mux[] = {"Mic", "CD", "Video", "AUX",
+			"Line", "Stereo Mix", "Mono Mix", "Phone"};
+static const char *stac9766_mono_mux[] = {"Mix", "Mic"};
+static const char *stac9766_mic_mux[] = {"Mic1", "Mic2"};
+static const char *stac9766_SPDIF_mux[] = {"PCM", "ADC Record"};
+static const char *stac9766_popbypass_mux[] = {"Normal", "Bypass Mixer"};
+static const char *stac9766_record_all_mux[] = {"All analog",
+	"Analog plus DAC"};
+static const char *stac9766_boost1[] = {"0dB", "10dB"};
+static const char *stac9766_boost2[] = {"0dB", "20dB"};
+static const char *stac9766_stereo_mic[] = {"Off", "On"};
+
+static const struct soc_enum stac9766_record_enum =
+	SOC_ENUM_DOUBLE(AC97_REC_SEL, 8, 0, 8, stac9766_record_mux);
+static const struct soc_enum stac9766_mono_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 9, 2, stac9766_mono_mux);
+static const struct soc_enum stac9766_mic_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 8, 2, stac9766_mic_mux);
+static const struct soc_enum stac9766_SPDIF_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_DA_CONTROL, 1, 2, stac9766_SPDIF_mux);
+static const struct soc_enum stac9766_popbypass_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 15, 2, stac9766_popbypass_mux);
+static const struct soc_enum stac9766_record_all_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 12, 2,
+			stac9766_record_all_mux);
+static const struct soc_enum stac9766_boost1_enum =
+	SOC_ENUM_SINGLE(AC97_MIC, 6, 2, stac9766_boost1); /* 0/10dB */
+static const struct soc_enum stac9766_boost2_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 2, 2, stac9766_boost2); /* 0/20dB */
+static const struct soc_enum stac9766_stereo_mic_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_STEREO_MIC, 2, 1, stac9766_stereo_mic);
+
+static const DECLARE_TLV_DB_LINEAR(master_tlv, -4600, 0);
+static const DECLARE_TLV_DB_LINEAR(record_tlv, 0, 2250);
+static const DECLARE_TLV_DB_LINEAR(beep_tlv, -4500, 0);
+static const DECLARE_TLV_DB_LINEAR(mix_tlv, -3450, 1200);
+
+static const struct snd_kcontrol_new stac9766_snd_ac97_controls[] = {
+	SOC_DOUBLE_TLV("Speaker Volume", AC97_MASTER, 8, 0, 31, 1, master_tlv),
+	SOC_SINGLE("Speaker Switch", AC97_MASTER, 15, 1, 1),
+	SOC_DOUBLE_TLV("Headphone Volume", AC97_HEADPHONE, 8, 0, 31, 1,
+		       master_tlv),
+	SOC_SINGLE("Headphone Switch", AC97_HEADPHONE, 15, 1, 1),
+	SOC_SINGLE_TLV("Mono Out Volume", AC97_MASTER_MONO, 0, 31, 1,
+		       master_tlv),
+	SOC_SINGLE("Mono Out Switch", AC97_MASTER_MONO, 15, 1, 1),
+
+	SOC_DOUBLE_TLV("Record Volume", AC97_REC_GAIN, 8, 0, 15, 0, record_tlv),
+	SOC_SINGLE("Record Switch", AC97_REC_GAIN, 15, 1, 1),
+
+
+	SOC_SINGLE_TLV("Beep Volume", AC97_PC_BEEP, 1, 15, 1, beep_tlv),
+	SOC_SINGLE("Beep Switch", AC97_PC_BEEP, 15, 1, 1),
+	SOC_SINGLE("Beep Frequency", AC97_PC_BEEP, 5, 127, 1),
+	SOC_SINGLE_TLV("Phone Volume", AC97_PHONE, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Phone Switch", AC97_PHONE, 15, 1, 1),
+
+	SOC_ENUM("Mic Boost1", stac9766_boost1_enum),
+	SOC_ENUM("Mic Boost2", stac9766_boost2_enum),
+	SOC_SINGLE_TLV("Mic Volume", AC97_MIC, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Mic Switch", AC97_MIC, 15, 1, 1),
+	SOC_ENUM("Stereo Mic", stac9766_stereo_mic_enum),
+
+	SOC_DOUBLE_TLV("Line Volume", AC97_LINE, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Line Switch", AC97_LINE, 15, 1, 1),
+	SOC_DOUBLE_TLV("CD Volume", AC97_CD, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("CD Switch", AC97_CD, 15, 1, 1),
+	SOC_DOUBLE_TLV("AUX Volume", AC97_AUX, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("AUX Switch", AC97_AUX, 15, 1, 1),
+	SOC_DOUBLE_TLV("Video Volume", AC97_VIDEO, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Video Switch", AC97_VIDEO, 15, 1, 1),
+
+	SOC_DOUBLE_TLV("DAC Volume", AC97_PCM, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("DAC Switch", AC97_PCM, 15, 1, 1),
+	SOC_SINGLE("Loopback Test Switch", AC97_GENERAL_PURPOSE, 7, 1, 0),
+	SOC_SINGLE("3D Volume", AC97_3D_CONTROL, 3, 2, 1),
+	SOC_SINGLE("3D Switch", AC97_GENERAL_PURPOSE, 13, 1, 0),
+
+	SOC_ENUM("SPDIF Mux", stac9766_SPDIF_enum),
+	SOC_ENUM("Mic1/2 Mux", stac9766_mic_enum),
+	SOC_ENUM("Record All Mux", stac9766_record_all_enum),
+	SOC_ENUM("Record Mux", stac9766_record_enum),
+	SOC_ENUM("Mono Mux", stac9766_mono_enum),
+	SOC_ENUM("Pop Bypass Mux", stac9766_popbypass_enum),
+};
+
+static int stac9766_ac97_write(struct snd_soc_codec *codec, unsigned int reg,
+			       unsigned int val)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg > AC97_STAC_PAGE0) {
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 0);
+		soc_ac97_ops.write(codec->ac97, reg, val);
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 1);
+		return 0;
+	}
+	if (reg / 2 > ARRAY_SIZE(stac9766_reg))
+		return -EIO;
+
+	soc_ac97_ops.write(codec->ac97, reg, val);
+	cache[reg / 2] = val;
+	return 0;
+}
+
+static unsigned int stac9766_ac97_read(struct snd_soc_codec *codec,
+				       unsigned int reg)
+{
+	u16 val = 0, *cache = codec->reg_cache;
+
+	if (reg > AC97_STAC_PAGE0) {
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 0);
+		val = soc_ac97_ops.read(codec->ac97, reg - AC97_STAC_PAGE0);
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 1);
+		return val;
+	}
+	if (reg / 2 > ARRAY_SIZE(stac9766_reg))
+		return -EIO;
+
+	if (reg == AC97_RESET || reg == AC97_GPIO_STATUS ||
+		reg == AC97_INT_PAGING || reg == AC97_VENDOR_ID1 ||
+		reg == AC97_VENDOR_ID2) {
+
+		val = soc_ac97_ops.read(codec->ac97, reg);
+		return val;
+	}
+	return cache[reg / 2];
+}
+
+static int ac97_analog_prepare(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned short reg, vra;
+
+	vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+
+	vra |= 0x1; /* enable variable rate audio */
+
+	stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		reg = AC97_PCM_FRONT_DAC_RATE;
+	else
+		reg = AC97_PCM_LR_ADC_RATE;
+
+	return stac9766_ac97_write(codec, reg, runtime->rate);
+}
+
+static int ac97_digital_prepare(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned short reg, vra;
+
+	stac9766_ac97_write(codec, AC97_SPDIF, 0x2002);
+
+	vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+	vra |= 0x5; /* Enable VRA and SPDIF out */
+
+	stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+
+	reg = AC97_PCM_FRONT_DAC_RATE;
+
+	return stac9766_ac97_write(codec, reg, runtime->rate);
+}
+
+static int ac97_digital_trigger(struct snd_pcm_substream *substream,
+				int cmd, struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	unsigned short vra;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_STOP:
+		vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+		vra &= !0x04;
+		stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+		break;
+	}
+	return 0;
+}
+
+static int stac9766_set_bias_level(struct snd_soc_codec *codec,
+				   enum snd_soc_bias_level level)
+{
+	switch (level) {
+	case SND_SOC_BIAS_ON: /* full On */
+	case SND_SOC_BIAS_PREPARE: /* partial On */
+	case SND_SOC_BIAS_STANDBY: /* Off, with power */
+		stac9766_ac97_write(codec, AC97_POWERDOWN, 0x0000);
+		break;
+	case SND_SOC_BIAS_OFF: /* Off, without power */
+		/* disable everything including AC link */
+		stac9766_ac97_write(codec, AC97_POWERDOWN, 0xffff);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+static int stac9766_reset(struct snd_soc_codec *codec, int try_warm)
+{
+	if (try_warm && soc_ac97_ops.warm_reset) {
+		soc_ac97_ops.warm_reset(codec->ac97);
+		if (stac9766_ac97_read(codec, 0) == stac9766_reg[0])
+			return 1;
+	}
+
+	soc_ac97_ops.reset(codec->ac97);
+	if (soc_ac97_ops.warm_reset)
+		soc_ac97_ops.warm_reset(codec->ac97);
+	if (stac9766_ac97_read(codec, 0) != stac9766_reg[0])
+		return -EIO;
+	return 0;
+}
+
+static int stac9766_codec_suspend(struct platform_device *pdev,
+				  pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int stac9766_codec_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 id, reset;
+
+	reset = 0;
+	/* give the codec an AC97 warm reset to start the link */
+reset:
+	if (reset > 5) {
+		printk(KERN_ERR "stac9766 failed to resume");
+		return -EIO;
+	}
+	codec->ac97->bus->ops->warm_reset(codec->ac97);
+	id = soc_ac97_ops.read(codec->ac97, AC97_VENDOR_ID2);
+	if (id != 0x4c13) {
+		stac9766_reset(codec, 0);
+		reset++;
+		goto reset;
+	}
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	if (codec->suspend_bias_level == SND_SOC_BIAS_ON)
+		stac9766_set_bias_level(codec, SND_SOC_BIAS_ON);
+
+	return 0;
+}
+
+static struct snd_soc_dai_ops stac9766_dai_ops_analog = {
+	.prepare = ac97_analog_prepare,
+};
+
+static struct snd_soc_dai_ops stac9766_dai_ops_digital = {
+	.prepare = ac97_digital_prepare,
+	.trigger = ac97_digital_trigger,
+};
+
+struct snd_soc_dai stac9766_dai[] = {
+{
+	.name = "stac9766 analog",
+	.id = 0,
+	.ac97_control = 1,
+
+	/* stream cababilities */
+	.playback = {
+		.stream_name = "stac9766 analog",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000_48000,
+		.formats = SND_SOC_STD_AC97_FMTS,
+	},
+	.capture = {
+		.stream_name = "stac9766 analog",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000_48000,
+		.formats = SND_SOC_STD_AC97_FMTS,
+	},
+	/* alsa ops */
+	.ops = &stac9766_dai_ops_analog,
+},
+{
+	.name = "stac9766 IEC958",
+	.id = 1,
+	.ac97_control = 1,
+
+	/* stream cababilities */
+	.playback = {
+		.stream_name = "stac9766 IEC958",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_32000 | \
+			SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE,
+	},
+	/* alsa ops */
+	.ops = &stac9766_dai_ops_digital,
+}
+};
+EXPORT_SYMBOL_GPL(stac9766_dai);
+
+static int stac9766_codec_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	printk(KERN_INFO "STAC9766 SoC Audio Codec %s\n", STAC9766_VERSION);
+
+	socdev->card->codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL);
+	if (socdev->card->codec == NULL)
+		return -ENOMEM;
+	codec = socdev->card->codec;
+	mutex_init(&codec->mutex);
+
+	codec->reg_cache = kmemdup(stac9766_reg, sizeof(stac9766_reg),
+				   GFP_KERNEL);
+	if (codec->reg_cache == NULL) {
+		ret = -ENOMEM;
+		goto cache_err;
+	}
+	codec->reg_cache_size = sizeof(stac9766_reg);
+	codec->reg_cache_step = 2;
+
+	codec->name = "STAC9766";
+	codec->owner = THIS_MODULE;
+	codec->dai = stac9766_dai;
+	codec->num_dai = ARRAY_SIZE(stac9766_dai);
+	codec->write = stac9766_ac97_write;
+	codec->read = stac9766_ac97_read;
+	codec->set_bias_level = stac9766_set_bias_level;
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	ret = snd_soc_new_ac97_codec(codec, &soc_ac97_ops, 0);
+	if (ret < 0)
+		goto codec_err;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0)
+		goto pcm_err;
+
+	/* do a cold reset for the controller and then try
+	 * a warm reset followed by an optional cold reset for codec */
+	stac9766_reset(codec, 0);
+	ret = stac9766_reset(codec, 1);
+	if (ret < 0) {
+		printk(KERN_ERR "Failed to reset STAC9766: AC97 link error\n");
+		goto reset_err;
+	}
+
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	snd_soc_add_controls(codec, stac9766_snd_ac97_controls,
+			     ARRAY_SIZE(stac9766_snd_ac97_controls));
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0)
+		goto reset_err;
+	return 0;
+
+reset_err:
+	snd_soc_free_pcms(socdev);
+pcm_err:
+	snd_soc_free_ac97_codec(codec);
+codec_err:
+	kfree(codec->private_data);
+cache_err:
+	kfree(socdev->card->codec);
+	socdev->card->codec = NULL;
+	return ret;
+}
+
+static int stac9766_codec_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	if (codec == NULL)
+		return 0;
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_free_ac97_codec(codec);
+	kfree(codec->reg_cache);
+	kfree(codec);
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_stac9766 = {
+	.probe = stac9766_codec_probe,
+	.remove = stac9766_codec_remove,
+	.suspend = stac9766_codec_suspend,
+	.resume = stac9766_codec_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_stac9766);
+
+MODULE_DESCRIPTION("ASoC stac9766 driver");
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/stac9766.h b/sound/soc/codecs/stac9766.h
new file mode 100644
index 0000000..65642eb
--- /dev/null
+++ b/sound/soc/codecs/stac9766.h
@@ -0,0 +1,21 @@
+/*
+ * stac9766.h  --  STAC9766 Soc Audio driver
+ */
+
+#ifndef _STAC9766_H
+#define _STAC9766_H
+
+#define AC97_STAC_PAGE0 0x1000
+#define AC97_STAC_DA_CONTROL (AC97_STAC_PAGE0 | 0x6A)
+#define AC97_STAC_ANALOG_SPECIAL (AC97_STAC_PAGE0 | 0x6E)
+#define AC97_STAC_STEREO_MIC 0x78
+
+/* STAC9766 DAI ID's */
+#define STAC9766_DAI_AC97_ANALOG		0
+#define STAC9766_DAI_AC97_DIGITAL		1
+
+extern struct snd_soc_dai stac9766_dai[];
+extern struct snd_soc_codec_device soc_codec_dev_stac9766;
+
+
+#endif
diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index c3f4afb..0b8dcb5 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -86,7 +86,7 @@
 	 */
 
 	if ((reg < 0 || reg > 9) && (reg != 15)) {
-		printk(KERN_WARNING "%s Invalid register R%d\n", __func__, reg);
+		printk(KERN_WARNING "%s Invalid register R%u\n", __func__, reg);
 		return -1;
 	}
 
@@ -98,7 +98,7 @@
 	if (codec->hw_write(codec->control_data, data, 2) == 2)
 		return 0;
 
-	printk(KERN_ERR "%s cannot write %03x to register R%d\n", __func__,
+	printk(KERN_ERR "%s cannot write %03x to register R%u\n", __func__,
 	       value, reg);
 
 	return -EIO;
@@ -273,14 +273,14 @@
  * Every divisor is a factor of 11*12
  */
 #define SR_MULT (11*12)
-#define A(x) (x) ? (SR_MULT/x) : 0
+#define A(x) (SR_MULT/x)
 static const unsigned char sr_adc_mult_table[] = {
-	A(2), A(2), A(12), A(12),  A(0), A(0), A(3), A(1),
-	A(2), A(2), A(11), A(11),  A(0), A(0), A(0), A(1)
+	A(2), A(2), A(12), A(12),  0, 0, A(3), A(1),
+	A(2), A(2), A(11), A(11),  0, 0, 0, A(1)
 };
 static const unsigned char sr_dac_mult_table[] = {
-	A(2), A(12), A(2), A(12),  A(0), A(0), A(3), A(1),
-	A(2), A(11), A(2), A(11),  A(0), A(0), A(0), A(1)
+	A(2), A(12), A(2), A(12),  0, 0, A(3), A(1),
+	A(2), A(11), A(2), A(11),  0, 0, 0, A(1)
 };
 
 static unsigned get_score(int adc, int adc_l, int adc_h, int need_adc,
@@ -523,6 +523,8 @@
 	case SND_SOC_DAIFMT_I2S:
 		iface_reg |= TLV320AIC23_FOR_I2S;
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface_reg |= TLV320AIC23_LRP_ON;
 	case SND_SOC_DAIFMT_DSP_B:
 		iface_reg |= TLV320AIC23_FOR_DSP;
 		break;
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index df7c8c2..4dbb853 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -115,6 +115,7 @@
 	0x00, /* REG_VIBRA_PWM_SET	(0x47)	*/
 	0x00, /* REG_ANAMIC_GAIN	(0x48)	*/
 	0x00, /* REG_MISC_SET_2		(0x49)	*/
+	0x00, /* REG_SW_SHADOW		(0x4A)	- Shadow, non HW register */
 };
 
 /* codec private data */
@@ -125,6 +126,17 @@
 
 	struct snd_pcm_substream *master_substream;
 	struct snd_pcm_substream *slave_substream;
+
+	unsigned int configured;
+	unsigned int rate;
+	unsigned int sample_bits;
+	unsigned int channels;
+
+	unsigned int sysclk;
+
+	/* Headset output state handling */
+	unsigned int hsl_enabled;
+	unsigned int hsr_enabled;
 };
 
 /*
@@ -161,7 +173,11 @@
 			unsigned int reg, unsigned int value)
 {
 	twl4030_write_reg_cache(codec, reg, value);
-	return twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value, reg);
+	if (likely(reg < TWL4030_REG_SW_SHADOW))
+		return twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value,
+					    reg);
+	else
+		return 0;
 }
 
 static void twl4030_codec_enable(struct snd_soc_codec *codec, int enable)
@@ -188,6 +204,7 @@
 
 static void twl4030_init_chip(struct snd_soc_codec *codec)
 {
+	u8 *cache = codec->reg_cache;
 	int i;
 
 	/* clear CODECPDZ prior to setting register defaults */
@@ -195,7 +212,7 @@
 
 	/* set all audio section registers to reasonable defaults */
 	for (i = TWL4030_REG_OPTION; i <= TWL4030_REG_MISC_SET_2; i++)
-		twl4030_write(codec, i,	twl4030_reg[i]);
+		twl4030_write(codec, i,	cache[i]);
 
 }
 
@@ -232,7 +249,7 @@
 					TWL4030_REG_PRECKL_CTL);
 		reg_val = twl4030_read_reg_cache(codec, TWL4030_REG_PRECKR_CTL);
 		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
-					reg_val & (~TWL4030_PRECKL_GAIN),
+					reg_val & (~TWL4030_PRECKR_GAIN),
 					TWL4030_REG_PRECKR_CTL);
 
 		/* Disable PLL */
@@ -316,104 +333,60 @@
 }
 
 /* Earpiece */
-static const char *twl4030_earpiece_texts[] =
-		{"Off", "DACL1", "DACL2", "DACR1"};
-
-static const unsigned int twl4030_earpiece_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_earpiece_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_EAR_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_earpiece_texts),
-			twl4030_earpiece_texts,
-			twl4030_earpiece_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_earpiece_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_earpiece_enum);
+static const struct snd_kcontrol_new twl4030_dapm_earpiece_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_EAR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_EAR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_EAR_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_EAR_CTL, 3, 1, 0),
+};
 
 /* PreDrive Left */
-static const char *twl4030_predrivel_texts[] =
-		{"Off", "DACL1", "DACL2", "DACR2"};
-
-static const unsigned int twl4030_predrivel_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_predrivel_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDL_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_predrivel_texts),
-			twl4030_predrivel_texts,
-			twl4030_predrivel_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_predrivel_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_predrivel_enum);
+static const struct snd_kcontrol_new twl4030_dapm_predrivel_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDL_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PREDL_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDL_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDL_CTL, 3, 1, 0),
+};
 
 /* PreDrive Right */
-static const char *twl4030_predriver_texts[] =
-		{"Off", "DACR1", "DACR2", "DACL2"};
-
-static const unsigned int twl4030_predriver_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_predriver_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDR_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_predriver_texts),
-			twl4030_predriver_texts,
-			twl4030_predriver_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_predriver_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_predriver_enum);
+static const struct snd_kcontrol_new twl4030_dapm_predriver_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PREDR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDR_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDR_CTL, 3, 1, 0),
+};
 
 /* Headset Left */
-static const char *twl4030_hsol_texts[] =
-		{"Off", "DACL1", "DACL2"};
-
-static const struct soc_enum twl4030_hsol_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 1,
-			ARRAY_SIZE(twl4030_hsol_texts),
-			twl4030_hsol_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_hsol_control =
-SOC_DAPM_ENUM("Route", twl4030_hsol_enum);
+static const struct snd_kcontrol_new twl4030_dapm_hsol_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_HS_SEL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_HS_SEL, 2, 1, 0),
+};
 
 /* Headset Right */
-static const char *twl4030_hsor_texts[] =
-		{"Off", "DACR1", "DACR2"};
-
-static const struct soc_enum twl4030_hsor_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 4,
-			ARRAY_SIZE(twl4030_hsor_texts),
-			twl4030_hsor_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_hsor_control =
-SOC_DAPM_ENUM("Route", twl4030_hsor_enum);
+static const struct snd_kcontrol_new twl4030_dapm_hsor_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 3, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_HS_SEL, 4, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_HS_SEL, 5, 1, 0),
+};
 
 /* Carkit Left */
-static const char *twl4030_carkitl_texts[] =
-		{"Off", "DACL1", "DACL2"};
-
-static const struct soc_enum twl4030_carkitl_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_PRECKL_CTL, 1,
-			ARRAY_SIZE(twl4030_carkitl_texts),
-			twl4030_carkitl_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_carkitl_control =
-SOC_DAPM_ENUM("Route", twl4030_carkitl_enum);
+static const struct snd_kcontrol_new twl4030_dapm_carkitl_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKL_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PRECKL_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PRECKL_CTL, 2, 1, 0),
+};
 
 /* Carkit Right */
-static const char *twl4030_carkitr_texts[] =
-		{"Off", "DACR1", "DACR2"};
-
-static const struct soc_enum twl4030_carkitr_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_PRECKR_CTL, 1,
-			ARRAY_SIZE(twl4030_carkitr_texts),
-			twl4030_carkitr_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_carkitr_control =
-SOC_DAPM_ENUM("Route", twl4030_carkitr_enum);
+static const struct snd_kcontrol_new twl4030_dapm_carkitr_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PRECKR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PRECKR_CTL, 2, 1, 0),
+};
 
 /* Handsfree Left */
 static const char *twl4030_handsfreel_texts[] =
-		{"Voice", "DACL1", "DACL2", "DACR2"};
+		{"Voice", "AudioL1", "AudioL2", "AudioR2"};
 
 static const struct soc_enum twl4030_handsfreel_enum =
 	SOC_ENUM_SINGLE(TWL4030_REG_HFL_CTL, 0,
@@ -423,9 +396,13 @@
 static const struct snd_kcontrol_new twl4030_dapm_handsfreel_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreel_enum);
 
+/* Handsfree Left virtual mute */
+static const struct snd_kcontrol_new twl4030_dapm_handsfreelmute_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_SW_SHADOW, 0, 1, 0);
+
 /* Handsfree Right */
 static const char *twl4030_handsfreer_texts[] =
-		{"Voice", "DACR1", "DACR2", "DACL2"};
+		{"Voice", "AudioR1", "AudioR2", "AudioL2"};
 
 static const struct soc_enum twl4030_handsfreer_enum =
 	SOC_ENUM_SINGLE(TWL4030_REG_HFR_CTL, 0,
@@ -435,37 +412,48 @@
 static const struct snd_kcontrol_new twl4030_dapm_handsfreer_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreer_enum);
 
+/* Handsfree Right virtual mute */
+static const struct snd_kcontrol_new twl4030_dapm_handsfreermute_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_SW_SHADOW, 1, 1, 0);
+
+/* Vibra */
+/* Vibra audio path selection */
+static const char *twl4030_vibra_texts[] =
+		{"AudioL1", "AudioR1", "AudioL2", "AudioR2"};
+
+static const struct soc_enum twl4030_vibra_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 2,
+			ARRAY_SIZE(twl4030_vibra_texts),
+			twl4030_vibra_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_vibra_control =
+SOC_DAPM_ENUM("Route", twl4030_vibra_enum);
+
+/* Vibra path selection: local vibrator (PWM) or audio driven */
+static const char *twl4030_vibrapath_texts[] =
+		{"Local vibrator", "Audio"};
+
+static const struct soc_enum twl4030_vibrapath_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 4,
+			ARRAY_SIZE(twl4030_vibrapath_texts),
+			twl4030_vibrapath_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_vibrapath_control =
+SOC_DAPM_ENUM("Route", twl4030_vibrapath_enum);
+
 /* Left analog microphone selection */
-static const char *twl4030_analoglmic_texts[] =
-		{"Off", "Main mic", "Headset mic", "AUXL", "Carkit mic"};
-
-static const unsigned int twl4030_analoglmic_values[] =
-		{0x0, 0x1, 0x2, 0x4, 0x8};
-
-static const struct soc_enum twl4030_analoglmic_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_ANAMICL, 0, 0xf,
-			ARRAY_SIZE(twl4030_analoglmic_texts),
-			twl4030_analoglmic_texts,
-			twl4030_analoglmic_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_analoglmic_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_analoglmic_enum);
+static const struct snd_kcontrol_new twl4030_dapm_analoglmic_controls[] = {
+	SOC_DAPM_SINGLE("Main mic", TWL4030_REG_ANAMICL, 0, 1, 0),
+	SOC_DAPM_SINGLE("Headset mic", TWL4030_REG_ANAMICL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AUXL", TWL4030_REG_ANAMICL, 2, 1, 0),
+	SOC_DAPM_SINGLE("Carkit mic", TWL4030_REG_ANAMICL, 3, 1, 0),
+};
 
 /* Right analog microphone selection */
-static const char *twl4030_analogrmic_texts[] =
-		{"Off", "Sub mic", "AUXR"};
-
-static const unsigned int twl4030_analogrmic_values[] =
-		{0x0, 0x1, 0x4};
-
-static const struct soc_enum twl4030_analogrmic_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_ANAMICR, 0, 0x5,
-			ARRAY_SIZE(twl4030_analogrmic_texts),
-			twl4030_analogrmic_texts,
-			twl4030_analogrmic_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_analogrmic_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_analogrmic_enum);
+static const struct snd_kcontrol_new twl4030_dapm_analogrmic_controls[] = {
+	SOC_DAPM_SINGLE("Sub mic", TWL4030_REG_ANAMICR, 0, 1, 0),
+	SOC_DAPM_SINGLE("AUXR", TWL4030_REG_ANAMICR, 2, 1, 0),
+};
 
 /* TX1 L/R Analog/Digital microphone selection */
 static const char *twl4030_micpathtx1_texts[] =
@@ -507,6 +495,10 @@
 static const struct snd_kcontrol_new twl4030_dapm_abypassl2_control =
 	SOC_DAPM_SINGLE("Switch", TWL4030_REG_ARXL2_APGA_CTL, 2, 1, 0);
 
+/* Analog bypass for Voice */
+static const struct snd_kcontrol_new twl4030_dapm_abypassv_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_VDL_APGA_CTL, 2, 1, 0);
+
 /* Digital bypass gain, 0 mutes the bypass */
 static const unsigned int twl4030_dapm_dbypass_tlv[] = {
 	TLV_DB_RANGE_HEAD(2),
@@ -526,6 +518,18 @@
 			TWL4030_REG_ATX2ARXPGA, 0, 7, 0,
 			twl4030_dapm_dbypass_tlv);
 
+/*
+ * Voice Sidetone GAIN volume control:
+ * from -51 to -10 dB in 1 dB steps (mute instead of -51 dB)
+ */
+static DECLARE_TLV_DB_SCALE(twl4030_dapm_dbypassv_tlv, -5100, 100, 1);
+
+/* Digital bypass voice: sidetone (VUL -> VDL)*/
+static const struct snd_kcontrol_new twl4030_dapm_dbypassv_control =
+	SOC_DAPM_SINGLE_TLV("Volume",
+			TWL4030_REG_VSTPGA, 0, 0x29, 0,
+			twl4030_dapm_dbypassv_tlv);
+
 static int micpath_event(struct snd_soc_dapm_widget *w,
 	struct snd_kcontrol *kcontrol, int event)
 {
@@ -556,63 +560,143 @@
 	return 0;
 }
 
-static int handsfree_event(struct snd_soc_dapm_widget *w,
-		struct snd_kcontrol *kcontrol, int event)
+static void handsfree_ramp(struct snd_soc_codec *codec, int reg, int ramp)
 {
-	struct soc_enum *e = (struct soc_enum *)w->kcontrols->private_value;
 	unsigned char hs_ctl;
 
-	hs_ctl = twl4030_read_reg_cache(w->codec, e->reg);
+	hs_ctl = twl4030_read_reg_cache(codec, reg);
 
-	if (hs_ctl & TWL4030_HF_CTL_REF_EN) {
+	if (ramp) {
+		/* HF ramp-up */
+		hs_ctl |= TWL4030_HF_CTL_REF_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(10);
 		hs_ctl |= TWL4030_HF_CTL_RAMP_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(40);
 		hs_ctl |= TWL4030_HF_CTL_LOOP_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
 		hs_ctl |= TWL4030_HF_CTL_HB_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		twl4030_write(codec, reg, hs_ctl);
 	} else {
-		hs_ctl &= ~(TWL4030_HF_CTL_RAMP_EN | TWL4030_HF_CTL_LOOP_EN
-				| TWL4030_HF_CTL_HB_EN);
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		/* HF ramp-down */
+		hs_ctl &= ~TWL4030_HF_CTL_LOOP_EN;
+		hs_ctl &= ~TWL4030_HF_CTL_HB_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		hs_ctl &= ~TWL4030_HF_CTL_RAMP_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(40);
+		hs_ctl &= ~TWL4030_HF_CTL_REF_EN;
+		twl4030_write(codec, reg, hs_ctl);
 	}
+}
 
+static int handsfreelpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		handsfree_ramp(w->codec, TWL4030_REG_HFL_CTL, 1);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		handsfree_ramp(w->codec, TWL4030_REG_HFL_CTL, 0);
+		break;
+	}
 	return 0;
 }
 
-static int headsetl_event(struct snd_soc_dapm_widget *w,
+static int handsfreerpga_event(struct snd_soc_dapm_widget *w,
 		struct snd_kcontrol *kcontrol, int event)
 {
-	unsigned char hs_gain, hs_pop;
-
-	/* Save the current volume */
-	hs_gain = twl4030_read_reg_cache(w->codec, TWL4030_REG_HS_GAIN_SET);
-	hs_pop = twl4030_read_reg_cache(w->codec, TWL4030_REG_HS_POPN_SET);
-
 	switch (event) {
 	case SND_SOC_DAPM_POST_PMU:
-		/* Do the anti-pop/bias ramp enable according to the TRM */
-		hs_pop |= TWL4030_VMID_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
-		/* Is this needed? Can we just use whatever gain here? */
-		twl4030_write(w->codec, TWL4030_REG_HS_GAIN_SET,
-				(hs_gain & (~0x0f)) | 0x0a);
-		hs_pop |= TWL4030_RAMP_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
-
-		/* Restore the original volume */
-		twl4030_write(w->codec, TWL4030_REG_HS_GAIN_SET, hs_gain);
+		handsfree_ramp(w->codec, TWL4030_REG_HFR_CTL, 1);
 		break;
 	case SND_SOC_DAPM_POST_PMD:
-		/* Do the anti-pop/bias ramp disable according to the TRM */
+		handsfree_ramp(w->codec, TWL4030_REG_HFR_CTL, 0);
+		break;
+	}
+	return 0;
+}
+
+static void headset_ramp(struct snd_soc_codec *codec, int ramp)
+{
+	unsigned char hs_gain, hs_pop;
+	struct twl4030_priv *twl4030 = codec->private_data;
+	/* Base values for ramp delay calculation: 2^19 - 2^26 */
+	unsigned int ramp_base[] = {524288, 1048576, 2097152, 4194304,
+				    8388608, 16777216, 33554432, 67108864};
+
+	hs_gain = twl4030_read_reg_cache(codec, TWL4030_REG_HS_GAIN_SET);
+	hs_pop = twl4030_read_reg_cache(codec, TWL4030_REG_HS_POPN_SET);
+
+	if (ramp) {
+		/* Headset ramp-up according to the TRM */
+		hs_pop |= TWL4030_VMID_EN;
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_GAIN_SET, hs_gain);
+		hs_pop |= TWL4030_RAMP_EN;
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	} else {
+		/* Headset ramp-down _not_ according to
+		 * the TRM, but in a way that it is working */
 		hs_pop &= ~TWL4030_RAMP_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		/* Wait ramp delay time + 1, so the VMID can settle */
+		mdelay((ramp_base[(hs_pop & TWL4030_RAMP_DELAY) >> 2] /
+			twl4030->sysclk) + 1);
 		/* Bypass the reg_cache to mute the headset */
 		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
 					hs_gain & (~0x0f),
 					TWL4030_REG_HS_GAIN_SET);
+
 		hs_pop &= ~TWL4030_VMID_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	}
+}
+
+static int headsetlpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	struct twl4030_priv *twl4030 = w->codec->private_data;
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/* Do the ramp-up only once */
+		if (!twl4030->hsr_enabled)
+			headset_ramp(w->codec, 1);
+
+		twl4030->hsl_enabled = 1;
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* Do the ramp-down only if both headsetL/R is disabled */
+		if (!twl4030->hsr_enabled)
+			headset_ramp(w->codec, 0);
+
+		twl4030->hsl_enabled = 0;
+		break;
+	}
+	return 0;
+}
+
+static int headsetrpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	struct twl4030_priv *twl4030 = w->codec->private_data;
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/* Do the ramp-up only once */
+		if (!twl4030->hsl_enabled)
+			headset_ramp(w->codec, 1);
+
+		twl4030->hsr_enabled = 1;
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* Do the ramp-down only if both headsetL/R is disabled */
+		if (!twl4030->hsl_enabled)
+			headset_ramp(w->codec, 0);
+
+		twl4030->hsr_enabled = 0;
 		break;
 	}
 	return 0;
@@ -624,7 +708,7 @@
 	struct soc_mixer_control *m =
 		(struct soc_mixer_control *)w->kcontrols->private_value;
 	struct twl4030_priv *twl4030 = w->codec->private_data;
-	unsigned char reg;
+	unsigned char reg, misc;
 
 	reg = twl4030_read_reg_cache(w->codec, m->reg);
 
@@ -636,14 +720,34 @@
 		else
 			twl4030->bypass_state &=
 				~(1 << (m->reg - TWL4030_REG_ARXL1_APGA_CTL));
+	} else if (m->reg == TWL4030_REG_VDL_APGA_CTL) {
+		/* Analog voice bypass */
+		if (reg & (1 << m->shift))
+			twl4030->bypass_state |= (1 << 4);
+		else
+			twl4030->bypass_state &= ~(1 << 4);
+	} else if (m->reg == TWL4030_REG_VSTPGA) {
+		/* Voice digital bypass */
+		if (reg)
+			twl4030->bypass_state |= (1 << 5);
+		else
+			twl4030->bypass_state &= ~(1 << 5);
 	} else {
 		/* Digital bypass */
 		if (reg & (0x7 << m->shift))
-			twl4030->bypass_state |= (1 << (m->shift ? 5 : 4));
+			twl4030->bypass_state |= (1 << (m->shift ? 7 : 6));
 		else
-			twl4030->bypass_state &= ~(1 << (m->shift ? 5 : 4));
+			twl4030->bypass_state &= ~(1 << (m->shift ? 7 : 6));
 	}
 
+	/* Enable master analog loopback mode if any analog switch is enabled*/
+	misc = twl4030_read_reg_cache(w->codec, TWL4030_REG_MISC_SET_1);
+	if (twl4030->bypass_state & 0x1F)
+		misc |= TWL4030_FMLOOP_EN;
+	else
+		misc &= ~TWL4030_FMLOOP_EN;
+	twl4030_write(w->codec, TWL4030_REG_MISC_SET_1, misc);
+
 	if (w->codec->bias_level == SND_SOC_BIAS_STANDBY) {
 		if (twl4030->bypass_state)
 			twl4030_codec_mute(w->codec, 0);
@@ -810,6 +914,48 @@
 	return err;
 }
 
+/* Codec operation modes */
+static const char *twl4030_op_modes_texts[] = {
+	"Option 2 (voice/audio)", "Option 1 (audio)"
+};
+
+static const struct soc_enum twl4030_op_modes_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_CODEC_MODE, 0,
+			ARRAY_SIZE(twl4030_op_modes_texts),
+			twl4030_op_modes_texts);
+
+int snd_soc_put_twl4030_opmode_enum_double(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct twl4030_priv *twl4030 = codec->private_data;
+	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+	unsigned short val;
+	unsigned short mask, bitmask;
+
+	if (twl4030->configured) {
+		printk(KERN_ERR "twl4030 operation mode cannot be "
+			"changed on-the-fly\n");
+		return -EBUSY;
+	}
+
+	for (bitmask = 1; bitmask < e->max; bitmask <<= 1)
+		;
+	if (ucontrol->value.enumerated.item[0] > e->max - 1)
+		return -EINVAL;
+
+	val = ucontrol->value.enumerated.item[0] << e->shift_l;
+	mask = (bitmask - 1) << e->shift_l;
+	if (e->shift_l != e->shift_r) {
+		if (ucontrol->value.enumerated.item[1] > e->max - 1)
+			return -EINVAL;
+		val |= ucontrol->value.enumerated.item[1] << e->shift_r;
+		mask |= (bitmask - 1) << e->shift_r;
+	}
+
+	return snd_soc_update_bits(codec, e->reg, mask, val);
+}
+
 /*
  * FGAIN volume control:
  * from -62 to 0 dB in 1 dB steps (mute instead of -63 dB)
@@ -824,6 +970,12 @@
 static DECLARE_TLV_DB_SCALE(digital_coarse_tlv, 0, 600, 0);
 
 /*
+ * Voice Downlink GAIN volume control:
+ * from -37 to 12 dB in 1 dB steps (mute instead of -37 dB)
+ */
+static DECLARE_TLV_DB_SCALE(digital_voice_downlink_tlv, -3700, 100, 1);
+
+/*
  * Analog playback gain
  * -24 dB to 12 dB in 2 dB steps
  */
@@ -864,7 +1016,32 @@
 			ARRAY_SIZE(twl4030_rampdelay_texts),
 			twl4030_rampdelay_texts);
 
+/* Vibra H-bridge direction mode */
+static const char *twl4030_vibradirmode_texts[] = {
+	"Vibra H-bridge direction", "Audio data MSB",
+};
+
+static const struct soc_enum twl4030_vibradirmode_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 5,
+			ARRAY_SIZE(twl4030_vibradirmode_texts),
+			twl4030_vibradirmode_texts);
+
+/* Vibra H-bridge direction */
+static const char *twl4030_vibradir_texts[] = {
+	"Positive polarity", "Negative polarity",
+};
+
+static const struct soc_enum twl4030_vibradir_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 1,
+			ARRAY_SIZE(twl4030_vibradir_texts),
+			twl4030_vibradir_texts);
+
 static const struct snd_kcontrol_new twl4030_snd_controls[] = {
+	/* Codec operation mode control */
+	SOC_ENUM_EXT("Codec Operation Mode", twl4030_op_modes_enum,
+		snd_soc_get_enum_double,
+		snd_soc_put_twl4030_opmode_enum_double),
+
 	/* Common playback gain controls */
 	SOC_DOUBLE_R_TLV("DAC1 Digital Fine Playback Volume",
 		TWL4030_REG_ARXL1PGA, TWL4030_REG_ARXR1PGA,
@@ -893,6 +1070,16 @@
 		TWL4030_REG_ARXL2_APGA_CTL, TWL4030_REG_ARXR2_APGA_CTL,
 		1, 1, 0),
 
+	/* Common voice downlink gain controls */
+	SOC_SINGLE_TLV("DAC Voice Digital Downlink Volume",
+		TWL4030_REG_VRXPGA, 0, 0x31, 0, digital_voice_downlink_tlv),
+
+	SOC_SINGLE_TLV("DAC Voice Analog Downlink Volume",
+		TWL4030_REG_VDL_APGA_CTL, 3, 0x12, 1, analog_tlv),
+
+	SOC_SINGLE("DAC Voice Analog Downlink Switch",
+		TWL4030_REG_VDL_APGA_CTL, 1, 1, 0),
+
 	/* Separate output gain controls */
 	SOC_DOUBLE_R_TLV_TWL4030("PreDriv Playback Volume",
 		TWL4030_REG_PREDL_CTL, TWL4030_REG_PREDR_CTL,
@@ -920,6 +1107,9 @@
 		0, 3, 5, 0, input_gain_tlv),
 
 	SOC_ENUM("HS ramp delay", twl4030_rampdelay_enum),
+
+	SOC_ENUM("Vibra H-bridge mode", twl4030_vibradirmode_enum),
+	SOC_ENUM("Vibra H-bridge direction", twl4030_vibradir_enum),
 };
 
 static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
@@ -947,26 +1137,19 @@
 	SND_SOC_DAPM_OUTPUT("CARKITR"),
 	SND_SOC_DAPM_OUTPUT("HFL"),
 	SND_SOC_DAPM_OUTPUT("HFR"),
+	SND_SOC_DAPM_OUTPUT("VIBRA"),
 
 	/* DACs */
-	SND_SOC_DAPM_DAC("DAC Right1", "Right Front Playback",
+	SND_SOC_DAPM_DAC("DAC Right1", "Right Front HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Left1", "Left Front Playback",
+	SND_SOC_DAPM_DAC("DAC Left1", "Left Front HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Right2", "Right Rear Playback",
+	SND_SOC_DAPM_DAC("DAC Right2", "Right Rear HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear Playback",
+	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-
-	/* Analog PGAs */
-	SND_SOC_DAPM_PGA("ARXR1_APGA", TWL4030_REG_ARXR1_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXL1_APGA", TWL4030_REG_ARXL1_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXR2_APGA", TWL4030_REG_ARXR2_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXL2_APGA", TWL4030_REG_ARXL2_APGA_CTL,
-			0, 0, NULL, 0),
+	SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback",
+			SND_SOC_NOPM, 0, 0),
 
 	/* Analog bypasses */
 	SND_SOC_DAPM_SWITCH_E("Right1 Analog Loopback", SND_SOC_NOPM, 0, 0,
@@ -981,6 +1164,9 @@
 	SND_SOC_DAPM_SWITCH_E("Left2 Analog Loopback", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_abypassl2_control,
 			bypass_event, SND_SOC_DAPM_POST_REG),
+	SND_SOC_DAPM_SWITCH_E("Voice Analog Loopback", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_abypassv_control,
+			bypass_event, SND_SOC_DAPM_POST_REG),
 
 	/* Digital bypasses */
 	SND_SOC_DAPM_SWITCH_E("Left Digital Loopback", SND_SOC_NOPM, 0, 0,
@@ -989,43 +1175,88 @@
 	SND_SOC_DAPM_SWITCH_E("Right Digital Loopback", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_dbypassr_control, bypass_event,
 			SND_SOC_DAPM_POST_REG),
+	SND_SOC_DAPM_SWITCH_E("Voice Digital Loopback", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_dbypassv_control, bypass_event,
+			SND_SOC_DAPM_POST_REG),
 
-	SND_SOC_DAPM_MIXER("Analog R1 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog L1 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			1, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog R2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			2, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			3, 0, NULL, 0),
+	/* Digital mixers, power control for the physical DACs */
+	SND_SOC_DAPM_MIXER("Digital R1 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital L1 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 1, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital R2 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 2, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital L2 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 3, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital Voice Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 4, 0, NULL, 0),
+
+	/* Analog mixers, power control for the physical PGAs */
+	SND_SOC_DAPM_MIXER("Analog R1 Playback Mixer",
+			TWL4030_REG_ARXR1_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog L1 Playback Mixer",
+			TWL4030_REG_ARXL1_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog R2 Playback Mixer",
+			TWL4030_REG_ARXR2_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer",
+			TWL4030_REG_ARXL2_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog Voice Playback Mixer",
+			TWL4030_REG_VDL_APGA_CTL, 0, 0, NULL, 0),
+
+	/* Output MIXER controls */
+	/* Earpiece */
+	SND_SOC_DAPM_MIXER("Earpiece Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_earpiece_controls[0],
+			ARRAY_SIZE(twl4030_dapm_earpiece_controls)),
+	/* PreDrivL/R */
+	SND_SOC_DAPM_MIXER("PredriveL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_predrivel_controls[0],
+			ARRAY_SIZE(twl4030_dapm_predrivel_controls)),
+	SND_SOC_DAPM_MIXER("PredriveR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_predriver_controls[0],
+			ARRAY_SIZE(twl4030_dapm_predriver_controls)),
+	/* HeadsetL/R */
+	SND_SOC_DAPM_MIXER("HeadsetL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_hsol_controls[0],
+			ARRAY_SIZE(twl4030_dapm_hsol_controls)),
+	SND_SOC_DAPM_PGA_E("HeadsetL PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, headsetlpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MIXER("HeadsetR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_hsor_controls[0],
+			ARRAY_SIZE(twl4030_dapm_hsor_controls)),
+	SND_SOC_DAPM_PGA_E("HeadsetR PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, headsetrpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	/* CarkitL/R */
+	SND_SOC_DAPM_MIXER("CarkitL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_carkitl_controls[0],
+			ARRAY_SIZE(twl4030_dapm_carkitl_controls)),
+	SND_SOC_DAPM_MIXER("CarkitR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_carkitr_controls[0],
+			ARRAY_SIZE(twl4030_dapm_carkitr_controls)),
 
 	/* Output MUX controls */
-	/* Earpiece */
-	SND_SOC_DAPM_VALUE_MUX("Earpiece Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_earpiece_control),
-	/* PreDrivL/R */
-	SND_SOC_DAPM_VALUE_MUX("PredriveL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predrivel_control),
-	SND_SOC_DAPM_VALUE_MUX("PredriveR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predriver_control),
-	/* HeadsetL/R */
-	SND_SOC_DAPM_MUX_E("HeadsetL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_hsol_control, headsetl_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
-	SND_SOC_DAPM_MUX("HeadsetR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_hsor_control),
-	/* CarkitL/R */
-	SND_SOC_DAPM_MUX("CarkitL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_carkitl_control),
-	SND_SOC_DAPM_MUX("CarkitR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_carkitr_control),
 	/* HandsfreeL/R */
-	SND_SOC_DAPM_MUX_E("HandsfreeL Mux", TWL4030_REG_HFL_CTL, 5, 0,
-		&twl4030_dapm_handsfreel_control, handsfree_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
-	SND_SOC_DAPM_MUX_E("HandsfreeR Mux", TWL4030_REG_HFR_CTL, 5, 0,
-		&twl4030_dapm_handsfreer_control, handsfree_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MUX("HandsfreeL Mux", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_handsfreel_control),
+	SND_SOC_DAPM_SWITCH("HandsfreeL Switch", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_handsfreelmute_control),
+	SND_SOC_DAPM_PGA_E("HandsfreeL PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, handsfreelpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MUX("HandsfreeR Mux", SND_SOC_NOPM, 5, 0,
+		&twl4030_dapm_handsfreer_control),
+	SND_SOC_DAPM_SWITCH("HandsfreeR Switch", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_handsfreermute_control),
+	SND_SOC_DAPM_PGA_E("HandsfreeR PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, handsfreerpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	/* Vibra */
+	SND_SOC_DAPM_MUX("Vibra Mux", TWL4030_REG_VIBRA_CTL, 0, 0,
+		&twl4030_dapm_vibra_control),
+	SND_SOC_DAPM_MUX("Vibra Route", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_vibrapath_control),
 
 	/* Introducing four virtual ADC, since TWL4030 have four channel for
 	   capture */
@@ -1050,11 +1281,15 @@
 		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD|
 		SND_SOC_DAPM_POST_REG),
 
-	/* Analog input muxes with switch for the capture amplifiers */
-	SND_SOC_DAPM_VALUE_MUX("Analog Left Capture Route",
-		TWL4030_REG_ANAMICL, 4, 0, &twl4030_dapm_analoglmic_control),
-	SND_SOC_DAPM_VALUE_MUX("Analog Right Capture Route",
-		TWL4030_REG_ANAMICR, 4, 0, &twl4030_dapm_analogrmic_control),
+	/* Analog input mixers for the capture amplifiers */
+	SND_SOC_DAPM_MIXER("Analog Left Capture Route",
+		TWL4030_REG_ANAMICL, 4, 0,
+		&twl4030_dapm_analoglmic_controls[0],
+		ARRAY_SIZE(twl4030_dapm_analoglmic_controls)),
+	SND_SOC_DAPM_MIXER("Analog Right Capture Route",
+		TWL4030_REG_ANAMICR, 4, 0,
+		&twl4030_dapm_analogrmic_controls[0],
+		ARRAY_SIZE(twl4030_dapm_analogrmic_controls)),
 
 	SND_SOC_DAPM_PGA("ADC Physical Left",
 		TWL4030_REG_AVADC_CTL, 3, 0, NULL, 0),
@@ -1073,62 +1308,86 @@
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
-	{"Analog L1 Playback Mixer", NULL, "DAC Left1"},
-	{"Analog R1 Playback Mixer", NULL, "DAC Right1"},
-	{"Analog L2 Playback Mixer", NULL, "DAC Left2"},
-	{"Analog R2 Playback Mixer", NULL, "DAC Right2"},
+	{"Digital L1 Playback Mixer", NULL, "DAC Left1"},
+	{"Digital R1 Playback Mixer", NULL, "DAC Right1"},
+	{"Digital L2 Playback Mixer", NULL, "DAC Left2"},
+	{"Digital R2 Playback Mixer", NULL, "DAC Right2"},
+	{"Digital Voice Playback Mixer", NULL, "DAC Voice"},
 
-	{"ARXL1_APGA", NULL, "Analog L1 Playback Mixer"},
-	{"ARXR1_APGA", NULL, "Analog R1 Playback Mixer"},
-	{"ARXL2_APGA", NULL, "Analog L2 Playback Mixer"},
-	{"ARXR2_APGA", NULL, "Analog R2 Playback Mixer"},
+	{"Analog L1 Playback Mixer", NULL, "Digital L1 Playback Mixer"},
+	{"Analog R1 Playback Mixer", NULL, "Digital R1 Playback Mixer"},
+	{"Analog L2 Playback Mixer", NULL, "Digital L2 Playback Mixer"},
+	{"Analog R2 Playback Mixer", NULL, "Digital R2 Playback Mixer"},
+	{"Analog Voice Playback Mixer", NULL, "Digital Voice Playback Mixer"},
 
 	/* Internal playback routings */
 	/* Earpiece */
-	{"Earpiece Mux", "DACL1", "ARXL1_APGA"},
-	{"Earpiece Mux", "DACL2", "ARXL2_APGA"},
-	{"Earpiece Mux", "DACR1", "ARXR1_APGA"},
+	{"Earpiece Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"Earpiece Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"Earpiece Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"Earpiece Mixer", "AudioR1", "Analog R1 Playback Mixer"},
 	/* PreDrivL */
-	{"PredriveL Mux", "DACL1", "ARXL1_APGA"},
-	{"PredriveL Mux", "DACL2", "ARXL2_APGA"},
-	{"PredriveL Mux", "DACR2", "ARXR2_APGA"},
+	{"PredriveL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"PredriveL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"PredriveL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"PredriveL Mixer", "AudioR2", "Analog R2 Playback Mixer"},
 	/* PreDrivR */
-	{"PredriveR Mux", "DACR1", "ARXR1_APGA"},
-	{"PredriveR Mux", "DACR2", "ARXR2_APGA"},
-	{"PredriveR Mux", "DACL2", "ARXL2_APGA"},
+	{"PredriveR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"PredriveR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"PredriveR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
+	{"PredriveR Mixer", "AudioL2", "Analog L2 Playback Mixer"},
 	/* HeadsetL */
-	{"HeadsetL Mux", "DACL1", "ARXL1_APGA"},
-	{"HeadsetL Mux", "DACL2", "ARXL2_APGA"},
+	{"HeadsetL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"HeadsetL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"HeadsetL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HeadsetL PGA", NULL, "HeadsetL Mixer"},
 	/* HeadsetR */
-	{"HeadsetR Mux", "DACR1", "ARXR1_APGA"},
-	{"HeadsetR Mux", "DACR2", "ARXR2_APGA"},
+	{"HeadsetR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"HeadsetR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"HeadsetR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HeadsetR PGA", NULL, "HeadsetR Mixer"},
 	/* CarkitL */
-	{"CarkitL Mux", "DACL1", "ARXL1_APGA"},
-	{"CarkitL Mux", "DACL2", "ARXL2_APGA"},
+	{"CarkitL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"CarkitL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"CarkitL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
 	/* CarkitR */
-	{"CarkitR Mux", "DACR1", "ARXR1_APGA"},
-	{"CarkitR Mux", "DACR2", "ARXR2_APGA"},
+	{"CarkitR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"CarkitR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"CarkitR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
 	/* HandsfreeL */
-	{"HandsfreeL Mux", "DACL1", "ARXL1_APGA"},
-	{"HandsfreeL Mux", "DACL2", "ARXL2_APGA"},
-	{"HandsfreeL Mux", "DACR2", "ARXR2_APGA"},
+	{"HandsfreeL Mux", "Voice", "Analog Voice Playback Mixer"},
+	{"HandsfreeL Mux", "AudioL1", "Analog L1 Playback Mixer"},
+	{"HandsfreeL Mux", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HandsfreeL Mux", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HandsfreeL Switch", "Switch", "HandsfreeL Mux"},
+	{"HandsfreeL PGA", NULL, "HandsfreeL Switch"},
 	/* HandsfreeR */
-	{"HandsfreeR Mux", "DACR1", "ARXR1_APGA"},
-	{"HandsfreeR Mux", "DACR2", "ARXR2_APGA"},
-	{"HandsfreeR Mux", "DACL2", "ARXL2_APGA"},
+	{"HandsfreeR Mux", "Voice", "Analog Voice Playback Mixer"},
+	{"HandsfreeR Mux", "AudioR1", "Analog R1 Playback Mixer"},
+	{"HandsfreeR Mux", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HandsfreeR Mux", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HandsfreeR Switch", "Switch", "HandsfreeR Mux"},
+	{"HandsfreeR PGA", NULL, "HandsfreeR Switch"},
+	/* Vibra */
+	{"Vibra Mux", "AudioL1", "DAC Left1"},
+	{"Vibra Mux", "AudioR1", "DAC Right1"},
+	{"Vibra Mux", "AudioL2", "DAC Left2"},
+	{"Vibra Mux", "AudioR2", "DAC Right2"},
 
 	/* outputs */
-	{"OUTL", NULL, "ARXL2_APGA"},
-	{"OUTR", NULL, "ARXR2_APGA"},
-	{"EARPIECE", NULL, "Earpiece Mux"},
-	{"PREDRIVEL", NULL, "PredriveL Mux"},
-	{"PREDRIVER", NULL, "PredriveR Mux"},
-	{"HSOL", NULL, "HeadsetL Mux"},
-	{"HSOR", NULL, "HeadsetR Mux"},
-	{"CARKITL", NULL, "CarkitL Mux"},
-	{"CARKITR", NULL, "CarkitR Mux"},
-	{"HFL", NULL, "HandsfreeL Mux"},
-	{"HFR", NULL, "HandsfreeR Mux"},
+	{"OUTL", NULL, "Analog L2 Playback Mixer"},
+	{"OUTR", NULL, "Analog R2 Playback Mixer"},
+	{"EARPIECE", NULL, "Earpiece Mixer"},
+	{"PREDRIVEL", NULL, "PredriveL Mixer"},
+	{"PREDRIVER", NULL, "PredriveR Mixer"},
+	{"HSOL", NULL, "HeadsetL PGA"},
+	{"HSOR", NULL, "HeadsetR PGA"},
+	{"CARKITL", NULL, "CarkitL Mixer"},
+	{"CARKITR", NULL, "CarkitR Mixer"},
+	{"HFL", NULL, "HandsfreeL PGA"},
+	{"HFR", NULL, "HandsfreeR PGA"},
+	{"Vibra Route", "Audio", "Vibra Mux"},
+	{"VIBRA", NULL, "Vibra Route"},
 
 	/* Capture path */
 	{"Analog Left Capture Route", "Main mic", "MAINMIC"},
@@ -1168,18 +1427,22 @@
 	{"Left1 Analog Loopback", "Switch", "Analog Left Capture Route"},
 	{"Right2 Analog Loopback", "Switch", "Analog Right Capture Route"},
 	{"Left2 Analog Loopback", "Switch", "Analog Left Capture Route"},
+	{"Voice Analog Loopback", "Switch", "Analog Left Capture Route"},
 
 	{"Analog R1 Playback Mixer", NULL, "Right1 Analog Loopback"},
 	{"Analog L1 Playback Mixer", NULL, "Left1 Analog Loopback"},
 	{"Analog R2 Playback Mixer", NULL, "Right2 Analog Loopback"},
 	{"Analog L2 Playback Mixer", NULL, "Left2 Analog Loopback"},
+	{"Analog Voice Playback Mixer", NULL, "Voice Analog Loopback"},
 
 	/* Digital bypass routes */
 	{"Right Digital Loopback", "Volume", "TX1 Capture Route"},
 	{"Left Digital Loopback", "Volume", "TX1 Capture Route"},
+	{"Voice Digital Loopback", "Volume", "TX2 Capture Route"},
 
-	{"Analog R2 Playback Mixer", NULL, "Right Digital Loopback"},
-	{"Analog L2 Playback Mixer", NULL, "Left Digital Loopback"},
+	{"Digital R2 Playback Mixer", NULL, "Right Digital Loopback"},
+	{"Digital L2 Playback Mixer", NULL, "Left Digital Loopback"},
+	{"Digital Voice Playback Mixer", NULL, "Voice Digital Loopback"},
 
 };
 
@@ -1226,6 +1489,58 @@
 	return 0;
 }
 
+static void twl4030_constraints(struct twl4030_priv *twl4030,
+				struct snd_pcm_substream *mst_substream)
+{
+	struct snd_pcm_substream *slv_substream;
+
+	/* Pick the stream, which need to be constrained */
+	if (mst_substream == twl4030->master_substream)
+		slv_substream = twl4030->slave_substream;
+	else if (mst_substream == twl4030->slave_substream)
+		slv_substream = twl4030->master_substream;
+	else /* This should not happen.. */
+		return;
+
+	/* Set the constraints according to the already configured stream */
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_RATE,
+				twl4030->rate,
+				twl4030->rate);
+
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
+				twl4030->sample_bits,
+				twl4030->sample_bits);
+
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_CHANNELS,
+				twl4030->channels,
+				twl4030->channels);
+}
+
+/* In case of 4 channel mode, the RX1 L/R for playback and the TX2 L/R for
+ * capture has to be enabled/disabled. */
+static void twl4030_tdm_enable(struct snd_soc_codec *codec, int direction,
+				int enable)
+{
+	u8 reg, mask;
+
+	reg = twl4030_read_reg_cache(codec, TWL4030_REG_OPTION);
+
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+		mask = TWL4030_ARXL1_VRX_EN | TWL4030_ARXR1_EN;
+	else
+		mask = TWL4030_ATXL2_VTXL_EN | TWL4030_ATXR2_VTXR_EN;
+
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+
+	twl4030_write(codec, TWL4030_REG_OPTION, reg);
+}
+
 static int twl4030_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
@@ -1234,26 +1549,25 @@
 	struct snd_soc_codec *codec = socdev->card->codec;
 	struct twl4030_priv *twl4030 = codec->private_data;
 
-	/* If we already have a playback or capture going then constrain
-	 * this substream to match it.
-	 */
 	if (twl4030->master_substream) {
-		struct snd_pcm_runtime *master_runtime;
-		master_runtime = twl4030->master_substream->runtime;
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
-					     master_runtime->sample_bits,
-					     master_runtime->sample_bits);
-
 		twl4030->slave_substream = substream;
-	} else
+		/* The DAI has one configuration for playback and capture, so
+		 * if the DAI has been already configured then constrain this
+		 * substream to match it. */
+		if (twl4030->configured)
+			twl4030_constraints(twl4030, twl4030->master_substream);
+	} else {
+		if (!(twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) &
+			TWL4030_OPTION_1)) {
+			/* In option2 4 channel is not supported, set the
+			 * constraint for the first stream for channels, the
+			 * second stream will 'inherit' this cosntraint */
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						SNDRV_PCM_HW_PARAM_CHANNELS,
+						2, 2);
+		}
 		twl4030->master_substream = substream;
+	}
 
 	return 0;
 }
@@ -1270,6 +1584,17 @@
 		twl4030->master_substream = twl4030->slave_substream;
 
 	twl4030->slave_substream = NULL;
+
+	/* If all streams are closed, or the remaining stream has not yet
+	 * been configured than set the DAI as not configured. */
+	if (!twl4030->master_substream)
+		twl4030->configured = 0;
+	 else if (!twl4030->master_substream->runtime->channels)
+		twl4030->configured = 0;
+
+	 /* If the closing substream had 4 channel, do the necessary cleanup */
+	if (substream->runtime->channels == 4)
+		twl4030_tdm_enable(codec, substream->stream, 0);
 }
 
 static int twl4030_hw_params(struct snd_pcm_substream *substream,
@@ -1282,8 +1607,24 @@
 	struct twl4030_priv *twl4030 = codec->private_data;
 	u8 mode, old_mode, format, old_format;
 
-	if (substream == twl4030->slave_substream)
-		/* Ignoring hw_params for slave substream */
+	 /* If the substream has 4 channel, do the necessary setup */
+	if (params_channels(params) == 4) {
+		u8 format, mode;
+
+		format = twl4030_read_reg_cache(codec, TWL4030_REG_AUDIO_IF);
+		mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE);
+
+		/* Safety check: are we in the correct operating mode and
+		 * the interface is in TDM mode? */
+		if ((mode & TWL4030_OPTION_1) &&
+		    ((format & TWL4030_AIF_FORMAT) == TWL4030_AIF_FORMAT_TDM))
+			twl4030_tdm_enable(codec, substream->stream, 1);
+		else
+			return -EINVAL;
+	}
+
+	if (twl4030->configured)
+		/* Ignoring hw_params for already configured DAI */
 		return 0;
 
 	/* bit rate */
@@ -1363,6 +1704,21 @@
 		/* set CODECPDZ afterwards */
 		twl4030_codec_enable(codec, 1);
 	}
+
+	/* Store the important parameters for the DAI configuration and set
+	 * the DAI as configured */
+	twl4030->configured = 1;
+	twl4030->rate = params_rate(params);
+	twl4030->sample_bits = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_SAMPLE_BITS)->min;
+	twl4030->channels = params_channels(params);
+
+	/* If both playback and capture streams are open, and one of them
+	 * is setting the hw parameters right now (since we are here), set
+	 * constraints to the other stream to match the current one. */
+	if (twl4030->slave_substream)
+		twl4030_constraints(twl4030, substream);
+
 	return 0;
 }
 
@@ -1370,17 +1726,21 @@
 		int clk_id, unsigned int freq, int dir)
 {
 	struct snd_soc_codec *codec = codec_dai->codec;
+	struct twl4030_priv *twl4030 = codec->private_data;
 	u8 infreq;
 
 	switch (freq) {
 	case 19200000:
 		infreq = TWL4030_APLL_INFREQ_19200KHZ;
+		twl4030->sysclk = 19200;
 		break;
 	case 26000000:
 		infreq = TWL4030_APLL_INFREQ_26000KHZ;
+		twl4030->sysclk = 26000;
 		break;
 	case 38400000:
 		infreq = TWL4030_APLL_INFREQ_38400KHZ;
+		twl4030->sysclk = 38400;
 		break;
 	default:
 		printk(KERN_ERR "TWL4030 set sysclk: unknown rate %d\n",
@@ -1424,6 +1784,9 @@
 	case SND_SOC_DAIFMT_I2S:
 		format |= TWL4030_AIF_FORMAT_CODEC;
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		format |= TWL4030_AIF_FORMAT_TDM;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1443,6 +1806,180 @@
 	return 0;
 }
 
+/* In case of voice mode, the RX1 L(VRX) for downlink and the TX2 L/R
+ * (VTXL, VTXR) for uplink has to be enabled/disabled. */
+static void twl4030_voice_enable(struct snd_soc_codec *codec, int direction,
+				int enable)
+{
+	u8 reg, mask;
+
+	reg = twl4030_read_reg_cache(codec, TWL4030_REG_OPTION);
+
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+		mask = TWL4030_ARXL1_VRX_EN;
+	else
+		mask = TWL4030_ATXL2_VTXL_EN | TWL4030_ATXR2_VTXR_EN;
+
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+
+	twl4030_write(codec, TWL4030_REG_OPTION, reg);
+}
+
+static int twl4030_voice_startup(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u8 infreq;
+	u8 mode;
+
+	/* If the system master clock is not 26MHz, the voice PCM interface is
+	 * not avilable.
+	 */
+	infreq = twl4030_read_reg_cache(codec, TWL4030_REG_APLL_CTL)
+		& TWL4030_APLL_INFREQ;
+
+	if (infreq != TWL4030_APLL_INFREQ_26000KHZ) {
+		printk(KERN_ERR "TWL4030 voice startup: "
+			"MCLK is not 26MHz, call set_sysclk() on init\n");
+		return -EINVAL;
+	}
+
+	/* If the codec mode is not option2, the voice PCM interface is not
+	 * avilable.
+	 */
+	mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE)
+		& TWL4030_OPT_MODE;
+
+	if (mode != TWL4030_OPTION_2) {
+		printk(KERN_ERR "TWL4030 voice startup: "
+			"the codec mode is not option2\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void twl4030_voice_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	/* Enable voice digital filters */
+	twl4030_voice_enable(codec, substream->stream, 0);
+}
+
+static int twl4030_voice_hw_params(struct snd_pcm_substream *substream,
+		struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u8 old_mode, mode;
+
+	/* Enable voice digital filters */
+	twl4030_voice_enable(codec, substream->stream, 1);
+
+	/* bit rate */
+	old_mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE)
+		& ~(TWL4030_CODECPDZ);
+	mode = old_mode;
+
+	switch (params_rate(params)) {
+	case 8000:
+		mode &= ~(TWL4030_SEL_16K);
+		break;
+	case 16000:
+		mode |= TWL4030_SEL_16K;
+		break;
+	default:
+		printk(KERN_ERR "TWL4030 voice hw params: unknown rate %d\n",
+			params_rate(params));
+		return -EINVAL;
+	}
+
+	if (mode != old_mode) {
+		/* change rate and set CODECPDZ */
+		twl4030_codec_enable(codec, 0);
+		twl4030_write(codec, TWL4030_REG_CODEC_MODE, mode);
+		twl4030_codec_enable(codec, 1);
+	}
+
+	return 0;
+}
+
+static int twl4030_voice_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u8 infreq;
+
+	switch (freq) {
+	case 26000000:
+		infreq = TWL4030_APLL_INFREQ_26000KHZ;
+		break;
+	default:
+		printk(KERN_ERR "TWL4030 voice set sysclk: unknown rate %d\n",
+			freq);
+		return -EINVAL;
+	}
+
+	infreq |= TWL4030_APLL_EN;
+	twl4030_write(codec, TWL4030_REG_APLL_CTL, infreq);
+
+	return 0;
+}
+
+static int twl4030_voice_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u8 old_format, format;
+
+	/* get format */
+	old_format = twl4030_read_reg_cache(codec, TWL4030_REG_VOICE_IF);
+	format = old_format;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFM:
+		format &= ~(TWL4030_VIF_SLAVE_EN);
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		format |= TWL4030_VIF_SLAVE_EN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_IB_NF:
+		format &= ~(TWL4030_VIF_FORMAT);
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		format |= TWL4030_VIF_FORMAT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (format != old_format) {
+		/* change format and set CODECPDZ */
+		twl4030_codec_enable(codec, 0);
+		twl4030_write(codec, TWL4030_REG_VOICE_IF, format);
+		twl4030_codec_enable(codec, 1);
+	}
+
+	return 0;
+}
+
 #define TWL4030_RATES	 (SNDRV_PCM_RATE_8000_48000)
 #define TWL4030_FORMATS	 (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_S24_LE)
 
@@ -1454,21 +1991,47 @@
 	.set_fmt	= twl4030_set_dai_fmt,
 };
 
-struct snd_soc_dai twl4030_dai = {
+static struct snd_soc_dai_ops twl4030_dai_voice_ops = {
+	.startup	= twl4030_voice_startup,
+	.shutdown	= twl4030_voice_shutdown,
+	.hw_params	= twl4030_voice_hw_params,
+	.set_sysclk	= twl4030_voice_set_dai_sysclk,
+	.set_fmt	= twl4030_voice_set_dai_fmt,
+};
+
+struct snd_soc_dai twl4030_dai[] = {
+{
 	.name = "twl4030",
 	.playback = {
-		.stream_name = "Playback",
+		.stream_name = "HiFi Playback",
 		.channels_min = 2,
-		.channels_max = 2,
+		.channels_max = 4,
 		.rates = TWL4030_RATES | SNDRV_PCM_RATE_96000,
 		.formats = TWL4030_FORMATS,},
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 2,
-		.channels_max = 2,
+		.channels_max = 4,
 		.rates = TWL4030_RATES,
 		.formats = TWL4030_FORMATS,},
 	.ops = &twl4030_dai_ops,
+},
+{
+	.name = "twl4030 Voice",
+	.playback = {
+		.stream_name = "Voice Playback",
+		.channels_min = 1,
+		.channels_max = 1,
+		.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+	.ops = &twl4030_dai_voice_ops,
+},
 };
 EXPORT_SYMBOL_GPL(twl4030_dai);
 
@@ -1500,6 +2063,8 @@
 static int twl4030_init(struct snd_soc_device *socdev)
 {
 	struct snd_soc_codec *codec = socdev->card->codec;
+	struct twl4030_setup_data *setup = socdev->codec_data;
+	struct twl4030_priv *twl4030 = codec->private_data;
 	int ret = 0;
 
 	printk(KERN_INFO "TWL4030 Audio Codec init \n");
@@ -1509,14 +2074,31 @@
 	codec->read = twl4030_read_reg_cache;
 	codec->write = twl4030_write;
 	codec->set_bias_level = twl4030_set_bias_level;
-	codec->dai = &twl4030_dai;
-	codec->num_dai = 1;
+	codec->dai = twl4030_dai;
+	codec->num_dai = ARRAY_SIZE(twl4030_dai),
 	codec->reg_cache_size = sizeof(twl4030_reg);
 	codec->reg_cache = kmemdup(twl4030_reg, sizeof(twl4030_reg),
 					GFP_KERNEL);
 	if (codec->reg_cache == NULL)
 		return -ENOMEM;
 
+	/* Configuration for headset ramp delay from setup data */
+	if (setup) {
+		unsigned char hs_pop;
+
+		if (setup->sysclk)
+			twl4030->sysclk = setup->sysclk;
+		else
+			twl4030->sysclk = 26000;
+
+		hs_pop = twl4030_read_reg_cache(codec, TWL4030_REG_HS_POPN_SET);
+		hs_pop &= ~TWL4030_RAMP_DELAY;
+		hs_pop |= (setup->ramp_delay_value << 2);
+		twl4030_write_reg_cache(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	} else {
+		twl4030->sysclk = 26000;
+	}
+
 	/* register pcms */
 	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
 	if (ret < 0) {
@@ -1604,13 +2186,13 @@
 
 static int __init twl4030_modinit(void)
 {
-	return snd_soc_register_dai(&twl4030_dai);
+	return snd_soc_register_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai));
 }
 module_init(twl4030_modinit);
 
 static void __exit twl4030_exit(void)
 {
-	snd_soc_unregister_dai(&twl4030_dai);
+	snd_soc_unregister_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai));
 }
 module_exit(twl4030_exit);
 
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index cb63765..fe5f395 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -92,8 +92,9 @@
 #define TWL4030_REG_VIBRA_PWM_SET	0x47
 #define TWL4030_REG_ANAMIC_GAIN		0x48
 #define TWL4030_REG_MISC_SET_2		0x49
+#define TWL4030_REG_SW_SHADOW		0x4A
 
-#define TWL4030_CACHEREGNUM	(TWL4030_REG_MISC_SET_2 + 1)
+#define TWL4030_CACHEREGNUM	(TWL4030_REG_SW_SHADOW + 1)
 
 /* Bitfield Definitions */
 
@@ -110,9 +111,22 @@
 #define TWL4030_APLL_RATE_44100		0x90
 #define TWL4030_APLL_RATE_48000		0xA0
 #define TWL4030_APLL_RATE_96000		0xE0
-#define TWL4030_SEL_16K			0x04
+#define TWL4030_SEL_16K			0x08
 #define TWL4030_CODECPDZ		0x02
 #define TWL4030_OPT_MODE		0x01
+#define TWL4030_OPTION_1		(1 << 0)
+#define TWL4030_OPTION_2		(0 << 0)
+
+/* TWL4030_OPTION (0x02) Fields */
+
+#define TWL4030_ATXL1_EN		(1 << 0)
+#define TWL4030_ATXR1_EN		(1 << 1)
+#define TWL4030_ATXL2_VTXL_EN		(1 << 2)
+#define TWL4030_ATXR2_VTXR_EN		(1 << 3)
+#define TWL4030_ARXL1_VRX_EN		(1 << 4)
+#define TWL4030_ARXR1_EN		(1 << 5)
+#define TWL4030_ARXL2_EN		(1 << 6)
+#define TWL4030_ARXR2_EN		(1 << 7)
 
 /* TWL4030_REG_MICBIAS_CTL (0x04) Fields */
 
@@ -171,6 +185,17 @@
 #define TWL4030_CLK256FS_EN		0x02
 #define TWL4030_AIF_EN			0x01
 
+/* VOICE_IF (0x0F) Fields */
+
+#define TWL4030_VIF_SLAVE_EN		0x80
+#define TWL4030_VIF_DIN_EN		0x40
+#define TWL4030_VIF_DOUT_EN		0x20
+#define TWL4030_VIF_SWAP		0x10
+#define TWL4030_VIF_FORMAT		0x08
+#define TWL4030_VIF_TRI_EN		0x04
+#define TWL4030_VIF_SUB_EN		0x02
+#define TWL4030_VIF_EN			0x01
+
 /* EAR_CTL (0x21) */
 #define TWL4030_EAR_GAIN		0x30
 
@@ -236,7 +261,19 @@
 #define TWL4030_SMOOTH_ANAVOL_EN	0x02
 #define TWL4030_DIGMIC_LR_SWAP_EN	0x01
 
-extern struct snd_soc_dai twl4030_dai;
+/* TWL4030_REG_SW_SHADOW (0x4A) Fields */
+#define TWL4030_HFL_EN			0x01
+#define TWL4030_HFR_EN			0x02
+
+#define TWL4030_DAI_HIFI		0
+#define TWL4030_DAI_VOICE		1
+
+extern struct snd_soc_dai twl4030_dai[2];
 extern struct snd_soc_codec_device soc_codec_dev_twl4030;
 
+struct twl4030_setup_data {
+	unsigned int ramp_delay_value;
+	unsigned int sysclk;
+};
+
 #endif	/* End of __TWL4030_AUDIO_H__ */
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index ddefb8f..269b108 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -101,7 +101,7 @@
 	pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value);
 
 	if (reg >= UDA134X_REGS_NUM) {
-		printk(KERN_ERR "%s unkown register: reg: %d",
+		printk(KERN_ERR "%s unkown register: reg: %u",
 		       __func__, reg);
 		return -EINVAL;
 	}
@@ -296,7 +296,7 @@
 	struct snd_soc_codec *codec = codec_dai->codec;
 	struct uda134x_priv *uda134x = codec->private_data;
 
-	pr_debug("%s clk_id: %d, freq: %d, dir: %d\n", __func__,
+	pr_debug("%s clk_id: %d, freq: %u, dir: %d\n", __func__,
 		 clk_id, freq, dir);
 
 	/* Anything between 256fs*8Khz and 512fs*48Khz should be acceptable
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index 0275321..e7348d3 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -1108,7 +1108,7 @@
 	if (ret < 0)
 		return ret;
 	dev_dbg(wm8350->dev,
-		"FLL in %d FLL out %d N 0x%x K 0x%x div %d ratio %d",
+		"FLL in %u FLL out %u N 0x%x K 0x%x div %d ratio %d",
 		freq_in, freq_out, fll_div.n, fll_div.k, fll_div.div,
 		fll_div.ratio);
 
diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h
index d11bd92..d088eb4 100644
--- a/sound/soc/codecs/wm8350.h
+++ b/sound/soc/codecs/wm8350.h
@@ -13,6 +13,7 @@
 #define _WM8350_H
 
 #include <sound/soc.h>
+#include <linux/mfd/wm8350/audio.h>
 
 extern struct snd_soc_dai wm8350_dai;
 extern struct snd_soc_codec_device soc_codec_dev_wm8350;
diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c
index 510efa6..502eefa 100644
--- a/sound/soc/codecs/wm8400.c
+++ b/sound/soc/codecs/wm8400.c
@@ -954,7 +954,7 @@
 		factors->outdiv *= 2;
 		if (factors->outdiv > 32) {
 			dev_err(wm8400->wm8400->dev,
-				"Unsupported FLL output frequency %dHz\n",
+				"Unsupported FLL output frequency %uHz\n",
 				Fout);
 			return -EINVAL;
 		}
@@ -1003,7 +1003,7 @@
 	factors->k = K / 10;
 
 	dev_dbg(wm8400->wm8400->dev,
-		"FLL: Fref=%d Fout=%d N=%x K=%x, FRATIO=%x OUTDIV=%x\n",
+		"FLL: Fref=%u Fout=%u N=%x K=%x, FRATIO=%x OUTDIV=%x\n",
 		Fref, Fout,
 		factors->n, factors->k, factors->fratio, factors->outdiv);
 
@@ -1473,8 +1473,8 @@
 
 	codec = &priv->codec;
 	codec->private_data = priv;
-	codec->control_data = dev->dev.driver_data;
-	priv->wm8400 = dev->dev.driver_data;
+	codec->control_data = dev_get_drvdata(&dev->dev);
+	priv->wm8400 = dev_get_drvdata(&dev->dev);
 
 	ret = regulator_bulk_get(priv->wm8400->dev,
 				 ARRAY_SIZE(power), &power[0]);
diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c
index 6a4cea0..c8b8dba 100644
--- a/sound/soc/codecs/wm8510.c
+++ b/sound/soc/codecs/wm8510.c
@@ -298,7 +298,7 @@
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"WM8510 N value %d outwith recommended range!d\n",
+			"WM8510 N value %u outwith recommended range!d\n",
 			Ndiv);
 
 	pll_div.n = Ndiv;
diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c
index 9f6be3d..86c4b24d 100644
--- a/sound/soc/codecs/wm8580.c
+++ b/sound/soc/codecs/wm8580.c
@@ -415,7 +415,7 @@
 	unsigned int K, Ndiv, Nmod;
 	int i;
 
-	pr_debug("wm8580: PLL %dHz->%dHz\n", source, target);
+	pr_debug("wm8580: PLL %uHz->%uHz\n", source, target);
 
 	/* Scale the output frequency up; the PLL should run in the
 	 * region of 90-100MHz.
@@ -447,7 +447,7 @@
 
 	if ((Ndiv < 5) || (Ndiv > 13)) {
 		printk(KERN_ERR
-			"WM8580 N=%d outside supported range\n", Ndiv);
+			"WM8580 N=%u outside supported range\n", Ndiv);
 		return -EINVAL;
 	}
 
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index e043e3f..7a20587 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -666,14 +666,14 @@
 	codec->hw_write = (hw_write_t)wm8731_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8731;
+	dev_set_drvdata(&spi->dev, wm8731);
 
 	return wm8731_register(wm8731);
 }
 
 static int __devexit wm8731_spi_remove(struct spi_device *spi)
 {
-	struct wm8731_priv *wm8731 = spi->dev.driver_data;
+	struct wm8731_priv *wm8731 = dev_get_drvdata(&spi->dev);
 
 	wm8731_unregister(wm8731);
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index a6e8f3f..d28eeac 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -703,7 +703,7 @@
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"wm8753: unsupported N = %d\n", Ndiv);
+			"wm8753: unsupported N = %u\n", Ndiv);
 
 	pll_div->n = Ndiv;
 	Nmod = target % source;
@@ -1822,14 +1822,14 @@
 	codec->hw_write = (hw_write_t)wm8753_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8753;
+	dev_set_drvdata(&spi->dev, wm8753);
 
 	return wm8753_register(wm8753);
 }
 
 static int __devexit wm8753_spi_remove(struct spi_device *spi)
 {
-	struct wm8753_priv *wm8753 = spi->dev.driver_data;
+	struct wm8753_priv *wm8753 = dev_get_drvdata(&spi->dev);
 	wm8753_unregister(wm8753);
 	return 0;
 }
diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c
index 46c5ea1..3c78945 100644
--- a/sound/soc/codecs/wm8900.c
+++ b/sound/soc/codecs/wm8900.c
@@ -778,11 +778,11 @@
 	}
 
 	if (target > 100000000)
-		printk(KERN_WARNING "wm8900: FLL rate %d out of range, Fref=%d"
-		       " Fout=%d\n", target, Fref, Fout);
+		printk(KERN_WARNING "wm8900: FLL rate %u out of range, Fref=%u"
+		       " Fout=%u\n", target, Fref, Fout);
 	if (div > 32) {
 		printk(KERN_ERR "wm8900: Invalid FLL division rate %u, "
-		       "Fref=%d, Fout=%d, target=%d\n",
+		       "Fref=%u, Fout=%u, target=%u\n",
 		       div, Fref, Fout, target);
 		return -EINVAL;
 	}
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 8cf571f..d8a9222 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -217,7 +217,6 @@
 	int sysclk;
 
 	/* Reference counts */
-	int charge_pump_users;
 	int class_w_users;
 	int playback_active;
 	int capture_active;
@@ -373,6 +372,15 @@
 #define WM8903_OUTPUT_INT   0x2
 #define WM8903_OUTPUT_IN    0x1
 
+static int wm8903_cp_event(struct snd_soc_dapm_widget *w,
+			   struct snd_kcontrol *kcontrol, int event)
+{
+	WARN_ON(event != SND_SOC_DAPM_POST_PMU);
+	mdelay(4);
+
+	return 0;
+}
+
 /*
  * Event for headphone and line out amplifier power changes.  Special
  * power up/down sequences are required in order to maximise pop/click
@@ -382,19 +390,20 @@
 			       struct snd_kcontrol *kcontrol, int event)
 {
 	struct snd_soc_codec *codec = w->codec;
-	struct wm8903_priv *wm8903 = codec->private_data;
-	struct i2c_client *i2c = codec->control_data;
 	u16 val;
 	u16 reg;
+	u16 dcs_reg;
+	u16 dcs_bit;
 	int shift;
-	u16 cp_reg = wm8903_read(codec, WM8903_CHARGE_PUMP_0);
 
 	switch (w->reg) {
 	case WM8903_POWER_MANAGEMENT_2:
 		reg = WM8903_ANALOGUE_HP_0;
+		dcs_bit = 0 + w->shift;
 		break;
 	case WM8903_POWER_MANAGEMENT_3:
 		reg = WM8903_ANALOGUE_LINEOUT_0;
+		dcs_bit = 2 + w->shift;
 		break;
 	default:
 		BUG();
@@ -419,18 +428,6 @@
 		/* Short the output */
 		val &= ~(WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
-
-		wm8903->charge_pump_users++;
-
-		dev_dbg(&i2c->dev, "Charge pump use count now %d\n",
-			wm8903->charge_pump_users);
-
-		if (wm8903->charge_pump_users == 1) {
-			dev_dbg(&i2c->dev, "Enabling charge pump\n");
-			wm8903_write(codec, WM8903_CHARGE_PUMP_0,
-				     cp_reg | WM8903_CP_ENA);
-			mdelay(4);
-		}
 	}
 
 	if (event & SND_SOC_DAPM_POST_PMU) {
@@ -446,6 +443,11 @@
 		val |= (WM8903_OUTPUT_OUT << shift);
 		wm8903_write(codec, reg, val);
 
+		/* Enable the DC servo */
+		dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0);
+		dcs_reg |= dcs_bit;
+		wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg);
+
 		/* Remove the short */
 		val |= (WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
@@ -458,25 +460,17 @@
 		val &= ~(WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
 
+		/* Disable the DC servo */
+		dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0);
+		dcs_reg &= ~dcs_bit;
+		wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg);
+
 		/* Then disable the intermediate and output stages */
 		val &= ~((WM8903_OUTPUT_OUT | WM8903_OUTPUT_INT |
 			  WM8903_OUTPUT_IN) << shift);
 		wm8903_write(codec, reg, val);
 	}
 
-	if (event & SND_SOC_DAPM_POST_PMD) {
-		wm8903->charge_pump_users--;
-
-		dev_dbg(&i2c->dev, "Charge pump use count now %d\n",
-			wm8903->charge_pump_users);
-
-		if (wm8903->charge_pump_users == 0) {
-			dev_dbg(&i2c->dev, "Disabling charge pump\n");
-			wm8903_write(codec, WM8903_CHARGE_PUMP_0,
-				     cp_reg & ~WM8903_CP_ENA);
-		}
-	}
-
 	return 0;
 }
 
@@ -539,6 +533,7 @@
 /* ALSA can only do steps of .01dB */
 static const DECLARE_TLV_DB_SCALE(digital_tlv, -7200, 75, 1);
 
+static const DECLARE_TLV_DB_SCALE(digital_sidetone_tlv, -3600, 300, 0);
 static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0);
 
 static const DECLARE_TLV_DB_SCALE(drc_tlv_thresh, 0, 75, 0);
@@ -657,6 +652,16 @@
 	SOC_ENUM_SINGLE(WM8903_ANALOGUE_RIGHT_INPUT_1, 4, 3, rinput_mux_text);
 
 
+static const char *sidetone_text[] = {
+	"None", "Left", "Right"
+};
+
+static const struct soc_enum lsidetone_enum =
+	SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 2, 3, sidetone_text);
+
+static const struct soc_enum rsidetone_enum =
+	SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 0, 3, sidetone_text);
+
 static const struct snd_kcontrol_new wm8903_snd_controls[] = {
 
 /* Input PGAs - No TLV since the scale depends on PGA mode */
@@ -700,6 +705,9 @@
 SOC_ENUM("ADC Companding Mode", adc_companding),
 SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0),
 
+SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8903_DAC_DIGITAL_0, 4, 8,
+	       12, 0, digital_sidetone_tlv),
+
 /* DAC */
 SOC_DOUBLE_R_TLV("Digital Playback Volume", WM8903_DAC_DIGITAL_VOLUME_LEFT,
 		 WM8903_DAC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv),
@@ -762,6 +770,12 @@
 static const struct snd_kcontrol_new rinput_inv_mux =
 	SOC_DAPM_ENUM("Right Inverting Input Mux", rinput_inv_enum);
 
+static const struct snd_kcontrol_new lsidetone_mux =
+	SOC_DAPM_ENUM("DACL Sidetone Mux", lsidetone_enum);
+
+static const struct snd_kcontrol_new rsidetone_mux =
+	SOC_DAPM_ENUM("DACR Sidetone Mux", rsidetone_enum);
+
 static const struct snd_kcontrol_new left_output_mixer[] = {
 SOC_DAPM_SINGLE("DACL Switch", WM8903_ANALOGUE_LEFT_MIX_0, 3, 1, 0),
 SOC_DAPM_SINGLE("DACR Switch", WM8903_ANALOGUE_LEFT_MIX_0, 2, 1, 0),
@@ -828,6 +842,9 @@
 SND_SOC_DAPM_ADC("ADCL", "Left HiFi Capture", WM8903_POWER_MANAGEMENT_6, 1, 0),
 SND_SOC_DAPM_ADC("ADCR", "Right HiFi Capture", WM8903_POWER_MANAGEMENT_6, 0, 0),
 
+SND_SOC_DAPM_MUX("DACL Sidetone", SND_SOC_NOPM, 0, 0, &lsidetone_mux),
+SND_SOC_DAPM_MUX("DACR Sidetone", SND_SOC_NOPM, 0, 0, &rsidetone_mux),
+
 SND_SOC_DAPM_DAC("DACL", "Left Playback", WM8903_POWER_MANAGEMENT_6, 3, 0),
 SND_SOC_DAPM_DAC("DACR", "Right Playback", WM8903_POWER_MANAGEMENT_6, 2, 0),
 
@@ -844,26 +861,29 @@
 SND_SOC_DAPM_PGA_E("Left Headphone Output PGA", WM8903_POWER_MANAGEMENT_2,
 		   1, 0, NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_PGA_E("Right Headphone Output PGA", WM8903_POWER_MANAGEMENT_2,
 		   0, 0, NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 
 SND_SOC_DAPM_PGA_E("Left Line Output PGA", WM8903_POWER_MANAGEMENT_3, 1, 0,
 		   NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_PGA_E("Right Line Output PGA", WM8903_POWER_MANAGEMENT_3, 0, 0,
 		   NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 
 SND_SOC_DAPM_PGA("Left Speaker PGA", WM8903_POWER_MANAGEMENT_5, 1, 0,
 		 NULL, 0),
 SND_SOC_DAPM_PGA("Right Speaker PGA", WM8903_POWER_MANAGEMENT_5, 0, 0,
 		 NULL, 0),
 
+SND_SOC_DAPM_SUPPLY("Charge Pump", WM8903_CHARGE_PUMP_0, 0, 0,
+		    wm8903_cp_event, SND_SOC_DAPM_POST_PMU),
+SND_SOC_DAPM_SUPPLY("CLK_DSP", WM8903_CLOCK_RATES_2, 1, 0, NULL, 0),
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
@@ -909,7 +929,19 @@
 	{ "Right Input PGA", NULL, "Right Input Mode Mux" },
 
 	{ "ADCL", NULL, "Left Input PGA" },
+	{ "ADCL", NULL, "CLK_DSP" },
 	{ "ADCR", NULL, "Right Input PGA" },
+	{ "ADCR", NULL, "CLK_DSP" },
+
+	{ "DACL Sidetone", "Left", "ADCL" },
+	{ "DACL Sidetone", "Right", "ADCR" },
+	{ "DACR Sidetone", "Left", "ADCL" },
+	{ "DACR Sidetone", "Right", "ADCR" },
+
+	{ "DACL", NULL, "DACL Sidetone" },
+	{ "DACL", NULL, "CLK_DSP" },
+	{ "DACR", NULL, "DACR Sidetone" },
+	{ "DACR", NULL, "CLK_DSP" },
 
 	{ "Left Output Mixer", "Left Bypass Switch", "Left Input PGA" },
 	{ "Left Output Mixer", "Right Bypass Switch", "Right Input PGA" },
@@ -951,6 +983,11 @@
 
 	{ "ROP", NULL, "Right Speaker PGA" },
 	{ "RON", NULL, "Right Speaker PGA" },
+
+	{ "Left Headphone Output PGA", NULL, "Charge Pump" },
+	{ "Right Headphone Output PGA", NULL, "Charge Pump" },
+	{ "Left Line Output PGA", NULL, "Charge Pump" },
+	{ "Right Line Output PGA", NULL, "Charge Pump" },
 };
 
 static int wm8903_add_widgets(struct snd_soc_codec *codec)
@@ -985,6 +1022,11 @@
 			wm8903_write(codec, WM8903_CLOCK_RATES_2,
 				     WM8903_CLK_SYS_ENA);
 
+			/* Change DC servo dither level in startup sequence */
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_0, 0x11);
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_1, 0x1257);
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_2, 0x2);
+
 			wm8903_run_sequence(codec, 0);
 			wm8903_sync_reg_cache(codec, codec->reg_cache);
 
@@ -1277,14 +1319,8 @@
 	if (wm8903->master_substream) {
 		master_runtime = wm8903->master_substream->runtime;
 
-		dev_dbg(&i2c->dev, "Constraining to %d bits at %dHz\n",
-			master_runtime->sample_bits,
-			master_runtime->rate);
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
+		dev_dbg(&i2c->dev, "Constraining to %d bits\n",
+			master_runtime->sample_bits);
 
 		snd_pcm_hw_constraint_minmax(substream->runtime,
 					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
@@ -1523,6 +1559,7 @@
 		 .formats = WM8903_FORMATS,
 	 },
 	.ops = &wm8903_dai_ops,
+	.symmetric_rates = 1,
 };
 EXPORT_SYMBOL_GPL(wm8903_dai);
 
diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
new file mode 100644
index 0000000..b8e17d6
--- /dev/null
+++ b/sound/soc/codecs/wm8940.c
@@ -0,0 +1,955 @@
+/*
+ * wm8940.c  --  WM8940 ALSA Soc Audio driver
+ *
+ * Author: Jonathan Cameron <jic23@cam.ac.uk>
+ *
+ * Based on wm8510.c
+ *    Copyright  2006 Wolfson Microelectronics PLC.
+ *    Author:  Liam Girdwood <lrg@slimlogic.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Not currently handled:
+ * Notch filter control
+ * AUXMode (inverting vs mixer)
+ * No means to obtain current gain if alc enabled.
+ * No use made of gpio
+ * Fast VMID discharge for power down
+ * Soft Start
+ * DLR and ALR Swaps not enabled
+ * Digital Sidetone not supported
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "wm8940.h"
+
+struct wm8940_priv {
+	unsigned int sysclk;
+	u16 reg_cache[WM8940_CACHEREGNUM];
+	struct snd_soc_codec codec;
+};
+
+static u16 wm8940_reg_defaults[] = {
+	0x8940, /* Soft Reset */
+	0x0000, /* Power 1 */
+	0x0000, /* Power 2 */
+	0x0000, /* Power 3 */
+	0x0010, /* Interface Control */
+	0x0000, /* Companding Control */
+	0x0140, /* Clock Control */
+	0x0000, /* Additional Controls */
+	0x0000, /* GPIO Control */
+	0x0002, /* Auto Increment Control */
+	0x0000, /* DAC Control */
+	0x00FF, /* DAC Volume */
+	0,
+	0,
+	0x0100, /* ADC Control */
+	0x00FF, /* ADC Volume */
+	0x0000, /* Notch Filter 1 Control 1 */
+	0x0000, /* Notch Filter 1 Control 2 */
+	0x0000, /* Notch Filter 2 Control 1 */
+	0x0000, /* Notch Filter 2 Control 2 */
+	0x0000, /* Notch Filter 3 Control 1 */
+	0x0000, /* Notch Filter 3 Control 2 */
+	0x0000, /* Notch Filter 4 Control 1 */
+	0x0000, /* Notch Filter 4 Control 2 */
+	0x0032, /* DAC Limit Control 1 */
+	0x0000, /* DAC Limit Control 2 */
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0x0038, /* ALC Control 1 */
+	0x000B, /* ALC Control 2 */
+	0x0032, /* ALC Control 3 */
+	0x0000, /* Noise Gate */
+	0x0041, /* PLLN */
+	0x000C, /* PLLK1 */
+	0x0093, /* PLLK2 */
+	0x00E9, /* PLLK3 */
+	0,
+	0,
+	0x0030, /* ALC Control 4 */
+	0,
+	0x0002, /* Input Control */
+	0x0050, /* PGA Gain */
+	0,
+	0x0002, /* ADC Boost Control */
+	0,
+	0x0002, /* Output Control */
+	0x0000, /* Speaker Mixer Control */
+	0,
+	0,
+	0,
+	0x0079, /* Speaker Volume */
+	0,
+	0x0000, /* Mono Mixer Control */
+};
+
+static inline unsigned int wm8940_read_reg_cache(struct snd_soc_codec *codec,
+						 unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg >= ARRAY_SIZE(wm8940_reg_defaults))
+		return -1;
+
+	return cache[reg];
+}
+
+static inline int wm8940_write_reg_cache(struct snd_soc_codec *codec,
+					  u16 reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg >= ARRAY_SIZE(wm8940_reg_defaults))
+		return -1;
+
+	cache[reg] = value;
+
+	return 0;
+}
+
+static int wm8940_write(struct snd_soc_codec *codec, unsigned int reg,
+			unsigned int value)
+{
+	int ret;
+	u8 data[3] = { reg,
+		       (value & 0xff00) >> 8,
+		       (value & 0x00ff)
+	};
+
+	wm8940_write_reg_cache(codec, reg, value);
+
+	ret = codec->hw_write(codec->control_data, data, 3);
+
+	if (ret < 0)
+		return ret;
+	else if (ret != 3)
+		return -EIO;
+	return 0;
+}
+
+static const char *wm8940_companding[] = { "Off", "NC", "u-law", "A-law" };
+static const struct soc_enum wm8940_adc_companding_enum
+= SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 1, 4, wm8940_companding);
+static const struct soc_enum wm8940_dac_companding_enum
+= SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 3, 4, wm8940_companding);
+
+static const char *wm8940_alc_mode_text[] = {"ALC", "Limiter"};
+static const struct soc_enum wm8940_alc_mode_enum
+= SOC_ENUM_SINGLE(WM8940_ALC3, 8, 2, wm8940_alc_mode_text);
+
+static const char *wm8940_mic_bias_level_text[] = {"0.9", "0.65"};
+static const struct soc_enum wm8940_mic_bias_level_enum
+= SOC_ENUM_SINGLE(WM8940_INPUTCTL, 8, 2, wm8940_mic_bias_level_text);
+
+static const char *wm8940_filter_mode_text[] = {"Audio", "Application"};
+static const struct soc_enum wm8940_filter_mode_enum
+= SOC_ENUM_SINGLE(WM8940_ADC, 7, 2, wm8940_filter_mode_text);
+
+static DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1);
+static DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1);
+static DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0);
+
+static const struct snd_kcontrol_new wm8940_snd_controls[] = {
+	SOC_SINGLE("Digital Loopback Switch", WM8940_COMPANDINGCTL,
+		   6, 1, 0),
+	SOC_ENUM("DAC Companding", wm8940_dac_companding_enum),
+	SOC_ENUM("ADC Companding", wm8940_adc_companding_enum),
+
+	SOC_ENUM("ALC Mode", wm8940_alc_mode_enum),
+	SOC_SINGLE("ALC Switch", WM8940_ALC1, 8, 1, 0),
+	SOC_SINGLE_TLV("ALC Capture Max Gain", WM8940_ALC1,
+		       3, 7, 1, wm8940_alc_max_tlv),
+	SOC_SINGLE_TLV("ALC Capture Min Gain", WM8940_ALC1,
+		       0, 7, 0, wm8940_alc_min_tlv),
+	SOC_SINGLE_TLV("ALC Capture Target", WM8940_ALC2,
+		       0, 14, 0, wm8940_alc_tar_tlv),
+	SOC_SINGLE("ALC Capture Hold", WM8940_ALC2, 4, 10, 0),
+	SOC_SINGLE("ALC Capture Decay", WM8940_ALC3, 4, 10, 0),
+	SOC_SINGLE("ALC Capture Attach", WM8940_ALC3, 0, 10, 0),
+	SOC_SINGLE("ALC ZC Switch", WM8940_ALC4, 1, 1, 0),
+	SOC_SINGLE("ALC Capture Noise Gate Switch", WM8940_NOISEGATE,
+		   3, 1, 0),
+	SOC_SINGLE("ALC Capture Noise Gate Threshold", WM8940_NOISEGATE,
+		   0, 7, 0),
+
+	SOC_SINGLE("DAC Playback Limiter Switch", WM8940_DACLIM1, 8, 1, 0),
+	SOC_SINGLE("DAC Playback Limiter Attack", WM8940_DACLIM1, 0, 9, 0),
+	SOC_SINGLE("DAC Playback Limiter Decay", WM8940_DACLIM1, 4, 11, 0),
+	SOC_SINGLE_TLV("DAC Playback Limiter Threshold", WM8940_DACLIM2,
+		       4, 9, 1, wm8940_lim_thresh_tlv),
+	SOC_SINGLE_TLV("DAC Playback Limiter Boost", WM8940_DACLIM2,
+		       0, 12, 0, wm8940_lim_boost_tlv),
+
+	SOC_SINGLE("Capture PGA ZC Switch", WM8940_PGAGAIN, 7, 1, 0),
+	SOC_SINGLE_TLV("Capture PGA Volume", WM8940_PGAGAIN,
+		       0, 63, 0, wm8940_pga_vol_tlv),
+	SOC_SINGLE_TLV("Digital Playback Volume", WM8940_DACVOL,
+		       0, 255, 0, wm8940_adc_tlv),
+	SOC_SINGLE_TLV("Digital Capture Volume", WM8940_ADCVOL,
+		       0, 255, 0, wm8940_adc_tlv),
+	SOC_ENUM("Mic Bias Level", wm8940_mic_bias_level_enum),
+	SOC_SINGLE_TLV("Capture Boost Volue", WM8940_ADCBOOST,
+		       8, 1, 0, wm8940_capture_boost_vol_tlv),
+	SOC_SINGLE_TLV("Speaker Playback Volume", WM8940_SPKVOL,
+		       0, 63, 0, wm8940_spk_vol_tlv),
+	SOC_SINGLE("Speaker Playback Switch", WM8940_SPKVOL,  6, 1, 1),
+
+	SOC_SINGLE_TLV("Speaker Mixer Line Bypass Volume", WM8940_SPKVOL,
+		       8, 1, 1, wm8940_att_tlv),
+	SOC_SINGLE("Speaker Playback ZC Switch", WM8940_SPKVOL, 7, 1, 0),
+
+	SOC_SINGLE("Mono Out Switch", WM8940_MONOMIX, 6, 1, 1),
+	SOC_SINGLE_TLV("Mono Mixer Line Bypass Volume", WM8940_MONOMIX,
+		       7, 1, 1, wm8940_att_tlv),
+
+	SOC_SINGLE("High Pass Filter Switch", WM8940_ADC, 8, 1, 0),
+	SOC_ENUM("High Pass Filter Mode", wm8940_filter_mode_enum),
+	SOC_SINGLE("High Pass Filter Cut Off", WM8940_ADC, 4, 7, 0),
+	SOC_SINGLE("ADC Inversion Switch", WM8940_ADC, 0, 1, 0),
+	SOC_SINGLE("DAC Inversion Switch", WM8940_DAC, 0, 1, 0),
+	SOC_SINGLE("DAC Auto Mute Switch", WM8940_DAC, 2, 1, 0),
+	SOC_SINGLE("ZC Timeout Clock Switch", WM8940_ADDCNTRL, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8940_speaker_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_SPKMIX, 1, 1, 0),
+	SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_SPKMIX, 5, 1, 0),
+	SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_SPKMIX, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8940_mono_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_MONOMIX, 1, 1, 0),
+	SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_MONOMIX, 2, 1, 0),
+	SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_MONOMIX, 0, 1, 0),
+};
+
+static DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1);
+static const struct snd_kcontrol_new wm8940_input_boost_controls[] = {
+	SOC_DAPM_SINGLE("Mic PGA Switch", WM8940_PGAGAIN, 6, 1, 1),
+	SOC_DAPM_SINGLE_TLV("Aux Volume", WM8940_ADCBOOST,
+			    0, 7, 0, wm8940_boost_vol_tlv),
+	SOC_DAPM_SINGLE_TLV("Mic Volume", WM8940_ADCBOOST,
+			    4, 7, 0, wm8940_boost_vol_tlv),
+};
+
+static const struct snd_kcontrol_new wm8940_micpga_controls[] = {
+	SOC_DAPM_SINGLE("AUX Switch", WM8940_INPUTCTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("MICP Switch", WM8940_INPUTCTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("MICN Switch", WM8940_INPUTCTL, 1, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget wm8940_dapm_widgets[] = {
+	SND_SOC_DAPM_MIXER("Speaker Mixer", WM8940_POWER3, 2, 0,
+			   &wm8940_speaker_mixer_controls[0],
+			   ARRAY_SIZE(wm8940_speaker_mixer_controls)),
+	SND_SOC_DAPM_MIXER("Mono Mixer", WM8940_POWER3, 3, 0,
+			   &wm8940_mono_mixer_controls[0],
+			   ARRAY_SIZE(wm8940_mono_mixer_controls)),
+	SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM8940_POWER3, 0, 0),
+
+	SND_SOC_DAPM_PGA("SpkN Out", WM8940_POWER3, 5, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("SpkP Out", WM8940_POWER3, 6, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Mono Out", WM8940_POWER3, 7, 0, NULL, 0),
+	SND_SOC_DAPM_OUTPUT("MONOOUT"),
+	SND_SOC_DAPM_OUTPUT("SPKOUTP"),
+	SND_SOC_DAPM_OUTPUT("SPKOUTN"),
+
+	SND_SOC_DAPM_PGA("Aux Input", WM8940_POWER1, 6, 0, NULL, 0),
+	SND_SOC_DAPM_ADC("ADC", "HiFi Capture", WM8940_POWER2, 0, 0),
+	SND_SOC_DAPM_MIXER("Mic PGA", WM8940_POWER2, 2, 0,
+			   &wm8940_micpga_controls[0],
+			   ARRAY_SIZE(wm8940_micpga_controls)),
+	SND_SOC_DAPM_MIXER("Boost Mixer", WM8940_POWER2, 4, 0,
+			   &wm8940_input_boost_controls[0],
+			   ARRAY_SIZE(wm8940_input_boost_controls)),
+	SND_SOC_DAPM_MICBIAS("Mic Bias", WM8940_POWER1, 4, 0),
+
+	SND_SOC_DAPM_INPUT("MICN"),
+	SND_SOC_DAPM_INPUT("MICP"),
+	SND_SOC_DAPM_INPUT("AUX"),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+	/* Mono output mixer */
+	{"Mono Mixer", "PCM Playback Switch", "DAC"},
+	{"Mono Mixer", "Aux Playback Switch", "Aux Input"},
+	{"Mono Mixer", "Line Bypass Switch", "Boost Mixer"},
+
+	/* Speaker output mixer */
+	{"Speaker Mixer", "PCM Playback Switch", "DAC"},
+	{"Speaker Mixer", "Aux Playback Switch", "Aux Input"},
+	{"Speaker Mixer", "Line Bypass Switch", "Boost Mixer"},
+
+	/* Outputs */
+	{"Mono Out", NULL, "Mono Mixer"},
+	{"MONOOUT", NULL, "Mono Out"},
+	{"SpkN Out", NULL, "Speaker Mixer"},
+	{"SpkP Out", NULL, "Speaker Mixer"},
+	{"SPKOUTN", NULL, "SpkN Out"},
+	{"SPKOUTP", NULL, "SpkP Out"},
+
+	/*  Microphone PGA */
+	{"Mic PGA", "MICN Switch", "MICN"},
+	{"Mic PGA", "MICP Switch", "MICP"},
+	{"Mic PGA", "AUX Switch", "AUX"},
+
+	/* Boost Mixer */
+	{"Boost Mixer", "Mic PGA Switch", "Mic PGA"},
+	{"Boost Mixer", "Mic Volume",  "MICP"},
+	{"Boost Mixer", "Aux Volume", "Aux Input"},
+
+	{"ADC", NULL, "Boost Mixer"},
+};
+
+static int wm8940_add_widgets(struct snd_soc_codec *codec)
+{
+	int ret;
+
+	ret = snd_soc_dapm_new_controls(codec, wm8940_dapm_widgets,
+					ARRAY_SIZE(wm8940_dapm_widgets));
+	if (ret)
+		goto error_ret;
+	ret = snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+	if (ret)
+		goto error_ret;
+	ret = snd_soc_dapm_new_widgets(codec);
+
+error_ret:
+	return ret;
+}
+
+#define wm8940_reset(c) wm8940_write(c, WM8940_SOFTRESET, 0);
+
+static int wm8940_set_dai_fmt(struct snd_soc_dai *codec_dai,
+			      unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFE67;
+	u16 clk = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0x1fe;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		clk |= 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+	wm8940_write(codec, WM8940_CLOCK, clk);
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= (2 << 3);
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= (1 << 3);
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= (3 << 3);
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= (3 << 3) | (1 << 7);
+		break;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= (1 << 7);
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= (1 << 8);
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= (1 << 8) | (1 << 7);
+		break;
+	}
+
+	wm8940_write(codec, WM8940_IFACE, iface);
+
+	return 0;
+}
+
+static int wm8940_i2s_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFD9F;
+	u16 addcntrl = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFF1;
+	u16 companding =  wm8940_read_reg_cache(codec,
+						WM8940_COMPANDINGCTL) & 0xFFDF;
+	int ret;
+
+	/* LoutR control */
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE
+	    && params_channels(params) == 2)
+		iface |= (1 << 9);
+
+	switch (params_rate(params)) {
+	case SNDRV_PCM_RATE_8000:
+		addcntrl |= (0x5 << 1);
+		break;
+	case SNDRV_PCM_RATE_11025:
+		addcntrl |= (0x4 << 1);
+		break;
+	case SNDRV_PCM_RATE_16000:
+		addcntrl |= (0x3 << 1);
+		break;
+	case SNDRV_PCM_RATE_22050:
+		addcntrl |= (0x2 << 1);
+		break;
+	case SNDRV_PCM_RATE_32000:
+		addcntrl |= (0x1 << 1);
+		break;
+	case SNDRV_PCM_RATE_44100:
+	case SNDRV_PCM_RATE_48000:
+		break;
+	}
+	ret = wm8940_write(codec, WM8940_ADDCNTRL, addcntrl);
+	if (ret)
+		goto error_ret;
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S8:
+		companding = companding | (1 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= (1 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= (2 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		iface |= (3 << 5);
+		break;
+	}
+	ret = wm8940_write(codec, WM8940_COMPANDINGCTL, companding);
+	if (ret)
+		goto error_ret;
+	ret = wm8940_write(codec, WM8940_IFACE, iface);
+
+error_ret:
+	return ret;
+}
+
+static int wm8940_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8940_read_reg_cache(codec, WM8940_DAC) & 0xffbf;
+
+	if (mute)
+		mute_reg |= 0x40;
+
+	return wm8940_write(codec, WM8940_DAC, mute_reg);
+}
+
+static int wm8940_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 val;
+	u16 pwr_reg = wm8940_read_reg_cache(codec, WM8940_POWER1) & 0x1F0;
+	int ret = 0;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		/* Enable thermal shutdown */
+		val = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL);
+		ret = wm8940_write(codec, WM8940_OUTPUTCTL, val | 0x2);
+		if (ret)
+			break;
+		/* set vmid to 75k */
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1);
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1);
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		/* set vmid to 300k for standby */
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x2);
+		break;
+	case SND_SOC_BIAS_OFF:
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg);
+		break;
+	}
+
+	return ret;
+}
+
+struct pll_ {
+	unsigned int pre_scale:2;
+	unsigned int n:4;
+	unsigned int k;
+};
+
+static struct pll_ pll_div;
+
+/* The size in bits of the pll divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_PLL_SIZE ((1 << 24) * 10)
+static void pll_factors(unsigned int target, unsigned int source)
+{
+	unsigned long long Kpart;
+	unsigned int K, Ndiv, Nmod;
+	/* The left shift ist to avoid accuracy loss when right shifting */
+	Ndiv = target / source;
+
+	if (Ndiv > 12) {
+		source <<= 1;
+		/* Multiply by 2 */
+		pll_div.pre_scale = 0;
+		Ndiv = target / source;
+	} else if (Ndiv < 3) {
+		source >>= 2;
+		/* Divide by 4 */
+		pll_div.pre_scale = 3;
+		Ndiv = target / source;
+	} else if (Ndiv < 6) {
+		source >>= 1;
+		/* divide by 2 */
+		pll_div.pre_scale = 2;
+		Ndiv = target / source;
+	} else
+		pll_div.pre_scale = 1;
+
+	if ((Ndiv < 6) || (Ndiv > 12))
+		printk(KERN_WARNING
+			"WM8940 N value %d outwith recommended range!d\n",
+			Ndiv);
+
+	pll_div.n = Ndiv;
+	Nmod = target % source;
+	Kpart = FIXED_PLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, source);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	/* Check if we need to round */
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	K /= 10;
+
+	pll_div.k = K;
+}
+
+/* Untested at the moment */
+static int wm8940_set_dai_pll(struct snd_soc_dai *codec_dai,
+		int pll_id, unsigned int freq_in, unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+
+	/* Turn off PLL */
+	reg = wm8940_read_reg_cache(codec, WM8940_POWER1);
+	wm8940_write(codec, WM8940_POWER1, reg & 0x1df);
+
+	if (freq_in == 0 || freq_out == 0) {
+		/* Clock CODEC directly from MCLK */
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK);
+		wm8940_write(codec, WM8940_CLOCK, reg & 0x0ff);
+		/* Pll power down */
+		wm8940_write(codec, WM8940_PLLN, (1 << 7));
+		return 0;
+	}
+
+	/* Pll is followed by a frequency divide by 4 */
+	pll_factors(freq_out*4, freq_in);
+	if (pll_div.k)
+		wm8940_write(codec, WM8940_PLLN,
+			     (pll_div.pre_scale << 4) | pll_div.n | (1 << 6));
+	else /* No factional component */
+		wm8940_write(codec, WM8940_PLLN,
+			     (pll_div.pre_scale << 4) | pll_div.n);
+	wm8940_write(codec, WM8940_PLLK1, pll_div.k >> 18);
+	wm8940_write(codec, WM8940_PLLK2, (pll_div.k >> 9) & 0x1ff);
+	wm8940_write(codec, WM8940_PLLK3, pll_div.k & 0x1ff);
+	/* Enable the PLL */
+	reg = wm8940_read_reg_cache(codec, WM8940_POWER1);
+	wm8940_write(codec, WM8940_POWER1, reg | 0x020);
+
+	/* Run CODEC from PLL instead of MCLK */
+	reg = wm8940_read_reg_cache(codec, WM8940_CLOCK);
+	wm8940_write(codec, WM8940_CLOCK, reg | 0x100);
+
+	return 0;
+}
+
+static int wm8940_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+				 int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8940_priv *wm8940 = codec->private_data;
+
+	switch (freq) {
+	case 11289600:
+	case 12000000:
+	case 12288000:
+	case 16934400:
+	case 18432000:
+		wm8940->sysclk = freq;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int wm8940_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
+				 int div_id, int div)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+	int ret = 0;
+
+	switch (div_id) {
+	case WM8940_BCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFFEF3;
+		ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 2));
+		break;
+	case WM8940_MCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFF1F;
+		ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 5));
+		break;
+	case WM8940_OPCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFCF;
+		ret = wm8940_write(codec, WM8940_ADDCNTRL, reg | (div << 4));
+		break;
+	}
+	return ret;
+}
+
+#define WM8940_RATES SNDRV_PCM_RATE_8000_48000
+
+#define WM8940_FORMATS (SNDRV_PCM_FMTBIT_S8 |				\
+			SNDRV_PCM_FMTBIT_S16_LE |			\
+			SNDRV_PCM_FMTBIT_S20_3LE |			\
+			SNDRV_PCM_FMTBIT_S24_LE |			\
+			SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops wm8940_dai_ops = {
+	.hw_params = wm8940_i2s_hw_params,
+	.set_sysclk = wm8940_set_dai_sysclk,
+	.digital_mute = wm8940_mute,
+	.set_fmt = wm8940_set_dai_fmt,
+	.set_clkdiv = wm8940_set_dai_clkdiv,
+	.set_pll = wm8940_set_dai_pll,
+};
+
+struct snd_soc_dai wm8940_dai = {
+	.name = "WM8940",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8940_RATES,
+		.formats = WM8940_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8940_RATES,
+		.formats = WM8940_FORMATS,
+	},
+	.ops = &wm8940_dai_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8940_dai);
+
+static int wm8940_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	return wm8940_set_bias_level(codec, SND_SOC_BIAS_OFF);
+}
+
+static int wm8940_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	int ret;
+	u8 data[3];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware
+	 * Could use auto incremented writes to speed this up
+	 */
+	for (i = 0; i < ARRAY_SIZE(wm8940_reg_defaults); i++) {
+		data[0] = i;
+		data[1] = (cache[i] & 0xFF00) >> 8;
+		data[2] = cache[i] & 0x00FF;
+		ret = codec->hw_write(codec->control_data, data, 3);
+		if (ret < 0)
+			goto error_ret;
+		else if (ret != 3) {
+			ret = -EIO;
+			goto error_ret;
+		}
+	}
+	ret = wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	if (ret)
+		goto error_ret;
+	ret = wm8940_set_bias_level(codec, codec->suspend_bias_level);
+
+error_ret:
+	return ret;
+}
+
+static struct snd_soc_codec *wm8940_codec;
+
+static int wm8940_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+
+	int ret = 0;
+
+	if (wm8940_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8940_codec;
+	codec = wm8940_codec;
+
+	mutex_init(&codec->mutex);
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	ret = snd_soc_add_controls(codec, wm8940_snd_controls,
+			     ARRAY_SIZE(wm8940_snd_controls));
+	if (ret)
+		goto error_free_pcms;
+	ret = wm8940_add_widgets(codec);
+	if (ret)
+		goto error_free_pcms;
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto error_free_pcms;
+	}
+
+	return ret;
+
+error_free_pcms:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm8940_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8940 = {
+	.probe = wm8940_probe,
+	.remove = wm8940_remove,
+	.suspend = wm8940_suspend,
+	.resume = wm8940_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8940);
+
+static int wm8940_register(struct wm8940_priv *wm8940)
+{
+	struct wm8940_setup_data *pdata = wm8940->codec.dev->platform_data;
+	struct snd_soc_codec *codec = &wm8940->codec;
+	int ret;
+	u16 reg;
+	if (wm8940_codec) {
+		dev_err(codec->dev, "Another WM8940 is registered\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8940;
+	codec->name = "WM8940";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8940_read_reg_cache;
+	codec->write = wm8940_write;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8940_set_bias_level;
+	codec->dai = &wm8940_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm8940_reg_defaults);
+	codec->reg_cache = &wm8940->reg_cache;
+
+	memcpy(codec->reg_cache, wm8940_reg_defaults,
+	       sizeof(wm8940_reg_defaults));
+
+	ret = wm8940_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm8940_dai.dev = codec->dev;
+
+	wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	ret = wm8940_write(codec, WM8940_POWER1, 0x180);
+	if (ret < 0)
+		return ret;
+
+	if (!pdata)
+		dev_warn(codec->dev, "No platform data supplied\n");
+	else {
+		reg = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL);
+		ret = wm8940_write(codec, WM8940_OUTPUTCTL, reg | pdata->vroi);
+		if (ret < 0)
+			return ret;
+	}
+
+
+	wm8940_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8940_dai);
+	if (ret) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wm8940_unregister(struct wm8940_priv *wm8940)
+{
+	wm8940_set_bias_level(&wm8940->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8940_dai);
+	snd_soc_unregister_codec(&wm8940->codec);
+	kfree(wm8940);
+	wm8940_codec = NULL;
+}
+
+static int wm8940_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct wm8940_priv *wm8940;
+	struct snd_soc_codec *codec;
+
+	wm8940 = kzalloc(sizeof *wm8940, GFP_KERNEL);
+	if (wm8940 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8940->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+	i2c_set_clientdata(i2c, wm8940);
+	codec->control_data = i2c;
+	codec->dev = &i2c->dev;
+
+	return wm8940_register(wm8940);
+}
+
+static int __devexit wm8940_i2c_remove(struct i2c_client *client)
+{
+	struct wm8940_priv *wm8940 = i2c_get_clientdata(client);
+
+	wm8940_unregister(wm8940);
+
+	return 0;
+}
+
+static const struct i2c_device_id wm8940_i2c_id[] = {
+	{ "wm8940", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8940_i2c_id);
+
+static struct i2c_driver wm8940_i2c_driver = {
+	.driver = {
+		.name = "WM8940 I2C Codec",
+		.owner = THIS_MODULE,
+	},
+	.probe = wm8940_i2c_probe,
+	.remove = __devexit_p(wm8940_i2c_remove),
+	.id_table = wm8940_i2c_id,
+};
+
+static int __init wm8940_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm8940_i2c_driver);
+	if (ret)
+		printk(KERN_ERR "Failed to register WM8940 I2C driver: %d\n",
+		       ret);
+	return ret;
+}
+module_init(wm8940_modinit);
+
+static void __exit wm8940_exit(void)
+{
+	i2c_del_driver(&wm8940_i2c_driver);
+}
+module_exit(wm8940_exit);
+
+MODULE_DESCRIPTION("ASoC WM8940 driver");
+MODULE_AUTHOR("Jonathan Cameron");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8940.h b/sound/soc/codecs/wm8940.h
new file mode 100644
index 0000000..8410eed
--- /dev/null
+++ b/sound/soc/codecs/wm8940.h
@@ -0,0 +1,104 @@
+/*
+ * wm8940.h -- WM8940 Soc Audio driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _WM8940_H
+#define _WM8940_H
+
+struct wm8940_setup_data {
+	/* Vref to analogue output resistance */
+#define WM8940_VROI_1K 0
+#define WM8940_VROI_30K 1
+	unsigned int vroi:1;
+};
+extern struct snd_soc_dai wm8940_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8940;
+
+/* WM8940 register space */
+#define WM8940_SOFTRESET	0x00
+#define WM8940_POWER1		0x01
+#define WM8940_POWER2		0x02
+#define WM8940_POWER3		0x03
+#define WM8940_IFACE		0x04
+#define WM8940_COMPANDINGCTL	0x05
+#define WM8940_CLOCK		0x06
+#define WM8940_ADDCNTRL		0x07
+#define WM8940_GPIO		0x08
+#define WM8940_CTLINT		0x09
+#define WM8940_DAC		0x0A
+#define WM8940_DACVOL		0x0B
+
+#define WM8940_ADC		0x0E
+#define WM8940_ADCVOL		0x0F
+#define WM8940_NOTCH1		0x10
+#define WM8940_NOTCH2		0x11
+#define WM8940_NOTCH3		0x12
+#define WM8940_NOTCH4		0x13
+#define WM8940_NOTCH5		0x14
+#define WM8940_NOTCH6		0x15
+#define WM8940_NOTCH7		0x16
+#define WM8940_NOTCH8		0x17
+#define WM8940_DACLIM1		0x18
+#define WM8940_DACLIM2		0x19
+
+#define WM8940_ALC1		0x20
+#define WM8940_ALC2		0x21
+#define WM8940_ALC3		0x22
+#define WM8940_NOISEGATE	0x23
+#define WM8940_PLLN		0x24
+#define WM8940_PLLK1		0x25
+#define WM8940_PLLK2		0x26
+#define WM8940_PLLK3		0x27
+
+#define WM8940_ALC4		0x2A
+
+#define WM8940_INPUTCTL		0x2C
+#define WM8940_PGAGAIN		0x2D
+
+#define WM8940_ADCBOOST		0x2F
+
+#define WM8940_OUTPUTCTL	0x31
+#define WM8940_SPKMIX		0x32
+
+#define WM8940_SPKVOL		0x36
+
+#define WM8940_MONOMIX		0x38
+
+#define WM8940_CACHEREGNUM  0x57
+
+
+/* Clock divider Id's */
+#define WM8940_BCLKDIV 0
+#define WM8940_MCLKDIV 1
+#define WM8940_OPCLKDIV 2
+
+/* MCLK clock dividers */
+#define WM8940_MCLKDIV_1	0
+#define WM8940_MCLKDIV_1_5	1
+#define WM8940_MCLKDIV_2	2
+#define WM8940_MCLKDIV_3	3
+#define WM8940_MCLKDIV_4	4
+#define WM8940_MCLKDIV_6	5
+#define WM8940_MCLKDIV_8	6
+#define WM8940_MCLKDIV_12	7
+
+/* BCLK clock dividers */
+#define WM8940_BCLKDIV_1 0
+#define WM8940_BCLKDIV_2 1
+#define WM8940_BCLKDIV_4 2
+#define WM8940_BCLKDIV_8 3
+#define WM8940_BCLKDIV_16 4
+#define WM8940_BCLKDIV_32 5
+
+/* PLL Out Dividers */
+#define WM8940_OPCLKDIV_1 0
+#define WM8940_OPCLKDIV_2 1
+#define WM8940_OPCLKDIV_3 2
+#define WM8940_OPCLKDIV_4 3
+
+#endif /* _WM8940_H */
+
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
new file mode 100644
index 0000000..e224d8a
--- /dev/null
+++ b/sound/soc/codecs/wm8960.c
@@ -0,0 +1,969 @@
+/*
+ * wm8960.c  --  WM8960 ALSA SoC Audio driver
+ *
+ * Author: Liam Girdwood
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "wm8960.h"
+
+#define AUDIO_NAME "wm8960"
+
+struct snd_soc_codec_device soc_codec_dev_wm8960;
+
+/* R25 - Power 1 */
+#define WM8960_VREF      0x40
+
+/* R28 - Anti-pop 1 */
+#define WM8960_POBCTRL   0x80
+#define WM8960_BUFDCOPEN 0x10
+#define WM8960_BUFIOEN   0x08
+#define WM8960_SOFT_ST   0x04
+#define WM8960_HPSTBY    0x01
+
+/* R29 - Anti-pop 2 */
+#define WM8960_DISOP     0x40
+
+/*
+ * wm8960 register cache
+ * We can't read the WM8960 register space when we are
+ * using 2 wire for device control, so we cache them instead.
+ */
+static const u16 wm8960_reg[WM8960_CACHEREGNUM] = {
+	0x0097, 0x0097, 0x0000, 0x0000,
+	0x0000, 0x0008, 0x0000, 0x000a,
+	0x01c0, 0x0000, 0x00ff, 0x00ff,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x007b, 0x0100, 0x0032,
+	0x0000, 0x00c3, 0x00c3, 0x01c0,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0100, 0x0100, 0x0050, 0x0050,
+	0x0050, 0x0050, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0040, 0x0000,
+	0x0000, 0x0050, 0x0050, 0x0000,
+	0x0002, 0x0037, 0x004d, 0x0080,
+	0x0008, 0x0031, 0x0026, 0x00e9,
+};
+
+struct wm8960_priv {
+	u16 reg_cache[WM8960_CACHEREGNUM];
+	struct snd_soc_codec codec;
+};
+
+/*
+ * read wm8960 register cache
+ */
+static inline unsigned int wm8960_read_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg == WM8960_RESET)
+		return 0;
+	if (reg >= WM8960_CACHEREGNUM)
+		return -1;
+	return cache[reg];
+}
+
+/*
+ * write wm8960 register cache
+ */
+static inline void wm8960_write_reg_cache(struct snd_soc_codec *codec,
+	u16 reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg >= WM8960_CACHEREGNUM)
+		return;
+	cache[reg] = value;
+}
+
+static inline unsigned int wm8960_read(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	return wm8960_read_reg_cache(codec, reg);
+}
+
+/*
+ * write to the WM8960 register space
+ */
+static int wm8960_write(struct snd_soc_codec *codec, unsigned int reg,
+	unsigned int value)
+{
+	u8 data[2];
+
+	/* data is
+	 *   D15..D9 WM8960 register offset
+	 *   D8...D0 register data
+	 */
+	data[0] = (reg << 1) | ((value >> 8) & 0x0001);
+	data[1] = value & 0x00ff;
+
+	wm8960_write_reg_cache(codec, reg, value);
+	if (codec->hw_write(codec->control_data, data, 2) == 2)
+		return 0;
+	else
+		return -EIO;
+}
+
+#define wm8960_reset(c)	wm8960_write(c, WM8960_RESET, 0)
+
+/* enumerated controls */
+static const char *wm8960_deemph[] = {"None", "32Khz", "44.1Khz", "48Khz"};
+static const char *wm8960_polarity[] = {"No Inversion", "Left Inverted",
+	"Right Inverted", "Stereo Inversion"};
+static const char *wm8960_3d_upper_cutoff[] = {"High", "Low"};
+static const char *wm8960_3d_lower_cutoff[] = {"Low", "High"};
+static const char *wm8960_alcfunc[] = {"Off", "Right", "Left", "Stereo"};
+static const char *wm8960_alcmode[] = {"ALC", "Limiter"};
+
+static const struct soc_enum wm8960_enum[] = {
+	SOC_ENUM_SINGLE(WM8960_DACCTL1, 1, 4, wm8960_deemph),
+	SOC_ENUM_SINGLE(WM8960_DACCTL1, 5, 4, wm8960_polarity),
+	SOC_ENUM_SINGLE(WM8960_DACCTL2, 5, 4, wm8960_polarity),
+	SOC_ENUM_SINGLE(WM8960_3D, 6, 2, wm8960_3d_upper_cutoff),
+	SOC_ENUM_SINGLE(WM8960_3D, 5, 2, wm8960_3d_lower_cutoff),
+	SOC_ENUM_SINGLE(WM8960_ALC1, 7, 4, wm8960_alcfunc),
+	SOC_ENUM_SINGLE(WM8960_ALC3, 8, 2, wm8960_alcmode),
+};
+
+static const DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 50, 0);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -12700, 50, 1);
+static const DECLARE_TLV_DB_SCALE(bypass_tlv, -2100, 300, 0);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1);
+
+static const struct snd_kcontrol_new wm8960_snd_controls[] = {
+SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL,
+		 0, 63, 0, adc_tlv),
+SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL,
+	6, 1, 0),
+SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
+	7, 1, 0),
+
+SOC_DOUBLE_R_TLV("Playback Volume", WM8960_LDAC, WM8960_RDAC,
+		 0, 255, 0, dac_tlv),
+
+SOC_DOUBLE_R_TLV("Headphone Playback Volume", WM8960_LOUT1, WM8960_ROUT1,
+		 0, 127, 0, out_tlv),
+SOC_DOUBLE_R("Headphone Playback ZC Switch", WM8960_LOUT1, WM8960_ROUT1,
+	7, 1, 0),
+
+SOC_DOUBLE_R_TLV("Speaker Playback Volume", WM8960_LOUT2, WM8960_ROUT2,
+		 0, 127, 0, out_tlv),
+SOC_DOUBLE_R("Speaker Playback ZC Switch", WM8960_LOUT2, WM8960_ROUT2,
+	7, 1, 0),
+SOC_SINGLE("Speaker DC Volume", WM8960_CLASSD3, 3, 5, 0),
+SOC_SINGLE("Speaker AC Volume", WM8960_CLASSD3, 0, 5, 0),
+
+SOC_SINGLE("PCM Playback -6dB Switch", WM8960_DACCTL1, 7, 1, 0),
+SOC_ENUM("ADC Polarity", wm8960_enum[1]),
+SOC_ENUM("Playback De-emphasis", wm8960_enum[0]),
+SOC_SINGLE("ADC High Pass Filter Switch", WM8960_DACCTL1, 0, 1, 0),
+
+SOC_ENUM("DAC Polarity", wm8960_enum[2]),
+
+SOC_ENUM("3D Filter Upper Cut-Off", wm8960_enum[3]),
+SOC_ENUM("3D Filter Lower Cut-Off", wm8960_enum[4]),
+SOC_SINGLE("3D Volume", WM8960_3D, 1, 15, 0),
+SOC_SINGLE("3D Switch", WM8960_3D, 0, 1, 0),
+
+SOC_ENUM("ALC Function", wm8960_enum[5]),
+SOC_SINGLE("ALC Max Gain", WM8960_ALC1, 4, 7, 0),
+SOC_SINGLE("ALC Target", WM8960_ALC1, 0, 15, 1),
+SOC_SINGLE("ALC Min Gain", WM8960_ALC2, 4, 7, 0),
+SOC_SINGLE("ALC Hold Time", WM8960_ALC2, 0, 15, 0),
+SOC_ENUM("ALC Mode", wm8960_enum[6]),
+SOC_SINGLE("ALC Decay", WM8960_ALC3, 4, 15, 0),
+SOC_SINGLE("ALC Attack", WM8960_ALC3, 0, 15, 0),
+
+SOC_SINGLE("Noise Gate Threshold", WM8960_NOISEG, 3, 31, 0),
+SOC_SINGLE("Noise Gate Switch", WM8960_NOISEG, 0, 1, 0),
+
+SOC_DOUBLE_R("ADC PCM Capture Volume", WM8960_LINPATH, WM8960_RINPATH,
+	0, 127, 0),
+
+SOC_SINGLE_TLV("Left Output Mixer Boost Bypass Volume",
+	       WM8960_BYPASS1, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Left Output Mixer LINPUT3 Volume",
+	       WM8960_LOUTMIX, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Right Output Mixer Boost Bypass Volume",
+	       WM8960_BYPASS2, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Right Output Mixer RINPUT3 Volume",
+	       WM8960_ROUTMIX, 4, 7, 1, bypass_tlv),
+};
+
+static const struct snd_kcontrol_new wm8960_lin_boost[] = {
+SOC_DAPM_SINGLE("LINPUT2 Switch", WM8960_LINPATH, 6, 1, 0),
+SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LINPATH, 7, 1, 0),
+SOC_DAPM_SINGLE("LINPUT1 Switch", WM8960_LINPATH, 8, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_lin[] = {
+SOC_DAPM_SINGLE("Boost Switch", WM8960_LINPATH, 3, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_rin_boost[] = {
+SOC_DAPM_SINGLE("RINPUT2 Switch", WM8960_RINPATH, 6, 1, 0),
+SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_RINPATH, 7, 1, 0),
+SOC_DAPM_SINGLE("RINPUT1 Switch", WM8960_RINPATH, 8, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_rin[] = {
+SOC_DAPM_SINGLE("Boost Switch", WM8960_RINPATH, 3, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_loutput_mixer[] = {
+SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_LOUTMIX, 8, 1, 0),
+SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LOUTMIX, 7, 1, 0),
+SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS1, 7, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_routput_mixer[] = {
+SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_ROUTMIX, 8, 1, 0),
+SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_ROUTMIX, 7, 1, 0),
+SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS2, 7, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_mono_out[] = {
+SOC_DAPM_SINGLE("Left Switch", WM8960_MONOMIX1, 7, 1, 0),
+SOC_DAPM_SINGLE("Right Switch", WM8960_MONOMIX2, 7, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget wm8960_dapm_widgets[] = {
+SND_SOC_DAPM_INPUT("LINPUT1"),
+SND_SOC_DAPM_INPUT("RINPUT1"),
+SND_SOC_DAPM_INPUT("LINPUT2"),
+SND_SOC_DAPM_INPUT("RINPUT2"),
+SND_SOC_DAPM_INPUT("LINPUT3"),
+SND_SOC_DAPM_INPUT("RINPUT3"),
+
+SND_SOC_DAPM_MICBIAS("MICB", WM8960_POWER1, 1, 0),
+
+SND_SOC_DAPM_MIXER("Left Boost Mixer", WM8960_POWER1, 5, 0,
+		   wm8960_lin_boost, ARRAY_SIZE(wm8960_lin_boost)),
+SND_SOC_DAPM_MIXER("Right Boost Mixer", WM8960_POWER1, 4, 0,
+		   wm8960_rin_boost, ARRAY_SIZE(wm8960_rin_boost)),
+
+SND_SOC_DAPM_MIXER("Left Input Mixer", WM8960_POWER3, 5, 0,
+		   wm8960_lin, ARRAY_SIZE(wm8960_lin)),
+SND_SOC_DAPM_MIXER("Right Input Mixer", WM8960_POWER3, 4, 0,
+		   wm8960_rin, ARRAY_SIZE(wm8960_rin)),
+
+SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER2, 3, 0),
+SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER2, 2, 0),
+
+SND_SOC_DAPM_DAC("Left DAC", "Playback", WM8960_POWER2, 8, 0),
+SND_SOC_DAPM_DAC("Right DAC", "Playback", WM8960_POWER2, 7, 0),
+
+SND_SOC_DAPM_MIXER("Left Output Mixer", WM8960_POWER3, 3, 0,
+	&wm8960_loutput_mixer[0],
+	ARRAY_SIZE(wm8960_loutput_mixer)),
+SND_SOC_DAPM_MIXER("Right Output Mixer", WM8960_POWER3, 2, 0,
+	&wm8960_routput_mixer[0],
+	ARRAY_SIZE(wm8960_routput_mixer)),
+
+SND_SOC_DAPM_MIXER("Mono Output Mixer", WM8960_POWER2, 1, 0,
+	&wm8960_mono_out[0],
+	ARRAY_SIZE(wm8960_mono_out)),
+
+SND_SOC_DAPM_PGA("LOUT1 PGA", WM8960_POWER2, 6, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ROUT1 PGA", WM8960_POWER2, 5, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Left Speaker PGA", WM8960_POWER2, 4, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Right Speaker PGA", WM8960_POWER2, 3, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Right Speaker Output", WM8960_CLASSD1, 7, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Left Speaker Output", WM8960_CLASSD1, 6, 0, NULL, 0),
+
+SND_SOC_DAPM_OUTPUT("SPK_LP"),
+SND_SOC_DAPM_OUTPUT("SPK_LN"),
+SND_SOC_DAPM_OUTPUT("HP_L"),
+SND_SOC_DAPM_OUTPUT("HP_R"),
+SND_SOC_DAPM_OUTPUT("SPK_RP"),
+SND_SOC_DAPM_OUTPUT("SPK_RN"),
+SND_SOC_DAPM_OUTPUT("OUT3"),
+};
+
+static const struct snd_soc_dapm_route audio_paths[] = {
+	{ "Left Boost Mixer", "LINPUT1 Switch", "LINPUT1" },
+	{ "Left Boost Mixer", "LINPUT2 Switch", "LINPUT2" },
+	{ "Left Boost Mixer", "LINPUT3 Switch", "LINPUT3" },
+
+	{ "Left Input Mixer", "Boost Switch", "Left Boost Mixer", },
+	{ "Left Input Mixer", NULL, "LINPUT1", },  /* Really Boost Switch */
+	{ "Left Input Mixer", NULL, "LINPUT2" },
+	{ "Left Input Mixer", NULL, "LINPUT3" },
+
+	{ "Right Boost Mixer", "RINPUT1 Switch", "RINPUT1" },
+	{ "Right Boost Mixer", "RINPUT2 Switch", "RINPUT2" },
+	{ "Right Boost Mixer", "RINPUT3 Switch", "RINPUT3" },
+
+	{ "Right Input Mixer", "Boost Switch", "Right Boost Mixer", },
+	{ "Right Input Mixer", NULL, "RINPUT1", },  /* Really Boost Switch */
+	{ "Right Input Mixer", NULL, "RINPUT2" },
+	{ "Right Input Mixer", NULL, "LINPUT3" },
+
+	{ "Left ADC", NULL, "Left Input Mixer" },
+	{ "Right ADC", NULL, "Right Input Mixer" },
+
+	{ "Left Output Mixer", "LINPUT3 Switch", "LINPUT3" },
+	{ "Left Output Mixer", "Boost Bypass Switch", "Left Boost Mixer"} ,
+	{ "Left Output Mixer", "PCM Playback Switch", "Left DAC" },
+
+	{ "Right Output Mixer", "RINPUT3 Switch", "RINPUT3" },
+	{ "Right Output Mixer", "Boost Bypass Switch", "Right Boost Mixer" } ,
+	{ "Right Output Mixer", "PCM Playback Switch", "Right DAC" },
+
+	{ "Mono Output Mixer", "Left Switch", "Left Output Mixer" },
+	{ "Mono Output Mixer", "Right Switch", "Right Output Mixer" },
+
+	{ "LOUT1 PGA", NULL, "Left Output Mixer" },
+	{ "ROUT1 PGA", NULL, "Right Output Mixer" },
+
+	{ "HP_L", NULL, "LOUT1 PGA" },
+	{ "HP_R", NULL, "ROUT1 PGA" },
+
+	{ "Left Speaker PGA", NULL, "Left Output Mixer" },
+	{ "Right Speaker PGA", NULL, "Right Output Mixer" },
+
+	{ "Left Speaker Output", NULL, "Left Speaker PGA" },
+	{ "Right Speaker Output", NULL, "Right Speaker PGA" },
+
+	{ "SPK_LN", NULL, "Left Speaker Output" },
+	{ "SPK_LP", NULL, "Left Speaker Output" },
+	{ "SPK_RN", NULL, "Right Speaker Output" },
+	{ "SPK_RP", NULL, "Right Speaker Output" },
+
+	{ "OUT3", NULL, "Mono Output Mixer", }
+};
+
+static int wm8960_add_widgets(struct snd_soc_codec *codec)
+{
+	snd_soc_dapm_new_controls(codec, wm8960_dapm_widgets,
+				  ARRAY_SIZE(wm8960_dapm_widgets));
+
+	snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths));
+
+	snd_soc_dapm_new_widgets(codec);
+	return 0;
+}
+
+static int wm8960_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = 0;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		iface |= 0x0040;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* interface format */
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= 0x0002;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= 0x0001;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= 0x0003;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= 0x0013;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= 0x0090;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= 0x0080;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= 0x0010;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* set iface */
+	wm8960_write(codec, WM8960_IFACE1, iface);
+	return 0;
+}
+
+static int wm8960_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 iface = wm8960_read(codec, WM8960_IFACE1) & 0xfff3;
+
+	/* bit size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= 0x0004;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= 0x0008;
+		break;
+	}
+
+	/* set iface */
+	wm8960_write(codec, WM8960_IFACE1, iface);
+	return 0;
+}
+
+static int wm8960_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8960_read(codec, WM8960_DACCTL1) & 0xfff7;
+
+	if (mute)
+		wm8960_write(codec, WM8960_DACCTL1, mute_reg | 0x8);
+	else
+		wm8960_write(codec, WM8960_DACCTL1, mute_reg);
+	return 0;
+}
+
+static int wm8960_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	struct wm8960_data *pdata = codec->dev->platform_data;
+	u16 reg;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* Set VMID to 2x50k */
+		reg = wm8960_read(codec, WM8960_POWER1);
+		reg &= ~0x180;
+		reg |= 0x80;
+		wm8960_write(codec, WM8960_POWER1, reg);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* Enable anti-pop features */
+			wm8960_write(codec, WM8960_APOP1,
+				     WM8960_POBCTRL | WM8960_SOFT_ST |
+				     WM8960_BUFDCOPEN | WM8960_BUFIOEN);
+
+			/* Discharge HP output */
+			reg = WM8960_DISOP;
+			if (pdata)
+				reg |= pdata->dres << 4;
+			wm8960_write(codec, WM8960_APOP2, reg);
+
+			msleep(400);
+
+			wm8960_write(codec, WM8960_APOP2, 0);
+
+			/* Enable & ramp VMID at 2x50k */
+			reg = wm8960_read(codec, WM8960_POWER1);
+			reg |= 0x80;
+			wm8960_write(codec, WM8960_POWER1, reg);
+			msleep(100);
+
+			/* Enable VREF */
+			wm8960_write(codec, WM8960_POWER1, reg | WM8960_VREF);
+
+			/* Disable anti-pop features */
+			wm8960_write(codec, WM8960_APOP1, WM8960_BUFIOEN);
+		}
+
+		/* Set VMID to 2x250k */
+		reg = wm8960_read(codec, WM8960_POWER1);
+		reg &= ~0x180;
+		reg |= 0x100;
+		wm8960_write(codec, WM8960_POWER1, reg);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		/* Enable anti-pop features */
+		wm8960_write(codec, WM8960_APOP1,
+			     WM8960_POBCTRL | WM8960_SOFT_ST |
+			     WM8960_BUFDCOPEN | WM8960_BUFIOEN);
+
+		/* Disable VMID and VREF, let them discharge */
+		wm8960_write(codec, WM8960_POWER1, 0);
+		msleep(600);
+
+		wm8960_write(codec, WM8960_APOP1, 0);
+		break;
+	}
+
+	codec->bias_level = level;
+
+	return 0;
+}
+
+/* PLL divisors */
+struct _pll_div {
+	u32 pre_div:1;
+	u32 n:4;
+	u32 k:24;
+};
+
+/* The size in bits of the pll divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_PLL_SIZE ((1 << 24) * 10)
+
+static int pll_factors(unsigned int source, unsigned int target,
+		       struct _pll_div *pll_div)
+{
+	unsigned long long Kpart;
+	unsigned int K, Ndiv, Nmod;
+
+	pr_debug("WM8960 PLL: setting %dHz->%dHz\n", source, target);
+
+	/* Scale up target to PLL operating frequency */
+	target *= 4;
+
+	Ndiv = target / source;
+	if (Ndiv < 6) {
+		source >>= 1;
+		pll_div->pre_div = 1;
+		Ndiv = target / source;
+	} else
+		pll_div->pre_div = 0;
+
+	if ((Ndiv < 6) || (Ndiv > 12)) {
+		pr_err("WM8960 PLL: Unsupported N=%d\n", Ndiv);
+		return -EINVAL;
+	}
+
+	pll_div->n = Ndiv;
+	Nmod = target % source;
+	Kpart = FIXED_PLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, source);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	/* Check if we need to round */
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	K /= 10;
+
+	pll_div->k = K;
+
+	pr_debug("WM8960 PLL: N=%x K=%x pre_div=%d\n",
+		 pll_div->n, pll_div->k, pll_div->pre_div);
+
+	return 0;
+}
+
+static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai,
+		int pll_id, unsigned int freq_in, unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+	static struct _pll_div pll_div;
+	int ret;
+
+	if (freq_in && freq_out) {
+		ret = pll_factors(freq_in, freq_out, &pll_div);
+		if (ret != 0)
+			return ret;
+	}
+
+	/* Disable the PLL: even if we are changing the frequency the
+	 * PLL needs to be disabled while we do so. */
+	wm8960_write(codec, WM8960_CLOCK1,
+		     wm8960_read(codec, WM8960_CLOCK1) & ~1);
+	wm8960_write(codec, WM8960_POWER2,
+		     wm8960_read(codec, WM8960_POWER2) & ~1);
+
+	if (!freq_in || !freq_out)
+		return 0;
+
+	reg = wm8960_read(codec, WM8960_PLL1) & ~0x3f;
+	reg |= pll_div.pre_div << 4;
+	reg |= pll_div.n;
+
+	if (pll_div.k) {
+		reg |= 0x20;
+
+		wm8960_write(codec, WM8960_PLL2, (pll_div.k >> 18) & 0x3f);
+		wm8960_write(codec, WM8960_PLL3, (pll_div.k >> 9) & 0x1ff);
+		wm8960_write(codec, WM8960_PLL4, pll_div.k & 0x1ff);
+	}
+	wm8960_write(codec, WM8960_PLL1, reg);
+
+	/* Turn it on */
+	wm8960_write(codec, WM8960_POWER2,
+		     wm8960_read(codec, WM8960_POWER2) | 1);
+	msleep(250);
+	wm8960_write(codec, WM8960_CLOCK1,
+		     wm8960_read(codec, WM8960_CLOCK1) | 1);
+
+	return 0;
+}
+
+static int wm8960_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
+		int div_id, int div)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+
+	switch (div_id) {
+	case WM8960_SYSCLKSEL:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1fe;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_SYSCLKDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1f9;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_DACDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1c7;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_OPCLKDIV:
+		reg = wm8960_read(codec, WM8960_PLL1) & 0x03f;
+		wm8960_write(codec, WM8960_PLL1, reg | div);
+		break;
+	case WM8960_DCLKDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK2) & 0x03f;
+		wm8960_write(codec, WM8960_CLOCK2, reg | div);
+		break;
+	case WM8960_TOCLKSEL:
+		reg = wm8960_read(codec, WM8960_ADDCTL1) & 0x1fd;
+		wm8960_write(codec, WM8960_ADDCTL1, reg | div);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define WM8960_RATES SNDRV_PCM_RATE_8000_48000
+
+#define WM8960_FORMATS \
+	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+	SNDRV_PCM_FMTBIT_S24_LE)
+
+static struct snd_soc_dai_ops wm8960_dai_ops = {
+	.hw_params = wm8960_hw_params,
+	.digital_mute = wm8960_mute,
+	.set_fmt = wm8960_set_dai_fmt,
+	.set_clkdiv = wm8960_set_dai_clkdiv,
+	.set_pll = wm8960_set_dai_pll,
+};
+
+struct snd_soc_dai wm8960_dai = {
+	.name = "WM8960",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8960_RATES,
+		.formats = WM8960_FORMATS,},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8960_RATES,
+		.formats = WM8960_FORMATS,},
+	.ops = &wm8960_dai_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8960_dai);
+
+static int wm8960_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int wm8960_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	u8 data[2];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware */
+	for (i = 0; i < ARRAY_SIZE(wm8960_reg); i++) {
+		data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001);
+		data[1] = cache[i] & 0x00ff;
+		codec->hw_write(codec->control_data, data, 2);
+	}
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	wm8960_set_bias_level(codec, codec->suspend_bias_level);
+	return 0;
+}
+
+static struct snd_soc_codec *wm8960_codec;
+
+static int wm8960_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	if (wm8960_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8960_codec;
+	codec = wm8960_codec;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm8960_snd_controls,
+			     ARRAY_SIZE(wm8960_snd_controls));
+	wm8960_add_widgets(codec);
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+/* power down chip */
+static int wm8960_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8960 = {
+	.probe = 	wm8960_probe,
+	.remove = 	wm8960_remove,
+	.suspend = 	wm8960_suspend,
+	.resume =	wm8960_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8960);
+
+static int wm8960_register(struct wm8960_priv *wm8960)
+{
+	struct wm8960_data *pdata = wm8960->codec.dev->platform_data;
+	struct snd_soc_codec *codec = &wm8960->codec;
+	int ret;
+	u16 reg;
+
+	if (wm8960_codec) {
+		dev_err(codec->dev, "Another WM8960 is registered\n");
+		return -EINVAL;
+	}
+
+	if (!pdata) {
+		dev_warn(codec->dev, "No platform data supplied\n");
+	} else {
+		if (pdata->dres > WM8960_DRES_MAX) {
+			dev_err(codec->dev, "Invalid DRES: %d\n", pdata->dres);
+			pdata->dres = 0;
+		}
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8960;
+	codec->name = "WM8960";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8960_read_reg_cache;
+	codec->write = wm8960_write;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8960_set_bias_level;
+	codec->dai = &wm8960_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = WM8960_CACHEREGNUM;
+	codec->reg_cache = &wm8960->reg_cache;
+
+	memcpy(codec->reg_cache, wm8960_reg, sizeof(wm8960_reg));
+
+	ret = wm8960_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm8960_dai.dev = codec->dev;
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	/* Latch the update bits */
+	reg = wm8960_read(codec, WM8960_LINVOL);
+	wm8960_write(codec, WM8960_LINVOL, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RINVOL);
+	wm8960_write(codec, WM8960_RINVOL, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LADC);
+	wm8960_write(codec, WM8960_LADC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RADC);
+	wm8960_write(codec, WM8960_RADC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LDAC);
+	wm8960_write(codec, WM8960_LDAC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RDAC);
+	wm8960_write(codec, WM8960_RDAC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LOUT1);
+	wm8960_write(codec, WM8960_LOUT1, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_ROUT1);
+	wm8960_write(codec, WM8960_ROUT1, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LOUT2);
+	wm8960_write(codec, WM8960_LOUT2, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_ROUT2);
+	wm8960_write(codec, WM8960_ROUT2, reg | 0x100);
+
+	wm8960_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8960_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wm8960_unregister(struct wm8960_priv *wm8960)
+{
+	wm8960_set_bias_level(&wm8960->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8960_dai);
+	snd_soc_unregister_codec(&wm8960->codec);
+	kfree(wm8960);
+	wm8960_codec = NULL;
+}
+
+static __devinit int wm8960_i2c_probe(struct i2c_client *i2c,
+				      const struct i2c_device_id *id)
+{
+	struct wm8960_priv *wm8960;
+	struct snd_soc_codec *codec;
+
+	wm8960 = kzalloc(sizeof(struct wm8960_priv), GFP_KERNEL);
+	if (wm8960 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8960->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+
+	i2c_set_clientdata(i2c, wm8960);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm8960_register(wm8960);
+}
+
+static __devexit int wm8960_i2c_remove(struct i2c_client *client)
+{
+	struct wm8960_priv *wm8960 = i2c_get_clientdata(client);
+	wm8960_unregister(wm8960);
+	return 0;
+}
+
+static const struct i2c_device_id wm8960_i2c_id[] = {
+	{ "wm8960", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8960_i2c_id);
+
+static struct i2c_driver wm8960_i2c_driver = {
+	.driver = {
+		.name = "WM8960 I2C Codec",
+		.owner = THIS_MODULE,
+	},
+	.probe =    wm8960_i2c_probe,
+	.remove =   __devexit_p(wm8960_i2c_remove),
+	.id_table = wm8960_i2c_id,
+};
+
+static int __init wm8960_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm8960_i2c_driver);
+	if (ret != 0) {
+		printk(KERN_ERR "Failed to register WM8960 I2C driver: %d\n",
+		       ret);
+	}
+
+	return ret;
+}
+module_init(wm8960_modinit);
+
+static void __exit wm8960_exit(void)
+{
+	i2c_del_driver(&wm8960_i2c_driver);
+}
+module_exit(wm8960_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM8960 driver");
+MODULE_AUTHOR("Liam Girdwood");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8960.h b/sound/soc/codecs/wm8960.h
new file mode 100644
index 0000000..c9af56c
--- /dev/null
+++ b/sound/soc/codecs/wm8960.h
@@ -0,0 +1,127 @@
+/*
+ * wm8960.h  --  WM8960 Soc Audio driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _WM8960_H
+#define _WM8960_H
+
+/* WM8960 register space */
+
+
+#define WM8960_CACHEREGNUM 	56
+
+#define WM8960_LINVOL		0x0
+#define WM8960_RINVOL		0x1
+#define WM8960_LOUT1		0x2
+#define WM8960_ROUT1		0x3
+#define WM8960_CLOCK1		0x4
+#define WM8960_DACCTL1		0x5
+#define WM8960_DACCTL2		0x6
+#define WM8960_IFACE1		0x7
+#define WM8960_CLOCK2		0x8
+#define WM8960_IFACE2		0x9
+#define WM8960_LDAC		0xa
+#define WM8960_RDAC		0xb
+
+#define WM8960_RESET		0xf
+#define WM8960_3D		0x10
+#define WM8960_ALC1		0x11
+#define WM8960_ALC2		0x12
+#define WM8960_ALC3		0x13
+#define WM8960_NOISEG		0x14
+#define WM8960_LADC		0x15
+#define WM8960_RADC		0x16
+#define WM8960_ADDCTL1		0x17
+#define WM8960_ADDCTL2		0x18
+#define WM8960_POWER1		0x19
+#define WM8960_POWER2		0x1a
+#define WM8960_ADDCTL3		0x1b
+#define WM8960_APOP1		0x1c
+#define WM8960_APOP2		0x1d
+
+#define WM8960_LINPATH		0x20
+#define WM8960_RINPATH		0x21
+#define WM8960_LOUTMIX		0x22
+
+#define WM8960_ROUTMIX		0x25
+#define WM8960_MONOMIX1		0x26
+#define WM8960_MONOMIX2		0x27
+#define WM8960_LOUT2		0x28
+#define WM8960_ROUT2		0x29
+#define WM8960_MONO		0x2a
+#define WM8960_INBMIX1		0x2b
+#define WM8960_INBMIX2		0x2c
+#define WM8960_BYPASS1		0x2d
+#define WM8960_BYPASS2		0x2e
+#define WM8960_POWER3		0x2f
+#define WM8960_ADDCTL4		0x30
+#define WM8960_CLASSD1		0x31
+
+#define WM8960_CLASSD3		0x33
+#define WM8960_PLL1		0x34
+#define WM8960_PLL2		0x35
+#define WM8960_PLL3		0x36
+#define WM8960_PLL4		0x37
+
+
+/*
+ * WM8960 Clock dividers
+ */
+#define WM8960_SYSCLKDIV 		0
+#define WM8960_DACDIV			1
+#define WM8960_OPCLKDIV			2
+#define WM8960_DCLKDIV			3
+#define WM8960_TOCLKSEL			4
+#define WM8960_SYSCLKSEL		5
+
+#define WM8960_SYSCLK_DIV_1		(0 << 1)
+#define WM8960_SYSCLK_DIV_2		(2 << 1)
+
+#define WM8960_SYSCLK_MCLK		(0 << 0)
+#define WM8960_SYSCLK_PLL		(1 << 0)
+
+#define WM8960_DAC_DIV_1		(0 << 3)
+#define WM8960_DAC_DIV_1_5		(1 << 3)
+#define WM8960_DAC_DIV_2		(2 << 3)
+#define WM8960_DAC_DIV_3		(3 << 3)
+#define WM8960_DAC_DIV_4		(4 << 3)
+#define WM8960_DAC_DIV_5_5		(5 << 3)
+#define WM8960_DAC_DIV_6		(6 << 3)
+
+#define WM8960_DCLK_DIV_1_5		(0 << 6)
+#define WM8960_DCLK_DIV_2		(1 << 6)
+#define WM8960_DCLK_DIV_3		(2 << 6)
+#define WM8960_DCLK_DIV_4		(3 << 6)
+#define WM8960_DCLK_DIV_6		(4 << 6)
+#define WM8960_DCLK_DIV_8		(5 << 6)
+#define WM8960_DCLK_DIV_12		(6 << 6)
+#define WM8960_DCLK_DIV_16		(7 << 6)
+
+#define WM8960_TOCLK_F19		(0 << 1)
+#define WM8960_TOCLK_F21		(1 << 1)
+
+#define WM8960_OPCLK_DIV_1		(0 << 0)
+#define WM8960_OPCLK_DIV_2		(1 << 0)
+#define WM8960_OPCLK_DIV_3		(2 << 0)
+#define WM8960_OPCLK_DIV_4		(3 << 0)
+#define WM8960_OPCLK_DIV_5_5		(4 << 0)
+#define WM8960_OPCLK_DIV_6		(5 << 0)
+
+extern struct snd_soc_dai wm8960_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8960;
+
+#define WM8960_DRES_400R 0
+#define WM8960_DRES_200R 1
+#define WM8960_DRES_600R 2
+#define WM8960_DRES_150R 3
+#define WM8960_DRES_MAX  3
+
+struct wm8960_data {
+	int dres;
+};
+
+#endif
diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c
new file mode 100644
index 0000000..c05f718
--- /dev/null
+++ b/sound/soc/codecs/wm8988.c
@@ -0,0 +1,1097 @@
+/*
+ * wm8988.c -- WM8988 ALSA SoC audio driver
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ * Copyright 2005 Openedhand Ltd.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/tlv.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+
+#include "wm8988.h"
+
+/*
+ * wm8988 register cache
+ * We can't read the WM8988 register space when we
+ * are using 2 wire for device control, so we cache them instead.
+ */
+static const u16 wm8988_reg[] = {
+	0x0097, 0x0097, 0x0079, 0x0079,  /*  0 */
+	0x0000, 0x0008, 0x0000, 0x000a,  /*  4 */
+	0x0000, 0x0000, 0x00ff, 0x00ff,  /*  8 */
+	0x000f, 0x000f, 0x0000, 0x0000,  /* 12 */
+	0x0000, 0x007b, 0x0000, 0x0032,  /* 16 */
+	0x0000, 0x00c3, 0x00c3, 0x00c0,  /* 20 */
+	0x0000, 0x0000, 0x0000, 0x0000,  /* 24 */
+	0x0000, 0x0000, 0x0000, 0x0000,  /* 28 */
+	0x0000, 0x0000, 0x0050, 0x0050,  /* 32 */
+	0x0050, 0x0050, 0x0050, 0x0050,  /* 36 */
+	0x0079, 0x0079, 0x0079,          /* 40 */
+};
+
+/* codec private data */
+struct wm8988_priv {
+	unsigned int sysclk;
+	struct snd_soc_codec codec;
+	struct snd_pcm_hw_constraint_list *sysclk_constraints;
+	u16 reg_cache[WM8988_NUM_REG];
+};
+
+
+/*
+ * read wm8988 register cache
+ */
+static inline unsigned int wm8988_read_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg > WM8988_NUM_REG)
+		return -1;
+	return cache[reg];
+}
+
+/*
+ * write wm8988 register cache
+ */
+static inline void wm8988_write_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg > WM8988_NUM_REG)
+		return;
+	cache[reg] = value;
+}
+
+static int wm8988_write(struct snd_soc_codec *codec, unsigned int reg,
+	unsigned int value)
+{
+	u8 data[2];
+
+	/* data is
+	 *   D15..D9 WM8753 register offset
+	 *   D8...D0 register data
+	 */
+	data[0] = (reg << 1) | ((value >> 8) & 0x0001);
+	data[1] = value & 0x00ff;
+
+	wm8988_write_reg_cache(codec, reg, value);
+	if (codec->hw_write(codec->control_data, data, 2) == 2)
+		return 0;
+	else
+		return -EIO;
+}
+
+#define wm8988_reset(c)	wm8988_write(c, WM8988_RESET, 0)
+
+/*
+ * WM8988 Controls
+ */
+
+static const char *bass_boost_txt[] = {"Linear Control", "Adaptive Boost"};
+static const struct soc_enum bass_boost =
+	SOC_ENUM_SINGLE(WM8988_BASS, 7, 2, bass_boost_txt);
+
+static const char *bass_filter_txt[] = { "130Hz @ 48kHz", "200Hz @ 48kHz" };
+static const struct soc_enum bass_filter =
+	SOC_ENUM_SINGLE(WM8988_BASS, 6, 2, bass_filter_txt);
+
+static const char *treble_txt[] = {"8kHz", "4kHz"};
+static const struct soc_enum treble =
+	SOC_ENUM_SINGLE(WM8988_TREBLE, 6, 2, treble_txt);
+
+static const char *stereo_3d_lc_txt[] = {"200Hz", "500Hz"};
+static const struct soc_enum stereo_3d_lc =
+	SOC_ENUM_SINGLE(WM8988_3D, 5, 2, stereo_3d_lc_txt);
+
+static const char *stereo_3d_uc_txt[] = {"2.2kHz", "1.5kHz"};
+static const struct soc_enum stereo_3d_uc =
+	SOC_ENUM_SINGLE(WM8988_3D, 6, 2, stereo_3d_uc_txt);
+
+static const char *stereo_3d_func_txt[] = {"Capture", "Playback"};
+static const struct soc_enum stereo_3d_func =
+	SOC_ENUM_SINGLE(WM8988_3D, 7, 2, stereo_3d_func_txt);
+
+static const char *alc_func_txt[] = {"Off", "Right", "Left", "Stereo"};
+static const struct soc_enum alc_func =
+	SOC_ENUM_SINGLE(WM8988_ALC1, 7, 4, alc_func_txt);
+
+static const char *ng_type_txt[] = {"Constant PGA Gain",
+				    "Mute ADC Output"};
+static const struct soc_enum ng_type =
+	SOC_ENUM_SINGLE(WM8988_NGATE, 1, 2, ng_type_txt);
+
+static const char *deemph_txt[] = {"None", "32Khz", "44.1Khz", "48Khz"};
+static const struct soc_enum deemph =
+	SOC_ENUM_SINGLE(WM8988_ADCDAC, 1, 4, deemph_txt);
+
+static const char *adcpol_txt[] = {"Normal", "L Invert", "R Invert",
+				   "L + R Invert"};
+static const struct soc_enum adcpol =
+	SOC_ENUM_SINGLE(WM8988_ADCDAC, 5, 4, adcpol_txt);
+
+static const DECLARE_TLV_DB_SCALE(pga_tlv, -1725, 75, 0);
+static const DECLARE_TLV_DB_SCALE(adc_tlv, -9750, 50, 1);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -12750, 50, 1);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1);
+static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0);
+
+static const struct snd_kcontrol_new wm8988_snd_controls[] = {
+
+SOC_ENUM("Bass Boost", bass_boost),
+SOC_ENUM("Bass Filter", bass_filter),
+SOC_SINGLE("Bass Volume", WM8988_BASS, 0, 15, 1),
+
+SOC_SINGLE("Treble Volume", WM8988_TREBLE, 0, 15, 0),
+SOC_ENUM("Treble Cut-off", treble),
+
+SOC_SINGLE("3D Switch", WM8988_3D, 0, 1, 0),
+SOC_SINGLE("3D Volume", WM8988_3D, 1, 15, 0),
+SOC_ENUM("3D Lower Cut-off", stereo_3d_lc),
+SOC_ENUM("3D Upper Cut-off", stereo_3d_uc),
+SOC_ENUM("3D Mode", stereo_3d_func),
+
+SOC_SINGLE("ALC Capture Target Volume", WM8988_ALC1, 0, 7, 0),
+SOC_SINGLE("ALC Capture Max Volume", WM8988_ALC1, 4, 7, 0),
+SOC_ENUM("ALC Capture Function", alc_func),
+SOC_SINGLE("ALC Capture ZC Switch", WM8988_ALC2, 7, 1, 0),
+SOC_SINGLE("ALC Capture Hold Time", WM8988_ALC2, 0, 15, 0),
+SOC_SINGLE("ALC Capture Decay Time", WM8988_ALC3, 4, 15, 0),
+SOC_SINGLE("ALC Capture Attack Time", WM8988_ALC3, 0, 15, 0),
+SOC_SINGLE("ALC Capture NG Threshold", WM8988_NGATE, 3, 31, 0),
+SOC_ENUM("ALC Capture NG Type", ng_type),
+SOC_SINGLE("ALC Capture NG Switch", WM8988_NGATE, 0, 1, 0),
+
+SOC_SINGLE("ZC Timeout Switch", WM8988_ADCTL1, 0, 1, 0),
+
+SOC_DOUBLE_R_TLV("Capture Digital Volume", WM8988_LADC, WM8988_RADC,
+		 0, 255, 0, adc_tlv),
+SOC_DOUBLE_R_TLV("Capture Volume", WM8988_LINVOL, WM8988_RINVOL,
+		 0, 63, 0, pga_tlv),
+SOC_DOUBLE_R("Capture ZC Switch", WM8988_LINVOL, WM8988_RINVOL, 6, 1, 0),
+SOC_DOUBLE_R("Capture Switch", WM8988_LINVOL, WM8988_RINVOL, 7, 1, 1),
+
+SOC_ENUM("Playback De-emphasis", deemph),
+
+SOC_ENUM("Capture Polarity", adcpol),
+SOC_SINGLE("Playback 6dB Attenuate", WM8988_ADCDAC, 7, 1, 0),
+SOC_SINGLE("Capture 6dB Attenuate", WM8988_ADCDAC, 8, 1, 0),
+
+SOC_DOUBLE_R_TLV("PCM Volume", WM8988_LDAC, WM8988_RDAC, 0, 255, 0, dac_tlv),
+
+SOC_SINGLE_TLV("Left Mixer Left Bypass Volume", WM8988_LOUTM1, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Left Mixer Right Bypass Volume", WM8988_LOUTM2, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Right Mixer Left Bypass Volume", WM8988_ROUTM1, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Right Mixer Right Bypass Volume", WM8988_ROUTM2, 4, 7, 1,
+	       bypass_tlv),
+
+SOC_DOUBLE_R("Output 1 Playback ZC Switch", WM8988_LOUT1V,
+	     WM8988_ROUT1V, 7, 1, 0),
+SOC_DOUBLE_R_TLV("Output 1 Playback Volume", WM8988_LOUT1V, WM8988_ROUT1V,
+		 0, 127, 0, out_tlv),
+
+SOC_DOUBLE_R("Output 2 Playback ZC Switch", WM8988_LOUT2V,
+	     WM8988_ROUT2V, 7, 1, 0),
+SOC_DOUBLE_R_TLV("Output 2 Playback Volume", WM8988_LOUT2V, WM8988_ROUT2V,
+		 0, 127, 0, out_tlv),
+
+};
+
+/*
+ * DAPM Controls
+ */
+
+static int wm8988_lrc_control(struct snd_soc_dapm_widget *w,
+			      struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	u16 adctl2 = wm8988_read_reg_cache(codec, WM8988_ADCTL2);
+
+	/* Use the DAC to gate LRC if active, otherwise use ADC */
+	if (wm8988_read_reg_cache(codec, WM8988_PWR2) & 0x180)
+		adctl2 &= ~0x4;
+	else
+		adctl2 |= 0x4;
+
+	return wm8988_write(codec, WM8988_ADCTL2, adctl2);
+}
+
+static const char *wm8988_line_texts[] = {
+	"Line 1", "Line 2", "PGA", "Differential"};
+
+static const unsigned int wm8988_line_values[] = {
+	0, 1, 3, 4};
+
+static const struct soc_enum wm8988_lline_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_LOUTM1, 0, 7,
+			      ARRAY_SIZE(wm8988_line_texts),
+			      wm8988_line_texts,
+			      wm8988_line_values);
+static const struct snd_kcontrol_new wm8988_left_line_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum);
+
+static const struct soc_enum wm8988_rline_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_ROUTM1, 0, 7,
+			      ARRAY_SIZE(wm8988_line_texts),
+			      wm8988_line_texts,
+			      wm8988_line_values);
+static const struct snd_kcontrol_new wm8988_right_line_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum);
+
+/* Left Mixer */
+static const struct snd_kcontrol_new wm8988_left_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Playback Switch", WM8988_LOUTM1, 8, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_LOUTM1, 7, 1, 0),
+	SOC_DAPM_SINGLE("Right Playback Switch", WM8988_LOUTM2, 8, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_LOUTM2, 7, 1, 0),
+};
+
+/* Right Mixer */
+static const struct snd_kcontrol_new wm8988_right_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Left Playback Switch", WM8988_ROUTM1, 8, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_ROUTM1, 7, 1, 0),
+	SOC_DAPM_SINGLE("Playback Switch", WM8988_ROUTM2, 8, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_ROUTM2, 7, 1, 0),
+};
+
+static const char *wm8988_pga_sel[] = {"Line 1", "Line 2", "Differential"};
+static const unsigned int wm8988_pga_val[] = { 0, 1, 3 };
+
+/* Left PGA Mux */
+static const struct soc_enum wm8988_lpga_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_LADCIN, 6, 3,
+			      ARRAY_SIZE(wm8988_pga_sel),
+			      wm8988_pga_sel,
+			      wm8988_pga_val);
+static const struct snd_kcontrol_new wm8988_left_pga_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lpga_enum);
+
+/* Right PGA Mux */
+static const struct soc_enum wm8988_rpga_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_RADCIN, 6, 3,
+			      ARRAY_SIZE(wm8988_pga_sel),
+			      wm8988_pga_sel,
+			      wm8988_pga_val);
+static const struct snd_kcontrol_new wm8988_right_pga_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_rpga_enum);
+
+/* Differential Mux */
+static const char *wm8988_diff_sel[] = {"Line 1", "Line 2"};
+static const struct soc_enum diffmux =
+	SOC_ENUM_SINGLE(WM8988_ADCIN, 8, 2, wm8988_diff_sel);
+static const struct snd_kcontrol_new wm8988_diffmux_controls =
+	SOC_DAPM_ENUM("Route", diffmux);
+
+/* Mono ADC Mux */
+static const char *wm8988_mono_mux[] = {"Stereo", "Mono (Left)",
+	"Mono (Right)", "Digital Mono"};
+static const struct soc_enum monomux =
+	SOC_ENUM_SINGLE(WM8988_ADCIN, 6, 4, wm8988_mono_mux);
+static const struct snd_kcontrol_new wm8988_monomux_controls =
+	SOC_DAPM_ENUM("Route", monomux);
+
+static const struct snd_soc_dapm_widget wm8988_dapm_widgets[] = {
+	SND_SOC_DAPM_MICBIAS("Mic Bias", WM8988_PWR1, 1, 0),
+
+	SND_SOC_DAPM_MUX("Differential Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_diffmux_controls),
+	SND_SOC_DAPM_MUX("Left ADC Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_monomux_controls),
+	SND_SOC_DAPM_MUX("Right ADC Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_monomux_controls),
+
+	SND_SOC_DAPM_MUX("Left PGA Mux", WM8988_PWR1, 5, 0,
+		&wm8988_left_pga_controls),
+	SND_SOC_DAPM_MUX("Right PGA Mux", WM8988_PWR1, 4, 0,
+		&wm8988_right_pga_controls),
+
+	SND_SOC_DAPM_MUX("Left Line Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_left_line_controls),
+	SND_SOC_DAPM_MUX("Right Line Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_right_line_controls),
+
+	SND_SOC_DAPM_ADC("Right ADC", "Right Capture", WM8988_PWR1, 2, 0),
+	SND_SOC_DAPM_ADC("Left ADC", "Left Capture", WM8988_PWR1, 3, 0),
+
+	SND_SOC_DAPM_DAC("Right DAC", "Right Playback", WM8988_PWR2, 7, 0),
+	SND_SOC_DAPM_DAC("Left DAC", "Left Playback", WM8988_PWR2, 8, 0),
+
+	SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0,
+		&wm8988_left_mixer_controls[0],
+		ARRAY_SIZE(wm8988_left_mixer_controls)),
+	SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0,
+		&wm8988_right_mixer_controls[0],
+		ARRAY_SIZE(wm8988_right_mixer_controls)),
+
+	SND_SOC_DAPM_PGA("Right Out 2", WM8988_PWR2, 3, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Left Out 2", WM8988_PWR2, 4, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Right Out 1", WM8988_PWR2, 5, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Left Out 1", WM8988_PWR2, 6, 0, NULL, 0),
+
+	SND_SOC_DAPM_POST("LRC control", wm8988_lrc_control),
+
+	SND_SOC_DAPM_OUTPUT("LOUT1"),
+	SND_SOC_DAPM_OUTPUT("ROUT1"),
+	SND_SOC_DAPM_OUTPUT("LOUT2"),
+	SND_SOC_DAPM_OUTPUT("ROUT2"),
+	SND_SOC_DAPM_OUTPUT("VREF"),
+
+	SND_SOC_DAPM_INPUT("LINPUT1"),
+	SND_SOC_DAPM_INPUT("LINPUT2"),
+	SND_SOC_DAPM_INPUT("RINPUT1"),
+	SND_SOC_DAPM_INPUT("RINPUT2"),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+
+	{ "Left Line Mux", "Line 1", "LINPUT1" },
+	{ "Left Line Mux", "Line 2", "LINPUT2" },
+	{ "Left Line Mux", "PGA", "Left PGA Mux" },
+	{ "Left Line Mux", "Differential", "Differential Mux" },
+
+	{ "Right Line Mux", "Line 1", "RINPUT1" },
+	{ "Right Line Mux", "Line 2", "RINPUT2" },
+	{ "Right Line Mux", "PGA", "Right PGA Mux" },
+	{ "Right Line Mux", "Differential", "Differential Mux" },
+
+	{ "Left PGA Mux", "Line 1", "LINPUT1" },
+	{ "Left PGA Mux", "Line 2", "LINPUT2" },
+	{ "Left PGA Mux", "Differential", "Differential Mux" },
+
+	{ "Right PGA Mux", "Line 1", "RINPUT1" },
+	{ "Right PGA Mux", "Line 2", "RINPUT2" },
+	{ "Right PGA Mux", "Differential", "Differential Mux" },
+
+	{ "Differential Mux", "Line 1", "LINPUT1" },
+	{ "Differential Mux", "Line 1", "RINPUT1" },
+	{ "Differential Mux", "Line 2", "LINPUT2" },
+	{ "Differential Mux", "Line 2", "RINPUT2" },
+
+	{ "Left ADC Mux", "Stereo", "Left PGA Mux" },
+	{ "Left ADC Mux", "Mono (Left)", "Left PGA Mux" },
+	{ "Left ADC Mux", "Digital Mono", "Left PGA Mux" },
+
+	{ "Right ADC Mux", "Stereo", "Right PGA Mux" },
+	{ "Right ADC Mux", "Mono (Right)", "Right PGA Mux" },
+	{ "Right ADC Mux", "Digital Mono", "Right PGA Mux" },
+
+	{ "Left ADC", NULL, "Left ADC Mux" },
+	{ "Right ADC", NULL, "Right ADC Mux" },
+
+	{ "Left Line Mux", "Line 1", "LINPUT1" },
+	{ "Left Line Mux", "Line 2", "LINPUT2" },
+	{ "Left Line Mux", "PGA", "Left PGA Mux" },
+	{ "Left Line Mux", "Differential", "Differential Mux" },
+
+	{ "Right Line Mux", "Line 1", "RINPUT1" },
+	{ "Right Line Mux", "Line 2", "RINPUT2" },
+	{ "Right Line Mux", "PGA", "Right PGA Mux" },
+	{ "Right Line Mux", "Differential", "Differential Mux" },
+
+	{ "Left Mixer", "Playback Switch", "Left DAC" },
+	{ "Left Mixer", "Left Bypass Switch", "Left Line Mux" },
+	{ "Left Mixer", "Right Playback Switch", "Right DAC" },
+	{ "Left Mixer", "Right Bypass Switch", "Right Line Mux" },
+
+	{ "Right Mixer", "Left Playback Switch", "Left DAC" },
+	{ "Right Mixer", "Left Bypass Switch", "Left Line Mux" },
+	{ "Right Mixer", "Playback Switch", "Right DAC" },
+	{ "Right Mixer", "Right Bypass Switch", "Right Line Mux" },
+
+	{ "Left Out 1", NULL, "Left Mixer" },
+	{ "LOUT1", NULL, "Left Out 1" },
+	{ "Right Out 1", NULL, "Right Mixer" },
+	{ "ROUT1", NULL, "Right Out 1" },
+
+	{ "Left Out 2", NULL, "Left Mixer" },
+	{ "LOUT2", NULL, "Left Out 2" },
+	{ "Right Out 2", NULL, "Right Mixer" },
+	{ "ROUT2", NULL, "Right Out 2" },
+};
+
+struct _coeff_div {
+	u32 mclk;
+	u32 rate;
+	u16 fs;
+	u8 sr:5;
+	u8 usb:1;
+};
+
+/* codec hifi mclk clock divider coefficients */
+static const struct _coeff_div coeff_div[] = {
+	/* 8k */
+	{12288000, 8000, 1536, 0x6, 0x0},
+	{11289600, 8000, 1408, 0x16, 0x0},
+	{18432000, 8000, 2304, 0x7, 0x0},
+	{16934400, 8000, 2112, 0x17, 0x0},
+	{12000000, 8000, 1500, 0x6, 0x1},
+
+	/* 11.025k */
+	{11289600, 11025, 1024, 0x18, 0x0},
+	{16934400, 11025, 1536, 0x19, 0x0},
+	{12000000, 11025, 1088, 0x19, 0x1},
+
+	/* 16k */
+	{12288000, 16000, 768, 0xa, 0x0},
+	{18432000, 16000, 1152, 0xb, 0x0},
+	{12000000, 16000, 750, 0xa, 0x1},
+
+	/* 22.05k */
+	{11289600, 22050, 512, 0x1a, 0x0},
+	{16934400, 22050, 768, 0x1b, 0x0},
+	{12000000, 22050, 544, 0x1b, 0x1},
+
+	/* 32k */
+	{12288000, 32000, 384, 0xc, 0x0},
+	{18432000, 32000, 576, 0xd, 0x0},
+	{12000000, 32000, 375, 0xa, 0x1},
+
+	/* 44.1k */
+	{11289600, 44100, 256, 0x10, 0x0},
+	{16934400, 44100, 384, 0x11, 0x0},
+	{12000000, 44100, 272, 0x11, 0x1},
+
+	/* 48k */
+	{12288000, 48000, 256, 0x0, 0x0},
+	{18432000, 48000, 384, 0x1, 0x0},
+	{12000000, 48000, 250, 0x0, 0x1},
+
+	/* 88.2k */
+	{11289600, 88200, 128, 0x1e, 0x0},
+	{16934400, 88200, 192, 0x1f, 0x0},
+	{12000000, 88200, 136, 0x1f, 0x1},
+
+	/* 96k */
+	{12288000, 96000, 128, 0xe, 0x0},
+	{18432000, 96000, 192, 0xf, 0x0},
+	{12000000, 96000, 125, 0xe, 0x1},
+};
+
+static inline int get_coeff(int mclk, int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(coeff_div); i++) {
+		if (coeff_div[i].rate == rate && coeff_div[i].mclk == mclk)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/* The set of rates we can generate from the above for each SYSCLK */
+
+static unsigned int rates_12288[] = {
+	8000, 12000, 16000, 24000, 24000, 32000, 48000, 96000,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_12288 = {
+	.count	= ARRAY_SIZE(rates_12288),
+	.list	= rates_12288,
+};
+
+static unsigned int rates_112896[] = {
+	8000, 11025, 22050, 44100,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_112896 = {
+	.count	= ARRAY_SIZE(rates_112896),
+	.list	= rates_112896,
+};
+
+static unsigned int rates_12[] = {
+	8000, 11025, 12000, 16000, 22050, 2400, 32000, 41100, 48000,
+	48000, 88235, 96000,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_12 = {
+	.count	= ARRAY_SIZE(rates_12),
+	.list	= rates_12,
+};
+
+/*
+ * Note that this should be called from init rather than from hw_params.
+ */
+static int wm8988_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+
+	switch (freq) {
+	case 11289600:
+	case 18432000:
+	case 22579200:
+	case 36864000:
+		wm8988->sysclk_constraints = &constraints_112896;
+		wm8988->sysclk = freq;
+		return 0;
+
+	case 12288000:
+	case 16934400:
+	case 24576000:
+	case 33868800:
+		wm8988->sysclk_constraints = &constraints_12288;
+		wm8988->sysclk = freq;
+		return 0;
+
+	case 12000000:
+	case 24000000:
+		wm8988->sysclk_constraints = &constraints_12;
+		wm8988->sysclk = freq;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int wm8988_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = 0;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		iface = 0x0040;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* interface format */
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= 0x0002;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= 0x0001;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= 0x0003;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= 0x0013;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= 0x0090;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= 0x0080;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= 0x0010;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm8988_write(codec, WM8988_IFACE, iface);
+	return 0;
+}
+
+static int wm8988_pcm_startup(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+
+	/* The set of sample rates that can be supported depends on the
+	 * MCLK supplied to the CODEC - enforce this.
+	 */
+	if (!wm8988->sysclk) {
+		dev_err(codec->dev,
+			"No MCLK configured, call set_sysclk() on init\n");
+		return -EINVAL;
+	}
+
+	snd_pcm_hw_constraint_list(substream->runtime, 0,
+				   SNDRV_PCM_HW_PARAM_RATE,
+				   wm8988->sysclk_constraints);
+
+	return 0;
+}
+
+static int wm8988_pcm_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+	u16 iface = wm8988_read_reg_cache(codec, WM8988_IFACE) & 0x1f3;
+	u16 srate = wm8988_read_reg_cache(codec, WM8988_SRATE) & 0x180;
+	int coeff;
+
+	coeff = get_coeff(wm8988->sysclk, params_rate(params));
+	if (coeff < 0) {
+		coeff = get_coeff(wm8988->sysclk / 2, params_rate(params));
+		srate |= 0x40;
+	}
+	if (coeff < 0) {
+		dev_err(codec->dev,
+			"Unable to configure sample rate %dHz with %dHz MCLK\n",
+			params_rate(params), wm8988->sysclk);
+		return coeff;
+	}
+
+	/* bit size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= 0x0004;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= 0x0008;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		iface |= 0x000c;
+		break;
+	}
+
+	/* set iface & srate */
+	wm8988_write(codec, WM8988_IFACE, iface);
+	if (coeff >= 0)
+		wm8988_write(codec, WM8988_SRATE, srate |
+			(coeff_div[coeff].sr << 1) | coeff_div[coeff].usb);
+
+	return 0;
+}
+
+static int wm8988_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8988_read_reg_cache(codec, WM8988_ADCDAC) & 0xfff7;
+
+	if (mute)
+		wm8988_write(codec, WM8988_ADCDAC, mute_reg | 0x8);
+	else
+		wm8988_write(codec, WM8988_ADCDAC, mute_reg);
+	return 0;
+}
+
+static int wm8988_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 pwr_reg = wm8988_read_reg_cache(codec, WM8988_PWR1) & ~0x1c1;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* VREF, VMID=2x50k, digital enabled */
+		wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x00c0);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* VREF, VMID=2x5k */
+			wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x1c1);
+
+			/* Charge caps */
+			msleep(100);
+		}
+
+		/* VREF, VMID=2*500k, digital stopped */
+		wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x0141);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		wm8988_write(codec, WM8988_PWR1, 0x0000);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+#define WM8988_RATES SNDRV_PCM_RATE_8000_96000
+
+#define WM8988_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
+	SNDRV_PCM_FMTBIT_S24_LE)
+
+static struct snd_soc_dai_ops wm8988_ops = {
+	.startup = wm8988_pcm_startup,
+	.hw_params = wm8988_pcm_hw_params,
+	.set_fmt = wm8988_set_dai_fmt,
+	.set_sysclk = wm8988_set_dai_sysclk,
+	.digital_mute = wm8988_mute,
+};
+
+struct snd_soc_dai wm8988_dai = {
+	.name = "WM8988",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8988_RATES,
+		.formats = WM8988_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8988_RATES,
+		.formats = WM8988_FORMATS,
+	 },
+	.ops = &wm8988_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8988_dai);
+
+static int wm8988_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm8988_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int wm8988_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	u8 data[2];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware */
+	for (i = 0; i < WM8988_NUM_REG; i++) {
+		if (i == WM8988_RESET)
+			continue;
+		data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001);
+		data[1] = cache[i] & 0x00ff;
+		codec->hw_write(codec->control_data, data, 2);
+	}
+
+	wm8988_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	return 0;
+}
+
+static struct snd_soc_codec *wm8988_codec;
+
+static int wm8988_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	if (wm8988_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8988_codec;
+	codec = wm8988_codec;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm8988_snd_controls,
+				ARRAY_SIZE(wm8988_snd_controls));
+	snd_soc_dapm_new_controls(codec, wm8988_dapm_widgets,
+				  ARRAY_SIZE(wm8988_dapm_widgets));
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+	snd_soc_dapm_new_widgets(codec);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm8988_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8988 = {
+	.probe = 	wm8988_probe,
+	.remove = 	wm8988_remove,
+	.suspend = 	wm8988_suspend,
+	.resume =	wm8988_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8988);
+
+static int wm8988_register(struct wm8988_priv *wm8988)
+{
+	struct snd_soc_codec *codec = &wm8988->codec;
+	int ret;
+	u16 reg;
+
+	if (wm8988_codec) {
+		dev_err(codec->dev, "Another WM8988 is registered\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8988;
+	codec->name = "WM8988";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8988_read_reg_cache;
+	codec->write = wm8988_write;
+	codec->dai = &wm8988_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm8988->reg_cache);
+	codec->reg_cache = &wm8988->reg_cache;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8988_set_bias_level;
+
+	memcpy(codec->reg_cache, wm8988_reg,
+	       sizeof(wm8988_reg));
+
+	ret = wm8988_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	/* set the update bits (we always update left then right) */
+	reg = wm8988_read_reg_cache(codec, WM8988_RADC);
+	wm8988_write(codec, WM8988_RADC, reg | 0x100);
+	reg = wm8988_read_reg_cache(codec, WM8988_RDAC);
+	wm8988_write(codec, WM8988_RDAC, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_ROUT1V);
+	wm8988_write(codec, WM8988_ROUT1V, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_ROUT2V);
+	wm8988_write(codec, WM8988_ROUT2V, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_RINVOL);
+	wm8988_write(codec, WM8988_RINVOL, reg | 0x0100);
+
+	wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_STANDBY);
+
+	wm8988_dai.dev = codec->dev;
+
+	wm8988_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8988_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+
+err:
+	kfree(wm8988);
+	return ret;
+}
+
+static void wm8988_unregister(struct wm8988_priv *wm8988)
+{
+	wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8988_dai);
+	snd_soc_unregister_codec(&wm8988->codec);
+	kfree(wm8988);
+	wm8988_codec = NULL;
+}
+
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+static int wm8988_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct wm8988_priv *wm8988;
+	struct snd_soc_codec *codec;
+
+	wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL);
+	if (wm8988 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8988->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+
+	i2c_set_clientdata(i2c, wm8988);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm8988_register(wm8988);
+}
+
+static int wm8988_i2c_remove(struct i2c_client *client)
+{
+	struct wm8988_priv *wm8988 = i2c_get_clientdata(client);
+	wm8988_unregister(wm8988);
+	return 0;
+}
+
+static const struct i2c_device_id wm8988_i2c_id[] = {
+	{ "wm8988", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8988_i2c_id);
+
+static struct i2c_driver wm8988_i2c_driver = {
+	.driver = {
+		.name = "WM8988",
+		.owner = THIS_MODULE,
+	},
+	.probe = wm8988_i2c_probe,
+	.remove = wm8988_i2c_remove,
+	.id_table = wm8988_i2c_id,
+};
+#endif
+
+#if defined(CONFIG_SPI_MASTER)
+static int wm8988_spi_write(struct spi_device *spi, const char *data, int len)
+{
+	struct spi_transfer t;
+	struct spi_message m;
+	u8 msg[2];
+
+	if (len <= 0)
+		return 0;
+
+	msg[0] = data[0];
+	msg[1] = data[1];
+
+	spi_message_init(&m);
+	memset(&t, 0, (sizeof t));
+
+	t.tx_buf = &msg[0];
+	t.len = len;
+
+	spi_message_add_tail(&t, &m);
+	spi_sync(spi, &m);
+
+	return len;
+}
+
+static int __devinit wm8988_spi_probe(struct spi_device *spi)
+{
+	struct wm8988_priv *wm8988;
+	struct snd_soc_codec *codec;
+
+	wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL);
+	if (wm8988 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8988->codec;
+	codec->hw_write = (hw_write_t)wm8988_spi_write;
+	codec->control_data = spi;
+	codec->dev = &spi->dev;
+
+	spi->dev.driver_data = wm8988;
+
+	return wm8988_register(wm8988);
+}
+
+static int __devexit wm8988_spi_remove(struct spi_device *spi)
+{
+	struct wm8988_priv *wm8988 = spi->dev.driver_data;
+
+	wm8988_unregister(wm8988);
+
+	return 0;
+}
+
+static struct spi_driver wm8988_spi_driver = {
+	.driver = {
+		.name	= "wm8988",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= wm8988_spi_probe,
+	.remove		= __devexit_p(wm8988_spi_remove),
+};
+#endif
+
+static int __init wm8988_modinit(void)
+{
+	int ret;
+
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+	ret = i2c_add_driver(&wm8988_i2c_driver);
+	if (ret != 0)
+		pr_err("WM8988: Unable to register I2C driver: %d\n", ret);
+#endif
+#if defined(CONFIG_SPI_MASTER)
+	ret = spi_register_driver(&wm8988_spi_driver);
+	if (ret != 0)
+		pr_err("WM8988: Unable to register SPI driver: %d\n", ret);
+#endif
+	return ret;
+}
+module_init(wm8988_modinit);
+
+static void __exit wm8988_exit(void)
+{
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+	i2c_del_driver(&wm8988_i2c_driver);
+#endif
+#if defined(CONFIG_SPI_MASTER)
+	spi_unregister_driver(&wm8988_spi_driver);
+#endif
+}
+module_exit(wm8988_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM8988 driver");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8988.h b/sound/soc/codecs/wm8988.h
new file mode 100644
index 0000000..4552d37
--- /dev/null
+++ b/sound/soc/codecs/wm8988.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2005 Openedhand Ltd.
+ *
+ * Author: Richard Purdie <richard@openedhand.com>
+ *
+ * Based on WM8753.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _WM8988_H
+#define _WM8988_H
+
+/* WM8988 register space */
+
+#define WM8988_LINVOL    0x00
+#define WM8988_RINVOL    0x01
+#define WM8988_LOUT1V    0x02
+#define WM8988_ROUT1V    0x03
+#define WM8988_ADCDAC    0x05
+#define WM8988_IFACE     0x07
+#define WM8988_SRATE     0x08
+#define WM8988_LDAC      0x0a
+#define WM8988_RDAC      0x0b
+#define WM8988_BASS      0x0c
+#define WM8988_TREBLE    0x0d
+#define WM8988_RESET     0x0f
+#define WM8988_3D        0x10
+#define WM8988_ALC1      0x11
+#define WM8988_ALC2      0x12
+#define WM8988_ALC3      0x13
+#define WM8988_NGATE     0x14
+#define WM8988_LADC      0x15
+#define WM8988_RADC      0x16
+#define WM8988_ADCTL1    0x17
+#define WM8988_ADCTL2    0x18
+#define WM8988_PWR1      0x19
+#define WM8988_PWR2      0x1a
+#define WM8988_ADCTL3    0x1b
+#define WM8988_ADCIN     0x1f
+#define WM8988_LADCIN    0x20
+#define WM8988_RADCIN    0x21
+#define WM8988_LOUTM1    0x22
+#define WM8988_LOUTM2    0x23
+#define WM8988_ROUTM1    0x24
+#define WM8988_ROUTM2    0x25
+#define WM8988_LOUT2V    0x28
+#define WM8988_ROUT2V    0x29
+#define WM8988_LPPB      0x43
+#define WM8988_NUM_REG   0x44
+
+#define WM8988_SYSCLK	0
+
+extern struct snd_soc_dai wm8988_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8988;
+
+#endif
diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c
index 40cd274..d029818 100644
--- a/sound/soc/codecs/wm8990.c
+++ b/sound/soc/codecs/wm8990.c
@@ -998,7 +998,7 @@
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-		"WM8990 N value outwith recommended range! N = %d\n", Ndiv);
+		"WM8990 N value outwith recommended range! N = %u\n", Ndiv);
 
 	pll_div->n = Ndiv;
 	Nmod = target % source;
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
new file mode 100644
index 0000000..86fc57e
--- /dev/null
+++ b/sound/soc/codecs/wm9081.c
@@ -0,0 +1,1534 @@
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include <sound/wm9081.h>
+#include "wm9081.h"
+
+static u16 wm9081_reg_defaults[] = {
+	0x0000,     /* R0  - Software Reset */
+	0x0000,     /* R1 */
+	0x00B9,     /* R2  - Analogue Lineout */
+	0x00B9,     /* R3  - Analogue Speaker PGA */
+	0x0001,     /* R4  - VMID Control */
+	0x0068,     /* R5  - Bias Control 1 */
+	0x0000,     /* R6 */
+	0x0000,     /* R7  - Analogue Mixer */
+	0x0000,     /* R8  - Anti Pop Control */
+	0x01DB,     /* R9  - Analogue Speaker 1 */
+	0x0018,     /* R10 - Analogue Speaker 2 */
+	0x0180,     /* R11 - Power Management */
+	0x0000,     /* R12 - Clock Control 1 */
+	0x0038,     /* R13 - Clock Control 2 */
+	0x4000,     /* R14 - Clock Control 3 */
+	0x0000,     /* R15 */
+	0x0000,     /* R16 - FLL Control 1 */
+	0x0200,     /* R17 - FLL Control 2 */
+	0x0000,     /* R18 - FLL Control 3 */
+	0x0204,     /* R19 - FLL Control 4 */
+	0x0000,     /* R20 - FLL Control 5 */
+	0x0000,     /* R21 */
+	0x0000,     /* R22 - Audio Interface 1 */
+	0x0002,     /* R23 - Audio Interface 2 */
+	0x0008,     /* R24 - Audio Interface 3 */
+	0x0022,     /* R25 - Audio Interface 4 */
+	0x0000,     /* R26 - Interrupt Status */
+	0x0006,     /* R27 - Interrupt Status Mask */
+	0x0000,     /* R28 - Interrupt Polarity */
+	0x0000,     /* R29 - Interrupt Control */
+	0x00C0,     /* R30 - DAC Digital 1 */
+	0x0008,     /* R31 - DAC Digital 2 */
+	0x09AF,     /* R32 - DRC 1 */
+	0x4201,     /* R33 - DRC 2 */
+	0x0000,     /* R34 - DRC 3 */
+	0x0000,     /* R35 - DRC 4 */
+	0x0000,     /* R36 */
+	0x0000,     /* R37 */
+	0x0000,     /* R38 - Write Sequencer 1 */
+	0x0000,     /* R39 - Write Sequencer 2 */
+	0x0002,     /* R40 - MW Slave 1 */
+	0x0000,     /* R41 */
+	0x0000,     /* R42 - EQ 1 */
+	0x0000,     /* R43 - EQ 2 */
+	0x0FCA,     /* R44 - EQ 3 */
+	0x0400,     /* R45 - EQ 4 */
+	0x00B8,     /* R46 - EQ 5 */
+	0x1EB5,     /* R47 - EQ 6 */
+	0xF145,     /* R48 - EQ 7 */
+	0x0B75,     /* R49 - EQ 8 */
+	0x01C5,     /* R50 - EQ 9 */
+	0x169E,     /* R51 - EQ 10 */
+	0xF829,     /* R52 - EQ 11 */
+	0x07AD,     /* R53 - EQ 12 */
+	0x1103,     /* R54 - EQ 13 */
+	0x1C58,     /* R55 - EQ 14 */
+	0xF373,     /* R56 - EQ 15 */
+	0x0A54,     /* R57 - EQ 16 */
+	0x0558,     /* R58 - EQ 17 */
+	0x0564,     /* R59 - EQ 18 */
+	0x0559,     /* R60 - EQ 19 */
+	0x4000,     /* R61 - EQ 20 */
+};
+
+static struct {
+	int ratio;
+	int clk_sys_rate;
+} clk_sys_rates[] = {
+	{ 64,   0 },
+	{ 128,  1 },
+	{ 192,  2 },
+	{ 256,  3 },
+	{ 384,  4 },
+	{ 512,  5 },
+	{ 768,  6 },
+	{ 1024, 7 },
+	{ 1408, 8 },
+	{ 1536, 9 },
+};
+
+static struct {
+	int rate;
+	int sample_rate;
+} sample_rates[] = {
+	{ 8000,  0  },
+	{ 11025, 1  },
+	{ 12000, 2  },
+	{ 16000, 3  },
+	{ 22050, 4  },
+	{ 24000, 5  },
+	{ 32000, 6  },
+	{ 44100, 7  },
+	{ 48000, 8  },
+	{ 88200, 9  },
+	{ 96000, 10 },
+};
+
+static struct {
+	int div; /* *10 due to .5s */
+	int bclk_div;
+} bclk_divs[] = {
+	{ 10,  0  },
+	{ 15,  1  },
+	{ 20,  2  },
+	{ 30,  3  },
+	{ 40,  4  },
+	{ 50,  5  },
+	{ 55,  6  },
+	{ 60,  7  },
+	{ 80,  8  },
+	{ 100, 9  },
+	{ 110, 10 },
+	{ 120, 11 },
+	{ 160, 12 },
+	{ 200, 13 },
+	{ 220, 14 },
+	{ 240, 15 },
+	{ 250, 16 },
+	{ 300, 17 },
+	{ 320, 18 },
+	{ 440, 19 },
+	{ 480, 20 },
+};
+
+struct wm9081_priv {
+	struct snd_soc_codec codec;
+	u16 reg_cache[WM9081_MAX_REGISTER + 1];
+	int sysclk_source;
+	int mclk_rate;
+	int sysclk_rate;
+	int fs;
+	int bclk;
+	int master;
+	int fll_fref;
+	int fll_fout;
+	struct wm9081_retune_mobile_config *retune;
+};
+
+static int wm9081_reg_is_volatile(int reg)
+{
+	switch (reg) {
+	default:
+		return 0;
+	}
+}
+
+static unsigned int wm9081_read_reg_cache(struct snd_soc_codec *codec,
+					  unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+	return cache[reg];
+}
+
+static unsigned int wm9081_read_hw(struct snd_soc_codec *codec, u8 reg)
+{
+	struct i2c_msg xfer[2];
+	u16 data;
+	int ret;
+	struct i2c_client *client = codec->control_data;
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	/* Write register */
+	xfer[0].addr = client->addr;
+	xfer[0].flags = 0;
+	xfer[0].len = 1;
+	xfer[0].buf = &reg;
+
+	/* Read data */
+	xfer[1].addr = client->addr;
+	xfer[1].flags = I2C_M_RD;
+	xfer[1].len = 2;
+	xfer[1].buf = (u8 *)&data;
+
+	ret = i2c_transfer(client->adapter, xfer, 2);
+	if (ret != 2) {
+		dev_err(&client->dev, "i2c_transfer() returned %d\n", ret);
+		return 0;
+	}
+
+	return (data >> 8) | ((data & 0xff) << 8);
+}
+
+static unsigned int wm9081_read(struct snd_soc_codec *codec, unsigned int reg)
+{
+	if (wm9081_reg_is_volatile(reg))
+		return wm9081_read_hw(codec, reg);
+	else
+		return wm9081_read_reg_cache(codec, reg);
+}
+
+static int wm9081_write(struct snd_soc_codec *codec, unsigned int reg,
+			unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	u8 data[3];
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	if (!wm9081_reg_is_volatile(reg))
+		cache[reg] = value;
+
+	data[0] = reg;
+	data[1] = value >> 8;
+	data[2] = value & 0x00ff;
+
+	if (codec->hw_write(codec->control_data, data, 3) == 3)
+		return 0;
+	else
+		return -EIO;
+}
+
+static int wm9081_reset(struct snd_soc_codec *codec)
+{
+	return wm9081_write(codec, WM9081_SOFTWARE_RESET, 0);
+}
+
+static const DECLARE_TLV_DB_SCALE(drc_in_tlv, -4500, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_out_tlv, -2250, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_min_tlv, -1800, 600, 0);
+static unsigned int drc_max_tlv[] = {
+	TLV_DB_RANGE_HEAD(4),
+	0, 0, TLV_DB_SCALE_ITEM(1200, 0, 0),
+	1, 1, TLV_DB_SCALE_ITEM(1800, 0, 0),
+	2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0),
+	3, 3, TLV_DB_SCALE_ITEM(3600, 0, 0),
+};
+static const DECLARE_TLV_DB_SCALE(drc_qr_tlv, 1200, 600, 0);
+static const DECLARE_TLV_DB_SCALE(drc_startup_tlv, -300, 50, 0);
+
+static const DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0);
+
+static const DECLARE_TLV_DB_SCALE(in_tlv, -600, 600, 0);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -7200, 75, 1);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0);
+
+static const char *drc_high_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"1/16",
+	"0",
+};
+
+static const struct soc_enum drc_high =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 3, 6, drc_high_text);
+
+static const char *drc_low_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"0",
+};
+
+static const struct soc_enum drc_low =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 0, 5, drc_low_text);
+
+static const char *drc_atk_text[] = {
+	"181us",
+	"181us",
+	"363us",
+	"726us",
+	"1.45ms",
+	"2.9ms",
+	"5.8ms",
+	"11.6ms",
+	"23.2ms",
+	"46.4ms",
+	"92.8ms",
+	"185.6ms",
+};
+
+static const struct soc_enum drc_atk =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 12, 12, drc_atk_text);
+
+static const char *drc_dcy_text[] = {
+	"186ms",
+	"372ms",
+	"743ms",
+	"1.49s",
+	"2.97s",
+	"5.94s",
+	"11.89s",
+	"23.78s",
+	"47.56s",
+};
+
+static const struct soc_enum drc_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 8, 9, drc_dcy_text);
+
+static const char *drc_qr_dcy_text[] = {
+	"0.725ms",
+	"1.45ms",
+	"5.8ms",
+};
+
+static const struct soc_enum drc_qr_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 4, 3, drc_qr_dcy_text);
+
+static const char *dac_deemph_text[] = {
+	"None",
+	"32kHz",
+	"44.1kHz",
+	"48kHz",
+};
+
+static const struct soc_enum dac_deemph =
+	SOC_ENUM_SINGLE(WM9081_DAC_DIGITAL_2, 1, 4, dac_deemph_text);
+
+static const char *speaker_mode_text[] = {
+	"Class D",
+	"Class AB",
+};
+
+static const struct soc_enum speaker_mode =
+	SOC_ENUM_SINGLE(WM9081_ANALOGUE_SPEAKER_2, 6, 2, speaker_mode_text);
+
+static int speaker_mode_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+	if (reg & WM9081_SPK_MODE)
+		ucontrol->value.integer.value[0] = 1;
+	else
+		ucontrol->value.integer.value[0] = 0;
+
+	return 0;
+}
+
+/*
+ * Stop any attempts to change speaker mode while the speaker is enabled.
+ *
+ * We also have some special anti-pop controls dependant on speaker
+ * mode which must be changed along with the mode.
+ */
+static int speaker_mode_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg_pwr = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+	unsigned int reg2 = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+
+	/* Are we changing anything? */
+	if (ucontrol->value.integer.value[0] ==
+	    ((reg2 & WM9081_SPK_MODE) != 0))
+		return 0;
+
+	/* Don't try to change modes while enabled */
+	if (reg_pwr & WM9081_SPK_ENA)
+		return -EINVAL;
+
+	if (ucontrol->value.integer.value[0]) {
+		/* Class AB */
+		reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
+		reg2 |= WM9081_SPK_MODE;
+	} else {
+		/* Class D */
+		reg2 |= WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL;
+		reg2 &= ~WM9081_SPK_MODE;
+	}
+
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_2, reg2);
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new wm9081_snd_controls[] = {
+SOC_SINGLE_TLV("IN1 Volume", WM9081_ANALOGUE_MIXER, 1, 1, 1, in_tlv),
+SOC_SINGLE_TLV("IN2 Volume", WM9081_ANALOGUE_MIXER, 3, 1, 1, in_tlv),
+
+SOC_SINGLE_TLV("Playback Volume", WM9081_DAC_DIGITAL_1, 1, 96, 0, dac_tlv),
+
+SOC_SINGLE("LINEOUT Switch", WM9081_ANALOGUE_LINEOUT, 7, 1, 1),
+SOC_SINGLE("LINEOUT ZC Switch", WM9081_ANALOGUE_LINEOUT, 6, 1, 0),
+SOC_SINGLE_TLV("LINEOUT Volume", WM9081_ANALOGUE_LINEOUT, 0, 63, 0, out_tlv),
+
+SOC_SINGLE("DRC Switch", WM9081_DRC_1, 15, 1, 0),
+SOC_ENUM("DRC High Slope", drc_high),
+SOC_ENUM("DRC Low Slope", drc_low),
+SOC_SINGLE_TLV("DRC Input Volume", WM9081_DRC_4, 5, 60, 1, drc_in_tlv),
+SOC_SINGLE_TLV("DRC Output Volume", WM9081_DRC_4, 0, 30, 1, drc_out_tlv),
+SOC_SINGLE_TLV("DRC Minimum Volume", WM9081_DRC_2, 2, 3, 1, drc_min_tlv),
+SOC_SINGLE_TLV("DRC Maximum Volume", WM9081_DRC_2, 0, 3, 0, drc_max_tlv),
+SOC_ENUM("DRC Attack", drc_atk),
+SOC_ENUM("DRC Decay", drc_dcy),
+SOC_SINGLE("DRC Quick Release Switch", WM9081_DRC_1, 2, 1, 0),
+SOC_SINGLE_TLV("DRC Quick Release Volume", WM9081_DRC_2, 6, 3, 0, drc_qr_tlv),
+SOC_ENUM("DRC Quick Release Decay", drc_qr_dcy),
+SOC_SINGLE_TLV("DRC Startup Volume", WM9081_DRC_1, 6, 18, 0, drc_startup_tlv),
+
+SOC_SINGLE("EQ Switch", WM9081_EQ_1, 0, 1, 0),
+
+SOC_SINGLE("Speaker DC Volume", WM9081_ANALOGUE_SPEAKER_1, 3, 5, 0),
+SOC_SINGLE("Speaker AC Volume", WM9081_ANALOGUE_SPEAKER_1, 0, 5, 0),
+SOC_SINGLE("Speaker Switch", WM9081_ANALOGUE_SPEAKER_PGA, 7, 1, 1),
+SOC_SINGLE("Speaker ZC Switch", WM9081_ANALOGUE_SPEAKER_PGA, 6, 1, 0),
+SOC_SINGLE_TLV("Speaker Volume", WM9081_ANALOGUE_SPEAKER_PGA, 0, 63, 0,
+	       out_tlv),
+SOC_ENUM("DAC Deemphasis", dac_deemph),
+SOC_ENUM_EXT("Speaker Mode", speaker_mode, speaker_mode_get, speaker_mode_put),
+};
+
+static const struct snd_kcontrol_new wm9081_eq_controls[] = {
+SOC_SINGLE_TLV("EQ1 Volume", WM9081_EQ_1, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ2 Volume", WM9081_EQ_1, 6, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ3 Volume", WM9081_EQ_1, 1, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ4 Volume", WM9081_EQ_2, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ5 Volume", WM9081_EQ_2, 6, 24, 0, eq_tlv),
+};
+
+static const struct snd_kcontrol_new mixer[] = {
+SOC_DAPM_SINGLE("IN1 Switch", WM9081_ANALOGUE_MIXER, 0, 1, 0),
+SOC_DAPM_SINGLE("IN2 Switch", WM9081_ANALOGUE_MIXER, 2, 1, 0),
+SOC_DAPM_SINGLE("Playback Switch", WM9081_ANALOGUE_MIXER, 4, 1, 0),
+};
+
+static int speaker_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	unsigned int reg = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		reg |= WM9081_SPK_ENA;
+		break;
+
+	case SND_SOC_DAPM_PRE_PMD:
+		reg &= ~WM9081_SPK_ENA;
+		break;
+	}
+
+	wm9081_write(codec, WM9081_POWER_MANAGEMENT, reg);
+
+	return 0;
+}
+
+struct _fll_div {
+	u16 fll_fratio;
+	u16 fll_outdiv;
+	u16 fll_clk_ref_div;
+	u16 n;
+	u16 k;
+};
+
+/* The size in bits of the FLL divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_FLL_SIZE ((1 << 16) * 10)
+
+static struct {
+	unsigned int min;
+	unsigned int max;
+	u16 fll_fratio;
+	int ratio;
+} fll_fratios[] = {
+	{       0,    64000, 4, 16 },
+	{   64000,   128000, 3,  8 },
+	{  128000,   256000, 2,  4 },
+	{  256000,  1000000, 1,  2 },
+	{ 1000000, 13500000, 0,  1 },
+};
+
+static int fll_factors(struct _fll_div *fll_div, unsigned int Fref,
+		       unsigned int Fout)
+{
+	u64 Kpart;
+	unsigned int K, Ndiv, Nmod, target;
+	unsigned int div;
+	int i;
+
+	/* Fref must be <=13.5MHz */
+	div = 1;
+	while ((Fref / div) > 13500000) {
+		div *= 2;
+
+		if (div > 8) {
+			pr_err("Can't scale %dMHz input down to <=13.5MHz\n",
+			       Fref);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_clk_ref_div = div / 2;
+
+	pr_debug("Fref=%u Fout=%u\n", Fref, Fout);
+
+	/* Apply the division for our remaining calculations */
+	Fref /= div;
+
+	/* Fvco should be 90-100MHz; don't check the upper bound */
+	div = 0;
+	target = Fout * 2;
+	while (target < 90000000) {
+		div++;
+		target *= 2;
+		if (div > 7) {
+			pr_err("Unable to find FLL_OUTDIV for Fout=%uHz\n",
+			       Fout);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_outdiv = div;
+
+	pr_debug("Fvco=%dHz\n", target);
+
+	/* Find an appropraite FLL_FRATIO and factor it out of the target */
+	for (i = 0; i < ARRAY_SIZE(fll_fratios); i++) {
+		if (fll_fratios[i].min <= Fref && Fref <= fll_fratios[i].max) {
+			fll_div->fll_fratio = fll_fratios[i].fll_fratio;
+			target /= fll_fratios[i].ratio;
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(fll_fratios)) {
+		pr_err("Unable to find FLL_FRATIO for Fref=%uHz\n", Fref);
+		return -EINVAL;
+	}
+
+	/* Now, calculate N.K */
+	Ndiv = target / Fref;
+
+	fll_div->n = Ndiv;
+	Nmod = target % Fref;
+	pr_debug("Nmod=%d\n", Nmod);
+
+	/* Calculate fractional part - scale up so we can round. */
+	Kpart = FIXED_FLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, Fref);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	fll_div->k = K / 10;
+
+	pr_debug("N=%x K=%x FLL_FRATIO=%x FLL_OUTDIV=%x FLL_CLK_REF_DIV=%x\n",
+		 fll_div->n, fll_div->k,
+		 fll_div->fll_fratio, fll_div->fll_outdiv,
+		 fll_div->fll_clk_ref_div);
+
+	return 0;
+}
+
+static int wm9081_set_fll(struct snd_soc_codec *codec, int fll_id,
+			  unsigned int Fref, unsigned int Fout)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	u16 reg1, reg4, reg5;
+	struct _fll_div fll_div;
+	int ret;
+	int clk_sys_reg;
+
+	/* Any change? */
+	if (Fref == wm9081->fll_fref && Fout == wm9081->fll_fout)
+		return 0;
+
+	/* Disable the FLL */
+	if (Fout == 0) {
+		dev_dbg(codec->dev, "FLL disabled\n");
+		wm9081->fll_fref = 0;
+		wm9081->fll_fout = 0;
+
+		return 0;
+	}
+
+	ret = fll_factors(&fll_div, Fref, Fout);
+	if (ret != 0)
+		return ret;
+
+	reg5 = wm9081_read(codec, WM9081_FLL_CONTROL_5);
+	reg5 &= ~WM9081_FLL_CLK_SRC_MASK;
+
+	switch (fll_id) {
+	case WM9081_SYSCLK_FLL_MCLK:
+		reg5 |= 0x1;
+		break;
+
+	default:
+		dev_err(codec->dev, "Unknown FLL ID %d\n", fll_id);
+		return -EINVAL;
+	}
+
+	/* Disable CLK_SYS while we reconfigure */
+	clk_sys_reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3,
+			     clk_sys_reg & ~WM9081_CLK_SYS_ENA);
+
+	/* Any FLL configuration change requires that the FLL be
+	 * disabled first. */
+	reg1 = wm9081_read(codec, WM9081_FLL_CONTROL_1);
+	reg1 &= ~WM9081_FLL_ENA;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	/* Apply the configuration */
+	if (fll_div.k)
+		reg1 |= WM9081_FLL_FRAC_MASK;
+	else
+		reg1 &= ~WM9081_FLL_FRAC_MASK;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	wm9081_write(codec, WM9081_FLL_CONTROL_2,
+		     (fll_div.fll_outdiv << WM9081_FLL_OUTDIV_SHIFT) |
+		     (fll_div.fll_fratio << WM9081_FLL_FRATIO_SHIFT));
+	wm9081_write(codec, WM9081_FLL_CONTROL_3, fll_div.k);
+
+	reg4 = wm9081_read(codec, WM9081_FLL_CONTROL_4);
+	reg4 &= ~WM9081_FLL_N_MASK;
+	reg4 |= fll_div.n << WM9081_FLL_N_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_4, reg4);
+
+	reg5 &= ~WM9081_FLL_CLK_REF_DIV_MASK;
+	reg5 |= fll_div.fll_clk_ref_div << WM9081_FLL_CLK_REF_DIV_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_5, reg5);
+
+	/* Enable the FLL */
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1 | WM9081_FLL_ENA);
+
+	/* Then bring CLK_SYS up again if it was disabled */
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3, clk_sys_reg);
+
+	dev_dbg(codec->dev, "FLL enabled at %dHz->%dHz\n", Fref, Fout);
+
+	wm9081->fll_fref = Fref;
+	wm9081->fll_fout = Fout;
+
+	return 0;
+}
+
+static int configure_clock(struct snd_soc_codec *codec)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int new_sysclk, i, target;
+	unsigned int reg;
+	int ret = 0;
+	int mclkdiv = 0;
+	int fll = 0;
+
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		if (wm9081->mclk_rate > 12225000) {
+			mclkdiv = 1;
+			wm9081->sysclk_rate = wm9081->mclk_rate / 2;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK, 0, 0);
+		break;
+
+	case WM9081_SYSCLK_FLL_MCLK:
+		/* If we have a sample rate calculate a CLK_SYS that
+		 * gives us a suitable DAC configuration, plus BCLK.
+		 * Ideally we would check to see if we can clock
+		 * directly from MCLK and only use the FLL if this is
+		 * not the case, though care must be taken with free
+		 * running mode.
+		 */
+		if (wm9081->master && wm9081->bclk) {
+			/* Make sure we can generate CLK_SYS and BCLK
+			 * and that we've got 3MHz for optimal
+			 * performance. */
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				target = wm9081->fs * clk_sys_rates[i].ratio;
+				new_sysclk = target;
+				if (target >= wm9081->bclk &&
+				    target > 3000000)
+					break;
+			}
+		} else if (wm9081->fs) {
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				new_sysclk = clk_sys_rates[i].ratio
+					* wm9081->fs;
+				if (new_sysclk > 3000000)
+					break;
+			}
+		} else {
+			new_sysclk = 12288000;
+		}
+
+		ret = wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK,
+				     wm9081->mclk_rate, new_sysclk);
+		if (ret == 0) {
+			wm9081->sysclk_rate = new_sysclk;
+
+			/* Switch SYSCLK over to FLL */
+			fll = 1;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_1);
+	if (mclkdiv)
+		reg |= WM9081_MCLKDIV2;
+	else
+		reg &= ~WM9081_MCLKDIV2;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_1, reg);
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (fll)
+		reg |= WM9081_CLK_SRC_SEL;
+	else
+		reg &= ~WM9081_CLK_SRC_SEL;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_3, reg);
+
+	dev_dbg(codec->dev, "CLK_SYS is %dHz\n", wm9081->sysclk_rate);
+
+	return ret;
+}
+
+static int clk_sys_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	/* This should be done on init() for bypass paths */
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK\n", wm9081->mclk_rate);
+		break;
+	case WM9081_SYSCLK_FLL_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK with FLL\n",
+			wm9081->mclk_rate);
+		break;
+	default:
+		dev_err(codec->dev, "System clock not configured\n");
+		return -EINVAL;
+	}
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		configure_clock(codec);
+		break;
+
+	case SND_SOC_DAPM_POST_PMD:
+		/* Disable the FLL if it's running */
+		wm9081_set_fll(codec, 0, 0, 0);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget wm9081_dapm_widgets[] = {
+SND_SOC_DAPM_INPUT("IN1"),
+SND_SOC_DAPM_INPUT("IN2"),
+
+SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM9081_POWER_MANAGEMENT, 0, 0),
+
+SND_SOC_DAPM_MIXER_NAMED_CTL("Mixer", SND_SOC_NOPM, 0, 0,
+			     mixer, ARRAY_SIZE(mixer)),
+
+SND_SOC_DAPM_PGA("LINEOUT PGA", WM9081_POWER_MANAGEMENT, 4, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA_E("Speaker PGA", WM9081_POWER_MANAGEMENT, 2, 0, NULL, 0,
+		   speaker_event,
+		   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+SND_SOC_DAPM_OUTPUT("LINEOUT"),
+SND_SOC_DAPM_OUTPUT("SPKN"),
+SND_SOC_DAPM_OUTPUT("SPKP"),
+
+SND_SOC_DAPM_SUPPLY("CLK_SYS", WM9081_CLOCK_CONTROL_3, 0, 0, clk_sys_event,
+		    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+SND_SOC_DAPM_SUPPLY("CLK_DSP", WM9081_CLOCK_CONTROL_3, 1, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("TOCLK", WM9081_CLOCK_CONTROL_3, 2, 0, NULL, 0),
+};
+
+
+static const struct snd_soc_dapm_route audio_paths[] = {
+	{ "DAC", NULL, "CLK_SYS" },
+	{ "DAC", NULL, "CLK_DSP" },
+
+	{ "Mixer", "IN1 Switch", "IN1" },
+	{ "Mixer", "IN2 Switch", "IN2" },
+	{ "Mixer", "Playback Switch", "DAC" },
+
+	{ "LINEOUT PGA", NULL, "Mixer" },
+	{ "LINEOUT PGA", NULL, "TOCLK" },
+	{ "LINEOUT PGA", NULL, "CLK_SYS" },
+
+	{ "LINEOUT", NULL, "LINEOUT PGA" },
+
+	{ "Speaker PGA", NULL, "Mixer" },
+	{ "Speaker PGA", NULL, "TOCLK" },
+	{ "Speaker PGA", NULL, "CLK_SYS" },
+
+	{ "SPKN", NULL, "Speaker PGA" },
+	{ "SPKP", NULL, "Speaker PGA" },
+};
+
+static int wm9081_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 reg;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* VMID=2*40k */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x2;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Normal bias current */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		/* Initial cold start */
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* Disable LINEOUT discharge */
+			reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+			reg &= ~WM9081_LINEOUT_DISCH;
+			wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+
+			/* Select startup bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg |= WM9081_BIAS_SRC | WM9081_BIAS_ENA;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+			/* VMID 2*4k; Soft VMID ramp enable */
+			reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+			reg |= WM9081_VMID_RAMP | 0x6;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			mdelay(100);
+
+			/* Normal bias enable & soft start off */
+			reg |= WM9081_BIAS_ENA;
+			reg &= ~WM9081_VMID_RAMP;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			/* Standard bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg &= ~WM9081_BIAS_SRC;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		}
+
+		/* VMID 2*240k */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x40;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Standby bias current on */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		/* Startup bias source */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_BIAS_SRC;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+		/* Disable VMID and biases with soft ramping */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~(WM9081_VMID_SEL_MASK | WM9081_BIAS_ENA);
+		reg |= WM9081_VMID_RAMP;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Actively discharge LINEOUT */
+		reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+		reg |= WM9081_LINEOUT_DISCH;
+		wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+		break;
+	}
+
+	codec->bias_level = level;
+
+	return 0;
+}
+
+static int wm9081_set_dai_fmt(struct snd_soc_dai *dai,
+			      unsigned int fmt)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	unsigned int aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+
+	aif2 &= ~(WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV |
+		  WM9081_BCLK_DIR | WM9081_LRCLK_DIR | WM9081_AIF_FMT_MASK);
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
+		wm9081->master = 0;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFM:
+		aif2 |= WM9081_LRCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFS:
+		aif2 |= WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFM:
+		aif2 |= WM9081_LRCLK_DIR | WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_B:
+		aif2 |= WM9081_AIF_LRCLK_INV;
+	case SND_SOC_DAIFMT_DSP_A:
+		aif2 |= 0x3;
+		break;
+	case SND_SOC_DAIFMT_I2S:
+		aif2 |= 0x2;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		aif2 |= 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_A:
+	case SND_SOC_DAIFMT_DSP_B:
+		/* frame inversion not valid for DSP modes */
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+
+	case SND_SOC_DAIFMT_I2S:
+	case SND_SOC_DAIFMT_RIGHT_J:
+	case SND_SOC_DAIFMT_LEFT_J:
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_IF:
+			aif2 |= WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			aif2 |= WM9081_AIF_LRCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+
+	return 0;
+}
+
+static int wm9081_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int ret, i, best, best_val, cur_val;
+	unsigned int clk_ctrl2, aif1, aif2, aif3, aif4;
+
+	clk_ctrl2 = wm9081_read(codec, WM9081_CLOCK_CONTROL_2);
+	clk_ctrl2 &= ~(WM9081_CLK_SYS_RATE_MASK | WM9081_SAMPLE_RATE_MASK);
+
+	aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+	aif2 &= ~WM9081_AIF_WL_MASK;
+
+	aif3 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_3);
+	aif3 &= ~WM9081_BCLK_DIV_MASK;
+
+	aif4 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_4);
+	aif4 &= ~WM9081_LRCLK_RATE_MASK;
+
+	/* What BCLK do we need? */
+	wm9081->fs = params_rate(params);
+	wm9081->bclk = 2 * wm9081->fs;
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		wm9081->bclk *= 16;
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		wm9081->bclk *= 20;
+		aif2 |= 0x4;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		wm9081->bclk *= 24;
+		aif2 |= 0x8;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		wm9081->bclk *= 32;
+		aif2 |= 0xc;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (aif1 & WM9081_AIFDAC_TDM_MODE_MASK) {
+		int slots = ((aif1 & WM9081_AIFDAC_TDM_MODE_MASK) >>
+			     WM9081_AIFDAC_TDM_MODE_SHIFT) + 1;
+		wm9081->bclk *= slots;
+	}
+
+	dev_dbg(codec->dev, "Target BCLK is %dHz\n", wm9081->bclk);
+
+	ret = configure_clock(codec);
+	if (ret != 0)
+		return ret;
+
+	/* Select nearest CLK_SYS_RATE */
+	best = 0;
+	best_val = abs((wm9081->sysclk_rate / clk_sys_rates[0].ratio)
+		       - wm9081->fs);
+	for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) {
+		cur_val = abs((wm9081->sysclk_rate /
+			       clk_sys_rates[i].ratio) - wm9081->fs);;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected CLK_SYS_RATIO of %d\n",
+		clk_sys_rates[best].ratio);
+	clk_ctrl2 |= (clk_sys_rates[best].clk_sys_rate
+		      << WM9081_CLK_SYS_RATE_SHIFT);
+
+	/* SAMPLE_RATE */
+	best = 0;
+	best_val = abs(wm9081->fs - sample_rates[0].rate);
+	for (i = 1; i < ARRAY_SIZE(sample_rates); i++) {
+		/* Closest match */
+		cur_val = abs(wm9081->fs - sample_rates[i].rate);
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected SAMPLE_RATE of %dHz\n",
+		sample_rates[best].rate);
+	clk_ctrl2 |= (sample_rates[best].sample_rate
+			<< WM9081_SAMPLE_RATE_SHIFT);
+
+	/* BCLK_DIV */
+	best = 0;
+	best_val = INT_MAX;
+	for (i = 0; i < ARRAY_SIZE(bclk_divs); i++) {
+		cur_val = ((wm9081->sysclk_rate * 10) / bclk_divs[i].div)
+			- wm9081->bclk;
+		if (cur_val < 0) /* Table is sorted */
+			break;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	wm9081->bclk = (wm9081->sysclk_rate * 10) / bclk_divs[best].div;
+	dev_dbg(codec->dev, "Selected BCLK_DIV of %d for %dHz BCLK\n",
+		bclk_divs[best].div, wm9081->bclk);
+	aif3 |= bclk_divs[best].bclk_div;
+
+	/* LRCLK is a simple fraction of BCLK */
+	dev_dbg(codec->dev, "LRCLK_RATE is %d\n", wm9081->bclk / wm9081->fs);
+	aif4 |= wm9081->bclk / wm9081->fs;
+
+	/* Apply a ReTune Mobile configuration if it's in use */
+	if (wm9081->retune) {
+		struct wm9081_retune_mobile_config *retune = wm9081->retune;
+		struct wm9081_retune_mobile_setting *s;
+		int eq1;
+
+		best = 0;
+		best_val = abs(retune->configs[0].rate - wm9081->fs);
+		for (i = 0; i < retune->num_configs; i++) {
+			cur_val = abs(retune->configs[i].rate - wm9081->fs);
+			if (cur_val < best_val) {
+				best_val = cur_val;
+				best = i;
+			}
+		}
+		s = &retune->configs[best];
+
+		dev_dbg(codec->dev, "ReTune Mobile %s tuned for %dHz\n",
+			s->name, s->rate);
+
+		/* If the EQ is enabled then disable it while we write out */
+		eq1 = wm9081_read(codec, WM9081_EQ_1) & WM9081_EQ_ENA;
+		if (eq1 & WM9081_EQ_ENA)
+			wm9081_write(codec, WM9081_EQ_1, 0);
+
+		/* Write out the other values */
+		for (i = 1; i < ARRAY_SIZE(s->config); i++)
+			wm9081_write(codec, WM9081_EQ_1 + i, s->config[i]);
+
+		eq1 |= (s->config[0] & ~WM9081_EQ_ENA);
+		wm9081_write(codec, WM9081_EQ_1, eq1);
+	}
+
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_2, clk_ctrl2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_3, aif3);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_4, aif4);
+
+	return 0;
+}
+
+static int wm9081_digital_mute(struct snd_soc_dai *codec_dai, int mute)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_DAC_DIGITAL_2);
+
+	if (mute)
+		reg |= WM9081_DAC_MUTE;
+	else
+		reg &= ~WM9081_DAC_MUTE;
+
+	wm9081_write(codec, WM9081_DAC_DIGITAL_2, reg);
+
+	return 0;
+}
+
+static int wm9081_set_sysclk(struct snd_soc_dai *codec_dai,
+			     int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	switch (clk_id) {
+	case WM9081_SYSCLK_MCLK:
+	case WM9081_SYSCLK_FLL_MCLK:
+		wm9081->sysclk_source = clk_id;
+		wm9081->mclk_rate = freq;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int wm9081_set_tdm_slot(struct snd_soc_dai *dai,
+			       unsigned int mask, int slots)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	unsigned int aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif1 &= ~(WM9081_AIFDAC_TDM_SLOT_MASK | WM9081_AIFDAC_TDM_MODE_MASK);
+
+	if (slots < 1 || slots > 4)
+		return -EINVAL;
+
+	aif1 |= (slots - 1) << WM9081_AIFDAC_TDM_MODE_SHIFT;
+
+	switch (mask) {
+	case 1:
+		break;
+	case 2:
+		aif1 |= 0x10;
+		break;
+	case 4:
+		aif1 |= 0x20;
+		break;
+	case 8:
+		aif1 |= 0x30;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_1, aif1);
+
+	return 0;
+}
+
+#define WM9081_RATES SNDRV_PCM_RATE_8000_96000
+
+#define WM9081_FORMATS \
+	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+	 SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops wm9081_dai_ops = {
+	.hw_params = wm9081_hw_params,
+	.set_sysclk = wm9081_set_sysclk,
+	.set_fmt = wm9081_set_dai_fmt,
+	.digital_mute = wm9081_digital_mute,
+	.set_tdm_slot = wm9081_set_tdm_slot,
+};
+
+/* We report two channels because the CODEC processes a stereo signal, even
+ * though it is only capable of handling a mono output.
+ */
+struct snd_soc_dai wm9081_dai = {
+	.name = "WM9081",
+	.playback = {
+		.stream_name = "HiFi Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM9081_RATES,
+		.formats = WM9081_FORMATS,
+	},
+	.ops = &wm9081_dai_ops,
+};
+EXPORT_SYMBOL_GPL(wm9081_dai);
+
+
+static struct snd_soc_codec *wm9081_codec;
+
+static int wm9081_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	struct wm9081_priv *wm9081;
+	int ret = 0;
+
+	if (wm9081_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm9081_codec;
+	codec = wm9081_codec;
+	wm9081 = codec->private_data;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm9081_snd_controls,
+			     ARRAY_SIZE(wm9081_snd_controls));
+	if (!wm9081->retune) {
+		dev_dbg(codec->dev,
+			"No ReTune Mobile data, using normal EQ\n");
+		snd_soc_add_controls(codec, wm9081_eq_controls,
+				     ARRAY_SIZE(wm9081_eq_controls));
+	}
+
+	snd_soc_dapm_new_controls(codec, wm9081_dapm_widgets,
+				  ARRAY_SIZE(wm9081_dapm_widgets));
+	snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths));
+	snd_soc_dapm_new_widgets(codec);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm9081_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int wm9081_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_OFF);
+
+	return 0;
+}
+
+static int wm9081_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 *reg_cache = codec->reg_cache;
+	int i;
+
+	for (i = 0; i < codec->reg_cache_size; i++) {
+		if (i == WM9081_SOFTWARE_RESET)
+			continue;
+
+		wm9081_write(codec, i, reg_cache[i]);
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	return 0;
+}
+#else
+#define wm9081_suspend NULL
+#define wm9081_resume NULL
+#endif
+
+struct snd_soc_codec_device soc_codec_dev_wm9081 = {
+	.probe = 	wm9081_probe,
+	.remove = 	wm9081_remove,
+	.suspend =	wm9081_suspend,
+	.resume =	wm9081_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm9081);
+
+static int wm9081_register(struct wm9081_priv *wm9081)
+{
+	struct snd_soc_codec *codec = &wm9081->codec;
+	int ret;
+	u16 reg;
+
+	if (wm9081_codec) {
+		dev_err(codec->dev, "Another WM9081 is registered\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm9081;
+	codec->name = "WM9081";
+	codec->owner = THIS_MODULE;
+	codec->read = wm9081_read;
+	codec->write = wm9081_write;
+	codec->dai = &wm9081_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm9081->reg_cache);
+	codec->reg_cache = &wm9081->reg_cache;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm9081_set_bias_level;
+
+	memcpy(codec->reg_cache, wm9081_reg_defaults,
+	       sizeof(wm9081_reg_defaults));
+
+	reg = wm9081_read_hw(codec, WM9081_SOFTWARE_RESET);
+	if (reg != 0x9081) {
+		dev_err(codec->dev, "Device is not a WM9081: ID=0x%x\n", reg);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = wm9081_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	/* Enable zero cross by default */
+	reg = wm9081_read(codec, WM9081_ANALOGUE_LINEOUT);
+	wm9081_write(codec, WM9081_ANALOGUE_LINEOUT, reg | WM9081_LINEOUTZC);
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_PGA);
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_PGA,
+		     reg | WM9081_SPKPGAZC);
+
+	wm9081_dai.dev = codec->dev;
+
+	wm9081_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm9081_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+
+err:
+	kfree(wm9081);
+	return ret;
+}
+
+static void wm9081_unregister(struct wm9081_priv *wm9081)
+{
+	wm9081_set_bias_level(&wm9081->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm9081_dai);
+	snd_soc_unregister_codec(&wm9081->codec);
+	kfree(wm9081);
+	wm9081_codec = NULL;
+}
+
+static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
+				      const struct i2c_device_id *id)
+{
+	struct wm9081_priv *wm9081;
+	struct snd_soc_codec *codec;
+
+	wm9081 = kzalloc(sizeof(struct wm9081_priv), GFP_KERNEL);
+	if (wm9081 == NULL)
+		return -ENOMEM;
+
+	codec = &wm9081->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+	wm9081->retune = i2c->dev.platform_data;
+
+	i2c_set_clientdata(i2c, wm9081);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm9081_register(wm9081);
+}
+
+static __devexit int wm9081_i2c_remove(struct i2c_client *client)
+{
+	struct wm9081_priv *wm9081 = i2c_get_clientdata(client);
+	wm9081_unregister(wm9081);
+	return 0;
+}
+
+static const struct i2c_device_id wm9081_i2c_id[] = {
+	{ "wm9081", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm9081_i2c_id);
+
+static struct i2c_driver wm9081_i2c_driver = {
+	.driver = {
+		.name = "wm9081",
+		.owner = THIS_MODULE,
+	},
+	.probe =    wm9081_i2c_probe,
+	.remove =   __devexit_p(wm9081_i2c_remove),
+	.id_table = wm9081_i2c_id,
+};
+
+static int __init wm9081_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm9081_i2c_driver);
+	if (ret != 0) {
+		printk(KERN_ERR "Failed to register WM9081 I2C driver: %d\n",
+		       ret);
+	}
+
+	return ret;
+}
+module_init(wm9081_modinit);
+
+static void __exit wm9081_exit(void)
+{
+	i2c_del_driver(&wm9081_i2c_driver);
+}
+module_exit(wm9081_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM9081 driver");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm9081.h b/sound/soc/codecs/wm9081.h
new file mode 100644
index 0000000..42d3bc7
--- /dev/null
+++ b/sound/soc/codecs/wm9081.h
@@ -0,0 +1,787 @@
+#ifndef WM9081_H
+#define WM9081_H
+
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <sound/soc.h>
+
+extern struct snd_soc_dai wm9081_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm9081;
+
+/*
+ * SYSCLK sources
+ */
+#define WM9081_SYSCLK_MCLK      1   /* Use MCLK without FLL */
+#define WM9081_SYSCLK_FLL_MCLK  2   /* Use MCLK, enabling FLL if required */
+
+/*
+ * Register values.
+ */
+#define WM9081_SOFTWARE_RESET                   0x00
+#define WM9081_ANALOGUE_LINEOUT                 0x02
+#define WM9081_ANALOGUE_SPEAKER_PGA             0x03
+#define WM9081_VMID_CONTROL                     0x04
+#define WM9081_BIAS_CONTROL_1                   0x05
+#define WM9081_ANALOGUE_MIXER                   0x07
+#define WM9081_ANTI_POP_CONTROL                 0x08
+#define WM9081_ANALOGUE_SPEAKER_1               0x09
+#define WM9081_ANALOGUE_SPEAKER_2               0x0A
+#define WM9081_POWER_MANAGEMENT                 0x0B
+#define WM9081_CLOCK_CONTROL_1                  0x0C
+#define WM9081_CLOCK_CONTROL_2                  0x0D
+#define WM9081_CLOCK_CONTROL_3                  0x0E
+#define WM9081_FLL_CONTROL_1                    0x10
+#define WM9081_FLL_CONTROL_2                    0x11
+#define WM9081_FLL_CONTROL_3                    0x12
+#define WM9081_FLL_CONTROL_4                    0x13
+#define WM9081_FLL_CONTROL_5                    0x14
+#define WM9081_AUDIO_INTERFACE_1                0x16
+#define WM9081_AUDIO_INTERFACE_2                0x17
+#define WM9081_AUDIO_INTERFACE_3                0x18
+#define WM9081_AUDIO_INTERFACE_4                0x19
+#define WM9081_INTERRUPT_STATUS                 0x1A
+#define WM9081_INTERRUPT_STATUS_MASK            0x1B
+#define WM9081_INTERRUPT_POLARITY               0x1C
+#define WM9081_INTERRUPT_CONTROL                0x1D
+#define WM9081_DAC_DIGITAL_1                    0x1E
+#define WM9081_DAC_DIGITAL_2                    0x1F
+#define WM9081_DRC_1                            0x20
+#define WM9081_DRC_2                            0x21
+#define WM9081_DRC_3                            0x22
+#define WM9081_DRC_4                            0x23
+#define WM9081_WRITE_SEQUENCER_1                0x26
+#define WM9081_WRITE_SEQUENCER_2                0x27
+#define WM9081_MW_SLAVE_1                       0x28
+#define WM9081_EQ_1                             0x2A
+#define WM9081_EQ_2                             0x2B
+#define WM9081_EQ_3                             0x2C
+#define WM9081_EQ_4                             0x2D
+#define WM9081_EQ_5                             0x2E
+#define WM9081_EQ_6                             0x2F
+#define WM9081_EQ_7                             0x30
+#define WM9081_EQ_8                             0x31
+#define WM9081_EQ_9                             0x32
+#define WM9081_EQ_10                            0x33
+#define WM9081_EQ_11                            0x34
+#define WM9081_EQ_12                            0x35
+#define WM9081_EQ_13                            0x36
+#define WM9081_EQ_14                            0x37
+#define WM9081_EQ_15                            0x38
+#define WM9081_EQ_16                            0x39
+#define WM9081_EQ_17                            0x3A
+#define WM9081_EQ_18                            0x3B
+#define WM9081_EQ_19                            0x3C
+#define WM9081_EQ_20                            0x3D
+
+#define WM9081_REGISTER_COUNT                   55
+#define WM9081_MAX_REGISTER                     0x3D
+
+/*
+ * Field Definitions.
+ */
+
+/*
+ * R0 (0x00) - Software Reset
+ */
+#define WM9081_SW_RST_DEV_ID1_MASK              0xFFFF  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_SHIFT                  0  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_WIDTH                 16  /* SW_RST_DEV_ID1 - [15:0] */
+
+/*
+ * R2 (0x02) - Analogue Lineout
+ */
+#define WM9081_LINEOUT_MUTE                     0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_MASK                0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_SHIFT                    7  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_WIDTH                    1  /* LINEOUT_MUTE */
+#define WM9081_LINEOUTZC                        0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_MASK                   0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_SHIFT                       6  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_WIDTH                       1  /* LINEOUTZC */
+#define WM9081_LINEOUT_VOL_MASK                 0x003F  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_SHIFT                     0  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_WIDTH                     6  /* LINEOUT_VOL - [5:0] */
+
+/*
+ * R3 (0x03) - Analogue Speaker PGA
+ */
+#define WM9081_SPKPGA_MUTE                      0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_MASK                 0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_SHIFT                     7  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_WIDTH                     1  /* SPKPGA_MUTE */
+#define WM9081_SPKPGAZC                         0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_MASK                    0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_SHIFT                        6  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_WIDTH                        1  /* SPKPGAZC */
+#define WM9081_SPKPGA_VOL_MASK                  0x003F  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_SHIFT                      0  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_WIDTH                      6  /* SPKPGA_VOL - [5:0] */
+
+/*
+ * R4 (0x04) - VMID Control
+ */
+#define WM9081_VMID_BUF_ENA                     0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_MASK                0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_SHIFT                    5  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_WIDTH                    1  /* VMID_BUF_ENA */
+#define WM9081_VMID_RAMP                        0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_MASK                   0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_SHIFT                       3  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_WIDTH                       1  /* VMID_RAMP */
+#define WM9081_VMID_SEL_MASK                    0x0006  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_SHIFT                        1  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_WIDTH                        2  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_FAST_ST                     0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_MASK                0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_SHIFT                    0  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_WIDTH                    1  /* VMID_FAST_ST */
+
+/*
+ * R5 (0x05) - Bias Control 1
+ */
+#define WM9081_BIAS_SRC                         0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_MASK                    0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_SHIFT                        6  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_WIDTH                        1  /* BIAS_SRC */
+#define WM9081_STBY_BIAS_LVL                    0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_MASK               0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_SHIFT                   5  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_WIDTH                   1  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_ENA                    0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_MASK               0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_SHIFT                   4  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_WIDTH                   1  /* STBY_BIAS_ENA */
+#define WM9081_BIAS_LVL_MASK                    0x000C  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_SHIFT                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_WIDTH                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_ENA                         0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_MASK                    0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_SHIFT                        1  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_WIDTH                        1  /* BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA                 0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_MASK            0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_SHIFT                0  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_WIDTH                1  /* STARTUP_BIAS_ENA */
+
+/*
+ * R7 (0x07) - Analogue Mixer
+ */
+#define WM9081_DAC_SEL                          0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_MASK                     0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_SHIFT                         4  /* DAC_SEL */
+#define WM9081_DAC_SEL_WIDTH                         1  /* DAC_SEL */
+#define WM9081_IN2_VOL                          0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_MASK                     0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_SHIFT                         3  /* IN2_VOL */
+#define WM9081_IN2_VOL_WIDTH                         1  /* IN2_VOL */
+#define WM9081_IN2_ENA                          0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_MASK                     0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_SHIFT                         2  /* IN2_ENA */
+#define WM9081_IN2_ENA_WIDTH                         1  /* IN2_ENA */
+#define WM9081_IN1_VOL                          0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_MASK                     0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_SHIFT                         1  /* IN1_VOL */
+#define WM9081_IN1_VOL_WIDTH                         1  /* IN1_VOL */
+#define WM9081_IN1_ENA                          0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_MASK                     0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_SHIFT                         0  /* IN1_ENA */
+#define WM9081_IN1_ENA_WIDTH                         1  /* IN1_ENA */
+
+/*
+ * R8 (0x08) - Anti Pop Control
+ */
+#define WM9081_LINEOUT_DISCH                    0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_MASK               0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_SHIFT                   2  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_WIDTH                   1  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_VROI                     0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_MASK                0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_SHIFT                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_WIDTH                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_CLAMP                    0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_MASK               0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_SHIFT                   0  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_WIDTH                   1  /* LINEOUT_CLAMP */
+
+/*
+ * R9 (0x09) - Analogue Speaker 1
+ */
+#define WM9081_SPK_DCGAIN_MASK                  0x0038  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_SHIFT                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_WIDTH                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_ACGAIN_MASK                  0x0007  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_SHIFT                      0  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_WIDTH                      3  /* SPK_ACGAIN - [2:0] */
+
+/*
+ * R10 (0x0A) - Analogue Speaker 2
+ */
+#define WM9081_SPK_MODE                         0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_MASK                    0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_SHIFT                        6  /* SPK_MODE */
+#define WM9081_SPK_MODE_WIDTH                        1  /* SPK_MODE */
+#define WM9081_SPK_INV_MUTE                     0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_MASK                0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_SHIFT                    4  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_WIDTH                    1  /* SPK_INV_MUTE */
+#define WM9081_OUT_SPK_CTRL                     0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_MASK                0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_SHIFT                    3  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_WIDTH                    1  /* OUT_SPK_CTRL */
+
+/*
+ * R11 (0x0B) - Power Management
+ */
+#define WM9081_TSHUT_ENA                        0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_MASK                   0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_SHIFT                       8  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_WIDTH                       1  /* TSHUT_ENA */
+#define WM9081_TSENSE_ENA                       0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_MASK                  0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_SHIFT                      7  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_WIDTH                      1  /* TSENSE_ENA */
+#define WM9081_TEMP_SHUT                        0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_MASK                   0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_SHIFT                       6  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_WIDTH                       1  /* TEMP_SHUT */
+#define WM9081_LINEOUT_ENA                      0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_MASK                 0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_SHIFT                     4  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_WIDTH                     1  /* LINEOUT_ENA */
+#define WM9081_SPKPGA_ENA                       0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_MASK                  0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_SHIFT                      2  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_WIDTH                      1  /* SPKPGA_ENA */
+#define WM9081_SPK_ENA                          0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_MASK                     0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_SHIFT                         1  /* SPK_ENA */
+#define WM9081_SPK_ENA_WIDTH                         1  /* SPK_ENA */
+#define WM9081_DAC_ENA                          0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_MASK                     0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_SHIFT                         0  /* DAC_ENA */
+#define WM9081_DAC_ENA_WIDTH                         1  /* DAC_ENA */
+
+/*
+ * R12 (0x0C) - Clock Control 1
+ */
+#define WM9081_CLK_OP_DIV_MASK                  0x1C00  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_SHIFT                     10  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_WIDTH                      3  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_TO_DIV_MASK                  0x0300  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_SHIFT                      8  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_WIDTH                      2  /* CLK_TO_DIV - [9:8] */
+#define WM9081_MCLKDIV2                         0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_MASK                    0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_SHIFT                        7  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_WIDTH                        1  /* MCLKDIV2 */
+
+/*
+ * R13 (0x0D) - Clock Control 2
+ */
+#define WM9081_CLK_SYS_RATE_MASK                0x00F0  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_SHIFT                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_WIDTH                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_SAMPLE_RATE_MASK                 0x000F  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_SHIFT                     0  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_WIDTH                     4  /* SAMPLE_RATE - [3:0] */
+
+/*
+ * R14 (0x0E) - Clock Control 3
+ */
+#define WM9081_CLK_SRC_SEL                      0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_MASK                 0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_SHIFT                    13  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_WIDTH                     1  /* CLK_SRC_SEL */
+#define WM9081_CLK_OP_ENA                       0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_MASK                  0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_SHIFT                      5  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_WIDTH                      1  /* CLK_OP_ENA */
+#define WM9081_CLK_TO_ENA                       0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_MASK                  0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_SHIFT                      2  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_WIDTH                      1  /* CLK_TO_ENA */
+#define WM9081_CLK_DSP_ENA                      0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_MASK                 0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_SHIFT                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_WIDTH                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_SYS_ENA                      0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_MASK                 0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_SHIFT                     0  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_WIDTH                     1  /* CLK_SYS_ENA */
+
+/*
+ * R16 (0x10) - FLL Control 1
+ */
+#define WM9081_FLL_HOLD                         0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_MASK                    0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_SHIFT                        3  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_WIDTH                        1  /* FLL_HOLD */
+#define WM9081_FLL_FRAC                         0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_MASK                    0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_SHIFT                        2  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_WIDTH                        1  /* FLL_FRAC */
+#define WM9081_FLL_ENA                          0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_MASK                     0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_SHIFT                         0  /* FLL_ENA */
+#define WM9081_FLL_ENA_WIDTH                         1  /* FLL_ENA */
+
+/*
+ * R17 (0x11) - FLL Control 2
+ */
+#define WM9081_FLL_OUTDIV_MASK                  0x0700  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_SHIFT                      8  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_WIDTH                      3  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_CTRL_RATE_MASK               0x0070  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_SHIFT                   4  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_WIDTH                   3  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_FRATIO_MASK                  0x0007  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_SHIFT                      0  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_WIDTH                      3  /* FLL_FRATIO - [2:0] */
+
+/*
+ * R18 (0x12) - FLL Control 3
+ */
+#define WM9081_FLL_K_MASK                       0xFFFF  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_SHIFT                           0  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_WIDTH                          16  /* FLL_K - [15:0] */
+
+/*
+ * R19 (0x13) - FLL Control 4
+ */
+#define WM9081_FLL_N_MASK                       0x7FE0  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_SHIFT                           5  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_WIDTH                          10  /* FLL_N - [14:5] */
+#define WM9081_FLL_GAIN_MASK                    0x000F  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_SHIFT                        0  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_WIDTH                        4  /* FLL_GAIN - [3:0] */
+
+/*
+ * R20 (0x14) - FLL Control 5
+ */
+#define WM9081_FLL_CLK_REF_DIV_MASK             0x0018  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_SHIFT                 3  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_WIDTH                 2  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_SRC_MASK                 0x0003  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_SHIFT                     0  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_WIDTH                     2  /* FLL_CLK_SRC - [1:0] */
+
+/*
+ * R22 (0x16) - Audio Interface 1
+ */
+#define WM9081_AIFDAC_CHAN                      0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_MASK                 0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_SHIFT                     6  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_WIDTH                     1  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_TDM_SLOT_MASK             0x0030  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_SHIFT                 4  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_WIDTH                 2  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_MODE_MASK             0x000C  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_SHIFT                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_WIDTH                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_DAC_COMP                         0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_MASK                    0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_SHIFT                        1  /* DAC_COMP */
+#define WM9081_DAC_COMP_WIDTH                        1  /* DAC_COMP */
+#define WM9081_DAC_COMPMODE                     0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_MASK                0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_SHIFT                    0  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_WIDTH                    1  /* DAC_COMPMODE */
+
+/*
+ * R23 (0x17) - Audio Interface 2
+ */
+#define WM9081_AIF_TRIS                         0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_MASK                    0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_SHIFT                        9  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_WIDTH                        1  /* AIF_TRIS */
+#define WM9081_DAC_DAT_INV                      0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_MASK                 0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_SHIFT                     8  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_WIDTH                     1  /* DAC_DAT_INV */
+#define WM9081_AIF_BCLK_INV                     0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_MASK                0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_SHIFT                    7  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_WIDTH                    1  /* AIF_BCLK_INV */
+#define WM9081_BCLK_DIR                         0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_MASK                    0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_SHIFT                        6  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_WIDTH                        1  /* BCLK_DIR */
+#define WM9081_LRCLK_DIR                        0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_MASK                   0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_SHIFT                       5  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_WIDTH                       1  /* LRCLK_DIR */
+#define WM9081_AIF_LRCLK_INV                    0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_MASK               0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_SHIFT                   4  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_WIDTH                   1  /* AIF_LRCLK_INV */
+#define WM9081_AIF_WL_MASK                      0x000C  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_SHIFT                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_WIDTH                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_FMT_MASK                     0x0003  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_SHIFT                         0  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_WIDTH                         2  /* AIF_FMT - [1:0] */
+
+/*
+ * R24 (0x18) - Audio Interface 3
+ */
+#define WM9081_BCLK_DIV_MASK                    0x001F  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_SHIFT                        0  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_WIDTH                        5  /* BCLK_DIV - [4:0] */
+
+/*
+ * R25 (0x19) - Audio Interface 4
+ */
+#define WM9081_LRCLK_RATE_MASK                  0x07FF  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_SHIFT                      0  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_WIDTH                     11  /* LRCLK_RATE - [10:0] */
+
+/*
+ * R26 (0x1A) - Interrupt Status
+ */
+#define WM9081_WSEQ_BUSY_EINT                   0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_MASK              0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_SHIFT                  2  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_WIDTH                  1  /* WSEQ_BUSY_EINT */
+#define WM9081_TSHUT_EINT                       0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_MASK                  0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_SHIFT                      0  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_WIDTH                      1  /* TSHUT_EINT */
+
+/*
+ * R27 (0x1B) - Interrupt Status Mask
+ */
+#define WM9081_IM_WSEQ_BUSY_EINT                0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_MASK           0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_SHIFT               2  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_WIDTH               1  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_TSHUT_EINT                    0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_MASK               0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_SHIFT                   0  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_WIDTH                   1  /* IM_TSHUT_EINT */
+
+/*
+ * R28 (0x1C) - Interrupt Polarity
+ */
+#define WM9081_TSHUT_INV                        0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_MASK                   0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_SHIFT                       0  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_WIDTH                       1  /* TSHUT_INV */
+
+/*
+ * R29 (0x1D) - Interrupt Control
+ */
+#define WM9081_IRQ_POL                          0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_MASK                     0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_SHIFT                        15  /* IRQ_POL */
+#define WM9081_IRQ_POL_WIDTH                         1  /* IRQ_POL */
+#define WM9081_IRQ_OP_CTRL                      0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_MASK                 0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_SHIFT                     0  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_WIDTH                     1  /* IRQ_OP_CTRL */
+
+/*
+ * R30 (0x1E) - DAC Digital 1
+ */
+#define WM9081_DAC_VOL_MASK                     0x00FF  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_SHIFT                         0  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_WIDTH                         8  /* DAC_VOL - [7:0] */
+
+/*
+ * R31 (0x1F) - DAC Digital 2
+ */
+#define WM9081_DAC_MUTERATE                     0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_MASK                0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_SHIFT                   10  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_WIDTH                    1  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTEMODE                     0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_MASK                0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_SHIFT                    9  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_WIDTH                    1  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTE                         0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_MASK                    0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_SHIFT                        3  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_WIDTH                        1  /* DAC_MUTE */
+#define WM9081_DEEMPH_MASK                      0x0006  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_SHIFT                          1  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_WIDTH                          2  /* DEEMPH - [2:1] */
+
+/*
+ * R32 (0x20) - DRC 1
+ */
+#define WM9081_DRC_ENA                          0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_MASK                     0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_SHIFT                        15  /* DRC_ENA */
+#define WM9081_DRC_ENA_WIDTH                         1  /* DRC_ENA */
+#define WM9081_DRC_STARTUP_GAIN_MASK            0x07C0  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_SHIFT                6  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_WIDTH                5  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_FF_DLY                       0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_MASK                  0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_SHIFT                      5  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_WIDTH                      1  /* DRC_FF_DLY */
+#define WM9081_DRC_QR                           0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_MASK                      0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_SHIFT                          2  /* DRC_QR */
+#define WM9081_DRC_QR_WIDTH                          1  /* DRC_QR */
+#define WM9081_DRC_ANTICLIP                     0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_MASK                0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_SHIFT                    1  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_WIDTH                    1  /* DRC_ANTICLIP */
+
+/*
+ * R33 (0x21) - DRC 2
+ */
+#define WM9081_DRC_ATK_MASK                     0xF000  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_SHIFT                        12  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_WIDTH                         4  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_DCY_MASK                     0x0F00  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_SHIFT                         8  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_WIDTH                         4  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_QR_THR_MASK                  0x00C0  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_SHIFT                      6  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_WIDTH                      2  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_DCY_MASK                  0x0030  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_SHIFT                      4  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_WIDTH                      2  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_MINGAIN_MASK                 0x000C  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_SHIFT                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_WIDTH                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MAXGAIN_MASK                 0x0003  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_SHIFT                     0  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_WIDTH                     2  /* DRC_MAXGAIN - [1:0] */
+
+/*
+ * R34 (0x22) - DRC 3
+ */
+#define WM9081_DRC_HI_COMP_MASK                 0x0038  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_SHIFT                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_WIDTH                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_LO_COMP_MASK                 0x0007  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_SHIFT                     0  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_WIDTH                     3  /* DRC_LO_COMP - [2:0] */
+
+/*
+ * R35 (0x23) - DRC 4
+ */
+#define WM9081_DRC_KNEE_IP_MASK                 0x07E0  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_SHIFT                     5  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_WIDTH                     6  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_OP_MASK                 0x001F  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_SHIFT                     0  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_WIDTH                     5  /* DRC_KNEE_OP - [4:0] */
+
+/*
+ * R38 (0x26) - Write Sequencer 1
+ */
+#define WM9081_WSEQ_ENA                         0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_MASK                    0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_SHIFT                       15  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_WIDTH                        1  /* WSEQ_ENA */
+#define WM9081_WSEQ_ABORT                       0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_MASK                  0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_SHIFT                      9  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_WIDTH                      1  /* WSEQ_ABORT */
+#define WM9081_WSEQ_START                       0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_MASK                  0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_SHIFT                      8  /* WSEQ_START */
+#define WM9081_WSEQ_START_WIDTH                      1  /* WSEQ_START */
+#define WM9081_WSEQ_START_INDEX_MASK            0x007F  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_SHIFT                0  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_WIDTH                7  /* WSEQ_START_INDEX - [6:0] */
+
+/*
+ * R39 (0x27) - Write Sequencer 2
+ */
+#define WM9081_WSEQ_CURRENT_INDEX_MASK          0x07F0  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_SHIFT              4  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_WIDTH              7  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_BUSY                        0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_MASK                   0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_SHIFT                       0  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_WIDTH                       1  /* WSEQ_BUSY */
+
+/*
+ * R40 (0x28) - MW Slave 1
+ */
+#define WM9081_SPI_CFG                          0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_MASK                     0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_SHIFT                         5  /* SPI_CFG */
+#define WM9081_SPI_CFG_WIDTH                         1  /* SPI_CFG */
+#define WM9081_SPI_4WIRE                        0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_MASK                   0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_SHIFT                       4  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_WIDTH                       1  /* SPI_4WIRE */
+#define WM9081_ARA_ENA                          0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_MASK                     0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_SHIFT                         3  /* ARA_ENA */
+#define WM9081_ARA_ENA_WIDTH                         1  /* ARA_ENA */
+#define WM9081_AUTO_INC                         0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_MASK                    0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_SHIFT                        1  /* AUTO_INC */
+#define WM9081_AUTO_INC_WIDTH                        1  /* AUTO_INC */
+
+/*
+ * R42 (0x2A) - EQ 1
+ */
+#define WM9081_EQ_B1_GAIN_MASK                  0xF800  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_SHIFT                     11  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_WIDTH                      5  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B2_GAIN_MASK                  0x07C0  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_SHIFT                      6  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_WIDTH                      5  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B4_GAIN_MASK                  0x003E  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_SHIFT                      1  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_WIDTH                      5  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_ENA                           0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_MASK                      0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_SHIFT                          0  /* EQ_ENA */
+#define WM9081_EQ_ENA_WIDTH                          1  /* EQ_ENA */
+
+/*
+ * R43 (0x2B) - EQ 2
+ */
+#define WM9081_EQ_B3_GAIN_MASK                  0xF800  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_SHIFT                     11  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_WIDTH                      5  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B5_GAIN_MASK                  0x07C0  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_SHIFT                      6  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_WIDTH                      5  /* EQ_B5_GAIN - [10:6] */
+
+/*
+ * R44 (0x2C) - EQ 3
+ */
+#define WM9081_EQ_B1_A_MASK                     0xFFFF  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_SHIFT                         0  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_WIDTH                        16  /* EQ_B1_A - [15:0] */
+
+/*
+ * R45 (0x2D) - EQ 4
+ */
+#define WM9081_EQ_B1_B_MASK                     0xFFFF  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_SHIFT                         0  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_WIDTH                        16  /* EQ_B1_B - [15:0] */
+
+/*
+ * R46 (0x2E) - EQ 5
+ */
+#define WM9081_EQ_B1_PG_MASK                    0xFFFF  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_SHIFT                        0  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_WIDTH                       16  /* EQ_B1_PG - [15:0] */
+
+/*
+ * R47 (0x2F) - EQ 6
+ */
+#define WM9081_EQ_B2_A_MASK                     0xFFFF  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_SHIFT                         0  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_WIDTH                        16  /* EQ_B2_A - [15:0] */
+
+/*
+ * R48 (0x30) - EQ 7
+ */
+#define WM9081_EQ_B2_B_MASK                     0xFFFF  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_SHIFT                         0  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_WIDTH                        16  /* EQ_B2_B - [15:0] */
+
+/*
+ * R49 (0x31) - EQ 8
+ */
+#define WM9081_EQ_B2_C_MASK                     0xFFFF  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_SHIFT                         0  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_WIDTH                        16  /* EQ_B2_C - [15:0] */
+
+/*
+ * R50 (0x32) - EQ 9
+ */
+#define WM9081_EQ_B2_PG_MASK                    0xFFFF  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_SHIFT                        0  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_WIDTH                       16  /* EQ_B2_PG - [15:0] */
+
+/*
+ * R51 (0x33) - EQ 10
+ */
+#define WM9081_EQ_B4_A_MASK                     0xFFFF  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_SHIFT                         0  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_WIDTH                        16  /* EQ_B4_A - [15:0] */
+
+/*
+ * R52 (0x34) - EQ 11
+ */
+#define WM9081_EQ_B4_B_MASK                     0xFFFF  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_SHIFT                         0  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_WIDTH                        16  /* EQ_B4_B - [15:0] */
+
+/*
+ * R53 (0x35) - EQ 12
+ */
+#define WM9081_EQ_B4_C_MASK                     0xFFFF  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_SHIFT                         0  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_WIDTH                        16  /* EQ_B4_C - [15:0] */
+
+/*
+ * R54 (0x36) - EQ 13
+ */
+#define WM9081_EQ_B4_PG_MASK                    0xFFFF  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_SHIFT                        0  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_WIDTH                       16  /* EQ_B4_PG - [15:0] */
+
+/*
+ * R55 (0x37) - EQ 14
+ */
+#define WM9081_EQ_B3_A_MASK                     0xFFFF  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_SHIFT                         0  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_WIDTH                        16  /* EQ_B3_A - [15:0] */
+
+/*
+ * R56 (0x38) - EQ 15
+ */
+#define WM9081_EQ_B3_B_MASK                     0xFFFF  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_SHIFT                         0  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_WIDTH                        16  /* EQ_B3_B - [15:0] */
+
+/*
+ * R57 (0x39) - EQ 16
+ */
+#define WM9081_EQ_B3_C_MASK                     0xFFFF  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_SHIFT                         0  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_WIDTH                        16  /* EQ_B3_C - [15:0] */
+
+/*
+ * R58 (0x3A) - EQ 17
+ */
+#define WM9081_EQ_B3_PG_MASK                    0xFFFF  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_SHIFT                        0  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_WIDTH                       16  /* EQ_B3_PG - [15:0] */
+
+/*
+ * R59 (0x3B) - EQ 18
+ */
+#define WM9081_EQ_B5_A_MASK                     0xFFFF  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_SHIFT                         0  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_WIDTH                        16  /* EQ_B5_A - [15:0] */
+
+/*
+ * R60 (0x3C) - EQ 19
+ */
+#define WM9081_EQ_B5_B_MASK                     0xFFFF  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_SHIFT                         0  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_WIDTH                        16  /* EQ_B5_B - [15:0] */
+
+/*
+ * R61 (0x3D) - EQ 20
+ */
+#define WM9081_EQ_B5_PG_MASK                    0xFFFF  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_SHIFT                        0  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_WIDTH                       16  /* EQ_B5_PG - [15:0] */
+
+
+#endif
diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c
index c2d1a7a..fa88b46 100644
--- a/sound/soc/codecs/wm9705.c
+++ b/sound/soc/codecs/wm9705.c
@@ -282,14 +282,14 @@
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.capture = {
 			.stream_name = "HiFi Capture",
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.ops = &wm9705_dai_ops,
 	},
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 765cf1e..1fd4e88 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -534,13 +534,13 @@
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_hifi,
 },
 {
@@ -550,7 +550,7 @@
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_aux,
 }
 };
@@ -585,6 +585,8 @@
 	}
 
 	soc_ac97_ops.reset(codec->ac97);
+	if (soc_ac97_ops.warm_reset)
+		soc_ac97_ops.warm_reset(codec->ac97);
 	if (ac97_read(codec, 0) != wm9712_reg[0])
 		goto err;
 	return 0;
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 523bad0..abed37a 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -189,6 +189,26 @@
 SOC_SINGLE("3D Depth", AC97_REC_GAIN_MIC, 0, 15, 1),
 };
 
+static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w,
+				 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	u16 status, rate;
+
+	BUG_ON(event != SND_SOC_DAPM_PRE_PMD);
+
+	/* Gracefully shut down the voice interface. */
+	status = ac97_read(codec, AC97_EXTENDED_MID) | 0x1000;
+	rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF;
+	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200);
+	schedule_timeout_interruptible(msecs_to_jiffies(1));
+	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00);
+	ac97_write(codec, AC97_EXTENDED_MID, status);
+
+	return 0;
+}
+
+
 /* We have to create a fake left and right HP mixers because
  * the codec only has a single control that is shared by both channels.
  * This makes it impossible to determine the audio path using the current
@@ -400,7 +420,8 @@
 SND_SOC_DAPM_MIXER("HP Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 SND_SOC_DAPM_MIXER("Line Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 SND_SOC_DAPM_MIXER("Capture Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
-SND_SOC_DAPM_DAC("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1),
+SND_SOC_DAPM_DAC_E("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1,
+		   wm9713_voice_shutdown, SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_DAC("Aux DAC", "Aux Playback", AC97_EXTENDED_MID, 11, 1),
 SND_SOC_DAPM_PGA("Left ADC", AC97_EXTENDED_MID, 5, 1, NULL, 0),
 SND_SOC_DAPM_PGA("Right ADC", AC97_EXTENDED_MID, 4, 1, NULL, 0),
@@ -689,7 +710,7 @@
 	Ndiv = target / source;
 	if ((Ndiv < 5) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"WM9713 PLL N value %d out of recommended range!\n",
+			"WM9713 PLL N value %u out of recommended range!\n",
 			Ndiv);
 
 	pll_div->n = Ndiv;
@@ -936,21 +957,6 @@
 	return 0;
 }
 
-static void wm9713_voiceshutdown(struct snd_pcm_substream *substream,
-				 struct snd_soc_dai *dai)
-{
-	struct snd_soc_codec *codec = dai->codec;
-	u16 status, rate;
-
-	/* Gracefully shut down the voice interface. */
-	status = ac97_read(codec, AC97_EXTENDED_STATUS) | 0x1000;
-	rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF;
-	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200);
-	schedule_timeout_interruptible(msecs_to_jiffies(1));
-	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00);
-	ac97_write(codec, AC97_EXTENDED_MID, status);
-}
-
 static int ac97_hifi_prepare(struct snd_pcm_substream *substream,
 			     struct snd_soc_dai *dai)
 {
@@ -1019,7 +1025,6 @@
 
 static struct snd_soc_dai_ops wm9713_dai_ops_voice = {
 	.hw_params	= wm9713_pcm_hw_params,
-	.shutdown	= wm9713_voiceshutdown,
 	.set_clkdiv	= wm9713_set_dai_clkdiv,
 	.set_pll	= wm9713_set_dai_pll,
 	.set_fmt	= wm9713_set_dai_fmt,
@@ -1035,13 +1040,13 @@
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_hifi,
 	},
 	{
@@ -1051,7 +1056,7 @@
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_aux,
 	},
 	{
@@ -1069,6 +1074,7 @@
 		.rates = WM9713_PCM_RATES,
 		.formats = WM9713_PCM_FORMATS,},
 	.ops = &wm9713_dai_ops_voice,
+	.symmetric_rates = 1,
 	},
 };
 EXPORT_SYMBOL_GPL(wm9713_dai);
diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 9fc9082..5dbebf8 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -1,5 +1,8 @@
 config SND_SOC_OF_SIMPLE
 	tristate
+	
+config SND_MPC52xx_DMA
+	tristate
 
 # ASoC platform support for the Freescale MPC8610 SOC.  This compiles drivers
 # for the SSI and the Elo DMA controller.  You will still need to select
@@ -22,7 +25,34 @@
 config SND_SOC_MPC5200_I2S
 	tristate "Freescale MPC5200 PSC in I2S mode driver"
 	depends on PPC_MPC52xx && PPC_BESTCOMM
-	select SND_SOC_OF_SIMPLE
+	select SND_MPC52xx_DMA
 	select PPC_BESTCOMM_GEN_BD
 	help
 	  Say Y here to support the MPC5200 PSCs in I2S mode.
+
+config SND_SOC_MPC5200_AC97
+	tristate "Freescale MPC5200 PSC in AC97 mode driver"
+	depends on PPC_MPC52xx && PPC_BESTCOMM
+	select AC97_BUS
+	select SND_MPC52xx_DMA
+	select PPC_BESTCOMM_GEN_BD
+	help
+	  Say Y here to support the MPC5200 PSCs in AC97 mode.
+
+config SND_MPC52xx_SOC_PCM030
+	tristate "SoC AC97 Audio support for Phytec pcm030 and WM9712"
+	depends on PPC_MPC5200_SIMPLE && BROKEN
+	select SND_SOC_MPC5200_AC97
+	select SND_SOC_WM9712
+	help
+	  Say Y if you want to add support for sound on the Phytec pcm030
+	  baseboard.
+
+config SND_MPC52xx_SOC_EFIKA
+	tristate "SoC AC97 Audio support for bbplan Efika and STAC9766"
+	depends on PPC_EFIKA && BROKEN
+	select SND_SOC_MPC5200_AC97
+	select SND_SOC_STAC9766
+	help
+	  Say Y if you want to add support for sound on the Efika.
+
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index f85134c..a83a739 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -10,5 +10,12 @@
 snd-soc-fsl-dma-objs := fsl_dma.o
 obj-$(CONFIG_SND_SOC_MPC8610) += snd-soc-fsl-ssi.o snd-soc-fsl-dma.o
 
+# MPC5200 Platform Support
+obj-$(CONFIG_SND_MPC52xx_DMA) += mpc5200_dma.o
 obj-$(CONFIG_SND_SOC_MPC5200_I2S) += mpc5200_psc_i2s.o
+obj-$(CONFIG_SND_SOC_MPC5200_AC97) += mpc5200_psc_ac97.o
+
+# MPC5200 Machine Support
+obj-$(CONFIG_SND_MPC52xx_SOC_PCM030) += pcm030-audio-fabric.o
+obj-$(CONFIG_SND_MPC52xx_SOC_EFIKA) += efika-audio-fabric.o
 
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
new file mode 100644
index 0000000..85b0e75
--- /dev/null
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -0,0 +1,90 @@
+/*
+ * Efika driver for the PSC of the Freescale MPC52xx
+ * configured as AC97 interface
+ *
+ * Copyright 2008 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/soc-of-simple.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+#include "../codecs/stac9766.h"
+
+static struct snd_soc_device device;
+static struct snd_soc_card card;
+
+static struct snd_soc_dai_link efika_fabric_dai[] = {
+{
+	.name = "AC97",
+	.stream_name = "AC97 Analog",
+	.codec_dai = &stac9766_dai[STAC9766_DAI_AC97_ANALOG],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_NORMAL],
+},
+{
+	.name = "AC97",
+	.stream_name = "AC97 IEC958",
+	.codec_dai = &stac9766_dai[STAC9766_DAI_AC97_DIGITAL],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_SPDIF],
+},
+};
+
+static __init int efika_fabric_init(void)
+{
+	struct platform_device *pdev;
+	int rc;
+
+	if (!machine_is_compatible("bplan,efika"))
+		return -ENODEV;
+
+	card.platform = &mpc5200_audio_dma_platform;
+	card.name = "Efika";
+	card.dai_link = efika_fabric_dai;
+	card.num_links = ARRAY_SIZE(efika_fabric_dai);
+
+	device.card = &card;
+	device.codec_dev = &soc_codec_dev_stac9766;
+
+	pdev = platform_device_alloc("soc-audio", 1);
+	if (!pdev) {
+		pr_err("efika_fabric_init: platform_device_alloc() failed\n");
+		return -ENODEV;
+	}
+
+	platform_set_drvdata(pdev, &device);
+	device.dev = &pdev->dev;
+
+	rc = platform_device_add(pdev);
+	if (rc) {
+		pr_err("efika_fabric_init: platform_device_add() failed\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+module_init(efika_fabric_init);
+
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION(DRV_NAME ": mpc5200 Efika fabric driver");
+MODULE_LICENSE("GPL");
+
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 3711d84..93f0f38 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -375,18 +375,14 @@
 		struct snd_pcm_runtime *first_runtime =
 			ssi_private->first_stream->runtime;
 
-		if (!first_runtime->rate || !first_runtime->sample_bits) {
+		if (!first_runtime->sample_bits) {
 			dev_err(substream->pcm->card->dev,
-				"set sample rate and size in %s stream first\n",
+				"set sample size in %s stream first\n",
 				substream->stream == SNDRV_PCM_STREAM_PLAYBACK
 				? "capture" : "playback");
 			return -EAGAIN;
 		}
 
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-			SNDRV_PCM_HW_PARAM_RATE,
-			first_runtime->rate, first_runtime->rate);
-
 		/* If we're in synchronous mode, then we need to constrain
 		 * the sample size as well.  We don't support independent sample
 		 * rates in asynchronous mode.
@@ -674,7 +670,7 @@
 	ssi_private->dev = ssi_info->dev;
 	ssi_private->asynchronous = ssi_info->asynchronous;
 
-	ssi_private->dev->driver_data = fsl_ssi_dai;
+	dev_set_drvdata(ssi_private->dev, fsl_ssi_dai);
 
 	/* Initialize the the device_attribute structure */
 	dev_attr->attr.name = "ssi-stats";
@@ -693,6 +689,7 @@
 	fsl_ssi_dai->name = ssi_private->name;
 	fsl_ssi_dai->id = ssi_info->id;
 	fsl_ssi_dai->dev = ssi_info->dev;
+	fsl_ssi_dai->symmetric_rates = 1;
 
 	ret = snd_soc_register_dai(fsl_ssi_dai);
 	if (ret != 0) {
diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
new file mode 100644
index 0000000..efec33a
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -0,0 +1,564 @@
+/*
+ * Freescale MPC5200 PSC DMA
+ * ALSA SoC Platform driver
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <sound/soc.h>
+
+#include <sysdev/bestcomm/bestcomm.h>
+#include <sysdev/bestcomm/gen_bd.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc5200_dma.h"
+
+/*
+ * Interrupt handlers
+ */
+static irqreturn_t psc_dma_status_irq(int irq, void *_psc_dma)
+{
+	struct psc_dma *psc_dma = _psc_dma;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+	u16 isr;
+
+	isr = in_be16(&regs->mpc52xx_psc_isr);
+
+	/* Playback underrun error */
+	if (psc_dma->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
+		psc_dma->stats.underrun_count++;
+
+	/* Capture overrun error */
+	if (psc_dma->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
+		psc_dma->stats.overrun_count++;
+
+	out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * psc_dma_bcom_enqueue_next_buffer - Enqueue another audio buffer
+ * @s: pointer to stream private data structure
+ *
+ * Enqueues another audio period buffer into the bestcomm queue.
+ *
+ * Note: The routine must only be called when there is space available in
+ * the queue.  Otherwise the enqueue will fail and the audio ring buffer
+ * will get out of sync
+ */
+static void psc_dma_bcom_enqueue_next_buffer(struct psc_dma_stream *s)
+{
+	struct bcom_bd *bd;
+
+	/* Prepare and enqueue the next buffer descriptor */
+	bd = bcom_prepare_next_buffer(s->bcom_task);
+	bd->status = s->period_bytes;
+	bd->data[0] = s->period_next_pt;
+	bcom_submit_next_buffer(s->bcom_task, NULL);
+
+	/* Update for next period */
+	s->period_next_pt += s->period_bytes;
+	if (s->period_next_pt >= s->period_end)
+		s->period_next_pt = s->period_start;
+}
+
+static void psc_dma_bcom_enqueue_tx(struct psc_dma_stream *s)
+{
+	while (s->appl_ptr < s->runtime->control->appl_ptr) {
+
+		if (bcom_queue_full(s->bcom_task))
+			return;
+
+		s->appl_ptr += s->period_size;
+
+		psc_dma_bcom_enqueue_next_buffer(s);
+	}
+}
+
+/* Bestcomm DMA irq handler */
+static irqreturn_t psc_dma_bcom_irq_tx(int irq, void *_psc_dma_stream)
+{
+	struct psc_dma_stream *s = _psc_dma_stream;
+
+	spin_lock(&s->psc_dma->lock);
+	/* For each finished period, dequeue the completed period buffer
+	 * and enqueue a new one in it's place. */
+	while (bcom_buffer_done(s->bcom_task)) {
+		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+
+		s->period_current_pt += s->period_bytes;
+		if (s->period_current_pt >= s->period_end)
+			s->period_current_pt = s->period_start;
+	}
+	psc_dma_bcom_enqueue_tx(s);
+	spin_unlock(&s->psc_dma->lock);
+
+	/* If the stream is active, then also inform the PCM middle layer
+	 * of the period finished event. */
+	if (s->active)
+		snd_pcm_period_elapsed(s->stream);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t psc_dma_bcom_irq_rx(int irq, void *_psc_dma_stream)
+{
+	struct psc_dma_stream *s = _psc_dma_stream;
+
+	spin_lock(&s->psc_dma->lock);
+	/* For each finished period, dequeue the completed period buffer
+	 * and enqueue a new one in it's place. */
+	while (bcom_buffer_done(s->bcom_task)) {
+		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+
+		s->period_current_pt += s->period_bytes;
+		if (s->period_current_pt >= s->period_end)
+			s->period_current_pt = s->period_start;
+
+		psc_dma_bcom_enqueue_next_buffer(s);
+	}
+	spin_unlock(&s->psc_dma->lock);
+
+	/* If the stream is active, then also inform the PCM middle layer
+	 * of the period finished event. */
+	if (s->active)
+		snd_pcm_period_elapsed(s->stream);
+
+	return IRQ_HANDLED;
+}
+
+static int psc_dma_hw_free(struct snd_pcm_substream *substream)
+{
+	snd_pcm_set_runtime_buffer(substream, NULL);
+	return 0;
+}
+
+/**
+ * psc_dma_trigger: start and stop the DMA transfer.
+ *
+ * This function is called by ALSA to start, stop, pause, and resume the DMA
+ * transfer of data.
+ */
+static int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct psc_dma_stream *s;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+	u16 imr;
+	unsigned long flags;
+	int i;
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_dma->capture;
+	else
+		s = &psc_dma->playback;
+
+	dev_dbg(psc_dma->dev, "psc_dma_trigger(substream=%p, cmd=%i)"
+		" stream_id=%i\n",
+		substream, cmd, substream->pstr->stream);
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		s->period_bytes = frames_to_bytes(runtime,
+						  runtime->period_size);
+		s->period_start = virt_to_phys(runtime->dma_area);
+		s->period_end = s->period_start +
+				(s->period_bytes * runtime->periods);
+		s->period_next_pt = s->period_start;
+		s->period_current_pt = s->period_start;
+		s->period_size = runtime->period_size;
+		s->active = 1;
+
+		/* track appl_ptr so that we have a better chance of detecting
+		 * end of stream and not over running it.
+		 */
+		s->runtime = runtime;
+		s->appl_ptr = s->runtime->control->appl_ptr -
+				(runtime->period_size * runtime->periods);
+
+		/* Fill up the bestcomm bd queue and enable DMA.
+		 * This will begin filling the PSC's fifo.
+		 */
+		spin_lock_irqsave(&psc_dma->lock, flags);
+
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
+			bcom_gen_bd_rx_reset(s->bcom_task);
+			for (i = 0; i < runtime->periods; i++)
+				if (!bcom_queue_full(s->bcom_task))
+					psc_dma_bcom_enqueue_next_buffer(s);
+		} else {
+			bcom_gen_bd_tx_reset(s->bcom_task);
+			psc_dma_bcom_enqueue_tx(s);
+		}
+
+		bcom_enable(s->bcom_task);
+		spin_unlock_irqrestore(&psc_dma->lock, flags);
+
+		out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		s->active = 0;
+
+		spin_lock_irqsave(&psc_dma->lock, flags);
+		bcom_disable(s->bcom_task);
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+			bcom_gen_bd_rx_reset(s->bcom_task);
+		else
+			bcom_gen_bd_tx_reset(s->bcom_task);
+		spin_unlock_irqrestore(&psc_dma->lock, flags);
+
+		break;
+
+	default:
+		dev_dbg(psc_dma->dev, "invalid command\n");
+		return -EINVAL;
+	}
+
+	/* Update interrupt enable settings */
+	imr = 0;
+	if (psc_dma->playback.active)
+		imr |= MPC52xx_PSC_IMR_TXEMP;
+	if (psc_dma->capture.active)
+		imr |= MPC52xx_PSC_IMR_ORERR;
+	out_be16(&regs->isr_imr.imr, psc_dma->imr | imr);
+
+	return 0;
+}
+
+
+/* ---------------------------------------------------------------------
+ * The PSC DMA 'ASoC platform' driver
+ *
+ * Can be referenced by an 'ASoC machine' driver
+ * This driver only deals with the audio bus; it doesn't have any
+ * interaction with the attached codec
+ */
+
+static const struct snd_pcm_hardware psc_dma_hardware = {
+	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
+		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		SNDRV_PCM_INFO_BATCH,
+	.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE |
+		SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
+	.rate_min = 8000,
+	.rate_max = 48000,
+	.channels_min = 1,
+	.channels_max = 2,
+	.period_bytes_max	= 1024 * 1024,
+	.period_bytes_min	= 32,
+	.periods_min		= 2,
+	.periods_max		= 256,
+	.buffer_bytes_max	= 2 * 1024 * 1024,
+	.fifo_size		= 512,
+};
+
+static int psc_dma_open(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
+	int rc;
+
+	dev_dbg(psc_dma->dev, "psc_dma_open(substream=%p)\n", substream);
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_dma->capture;
+	else
+		s = &psc_dma->playback;
+
+	snd_soc_set_runtime_hwparams(substream, &psc_dma_hardware);
+
+	rc = snd_pcm_hw_constraint_integer(runtime,
+		SNDRV_PCM_HW_PARAM_PERIODS);
+	if (rc < 0) {
+		dev_err(substream->pcm->card->dev, "invalid buffer size\n");
+		return rc;
+	}
+
+	s->stream = substream;
+	return 0;
+}
+
+static int psc_dma_close(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
+
+	dev_dbg(psc_dma->dev, "psc_dma_close(substream=%p)\n", substream);
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_dma->capture;
+	else
+		s = &psc_dma->playback;
+
+	if (!psc_dma->playback.active &&
+	    !psc_dma->capture.active) {
+
+		/* Disable all interrupts and reset the PSC */
+		out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+		out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
+	}
+	s->stream = NULL;
+	return 0;
+}
+
+static snd_pcm_uframes_t
+psc_dma_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
+	dma_addr_t count;
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_dma->capture;
+	else
+		s = &psc_dma->playback;
+
+	count = s->period_current_pt - s->period_start;
+
+	return bytes_to_frames(substream->runtime, count);
+}
+
+static int
+psc_dma_hw_params(struct snd_pcm_substream *substream,
+			 struct snd_pcm_hw_params *params)
+{
+	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
+
+	return 0;
+}
+
+static struct snd_pcm_ops psc_dma_ops = {
+	.open		= psc_dma_open,
+	.close		= psc_dma_close,
+	.hw_free	= psc_dma_hw_free,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.pointer	= psc_dma_pointer,
+	.trigger	= psc_dma_trigger,
+	.hw_params	= psc_dma_hw_params,
+};
+
+static u64 psc_dma_dmamask = 0xffffffff;
+static int psc_dma_new(struct snd_card *card, struct snd_soc_dai *dai,
+			   struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	size_t size = psc_dma_hardware.buffer_bytes_max;
+	int rc = 0;
+
+	dev_dbg(rtd->socdev->dev, "psc_dma_new(card=%p, dai=%p, pcm=%p)\n",
+		card, dai, pcm);
+
+	if (!card->dev->dma_mask)
+		card->dev->dma_mask = &psc_dma_dmamask;
+	if (!card->dev->coherent_dma_mask)
+		card->dev->coherent_dma_mask = 0xffffffff;
+
+	if (pcm->streams[0].substream) {
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->card->dev,
+				size, &pcm->streams[0].substream->dma_buffer);
+		if (rc)
+			goto playback_alloc_err;
+	}
+
+	if (pcm->streams[1].substream) {
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->card->dev,
+				size, &pcm->streams[1].substream->dma_buffer);
+		if (rc)
+			goto capture_alloc_err;
+	}
+
+	if (rtd->socdev->card->codec->ac97)
+		rtd->socdev->card->codec->ac97->private_data = psc_dma;
+
+	return 0;
+
+ capture_alloc_err:
+	if (pcm->streams[0].substream)
+		snd_dma_free_pages(&pcm->streams[0].substream->dma_buffer);
+
+ playback_alloc_err:
+	dev_err(card->dev, "Cannot allocate buffer(s)\n");
+
+	return -ENOMEM;
+}
+
+static void psc_dma_free(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	struct snd_pcm_substream *substream;
+	int stream;
+
+	dev_dbg(rtd->socdev->dev, "psc_dma_free(pcm=%p)\n", pcm);
+
+	for (stream = 0; stream < 2; stream++) {
+		substream = pcm->streams[stream].substream;
+		if (substream) {
+			snd_dma_free_pages(&substream->dma_buffer);
+			substream->dma_buffer.area = NULL;
+			substream->dma_buffer.addr = 0;
+		}
+	}
+}
+
+struct snd_soc_platform mpc5200_audio_dma_platform = {
+	.name		= "mpc5200-psc-audio",
+	.pcm_ops	= &psc_dma_ops,
+	.pcm_new	= &psc_dma_new,
+	.pcm_free	= &psc_dma_free,
+};
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_platform);
+
+int mpc5200_audio_dma_create(struct of_device *op)
+{
+	phys_addr_t fifo;
+	struct psc_dma *psc_dma;
+	struct resource res;
+	int size, irq, rc;
+	const __be32 *prop;
+	void __iomem *regs;
+
+	/* Fetch the registers and IRQ of the PSC */
+	irq = irq_of_parse_and_map(op->node, 0);
+	if (of_address_to_resource(op->node, 0, &res)) {
+		dev_err(&op->dev, "Missing reg property\n");
+		return -ENODEV;
+	}
+	regs = ioremap(res.start, 1 + res.end - res.start);
+	if (!regs) {
+		dev_err(&op->dev, "Could not map registers\n");
+		return -ENODEV;
+	}
+
+	/* Allocate and initialize the driver private data */
+	psc_dma = kzalloc(sizeof *psc_dma, GFP_KERNEL);
+	if (!psc_dma) {
+		iounmap(regs);
+		return -ENOMEM;
+	}
+
+	/* Get the PSC ID */
+	prop = of_get_property(op->node, "cell-index", &size);
+	if (!prop || size < sizeof *prop)
+		return -ENODEV;
+
+	spin_lock_init(&psc_dma->lock);
+	psc_dma->id = be32_to_cpu(*prop);
+	psc_dma->irq = irq;
+	psc_dma->psc_regs = regs;
+	psc_dma->fifo_regs = regs + sizeof *psc_dma->psc_regs;
+	psc_dma->dev = &op->dev;
+	psc_dma->playback.psc_dma = psc_dma;
+	psc_dma->capture.psc_dma = psc_dma;
+	snprintf(psc_dma->name, sizeof psc_dma->name, "PSC%u", psc_dma->id);
+
+	/* Find the address of the fifo data registers and setup the
+	 * DMA tasks */
+	fifo = res.start + offsetof(struct mpc52xx_psc, buffer.buffer_32);
+	psc_dma->capture.bcom_task =
+		bcom_psc_gen_bd_rx_init(psc_dma->id, 10, fifo, 512);
+	psc_dma->playback.bcom_task =
+		bcom_psc_gen_bd_tx_init(psc_dma->id, 10, fifo);
+	if (!psc_dma->capture.bcom_task ||
+	    !psc_dma->playback.bcom_task) {
+		dev_err(&op->dev, "Could not allocate bestcomm tasks\n");
+		iounmap(regs);
+		kfree(psc_dma);
+		return -ENODEV;
+	}
+
+	/* Disable all interrupts and reset the PSC */
+	out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+	 /* reset receiver */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_RX);
+	 /* reset transmitter */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_TX);
+	 /* reset error */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_ERR_STAT);
+	 /* reset mode */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_SEL_MODE_REG_1);
+
+	/* Set up mode register;
+	 * First write: RxRdy (FIFO Alarm) generates rx FIFO irq
+	 * Second write: register Normal mode for non loopback
+	 */
+	out_8(&psc_dma->psc_regs->mode, 0);
+	out_8(&psc_dma->psc_regs->mode, 0);
+
+	/* Set the TX and RX fifo alarm thresholds */
+	out_be16(&psc_dma->fifo_regs->rfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->rfcntl, 0x4);
+	out_be16(&psc_dma->fifo_regs->tfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->tfcntl, 0x7);
+
+	/* Lookup the IRQ numbers */
+	psc_dma->playback.irq =
+		bcom_get_task_irq(psc_dma->playback.bcom_task);
+	psc_dma->capture.irq =
+		bcom_get_task_irq(psc_dma->capture.bcom_task);
+
+	rc = request_irq(psc_dma->irq, &psc_dma_status_irq, IRQF_SHARED,
+			 "psc-dma-status", psc_dma);
+	rc |= request_irq(psc_dma->capture.irq,
+			  &psc_dma_bcom_irq_rx, IRQF_SHARED,
+			  "psc-dma-capture", &psc_dma->capture);
+	rc |= request_irq(psc_dma->playback.irq,
+			  &psc_dma_bcom_irq_tx, IRQF_SHARED,
+			  "psc-dma-playback", &psc_dma->playback);
+	if (rc) {
+		free_irq(psc_dma->irq, psc_dma);
+		free_irq(psc_dma->capture.irq,
+			 &psc_dma->capture);
+		free_irq(psc_dma->playback.irq,
+			 &psc_dma->playback);
+		return -ENODEV;
+	}
+
+	/* Save what we've done so it can be found again later */
+	dev_set_drvdata(&op->dev, psc_dma);
+
+	/* Tell the ASoC OF helpers about it */
+	return snd_soc_register_platform(&mpc5200_audio_dma_platform);
+}
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_create);
+
+int mpc5200_audio_dma_destroy(struct of_device *op)
+{
+	struct psc_dma *psc_dma = dev_get_drvdata(&op->dev);
+
+	dev_dbg(&op->dev, "mpc5200_audio_dma_destroy()\n");
+
+	snd_soc_unregister_platform(&mpc5200_audio_dma_platform);
+
+	bcom_gen_bd_rx_release(psc_dma->capture.bcom_task);
+	bcom_gen_bd_tx_release(psc_dma->playback.bcom_task);
+
+	/* Release irqs */
+	free_irq(psc_dma->irq, psc_dma);
+	free_irq(psc_dma->capture.irq, &psc_dma->capture);
+	free_irq(psc_dma->playback.irq, &psc_dma->playback);
+
+	iounmap(psc_dma->psc_regs);
+	kfree(psc_dma);
+	dev_set_drvdata(&op->dev, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_destroy);
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Freescale MPC5200 PSC in DMA mode ASoC Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/fsl/mpc5200_dma.h b/sound/soc/fsl/mpc5200_dma.h
new file mode 100644
index 0000000..2000803
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_dma.h
@@ -0,0 +1,80 @@
+/*
+ * Freescale MPC5200 Audio DMA driver
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC5200_DMA_H__
+#define __SOUND_SOC_FSL_MPC5200_DMA_H__
+
+#define PSC_STREAM_NAME_LEN 32
+
+/**
+ * psc_ac97_stream - Data specific to a single stream (playback or capture)
+ * @active:		flag indicating if the stream is active
+ * @psc_dma:		pointer back to parent psc_dma data structure
+ * @bcom_task:		bestcomm task structure
+ * @irq:		irq number for bestcomm task
+ * @period_start:	physical address of start of DMA region
+ * @period_end:		physical address of end of DMA region
+ * @period_next_pt:	physical address of next DMA buffer to enqueue
+ * @period_bytes:	size of DMA period in bytes
+ */
+struct psc_dma_stream {
+	struct snd_pcm_runtime *runtime;
+	snd_pcm_uframes_t appl_ptr;
+
+	int active;
+	struct psc_dma *psc_dma;
+	struct bcom_task *bcom_task;
+	int irq;
+	struct snd_pcm_substream *stream;
+	dma_addr_t period_start;
+	dma_addr_t period_end;
+	dma_addr_t period_next_pt;
+	dma_addr_t period_current_pt;
+	int period_bytes;
+	int period_size;
+};
+
+/**
+ * psc_dma - Private driver data
+ * @name: short name for this device ("PSC0", "PSC1", etc)
+ * @psc_regs: pointer to the PSC's registers
+ * @fifo_regs: pointer to the PSC's FIFO registers
+ * @irq: IRQ of this PSC
+ * @dev: struct device pointer
+ * @dai: the CPU DAI for this device
+ * @sicr: Base value used in serial interface control register; mode is ORed
+ *        with this value.
+ * @playback: Playback stream context data
+ * @capture: Capture stream context data
+ */
+struct psc_dma {
+	char name[32];
+	struct mpc52xx_psc __iomem *psc_regs;
+	struct mpc52xx_psc_fifo __iomem *fifo_regs;
+	unsigned int irq;
+	struct device *dev;
+	spinlock_t lock;
+	u32 sicr;
+	uint sysclk;
+	int imr;
+	int id;
+	unsigned int slots;
+
+	/* per-stream data */
+	struct psc_dma_stream playback;
+	struct psc_dma_stream capture;
+
+	/* Statistics */
+	struct {
+		unsigned long overrun_count;
+		unsigned long underrun_count;
+	} stats;
+};
+
+int mpc5200_audio_dma_create(struct of_device *op);
+int mpc5200_audio_dma_destroy(struct of_device *op);
+
+extern struct snd_soc_platform mpc5200_audio_dma_platform;
+
+#endif /* __SOUND_SOC_FSL_MPC5200_DMA_H__ */
diff --git a/sound/soc/fsl/mpc5200_psc_ac97.c b/sound/soc/fsl/mpc5200_psc_ac97.c
new file mode 100644
index 0000000..794a247
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_ac97.c
@@ -0,0 +1,329 @@
+/*
+ * linux/sound/mpc5200-ac97.c -- AC97 support for the Freescale MPC52xx chip.
+ *
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+#include <asm/time.h>
+#include <asm/delay.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+
+#define DRV_NAME "mpc5200-psc-ac97"
+
+/* ALSA only supports a single AC97 device so static is recommend here */
+static struct psc_dma *psc_dma;
+
+static unsigned short psc_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
+{
+	int status;
+	unsigned int val;
+
+	/* Wait for command send status zero = ready */
+	status = spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0);
+	if (status == 0) {
+		pr_err("timeout on ac97 bus (rdy)\n");
+		return -ENODEV;
+	}
+	/* Send the read */
+	out_be32(&psc_dma->psc_regs->ac97_cmd, (1<<31) | ((reg & 0x7f) << 24));
+
+	/* Wait for the answer */
+	status = spin_event_timeout((in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_DATA_VAL), 100, 0);
+	if (status == 0) {
+		pr_err("timeout on ac97 read (val) %x\n",
+				in_be16(&psc_dma->psc_regs->sr_csr.status));
+		return -ENODEV;
+	}
+	/* Get the data */
+	val = in_be32(&psc_dma->psc_regs->ac97_data);
+	if (((val >> 24) & 0x7f) != reg) {
+		pr_err("reg echo error on ac97 read\n");
+		return -ENODEV;
+	}
+	val = (val >> 8) & 0xffff;
+
+	return (unsigned short) val;
+}
+
+static void psc_ac97_write(struct snd_ac97 *ac97,
+				unsigned short reg, unsigned short val)
+{
+	int status;
+
+	/* Wait for command status zero = ready */
+	status = spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0);
+	if (status == 0) {
+		pr_err("timeout on ac97 bus (write)\n");
+		return;
+	}
+	/* Write data */
+	out_be32(&psc_dma->psc_regs->ac97_cmd,
+			((reg & 0x7f) << 24) | (val << 8));
+}
+
+static void psc_ac97_warm_reset(struct snd_ac97 *ac97)
+{
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	out_be32(&regs->sicr, psc_dma->sicr | MPC52xx_PSC_SICR_AWR);
+	udelay(3);
+	out_be32(&regs->sicr, psc_dma->sicr);
+}
+
+static void psc_ac97_cold_reset(struct snd_ac97 *ac97)
+{
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	/* Do a cold reset */
+	out_8(&regs->op1, MPC52xx_PSC_OP_RES);
+	udelay(10);
+	out_8(&regs->op0, MPC52xx_PSC_OP_RES);
+	udelay(50);
+	psc_ac97_warm_reset(ac97);
+}
+
+struct snd_ac97_bus_ops soc_ac97_ops = {
+	.read		= psc_ac97_read,
+	.write		= psc_ac97_write,
+	.reset		= psc_ac97_cold_reset,
+	.warm_reset	= psc_ac97_warm_reset,
+};
+EXPORT_SYMBOL_GPL(soc_ac97_ops);
+
+static int psc_ac97_hw_analog_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+
+	dev_dbg(psc_dma->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
+		" periods=%i buffer_size=%i  buffer_bytes=%i channels=%i"
+		" rate=%i format=%i\n",
+		__func__, substream, params_period_size(params),
+		params_period_bytes(params), params_periods(params),
+		params_buffer_size(params), params_buffer_bytes(params),
+		params_channels(params), params_rate(params),
+		params_format(params));
+
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
+		if (params_channels(params) == 1)
+			psc_dma->slots |= 0x00000100;
+		else
+			psc_dma->slots |= 0x00000300;
+	} else {
+		if (params_channels(params) == 1)
+			psc_dma->slots |= 0x01000000;
+		else
+			psc_dma->slots |= 0x03000000;
+	}
+	out_be32(&psc_dma->psc_regs->ac97_slots, psc_dma->slots);
+
+	return 0;
+}
+
+static int psc_ac97_hw_digital_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+
+	if (params_channels(params) == 1)
+		out_be32(&psc_dma->psc_regs->ac97_slots, 0x01000000);
+	else
+		out_be32(&psc_dma->psc_regs->ac97_slots, 0x03000000);
+
+	return 0;
+}
+
+static int psc_ac97_trigger(struct snd_pcm_substream *substream, int cmd,
+							struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_STOP:
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+			psc_dma->slots &= 0xFFFF0000;
+		else
+			psc_dma->slots &= 0x0000FFFF;
+
+		out_be32(&psc_dma->psc_regs->ac97_slots, psc_dma->slots);
+		break;
+	}
+	return 0;
+}
+
+static int psc_ac97_probe(struct platform_device *pdev,
+					struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	/* Go */
+	out_8(&regs->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------
+ * ALSA SoC Bindings
+ *
+ * - Digital Audio Interface (DAI) template
+ * - create/destroy dai hooks
+ */
+
+/**
+ * psc_ac97_dai_template: template CPU Digital Audio Interface
+ */
+static struct snd_soc_dai_ops psc_ac97_analog_ops = {
+	.hw_params	= psc_ac97_hw_analog_params,
+	.trigger	= psc_ac97_trigger,
+};
+
+static struct snd_soc_dai_ops psc_ac97_digital_ops = {
+	.hw_params	= psc_ac97_hw_digital_params,
+};
+
+struct snd_soc_dai psc_ac97_dai[] = {
+{
+	.name   = "AC97",
+	.ac97_control = 1,
+	.probe	= psc_ac97_probe,
+	.playback = {
+		.channels_min   = 1,
+		.channels_max   = 6,
+		.rates          = SNDRV_PCM_RATE_8000_48000,
+		.formats = SNDRV_PCM_FMTBIT_S32_BE,
+	},
+	.capture = {
+		.channels_min   = 1,
+		.channels_max   = 2,
+		.rates          = SNDRV_PCM_RATE_8000_48000,
+		.formats = SNDRV_PCM_FMTBIT_S32_BE,
+	},
+	.ops = &psc_ac97_analog_ops,
+},
+{
+	.name   = "SPDIF",
+	.ac97_control = 1,
+	.playback = {
+		.channels_min   = 1,
+		.channels_max   = 2,
+		.rates          = SNDRV_PCM_RATE_32000 | \
+			SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE,
+	},
+	.ops = &psc_ac97_digital_ops,
+} };
+EXPORT_SYMBOL_GPL(psc_ac97_dai);
+
+
+
+/* ---------------------------------------------------------------------
+ * OF platform bus binding code:
+ * - Probe/remove operations
+ * - OF device match table
+ */
+static int __devinit psc_ac97_of_probe(struct of_device *op,
+				      const struct of_device_id *match)
+{
+	int rc, i;
+	struct snd_ac97 ac97;
+	struct mpc52xx_psc __iomem *regs;
+
+	rc = mpc5200_audio_dma_create(op);
+	if (rc != 0)
+		return rc;
+
+	for (i = 0; i < ARRAY_SIZE(psc_ac97_dai); i++)
+		psc_ac97_dai[i].dev = &op->dev;
+
+	rc = snd_soc_register_dais(psc_ac97_dai, ARRAY_SIZE(psc_ac97_dai));
+	if (rc != 0) {
+		dev_err(&op->dev, "Failed to register DAI\n");
+		return rc;
+	}
+
+	psc_dma = dev_get_drvdata(&op->dev);
+	regs = psc_dma->psc_regs;
+	ac97.private_data = psc_dma;
+
+	for (i = 0; i < ARRAY_SIZE(psc_ac97_dai); i++)
+		psc_ac97_dai[i].private_data = psc_dma;
+
+	psc_dma->imr = 0;
+	out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+
+	/* Configure the serial interface mode to AC97 */
+	psc_dma->sicr = MPC52xx_PSC_SICR_SIM_AC97 | MPC52xx_PSC_SICR_ENAC97;
+	out_be32(&regs->sicr, psc_dma->sicr);
+
+	/* No slots active */
+	out_be32(&regs->ac97_slots, 0x00000000);
+
+	return 0;
+}
+
+static int __devexit psc_ac97_of_remove(struct of_device *op)
+{
+	return mpc5200_audio_dma_destroy(op);
+}
+
+/* Match table for of_platform binding */
+static struct of_device_id psc_ac97_match[] __devinitdata = {
+	{ .compatible = "fsl,mpc5200-psc-ac97", },
+	{ .compatible = "fsl,mpc5200b-psc-ac97", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, psc_ac97_match);
+
+static struct of_platform_driver psc_ac97_driver = {
+	.match_table = psc_ac97_match,
+	.probe = psc_ac97_of_probe,
+	.remove = __devexit_p(psc_ac97_of_remove),
+	.driver = {
+		.name = "mpc5200-psc-ac97",
+		.owner = THIS_MODULE,
+	},
+};
+
+/* ---------------------------------------------------------------------
+ * Module setup and teardown; simply register the of_platform driver
+ * for the PSC in AC97 mode.
+ */
+static int __init psc_ac97_init(void)
+{
+	return of_register_platform_driver(&psc_ac97_driver);
+}
+module_init(psc_ac97_init);
+
+static void __exit psc_ac97_exit(void)
+{
+	of_unregister_platform_driver(&psc_ac97_driver);
+}
+module_exit(psc_ac97_exit);
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION("mpc5200 AC97 module");
+MODULE_LICENSE("GPL");
+
diff --git a/sound/soc/fsl/mpc5200_psc_ac97.h b/sound/soc/fsl/mpc5200_psc_ac97.h
new file mode 100644
index 0000000..4bc18c3
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_ac97.h
@@ -0,0 +1,15 @@
+/*
+ * Freescale MPC5200 PSC in AC97 mode
+ * ALSA SoC Digital Audio Interface (DAI) driver
+ *
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__
+#define __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__
+
+extern struct snd_soc_dai psc_ac97_dai[];
+
+#define MPC5200_AC97_NORMAL 0
+#define MPC5200_AC97_SPDIF 1
+
+#endif /* __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__ */
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 1111c71..ce8de90 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -3,31 +3,21 @@
  * ALSA SoC Digital Audio Interface (DAI) driver
  *
  * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
  */
 
-#include <linux/init.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/delay.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/dma-mapping.h>
 
-#include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-#include <sound/initval.h>
 #include <sound/soc.h>
-#include <sound/soc-of-simple.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/gen_bd.h>
 #include <asm/mpc52xx_psc.h>
 
-MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
-MODULE_DESCRIPTION("Freescale MPC5200 PSC in I2S mode ASoC Driver");
-MODULE_LICENSE("GPL");
+#include "mpc5200_psc_i2s.h"
+#include "mpc5200_dma.h"
 
 /**
  * PSC_I2S_RATES: sample rates supported by the I2S
@@ -44,191 +34,17 @@
  * PSC_I2S_FORMATS: audio formats supported by the PSC I2S mode
  */
 #define PSC_I2S_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE | \
-			 SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S24_BE | \
-			 SNDRV_PCM_FMTBIT_S32_BE)
-
-/**
- * psc_i2s_stream - Data specific to a single stream (playback or capture)
- * @active:		flag indicating if the stream is active
- * @psc_i2s:		pointer back to parent psc_i2s data structure
- * @bcom_task:		bestcomm task structure
- * @irq:		irq number for bestcomm task
- * @period_start:	physical address of start of DMA region
- * @period_end:		physical address of end of DMA region
- * @period_next_pt:	physical address of next DMA buffer to enqueue
- * @period_bytes:	size of DMA period in bytes
- */
-struct psc_i2s_stream {
-	int active;
-	struct psc_i2s *psc_i2s;
-	struct bcom_task *bcom_task;
-	int irq;
-	struct snd_pcm_substream *stream;
-	dma_addr_t period_start;
-	dma_addr_t period_end;
-	dma_addr_t period_next_pt;
-	dma_addr_t period_current_pt;
-	int period_bytes;
-};
-
-/**
- * psc_i2s - Private driver data
- * @name: short name for this device ("PSC0", "PSC1", etc)
- * @psc_regs: pointer to the PSC's registers
- * @fifo_regs: pointer to the PSC's FIFO registers
- * @irq: IRQ of this PSC
- * @dev: struct device pointer
- * @dai: the CPU DAI for this device
- * @sicr: Base value used in serial interface control register; mode is ORed
- *        with this value.
- * @playback: Playback stream context data
- * @capture: Capture stream context data
- */
-struct psc_i2s {
-	char name[32];
-	struct mpc52xx_psc __iomem *psc_regs;
-	struct mpc52xx_psc_fifo __iomem *fifo_regs;
-	unsigned int irq;
-	struct device *dev;
-	struct snd_soc_dai dai;
-	spinlock_t lock;
-	u32 sicr;
-
-	/* per-stream data */
-	struct psc_i2s_stream playback;
-	struct psc_i2s_stream capture;
-
-	/* Statistics */
-	struct {
-		int overrun_count;
-		int underrun_count;
-	} stats;
-};
-
-/*
- * Interrupt handlers
- */
-static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
-{
-	struct psc_i2s *psc_i2s = _psc_i2s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
-	u16 isr;
-
-	isr = in_be16(&regs->mpc52xx_psc_isr);
-
-	/* Playback underrun error */
-	if (psc_i2s->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
-		psc_i2s->stats.underrun_count++;
-
-	/* Capture overrun error */
-	if (psc_i2s->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
-		psc_i2s->stats.overrun_count++;
-
-	out_8(&regs->command, 4 << 4);	/* reset the error status */
-
-	return IRQ_HANDLED;
-}
-
-/**
- * psc_i2s_bcom_enqueue_next_buffer - Enqueue another audio buffer
- * @s: pointer to stream private data structure
- *
- * Enqueues another audio period buffer into the bestcomm queue.
- *
- * Note: The routine must only be called when there is space available in
- * the queue.  Otherwise the enqueue will fail and the audio ring buffer
- * will get out of sync
- */
-static void psc_i2s_bcom_enqueue_next_buffer(struct psc_i2s_stream *s)
-{
-	struct bcom_bd *bd;
-
-	/* Prepare and enqueue the next buffer descriptor */
-	bd = bcom_prepare_next_buffer(s->bcom_task);
-	bd->status = s->period_bytes;
-	bd->data[0] = s->period_next_pt;
-	bcom_submit_next_buffer(s->bcom_task, NULL);
-
-	/* Update for next period */
-	s->period_next_pt += s->period_bytes;
-	if (s->period_next_pt >= s->period_end)
-		s->period_next_pt = s->period_start;
-}
-
-/* Bestcomm DMA irq handler */
-static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
-{
-	struct psc_i2s_stream *s = _psc_i2s_stream;
-
-	/* For each finished period, dequeue the completed period buffer
-	 * and enqueue a new one in it's place. */
-	while (bcom_buffer_done(s->bcom_task)) {
-		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
-		s->period_current_pt += s->period_bytes;
-		if (s->period_current_pt >= s->period_end)
-			s->period_current_pt = s->period_start;
-		psc_i2s_bcom_enqueue_next_buffer(s);
-		bcom_enable(s->bcom_task);
-	}
-
-	/* If the stream is active, then also inform the PCM middle layer
-	 * of the period finished event. */
-	if (s->active)
-		snd_pcm_period_elapsed(s->stream);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * psc_i2s_startup: create a new substream
- *
- * This is the first function called when a stream is opened.
- *
- * If this is the first stream open, then grab the IRQ and program most of
- * the PSC registers.
- */
-static int psc_i2s_startup(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	int rc;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_startup(substream=%p)\n", substream);
-
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
-		/* Setup the IRQs */
-		rc = request_irq(psc_i2s->irq, &psc_i2s_status_irq, IRQF_SHARED,
-				 "psc-i2s-status", psc_i2s);
-		rc |= request_irq(psc_i2s->capture.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-capture", &psc_i2s->capture);
-		rc |= request_irq(psc_i2s->playback.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-playback", &psc_i2s->playback);
-		if (rc) {
-			free_irq(psc_i2s->irq, psc_i2s);
-			free_irq(psc_i2s->capture.irq,
-				 &psc_i2s->capture);
-			free_irq(psc_i2s->playback.irq,
-				 &psc_i2s->playback);
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
+			 SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE)
 
 static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_pcm_hw_params *params,
 				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	u32 mode;
 
-	dev_dbg(psc_i2s->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
+	dev_dbg(psc_dma->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
 		" periods=%i buffer_size=%i  buffer_bytes=%i\n",
 		__func__, substream, params_period_size(params),
 		params_period_bytes(params), params_periods(params),
@@ -248,174 +64,14 @@
 		mode = MPC52xx_PSC_SICR_SIM_CODEC_32;
 		break;
 	default:
-		dev_dbg(psc_i2s->dev, "invalid format\n");
+		dev_dbg(psc_dma->dev, "invalid format\n");
 		return -EINVAL;
 	}
-	out_be32(&psc_i2s->psc_regs->sicr, psc_i2s->sicr | mode);
-
-	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
+	out_be32(&psc_dma->psc_regs->sicr, psc_dma->sicr | mode);
 
 	return 0;
 }
 
-static int psc_i2s_hw_free(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
-{
-	snd_pcm_set_runtime_buffer(substream, NULL);
-	return 0;
-}
-
-/**
- * psc_i2s_trigger: start and stop the DMA transfer.
- *
- * This function is called by ALSA to start, stop, pause, and resume the DMA
- * transfer of data.
- */
-static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
-			   struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct psc_i2s_stream *s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
-	u16 imr;
-	u8 psc_cmd;
-	unsigned long flags;
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_trigger(substream=%p, cmd=%i)"
-		" stream_id=%i\n",
-		substream, cmd, substream->pstr->stream);
-
-	switch (cmd) {
-	case SNDRV_PCM_TRIGGER_START:
-		s->period_bytes = frames_to_bytes(runtime,
-						  runtime->period_size);
-		s->period_start = virt_to_phys(runtime->dma_area);
-		s->period_end = s->period_start +
-				(s->period_bytes * runtime->periods);
-		s->period_next_pt = s->period_start;
-		s->period_current_pt = s->period_start;
-		s->active = 1;
-
-		/* First; reset everything */
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			out_8(&regs->command, MPC52xx_PSC_RST_RX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
-		} else {
-			out_8(&regs->command, MPC52xx_PSC_RST_TX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
-		}
-
-		/* Next, fill up the bestcomm bd queue and enable DMA.
-		 * This will begin filling the PSC's fifo. */
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-			bcom_gen_bd_rx_reset(s->bcom_task);
-		else
-			bcom_gen_bd_tx_reset(s->bcom_task);
-		while (!bcom_queue_full(s->bcom_task))
-			psc_i2s_bcom_enqueue_next_buffer(s);
-		bcom_enable(s->bcom_task);
-
-		/* Due to errata in the i2s mode; need to line up enabling
-		 * the transmitter with a transition on the frame sync
-		 * line */
-
-		spin_lock_irqsave(&psc_i2s->lock, flags);
-		/* first make sure it is low */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
-			;
-		/* then wait for the transition to high */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
-			;
-		/* Finally, enable the PSC.
-		 * Receiver must always be enabled; even when we only want
-		 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
-		psc_cmd = MPC52xx_PSC_RX_ENABLE;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			psc_cmd |= MPC52xx_PSC_TX_ENABLE;
-		out_8(&regs->command, psc_cmd);
-		spin_unlock_irqrestore(&psc_i2s->lock, flags);
-
-		break;
-
-	case SNDRV_PCM_TRIGGER_STOP:
-		/* Turn off the PSC */
-		s->active = 0;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			if (!psc_i2s->playback.active) {
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-				out_8(&regs->command, 3 << 4);	/* reset tx */
-				out_8(&regs->command, 4 << 4);	/* reset err */
-			}
-		} else {
-			out_8(&regs->command, 3 << 4);	/* reset tx */
-			out_8(&regs->command, 4 << 4);	/* reset err */
-			if (!psc_i2s->capture.active)
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-		}
-
-		bcom_disable(s->bcom_task);
-		while (!bcom_queue_empty(s->bcom_task))
-			bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
-
-		break;
-
-	default:
-		dev_dbg(psc_i2s->dev, "invalid command\n");
-		return -EINVAL;
-	}
-
-	/* Update interrupt enable settings */
-	imr = 0;
-	if (psc_i2s->playback.active)
-		imr |= MPC52xx_PSC_IMR_TXEMP;
-	if (psc_i2s->capture.active)
-		imr |= MPC52xx_PSC_IMR_ORERR;
-	out_be16(&regs->isr_imr.imr, imr);
-
-	return 0;
-}
-
-/**
- * psc_i2s_shutdown: shutdown the data transfer on a stream
- *
- * Shutdown the PSC if there are no other substreams open.
- */
-static void psc_i2s_shutdown(struct snd_pcm_substream *substream,
-			     struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_shutdown(substream=%p)\n", substream);
-
-	/*
-	 * If this is the last active substream, disable the PSC and release
-	 * the IRQ.
-	 */
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
-
-		/* Disable all interrupts and reset the PSC */
-		out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
-		out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset tx */
-		out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset rx */
-		out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
-		out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
-
-		/* Release irqs */
-		free_irq(psc_i2s->irq, psc_i2s);
-		free_irq(psc_i2s->capture.irq, &psc_i2s->capture);
-		free_irq(psc_i2s->playback.irq, &psc_i2s->playback);
-	}
-}
-
 /**
  * psc_i2s_set_sysclk: set the clock frequency and direction
  *
@@ -433,8 +89,8 @@
 static int psc_i2s_set_sysclk(struct snd_soc_dai *cpu_dai,
 			      int clk_id, unsigned int freq, int dir)
 {
-	struct psc_i2s *psc_i2s = cpu_dai->private_data;
-	dev_dbg(psc_i2s->dev, "psc_i2s_set_sysclk(cpu_dai=%p, dir=%i)\n",
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	dev_dbg(psc_dma->dev, "psc_i2s_set_sysclk(cpu_dai=%p, dir=%i)\n",
 				cpu_dai, dir);
 	return (dir == SND_SOC_CLOCK_IN) ? 0 : -EINVAL;
 }
@@ -452,8 +108,8 @@
  */
 static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format)
 {
-	struct psc_i2s *psc_i2s = cpu_dai->private_data;
-	dev_dbg(psc_i2s->dev, "psc_i2s_set_fmt(cpu_dai=%p, format=%i)\n",
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	dev_dbg(psc_dma->dev, "psc_i2s_set_fmt(cpu_dai=%p, format=%i)\n",
 				cpu_dai, format);
 	return (format == SND_SOC_DAIFMT_I2S) ? 0 : -EINVAL;
 }
@@ -469,16 +125,13 @@
  * psc_i2s_dai_template: template CPU Digital Audio Interface
  */
 static struct snd_soc_dai_ops psc_i2s_dai_ops = {
-	.startup	= psc_i2s_startup,
 	.hw_params	= psc_i2s_hw_params,
-	.hw_free	= psc_i2s_hw_free,
-	.shutdown	= psc_i2s_shutdown,
-	.trigger	= psc_i2s_trigger,
 	.set_sysclk	= psc_i2s_set_sysclk,
 	.set_fmt	= psc_i2s_set_fmt,
 };
 
-static struct snd_soc_dai psc_i2s_dai_template = {
+struct snd_soc_dai psc_i2s_dai[] = {{
+	.name   = "I2S",
 	.playback = {
 		.channels_min = 2,
 		.channels_max = 2,
@@ -492,223 +145,8 @@
 		.formats = PSC_I2S_FORMATS,
 	},
 	.ops = &psc_i2s_dai_ops,
-};
-
-/* ---------------------------------------------------------------------
- * The PSC I2S 'ASoC platform' driver
- *
- * Can be referenced by an 'ASoC machine' driver
- * This driver only deals with the audio bus; it doesn't have any
- * interaction with the attached codec
- */
-
-static const struct snd_pcm_hardware psc_i2s_pcm_hardware = {
-	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
-		SNDRV_PCM_INFO_BATCH,
-	.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE |
-		   SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
-	.rate_min = 8000,
-	.rate_max = 48000,
-	.channels_min = 2,
-	.channels_max = 2,
-	.period_bytes_max	= 1024 * 1024,
-	.period_bytes_min	= 32,
-	.periods_min		= 2,
-	.periods_max		= 256,
-	.buffer_bytes_max	= 2 * 1024 * 1024,
-	.fifo_size		= 0,
-};
-
-static int psc_i2s_pcm_open(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_open(substream=%p)\n", substream);
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	snd_soc_set_runtime_hwparams(substream, &psc_i2s_pcm_hardware);
-
-	s->stream = substream;
-	return 0;
-}
-
-static int psc_i2s_pcm_close(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_close(substream=%p)\n", substream);
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	s->stream = NULL;
-	return 0;
-}
-
-static snd_pcm_uframes_t
-psc_i2s_pcm_pointer(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-	dma_addr_t count;
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	count = s->period_current_pt - s->period_start;
-
-	return bytes_to_frames(substream->runtime, count);
-}
-
-static struct snd_pcm_ops psc_i2s_pcm_ops = {
-	.open		= psc_i2s_pcm_open,
-	.close		= psc_i2s_pcm_close,
-	.ioctl		= snd_pcm_lib_ioctl,
-	.pointer	= psc_i2s_pcm_pointer,
-};
-
-static u64 psc_i2s_pcm_dmamask = 0xffffffff;
-static int psc_i2s_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
-			   struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	size_t size = psc_i2s_pcm_hardware.buffer_bytes_max;
-	int rc = 0;
-
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_new(card=%p, dai=%p, pcm=%p)\n",
-		card, dai, pcm);
-
-	if (!card->dev->dma_mask)
-		card->dev->dma_mask = &psc_i2s_pcm_dmamask;
-	if (!card->dev->coherent_dma_mask)
-		card->dev->coherent_dma_mask = 0xffffffff;
-
-	if (pcm->streams[0].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[0].substream->dma_buffer);
-		if (rc)
-			goto playback_alloc_err;
-	}
-
-	if (pcm->streams[1].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[1].substream->dma_buffer);
-		if (rc)
-			goto capture_alloc_err;
-	}
-
-	return 0;
-
- capture_alloc_err:
-	if (pcm->streams[0].substream)
-		snd_dma_free_pages(&pcm->streams[0].substream->dma_buffer);
- playback_alloc_err:
-	dev_err(card->dev, "Cannot allocate buffer(s)\n");
-	return -ENOMEM;
-}
-
-static void psc_i2s_pcm_free(struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	struct snd_pcm_substream *substream;
-	int stream;
-
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_free(pcm=%p)\n", pcm);
-
-	for (stream = 0; stream < 2; stream++) {
-		substream = pcm->streams[stream].substream;
-		if (substream) {
-			snd_dma_free_pages(&substream->dma_buffer);
-			substream->dma_buffer.area = NULL;
-			substream->dma_buffer.addr = 0;
-		}
-	}
-}
-
-struct snd_soc_platform psc_i2s_pcm_soc_platform = {
-	.name		= "mpc5200-psc-audio",
-	.pcm_ops	= &psc_i2s_pcm_ops,
-	.pcm_new	= &psc_i2s_pcm_new,
-	.pcm_free	= &psc_i2s_pcm_free,
-};
-
-/* ---------------------------------------------------------------------
- * Sysfs attributes for debugging
- */
-
-static ssize_t psc_i2s_status_show(struct device *dev,
-			   struct device_attribute *attr, char *buf)
-{
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
-
-	return sprintf(buf, "status=%.4x sicr=%.8x rfnum=%i rfstat=0x%.4x "
-			"tfnum=%i tfstat=0x%.4x\n",
-			in_be16(&psc_i2s->psc_regs->sr_csr.status),
-			in_be32(&psc_i2s->psc_regs->sicr),
-			in_be16(&psc_i2s->fifo_regs->rfnum) & 0x1ff,
-			in_be16(&psc_i2s->fifo_regs->rfstat),
-			in_be16(&psc_i2s->fifo_regs->tfnum) & 0x1ff,
-			in_be16(&psc_i2s->fifo_regs->tfstat));
-}
-
-static int *psc_i2s_get_stat_attr(struct psc_i2s *psc_i2s, const char *name)
-{
-	if (strcmp(name, "playback_underrun") == 0)
-		return &psc_i2s->stats.underrun_count;
-	if (strcmp(name, "capture_overrun") == 0)
-		return &psc_i2s->stats.overrun_count;
-
-	return NULL;
-}
-
-static ssize_t psc_i2s_stat_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
-	int *attrib;
-
-	attrib = psc_i2s_get_stat_attr(psc_i2s, attr->attr.name);
-	if (!attrib)
-		return 0;
-
-	return sprintf(buf, "%i\n", *attrib);
-}
-
-static ssize_t psc_i2s_stat_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf,
-				  size_t count)
-{
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
-	int *attrib;
-
-	attrib = psc_i2s_get_stat_attr(psc_i2s, attr->attr.name);
-	if (!attrib)
-		return 0;
-
-	*attrib = simple_strtoul(buf, NULL, 0);
-	return count;
-}
-
-static DEVICE_ATTR(status, 0644, psc_i2s_status_show, NULL);
-static DEVICE_ATTR(playback_underrun, 0644, psc_i2s_stat_show,
-			psc_i2s_stat_store);
-static DEVICE_ATTR(capture_overrun, 0644, psc_i2s_stat_show,
-			psc_i2s_stat_store);
+} };
+EXPORT_SYMBOL_GPL(psc_i2s_dai);
 
 /* ---------------------------------------------------------------------
  * OF platform bus binding code:
@@ -718,150 +156,65 @@
 static int __devinit psc_i2s_of_probe(struct of_device *op,
 				      const struct of_device_id *match)
 {
-	phys_addr_t fifo;
-	struct psc_i2s *psc_i2s;
-	struct resource res;
-	int size, psc_id, irq, rc;
-	const __be32 *prop;
-	void __iomem *regs;
+	int rc;
+	struct psc_dma *psc_dma;
+	struct mpc52xx_psc __iomem *regs;
 
-	dev_dbg(&op->dev, "probing psc i2s device\n");
+	rc = mpc5200_audio_dma_create(op);
+	if (rc != 0)
+		return rc;
 
-	/* Get the PSC ID */
-	prop = of_get_property(op->node, "cell-index", &size);
-	if (!prop || size < sizeof *prop)
-		return -ENODEV;
-	psc_id = be32_to_cpu(*prop);
-
-	/* Fetch the registers and IRQ of the PSC */
-	irq = irq_of_parse_and_map(op->node, 0);
-	if (of_address_to_resource(op->node, 0, &res)) {
-		dev_err(&op->dev, "Missing reg property\n");
-		return -ENODEV;
-	}
-	regs = ioremap(res.start, 1 + res.end - res.start);
-	if (!regs) {
-		dev_err(&op->dev, "Could not map registers\n");
-		return -ENODEV;
+	rc = snd_soc_register_dais(psc_i2s_dai, ARRAY_SIZE(psc_i2s_dai));
+	if (rc != 0) {
+		pr_err("Failed to register DAI\n");
+		return 0;
 	}
 
-	/* Allocate and initialize the driver private data */
-	psc_i2s = kzalloc(sizeof *psc_i2s, GFP_KERNEL);
-	if (!psc_i2s) {
-		iounmap(regs);
-		return -ENOMEM;
-	}
-	spin_lock_init(&psc_i2s->lock);
-	psc_i2s->irq = irq;
-	psc_i2s->psc_regs = regs;
-	psc_i2s->fifo_regs = regs + sizeof *psc_i2s->psc_regs;
-	psc_i2s->dev = &op->dev;
-	psc_i2s->playback.psc_i2s = psc_i2s;
-	psc_i2s->capture.psc_i2s = psc_i2s;
-	snprintf(psc_i2s->name, sizeof psc_i2s->name, "PSC%u", psc_id+1);
-
-	/* Fill out the CPU DAI structure */
-	memcpy(&psc_i2s->dai, &psc_i2s_dai_template, sizeof psc_i2s->dai);
-	psc_i2s->dai.private_data = psc_i2s;
-	psc_i2s->dai.name = psc_i2s->name;
-	psc_i2s->dai.id = psc_id;
-
-	/* Find the address of the fifo data registers and setup the
-	 * DMA tasks */
-	fifo = res.start + offsetof(struct mpc52xx_psc, buffer.buffer_32);
-	psc_i2s->capture.bcom_task =
-		bcom_psc_gen_bd_rx_init(psc_id, 10, fifo, 512);
-	psc_i2s->playback.bcom_task =
-		bcom_psc_gen_bd_tx_init(psc_id, 10, fifo);
-	if (!psc_i2s->capture.bcom_task ||
-	    !psc_i2s->playback.bcom_task) {
-		dev_err(&op->dev, "Could not allocate bestcomm tasks\n");
-		iounmap(regs);
-		kfree(psc_i2s);
-		return -ENODEV;
-	}
-
-	/* Disable all interrupts and reset the PSC */
-	out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
-	out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset transmitter */
-	out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset receiver */
-	out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
-	out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
+	psc_dma = dev_get_drvdata(&op->dev);
+	regs = psc_dma->psc_regs;
 
 	/* Configure the serial interface mode; defaulting to CODEC8 mode */
-	psc_i2s->sicr = MPC52xx_PSC_SICR_DTS1 | MPC52xx_PSC_SICR_I2S |
+	psc_dma->sicr = MPC52xx_PSC_SICR_DTS1 | MPC52xx_PSC_SICR_I2S |
 			MPC52xx_PSC_SICR_CLKPOL;
-	if (of_get_property(op->node, "fsl,cellslave", NULL))
-		psc_i2s->sicr |= MPC52xx_PSC_SICR_CELLSLAVE |
-				 MPC52xx_PSC_SICR_GENCLK;
-	out_be32(&psc_i2s->psc_regs->sicr,
-		 psc_i2s->sicr | MPC52xx_PSC_SICR_SIM_CODEC_8);
+	out_be32(&psc_dma->psc_regs->sicr,
+		 psc_dma->sicr | MPC52xx_PSC_SICR_SIM_CODEC_8);
 
 	/* Check for the codec handle.  If it is not present then we
 	 * are done */
 	if (!of_get_property(op->node, "codec-handle", NULL))
 		return 0;
 
-	/* Set up mode register;
-	 * First write: RxRdy (FIFO Alarm) generates rx FIFO irq
-	 * Second write: register Normal mode for non loopback
-	 */
-	out_8(&psc_i2s->psc_regs->mode, 0);
-	out_8(&psc_i2s->psc_regs->mode, 0);
+	/* Due to errata in the dma mode; need to line up enabling
+	 * the transmitter with a transition on the frame sync
+	 * line */
 
-	/* Set the TX and RX fifo alarm thresholds */
-	out_be16(&psc_i2s->fifo_regs->rfalarm, 0x100);
-	out_8(&psc_i2s->fifo_regs->rfcntl, 0x4);
-	out_be16(&psc_i2s->fifo_regs->tfalarm, 0x100);
-	out_8(&psc_i2s->fifo_regs->tfcntl, 0x7);
+	/* first make sure it is low */
+	while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
+		;
+	/* then wait for the transition to high */
+	while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
+		;
+	/* Finally, enable the PSC.
+	 * Receiver must always be enabled; even when we only want
+	 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
 
-	/* Lookup the IRQ numbers */
-	psc_i2s->playback.irq =
-		bcom_get_task_irq(psc_i2s->playback.bcom_task);
-	psc_i2s->capture.irq =
-		bcom_get_task_irq(psc_i2s->capture.bcom_task);
-
-	/* Save what we've done so it can be found again later */
-	dev_set_drvdata(&op->dev, psc_i2s);
-
-	/* Register the SYSFS files */
-	rc = device_create_file(psc_i2s->dev, &dev_attr_status);
-	rc |= device_create_file(psc_i2s->dev, &dev_attr_capture_overrun);
-	rc |= device_create_file(psc_i2s->dev, &dev_attr_playback_underrun);
-	if (rc)
-		dev_info(psc_i2s->dev, "error creating sysfs files\n");
-
-	snd_soc_register_platform(&psc_i2s_pcm_soc_platform);
-
-	/* Tell the ASoC OF helpers about it */
-	of_snd_soc_register_platform(&psc_i2s_pcm_soc_platform, op->node,
-				     &psc_i2s->dai);
+	/* Go */
+	out_8(&psc_dma->psc_regs->command,
+			MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
 
 	return 0;
+
 }
 
 static int __devexit psc_i2s_of_remove(struct of_device *op)
 {
-	struct psc_i2s *psc_i2s = dev_get_drvdata(&op->dev);
-
-	dev_dbg(&op->dev, "psc_i2s_remove()\n");
-
-	snd_soc_unregister_platform(&psc_i2s_pcm_soc_platform);
-
-	bcom_gen_bd_rx_release(psc_i2s->capture.bcom_task);
-	bcom_gen_bd_tx_release(psc_i2s->playback.bcom_task);
-
-	iounmap(psc_i2s->psc_regs);
-	iounmap(psc_i2s->fifo_regs);
-	kfree(psc_i2s);
-	dev_set_drvdata(&op->dev, NULL);
-
-	return 0;
+	return mpc5200_audio_dma_destroy(op);
 }
 
 /* Match table for of_platform binding */
 static struct of_device_id psc_i2s_match[] __devinitdata = {
 	{ .compatible = "fsl,mpc5200-psc-i2s", },
+	{ .compatible = "fsl,mpc5200b-psc-i2s", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, psc_i2s_match);
@@ -892,4 +245,7 @@
 }
 module_exit(psc_i2s_exit);
 
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Freescale MPC5200 PSC in I2S mode ASoC Driver");
+MODULE_LICENSE("GPL");
 
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.h b/sound/soc/fsl/mpc5200_psc_i2s.h
new file mode 100644
index 0000000..ce55e07
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_i2s.h
@@ -0,0 +1,12 @@
+/*
+ * Freescale MPC5200 PSC in I2S mode
+ * ALSA SoC Digital Audio Interface (DAI) driver
+ *
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__
+#define __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__
+
+extern struct snd_soc_dai psc_i2s_dai[];
+
+#endif /* __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__ */
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
new file mode 100644
index 0000000..8766f7a
--- /dev/null
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -0,0 +1,90 @@
+/*
+ * Phytec pcm030 driver for the PSC of the Freescale MPC52xx
+ * configured as AC97 interface
+ *
+ * Copyright 2008 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/soc-of-simple.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+#include "../codecs/wm9712.h"
+
+static struct snd_soc_device device;
+static struct snd_soc_card card;
+
+static struct snd_soc_dai_link pcm030_fabric_dai[] = {
+{
+	.name = "AC97",
+	.stream_name = "AC97 Analog",
+	.codec_dai = &wm9712_dai[WM9712_DAI_AC97_HIFI],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_NORMAL],
+},
+{
+	.name = "AC97",
+	.stream_name = "AC97 IEC958",
+	.codec_dai = &wm9712_dai[WM9712_DAI_AC97_AUX],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_SPDIF],
+},
+};
+
+static __init int pcm030_fabric_init(void)
+{
+	struct platform_device *pdev;
+	int rc;
+
+	if (!machine_is_compatible("phytec,pcm030"))
+		return -ENODEV;
+
+	card.platform = &mpc5200_audio_dma_platform;
+	card.name = "pcm030";
+	card.dai_link = pcm030_fabric_dai;
+	card.num_links = ARRAY_SIZE(pcm030_fabric_dai);
+
+	device.card = &card;
+	device.codec_dev = &soc_codec_dev_wm9712;
+
+	pdev = platform_device_alloc("soc-audio", 1);
+	if (!pdev) {
+		pr_err("pcm030_fabric_init: platform_device_alloc() failed\n");
+		return -ENODEV;
+	}
+
+	platform_set_drvdata(pdev, &device);
+	device.dev = &pdev->dev;
+
+	rc = platform_device_add(pdev);
+	if (rc) {
+		pr_err("pcm030_fabric_init: platform_device_add() failed\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+module_init(pcm030_fabric_init);
+
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION(DRV_NAME ": mpc5200 pcm030 fabric driver");
+MODULE_LICENSE("GPL");
+
diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig
index 675732e..b771238 100644
--- a/sound/soc/omap/Kconfig
+++ b/sound/soc/omap/Kconfig
@@ -39,6 +39,14 @@
 	help
 	  Say Y if you want to add support for SoC audio on the omap2evm board.
 
+config SND_OMAP_SOC_OMAP3EVM
+	tristate "SoC Audio support for OMAP3EVM board"
+	depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP3EVM
+	select SND_OMAP_SOC_MCBSP
+	select SND_SOC_TWL4030
+	help
+	  Say Y if you want to add support for SoC audio on the omap3evm board.
+
 config SND_OMAP_SOC_SDP3430
 	tristate "SoC Audio support for Texas Instruments SDP3430"
 	depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP_3430SDP
diff --git a/sound/soc/omap/Makefile b/sound/soc/omap/Makefile
index 0c9e4ac..a37f498 100644
--- a/sound/soc/omap/Makefile
+++ b/sound/soc/omap/Makefile
@@ -10,6 +10,7 @@
 snd-soc-osk5912-objs := osk5912.o
 snd-soc-overo-objs := overo.o
 snd-soc-omap2evm-objs := omap2evm.o
+snd-soc-omap3evm-objs := omap3evm.o
 snd-soc-sdp3430-objs := sdp3430.o
 snd-soc-omap3pandora-objs := omap3pandora.o
 snd-soc-omap3beagle-objs := omap3beagle.o
@@ -18,6 +19,7 @@
 obj-$(CONFIG_SND_OMAP_SOC_OSK5912) += snd-soc-osk5912.o
 obj-$(CONFIG_SND_OMAP_SOC_OVERO) += snd-soc-overo.o
 obj-$(CONFIG_MACH_OMAP2EVM) += snd-soc-omap2evm.o
+obj-$(CONFIG_MACH_OMAP3EVM) += snd-soc-omap3evm.o
 obj-$(CONFIG_SND_OMAP_SOC_SDP3430) += snd-soc-sdp3430.o
 obj-$(CONFIG_SND_OMAP_SOC_OMAP3_PANDORA) += snd-soc-omap3pandora.o
 obj-$(CONFIG_SND_OMAP_SOC_OMAP3_BEAGLE) += snd-soc-omap3beagle.o
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 91ef179..b60b1df 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -383,10 +383,9 @@
 	clk_set_parent(sys_clkout2_src, func96m_clk);
 	clk_set_rate(sys_clkout2, 12000000);
 
-	if (gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0)
-		BUG();
-	if (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0)
-		BUG();
+	BUG_ON((gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0) ||
+	       (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0));
+
 	gpio_direction_output(N810_HEADSET_AMP_GPIO, 0);
 	gpio_direction_output(N810_SPEAKER_AMP_GPIO, 0);
 
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 9126142..a5d46a7 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -215,8 +215,9 @@
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
 	int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
-	int wlen, channels;
+	int wlen, channels, wpf;
 	unsigned long port;
+	unsigned int format;
 
 	if (cpu_class_is_omap1()) {
 		dma = omap1_dma_reqs[bus_id][substream->stream];
@@ -244,18 +245,24 @@
 		return 0;
 	}
 
-	channels = params_channels(params);
+	format = mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK;
+	wpf = channels = params_channels(params);
 	switch (channels) {
 	case 2:
-		/* Use dual-phase frames */
-		regs->rcr2	|= RPHASE;
-		regs->xcr2	|= XPHASE;
+		if (format == SND_SOC_DAIFMT_I2S) {
+			/* Use dual-phase frames */
+			regs->rcr2	|= RPHASE;
+			regs->xcr2	|= XPHASE;
+			/* Set 1 word per (McBSP) frame for phase1 and phase2 */
+			wpf--;
+			regs->rcr2	|= RFRLEN2(wpf - 1);
+			regs->xcr2	|= XFRLEN2(wpf - 1);
+		}
 	case 1:
-		/* Set 1 word per (McBSP) frame */
-		regs->rcr2	|= RFRLEN2(1 - 1);
-		regs->rcr1	|= RFRLEN1(1 - 1);
-		regs->xcr2	|= XFRLEN2(1 - 1);
-		regs->xcr1	|= XFRLEN1(1 - 1);
+	case 4:
+		/* Set word per (McBSP) frame for phase1 */
+		regs->rcr1	|= RFRLEN1(wpf - 1);
+		regs->xcr1	|= XFRLEN1(wpf - 1);
 		break;
 	default:
 		/* Unsupported number of channels */
@@ -277,11 +284,12 @@
 	}
 
 	/* Set FS period and length in terms of bit clock periods */
-	switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	switch (format) {
 	case SND_SOC_DAIFMT_I2S:
-		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr2	|= FPER(wlen * channels - 1);
 		regs->srgr1	|= FWID(wlen - 1);
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
 	case SND_SOC_DAIFMT_DSP_B:
 		regs->srgr2	|= FPER(wlen * channels - 1);
 		regs->srgr1	|= FWID(0);
@@ -326,6 +334,13 @@
 		regs->rcr2	|= RDATDLY(1);
 		regs->xcr2	|= XDATDLY(1);
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		/* 1-bit data delay */
+		regs->rcr2      |= RDATDLY(1);
+		regs->xcr2      |= XDATDLY(1);
+		/* Invert FS polarity configuration */
+		temp_fmt ^= SND_SOC_DAIFMT_NB_IF;
+		break;
 	case SND_SOC_DAIFMT_DSP_B:
 		/* 0-bit data delay */
 		regs->rcr2      |= RDATDLY(0);
@@ -492,13 +507,13 @@
 	.id = (link_id),					\
 	.playback = {						\
 		.channels_min = 1,				\
-		.channels_max = 2,				\
+		.channels_max = 4,				\
 		.rates = OMAP_MCBSP_RATES,			\
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,		\
 	},							\
 	.capture = {						\
 		.channels_min = 1,				\
-		.channels_max = 2,				\
+		.channels_max = 4,				\
 		.rates = OMAP_MCBSP_RATES,			\
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,		\
 	},							\
diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
index 07cf7f4..6454e15 100644
--- a/sound/soc/omap/omap-pcm.c
+++ b/sound/soc/omap/omap-pcm.c
@@ -87,8 +87,10 @@
 	struct omap_pcm_dma_data *dma_data = rtd->dai->cpu_dai->dma_data;
 	int err = 0;
 
+	/* return if this is a bufferless transfer e.g.
+	 * codec <--> BT codec or GSM modem -- lg FIXME */
 	if (!dma_data)
-		return -ENODEV;
+		return 0;
 
 	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
 	runtime->dma_bytes = params_buffer_bytes(params);
@@ -134,6 +136,11 @@
 	struct omap_pcm_dma_data *dma_data = prtd->dma_data;
 	struct omap_dma_channel_params dma_params;
 
+	/* return if this is a bufferless transfer e.g.
+	 * codec <--> BT codec or GSM modem -- lg FIXME */
+	if (!prtd->dma_data)
+		return 0;
+
 	memset(&dma_params, 0, sizeof(dma_params));
 	/*
 	 * Note: Regardless of interface data formats supported by OMAP McBSP
diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c
index 0c2322d..027e1a4 100644
--- a/sound/soc/omap/omap2evm.c
+++ b/sound/soc/omap/omap2evm.c
@@ -86,7 +86,7 @@
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &omap2evm_ops,
 };
 
diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c
index fd24a4a..b0cff9f 100644
--- a/sound/soc/omap/omap3beagle.c
+++ b/sound/soc/omap/omap3beagle.c
@@ -41,23 +41,33 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	unsigned int fmt;
 	int ret;
 
+	switch (params_channels(params)) {
+	case 2: /* Stereo I2S mode */
+		fmt =	SND_SOC_DAIFMT_I2S |
+			SND_SOC_DAIFMT_NB_NF |
+			SND_SOC_DAIFMT_CBM_CFM;
+		break;
+	case 4: /* Four channel TDM mode */
+		fmt =	SND_SOC_DAIFMT_DSP_A |
+			SND_SOC_DAIFMT_IB_NF |
+			SND_SOC_DAIFMT_CBM_CFM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* Set codec DAI configuration */
-	ret = snd_soc_dai_set_fmt(codec_dai,
-				  SND_SOC_DAIFMT_I2S |
-				  SND_SOC_DAIFMT_NB_NF |
-				  SND_SOC_DAIFMT_CBM_CFM);
+	ret = snd_soc_dai_set_fmt(codec_dai, fmt);
 	if (ret < 0) {
 		printk(KERN_ERR "can't set codec DAI configuration\n");
 		return ret;
 	}
 
 	/* Set cpu DAI configuration */
-	ret = snd_soc_dai_set_fmt(cpu_dai,
-				  SND_SOC_DAIFMT_I2S |
-				  SND_SOC_DAIFMT_NB_NF |
-				  SND_SOC_DAIFMT_CBM_CFM);
+	ret = snd_soc_dai_set_fmt(cpu_dai, fmt);
 	if (ret < 0) {
 		printk(KERN_ERR "can't set cpu DAI configuration\n");
 		return ret;
@@ -83,7 +93,7 @@
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &omap3beagle_ops,
 };
 
diff --git a/sound/soc/omap/omap3evm.c b/sound/soc/omap/omap3evm.c
new file mode 100644
index 0000000..9114c26
--- /dev/null
+++ b/sound/soc/omap/omap3evm.c
@@ -0,0 +1,147 @@
+/*
+ * omap3evm.c  -- ALSA SoC support for OMAP3 EVM
+ *
+ * Author: Anuj Aggarwal <anuj.aggarwal@ti.com>
+ *
+ * Based on sound/soc/omap/beagle.c by Steve Sakoman
+ *
+ * Copyright (C) 2008 Texas Instruments, Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <asm/mach-types.h>
+#include <mach/hardware.h>
+#include <mach/gpio.h>
+#include <mach/mcbsp.h>
+
+#include "omap-mcbsp.h"
+#include "omap-pcm.h"
+#include "../codecs/twl4030.h"
+
+static int omap3evm_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret;
+
+	/* Set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_NF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set codec DAI configuration\n");
+		return ret;
+	}
+
+	/* Set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_NF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set cpu DAI configuration\n");
+		return ret;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, 26000000,
+				     SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set codec system clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct snd_soc_ops omap3evm_ops = {
+	.hw_params = omap3evm_hw_params,
+};
+
+/* Digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link omap3evm_dai = {
+	.name 		= "TWL4030",
+	.stream_name 	= "TWL4030",
+	.cpu_dai 	= &omap_mcbsp_dai[0],
+	.codec_dai 	= &twl4030_dai[TWL4030_DAI_HIFI],
+	.ops 		= &omap3evm_ops,
+};
+
+/* Audio machine driver */
+static struct snd_soc_card snd_soc_omap3evm = {
+	.name = "omap3evm",
+	.platform = &omap_soc_platform,
+	.dai_link = &omap3evm_dai,
+	.num_links = 1,
+};
+
+/* Audio subsystem */
+static struct snd_soc_device omap3evm_snd_devdata = {
+	.card = &snd_soc_omap3evm,
+	.codec_dev = &soc_codec_dev_twl4030,
+};
+
+static struct platform_device *omap3evm_snd_device;
+
+static int __init omap3evm_soc_init(void)
+{
+	int ret;
+
+	if (!machine_is_omap3evm()) {
+		pr_err("Not OMAP3 EVM!\n");
+		return -ENODEV;
+	}
+	pr_info("OMAP3 EVM SoC init\n");
+
+	omap3evm_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!omap3evm_snd_device) {
+		printk(KERN_ERR "Platform device allocation failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(omap3evm_snd_device, &omap3evm_snd_devdata);
+	omap3evm_snd_devdata.dev = &omap3evm_snd_device->dev;
+	*(unsigned int *)omap3evm_dai.cpu_dai->private_data = 1;
+
+	ret = platform_device_add(omap3evm_snd_device);
+	if (ret)
+		goto err1;
+
+	return 0;
+
+err1:
+	printk(KERN_ERR "Unable to add platform device\n");
+	platform_device_put(omap3evm_snd_device);
+
+	return ret;
+}
+
+static void __exit omap3evm_soc_exit(void)
+{
+	platform_device_unregister(omap3evm_snd_device);
+}
+
+module_init(omap3evm_soc_init);
+module_exit(omap3evm_soc_exit);
+
+MODULE_AUTHOR("Anuj Aggarwal <anuj.aggarwal@ti.com>");
+MODULE_DESCRIPTION("ALSA SoC OMAP3 EVM");
+MODULE_LICENSE("GPLv2");
diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c
index fe282d4..ad219aa 100644
--- a/sound/soc/omap/omap3pandora.c
+++ b/sound/soc/omap/omap3pandora.c
@@ -228,14 +228,14 @@
 		.name = "PCM1773",
 		.stream_name = "HiFi Out",
 		.cpu_dai = &omap_mcbsp_dai[0],
-		.codec_dai = &twl4030_dai,
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 		.ops = &omap3pandora_out_ops,
 		.init = omap3pandora_out_init,
 	}, {
 		.name = "TWL4030",
 		.stream_name = "Line/Mic In",
 		.cpu_dai = &omap_mcbsp_dai[1],
-		.codec_dai = &twl4030_dai,
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 		.ops = &omap3pandora_in_ops,
 		.init = omap3pandora_in_init,
 	}
diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c
index a72dc4e..ec4f8fd 100644
--- a/sound/soc/omap/overo.c
+++ b/sound/soc/omap/overo.c
@@ -83,7 +83,7 @@
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &overo_ops,
 };
 
diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 10f1c86..b719e5d 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -84,6 +84,49 @@
 	.hw_params = sdp3430_hw_params,
 };
 
+static int sdp3430_hw_voice_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret;
+
+	/* Set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai,
+				SND_SOC_DAIFMT_DSP_A |
+				SND_SOC_DAIFMT_IB_NF |
+				SND_SOC_DAIFMT_CBS_CFM);
+	if (ret) {
+		printk(KERN_ERR "can't set codec DAI configuration\n");
+		return ret;
+	}
+
+	/* Set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai,
+				SND_SOC_DAIFMT_DSP_A |
+				SND_SOC_DAIFMT_IB_NF |
+				SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "can't set cpu DAI configuration\n");
+		return ret;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, 26000000,
+					    SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		printk(KERN_ERR "can't set codec system clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct snd_soc_ops sdp3430_voice_ops = {
+	.hw_params = sdp3430_hw_voice_params,
+};
+
 /* Headset jack */
 static struct snd_soc_jack hs_jack;
 
@@ -192,28 +235,58 @@
 	return ret;
 }
 
+static int sdp3430_twl4030_voice_init(struct snd_soc_codec *codec)
+{
+	unsigned short reg;
+
+	/* Enable voice interface */
+	reg = codec->read(codec, TWL4030_REG_VOICE_IF);
+	reg |= TWL4030_VIF_DIN_EN | TWL4030_VIF_DOUT_EN | TWL4030_VIF_EN;
+	codec->write(codec, TWL4030_REG_VOICE_IF, reg);
+
+	return 0;
+}
+
+
 /* Digital audio interface glue - connects codec <--> CPU */
-static struct snd_soc_dai_link sdp3430_dai = {
-	.name = "TWL4030",
-	.stream_name = "TWL4030",
-	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
-	.init = sdp3430_twl4030_init,
-	.ops = &sdp3430_ops,
+static struct snd_soc_dai_link sdp3430_dai[] = {
+	{
+		.name = "TWL4030 I2S",
+		.stream_name = "TWL4030 Audio",
+		.cpu_dai = &omap_mcbsp_dai[0],
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
+		.init = sdp3430_twl4030_init,
+		.ops = &sdp3430_ops,
+	},
+	{
+		.name = "TWL4030 PCM",
+		.stream_name = "TWL4030 Voice",
+		.cpu_dai = &omap_mcbsp_dai[1],
+		.codec_dai = &twl4030_dai[TWL4030_DAI_VOICE],
+		.init = sdp3430_twl4030_voice_init,
+		.ops = &sdp3430_voice_ops,
+	},
 };
 
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_sdp3430 = {
 	.name = "SDP3430",
 	.platform = &omap_soc_platform,
-	.dai_link = &sdp3430_dai,
-	.num_links = 1,
+	.dai_link = sdp3430_dai,
+	.num_links = ARRAY_SIZE(sdp3430_dai),
+};
+
+/* twl4030 setup */
+static struct twl4030_setup_data twl4030_setup = {
+	.ramp_delay_value = 3,
+	.sysclk = 26000,
 };
 
 /* Audio subsystem */
 static struct snd_soc_device sdp3430_snd_devdata = {
 	.card = &snd_soc_sdp3430,
 	.codec_dev = &soc_codec_dev_twl4030,
+	.codec_data = &twl4030_setup,
 };
 
 static struct platform_device *sdp3430_snd_device;
@@ -236,7 +309,8 @@
 
 	platform_set_drvdata(sdp3430_snd_device, &sdp3430_snd_devdata);
 	sdp3430_snd_devdata.dev = &sdp3430_snd_device->dev;
-	*(unsigned int *)sdp3430_dai.cpu_dai->private_data = 1; /* McBSP2 */
+	*(unsigned int *)sdp3430_dai[0].cpu_dai->private_data = 1; /* McBSP2 */
+	*(unsigned int *)sdp3430_dai[1].cpu_dai->private_data = 2; /* McBSP3 */
 
 	ret = platform_device_add(sdp3430_snd_device);
 	if (ret)
diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index ad8a10f..dcd163a 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -89,13 +89,13 @@
 	  Toshiba e800 PDA
 
 config SND_PXA2XX_SOC_EM_X270
-	tristate "SoC Audio support for CompuLab EM-x270"
+	tristate "SoC Audio support for CompuLab EM-x270, eXeda and CM-X300"
 	depends on SND_PXA2XX_SOC && MACH_EM_X270
 	select SND_PXA2XX_SOC_AC97
 	select SND_SOC_WM9712
 	help
 	  Say Y if you want to add support for SoC audio on
-	  CompuLab EM-x270.
+	  CompuLab EM-x270, eXeda and CM-X300 machines.
 
 config SND_PXA2XX_SOC_PALM27X
 	bool "SoC Audio support for Palm T|X, T5 and LifeDrive"
@@ -134,3 +134,12 @@
         help
           Say Y if you want to add support for SoC audio on the
           MIO A701.
+
+config SND_PXA2XX_SOC_IMOTE2
+       tristate "SoC Audio support for IMote 2"
+       depends on SND_PXA2XX_SOC && MACH_INTELMOTE2
+       select SND_PXA2XX_SOC_I2S
+       select SND_SOC_WM8940
+       help
+         Say Y if you want to add support for SoC audio on the
+	 IMote 2.
diff --git a/sound/soc/pxa/Makefile b/sound/soc/pxa/Makefile
index 4b90c3c..6e096b4 100644
--- a/sound/soc/pxa/Makefile
+++ b/sound/soc/pxa/Makefile
@@ -22,6 +22,7 @@
 snd-soc-zylonite-objs := zylonite.o
 snd-soc-magician-objs := magician.o
 snd-soc-mioa701-objs := mioa701_wm9713.o
+snd-soc-imote2-objs := imote2.o
 
 obj-$(CONFIG_SND_PXA2XX_SOC_CORGI) += snd-soc-corgi.o
 obj-$(CONFIG_SND_PXA2XX_SOC_POODLE) += snd-soc-poodle.o
@@ -35,3 +36,4 @@
 obj-$(CONFIG_SND_PXA2XX_SOC_MAGICIAN) += snd-soc-magician.o
 obj-$(CONFIG_SND_PXA2XX_SOC_MIOA701) += snd-soc-mioa701.o
 obj-$(CONFIG_SND_SOC_ZYLONITE) += snd-soc-zylonite.o
+obj-$(CONFIG_SND_PXA2XX_SOC_IMOTE2) += snd-soc-imote2.o
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index 949be9c..f4756e4 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -1,7 +1,7 @@
 /*
- * em-x270.c  --  SoC audio for EM-X270
+ * SoC audio driver for EM-X270, eXeda and CM-X300
  *
- * Copyright 2007 CompuLab, Ltd.
+ * Copyright 2007, 2009 CompuLab, Ltd.
  *
  * Author: Mike Rapoport <mike@compulab.co.il>
  *
@@ -68,7 +68,8 @@
 {
 	int ret;
 
-	if (!machine_is_em_x270())
+	if (!(machine_is_em_x270() || machine_is_exeda()
+	      || machine_is_cm_x300()))
 		return -ENODEV;
 
 	em_x270_snd_device = platform_device_alloc("soc-audio", -1);
@@ -95,5 +96,5 @@
 
 /* Module information */
 MODULE_AUTHOR("Mike Rapoport");
-MODULE_DESCRIPTION("ALSA SoC EM-X270");
+MODULE_DESCRIPTION("ALSA SoC EM-X270, eXeda and CM-X300");
 MODULE_LICENSE("GPL");
diff --git a/sound/soc/pxa/imote2.c b/sound/soc/pxa/imote2.c
new file mode 100644
index 0000000..405587a
--- /dev/null
+++ b/sound/soc/pxa/imote2.c
@@ -0,0 +1,114 @@
+
+#include <linux/module.h>
+#include <sound/soc.h>
+
+#include <asm/mach-types.h>
+
+#include "../codecs/wm8940.h"
+#include "pxa2xx-i2s.h"
+#include "pxa2xx-pcm.h"
+
+static int imote2_asoc_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	unsigned int clk = 0;
+	int ret;
+
+	switch (params_rate(params)) {
+	case 8000:
+	case 16000:
+	case 48000:
+	case 96000:
+		clk = 12288000;
+		break;
+	case 11025:
+	case 22050:
+	case 44100:
+		clk = 11289600;
+		break;
+	}
+
+	/* set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai, SND_SOC_DAIFMT_I2S
+				  | SND_SOC_DAIFMT_NB_NF
+				  | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	/* CPU should be clock master */
+	ret = snd_soc_dai_set_fmt(cpu_dai,  SND_SOC_DAIFMT_I2S
+				  | SND_SOC_DAIFMT_NB_NF
+				  | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, clk,
+				     SND_SOC_CLOCK_IN);
+	if (ret < 0)
+		return ret;
+
+	/* set the I2S system clock as input (unused) */
+	ret = snd_soc_dai_set_sysclk(cpu_dai, PXA2XX_I2S_SYSCLK, clk,
+		SND_SOC_CLOCK_OUT);
+
+	return ret;
+}
+
+static struct snd_soc_ops imote2_asoc_ops = {
+	.hw_params = imote2_asoc_hw_params,
+};
+
+static struct snd_soc_dai_link imote2_dai = {
+	.name = "WM8940",
+	.stream_name = "WM8940",
+	.cpu_dai = &pxa_i2s_dai,
+	.codec_dai = &wm8940_dai,
+	.ops = &imote2_asoc_ops,
+};
+
+static struct snd_soc_card snd_soc_imote2 = {
+	.name = "Imote2",
+	.platform = &pxa2xx_soc_platform,
+	.dai_link = &imote2_dai,
+	.num_links = 1,
+};
+
+static struct snd_soc_device imote2_snd_devdata = {
+	.card = &snd_soc_imote2,
+	.codec_dev = &soc_codec_dev_wm8940,
+};
+
+static struct platform_device *imote2_snd_device;
+
+static int __init imote2_asoc_init(void)
+{
+	int ret;
+
+	if (!machine_is_intelmote2())
+		return -ENODEV;
+	imote2_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!imote2_snd_device)
+		return -ENOMEM;
+
+	platform_set_drvdata(imote2_snd_device, &imote2_snd_devdata);
+	imote2_snd_devdata.dev = &imote2_snd_device->dev;
+	ret = platform_device_add(imote2_snd_device);
+	if (ret)
+		platform_device_put(imote2_snd_device);
+
+	return ret;
+}
+module_init(imote2_asoc_init);
+
+static void __exit imote2_asoc_exit(void)
+{
+	platform_device_unregister(imote2_snd_device);
+}
+module_exit(imote2_asoc_exit);
+
+MODULE_AUTHOR("Jonathan Cameron");
+MODULE_DESCRIPTION("ALSA SoC Imote 2");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c
index 0625c34..c89a3cd 100644
--- a/sound/soc/pxa/magician.c
+++ b/sound/soc/pxa/magician.c
@@ -106,7 +106,7 @@
 			/* 513156 Hz ~= _2_ * 8000 Hz * 32 (+0.23%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_16;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 1026312 Hz ~= _2_ * 8000 Hz * 64 (+0.23%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_8;
 		}
@@ -118,7 +118,7 @@
 			/* 351375 Hz ~= 11025 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_4;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 702750 Hz ~= 11025 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 		}
@@ -130,7 +130,7 @@
 			/* 702750 Hz ~= 22050 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 1405500 Hz ~= 22050 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
@@ -142,7 +142,7 @@
 			/* 1405500 Hz ~= 44100 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 2811000 Hz ~= 44100 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
@@ -154,19 +154,20 @@
 			/* 1529375 Hz ~= 48000 Hz * 32 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 3058750 Hz ~= 48000 Hz * 64 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
 		break;
 	case 96000:
+	default:
 		acps = 12235000;
 		switch (width) {
 		case 16:
 			/* 3058750 Hz ~= 96000 Hz * 32 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 6117500 Hz ~= 96000 Hz * 64 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			div4 = PXA_SSP_CLK_SCDB_1;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 286be31..19c4540 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -50,139 +50,6 @@
 #endif
 };
 
-#define PXA2xx_SSP1_BASE	0x41000000
-#define PXA27x_SSP2_BASE	0x41700000
-#define PXA27x_SSP3_BASE	0x41900000
-#define PXA3xx_SSP4_BASE	0x41a00000
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_out = {
-	.name			= "SSP1 PCM Mono out",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(14),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_in = {
-	.name			= "SSP1 PCM Mono in",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(13),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_out = {
-	.name			= "SSP1 PCM Stereo out",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(14),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_in = {
-	.name			= "SSP1 PCM Stereo in",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(13),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_out = {
-	.name			= "SSP2 PCM Mono out",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(16),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_in = {
-	.name			= "SSP2 PCM Mono in",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(15),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_out = {
-	.name			= "SSP2 PCM Stereo out",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(16),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_in = {
-	.name			= "SSP2 PCM Stereo in",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(15),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_out = {
-	.name			= "SSP3 PCM Mono out",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_in = {
-	.name			= "SSP3 PCM Mono in",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_out = {
-	.name			= "SSP3 PCM Stereo out",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_in = {
-	.name			= "SSP3 PCM Stereo in",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_out = {
-	.name			= "SSP4 PCM Mono out",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_in = {
-	.name			= "SSP4 PCM Mono in",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_out = {
-	.name			= "SSP4 PCM Stereo out",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_in = {
-	.name			= "SSP4 PCM Stereo in",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
 static void dump_registers(struct ssp_device *ssp)
 {
 	dev_dbg(&ssp->pdev->dev, "SSCR0 0x%08x SSCR1 0x%08x SSTO 0x%08x\n",
@@ -194,25 +61,33 @@
 		 ssp_read_reg(ssp, SSACD));
 }
 
-static struct pxa2xx_pcm_dma_params *ssp_dma_params[4][4] = {
-	{
-		&pxa_ssp1_pcm_mono_out, &pxa_ssp1_pcm_mono_in,
-		&pxa_ssp1_pcm_stereo_out, &pxa_ssp1_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp2_pcm_mono_out, &pxa_ssp2_pcm_mono_in,
-		&pxa_ssp2_pcm_stereo_out, &pxa_ssp2_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp3_pcm_mono_out, &pxa_ssp3_pcm_mono_in,
-		&pxa_ssp3_pcm_stereo_out, &pxa_ssp3_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp4_pcm_mono_out, &pxa_ssp4_pcm_mono_in,
-		&pxa_ssp4_pcm_stereo_out, &pxa_ssp4_pcm_stereo_in,
-	},
+struct pxa2xx_pcm_dma_data {
+	struct pxa2xx_pcm_dma_params params;
+	char name[20];
 };
 
+static struct pxa2xx_pcm_dma_params *
+ssp_get_dma_params(struct ssp_device *ssp, int width4, int out)
+{
+	struct pxa2xx_pcm_dma_data *dma;
+
+	dma = kzalloc(sizeof(struct pxa2xx_pcm_dma_data), GFP_KERNEL);
+	if (dma == NULL)
+		return NULL;
+
+	snprintf(dma->name, 20, "SSP%d PCM %s %s", ssp->port_id,
+			width4 ? "32-bit" : "16-bit", out ? "out" : "in");
+
+	dma->params.name = dma->name;
+	dma->params.drcmr = &DRCMR(out ? ssp->drcmr_tx : ssp->drcmr_rx);
+	dma->params.dcmd = (out ? (DCMD_INCSRCADDR | DCMD_FLOWTRG) :
+				  (DCMD_INCTRGADDR | DCMD_FLOWSRC)) |
+			(width4 ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16;
+	dma->params.dev_addr = ssp->phys_base + SSDR;
+
+	return &dma->params;
+}
+
 static int pxa_ssp_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
@@ -227,6 +102,11 @@
 		clk_enable(priv->dev.ssp->clk);
 		ssp_disable(&priv->dev);
 	}
+
+	if (cpu_dai->dma_data) {
+		kfree(cpu_dai->dma_data);
+		cpu_dai->dma_data = NULL;
+	}
 	return ret;
 }
 
@@ -241,6 +121,11 @@
 		ssp_disable(&priv->dev);
 		clk_disable(priv->dev.ssp->clk);
 	}
+
+	if (cpu_dai->dma_data) {
+		kfree(cpu_dai->dma_data);
+		cpu_dai->dma_data = NULL;
+	}
 }
 
 #ifdef CONFIG_PM
@@ -323,7 +208,7 @@
 		~(SSCR0_ECS |  SSCR0_NCS | SSCR0_MOD | SSCR0_ACS);
 
 	dev_dbg(&ssp->pdev->dev,
-		"pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %d\n",
+		"pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %u\n",
 		cpu_dai->id, clk_id, freq);
 
 	switch (clk_id) {
@@ -472,7 +357,7 @@
 			ssacd |= (0x6 << 4);
 
 			dev_dbg(&ssp->pdev->dev,
-				"Using SSACDD %x to supply %dHz\n",
+				"Using SSACDD %x to supply %uHz\n",
 				val, freq_out);
 			break;
 		}
@@ -589,7 +474,10 @@
 		case SND_SOC_DAIFMT_NB_IF:
 			break;
 		case SND_SOC_DAIFMT_IB_IF:
-			sspsp |= SSPSP_SCMODE(3);
+			sspsp |= SSPSP_SCMODE(2);
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP;
 			break;
 		default:
 			return -EINVAL;
@@ -606,7 +494,13 @@
 		case SND_SOC_DAIFMT_NB_NF:
 			sspsp |= SSPSP_SFRMP;
 			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			break;
 		case SND_SOC_DAIFMT_IB_IF:
+			sspsp |= SSPSP_SCMODE(2);
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP;
 			break;
 		default:
 			return -EINVAL;
@@ -644,25 +538,23 @@
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
 	struct ssp_priv *priv = cpu_dai->private_data;
 	struct ssp_device *ssp = priv->dev.ssp;
-	int dma = 0, chn = params_channels(params);
+	int chn = params_channels(params);
 	u32 sscr0;
 	u32 sspsp;
 	int width = snd_pcm_format_physical_width(params_format(params));
 	int ttsa = ssp_read_reg(ssp, SSTSA) & 0xf;
 
-	/* select correct DMA params */
-	if (substream->stream != SNDRV_PCM_STREAM_PLAYBACK)
-		dma = 1; /* capture DMA offset is 1,3 */
+	/* generate correct DMA params */
+	if (cpu_dai->dma_data)
+		kfree(cpu_dai->dma_data);
+
 	/* Network mode with one active slot (ttsa == 1) can be used
 	 * to force 16-bit frame width on the wire (for S16_LE), even
 	 * with two channels. Use 16-bit DMA transfers for this case.
 	 */
-	if (((chn == 2) && (ttsa != 1)) || (width == 32))
-		dma += 2; /* 32-bit DMA offset is 2, 16-bit is 0 */
-
-	cpu_dai->dma_data = ssp_dma_params[cpu_dai->id][dma];
-
-	dev_dbg(&ssp->pdev->dev, "pxa_ssp_hw_params: dma %d\n", dma);
+	cpu_dai->dma_data = ssp_get_dma_params(ssp,
+			((chn == 2) && (ttsa != 1)) || (width == 32),
+			substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
 
 	/* we can only change the settings if the port is not in use */
 	if (ssp_read_reg(ssp, SSCR0) & SSCR0_SSE)
diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 2f4b6e4..4743e26 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -106,10 +106,8 @@
 	if (IS_ERR(clk_i2s))
 		return PTR_ERR(clk_i2s);
 
-	if (!cpu_dai->active) {
-		SACR0 |= SACR0_RST;
+	if (!cpu_dai->active)
 		SACR0 = 0;
-	}
 
 	return 0;
 }
@@ -178,9 +176,7 @@
 
 	/* is port used by another stream */
 	if (!(SACR0 & SACR0_ENB)) {
-
 		SACR0 = 0;
-		SACR1 = 0;
 		if (pxa_i2s.master)
 			SACR0 |= SACR0_BCKD;
 
@@ -226,6 +222,10 @@
 
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
+		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+			SACR1 &= ~SACR1_DRPL;
+		else
+			SACR1 &= ~SACR1_DREC;
 		SACR0 |= SACR0_ENB;
 		break;
 	case SNDRV_PCM_TRIGGER_RESUME:
@@ -252,21 +252,16 @@
 		SAIMR &= ~SAIMR_RFS;
 	}
 
-	if (SACR1 & (SACR1_DREC | SACR1_DRPL)) {
+	if ((SACR1 & (SACR1_DREC | SACR1_DRPL)) == (SACR1_DREC | SACR1_DRPL)) {
 		SACR0 &= ~SACR0_ENB;
 		pxa_i2s_wait();
 		clk_disable(clk_i2s);
 	}
-
-	clk_put(clk_i2s);
 }
 
 #ifdef CONFIG_PM
 static int pxa2xx_i2s_suspend(struct snd_soc_dai *dai)
 {
-	if (!dai->active)
-		return 0;
-
 	/* store registers */
 	pxa_i2s.sacr0 = SACR0;
 	pxa_i2s.sacr1 = SACR1;
@@ -281,16 +276,14 @@
 
 static int pxa2xx_i2s_resume(struct snd_soc_dai *dai)
 {
-	if (!dai->active)
-		return 0;
-
 	pxa_i2s_wait();
 
-	SACR0 = pxa_i2s.sacr0 &= ~SACR0_ENB;
+	SACR0 = pxa_i2s.sacr0 & ~SACR0_ENB;
 	SACR1 = pxa_i2s.sacr1;
 	SAIMR = pxa_i2s.saimr;
 	SADIV = pxa_i2s.sadiv;
-	SACR0 |= SACR0_ENB;
+
+	SACR0 = pxa_i2s.sacr0;
 
 	return 0;
 }
@@ -329,6 +322,7 @@
 		.rates = PXA2XX_I2S_RATES,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = &pxa_i2s_dai_ops,
+	.symmetric_rates = 1,
 };
 
 EXPORT_SYMBOL_GPL(pxa_i2s_dai);
@@ -346,6 +340,19 @@
 	if (ret != 0)
 		clk_put(clk_i2s);
 
+	/*
+	 * PXA Developer's Manual:
+	 * If SACR0[ENB] is toggled in the middle of a normal operation,
+	 * the SACR0[RST] bit must also be set and cleared to reset all
+	 * I2S controller registers.
+	 */
+	SACR0 = SACR0_RST;
+	SACR0 = 0;
+	/* Make sure RPL and REC are disabled */
+	SACR1 = SACR1_DRPL | SACR1_DREC;
+	/* Along with FIFO servicing */
+	SAIMR &= ~(SAIMR_RFS | SAIMR_TFS);
+
 	return ret;
 }
 
diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c
index 289fadf..906709e 100644
--- a/sound/soc/s3c24xx/neo1973_wm8753.c
+++ b/sound/soc/s3c24xx/neo1973_wm8753.c
@@ -345,9 +345,11 @@
 static int lm4857_get_reg(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	int reg = kcontrol->private_value & 0xFF;
-	int shift = (kcontrol->private_value >> 8) & 0x0F;
-	int mask = (kcontrol->private_value >> 16) & 0xFF;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int reg = mc->reg;
+	int shift = mc->shift;
+	int mask = mc->max;
 
 	pr_debug("Entered %s\n", __func__);
 
@@ -358,9 +360,11 @@
 static int lm4857_set_reg(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	int reg = kcontrol->private_value & 0xFF;
-	int shift = (kcontrol->private_value >> 8) & 0x0F;
-	int mask = (kcontrol->private_value >> 16) & 0xFF;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int reg = mc->reg;
+	int shift = mc->shift;
+	int mask = mc->max;
 
 	if (((lm4857_regs[reg] >> shift) & mask) ==
 		ucontrol->value.integer.value[0])
diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index ab680aa..1a28317 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -37,6 +37,20 @@
 
 #include "s3c-i2s-v2.h"
 
+#undef S3C_IIS_V2_SUPPORTED
+
+#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413)
+#define S3C_IIS_V2_SUPPORTED
+#endif
+
+#ifdef CONFIG_PLAT_S3C64XX
+#define S3C_IIS_V2_SUPPORTED
+#endif
+
+#ifndef S3C_IIS_V2_SUPPORTED
+#error Unsupported CPU model
+#endif
+
 #define S3C2412_I2S_DEBUG_CON 0
 
 static inline struct s3c_i2sv2_info *to_info(struct snd_soc_dai *cpu_dai)
@@ -75,7 +89,7 @@
 
 
 /* Turn on or off the transmission path. */
-void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
+static void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 {
 	void __iomem *regs = i2s->regs;
 	u32 fic, con, mod;
@@ -105,7 +119,9 @@
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -132,7 +148,9 @@
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXDIS: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -143,9 +161,8 @@
 	dbg_showcon(__func__, con);
 	pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic);
 }
-EXPORT_SYMBOL_GPL(s3c2412_snd_txctrl);
 
-void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
+static void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 {
 	void __iomem *regs = i2s->regs;
 	u32 fic, con, mod;
@@ -175,7 +192,8 @@
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -199,7 +217,8 @@
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXDIS: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -209,7 +228,6 @@
 	fic = readl(regs + S3C2412_IISFIC);
 	pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic);
 }
-EXPORT_SYMBOL_GPL(s3c2412_snd_rxctrl);
 
 /*
  * Wait for the LR signal to allow synchronisation to the L/R clock
@@ -266,7 +284,7 @@
  */
 #define IISMOD_MASTER_MASK (1 << 11)
 #define IISMOD_SLAVE (1 << 11)
-#define IISMOD_MASTER (0x0)
+#define IISMOD_MASTER (0 << 11)
 #endif
 
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -281,7 +299,7 @@
 		iismod |= IISMOD_MASTER;
 		break;
 	default:
-		pr_debug("unknwon master/slave format\n");
+		pr_err("unknwon master/slave format\n");
 		return -EINVAL;
 	}
 
@@ -298,7 +316,7 @@
 		iismod |= S3C2412_IISMOD_SDF_IIS;
 		break;
 	default:
-		pr_debug("Unknown data format\n");
+		pr_err("Unknown data format\n");
 		return -EINVAL;
 	}
 
@@ -327,6 +345,7 @@
 	iismod = readl(i2s->regs + S3C2412_IISMOD);
 	pr_debug("%s: r: IISMOD: %x\n", __func__, iismod);
 
+#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413)
 	switch (params_format(params)) {
 	case SNDRV_PCM_FORMAT_S8:
 		iismod |= S3C2412_IISMOD_8BIT;
@@ -335,6 +354,25 @@
 		iismod &= ~S3C2412_IISMOD_8BIT;
 		break;
 	}
+#endif
+
+#ifdef CONFIG_PLAT_S3C64XX
+	iismod &= ~0x606;
+	/* Sample size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S8:
+		/* 8 bit sample, 16fs BCLK */
+		iismod |= 0x2004;
+		break;
+	case SNDRV_PCM_FORMAT_S16_LE:
+		/* 16 bit sample, 32fs BCLK */
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		/* 24 bit sample, 48fs BCLK */
+		iismod |= 0x4002;
+		break;
+	}
+#endif
 
 	writel(iismod, i2s->regs + S3C2412_IISMOD);
 	pr_debug("%s: w: IISMOD: %x\n", __func__, iismod);
@@ -489,6 +527,8 @@
 	unsigned int best_rate = 0;
 	unsigned int best_deviation = INT_MAX;
 
+	pr_debug("Input clock rate %ldHz\n", clkrate);
+
 	if (fstab == NULL)
 		fstab = iis_fs_tab;
 
@@ -507,7 +547,7 @@
 		actual = clkrate / (fsdiv * div);
 		deviation = actual - rate;
 
-		printk(KERN_DEBUG "%dfs: div %d => result %d, deviation %d\n",
+		printk(KERN_DEBUG "%ufs: div %u => result %u, deviation %d\n",
 		       fsdiv, div, actual, deviation);
 
 		deviation = abs(deviation);
@@ -523,7 +563,7 @@
 			break;
 	}
 
-	printk(KERN_DEBUG "best: fs=%d, div=%d, rate=%d\n",
+	printk(KERN_DEBUG "best: fs=%u, div=%u, rate=%u\n",
 	       best_fs, best_div, best_rate);
 
 	info->fs_div = best_fs;
@@ -539,12 +579,31 @@
 		    unsigned long base)
 {
 	struct device *dev = &pdev->dev;
+	unsigned int iismod;
 
 	i2s->dev = dev;
 
 	/* record our i2s structure for later use in the callbacks */
 	dai->private_data = i2s;
 
+	if (!base) {
+		struct resource *res = platform_get_resource(pdev,
+							     IORESOURCE_MEM,
+							     0);
+		if (!res) {
+			dev_err(dev, "Unable to get register resource\n");
+			return -ENXIO;
+		}
+
+		if (!request_mem_region(res->start, resource_size(res),
+					"s3c64xx-i2s-v4")) {
+			dev_err(dev, "Unable to request register region\n");
+			return -EBUSY;
+		}
+
+		base = res->start;
+	}
+
 	i2s->regs = ioremap(base, 0x100);
 	if (i2s->regs == NULL) {
 		dev_err(dev, "cannot ioremap registers\n");
@@ -560,12 +619,16 @@
 
 	clk_enable(i2s->iis_pclk);
 
+	/* Mark ourselves as in TXRX mode so we can run through our cleanup
+	 * process without warnings. */
+	iismod = readl(i2s->regs + S3C2412_IISMOD);
+	iismod |= S3C2412_IISMOD_MODE_TXRX;
+	writel(iismod, i2s->regs + S3C2412_IISMOD);
 	s3c2412_snd_txctrl(i2s, 0);
 	s3c2412_snd_rxctrl(i2s, 0);
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(s3c_i2sv2_probe);
 
 #ifdef CONFIG_PM
diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index b7e0b3f..168a088 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -120,7 +120,7 @@
 
 	s3c2412_i2s.iis_cclk = clk_get(&pdev->dev, "i2sclk");
 	if (s3c2412_i2s.iis_cclk == NULL) {
-		pr_debug("failed to get i2sclk clock\n");
+		pr_err("failed to get i2sclk clock\n");
 		iounmap(s3c2412_i2s.regs);
 		return -ENODEV;
 	}
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index 33c5de7..3c06c40 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -108,48 +108,19 @@
 	return 0;
 }
 
-
-unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *dai)
+struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai)
 {
 	struct s3c_i2sv2_info *i2s = to_info(dai);
 
-	return clk_get_rate(i2s->iis_cclk);
+	return i2s->iis_cclk;
 }
-EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clockrate);
+EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clock);
 
 static int s3c64xx_i2s_probe(struct platform_device *pdev,
 			     struct snd_soc_dai *dai)
 {
-	struct device *dev = &pdev->dev;
-	struct s3c_i2sv2_info *i2s;
-	int ret;
-
-	dev_dbg(dev, "%s: probing dai %d\n", __func__, pdev->id);
-
-	if (pdev->id < 0 || pdev->id > ARRAY_SIZE(s3c64xx_i2s)) {
-		dev_err(dev, "id %d out of range\n", pdev->id);
-		return -EINVAL;
-	}
-
-	i2s = &s3c64xx_i2s[pdev->id];
-
-	ret = s3c_i2sv2_probe(pdev, dai, i2s,
-			      pdev->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0);
-	if (ret)
-		return ret;
-
-	i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id];
-	i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id];
-
-	i2s->iis_cclk = clk_get(dev, "audio-bus");
-	if (IS_ERR(i2s->iis_cclk)) {
-		dev_err(dev, "failed to get audio-bus");
-		iounmap(i2s->regs);
-		return -ENODEV;
-	}
-
 	/* configure GPIO for i2s port */
-	switch (pdev->id) {
+	switch (dai->id) {
 	case 0:
 		s3c_gpio_cfgpin(S3C64XX_GPD(0), S3C64XX_GPD0_I2S0_CLK);
 		s3c_gpio_cfgpin(S3C64XX_GPD(1), S3C64XX_GPD1_I2S0_CDCLK);
@@ -175,41 +146,122 @@
 	SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000)
 
 #define S3C64XX_I2S_FMTS \
-	(SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE)
+	(SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE |\
+	 SNDRV_PCM_FMTBIT_S24_LE)
 
 static struct snd_soc_dai_ops s3c64xx_i2s_dai_ops = {
 	.set_sysclk	= s3c64xx_i2s_set_sysclk,	
 };
 
-struct snd_soc_dai s3c64xx_i2s_dai = {
-	.name		= "s3c64xx-i2s",
-	.id		= 0,
-	.probe		= s3c64xx_i2s_probe,
-	.playback = {
-		.channels_min	= 2,
-		.channels_max	= 2,
-		.rates		= S3C64XX_I2S_RATES,
-		.formats	= S3C64XX_I2S_FMTS,
+struct snd_soc_dai s3c64xx_i2s_dai[] = {
+	{
+		.name		= "s3c64xx-i2s",
+		.id		= 0,
+		.probe		= s3c64xx_i2s_probe,
+		.playback = {
+			.channels_min	= 2,
+			.channels_max	= 2,
+			.rates		= S3C64XX_I2S_RATES,
+			.formats	= S3C64XX_I2S_FMTS,
+		},
+		.capture = {
+			 .channels_min	= 2,
+			 .channels_max	= 2,
+			 .rates		= S3C64XX_I2S_RATES,
+			 .formats	= S3C64XX_I2S_FMTS,
+		 },
+		.ops = &s3c64xx_i2s_dai_ops,
+		.symmetric_rates = 1,
 	},
-	.capture = {
-		.channels_min	= 2,
-		.channels_max	= 2,
-		.rates		= S3C64XX_I2S_RATES,
-		.formats	= S3C64XX_I2S_FMTS,
+	{
+		.name		= "s3c64xx-i2s",
+		.id		= 1,
+		.probe		= s3c64xx_i2s_probe,
+		.playback = {
+			.channels_min	= 2,
+			.channels_max	= 2,
+			.rates		= S3C64XX_I2S_RATES,
+			.formats	= S3C64XX_I2S_FMTS,
+		},
+		.capture = {
+			 .channels_min	= 2,
+			 .channels_max	= 2,
+			 .rates		= S3C64XX_I2S_RATES,
+			 .formats	= S3C64XX_I2S_FMTS,
+		 },
+		.ops = &s3c64xx_i2s_dai_ops,
+		.symmetric_rates = 1,
 	},
-	.ops = &s3c64xx_i2s_dai_ops,
 };
 EXPORT_SYMBOL_GPL(s3c64xx_i2s_dai);
 
+static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev)
+{
+	struct s3c_i2sv2_info *i2s;
+	struct snd_soc_dai *dai;
+	int ret;
+
+	if (pdev->id >= ARRAY_SIZE(s3c64xx_i2s)) {
+		dev_err(&pdev->dev, "id %d out of range\n", pdev->id);
+		return -EINVAL;
+	}
+
+	i2s = &s3c64xx_i2s[pdev->id];
+	dai = &s3c64xx_i2s_dai[pdev->id];
+	dai->dev = &pdev->dev;
+
+	i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id];
+	i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id];
+
+	i2s->iis_cclk = clk_get(&pdev->dev, "audio-bus");
+	if (IS_ERR(i2s->iis_cclk)) {
+		dev_err(&pdev->dev, "failed to get audio-bus\n");
+		ret = PTR_ERR(i2s->iis_cclk);
+		goto err;
+	}
+
+	ret = s3c_i2sv2_probe(pdev, dai, i2s, 0);
+	if (ret)
+		goto err_clk;
+
+	ret = s3c_i2sv2_register_dai(dai);
+	if (ret != 0)
+		goto err_i2sv2;
+
+	return 0;
+
+err_i2sv2:
+	/* Not implemented for I2Sv2 core yet */
+err_clk:
+	clk_put(i2s->iis_cclk);
+err:
+	return ret;
+}
+
+static __devexit int s3c64xx_iis_dev_remove(struct platform_device *pdev)
+{
+	dev_err(&pdev->dev, "Device removal not yet supported\n");
+	return 0;
+}
+
+static struct platform_driver s3c64xx_iis_driver = {
+	.probe  = s3c64xx_iis_dev_probe,
+	.remove = s3c64xx_iis_dev_remove,
+	.driver = {
+		.name = "s3c64xx-iis",
+		.owner = THIS_MODULE,
+	},
+};
+
 static int __init s3c64xx_i2s_init(void)
 {
-	return  s3c_i2sv2_register_dai(&s3c64xx_i2s_dai);
+	return platform_driver_register(&s3c64xx_iis_driver);
 }
 module_init(s3c64xx_i2s_init);
 
 static void __exit s3c64xx_i2s_exit(void)
 {
-	snd_soc_unregister_dai(&s3c64xx_i2s_dai);
+	platform_driver_unregister(&s3c64xx_iis_driver);
 }
 module_exit(s3c64xx_i2s_exit);
 
@@ -217,6 +269,3 @@
 MODULE_AUTHOR("Ben Dooks, <ben@simtec.co.uk>");
 MODULE_DESCRIPTION("S3C64XX I2S SoC Interface");
 MODULE_LICENSE("GPL");
-
-
-
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.h b/sound/soc/s3c24xx/s3c64xx-i2s.h
index b7ffe3c..02148ce 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.h
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.h
@@ -15,6 +15,8 @@
 #ifndef __SND_SOC_S3C24XX_S3C64XX_I2S_H
 #define __SND_SOC_S3C24XX_S3C64XX_I2S_H __FILE__
 
+struct clk;
+
 #include "s3c-i2s-v2.h"
 
 #define S3C64XX_DIV_BCLK	S3C_I2SV2_DIV_BCLK
@@ -24,8 +26,8 @@
 #define S3C64XX_CLKSRC_PCLK	(0)
 #define S3C64XX_CLKSRC_MUX	(1)
 
-extern struct snd_soc_dai s3c64xx_i2s_dai;
+extern struct snd_soc_dai s3c64xx_i2s_dai[];
 
-extern unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *cpu_dai);
+extern struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai);
 
 #endif /* __SND_SOC_S3C24XX_S3C64XX_I2S_H */
diff --git a/sound/soc/s6000/Kconfig b/sound/soc/s6000/Kconfig
new file mode 100644
index 0000000..c74eb3d
--- /dev/null
+++ b/sound/soc/s6000/Kconfig
@@ -0,0 +1,19 @@
+config SND_S6000_SOC
+	tristate "SoC Audio for the Stretch s6000 family"
+	depends on XTENSA_VARIANT_S6000
+	help
+	  Say Y or M if you want to add support for codecs attached to
+	  s6000 family chips. You will also need to select the platform
+	  to support below.
+
+config SND_S6000_SOC_I2S
+	tristate
+
+config SND_S6000_SOC_S6IPCAM
+	tristate "SoC Audio support for Stretch 6105 IP Camera"
+	depends on SND_S6000_SOC && XTENSA_PLATFORM_S6105
+	select SND_S6000_SOC_I2S
+	select SND_SOC_TLV320AIC3X
+	help
+	  Say Y if you want to add support for SoC audio on the
+	  Stretch s6105 IP Camera Reference Design.
diff --git a/sound/soc/s6000/Makefile b/sound/soc/s6000/Makefile
new file mode 100644
index 0000000..7a61361
--- /dev/null
+++ b/sound/soc/s6000/Makefile
@@ -0,0 +1,11 @@
+# s6000 Platform Support
+snd-soc-s6000-objs := s6000-pcm.o
+snd-soc-s6000-i2s-objs := s6000-i2s.o
+
+obj-$(CONFIG_SND_S6000_SOC) += snd-soc-s6000.o
+obj-$(CONFIG_SND_S6000_SOC_I2S) += snd-soc-s6000-i2s.o
+
+# s6105 Machine Support
+snd-soc-s6ipcam-objs := s6105-ipcam.o
+
+obj-$(CONFIG_SND_S6000_SOC_S6IPCAM) += snd-soc-s6ipcam.o
diff --git a/sound/soc/s6000/s6000-i2s.c b/sound/soc/s6000/s6000-i2s.c
new file mode 100644
index 0000000..c5cda18
--- /dev/null
+++ b/sound/soc/s6000/s6000-i2s.c
@@ -0,0 +1,629 @@
+/*
+ * ALSA SoC I2S Audio Layer for the Stretch S6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+
+#include "s6000-i2s.h"
+#include "s6000-pcm.h"
+
+struct s6000_i2s_dev {
+	dma_addr_t sifbase;
+	u8 __iomem *scbbase;
+	unsigned int wide;
+	unsigned int channel_in;
+	unsigned int channel_out;
+	unsigned int lines_in;
+	unsigned int lines_out;
+	struct s6000_pcm_dma_params dma_params;
+};
+
+#define S6_I2S_INTERRUPT_STATUS	0x00
+#define   S6_I2S_INT_OVERRUN	1
+#define   S6_I2S_INT_UNDERRUN	2
+#define   S6_I2S_INT_ALIGNMENT	4
+#define S6_I2S_INTERRUPT_ENABLE	0x04
+#define S6_I2S_INTERRUPT_RAW	0x08
+#define S6_I2S_INTERRUPT_CLEAR	0x0C
+#define S6_I2S_INTERRUPT_SET	0x10
+#define S6_I2S_MODE		0x20
+#define   S6_I2S_DUAL		0
+#define   S6_I2S_WIDE		1
+#define S6_I2S_TX_DEFAULT	0x24
+#define S6_I2S_DATA_CFG(c)	(0x40 + 0x10 * (c))
+#define   S6_I2S_IN		0
+#define   S6_I2S_OUT		1
+#define   S6_I2S_UNUSED		2
+#define S6_I2S_INTERFACE_CFG(c)	(0x44 + 0x10 * (c))
+#define   S6_I2S_DIV_MASK	0x001fff
+#define   S6_I2S_16BIT		0x000000
+#define   S6_I2S_20BIT		0x002000
+#define   S6_I2S_24BIT		0x004000
+#define   S6_I2S_32BIT		0x006000
+#define   S6_I2S_BITS_MASK	0x006000
+#define   S6_I2S_MEM_16BIT	0x000000
+#define   S6_I2S_MEM_32BIT	0x008000
+#define   S6_I2S_MEM_MASK	0x008000
+#define   S6_I2S_CHANNELS_SHIFT	16
+#define   S6_I2S_CHANNELS_MASK	0x030000
+#define   S6_I2S_SCK_IN		0x000000
+#define   S6_I2S_SCK_OUT	0x040000
+#define   S6_I2S_SCK_DIR	0x040000
+#define   S6_I2S_WS_IN		0x000000
+#define   S6_I2S_WS_OUT		0x080000
+#define   S6_I2S_WS_DIR		0x080000
+#define   S6_I2S_LEFT_FIRST	0x000000
+#define   S6_I2S_RIGHT_FIRST	0x100000
+#define   S6_I2S_FIRST		0x100000
+#define   S6_I2S_CUR_SCK	0x200000
+#define   S6_I2S_CUR_WS		0x400000
+#define S6_I2S_ENABLE(c)	(0x48 + 0x10 * (c))
+#define   S6_I2S_DISABLE_IF	0x02
+#define   S6_I2S_ENABLE_IF	0x03
+#define   S6_I2S_IS_BUSY	0x04
+#define   S6_I2S_DMA_ACTIVE	0x08
+#define   S6_I2S_IS_ENABLED	0x10
+
+#define S6_I2S_NUM_LINES	4
+
+#define S6_I2S_SIF_PORT0	0x0000000
+#define S6_I2S_SIF_PORT1	0x0000080 /* docs say 0x0000010 */
+
+static inline void s6_i2s_write_reg(struct s6000_i2s_dev *dev, int reg, u32 val)
+{
+	writel(val, dev->scbbase + reg);
+}
+
+static inline u32 s6_i2s_read_reg(struct s6000_i2s_dev *dev, int reg)
+{
+	return readl(dev->scbbase + reg);
+}
+
+static inline void s6_i2s_mod_reg(struct s6000_i2s_dev *dev, int reg,
+				  u32 mask, u32 val)
+{
+	val ^= s6_i2s_read_reg(dev, reg) & ~mask;
+	s6_i2s_write_reg(dev, reg, val);
+}
+
+static void s6000_i2s_start_channel(struct s6000_i2s_dev *dev, int channel)
+{
+	int i, j, cur, prev;
+
+	/*
+	 * Wait for WCLK to toggle 5 times before enabling the channel
+	 * s6000 Family Datasheet 3.6.4:
+	 *   "At least two cycles of WS must occur between commands
+	 *    to disable or enable the interface"
+	 */
+	j = 0;
+	prev = ~S6_I2S_CUR_WS;
+	for (i = 1000000; --i && j < 6; ) {
+		cur = s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(channel))
+		       & S6_I2S_CUR_WS;
+		if (prev != cur) {
+			prev = cur;
+			j++;
+		}
+	}
+	if (j < 6)
+		printk(KERN_WARNING "s6000-i2s: timeout waiting for WCLK\n");
+
+	s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_ENABLE_IF);
+}
+
+static void s6000_i2s_stop_channel(struct s6000_i2s_dev *dev, int channel)
+{
+	s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_DISABLE_IF);
+}
+
+static void s6000_i2s_start(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data;
+	int channel;
+
+	channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+			dev->channel_out : dev->channel_in;
+
+	s6000_i2s_start_channel(dev, channel);
+}
+
+static void s6000_i2s_stop(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data;
+	int channel;
+
+	channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+			dev->channel_out : dev->channel_in;
+
+	s6000_i2s_stop_channel(dev, channel);
+}
+
+static int s6000_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			     int after)
+{
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		if ((substream->stream == SNDRV_PCM_STREAM_CAPTURE) ^ !after)
+			s6000_i2s_start(substream);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		if (!after)
+			s6000_i2s_stop(substream);
+	}
+	return 0;
+}
+
+static unsigned int s6000_i2s_int_sources(struct s6000_i2s_dev *dev)
+{
+	unsigned int pending;
+	pending = s6_i2s_read_reg(dev, S6_I2S_INTERRUPT_RAW);
+	pending &= S6_I2S_INT_ALIGNMENT |
+		   S6_I2S_INT_UNDERRUN |
+		   S6_I2S_INT_OVERRUN;
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR, pending);
+
+	return pending;
+}
+
+static unsigned int s6000_i2s_check_xrun(struct snd_soc_dai *cpu_dai)
+{
+	struct s6000_i2s_dev *dev = cpu_dai->private_data;
+	unsigned int errors;
+	unsigned int ret;
+
+	errors = s6000_i2s_int_sources(dev);
+	if (likely(!errors))
+		return 0;
+
+	ret = 0;
+	if (errors & S6_I2S_INT_ALIGNMENT)
+		printk(KERN_ERR "s6000-i2s: WCLK misaligned\n");
+	if (errors & S6_I2S_INT_UNDERRUN)
+		ret |= 1 << SNDRV_PCM_STREAM_PLAYBACK;
+	if (errors & S6_I2S_INT_OVERRUN)
+		ret |= 1 << SNDRV_PCM_STREAM_CAPTURE;
+	return ret;
+}
+
+static void s6000_i2s_wait_disabled(struct s6000_i2s_dev *dev)
+{
+	int channel;
+	int n = 50;
+	for (channel = 0; channel < 2; channel++) {
+		while (--n >= 0) {
+			int v = s6_i2s_read_reg(dev, S6_I2S_ENABLE(channel));
+			if ((v & S6_I2S_IS_ENABLED)
+			    || !(v & (S6_I2S_DMA_ACTIVE | S6_I2S_IS_BUSY)))
+				break;
+			udelay(20);
+		}
+	}
+	if (n < 0)
+		printk(KERN_WARNING "s6000-i2s: timeout disabling interfaces");
+}
+
+static int s6000_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
+				   unsigned int fmt)
+{
+	struct s6000_i2s_dev *dev = cpu_dai->private_data;
+	u32 w;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		w = S6_I2S_SCK_IN | S6_I2S_WS_IN;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFM:
+		w = S6_I2S_SCK_OUT | S6_I2S_WS_IN;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFS:
+		w = S6_I2S_SCK_IN | S6_I2S_WS_OUT;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		w = S6_I2S_SCK_OUT | S6_I2S_WS_OUT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		w |= S6_I2S_LEFT_FIRST;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		w |= S6_I2S_RIGHT_FIRST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(0),
+		       S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w);
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(1),
+		       S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w);
+
+	return 0;
+}
+
+static int s6000_i2s_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+
+	if (!div || (div & 1) || div > (S6_I2S_DIV_MASK + 1) * 2)
+		return -EINVAL;
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(div_id),
+		       S6_I2S_DIV_MASK, div / 2 - 1);
+	return 0;
+}
+
+static int s6000_i2s_hw_params(struct snd_pcm_substream *substream,
+			       struct snd_pcm_hw_params *params,
+			       struct snd_soc_dai *dai)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+	int interf;
+	u32 w = 0;
+
+	if (dev->wide)
+		interf = 0;
+	else {
+		w |= (((params_channels(params) - 2) / 2)
+		      << S6_I2S_CHANNELS_SHIFT) & S6_I2S_CHANNELS_MASK;
+		interf = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+				? dev->channel_out : dev->channel_in;
+	}
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		w |= S6_I2S_16BIT | S6_I2S_MEM_16BIT;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		w |= S6_I2S_32BIT | S6_I2S_MEM_32BIT;
+		break;
+	default:
+		printk(KERN_WARNING "s6000-i2s: unsupported PCM format %x\n",
+		       params_format(params));
+		return -EINVAL;
+	}
+
+	if (s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(interf))
+	     & S6_I2S_IS_ENABLED) {
+		printk(KERN_ERR "s6000-i2s: interface already enabled\n");
+		return -EBUSY;
+	}
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(interf),
+		       S6_I2S_CHANNELS_MASK|S6_I2S_MEM_MASK|S6_I2S_BITS_MASK,
+		       w);
+
+	return 0;
+}
+
+static int s6000_i2s_dai_probe(struct platform_device *pdev,
+			       struct snd_soc_dai *dai)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+	struct s6000_snd_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdata)
+		return -EINVAL;
+
+	dev->wide = pdata->wide;
+	dev->channel_in = pdata->channel_in;
+	dev->channel_out = pdata->channel_out;
+	dev->lines_in = pdata->lines_in;
+	dev->lines_out = pdata->lines_out;
+
+	s6_i2s_write_reg(dev, S6_I2S_MODE,
+			 dev->wide ? S6_I2S_WIDE : S6_I2S_DUAL);
+
+	if (dev->wide) {
+		int i;
+
+		if (dev->lines_in + dev->lines_out > S6_I2S_NUM_LINES)
+			return -EINVAL;
+
+		dev->channel_in = 0;
+		dev->channel_out = 1;
+		dai->capture.channels_min = 2 * dev->lines_in;
+		dai->capture.channels_max = dai->capture.channels_min;
+		dai->playback.channels_min = 2 * dev->lines_out;
+		dai->playback.channels_max = dai->playback.channels_min;
+
+		for (i = 0; i < dev->lines_out; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_OUT);
+
+		for (; i < S6_I2S_NUM_LINES - dev->lines_in; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i),
+					 S6_I2S_UNUSED);
+
+		for (; i < S6_I2S_NUM_LINES; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_IN);
+	} else {
+		unsigned int cfg[2] = {S6_I2S_UNUSED, S6_I2S_UNUSED};
+
+		if (dev->lines_in > 1 || dev->lines_out > 1)
+			return -EINVAL;
+
+		dai->capture.channels_min = 2 * dev->lines_in;
+		dai->capture.channels_max = 8 * dev->lines_in;
+		dai->playback.channels_min = 2 * dev->lines_out;
+		dai->playback.channels_max = 8 * dev->lines_out;
+
+		if (dev->lines_in)
+			cfg[dev->channel_in] = S6_I2S_IN;
+		if (dev->lines_out)
+			cfg[dev->channel_out] = S6_I2S_OUT;
+
+		s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(0), cfg[0]);
+		s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(1), cfg[1]);
+	}
+
+	if (dev->lines_out) {
+		if (dev->lines_in) {
+			if (!dev->dma_params.dma_out)
+				return -ENODEV;
+		} else {
+			dev->dma_params.dma_out = dev->dma_params.dma_in;
+			dev->dma_params.dma_in = 0;
+		}
+	}
+	dev->dma_params.sif_in = dev->sifbase + (dev->channel_in ?
+					S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0);
+	dev->dma_params.sif_out = dev->sifbase + (dev->channel_out ?
+					S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0);
+	dev->dma_params.same_rate = pdata->same_rate | pdata->wide;
+	return 0;
+}
+
+#define S6000_I2S_RATES	(SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \
+			 SNDRV_PCM_RATE_8000_192000)
+#define S6000_I2S_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops s6000_i2s_dai_ops = {
+	.set_fmt = s6000_i2s_set_dai_fmt,
+	.set_clkdiv = s6000_i2s_set_clkdiv,
+	.hw_params = s6000_i2s_hw_params,
+};
+
+struct snd_soc_dai s6000_i2s_dai = {
+	.name = "s6000-i2s",
+	.id = 0,
+	.probe = s6000_i2s_dai_probe,
+	.playback = {
+		.channels_min = 2,
+		.channels_max = 8,
+		.formats = S6000_I2S_FORMATS,
+		.rates = S6000_I2S_RATES,
+		.rate_min = 0,
+		.rate_max = 1562500,
+	},
+	.capture = {
+		.channels_min = 2,
+		.channels_max = 8,
+		.formats = S6000_I2S_FORMATS,
+		.rates = S6000_I2S_RATES,
+		.rate_min = 0,
+		.rate_max = 1562500,
+	},
+	.ops = &s6000_i2s_dai_ops,
+}
+EXPORT_SYMBOL_GPL(s6000_i2s_dai);
+
+static int __devinit s6000_i2s_probe(struct platform_device *pdev)
+{
+	struct s6000_i2s_dev *dev;
+	struct resource *scbmem, *sifmem, *region, *dma1, *dma2;
+	u8 __iomem *mmio;
+	int ret;
+
+	scbmem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!scbmem) {
+		dev_err(&pdev->dev, "no mem resource?\n");
+		ret = -ENODEV;
+		goto err_release_none;
+	}
+
+	region = request_mem_region(scbmem->start,
+				    scbmem->end - scbmem->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S SCB region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_none;
+	}
+
+	mmio = ioremap(scbmem->start, scbmem->end - scbmem->start + 1);
+	if (!mmio) {
+		dev_err(&pdev->dev, "can't ioremap SCB region\n");
+		ret = -ENOMEM;
+		goto err_release_scb;
+	}
+
+	sifmem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!sifmem) {
+		dev_err(&pdev->dev, "no second mem resource?\n");
+		ret = -ENODEV;
+		goto err_release_map;
+	}
+
+	region = request_mem_region(sifmem->start,
+				    sifmem->end - sifmem->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S SIF region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_map;
+	}
+
+	dma1 = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!dma1) {
+		dev_err(&pdev->dev, "no dma resource?\n");
+		ret = -ENODEV;
+		goto err_release_sif;
+	}
+
+	region = request_mem_region(dma1->start, dma1->end - dma1->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S DMA region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_sif;
+	}
+
+	dma2 = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (dma2) {
+		region = request_mem_region(dma2->start,
+					    dma2->end - dma2->start + 1,
+					    pdev->name);
+		if (!region) {
+			dev_err(&pdev->dev,
+				"I2S DMA region already claimed\n");
+			ret = -EBUSY;
+			goto err_release_dma1;
+		}
+	}
+
+	dev = kzalloc(sizeof(struct s6000_i2s_dev), GFP_KERNEL);
+	if (!dev) {
+		ret = -ENOMEM;
+		goto err_release_dma2;
+	}
+
+	s6000_i2s_dai.dev = &pdev->dev;
+	s6000_i2s_dai.private_data = dev;
+	s6000_i2s_dai.dma_data = &dev->dma_params;
+
+	dev->sifbase = sifmem->start;
+	dev->scbbase = mmio;
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0);
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR,
+			 S6_I2S_INT_ALIGNMENT |
+			 S6_I2S_INT_UNDERRUN |
+			 S6_I2S_INT_OVERRUN);
+
+	s6000_i2s_stop_channel(dev, 0);
+	s6000_i2s_stop_channel(dev, 1);
+	s6000_i2s_wait_disabled(dev);
+
+	dev->dma_params.check_xrun = s6000_i2s_check_xrun;
+	dev->dma_params.trigger = s6000_i2s_trigger;
+	dev->dma_params.dma_in = dma1->start;
+	dev->dma_params.dma_out = dma2 ? dma2->start : 0;
+	dev->dma_params.irq = platform_get_irq(pdev, 0);
+	if (dev->dma_params.irq < 0) {
+		dev_err(&pdev->dev, "no irq resource?\n");
+		ret = -ENODEV;
+		goto err_release_dev;
+	}
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE,
+			 S6_I2S_INT_ALIGNMENT |
+			 S6_I2S_INT_UNDERRUN |
+			 S6_I2S_INT_OVERRUN);
+
+	ret = snd_soc_register_dai(&s6000_i2s_dai);
+	if (ret)
+		goto err_release_dev;
+
+	return 0;
+
+err_release_dev:
+	kfree(dev);
+err_release_dma2:
+	if (dma2)
+		release_mem_region(dma2->start, dma2->end - dma2->start + 1);
+err_release_dma1:
+	release_mem_region(dma1->start, dma1->end - dma1->start + 1);
+err_release_sif:
+	release_mem_region(sifmem->start, (sifmem->end - sifmem->start) + 1);
+err_release_map:
+	iounmap(mmio);
+err_release_scb:
+	release_mem_region(scbmem->start, (scbmem->end - scbmem->start) + 1);
+err_release_none:
+	return ret;
+}
+
+static void __devexit s6000_i2s_remove(struct platform_device *pdev)
+{
+	struct s6000_i2s_dev *dev = s6000_i2s_dai.private_data;
+	struct resource *region;
+	void __iomem *mmio = dev->scbbase;
+
+	snd_soc_unregister_dai(&s6000_i2s_dai);
+
+	s6000_i2s_stop_channel(dev, 0);
+	s6000_i2s_stop_channel(dev, 1);
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0);
+	s6000_i2s_dai.private_data = 0;
+	kfree(dev);
+
+	region = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	release_mem_region(region->start, region->end - region->start + 1);
+
+	region = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (region)
+		release_mem_region(region->start,
+				   region->end - region->start + 1);
+
+	region = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(region->start, (region->end - region->start) + 1);
+
+	iounmap(mmio);
+	region = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	release_mem_region(region->start, (region->end - region->start) + 1);
+}
+
+static struct platform_driver s6000_i2s_driver = {
+	.probe  = s6000_i2s_probe,
+	.remove = __devexit_p(s6000_i2s_remove),
+	.driver = {
+		.name   = "s6000-i2s",
+		.owner  = THIS_MODULE,
+	},
+};
+
+static int __init s6000_i2s_init(void)
+{
+	return platform_driver_register(&s6000_i2s_driver);
+}
+module_init(s6000_i2s_init);
+
+static void __exit s6000_i2s_exit(void)
+{
+	platform_driver_unregister(&s6000_i2s_driver);
+}
+module_exit(s6000_i2s_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6000 family I2S SoC Interface");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/s6000/s6000-i2s.h b/sound/soc/s6000/s6000-i2s.h
new file mode 100644
index 0000000..2375fdf
--- /dev/null
+++ b/sound/soc/s6000/s6000-i2s.h
@@ -0,0 +1,25 @@
+/*
+ * ALSA SoC I2S Audio Layer for the Stretch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _S6000_I2S_H
+#define _S6000_I2S_H
+
+extern struct snd_soc_dai s6000_i2s_dai;
+
+struct s6000_snd_platform_data {
+	int lines_in;
+	int lines_out;
+	int channel_in;
+	int channel_out;
+	int wide;
+	int same_rate;
+};
+#endif
diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c
new file mode 100644
index 0000000..83b8028
--- /dev/null
+++ b/sound/soc/s6000/s6000-pcm.c
@@ -0,0 +1,497 @@
+/*
+ * ALSA PCM interface for the Stetch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+#include <asm/dma.h>
+#include <variant/dmac.h>
+
+#include "s6000-pcm.h"
+
+#define S6_PCM_PREALLOCATE_SIZE (96 * 1024)
+#define S6_PCM_PREALLOCATE_MAX  (2048 * 1024)
+
+static struct snd_pcm_hardware s6000_pcm_hardware = {
+	.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
+		 SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_JOINT_DUPLEX),
+	.formats = (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE),
+	.rates = (SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \
+		  SNDRV_PCM_RATE_8000_192000),
+	.rate_min = 0,
+	.rate_max = 1562500,
+	.channels_min = 2,
+	.channels_max = 8,
+	.buffer_bytes_max = 0x7ffffff0,
+	.period_bytes_min = 16,
+	.period_bytes_max = 0xfffff0,
+	.periods_min = 2,
+	.periods_max = 1024, /* no limit */
+	.fifo_size = 0,
+};
+
+struct s6000_runtime_data {
+	spinlock_t lock;
+	int period;		/* current DMA period */
+};
+
+static void s6000_pcm_enqueue_dma(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int channel;
+	unsigned int period_size;
+	unsigned int dma_offset;
+	dma_addr_t dma_pos;
+	dma_addr_t src, dst;
+
+	period_size = snd_pcm_lib_period_bytes(substream);
+	dma_offset = prtd->period * period_size;
+	dma_pos = runtime->dma_addr + dma_offset;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		src = dma_pos;
+		dst = par->sif_out;
+		channel = par->dma_out;
+	} else {
+		src = par->sif_in;
+		dst = dma_pos;
+		channel = par->dma_in;
+	}
+
+	if (!s6dmac_channel_enabled(DMA_MASK_DMAC(channel),
+				    DMA_INDEX_CHNL(channel)))
+		return;
+
+	if (s6dmac_fifo_full(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel))) {
+		printk(KERN_ERR "s6000-pcm: fifo full\n");
+		return;
+	}
+
+	BUG_ON(period_size & 15);
+	s6dmac_put_fifo(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel),
+			src, dst, period_size);
+
+	prtd->period++;
+	if (unlikely(prtd->period >= runtime->periods))
+		prtd->period = 0;
+}
+
+static irqreturn_t s6000_pcm_irq(int irq, void *data)
+{
+	struct snd_pcm *pcm = data;
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+	struct s6000_runtime_data *prtd;
+	unsigned int has_xrun;
+	int i, ret = IRQ_NONE;
+	u32 channel[2] = {
+		[SNDRV_PCM_STREAM_PLAYBACK] = params->dma_out,
+		[SNDRV_PCM_STREAM_CAPTURE] = params->dma_in
+	};
+
+	has_xrun = params->check_xrun(runtime->dai->cpu_dai);
+
+	for (i = 0; i < ARRAY_SIZE(channel); ++i) {
+		struct snd_pcm_substream *substream = pcm->streams[i].substream;
+		unsigned int pending;
+
+		if (!channel[i])
+			continue;
+
+		if (unlikely(has_xrun & (1 << i)) &&
+		    substream->runtime &&
+		    snd_pcm_running(substream)) {
+			dev_dbg(pcm->dev, "xrun\n");
+			snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
+			ret = IRQ_HANDLED;
+		}
+
+		pending = s6dmac_int_sources(DMA_MASK_DMAC(channel[i]),
+					     DMA_INDEX_CHNL(channel[i]));
+
+		if (pending & 1) {
+			ret = IRQ_HANDLED;
+			if (likely(substream->runtime &&
+				   snd_pcm_running(substream))) {
+				snd_pcm_period_elapsed(substream);
+				dev_dbg(pcm->dev, "period elapsed %x %x\n",
+				       s6dmac_cur_src(DMA_MASK_DMAC(channel[i]),
+						   DMA_INDEX_CHNL(channel[i])),
+				       s6dmac_cur_dst(DMA_MASK_DMAC(channel[i]),
+						   DMA_INDEX_CHNL(channel[i])));
+				prtd = substream->runtime->private_data;
+				spin_lock(&prtd->lock);
+				s6000_pcm_enqueue_dma(substream);
+				spin_unlock(&prtd->lock);
+			}
+		}
+
+		if (unlikely(pending & ~7)) {
+			if (pending & (1 << 3))
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Underflow\n",
+				       channel[i]);
+			if (pending & (1 << 4))
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Overflow\n",
+				       channel[i]);
+			if (pending & 0x1e0)
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Master Error "
+				       "(mask %x)\n",
+				       channel[i], pending >> 5);
+
+		}
+	}
+
+	return ret;
+}
+
+static int s6000_pcm_start(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	unsigned long flags;
+	int srcinc;
+	u32 dma;
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		srcinc = 1;
+		dma = par->dma_out;
+	} else {
+		srcinc = 0;
+		dma = par->dma_in;
+	}
+	s6dmac_enable_chan(DMA_MASK_DMAC(dma), DMA_INDEX_CHNL(dma),
+			   1 /* priority 1 (0 is max) */,
+			   0 /* peripheral requests w/o xfer length mode */,
+			   srcinc /* source address increment */,
+			   srcinc^1 /* destination address increment */,
+			   0 /* chunksize 0 (skip impossible on this dma) */,
+			   0 /* source skip after chunk (impossible) */,
+			   0 /* destination skip after chunk (impossible) */,
+			   4 /* 16 byte burst size */,
+			   -1 /* don't conserve bandwidth */,
+			   0 /* low watermark irq descriptor theshold */,
+			   0 /* disable hardware timestamps */,
+			   1 /* enable channel */);
+
+	s6000_pcm_enqueue_dma(substream);
+	s6000_pcm_enqueue_dma(substream);
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	return 0;
+}
+
+static int s6000_pcm_stop(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	unsigned long flags;
+	u32 channel;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		channel = par->dma_out;
+	else
+		channel = par->dma_in;
+
+	s6dmac_set_terminal_count(DMA_MASK_DMAC(channel),
+				  DMA_INDEX_CHNL(channel), 0);
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	s6dmac_disable_chan(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel));
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	return 0;
+}
+
+static int s6000_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int ret;
+
+	ret = par->trigger(substream, cmd, 0);
+	if (ret < 0)
+		return ret;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		ret = s6000_pcm_start(substream);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		ret = s6000_pcm_stop(substream);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret < 0)
+		return ret;
+
+	return par->trigger(substream, cmd, 1);
+}
+
+static int s6000_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+
+	prtd->period = 0;
+
+	return 0;
+}
+
+static snd_pcm_uframes_t s6000_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+	unsigned long flags;
+	unsigned int offset;
+	dma_addr_t count;
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		count = s6dmac_cur_src(DMA_MASK_DMAC(par->dma_out),
+				       DMA_INDEX_CHNL(par->dma_out));
+	else
+		count = s6dmac_cur_dst(DMA_MASK_DMAC(par->dma_in),
+				       DMA_INDEX_CHNL(par->dma_in));
+
+	count -= runtime->dma_addr;
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	offset = bytes_to_frames(runtime, count);
+	if (unlikely(offset >= runtime->buffer_size))
+		offset = 0;
+
+	return offset;
+}
+
+static int s6000_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd;
+	int ret;
+
+	snd_soc_set_runtime_hwparams(substream, &s6000_pcm_hardware);
+
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+					 SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 16);
+	if (ret < 0)
+		return ret;
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+					 SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 16);
+	if (ret < 0)
+		return ret;
+	ret = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		return ret;
+
+	if (par->same_rate) {
+		int rate;
+		spin_lock(&par->lock); /* needed? */
+		rate = par->rate;
+		spin_unlock(&par->lock);
+		if (rate != -1) {
+			ret = snd_pcm_hw_constraint_minmax(runtime,
+							SNDRV_PCM_HW_PARAM_RATE,
+							rate, rate);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	prtd = kzalloc(sizeof(struct s6000_runtime_data), GFP_KERNEL);
+	if (prtd == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&prtd->lock);
+
+	runtime->private_data = prtd;
+
+	return 0;
+}
+
+static int s6000_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+
+	kfree(prtd);
+
+	return 0;
+}
+
+static int s6000_pcm_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int ret;
+	ret = snd_pcm_lib_malloc_pages(substream,
+				       params_buffer_bytes(hw_params));
+	if (ret < 0) {
+		printk(KERN_WARNING "s6000-pcm: allocation of memory failed\n");
+		return ret;
+	}
+
+	if (par->same_rate) {
+		spin_lock(&par->lock);
+		if (par->rate == -1 ||
+		    !(par->in_use & ~(1 << substream->stream))) {
+			par->rate = params_rate(hw_params);
+			par->in_use |= 1 << substream->stream;
+		} else if (params_rate(hw_params) != par->rate) {
+			snd_pcm_lib_free_pages(substream);
+			par->in_use &= ~(1 << substream->stream);
+			ret = -EBUSY;
+		}
+		spin_unlock(&par->lock);
+	}
+	return ret;
+}
+
+static int s6000_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+
+	spin_lock(&par->lock);
+	par->in_use &= ~(1 << substream->stream);
+	if (!par->in_use)
+		par->rate = -1;
+	spin_unlock(&par->lock);
+
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static struct snd_pcm_ops s6000_pcm_ops = {
+	.open = 	s6000_pcm_open,
+	.close = 	s6000_pcm_close,
+	.ioctl = 	snd_pcm_lib_ioctl,
+	.hw_params = 	s6000_pcm_hw_params,
+	.hw_free = 	s6000_pcm_hw_free,
+	.trigger =	s6000_pcm_trigger,
+	.prepare = 	s6000_pcm_prepare,
+	.pointer = 	s6000_pcm_pointer,
+};
+
+static void s6000_pcm_free(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+
+	free_irq(params->irq, pcm);
+	snd_pcm_lib_preallocate_free_for_all(pcm);
+}
+
+static u64 s6000_pcm_dmamask = DMA_32BIT_MASK;
+
+static int s6000_pcm_new(struct snd_card *card,
+			 struct snd_soc_dai *dai, struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+	int res;
+
+	if (!card->dev->dma_mask)
+		card->dev->dma_mask = &s6000_pcm_dmamask;
+	if (!card->dev->coherent_dma_mask)
+		card->dev->coherent_dma_mask = DMA_32BIT_MASK;
+
+	if (params->dma_in) {
+		s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_in),
+				    DMA_INDEX_CHNL(params->dma_in));
+		s6dmac_int_sources(DMA_MASK_DMAC(params->dma_in),
+				   DMA_INDEX_CHNL(params->dma_in));
+	}
+
+	if (params->dma_out) {
+		s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_out),
+				    DMA_INDEX_CHNL(params->dma_out));
+		s6dmac_int_sources(DMA_MASK_DMAC(params->dma_out),
+				   DMA_INDEX_CHNL(params->dma_out));
+	}
+
+	res = request_irq(params->irq, s6000_pcm_irq, IRQF_SHARED,
+			  s6000_soc_platform.name, pcm);
+	if (res) {
+		printk(KERN_ERR "s6000-pcm couldn't get IRQ\n");
+		return res;
+	}
+
+	res = snd_pcm_lib_preallocate_pages_for_all(pcm,
+						    SNDRV_DMA_TYPE_DEV,
+						    card->dev,
+						    S6_PCM_PREALLOCATE_SIZE,
+						    S6_PCM_PREALLOCATE_MAX);
+	if (res)
+		printk(KERN_WARNING "s6000-pcm: preallocation failed\n");
+
+	spin_lock_init(&params->lock);
+	params->in_use = 0;
+	params->rate = -1;
+	return 0;
+}
+
+struct snd_soc_platform s6000_soc_platform = {
+	.name = 	"s6000-audio",
+	.pcm_ops = 	&s6000_pcm_ops,
+	.pcm_new = 	s6000_pcm_new,
+	.pcm_free = 	s6000_pcm_free,
+};
+EXPORT_SYMBOL_GPL(s6000_soc_platform);
+
+static int __init s6000_pcm_init(void)
+{
+	return snd_soc_register_platform(&s6000_soc_platform);
+}
+module_init(s6000_pcm_init);
+
+static void __exit s6000_pcm_exit(void)
+{
+	snd_soc_unregister_platform(&s6000_soc_platform);
+}
+module_exit(s6000_pcm_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6000 family PCM DMA module");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/s6000/s6000-pcm.h b/sound/soc/s6000/s6000-pcm.h
new file mode 100644
index 0000000..96f23f6
--- /dev/null
+++ b/sound/soc/s6000/s6000-pcm.h
@@ -0,0 +1,35 @@
+/*
+ * ALSA PCM interface for the Stretch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _S6000_PCM_H
+#define _S6000_PCM_H
+
+struct snd_soc_dai;
+struct snd_pcm_substream;
+
+struct s6000_pcm_dma_params {
+	unsigned int (*check_xrun)(struct snd_soc_dai *cpu_dai);
+	int (*trigger)(struct snd_pcm_substream *substream, int cmd, int after);
+	dma_addr_t sif_in;
+	dma_addr_t sif_out;
+	u32 dma_in;
+	u32 dma_out;
+	int irq;
+	int same_rate;
+
+	spinlock_t lock;
+	int in_use;
+	int rate;
+};
+
+extern struct snd_soc_platform s6000_soc_platform;
+
+#endif
diff --git a/sound/soc/s6000/s6105-ipcam.c b/sound/soc/s6000/s6105-ipcam.c
new file mode 100644
index 0000000..b5f95f9
--- /dev/null
+++ b/sound/soc/s6000/s6105-ipcam.c
@@ -0,0 +1,244 @@
+/*
+ * ASoC driver for Stretch s6105 IP camera platform
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/timer.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <variant/dmac.h>
+
+#include "../codecs/tlv320aic3x.h"
+#include "s6000-pcm.h"
+#include "s6000-i2s.h"
+
+#define S6105_CAM_CODEC_CLOCK 12288000
+
+static int s6105_hw_params(struct snd_pcm_substream *substream,
+			   struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret = 0;
+
+	/* set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai, SND_SOC_DAIFMT_I2S |
+					     SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0)
+		return ret;
+
+	/* set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBM_CFM |
+					   SND_SOC_DAIFMT_NB_NF);
+	if (ret < 0)
+		return ret;
+
+	/* set the codec system clock */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, S6105_CAM_CODEC_CLOCK,
+					    SND_SOC_CLOCK_OUT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct snd_soc_ops s6105_ops = {
+	.hw_params = s6105_hw_params,
+};
+
+/* s6105 machine dapm widgets */
+static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = {
+	SND_SOC_DAPM_LINE("Audio Out Differential", NULL),
+	SND_SOC_DAPM_LINE("Audio Out Stereo", NULL),
+	SND_SOC_DAPM_LINE("Audio In", NULL),
+};
+
+/* s6105 machine audio_mapnections to the codec pins */
+static const struct snd_soc_dapm_route audio_map[] = {
+	/* Audio Out connected to HPLOUT, HPLCOM, HPROUT */
+	{"Audio Out Differential", NULL, "HPLOUT"},
+	{"Audio Out Differential", NULL, "HPLCOM"},
+	{"Audio Out Stereo", NULL, "HPLOUT"},
+	{"Audio Out Stereo", NULL, "HPROUT"},
+
+	/* Audio In connected to LINE1L, LINE1R */
+	{"LINE1L", NULL, "Audio In"},
+	{"LINE1R", NULL, "Audio In"},
+};
+
+static int output_type_info(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = 2;
+	if (uinfo->value.enumerated.item) {
+		uinfo->value.enumerated.item = 1;
+		strcpy(uinfo->value.enumerated.name, "HPLOUT/HPROUT");
+	} else {
+		strcpy(uinfo->value.enumerated.name, "HPLOUT/HPLCOM");
+	}
+	return 0;
+}
+
+static int output_type_get(struct snd_kcontrol *kcontrol,
+			   struct snd_ctl_elem_value *ucontrol)
+{
+	ucontrol->value.enumerated.item[0] = kcontrol->private_value;
+	return 0;
+}
+
+static int output_type_put(struct snd_kcontrol *kcontrol,
+			   struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = kcontrol->private_data;
+	unsigned int val = (ucontrol->value.enumerated.item[0] != 0);
+	char *differential = "Audio Out Differential";
+	char *stereo = "Audio Out Stereo";
+
+	if (kcontrol->private_value == val)
+		return 0;
+	kcontrol->private_value = val;
+	snd_soc_dapm_disable_pin(codec, val ? differential : stereo);
+	snd_soc_dapm_sync(codec);
+	snd_soc_dapm_enable_pin(codec, val ? stereo : differential);
+	snd_soc_dapm_sync(codec);
+
+	return 1;
+}
+
+static const struct snd_kcontrol_new audio_out_mux = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "Master Output Mux",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.info = output_type_info,
+	.get = output_type_get,
+	.put = output_type_put,
+	.private_value = 1 /* default to stereo */
+};
+
+/* Logic for a aic3x as connected on the s6105 ip camera ref design */
+static int s6105_aic3x_init(struct snd_soc_codec *codec)
+{
+	/* Add s6105 specific widgets */
+	snd_soc_dapm_new_controls(codec, aic3x_dapm_widgets,
+				  ARRAY_SIZE(aic3x_dapm_widgets));
+
+	/* Set up s6105 specific audio path audio_map */
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+
+	/* not present */
+	snd_soc_dapm_nc_pin(codec, "MONO_LOUT");
+	snd_soc_dapm_nc_pin(codec, "LINE2L");
+	snd_soc_dapm_nc_pin(codec, "LINE2R");
+
+	/* not connected */
+	snd_soc_dapm_nc_pin(codec, "MIC3L"); /* LINE2L on this chip */
+	snd_soc_dapm_nc_pin(codec, "MIC3R"); /* LINE2R on this chip */
+	snd_soc_dapm_nc_pin(codec, "LLOUT");
+	snd_soc_dapm_nc_pin(codec, "RLOUT");
+	snd_soc_dapm_nc_pin(codec, "HPRCOM");
+
+	/* always connected */
+	snd_soc_dapm_enable_pin(codec, "Audio In");
+
+	/* must correspond to audio_out_mux.private_value initializer */
+	snd_soc_dapm_disable_pin(codec, "Audio Out Differential");
+	snd_soc_dapm_sync(codec);
+	snd_soc_dapm_enable_pin(codec, "Audio Out Stereo");
+
+	snd_soc_dapm_sync(codec);
+
+	snd_ctl_add(codec->card, snd_ctl_new1(&audio_out_mux, codec));
+
+	return 0;
+}
+
+/* s6105 digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link s6105_dai = {
+	.name = "TLV320AIC31",
+	.stream_name = "AIC31",
+	.cpu_dai = &s6000_i2s_dai,
+	.codec_dai = &aic3x_dai,
+	.init = s6105_aic3x_init,
+	.ops = &s6105_ops,
+};
+
+/* s6105 audio machine driver */
+static struct snd_soc_card snd_soc_card_s6105 = {
+	.name = "Stretch IP Camera",
+	.platform = &s6000_soc_platform,
+	.dai_link = &s6105_dai,
+	.num_links = 1,
+};
+
+/* s6105 audio private data */
+static struct aic3x_setup_data s6105_aic3x_setup = {
+	.i2c_bus = 0,
+	.i2c_address = 0x18,
+};
+
+/* s6105 audio subsystem */
+static struct snd_soc_device s6105_snd_devdata = {
+	.card = &snd_soc_card_s6105,
+	.codec_dev = &soc_codec_dev_aic3x,
+	.codec_data = &s6105_aic3x_setup,
+};
+
+static struct s6000_snd_platform_data __initdata s6105_snd_data = {
+	.wide		= 0,
+	.channel_in	= 0,
+	.channel_out	= 1,
+	.lines_in	= 1,
+	.lines_out	= 1,
+	.same_rate	= 1,
+};
+
+static struct platform_device *s6105_snd_device;
+
+static int __init s6105_init(void)
+{
+	int ret;
+
+	s6105_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!s6105_snd_device)
+		return -ENOMEM;
+
+	platform_set_drvdata(s6105_snd_device, &s6105_snd_devdata);
+	s6105_snd_devdata.dev = &s6105_snd_device->dev;
+	platform_device_add_data(s6105_snd_device, &s6105_snd_data,
+				 sizeof(s6105_snd_data));
+
+	ret = platform_device_add(s6105_snd_device);
+	if (ret)
+		platform_device_put(s6105_snd_device);
+
+	return ret;
+}
+
+static void __exit s6105_exit(void)
+{
+	platform_device_unregister(s6105_snd_device);
+}
+
+module_init(s6105_init);
+module_exit(s6105_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6105 IP camera ASoC driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c
index 56fa087..b378096 100644
--- a/sound/soc/sh/ssi.c
+++ b/sound/soc/sh/ssi.c
@@ -145,7 +145,7 @@
 	recv = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? 0 : 1;
 
 	pr_debug("ssi_hw_params() enter\nssicr was    %08lx\n", ssicr);
-	pr_debug("bits: %d channels: %d\n", bits, channels);
+	pr_debug("bits: %u channels: %u\n", bits, channels);
 
 	ssicr &= ~(CR_TRMD | CR_CHNL_MASK | CR_DWL_MASK | CR_PDTA |
 		   CR_SWL_MASK);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 1cd149b..3f44150 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -113,6 +113,35 @@
 }
 #endif
 
+static int soc_pcm_apply_symmetry(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_dai_link *machine = rtd->dai;
+	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
+	struct snd_soc_dai *codec_dai = machine->codec_dai;
+	int ret;
+
+	if (codec_dai->symmetric_rates || cpu_dai->symmetric_rates ||
+	    machine->symmetric_rates) {
+		dev_dbg(card->dev, "Symmetry forces %dHz rate\n", 
+			machine->rate);
+
+		ret = snd_pcm_hw_constraint_minmax(substream->runtime,
+						   SNDRV_PCM_HW_PARAM_RATE,
+						   machine->rate,
+						   machine->rate);
+		if (ret < 0) {
+			dev_err(card->dev,
+				"Unable to apply rate symmetry constraint: %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Called by ALSA when a PCM substream is opened, the runtime->hw record is
  * then initialized and any private data can be allocated. This also calls
@@ -221,6 +250,13 @@
 		goto machine_err;
 	}
 
+	/* Symmetry only applies if we've already got an active stream. */
+	if (cpu_dai->active || codec_dai->active) {
+		ret = soc_pcm_apply_symmetry(substream);
+		if (ret != 0)
+			goto machine_err;
+	}
+
 	pr_debug("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name);
 	pr_debug("asoc: rate mask 0x%x\n", runtime->hw.rates);
 	pr_debug("asoc: min ch %d max ch %d\n", runtime->hw.channels_min,
@@ -263,7 +299,6 @@
 {
 	struct snd_soc_card *card = container_of(work, struct snd_soc_card,
 						 delayed_work.work);
-	struct snd_soc_device *socdev = card->socdev;
 	struct snd_soc_codec *codec = card->codec;
 	struct snd_soc_dai *codec_dai;
 	int i;
@@ -279,27 +314,10 @@
 
 		/* are we waiting on this codec DAI stream */
 		if (codec_dai->pop_wait == 1) {
-
-			/* Reduce power if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D1 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_PREPARE);
-			}
-
 			codec_dai->pop_wait = 0;
 			snd_soc_dapm_stream_event(codec,
 				codec_dai->playback.stream_name,
 				SND_SOC_DAPM_STREAM_STOP);
-
-			/* Fall into standby if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D3 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_STANDBY);
-			}
 		}
 	}
 	mutex_unlock(&pcm_mutex);
@@ -363,10 +381,6 @@
 		snd_soc_dapm_stream_event(codec,
 			codec_dai->capture.stream_name,
 			SND_SOC_DAPM_STREAM_STOP);
-
-		if (codec->active == 0 && codec_dai->pop_wait == 0)
-			snd_soc_dapm_set_bias_level(socdev,
-						SND_SOC_BIAS_STANDBY);
 	}
 
 	mutex_unlock(&pcm_mutex);
@@ -431,36 +445,16 @@
 		cancel_delayed_work(&card->delayed_work);
 	}
 
-	/* do we need to power up codec */
-	if (codec->bias_level != SND_SOC_BIAS_ON) {
-		snd_soc_dapm_set_bias_level(socdev,
-					    SND_SOC_BIAS_PREPARE);
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->playback.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
+	else
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->capture.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
 
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-
-		snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON);
-		snd_soc_dai_digital_mute(codec_dai, 0);
-
-	} else {
-		/* codec already powered - power on widgets */
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-
-		snd_soc_dai_digital_mute(codec_dai, 0);
-	}
+	snd_soc_dai_digital_mute(codec_dai, 0);
 
 out:
 	mutex_unlock(&pcm_mutex);
@@ -521,6 +515,8 @@
 		}
 	}
 
+	machine->rate = params_rate(params);
+
 out:
 	mutex_unlock(&pcm_mutex);
 	return ret;
@@ -632,6 +628,12 @@
 	struct snd_soc_codec *codec = card->codec;
 	int i;
 
+	/* If the initialization of this soc device failed, there is no codec
+	 * associated with it. Just bail out in this case.
+	 */
+	if (!codec)
+		return 0;
+
 	/* Due to the resume being scheduled into a workqueue we could
 	* suspend before that's finished - wait for it to complete.
 	 */
@@ -1334,6 +1336,7 @@
 		return ret;
 	}
 
+	codec->socdev = socdev;
 	codec->card->dev = socdev->dev;
 	codec->card->private_data = codec;
 	strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver));
@@ -1744,7 +1747,7 @@
 {
 	int max = kcontrol->private_value;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -1774,7 +1777,7 @@
 	unsigned int shift = mc->shift;
 	unsigned int rshift = mc->rshift;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -1881,7 +1884,7 @@
 		(struct soc_mixer_control *)kcontrol->private_value;
 	int max = mc->max;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -2065,7 +2068,7 @@
 int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
 	unsigned int freq, int dir)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_sysclk)
 		return dai->ops->set_sysclk(dai, clk_id, freq, dir);
 	else
 		return -EINVAL;
@@ -2085,7 +2088,7 @@
 int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai,
 	int div_id, int div)
 {
-	if (dai->ops->set_clkdiv)
+	if (dai->ops && dai->ops->set_clkdiv)
 		return dai->ops->set_clkdiv(dai, div_id, div);
 	else
 		return -EINVAL;
@@ -2104,7 +2107,7 @@
 int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
 	int pll_id, unsigned int freq_in, unsigned int freq_out)
 {
-	if (dai->ops->set_pll)
+	if (dai->ops && dai->ops->set_pll)
 		return dai->ops->set_pll(dai, pll_id, freq_in, freq_out);
 	else
 		return -EINVAL;
@@ -2120,7 +2123,7 @@
  */
 int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 {
-	if (dai->ops->set_fmt)
+	if (dai->ops && dai->ops->set_fmt)
 		return dai->ops->set_fmt(dai, fmt);
 	else
 		return -EINVAL;
@@ -2139,7 +2142,7 @@
 int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
 	unsigned int mask, int slots)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_tdm_slot)
 		return dai->ops->set_tdm_slot(dai, mask, slots);
 	else
 		return -EINVAL;
@@ -2155,7 +2158,7 @@
  */
 int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_tristate)
 		return dai->ops->set_tristate(dai, tristate);
 	else
 		return -EINVAL;
@@ -2171,7 +2174,7 @@
  */
 int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute)
 {
-	if (dai->ops->digital_mute)
+	if (dai->ops && dai->ops->digital_mute)
 		return dai->ops->digital_mute(dai, mute);
 	else
 		return -EINVAL;
@@ -2352,6 +2355,39 @@
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
 
+static u64 codec_format_map[] = {
+	SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE,
+	SNDRV_PCM_FMTBIT_U16_LE | SNDRV_PCM_FMTBIT_U16_BE,
+	SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE,
+	SNDRV_PCM_FMTBIT_U24_LE | SNDRV_PCM_FMTBIT_U24_BE,
+	SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_S32_BE,
+	SNDRV_PCM_FMTBIT_U32_LE | SNDRV_PCM_FMTBIT_U32_BE,
+	SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_U24_3BE,
+	SNDRV_PCM_FMTBIT_U24_3LE | SNDRV_PCM_FMTBIT_U24_3BE,
+	SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S20_3BE,
+	SNDRV_PCM_FMTBIT_U20_3LE | SNDRV_PCM_FMTBIT_U20_3BE,
+	SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S18_3BE,
+	SNDRV_PCM_FMTBIT_U18_3LE | SNDRV_PCM_FMTBIT_U18_3BE,
+	SNDRV_PCM_FMTBIT_FLOAT_LE | SNDRV_PCM_FMTBIT_FLOAT_BE,
+	SNDRV_PCM_FMTBIT_FLOAT64_LE | SNDRV_PCM_FMTBIT_FLOAT64_BE,
+	SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE
+	| SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE,
+};
+
+/* Fix up the DAI formats for endianness: codecs don't actually see
+ * the endianness of the data but we're using the CPU format
+ * definitions which do need to include endianness so we ensure that
+ * codec DAIs always have both big and little endian variants set.
+ */
+static void fixup_codec_formats(struct snd_soc_pcm_stream *stream)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(codec_format_map); i++)
+		if (stream->formats & codec_format_map[i])
+			stream->formats |= codec_format_map[i];
+}
+
 /**
  * snd_soc_register_codec - Register a codec with the ASoC core
  *
@@ -2359,6 +2395,8 @@
  */
 int snd_soc_register_codec(struct snd_soc_codec *codec)
 {
+	int i;
+
 	if (!codec->name)
 		return -EINVAL;
 
@@ -2368,6 +2406,11 @@
 
 	INIT_LIST_HEAD(&codec->list);
 
+	for (i = 0; i < codec->num_dai; i++) {
+		fixup_codec_formats(&codec->dai[i].playback);
+		fixup_codec_formats(&codec->dai[i].capture);
+	}
+
 	mutex_lock(&client_mutex);
 	list_add(&codec->list, &codec_list);
 	snd_soc_instantiate_cards();
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 735903a..21c6907 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -12,7 +12,7 @@
  *  Features:
  *    o Changes power status of internal codec blocks depending on the
  *      dynamic configuration of codec internal audio paths and active
- *      DAC's/ADC's.
+ *      DACs/ADCs.
  *    o Platform power domain - can support external components i.e. amps and
  *      mic/meadphone insertion events.
  *    o Automatic Mic Bias support
@@ -52,23 +52,21 @@
 
 /* dapm power sequences - make this per codec in the future */
 static int dapm_up_seq[] = {
-	snd_soc_dapm_pre, snd_soc_dapm_micbias, snd_soc_dapm_mic,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_dac,
-	snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_pga,
-	snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_post
+	snd_soc_dapm_pre, snd_soc_dapm_supply, snd_soc_dapm_micbias,
+	snd_soc_dapm_mic, snd_soc_dapm_mux, snd_soc_dapm_value_mux,
+	snd_soc_dapm_dac, snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl,
+	snd_soc_dapm_pga, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
+	snd_soc_dapm_post
 };
 
 static int dapm_down_seq[] = {
 	snd_soc_dapm_pre, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
 	snd_soc_dapm_pga, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_mixer,
 	snd_soc_dapm_dac, snd_soc_dapm_mic, snd_soc_dapm_micbias,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_post
+	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_supply,
+	snd_soc_dapm_post
 };
 
-static int dapm_status = 1;
-module_param(dapm_status, int, 0);
-MODULE_PARM_DESC(dapm_status, "enable DPM sysfs entries");
-
 static void pop_wait(u32 pop_time)
 {
 	if (pop_time)
@@ -96,6 +94,48 @@
 	return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
 }
 
+/**
+ * snd_soc_dapm_set_bias_level - set the bias level for the system
+ * @socdev: audio device
+ * @level: level to configure
+ *
+ * Configure the bias (power) levels for the SoC audio device.
+ *
+ * Returns 0 for success else error.
+ */
+static int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
+				       enum snd_soc_bias_level level)
+{
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int ret = 0;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		dev_dbg(socdev->dev, "Setting full bias\n");
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		dev_dbg(socdev->dev, "Setting bias prepare\n");
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		dev_dbg(socdev->dev, "Setting standby bias\n");
+		break;
+	case SND_SOC_BIAS_OFF:
+		dev_dbg(socdev->dev, "Setting bias off\n");
+		break;
+	default:
+		dev_err(socdev->dev, "Setting invalid bias %d\n", level);
+		return -EINVAL;
+	}
+
+	if (card->set_bias_level)
+		ret = card->set_bias_level(card, level);
+	if (ret == 0 && codec->set_bias_level)
+		ret = codec->set_bias_level(codec, level);
+
+	return ret;
+}
+
 /* set up initial codec paths */
 static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	struct snd_soc_dapm_path *p, int i)
@@ -165,6 +205,7 @@
 	case snd_soc_dapm_dac:
 	case snd_soc_dapm_micbias:
 	case snd_soc_dapm_vmid:
+	case snd_soc_dapm_supply:
 		p->connect = 1;
 	break;
 	/* does effect routing - dynamically connected */
@@ -179,7 +220,7 @@
 	}
 }
 
-/* connect mux widget to it's interconnecting audio paths */
+/* connect mux widget to its interconnecting audio paths */
 static int dapm_connect_mux(struct snd_soc_codec *codec,
 	struct snd_soc_dapm_widget *src, struct snd_soc_dapm_widget *dest,
 	struct snd_soc_dapm_path *path, const char *control_name,
@@ -202,7 +243,7 @@
 	return -ENODEV;
 }
 
-/* connect mixer widget to it's interconnecting audio paths */
+/* connect mixer widget to its interconnecting audio paths */
 static int dapm_connect_mixer(struct snd_soc_codec *codec,
 	struct snd_soc_dapm_widget *src, struct snd_soc_dapm_widget *dest,
 	struct snd_soc_dapm_path *path, const char *control_name)
@@ -357,8 +398,9 @@
 				path->long_name);
 			ret = snd_ctl_add(codec->card, path->kcontrol);
 			if (ret < 0) {
-				printk(KERN_ERR "asoc: failed to add dapm kcontrol %s\n",
-						path->long_name);
+				printk(KERN_ERR "asoc: failed to add dapm kcontrol %s: %d\n",
+				       path->long_name,
+				       ret);
 				kfree(path->long_name);
 				path->long_name = NULL;
 				return ret;
@@ -434,6 +476,9 @@
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	if (widget->id == snd_soc_dapm_adc && widget->active)
 		return 1;
 
@@ -470,6 +515,9 @@
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	/* active stream ? */
 	if (widget->id == snd_soc_dapm_dac && widget->active)
 		return 1;
@@ -521,6 +569,126 @@
 }
 EXPORT_SYMBOL_GPL(dapm_reg_event);
 
+/* Standard power change method, used to apply power changes to most
+ * widgets.
+ */
+static int dapm_generic_apply_power(struct snd_soc_dapm_widget *w)
+{
+	int ret;
+
+	/* call any power change event handlers */
+	if (w->event)
+		pr_debug("power %s event for %s flags %x\n",
+			 w->power ? "on" : "off",
+			 w->name, w->event_flags);
+
+	/* power up pre event */
+	if (w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_PRE_PMU)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* power down pre event */
+	if (!w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_PRE_PMD)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Lower PGA volume to reduce pops */
+	if (w->id == snd_soc_dapm_pga && !w->power)
+		dapm_set_pga(w, w->power);
+
+	dapm_update_bits(w);
+
+	/* Raise PGA volume to reduce pops */
+	if (w->id == snd_soc_dapm_pga && w->power)
+		dapm_set_pga(w, w->power);
+
+	/* power up post event */
+	if (w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_POST_PMU)) {
+		ret = w->event(w,
+			       NULL, SND_SOC_DAPM_POST_PMU);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* power down post event */
+	if (!w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_POST_PMD)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Generic check to see if a widget should be powered.
+ */
+static int dapm_generic_check_power(struct snd_soc_dapm_widget *w)
+{
+	int in, out;
+
+	in = is_connected_input_ep(w);
+	dapm_clear_walk(w->codec);
+	out = is_connected_output_ep(w);
+	dapm_clear_walk(w->codec);
+	return out != 0 && in != 0;
+}
+
+/* Check to see if an ADC has power */
+static int dapm_adc_check_power(struct snd_soc_dapm_widget *w)
+{
+	int in;
+
+	if (w->active) {
+		in = is_connected_input_ep(w);
+		dapm_clear_walk(w->codec);
+		return in != 0;
+	} else {
+		return dapm_generic_check_power(w);
+	}
+}
+
+/* Check to see if a DAC has power */
+static int dapm_dac_check_power(struct snd_soc_dapm_widget *w)
+{
+	int out;
+
+	if (w->active) {
+		out = is_connected_output_ep(w);
+		dapm_clear_walk(w->codec);
+		return out != 0;
+	} else {
+		return dapm_generic_check_power(w);
+	}
+}
+
+/* Check to see if a power supply is needed */
+static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
+{
+	struct snd_soc_dapm_path *path;
+	int power = 0;
+
+	/* Check if one of our outputs is connected */
+	list_for_each_entry(path, &w->sinks, list_source) {
+		if (path->sink && path->sink->power_check &&
+		    path->sink->power_check(path->sink)) {
+			power = 1;
+			break;
+		}
+	}
+
+	dapm_clear_walk(w->codec);
+
+	return power;
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -528,32 +696,10 @@
 static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 			     struct snd_soc_dapm_widget *w)
 {
-	int in, out, power_change, power, ret;
+	int ret;
 
-	/* vmid - no action */
-	if (w->id == snd_soc_dapm_vmid)
-		return 0;
-
-	/* active ADC */
-	if (w->id == snd_soc_dapm_adc && w->active) {
-		in = is_connected_input_ep(w);
-		dapm_clear_walk(w->codec);
-		w->power = (in != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
-	}
-
-	/* active DAC */
-	if (w->id == snd_soc_dapm_dac && w->active) {
-		out = is_connected_output_ep(w);
-		dapm_clear_walk(w->codec);
-		w->power = (out != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
-	}
-
-	/* pre and post event widgets */
-	if (w->id == snd_soc_dapm_pre) {
+	switch (w->id) {
+	case snd_soc_dapm_pre:
 		if (!w->event)
 			return 0;
 
@@ -569,8 +715,8 @@
 				return ret;
 		}
 		return 0;
-	}
-	if (w->id == snd_soc_dapm_post) {
+
+	case snd_soc_dapm_post:
 		if (!w->event)
 			return 0;
 
@@ -586,70 +732,10 @@
 				return ret;
 		}
 		return 0;
+
+	default:
+		return dapm_generic_apply_power(w);
 	}
-
-	/* all other widgets */
-	in = is_connected_input_ep(w);
-	dapm_clear_walk(w->codec);
-	out = is_connected_output_ep(w);
-	dapm_clear_walk(w->codec);
-	power = (out != 0 && in != 0) ? 1 : 0;
-	power_change = (w->power == power) ? 0 : 1;
-	w->power = power;
-
-	if (!power_change)
-		return 0;
-
-	/* call any power change event handlers */
-	if (w->event)
-		pr_debug("power %s event for %s flags %x\n",
-			 w->power ? "on" : "off",
-			 w->name, w->event_flags);
-
-	/* power up pre event */
-	if (power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_PRE_PMU)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* power down pre event */
-	if (!power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_PRE_PMD)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* Lower PGA volume to reduce pops */
-	if (w->id == snd_soc_dapm_pga && !power)
-		dapm_set_pga(w, power);
-
-	dapm_update_bits(w);
-
-	/* Raise PGA volume to reduce pops */
-	if (w->id == snd_soc_dapm_pga && power)
-		dapm_set_pga(w, power);
-
-	/* power up post event */
-	if (power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_POST_PMU)) {
-		ret = w->event(w,
-			       NULL, SND_SOC_DAPM_POST_PMU);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* power down post event */
-	if (!power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_POST_PMD)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD);
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
 }
 
 /*
@@ -663,31 +749,102 @@
  */
 static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 {
+	struct snd_soc_device *socdev = codec->socdev;
 	struct snd_soc_dapm_widget *w;
-	int i, c = 1, *seq = NULL, ret = 0;
+	int ret = 0;
+	int i, power;
+	int sys_power = 0;
 
-	/* do we have a sequenced stream event */
-	if (event == SND_SOC_DAPM_STREAM_START) {
-		c = ARRAY_SIZE(dapm_up_seq);
-		seq = dapm_up_seq;
-	} else if (event == SND_SOC_DAPM_STREAM_STOP) {
-		c = ARRAY_SIZE(dapm_down_seq);
-		seq = dapm_down_seq;
+	INIT_LIST_HEAD(&codec->up_list);
+	INIT_LIST_HEAD(&codec->down_list);
+
+	/* Check which widgets we need to power and store them in
+	 * lists indicating if they should be powered up or down.
+	 */
+	list_for_each_entry(w, &codec->dapm_widgets, list) {
+		switch (w->id) {
+		case snd_soc_dapm_pre:
+			list_add_tail(&codec->down_list, &w->power_list);
+			break;
+		case snd_soc_dapm_post:
+			list_add_tail(&codec->up_list, &w->power_list);
+			break;
+
+		default:
+			if (!w->power_check)
+				continue;
+
+			power = w->power_check(w);
+			if (power)
+				sys_power = 1;
+
+			if (w->power == power)
+				continue;
+
+			if (power)
+				list_add_tail(&w->power_list, &codec->up_list);
+			else
+				list_add_tail(&w->power_list,
+					      &codec->down_list);
+
+			w->power = power;
+			break;
+		}
 	}
 
-	for (i = 0; i < c; i++) {
-		list_for_each_entry(w, &codec->dapm_widgets, list) {
+	/* If we're changing to all on or all off then prepare */
+	if ((sys_power && codec->bias_level == SND_SOC_BIAS_STANDBY) ||
+	    (!sys_power && codec->bias_level == SND_SOC_BIAS_ON)) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_PREPARE);
+		if (ret != 0)
+			pr_err("Failed to prepare bias: %d\n", ret);
+	}
 
+	/* Power down widgets first; try to avoid amplifying pops. */
+	for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) {
+		list_for_each_entry(w, &codec->down_list, power_list) {
 			/* is widget in stream order */
-			if (seq && seq[i] && w->id != seq[i])
+			if (w->id != dapm_down_seq[i])
 				continue;
 
 			ret = dapm_power_widget(codec, event, w);
 			if (ret != 0)
-				return ret;
+				pr_err("Failed to power down %s: %d\n",
+				       w->name, ret);
 		}
 	}
 
+	/* Now power up. */
+	for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++) {
+		list_for_each_entry(w, &codec->up_list, power_list) {
+			/* is widget in stream order */
+			if (w->id != dapm_up_seq[i])
+				continue;
+
+			ret = dapm_power_widget(codec, event, w);
+			if (ret != 0)
+				pr_err("Failed to power up %s: %d\n",
+				       w->name, ret);
+		}
+	}
+
+	/* If we just powered the last thing off drop to standby bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && !sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_STANDBY);
+		if (ret != 0)
+			pr_err("Failed to apply standby bias: %d\n", ret);
+	}
+
+	/* If we just powered up then move to active bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_ON);
+		if (ret != 0)
+			pr_err("Failed to apply active bias: %d\n", ret);
+	}
+
 	return 0;
 }
 
@@ -723,6 +880,7 @@
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name) {
 				in = is_connected_input_ep(w);
 				dapm_clear_walk(w->codec);
@@ -851,6 +1009,7 @@
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name)
 				count += sprintf(buf + count, "%s: %s\n",
 					w->name, w->power ? "On":"Off");
@@ -883,16 +1042,12 @@
 
 int snd_soc_dapm_sys_add(struct device *dev)
 {
-	if (!dapm_status)
-		return 0;
 	return device_create_file(dev, &dev_attr_dapm_widget);
 }
 
 static void snd_soc_dapm_sys_remove(struct device *dev)
 {
-	if (dapm_status) {
-		device_remove_file(dev, &dev_attr_dapm_widget);
-	}
+	device_remove_file(dev, &dev_attr_dapm_widget);
 }
 
 /* free all dapm widgets and resources */
@@ -1015,6 +1170,7 @@
 	case snd_soc_dapm_vmid:
 	case snd_soc_dapm_pre:
 	case snd_soc_dapm_post:
+	case snd_soc_dapm_supply:
 		list_add(&path->list, &codec->dapm_paths);
 		list_add(&path->list_sink, &wsink->sources);
 		list_add(&path->list_source, &wsource->sinks);
@@ -1108,15 +1264,22 @@
 		case snd_soc_dapm_switch:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mixer(codec, w);
 			break;
 		case snd_soc_dapm_mux:
 		case snd_soc_dapm_value_mux:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mux(codec, w);
 			break;
 		case snd_soc_dapm_adc:
+			w->power_check = dapm_adc_check_power;
+			break;
 		case snd_soc_dapm_dac:
+			w->power_check = dapm_dac_check_power;
+			break;
 		case snd_soc_dapm_pga:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_pga(codec, w);
 			break;
 		case snd_soc_dapm_input:
@@ -1126,6 +1289,10 @@
 		case snd_soc_dapm_hp:
 		case snd_soc_dapm_mic:
 		case snd_soc_dapm_line:
+			w->power_check = dapm_generic_check_power;
+			break;
+		case snd_soc_dapm_supply:
+			w->power_check = dapm_supply_check_power;
 		case snd_soc_dapm_vmid:
 		case snd_soc_dapm_pre:
 		case snd_soc_dapm_post:
@@ -1626,35 +1793,11 @@
 EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event);
 
 /**
- * snd_soc_dapm_set_bias_level - set the bias level for the system
- * @socdev: audio device
- * @level: level to configure
- *
- * Configure the bias (power) levels for the SoC audio device.
- *
- * Returns 0 for success else error.
- */
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-				enum snd_soc_bias_level level)
-{
-	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_codec *codec = socdev->card->codec;
-	int ret = 0;
-
-	if (card->set_bias_level)
-		ret = card->set_bias_level(card, level);
-	if (ret == 0 && codec->set_bias_level)
-		ret = codec->set_bias_level(codec, level);
-
-	return ret;
-}
-
-/**
  * snd_soc_dapm_enable_pin - enable pin.
  * @codec: SoC codec
  * @pin: pin name
  *
- * Enables input/output pin and it's parents or children widgets iff there is
+ * Enables input/output pin and its parents or children widgets iff there is
  * a valid audio route and active audio stream.
  * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to
  * do any widget power switching.
@@ -1670,7 +1813,7 @@
  * @codec: SoC codec
  * @pin: pin name
  *
- * Disables input/output pin and it's parents or children widgets.
+ * Disables input/output pin and its parents or children widgets.
  * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to
  * do any widget power switching.
  */
diff --git a/sound/soc/txx9/Kconfig b/sound/soc/txx9/Kconfig
new file mode 100644
index 0000000..ebc9327
--- /dev/null
+++ b/sound/soc/txx9/Kconfig
@@ -0,0 +1,29 @@
+##
+## TXx9 ACLC
+##
+config SND_SOC_TXX9ACLC
+	tristate "SoC Audio for TXx9"
+	depends on HAS_TXX9_ACLC && TXX9_DMAC
+	help
+	  This option enables support for the AC Link Controllers in TXx9 SoC.
+
+config HAS_TXX9_ACLC
+	bool
+
+config SND_SOC_TXX9ACLC_AC97
+	tristate
+	select AC97_BUS
+	select SND_AC97_CODEC
+	select SND_SOC_AC97_BUS
+
+
+##
+## Boards
+##
+config SND_SOC_TXX9ACLC_GENERIC
+	tristate "Generic TXx9 ACLC sound machine"
+	depends on SND_SOC_TXX9ACLC
+	select SND_SOC_TXX9ACLC_AC97
+	select SND_SOC_AC97_CODEC
+	help
+	  This is a generic AC97 sound machine for use in TXx9 based systems.
diff --git a/sound/soc/txx9/Makefile b/sound/soc/txx9/Makefile
new file mode 100644
index 0000000..551f16c
--- /dev/null
+++ b/sound/soc/txx9/Makefile
@@ -0,0 +1,11 @@
+# Platform
+snd-soc-txx9aclc-objs := txx9aclc.o
+snd-soc-txx9aclc-ac97-objs := txx9aclc-ac97.o
+
+obj-$(CONFIG_SND_SOC_TXX9ACLC) += snd-soc-txx9aclc.o
+obj-$(CONFIG_SND_SOC_TXX9ACLC_AC97) += snd-soc-txx9aclc-ac97.o
+
+# Machine
+snd-soc-txx9aclc-generic-objs := txx9aclc-generic.o
+
+obj-$(CONFIG_SND_SOC_TXX9ACLC_GENERIC) += snd-soc-txx9aclc-generic.o
diff --git a/sound/soc/txx9/txx9aclc-ac97.c b/sound/soc/txx9/txx9aclc-ac97.c
new file mode 100644
index 0000000..0f83bdb
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc-ac97.c
@@ -0,0 +1,255 @@
+/*
+ * TXx9 ACLC AC97 driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include "txx9aclc.h"
+
+#define AC97_DIR	\
+	(SND_SOC_DAIDIR_PLAYBACK | SND_SOC_DAIDIR_CAPTURE)
+
+#define AC97_RATES	\
+	SNDRV_PCM_RATE_8000_48000
+
+#ifdef __BIG_ENDIAN
+#define AC97_FMTS	SNDRV_PCM_FMTBIT_S16_BE
+#else
+#define AC97_FMTS	SNDRV_PCM_FMTBIT_S16_LE
+#endif
+
+static DECLARE_WAIT_QUEUE_HEAD(ac97_waitq);
+
+/* REVISIT: How to find txx9aclc_soc_device from snd_ac97? */
+static struct txx9aclc_soc_device *txx9aclc_soc_dev;
+
+static int txx9aclc_regready(struct txx9aclc_soc_device *dev)
+{
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+
+	return __raw_readl(drvdata->base + ACINTSTS) & ACINT_REGACCRDY;
+}
+
+/* AC97 controller reads codec register */
+static unsigned short txx9aclc_ac97_read(struct snd_ac97 *ac97,
+					 unsigned short reg)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	u32 dat;
+
+	if (!(__raw_readl(base + ACINTSTS) & ACINT_CODECRDY(ac97->num)))
+		return 0xffff;
+	reg |= ac97->num << 7;
+	dat = (reg << ACREGACC_REG_SHIFT) | ACREGACC_READ;
+	__raw_writel(dat, base + ACREGACC);
+	__raw_writel(ACINT_REGACCRDY, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq, txx9aclc_regready(dev), HZ)) {
+		__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+		dev_err(dev->soc_dev.dev, "ac97 read timeout (reg %#x)\n", reg);
+		dat = 0xffff;
+		goto done;
+	}
+	dat = __raw_readl(base + ACREGACC);
+	if (((dat >> ACREGACC_REG_SHIFT) & 0xff) != reg) {
+		dev_err(dev->soc_dev.dev, "reg mismatch %x with %x\n",
+			dat, reg);
+		dat = 0xffff;
+		goto done;
+	}
+	dat = (dat >> ACREGACC_DAT_SHIFT) & 0xffff;
+done:
+	__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+	return dat;
+}
+
+/* AC97 controller writes to codec register */
+static void txx9aclc_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
+				unsigned short val)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+
+	__raw_writel(((reg | (ac97->num << 7)) << ACREGACC_REG_SHIFT) |
+		     (val << ACREGACC_DAT_SHIFT),
+		     base + ACREGACC);
+	__raw_writel(ACINT_REGACCRDY, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq, txx9aclc_regready(dev), HZ)) {
+		dev_err(dev->soc_dev.dev,
+			"ac97 write timeout (reg %#x)\n", reg);
+	}
+	__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+}
+
+static void txx9aclc_ac97_cold_reset(struct snd_ac97 *ac97)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	u32 ready = ACINT_CODECRDY(ac97->num) | ACINT_REGACCRDY;
+
+	__raw_writel(ACCTL_ENLINK, base + ACCTLDIS);
+	mmiowb();
+	udelay(1);
+	__raw_writel(ACCTL_ENLINK, base + ACCTLEN);
+	/* wait for primary codec ready status */
+	__raw_writel(ready, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq,
+				(__raw_readl(base + ACINTSTS) & ready) == ready,
+				HZ)) {
+		dev_err(&ac97->dev, "primary codec is not ready "
+			"(status %#x)\n",
+			__raw_readl(base + ACINTSTS));
+	}
+	__raw_writel(ACINT_REGACCRDY, base + ACINTSTS);
+	__raw_writel(ready, base + ACINTDIS);
+}
+
+/* AC97 controller operations */
+struct snd_ac97_bus_ops soc_ac97_ops = {
+	.read		= txx9aclc_ac97_read,
+	.write		= txx9aclc_ac97_write,
+	.reset		= txx9aclc_ac97_cold_reset,
+};
+EXPORT_SYMBOL_GPL(soc_ac97_ops);
+
+static irqreturn_t txx9aclc_ac97_irq(int irq, void *dev_id)
+{
+	struct txx9aclc_plat_drvdata *drvdata = dev_id;
+	void __iomem *base = drvdata->base;
+
+	__raw_writel(__raw_readl(base + ACINTMSTS), base + ACINTDIS);
+	wake_up(&ac97_waitq);
+	return IRQ_HANDLED;
+}
+
+static int txx9aclc_ac97_probe(struct platform_device *pdev,
+			       struct snd_soc_dai *dai)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+
+	dev->aclc_pdev = to_platform_device(dai->dev);
+	txx9aclc_soc_dev = dev;
+	return 0;
+}
+
+static void txx9aclc_ac97_remove(struct platform_device *pdev,
+				 struct snd_soc_dai *dai)
+{
+	struct platform_device *aclc_pdev = to_platform_device(dai->dev);
+	struct txx9aclc_plat_drvdata *drvdata = platform_get_drvdata(aclc_pdev);
+
+	/* disable AC-link */
+	__raw_writel(ACCTL_ENLINK, drvdata->base + ACCTLDIS);
+	txx9aclc_soc_dev = NULL;
+}
+
+struct snd_soc_dai txx9aclc_ac97_dai = {
+	.name			= "txx9aclc_ac97",
+	.ac97_control		= 1,
+	.probe			= txx9aclc_ac97_probe,
+	.remove			= txx9aclc_ac97_remove,
+	.playback = {
+		.rates		= AC97_RATES,
+		.formats	= AC97_FMTS,
+		.channels_min	= 2,
+		.channels_max	= 2,
+	},
+	.capture = {
+		.rates		= AC97_RATES,
+		.formats	= AC97_FMTS,
+		.channels_min	= 2,
+		.channels_max	= 2,
+	},
+};
+EXPORT_SYMBOL_GPL(txx9aclc_ac97_dai);
+
+static int __devinit txx9aclc_ac97_dev_probe(struct platform_device *pdev)
+{
+	struct txx9aclc_plat_drvdata *drvdata;
+	struct resource *r;
+	int err;
+	int irq;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r)
+		return -EBUSY;
+
+	if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r),
+				     dev_name(&pdev->dev)))
+		return -EBUSY;
+
+	drvdata = devm_kzalloc(&pdev->dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, drvdata);
+	drvdata->physbase = r->start;
+	if (sizeof(drvdata->physbase) > sizeof(r->start) &&
+	    r->start >= TXX9_DIRECTMAP_BASE &&
+	    r->start < TXX9_DIRECTMAP_BASE + 0x400000)
+		drvdata->physbase |= 0xf00000000ull;
+	drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+	if (!drvdata->base)
+		return -EBUSY;
+	err = devm_request_irq(&pdev->dev, irq, txx9aclc_ac97_irq,
+			       IRQF_DISABLED, dev_name(&pdev->dev), drvdata);
+	if (err < 0)
+		return err;
+
+	txx9aclc_ac97_dai.dev = &pdev->dev;
+	return snd_soc_register_dai(&txx9aclc_ac97_dai);
+}
+
+static int __devexit txx9aclc_ac97_dev_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_dai(&txx9aclc_ac97_dai);
+	return 0;
+}
+
+static struct platform_driver txx9aclc_ac97_driver = {
+	.probe		= txx9aclc_ac97_dev_probe,
+	.remove		= __devexit_p(txx9aclc_ac97_dev_remove),
+	.driver		= {
+		.name	= "txx9aclc-ac97",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init txx9aclc_ac97_init(void)
+{
+	return platform_driver_register(&txx9aclc_ac97_driver);
+}
+
+static void __exit txx9aclc_ac97_exit(void)
+{
+	platform_driver_unregister(&txx9aclc_ac97_driver);
+}
+
+module_init(txx9aclc_ac97_init);
+module_exit(txx9aclc_ac97_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("TXx9 ACLC AC97 driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc-generic.c b/sound/soc/txx9/txx9aclc-generic.c
new file mode 100644
index 0000000..3175de9
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc-generic.c
@@ -0,0 +1,98 @@
+/*
+ * Generic TXx9 ACLC machine driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is a very generic AC97 sound machine driver for boards which
+ * have (AC97) audio at ACLC (e.g. RBTX49XX boards).
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include "../codecs/ac97.h"
+#include "txx9aclc.h"
+
+static struct snd_soc_dai_link txx9aclc_generic_dai = {
+	.name = "AC97",
+	.stream_name = "AC97 HiFi",
+	.cpu_dai = &txx9aclc_ac97_dai,
+	.codec_dai = &ac97_dai,
+};
+
+static struct snd_soc_card txx9aclc_generic_card = {
+	.name		= "Generic TXx9 ACLC Audio",
+	.platform	= &txx9aclc_soc_platform,
+	.dai_link	= &txx9aclc_generic_dai,
+	.num_links	= 1,
+};
+
+static struct txx9aclc_soc_device txx9aclc_generic_soc_device = {
+	.soc_dev = {
+		.card		= &txx9aclc_generic_card,
+		.codec_dev	= &soc_codec_dev_ac97,
+	},
+};
+
+static int __init txx9aclc_generic_probe(struct platform_device *pdev)
+{
+	struct txx9aclc_soc_device *dev = &txx9aclc_generic_soc_device;
+	struct platform_device *soc_pdev;
+	int ret;
+
+	soc_pdev = platform_device_alloc("soc-audio", -1);
+	if (!soc_pdev)
+		return -ENOMEM;
+	platform_set_drvdata(soc_pdev, &dev->soc_dev);
+	dev->soc_dev.dev = &soc_pdev->dev;
+	ret = platform_device_add(soc_pdev);
+	if (ret) {
+		platform_device_put(soc_pdev);
+		return ret;
+	}
+	platform_set_drvdata(pdev, soc_pdev);
+	return 0;
+}
+
+static int __exit txx9aclc_generic_remove(struct platform_device *pdev)
+{
+	struct platform_device *soc_pdev = platform_get_drvdata(pdev);
+
+	platform_device_unregister(soc_pdev);
+	return 0;
+}
+
+static struct platform_driver txx9aclc_generic_driver = {
+	.remove = txx9aclc_generic_remove,
+	.driver = {
+		.name = "txx9aclc-generic",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init txx9aclc_generic_init(void)
+{
+	return platform_driver_probe(&txx9aclc_generic_driver,
+				     txx9aclc_generic_probe);
+}
+
+static void __exit txx9aclc_generic_exit(void)
+{
+	platform_driver_unregister(&txx9aclc_generic_driver);
+}
+
+module_init(txx9aclc_generic_init);
+module_exit(txx9aclc_generic_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("Generic TXx9 ACLC ALSA SoC audio driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c
new file mode 100644
index 0000000..fa33661
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc.c
@@ -0,0 +1,430 @@
+/*
+ * Generic TXx9 ACLC platform driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include "txx9aclc.h"
+
+static const struct snd_pcm_hardware txx9aclc_pcm_hardware = {
+	/*
+	 * REVISIT: SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID
+	 * needs more works for noncoherent MIPS.
+	 */
+	.info		  = SNDRV_PCM_INFO_INTERLEAVED |
+			    SNDRV_PCM_INFO_BATCH |
+			    SNDRV_PCM_INFO_PAUSE,
+#ifdef __BIG_ENDIAN
+	.formats	  = SNDRV_PCM_FMTBIT_S16_BE,
+#else
+	.formats	  = SNDRV_PCM_FMTBIT_S16_LE,
+#endif
+	.period_bytes_min = 1024,
+	.period_bytes_max = 8 * 1024,
+	.periods_min	  = 2,
+	.periods_max	  = 4096,
+	.buffer_bytes_max = 32 * 1024,
+};
+
+static int txx9aclc_pcm_hw_params(struct snd_pcm_substream *substream,
+				  struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct txx9aclc_dmadata *dmadata = runtime->private_data;
+	int ret;
+
+	ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params));
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(socdev->dev,
+		"runtime->dma_area = %#lx dma_addr = %#lx dma_bytes = %zd "
+		"runtime->min_align %ld\n",
+		(unsigned long)runtime->dma_area,
+		(unsigned long)runtime->dma_addr, runtime->dma_bytes,
+		runtime->min_align);
+	dev_dbg(socdev->dev,
+		"periods %d period_bytes %d stream %d\n",
+		params_periods(params), params_period_bytes(params),
+		substream->stream);
+
+	dmadata->substream = substream;
+	dmadata->pos = 0;
+	return 0;
+}
+
+static int txx9aclc_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static int txx9aclc_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct txx9aclc_dmadata *dmadata = runtime->private_data;
+
+	dmadata->dma_addr = runtime->dma_addr;
+	dmadata->buffer_bytes = snd_pcm_lib_buffer_bytes(substream);
+	dmadata->period_bytes = snd_pcm_lib_period_bytes(substream);
+
+	if (dmadata->buffer_bytes == dmadata->period_bytes) {
+		dmadata->frag_bytes = dmadata->period_bytes >> 1;
+		dmadata->frags = 2;
+	} else {
+		dmadata->frag_bytes = dmadata->period_bytes;
+		dmadata->frags = dmadata->buffer_bytes / dmadata->period_bytes;
+	}
+	dmadata->frag_count = 0;
+	dmadata->pos = 0;
+	return 0;
+}
+
+static void txx9aclc_dma_complete(void *arg)
+{
+	struct txx9aclc_dmadata *dmadata = arg;
+	unsigned long flags;
+
+	/* dma completion handler cannot submit new operations */
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	if (dmadata->frag_count >= 0) {
+		dmadata->dmacount--;
+		BUG_ON(dmadata->dmacount < 0);
+		tasklet_schedule(&dmadata->tasklet);
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+txx9aclc_dma_submit(struct txx9aclc_dmadata *dmadata, dma_addr_t buf_dma_addr)
+{
+	struct dma_chan *chan = dmadata->dma_chan;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist sg;
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(buf_dma_addr)),
+		    dmadata->frag_bytes, buf_dma_addr & (PAGE_SIZE - 1));
+	sg_dma_address(&sg) = buf_dma_addr;
+	desc = chan->device->device_prep_slave_sg(chan, &sg, 1,
+		dmadata->substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		DMA_TO_DEVICE : DMA_FROM_DEVICE,
+		DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		dev_err(&chan->dev->device, "cannot prepare slave dma\n");
+		return NULL;
+	}
+	desc->callback = txx9aclc_dma_complete;
+	desc->callback_param = dmadata;
+	desc->tx_submit(desc);
+	return desc;
+}
+
+#define NR_DMA_CHAIN		2
+
+static void txx9aclc_dma_tasklet(unsigned long data)
+{
+	struct txx9aclc_dmadata *dmadata = (struct txx9aclc_dmadata *)data;
+	struct dma_chan *chan = dmadata->dma_chan;
+	struct dma_async_tx_descriptor *desc;
+	struct snd_pcm_substream *substream = dmadata->substream;
+	u32 ctlbit = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		ACCTL_AUDODMA : ACCTL_AUDIDMA;
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	if (dmadata->frag_count < 0) {
+		struct txx9aclc_soc_device *dev =
+			container_of(dmadata, struct txx9aclc_soc_device,
+				     dmadata[substream->stream]);
+		struct txx9aclc_plat_drvdata *drvdata =
+			txx9aclc_get_plat_drvdata(dev);
+		void __iomem *base = drvdata->base;
+
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		chan->device->device_terminate_all(chan);
+		/* first time */
+		for (i = 0; i < NR_DMA_CHAIN; i++) {
+			desc = txx9aclc_dma_submit(dmadata,
+				dmadata->dma_addr + i * dmadata->frag_bytes);
+			if (!desc)
+				return;
+		}
+		dmadata->dmacount = NR_DMA_CHAIN;
+		chan->device->device_issue_pending(chan);
+		spin_lock_irqsave(&dmadata->dma_lock, flags);
+		__raw_writel(ctlbit, base + ACCTLEN);
+		dmadata->frag_count = NR_DMA_CHAIN % dmadata->frags;
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		return;
+	}
+	BUG_ON(dmadata->dmacount >= NR_DMA_CHAIN);
+	while (dmadata->dmacount < NR_DMA_CHAIN) {
+		dmadata->dmacount++;
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		desc = txx9aclc_dma_submit(dmadata,
+			dmadata->dma_addr +
+			dmadata->frag_count * dmadata->frag_bytes);
+		if (!desc)
+			return;
+		chan->device->device_issue_pending(chan);
+
+		spin_lock_irqsave(&dmadata->dma_lock, flags);
+		dmadata->frag_count++;
+		dmadata->frag_count %= dmadata->frags;
+		dmadata->pos += dmadata->frag_bytes;
+		dmadata->pos %= dmadata->buffer_bytes;
+		if ((dmadata->frag_count * dmadata->frag_bytes) %
+		    dmadata->period_bytes == 0)
+			snd_pcm_period_elapsed(substream);
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+}
+
+static int txx9aclc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct txx9aclc_soc_device *dev =
+		container_of(rtd->socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	unsigned long flags;
+	int ret = 0;
+	u32 ctlbit = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		ACCTL_AUDODMA : ACCTL_AUDIDMA;
+
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		dmadata->frag_count = -1;
+		tasklet_schedule(&dmadata->tasklet);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		__raw_writel(ctlbit, base + ACCTLDIS);
+		break;
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+	case SNDRV_PCM_TRIGGER_RESUME:
+		__raw_writel(ctlbit, base + ACCTLEN);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+	return ret;
+}
+
+static snd_pcm_uframes_t
+txx9aclc_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+
+	return bytes_to_frames(substream->runtime, dmadata->pos);
+}
+
+static int txx9aclc_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct txx9aclc_soc_device *dev =
+		container_of(rtd->socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_dmadata *dmadata = &dev->dmadata[substream->stream];
+	int ret;
+
+	ret = snd_soc_set_runtime_hwparams(substream, &txx9aclc_pcm_hardware);
+	if (ret)
+		return ret;
+	/* ensure that buffer size is a multiple of period size */
+	ret = snd_pcm_hw_constraint_integer(substream->runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		return ret;
+	substream->runtime->private_data = dmadata;
+	return 0;
+}
+
+static int txx9aclc_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+	struct dma_chan *chan = dmadata->dma_chan;
+
+	dmadata->frag_count = -1;
+	chan->device->device_terminate_all(chan);
+	return 0;
+}
+
+static struct snd_pcm_ops txx9aclc_pcm_ops = {
+	.open		= txx9aclc_pcm_open,
+	.close		= txx9aclc_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= txx9aclc_pcm_hw_params,
+	.hw_free	= txx9aclc_pcm_hw_free,
+	.prepare	= txx9aclc_pcm_prepare,
+	.trigger	= txx9aclc_pcm_trigger,
+	.pointer	= txx9aclc_pcm_pointer,
+};
+
+static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm)
+{
+	snd_pcm_lib_preallocate_free_for_all(pcm);
+}
+
+static int txx9aclc_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
+			    struct snd_pcm *pcm)
+{
+	return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+		card->dev, 64 * 1024, 4 * 1024 * 1024);
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+	struct txx9aclc_dmadata *dmadata = param;
+	char devname[BUS_ID_SIZE + 2];
+
+	sprintf(devname, "%s.%d", dmadata->dma_res->name,
+		(int)dmadata->dma_res->start);
+	if (strcmp(dev_name(chan->device->dev), devname) == 0) {
+		chan->private = &dmadata->dma_slave;
+		return true;
+	}
+	return false;
+}
+
+static int txx9aclc_dma_init(struct txx9aclc_soc_device *dev,
+			     struct txx9aclc_dmadata *dmadata)
+{
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	struct txx9dmac_slave *ds = &dmadata->dma_slave;
+	dma_cap_mask_t mask;
+
+	spin_lock_init(&dmadata->dma_lock);
+
+	ds->reg_width = sizeof(u32);
+	if (dmadata->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		ds->tx_reg = drvdata->physbase + ACAUDODAT;
+		ds->rx_reg = 0;
+	} else {
+		ds->tx_reg = 0;
+		ds->rx_reg = drvdata->physbase + ACAUDIDAT;
+	}
+
+	/* Try to grab a DMA channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dmadata->dma_chan = dma_request_channel(mask, filter, dmadata);
+	if (!dmadata->dma_chan) {
+		dev_err(dev->soc_dev.dev,
+			"DMA channel for %s is not available\n",
+			dmadata->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+			"playback" : "capture");
+		return -EBUSY;
+	}
+	tasklet_init(&dmadata->tasklet, txx9aclc_dma_tasklet,
+		     (unsigned long)dmadata);
+	return 0;
+}
+
+static int txx9aclc_pcm_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+	struct resource *r;
+	int i;
+	int ret;
+
+	dev->dmadata[0].stream = SNDRV_PCM_STREAM_PLAYBACK;
+	dev->dmadata[1].stream = SNDRV_PCM_STREAM_CAPTURE;
+	for (i = 0; i < 2; i++) {
+		r = platform_get_resource(dev->aclc_pdev, IORESOURCE_DMA, i);
+		if (!r) {
+			ret = -EBUSY;
+			goto exit;
+		}
+		dev->dmadata[i].dma_res = r;
+		ret = txx9aclc_dma_init(dev, &dev->dmadata[i]);
+		if (ret)
+			goto exit;
+	}
+	return 0;
+
+exit:
+	for (i = 0; i < 2; i++) {
+		if (dev->dmadata[i].dma_chan)
+			dma_release_channel(dev->dmadata[i].dma_chan);
+		dev->dmadata[i].dma_chan = NULL;
+	}
+	return ret;
+}
+
+static int txx9aclc_pcm_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	int i;
+
+	/* disable all FIFO DMAs */
+	__raw_writel(ACCTL_AUDODMA | ACCTL_AUDIDMA, base + ACCTLDIS);
+	/* dummy R/W to clear pending DMAREQ if any */
+	__raw_writel(__raw_readl(base + ACAUDIDAT), base + ACAUDODAT);
+
+	for (i = 0; i < 2; i++) {
+		struct txx9aclc_dmadata *dmadata = &dev->dmadata[i];
+		struct dma_chan *chan = dmadata->dma_chan;
+		if (chan) {
+			dmadata->frag_count = -1;
+			chan->device->device_terminate_all(chan);
+			dma_release_channel(chan);
+		}
+		dev->dmadata[i].dma_chan = NULL;
+	}
+	return 0;
+}
+
+struct snd_soc_platform txx9aclc_soc_platform = {
+	.name		= "txx9aclc-audio",
+	.probe		= txx9aclc_pcm_probe,
+	.remove		= txx9aclc_pcm_remove,
+	.pcm_ops 	= &txx9aclc_pcm_ops,
+	.pcm_new	= txx9aclc_pcm_new,
+	.pcm_free	= txx9aclc_pcm_free_dma_buffers,
+};
+EXPORT_SYMBOL_GPL(txx9aclc_soc_platform);
+
+static int __init txx9aclc_soc_platform_init(void)
+{
+	return snd_soc_register_platform(&txx9aclc_soc_platform);
+}
+
+static void __exit txx9aclc_soc_platform_exit(void)
+{
+	snd_soc_unregister_platform(&txx9aclc_soc_platform);
+}
+
+module_init(txx9aclc_soc_platform_init);
+module_exit(txx9aclc_soc_platform_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("TXx9 ACLC Audio DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc.h b/sound/soc/txx9/txx9aclc.h
new file mode 100644
index 0000000..6769aab
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc.h
@@ -0,0 +1,83 @@
+/*
+ * TXx9 SoC AC Link Controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __TXX9ACLC_H
+#define __TXX9ACLC_H
+
+#include <linux/interrupt.h>
+#include <asm/txx9/dmac.h>
+
+#define ACCTLEN			0x00	/* control enable */
+#define ACCTLDIS		0x04	/* control disable */
+#define   ACCTL_ENLINK		0x00000001	/* enable/disable AC-link */
+#define   ACCTL_AUDODMA		0x00000100	/* AUDODMA enable/disable */
+#define   ACCTL_AUDIDMA		0x00001000	/* AUDIDMA enable/disable */
+#define   ACCTL_AUDOEHLT	0x00010000	/* AUDO error halt
+						   enable/disable */
+#define   ACCTL_AUDIEHLT	0x00100000	/* AUDI error halt
+						   enable/disable */
+#define ACREGACC		0x08	/* codec register access */
+#define   ACREGACC_DAT_SHIFT	0	/* data field */
+#define   ACREGACC_REG_SHIFT	16	/* address field */
+#define   ACREGACC_CODECID_SHIFT	24	/* CODEC ID field */
+#define   ACREGACC_READ		0x80000000	/* CODEC read */
+#define   ACREGACC_WRITE	0x00000000	/* CODEC write */
+#define ACINTSTS		0x10	/* interrupt status */
+#define ACINTMSTS		0x14	/* interrupt masked status */
+#define ACINTEN			0x18	/* interrupt enable */
+#define ACINTDIS		0x1c	/* interrupt disable */
+#define   ACINT_CODECRDY(n)	(0x00000001 << (n))	/* CODECn ready */
+#define   ACINT_REGACCRDY	0x00000010	/* ACREGACC ready */
+#define   ACINT_AUDOERR		0x00000100	/* AUDO underrun error */
+#define   ACINT_AUDIERR		0x00001000	/* AUDI overrun error */
+#define ACDMASTS		0x80	/* DMA request status */
+#define   ACDMA_AUDO		0x00000001	/* AUDODMA pending */
+#define   ACDMA_AUDI		0x00000010	/* AUDIDMA pending */
+#define ACAUDODAT		0xa0	/* audio out data */
+#define ACAUDIDAT		0xb0	/* audio in data */
+#define ACREVID			0xfc	/* revision ID */
+
+struct txx9aclc_dmadata {
+	struct resource *dma_res;
+	struct txx9dmac_slave dma_slave;
+	struct dma_chan *dma_chan;
+	struct tasklet_struct tasklet;
+	spinlock_t dma_lock;
+	int stream; /* SNDRV_PCM_STREAM_PLAYBACK or SNDRV_PCM_STREAM_CAPTURE */
+	struct snd_pcm_substream *substream;
+	unsigned long pos;
+	dma_addr_t dma_addr;
+	unsigned long buffer_bytes;
+	unsigned long period_bytes;
+	unsigned long frag_bytes;
+	int frags;
+	int frag_count;
+	int dmacount;
+};
+
+struct txx9aclc_plat_drvdata {
+	void __iomem *base;
+	u64 physbase;
+};
+
+struct txx9aclc_soc_device {
+	struct snd_soc_device soc_dev;
+	struct platform_device *aclc_pdev;	/* for ioresources, drvdata */
+	struct txx9aclc_dmadata dmadata[2];
+};
+
+static inline struct txx9aclc_plat_drvdata *txx9aclc_get_plat_drvdata(
+	struct txx9aclc_soc_device *sdev)
+{
+	return platform_get_drvdata(sdev->aclc_pdev);
+}
+
+extern struct snd_soc_platform txx9aclc_soc_platform;
+extern struct snd_soc_dai txx9aclc_ac97_dai;
+
+#endif /* __TXX9ACLC_H */
diff --git a/sound/synth/Makefile b/sound/synth/Makefile
index e99fd76..11eb06a 100644
--- a/sound/synth/Makefile
+++ b/sound/synth/Makefile
@@ -5,16 +5,8 @@
 
 snd-util-mem-objs := util_mem.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_EMU10K1) += snd-util-mem.o
 obj-$(CONFIG_SND_TRIDENT) += snd-util-mem.o
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-util-mem.o
-obj-$(call sequencer,$(CONFIG_SND)) += emux/
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-util-mem.o
+obj-$(CONFIG_SND_SEQUENCER) += emux/
diff --git a/sound/synth/emux/Makefile b/sound/synth/emux/Makefile
index b690352..328594e 100644
--- a/sound/synth/emux/Makefile
+++ b/sound/synth/emux/Makefile
@@ -7,14 +7,6 @@
 		       emux_effect.o emux_proc.o emux_hwdep.o soundfont.o \
 		       $(if $(CONFIG_SND_SEQUENCER_OSS),emux_oss.o)
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependencies
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-emux-synth.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-emux-synth.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-emux-synth.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-emux-synth.o
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index b13ce76..b144513 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -42,10 +42,10 @@
 	(stream << 1) | (~(i / (dev->n_streams * BYTES_PER_SAMPLE_USB)) & 1)
 
 static struct snd_pcm_hardware snd_usb_caiaq_pcm_hardware = {
-	.info 		= (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | 
+	.info 		= (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 			   SNDRV_PCM_INFO_BLOCK_TRANSFER),
 	.formats 	= SNDRV_PCM_FMTBIT_S24_3BE,
-	.rates 		= (SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | 
+	.rates 		= (SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |
 			   SNDRV_PCM_RATE_96000),
 	.rate_min	= 44100,
 	.rate_max	= 0, /* will overwrite later */
@@ -68,7 +68,7 @@
 		dev->sub_capture[sub->number] = sub;
 }
 
-static void 
+static void
 deactivate_substream(struct snd_usb_caiaqdev *dev,
 		     struct snd_pcm_substream *sub)
 {
@@ -118,7 +118,7 @@
 			return -EPIPE;
 		}
 	}
-	
+
 	return 0;
 }
 
@@ -129,7 +129,7 @@
 	debug("%s(%p)\n", __func__, dev);
 	if (!dev->streaming)
 		return;
-	
+
 	dev->streaming = 0;
 
 	for (i = 0; i < N_URBS; i++) {
@@ -154,7 +154,7 @@
 	debug("%s(%p)\n", __func__, substream);
 	if (all_substreams_zero(dev->sub_playback) &&
 	    all_substreams_zero(dev->sub_capture)) {
-		/* when the last client has stopped streaming, 
+		/* when the last client has stopped streaming,
 		 * all sample rates are allowed again */
 		stream_stop(dev);
 		dev->pcm_info.rates = dev->samplerates;
@@ -194,7 +194,7 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
 	debug("%s(%p)\n", __func__, substream);
-	
+
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		dev->period_out_count[index] = BYTES_PER_SAMPLE + 1;
 		dev->audio_out_buf_pos[index] = BYTES_PER_SAMPLE + 1;
@@ -205,19 +205,19 @@
 
 	if (dev->streaming)
 		return 0;
-	
+
 	/* the first client that opens a stream defines the sample rate
 	 * setting for all subsequent calls, until the last client closed. */
 	for (i=0; i < ARRAY_SIZE(rates); i++)
 		if (runtime->rate == rates[i])
 			dev->pcm_info.rates = 1 << i;
-	
+
 	snd_pcm_limit_hw_rates(runtime);
 
 	bytes_per_sample = BYTES_PER_SAMPLE;
 	if (dev->spec.data_alignment == 2)
 		bytes_per_sample++;
-	
+
 	bpp = ((runtime->rate / 8000) + CLOCK_DRIFT_TOLERANCE)
 		* bytes_per_sample * CHANNELS_PER_STREAM * dev->n_streams;
 
@@ -232,7 +232,7 @@
 	ret = stream_start(dev);
 	if (ret)
 		return ret;
-	
+
 	dev->output_running = 0;
 	wait_event_timeout(dev->prepare_wait_queue, dev->output_running, HZ);
 	if (!dev->output_running) {
@@ -273,7 +273,7 @@
 		return SNDRV_PCM_POS_XRUN;
 
 	if (sub->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		return bytes_to_frames(sub->runtime, 
+		return bytes_to_frames(sub->runtime,
 					dev->audio_out_buf_pos[index]);
 	else
 		return bytes_to_frames(sub->runtime,
@@ -291,7 +291,7 @@
 	.trigger =	snd_usb_caiaq_pcm_trigger,
 	.pointer =	snd_usb_caiaq_pcm_pointer
 };
-	
+
 static void check_for_elapsed_periods(struct snd_usb_caiaqdev *dev,
 				      struct snd_pcm_substream **subs)
 {
@@ -333,7 +333,7 @@
 				struct snd_pcm_runtime *rt = sub->runtime;
 				char *audio_buf = rt->dma_area;
 				int sz = frames_to_bytes(rt, rt->buffer_size);
-				audio_buf[dev->audio_in_buf_pos[stream]++] 
+				audio_buf[dev->audio_in_buf_pos[stream]++]
 					= usb_buf[i];
 				dev->period_in_count[stream]++;
 				if (dev->audio_in_buf_pos[stream] == sz)
@@ -354,14 +354,14 @@
 
 	for (i = 0; i < iso->actual_length;) {
 		if (i % (dev->n_streams * BYTES_PER_SAMPLE_USB) == 0) {
-			for (stream = 0; 
-			     stream < dev->n_streams; 
+			for (stream = 0;
+			     stream < dev->n_streams;
 			     stream++, i++) {
 				if (dev->first_packet)
 					continue;
 
 				check_byte = MAKE_CHECKBYTE(dev, stream, i);
-				
+
 				if ((usb_buf[i] & 0x3f) != check_byte)
 					dev->input_panic = 1;
 
@@ -410,21 +410,21 @@
 	}
 
 	if ((dev->input_panic || dev->output_panic) && !dev->warned) {
-		debug("streaming error detected %s %s\n", 
+		debug("streaming error detected %s %s\n",
 				dev->input_panic ? "(input)" : "",
 				dev->output_panic ? "(output)" : "");
 		dev->warned = 1;
 	}
 }
 
-static void fill_out_urb(struct snd_usb_caiaqdev *dev, 
-			 struct urb *urb, 
+static void fill_out_urb(struct snd_usb_caiaqdev *dev,
+			 struct urb *urb,
 			 const struct usb_iso_packet_descriptor *iso)
 {
 	unsigned char *usb_buf = urb->transfer_buffer + iso->offset;
 	struct snd_pcm_substream *sub;
 	int stream, i;
-	
+
 	for (i = 0; i < iso->length;) {
 		for (stream = 0; stream < dev->n_streams; stream++, i++) {
 			sub = dev->sub_playback[stream];
@@ -444,7 +444,7 @@
 
 		/* fill in the check bytes */
 		if (dev->spec.data_alignment == 2 &&
-		    i % (dev->n_streams * BYTES_PER_SAMPLE_USB) == 
+		    i % (dev->n_streams * BYTES_PER_SAMPLE_USB) ==
 		    	(dev->n_streams * CHANNELS_PER_STREAM))
 		    for (stream = 0; stream < dev->n_streams; stream++, i++)
 		    	usb_buf[i] = MAKE_CHECKBYTE(dev, stream, i);
@@ -453,7 +453,7 @@
 
 static void read_completed(struct urb *urb)
 {
-	struct snd_usb_caiaq_cb_info *info = urb->context; 
+	struct snd_usb_caiaq_cb_info *info = urb->context;
 	struct snd_usb_caiaqdev *dev;
 	struct urb *out;
 	int frame, len, send_it = 0, outframe = 0;
@@ -478,7 +478,7 @@
 		out->iso_frame_desc[outframe].length = len;
 		out->iso_frame_desc[outframe].actual_length = 0;
 		out->iso_frame_desc[outframe].offset = BYTES_PER_FRAME * frame;
-		
+
 		if (len > 0) {
 			spin_lock(&dev->spinlock);
 			fill_out_urb(dev, out, &out->iso_frame_desc[outframe]);
@@ -497,14 +497,14 @@
 		out->transfer_flags = URB_ISO_ASAP;
 		usb_submit_urb(out, GFP_ATOMIC);
 	}
-	
+
 	/* re-submit inbound urb */
 	for (frame = 0; frame < FRAMES_PER_URB; frame++) {
 		urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame;
 		urb->iso_frame_desc[frame].length = BYTES_PER_FRAME;
 		urb->iso_frame_desc[frame].actual_length = 0;
 	}
-	
+
 	urb->number_of_packets = FRAMES_PER_URB;
 	urb->transfer_flags = URB_ISO_ASAP;
 	usb_submit_urb(urb, GFP_ATOMIC);
@@ -528,7 +528,7 @@
 	struct usb_device *usb_dev = dev->chip.dev;
 	unsigned int pipe;
 
-	pipe = (dir == SNDRV_PCM_STREAM_PLAYBACK) ? 
+	pipe = (dir == SNDRV_PCM_STREAM_PLAYBACK) ?
 		usb_sndisocpipe(usb_dev, ENDPOINT_PLAYBACK) :
 		usb_rcvisocpipe(usb_dev, ENDPOINT_CAPTURE);
 
@@ -547,25 +547,25 @@
 			return urbs;
 		}
 
-		urbs[i]->transfer_buffer = 
+		urbs[i]->transfer_buffer =
 			kmalloc(FRAMES_PER_URB * BYTES_PER_FRAME, GFP_KERNEL);
 		if (!urbs[i]->transfer_buffer) {
 			log("unable to kmalloc() transfer buffer, OOM!?\n");
 			*ret = -ENOMEM;
 			return urbs;
 		}
-		
+
 		for (frame = 0; frame < FRAMES_PER_URB; frame++) {
-			struct usb_iso_packet_descriptor *iso = 
+			struct usb_iso_packet_descriptor *iso =
 				&urbs[i]->iso_frame_desc[frame];
-			
+
 			iso->offset = BYTES_PER_FRAME * frame;
 			iso->length = BYTES_PER_FRAME;
 		}
-		
+
 		urbs[i]->dev = usb_dev;
 		urbs[i]->pipe = pipe;
-		urbs[i]->transfer_buffer_length = FRAMES_PER_URB 
+		urbs[i]->transfer_buffer_length = FRAMES_PER_URB
 						* BYTES_PER_FRAME;
 		urbs[i]->context = &dev->data_cb_info[i];
 		urbs[i]->interval = 1;
@@ -589,7 +589,7 @@
 	for (i = 0; i < N_URBS; i++) {
 		if (!urbs[i])
 			continue;
-		
+
 		usb_kill_urb(urbs[i]);
 		kfree(urbs[i]->transfer_buffer);
 		usb_free_urb(urbs[i]);
@@ -602,11 +602,11 @@
 {
 	int i, ret;
 
-	dev->n_audio_in  = max(dev->spec.num_analog_audio_in, 
-			       dev->spec.num_digital_audio_in) / 
+	dev->n_audio_in  = max(dev->spec.num_analog_audio_in,
+			       dev->spec.num_digital_audio_in) /
 				CHANNELS_PER_STREAM;
 	dev->n_audio_out = max(dev->spec.num_analog_audio_out,
-			       dev->spec.num_digital_audio_out) / 
+			       dev->spec.num_digital_audio_out) /
 				CHANNELS_PER_STREAM;
 	dev->n_streams = max(dev->n_audio_in, dev->n_audio_out);
 
@@ -619,7 +619,7 @@
 		return -EINVAL;
 	}
 
-	ret = snd_pcm_new(dev->chip.card, dev->product_name, 0, 
+	ret = snd_pcm_new(dev->chip.card, dev->product_name, 0,
 			dev->n_audio_out, dev->n_audio_in, &dev->pcm);
 
 	if (ret < 0) {
@@ -632,7 +632,7 @@
 
 	memset(dev->sub_playback, 0, sizeof(dev->sub_playback));
 	memset(dev->sub_capture, 0, sizeof(dev->sub_capture));
-	
+
 	memcpy(&dev->pcm_info, &snd_usb_caiaq_pcm_hardware,
 			sizeof(snd_usb_caiaq_pcm_hardware));
 
@@ -651,9 +651,9 @@
 		break;
 	}
 
-	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_PLAYBACK, 
+	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_PLAYBACK,
 				&snd_usb_caiaq_ops);
-	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_CAPTURE, 
+	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_CAPTURE,
 				&snd_usb_caiaq_ops);
 
 	snd_pcm_lib_preallocate_pages_for_all(dev->pcm,
@@ -662,7 +662,7 @@
 					MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
 	dev->data_cb_info =
-		kmalloc(sizeof(struct snd_usb_caiaq_cb_info) * N_URBS, 
+		kmalloc(sizeof(struct snd_usb_caiaq_cb_info) * N_URBS,
 					GFP_KERNEL);
 
 	if (!dev->data_cb_info)
@@ -672,14 +672,14 @@
 		dev->data_cb_info[i].dev = dev;
 		dev->data_cb_info[i].index = i;
 	}
-	
+
 	dev->data_urbs_in = alloc_urbs(dev, SNDRV_PCM_STREAM_CAPTURE, &ret);
 	if (ret < 0) {
 		kfree(dev->data_cb_info);
 		free_urbs(dev->data_urbs_in);
 		return ret;
 	}
-	
+
 	dev->data_urbs_out = alloc_urbs(dev, SNDRV_PCM_STREAM_PLAYBACK, &ret);
 	if (ret < 0) {
 		kfree(dev->data_cb_info);
diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index 515de1c..2240624 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -35,7 +35,7 @@
 #include "input.h"
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
-MODULE_DESCRIPTION("caiaq USB audio, version 1.3.14");
+MODULE_DESCRIPTION("caiaq USB audio, version 1.3.16");
 MODULE_LICENSE("GPL");
 MODULE_SUPPORTED_DEVICE("{{Native Instruments, RigKontrol2},"
 			 "{Native Instruments, RigKontrol3},"
@@ -79,7 +79,7 @@
 	{
 		.match_flags =	USB_DEVICE_ID_MATCH_DEVICE,
 		.idVendor =	USB_VID_NATIVEINSTRUMENTS,
-		.idProduct =	USB_PID_RIGKONTROL2 
+		.idProduct =	USB_PID_RIGKONTROL2
 	},
 	{
 		.match_flags =	USB_DEVICE_ID_MATCH_DEVICE,
@@ -197,7 +197,7 @@
 
 	if (buffer && len > 0)
 		memcpy(dev->ep1_out_buf+1, buffer, len);
-	
+
 	dev->ep1_out_buf[0] = command;
 	return usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, 1),
 			   dev->ep1_out_buf, len+1, &actual_len, 200);
@@ -208,7 +208,7 @@
 {
 	int ret;
 	char tmp[5];
-	
+
 	switch (rate) {
 	case 44100:	tmp[0] = SAMPLERATE_44100;   break;
 	case 48000:	tmp[0] = SAMPLERATE_48000;   break;
@@ -237,12 +237,12 @@
 
 	if (ret)
 		return ret;
-	
-	if (!wait_event_timeout(dev->ep1_wait_queue, 
+
+	if (!wait_event_timeout(dev->ep1_wait_queue,
 	    dev->audio_parm_answer >= 0, HZ))
 		return -EPIPE;
-		
-	if (dev->audio_parm_answer != 1) 
+
+	if (dev->audio_parm_answer != 1)
 		debug("unable to set the device's audio params\n");
 	else
 		dev->bpp = bpp;
@@ -250,8 +250,8 @@
 	return dev->audio_parm_answer == 1 ? 0 : -EINVAL;
 }
 
-int snd_usb_caiaq_set_auto_msg (struct snd_usb_caiaqdev *dev, 
-				int digital, int analog, int erp)
+int snd_usb_caiaq_set_auto_msg(struct snd_usb_caiaqdev *dev,
+			       int digital, int analog, int erp)
 {
 	char tmp[3] = { digital, analog, erp };
 	return snd_usb_caiaq_send_command(dev, EP1_CMD_AUTO_MSG,
@@ -262,7 +262,7 @@
 {
 	int ret;
 	char val[4];
-	
+
 	/* device-specific startup specials */
 	switch (dev->chip.usb_id) {
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
@@ -314,7 +314,7 @@
 			dev->control_state, 1);
 		break;
 	}
-	
+
 	if (dev->spec.num_analog_audio_out +
 	    dev->spec.num_analog_audio_in +
 	    dev->spec.num_digital_audio_out +
@@ -323,7 +323,7 @@
 		if (ret < 0)
 			log("Unable to set up audio system (ret=%d)\n", ret);
 	}
-	
+
 	if (dev->spec.num_midi_in +
 	    dev->spec.num_midi_out > 0) {
 		ret = snd_usb_caiaq_midi_init(dev);
@@ -363,7 +363,7 @@
 	if (devnum >= SNDRV_CARDS)
 		return -ENODEV;
 
-	err = snd_card_create(index[devnum], id[devnum], THIS_MODULE, 
+	err = snd_card_create(index[devnum], id[devnum], THIS_MODULE,
 			      sizeof(struct snd_usb_caiaqdev), &card);
 	if (err < 0)
 		return err;
@@ -382,11 +382,11 @@
 
 static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 {
-	char *c;
+	char *c, usbpath[32];
 	struct usb_device *usb_dev = dev->chip.dev;
 	struct snd_card *card = dev->chip.card;
 	int err, len;
-	
+
 	if (usb_set_interface(usb_dev, 0, 1) != 0) {
 		log("can't set alt interface.\n");
 		return -EIO;
@@ -395,19 +395,19 @@
 	usb_init_urb(&dev->ep1_in_urb);
 	usb_init_urb(&dev->midi_out_urb);
 
-	usb_fill_bulk_urb(&dev->ep1_in_urb, usb_dev, 
+	usb_fill_bulk_urb(&dev->ep1_in_urb, usb_dev,
 			  usb_rcvbulkpipe(usb_dev, 0x1),
-			  dev->ep1_in_buf, EP1_BUFSIZE, 
+			  dev->ep1_in_buf, EP1_BUFSIZE,
 			  usb_ep1_command_reply_dispatch, dev);
 
-	usb_fill_bulk_urb(&dev->midi_out_urb, usb_dev, 
+	usb_fill_bulk_urb(&dev->midi_out_urb, usb_dev,
 			  usb_sndbulkpipe(usb_dev, 0x1),
-			  dev->midi_out_buf, EP1_BUFSIZE, 
+			  dev->midi_out_buf, EP1_BUFSIZE,
 			  snd_usb_caiaq_midi_output_done, dev);
-	
+
 	init_waitqueue_head(&dev->ep1_wait_queue);
 	init_waitqueue_head(&dev->prepare_wait_queue);
-	
+
 	if (usb_submit_urb(&dev->ep1_in_urb, GFP_KERNEL) != 0)
 		return -EIO;
 
@@ -420,47 +420,52 @@
 
 	usb_string(usb_dev, usb_dev->descriptor.iManufacturer,
 		   dev->vendor_name, CAIAQ_USB_STR_LEN);
-	
+
 	usb_string(usb_dev, usb_dev->descriptor.iProduct,
 		   dev->product_name, CAIAQ_USB_STR_LEN);
-	
-	usb_string(usb_dev, usb_dev->descriptor.iSerialNumber,
-		   dev->serial, CAIAQ_USB_STR_LEN);
 
-	/* terminate serial string at first white space occurence */
-	c = strchr(dev->serial, ' ');
-	if (c)
-		*c = '\0';
-	
-	strcpy(card->driver, MODNAME);
-	strcpy(card->shortname, dev->product_name);
+	strlcpy(card->driver, MODNAME, sizeof(card->driver));
+	strlcpy(card->shortname, dev->product_name, sizeof(card->shortname));
+	strlcpy(card->mixername, dev->product_name, sizeof(card->mixername));
 
-	len = snprintf(card->longname, sizeof(card->longname),
-		       "%s %s (serial %s, ",
-		       dev->vendor_name, dev->product_name, dev->serial);
+	/* if the id was not passed as module option, fill it with a shortened
+	 * version of the product string which does not contain any
+	 * whitespaces */
 
-	if (len < sizeof(card->longname) - 2)
-		len += usb_make_path(usb_dev, card->longname + len,
-				     sizeof(card->longname) - len);
+	if (*card->id == '\0') {
+		char id[sizeof(card->id)];
 
-	card->longname[len++] = ')';
-	card->longname[len] = '\0';
+		memset(id, 0, sizeof(id));
+
+		for (c = card->shortname, len = 0;
+			*c && len < sizeof(card->id); c++)
+			if (*c != ' ')
+				id[len++] = *c;
+
+		snd_card_set_id(card, id);
+	}
+
+	usb_make_path(usb_dev, usbpath, sizeof(usbpath));
+	snprintf(card->longname, sizeof(card->longname),
+		       "%s %s (%s)",
+		       dev->vendor_name, dev->product_name, usbpath);
+
 	setup_card(dev);
 	return 0;
 }
 
-static int __devinit snd_probe(struct usb_interface *intf, 
+static int __devinit snd_probe(struct usb_interface *intf,
 		     const struct usb_device_id *id)
 {
 	int ret;
 	struct snd_card *card;
 	struct usb_device *device = interface_to_usbdev(intf);
-	
+
 	ret = create_card(device, &card);
-	
+
 	if (ret < 0)
 		return ret;
-			
+
 	usb_set_intfdata(intf, card);
 	ret = init_card(caiaqdev(card));
 	if (ret < 0) {
@@ -468,7 +473,7 @@
 		snd_card_free(card);
 		return ret;
 	}
-	
+
 	return 0;
 }
 
@@ -489,10 +494,10 @@
 	snd_usb_caiaq_input_free(dev);
 #endif
 	snd_usb_caiaq_audio_free(dev);
-	
+
 	usb_kill_urb(&dev->ep1_in_urb);
 	usb_kill_urb(&dev->midi_out_urb);
-	
+
 	snd_card_free(card);
 	usb_reset_device(interface_to_usbdev(intf));
 }
diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h
index 4cce1ad..ece7351 100644
--- a/sound/usb/caiaq/device.h
+++ b/sound/usb/caiaq/device.h
@@ -81,7 +81,6 @@
 
 	char vendor_name[CAIAQ_USB_STR_LEN];
 	char product_name[CAIAQ_USB_STR_LEN];
-	char serial[CAIAQ_USB_STR_LEN];
 
 	int n_streams, n_audio_in, n_audio_out;
 	int streaming, first_packet, output_running;
diff --git a/sound/usb/caiaq/midi.c b/sound/usb/caiaq/midi.c
index 8fa8cd8..538e8c0 100644
--- a/sound/usb/caiaq/midi.c
+++ b/sound/usb/caiaq/midi.c
@@ -40,7 +40,7 @@
 
 	if (!dev)
 		return;
-	
+
 	dev->midi_receive_substream = up ? substream : NULL;
 }
 
@@ -64,18 +64,18 @@
 				    struct snd_rawmidi_substream *substream)
 {
 	int len, ret;
-	
+
 	dev->midi_out_buf[0] = EP1_CMD_MIDI_WRITE;
 	dev->midi_out_buf[1] = 0; /* port */
 	len = snd_rawmidi_transmit(substream, dev->midi_out_buf + 3,
 				   EP1_BUFSIZE - 3);
-	
+
 	if (len <= 0)
 		return;
-	
+
 	dev->midi_out_buf[2] = len;
 	dev->midi_out_urb.transfer_buffer_length = len+3;
-	
+
 	ret = usb_submit_urb(&dev->midi_out_urb, GFP_ATOMIC);
 	if (ret < 0)
 		log("snd_usb_caiaq_midi_send(%p): usb_submit_urb() failed,"
@@ -88,7 +88,7 @@
 static void snd_usb_caiaq_midi_output_trigger(struct snd_rawmidi_substream *substream, int up)
 {
 	struct snd_usb_caiaqdev *dev = substream->rmidi->private_data;
-	
+
 	if (up) {
 		dev->midi_out_substream = substream;
 		if (!dev->midi_out_active)
@@ -113,12 +113,12 @@
 	.trigger =      snd_usb_caiaq_midi_input_trigger,
 };
 
-void snd_usb_caiaq_midi_handle_input(struct snd_usb_caiaqdev *dev, 
+void snd_usb_caiaq_midi_handle_input(struct snd_usb_caiaqdev *dev,
 				     int port, const char *buf, int len)
 {
 	if (!dev->midi_receive_substream)
 		return;
-	
+
 	snd_rawmidi_receive(dev->midi_receive_substream, buf, len);
 }
 
@@ -142,16 +142,16 @@
 
 	if (device->spec.num_midi_out > 0) {
 		rmidi->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT;
-		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, 
+		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT,
 				    &snd_usb_caiaq_midi_output);
 	}
 
 	if (device->spec.num_midi_in > 0) {
 		rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT;
-		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, 
+		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT,
 				    &snd_usb_caiaq_midi_input);
 	}
-	
+
 	device->rmidi = rmidi;
 
 	return 0;
@@ -160,7 +160,7 @@
 void snd_usb_caiaq_midi_output_done(struct urb* urb)
 {
 	struct snd_usb_caiaqdev *dev = urb->context;
-	
+
 	dev->midi_out_active = 0;
 	if (urb->status != 0)
 		return;
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index a6b8848..c7b9023 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -627,6 +627,7 @@
 	subs->hwptr_done += offs;
 	if (subs->hwptr_done >= runtime->buffer_size)
 		subs->hwptr_done -= runtime->buffer_size;
+	runtime->delay += offs;
 	spin_unlock_irqrestore(&subs->lock, flags);
 	urb->transfer_buffer_length = offs * stride;
 	if (period_elapsed)
@@ -636,12 +637,22 @@
 
 /*
  * process after playback data complete
- * - nothing to do
+ * - decrease the delay count again
  */
 static int retire_playback_urb(struct snd_usb_substream *subs,
 			       struct snd_pcm_runtime *runtime,
 			       struct urb *urb)
 {
+	unsigned long flags;
+	int stride = runtime->frame_bits >> 3;
+	int processed = urb->transfer_buffer_length / stride;
+
+	spin_lock_irqsave(&subs->lock, flags);
+	if (processed > runtime->delay)
+		runtime->delay = 0;
+	else
+		runtime->delay -= processed;
+	spin_unlock_irqrestore(&subs->lock, flags);
 	return 0;
 }
 
@@ -1520,6 +1531,7 @@
 	subs->hwptr_done = 0;
 	subs->transfer_done = 0;
 	subs->phase = 0;
+	runtime->delay = 0;
 
 	/* clear urbs (to be sure) */
 	deactivate_urbs(subs, 0, 1);
@@ -3279,6 +3291,25 @@
 	return snd_usb_cm106_write_int_reg(dev, 2, 0x8004);
 }
 
+/*
+ * C-Media CM6206 is based on CM106 with two additional
+ * registers that are not documented in the data sheet.
+ * Values here are chosen based on sniffing USB traffic
+ * under Windows.
+ */
+static int snd_usb_cm6206_boot_quirk(struct usb_device *dev)
+{
+	int err, reg;
+	int val[] = {0x200c, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000};
+
+	for (reg = 0; reg < ARRAY_SIZE(val); reg++) {
+		err = snd_usb_cm106_write_int_reg(dev, reg, val[reg]);
+		if (err < 0)
+			return err;
+	}
+
+	return err;
+}
 
 /*
  * Setup quirks
@@ -3565,6 +3596,12 @@
 			goto __err_val;
 	}
 
+	/* C-Media CM6206 / CM106-Like Sound Device */
+	if (id == USB_ID(0x0d8c, 0x0102)) {
+		if (snd_usb_cm6206_boot_quirk(dev) < 0)
+			goto __err_val;
+	}
+
 	/*
 	 * found a config.  now register to ALSA
 	 */
diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 5d955aa..f0f7624 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1470,6 +1470,41 @@
 	}
 },
 {
+	/* Edirol M-16DX */
+	/* FIXME: This quirk gives a good-working capture stream but the
+	 *        playback seems problematic because of lacking of sync
+	 *        with capture stream.  It needs to sync with the capture
+	 *        clock.  As now, you'll get frequent sound distortions
+	 *        via the playback.
+	 */
+	USB_DEVICE(0x0582, 0x00c4),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 0,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_MIDI_FIXED_ENDPOINT,
+				.data = & (const struct snd_usb_midi_endpoint_info) {
+					.out_cables = 0x0001,
+					.in_cables  = 0x0001
+				}
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
+{
 	/* BOSS GT-10 */
 	USB_DEVICE(0x0582, 0x00da),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
@@ -1951,6 +1986,14 @@
 	}
 },
 {
+	USB_DEVICE(0x0ccd, 0x0028),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.vendor_name = "TerraTec",
+		.product_name = "Aureon 5.1 MkII",
+		.ifnum = QUIRK_NO_INTERFACE
+	}
+},
+{
 	USB_DEVICE(0x0ccd, 0x0035),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
 		.vendor_name = "Miditech",
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
new file mode 100644
index 0000000..d69a759
--- /dev/null
+++ b/tools/perf/.gitignore
@@ -0,0 +1,16 @@
+PERF-BUILD-OPTIONS
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+perf
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
+common-cmds.h
+tags
+TAGS
+cscope*
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
new file mode 100644
index 0000000..5457192
--- /dev/null
+++ b/tools/perf/Documentation/Makefile
@@ -0,0 +1,300 @@
+MAN1_TXT= \
+	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+		$(wildcard perf-*.txt)) \
+	perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+DOC_HTML=$(MAN_HTML)
+
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+
+DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+prefix?=$(HOME)
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+# DESTDIR=
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA =
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+#
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+	ifdef ASCIIDOC_NO_ROFF
+	# docbook-xsl after 1.72 needs the regular XSL, but will not
+	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
+	ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+	endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_ASCIIDOC	= @echo '   ' ASCIIDOC $@;
+	QUIET_XMLTO	= @echo '   ' XMLTO $@;
+	QUIET_DB2TEXI	= @echo '   ' DB2TEXI $@;
+	QUIET_MAKEINFO	= @echo '   ' MAKEINFO $@;
+	QUIET_DBLATEX	= @echo '   ' DBLATEX $@;
+	QUIET_XSLTPROC	= @echo '   ' XSLTPROC $@;
+	QUIET_GEN	= @echo '   ' GEN $@;
+	QUIET_STDERR	= 2> /dev/null
+	QUIET_SUBDIR0	= +@subdir=
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			  $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: perf.info perfman.info
+
+pdf: user-manual.pdf
+
+install: install-man
+
+install-man: man
+	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
+#	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
+#	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-info: info
+	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
+	$(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
+	if test -r $(DESTDIR)$(infodir)/dir; then \
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+	else \
+	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+	fi
+
+install-pdf: pdf
+	$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
+	$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
+
+install-html: html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
+
+-include ../PERF-VERSION-FILE
+
+#
+# Determine "include::" file references in asciidoc files.
+#
+doc.dep : $(wildcard *.txt) build-docdep.perl
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+	mv $@+ $@
+
+-include doc.dep
+
+cmds_txt = cmds-ancillaryinterrogators.txt \
+	cmds-ancillarymanipulators.txt \
+	cmds-mainporcelain.txt \
+	cmds-plumbinginterrogators.txt \
+	cmds-plumbingmanipulators.txt \
+	cmds-synchingrepositories.txt \
+	cmds-synchelpers.txt \
+	cmds-purehelpers.txt \
+	cmds-foreignscminterface.txt
+
+$(cmds_txt): cmd-list.made
+
+cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+	$(QUIET_GEN)$(RM) $@ && \
+	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+	date >$@
+
+clean:
+	$(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
+	$(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
+	$(RM) howto-index.txt howto/*.html doc.dep
+	$(RM) technical/api-*.html technical/api-index.txt
+	$(RM) $(cmds_txt) *.made
+
+$(MAN_HTML): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+%.1 %.5 %.7 : %.xml
+	$(QUIET_XMLTO)$(RM) $@ && \
+	xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.xml : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+user-manual.html: user-manual.xml
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+perf.info: user-manual.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
+
+user-manual.texi: user-manual.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+user-manual.pdf: user-manual.xml
+	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
+	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+	mv $@+ $@
+
+perfman.texi: $(MAN_XML) cat-texi.perl
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+		--to-stdout $(xml) &&) true) > $@++ && \
+	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+perfman.info: perfman.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+	mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+	mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	mv $@+ $@
+
+install-webdoc : html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+quick-install: quick-install-man
+
+quick-install-man:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+quick-install-html:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
+
+.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf
new file mode 100644
index 0000000..356b23a
--- /dev/null
+++ b/tools/perf/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/perf/Documentation/manpage-1.72.xsl b/tools/perf/Documentation/manpage-1.72.xsl
new file mode 100644
index 0000000..b4d315c
--- /dev/null
+++ b/tools/perf/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-base.xsl b/tools/perf/Documentation/manpage-base.xsl
new file mode 100644
index 0000000..a264fa61
--- /dev/null
+++ b/tools/perf/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-bold-literal.xsl b/tools/perf/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 0000000..608eb5d
--- /dev/null
+++ b/tools/perf/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-normal.xsl b/tools/perf/Documentation/manpage-normal.xsl
new file mode 100644
index 0000000..a48f5b1
--- /dev/null
+++ b/tools/perf/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-suppress-sp.xsl b/tools/perf/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 0000000..a63c763
--- /dev/null
+++ b/tools/perf/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
new file mode 100644
index 0000000..c9dcade
--- /dev/null
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -0,0 +1,29 @@
+perf-annotate(1)
+==============
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and display annotated code
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] symbol_name
+
+DESCRIPTION
+-----------
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-help.txt b/tools/perf/Documentation/perf-help.txt
new file mode 100644
index 0000000..5143918
--- /dev/null
+++ b/tools/perf/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+============
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+	Prints all the available commands on the standard output. This
+	option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
new file mode 100644
index 0000000..8290b94
--- /dev/null
+++ b/tools/perf/Documentation/perf-list.txt
@@ -0,0 +1,25 @@
+perf-list(1)
+============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list'
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+OPTIONS
+-------
+None
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
new file mode 100644
index 0000000..1dbc1ee
--- /dev/null
+++ b/tools/perf/Documentation/perf-record.txt
@@ -0,0 +1,42 @@
+perf-record(1)
+==============
+
+NAME
+----
+perf-record - Run a command and record its profile into perf.data
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into perf.data - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
new file mode 100644
index 0000000..52d3fc6
--- /dev/null
+++ b/tools/perf/Documentation/perf-report.txt
@@ -0,0 +1,26 @@
+perf-report(1)
+==============
+
+NAME
+----
+perf-report - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf report.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+
+SEE ALSO
+--------
+linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
new file mode 100644
index 0000000..c368a72
--- /dev/null
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -0,0 +1,66 @@
+perf-stat(1)
+============
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-i::
+--inherit::
+        child tasks inherit counters
+-p::
+--pid=<pid>::
+        stat events on existing pid
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+EXAMPLES
+--------
+
+$ perf stat -- make -j
+
+ Performance counter stats for 'make -j':
+
+    8117.370256  task clock ticks     #      11.281 CPU utilization factor
+            678  context switches     #       0.000 M/sec
+            133  CPU migrations       #       0.000 M/sec
+         235724  pagefaults           #       0.029 M/sec
+    24821162526  CPU cycles           #    3057.784 M/sec
+    18687303457  instructions         #    2302.138 M/sec
+      172158895  cache references     #      21.209 M/sec
+       27075259  cache misses         #       3.335 M/sec
+
+ Wall-clock time elapsed:   719.554352 msecs
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
new file mode 100644
index 0000000..539d012
--- /dev/null
+++ b/tools/perf/Documentation/perf-top.txt
@@ -0,0 +1,39 @@
+perf-top(1)
+===========
+
+NAME
+----
+perf-top - Run a command and profile it
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
new file mode 100644
index 0000000..69c8325
--- /dev/null
+++ b/tools/perf/Documentation/perf.txt
@@ -0,0 +1,24 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] COMMAND [ARGS]
+
+DESCRIPTION
+-----------
+Performance counters for Linux are are a new kernel-based subsystem
+that provide a framework for all things performance analysis. It
+covers hardware level (CPU/PMU, Performance Monitoring Unit) features
+and software features (software counters, tracepoints) as well.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
new file mode 100644
index 0000000..0cbd5d6
--- /dev/null
+++ b/tools/perf/Makefile
@@ -0,0 +1,929 @@
+# The default target of this Makefile is...
+all::
+
+# Define V=1 to have a more verbose compile.
+#
+# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
+# or vsnprintf() return -1 instead of number of characters which would
+# have been written to the final string if enough space had been available.
+#
+# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
+# when attempting to read from an fopen'ed directory.
+#
+# Define NO_OPENSSL environment variable if you do not have OpenSSL.
+# This also implies MOZILLA_SHA1.
+#
+# Define CURLDIR=/foo/bar if your curl header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define EXPATDIR=/foo/bar if your expat header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
+#
+# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
+# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
+#
+# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
+# do not support the 'size specifiers' introduced by C99, namely ll, hh,
+# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
+# some C compilers supported these specifiers prior to C99 as an extension.
+#
+# Define NO_STRCASESTR if you don't have strcasestr.
+#
+# Define NO_MEMMEM if you don't have memmem.
+#
+# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
+# If your compiler also does not support long long or does not have
+# strtoull, define NO_STRTOULL.
+#
+# Define NO_SETENV if you don't have setenv in the C library.
+#
+# Define NO_UNSETENV if you don't have unsetenv in the C library.
+#
+# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
+#
+# Define NO_SYS_SELECT_H if you don't have sys/select.h.
+#
+# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
+# Enable it on Windows.  By default, symrefs are still used.
+#
+# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
+# tests.  These tests take up a significant amount of the total test time
+# but are not needed unless you plan to talk to SVN repos.
+#
+# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
+# installed in /sw, but don't want PERF to link against any libraries
+# installed there.  If defined you may specify your own (or Fink's)
+# include directories and library directories by defining CFLAGS
+# and LDFLAGS appropriately.
+#
+# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
+# have DarwinPorts installed in /opt/local, but don't want PERF to
+# link against any libraries installed there.  If defined you may
+# specify your own (or DarwinPort's) include directories and
+# library directories by defining CFLAGS and LDFLAGS appropriately.
+#
+# Define PPC_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for PowerPC.
+#
+# Define ARM_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for ARM.
+#
+# Define MOZILLA_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
+# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
+# choice) has very fast version optimized for i586.
+#
+# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
+#
+# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
+#
+# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
+# Patrick Mauritz).
+#
+# Define NO_MMAP if you want to avoid mmap.
+#
+# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
+#
+# Define NO_PREAD if you have a problem with pread() system call (e.g.
+# cygwin.dll before v1.5.22).
+#
+# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
+# generally faster on your platform than accessing the working directory.
+#
+# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
+# the executable mode bit, but doesn't really do so.
+#
+# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
+#
+# Define NO_SOCKADDR_STORAGE if your platform does not have struct
+# sockaddr_storage.
+#
+# Define NO_ICONV if your libc does not properly support iconv.
+#
+# Define OLD_ICONV if your library has an old iconv(), where the second
+# (input buffer pointer) parameter is declared with type (const char **).
+#
+# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
+#
+# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
+# that tells runtime paths to dynamic libraries;
+# "-Wl,-rpath=/path/lib" is used instead.
+#
+# Define USE_NSEC below if you want perf to care about sub-second file mtimes
+# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
+# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
+# randomly break unless your underlying filesystem supports those sub-second
+# times (my ext3 doesn't).
+#
+# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
+# "st_ctim"
+#
+# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
+# available.  This automatically turns USE_NSEC off.
+#
+# Define USE_STDEV below if you want perf to care about the underlying device
+# change being considered an inode change from the update-index perspective.
+#
+# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
+# field that counts the on-disk footprint in 512-byte blocks.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
+# MakeMaker (e.g. using ActiveState under Cygwin).
+#
+# Define NO_PERL if you do not want Perl scripts or libraries at all.
+#
+# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
+# is a simplified version of the merge sort used in glibc. This is
+# recommended if Git triggers O(n^2) behavior in your platform's qsort().
+#
+# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
+# your external grep (e.g., if your system lacks grep, if its grep is
+# broken, or spawning external process is slower than built-in grep perf has).
+
+PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	@$(SHELL_PATH) util/PERF-VERSION-GEN
+-include PERF-VERSION-FILE
+
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
+uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
+uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
+uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
+uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
+
+# CFLAGS and LDFLAGS are for the users to override from the command line.
+
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
+LDFLAGS = -lpthread -lrt -lelf
+ALL_CFLAGS = $(CFLAGS)
+ALL_LDFLAGS = $(LDFLAGS)
+STRIP ?= strip
+
+# Among the variables below, these:
+#   perfexecdir
+#   template_dir
+#   mandir
+#   infodir
+#   htmldir
+#   ETC_PERFCONFIG (but not sysconfdir)
+# can be specified as a relative path some/where/else;
+# this is interpreted as relative to $(prefix) and "perf" at
+# runtime figures out where they are based on the path to the executable.
+# This can help installing the suite in a relocatable way.
+
+prefix = $(HOME)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+mandir = share/man
+infodir = share/info
+perfexecdir = libexec/perf-core
+sharedir = $(prefix)/share
+template_dir = share/perf-core/templates
+htmldir = share/doc/perf-doc
+ifeq ($(prefix),/usr)
+sysconfdir = /etc
+ETC_PERFCONFIG = $(sysconfdir)/perfconfig
+else
+sysconfdir = $(prefix)/etc
+ETC_PERFCONFIG = etc/perfconfig
+endif
+lib = lib
+# DESTDIR=
+
+export prefix bindir sharedir sysconfdir
+
+CC = gcc
+AR = ar
+RM = rm -f
+TAR = tar
+FIND = find
+INSTALL = install
+RPMBUILD = rpmbuild
+PTHREAD_LIBS = -lpthread
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+
+
+### --- END CONFIGURATION SECTION ---
+
+# Those must not be GNU-specific; they are shared with perl/ which may
+# be built by a different compiler. (Note that this is an artifact now
+# but it still might be nice to keep that distinction.)
+BASIC_CFLAGS =
+BASIC_LDFLAGS =
+
+# Guard against environment variables
+BUILTIN_OBJS =
+BUILT_INS =
+COMPAT_CFLAGS =
+COMPAT_OBJS =
+LIB_H =
+LIB_OBJS =
+SCRIPT_PERL =
+SCRIPT_SH =
+TEST_PROGRAMS =
+
+#
+# No scripts right now:
+#
+
+# SCRIPT_SH += perf-am.sh
+
+#
+# No Perl scripts right now:
+#
+
+# SCRIPT_PERL += perf-add--interactive.perl
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
+	  $(patsubst %.perl,%,$(SCRIPT_PERL))
+
+# Empty...
+EXTRA_PROGRAMS =
+
+# ... and all the rest that could be moved out of bindir to perfexecdir
+PROGRAMS += $(EXTRA_PROGRAMS)
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += perf
+
+# List built-in command $C whose implementation cmd_$C() is not in
+# builtin-$C.o but is linked in as part of some other command.
+#
+# None right now:
+#
+# BUILT_INS += perf-init $X
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = perf$X
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+	SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+LIB_FILE=libperf.a
+
+LIB_H += ../../include/linux/perf_counter.h
+LIB_H += perf.h
+LIB_H += util/list.h
+LIB_H += util/rbtree.h
+LIB_H += util/levenshtein.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/help.h
+LIB_H += util/strbuf.h
+LIB_H += util/string.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+
+LIB_OBJS += util/abspath.o
+LIB_OBJS += util/alias.o
+LIB_OBJS += util/config.o
+LIB_OBJS += util/ctype.o
+LIB_OBJS += util/environment.o
+LIB_OBJS += util/exec_cmd.o
+LIB_OBJS += util/help.o
+LIB_OBJS += util/levenshtein.o
+LIB_OBJS += util/parse-options.o
+LIB_OBJS += util/parse-events.o
+LIB_OBJS += util/path.o
+LIB_OBJS += util/rbtree.o
+LIB_OBJS += util/run-command.o
+LIB_OBJS += util/quote.o
+LIB_OBJS += util/strbuf.o
+LIB_OBJS += util/string.o
+LIB_OBJS += util/usage.o
+LIB_OBJS += util/wrapper.o
+LIB_OBJS += util/sigchain.o
+LIB_OBJS += util/symbol.o
+LIB_OBJS += util/color.o
+LIB_OBJS += util/pager.o
+
+BUILTIN_OBJS += builtin-annotate.o
+BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-list.o
+BUILTIN_OBJS += builtin-record.o
+BUILTIN_OBJS += builtin-report.o
+BUILTIN_OBJS += builtin-stat.o
+BUILTIN_OBJS += builtin-top.o
+
+PERFLIBS = $(LIB_FILE)
+EXTLIBS =
+
+#
+# Platform specific tweaks
+#
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain.  If
+# we had "elif" things would have been much nicer...
+
+-include config.mak.autogen
+-include config.mak
+
+ifeq ($(uname_S),Darwin)
+	ifndef NO_FINK
+		ifeq ($(shell test -d /sw/lib && echo y),y)
+			BASIC_CFLAGS += -I/sw/include
+			BASIC_LDFLAGS += -L/sw/lib
+		endif
+	endif
+	ifndef NO_DARWIN_PORTS
+		ifeq ($(shell test -d /opt/local/lib && echo y),y)
+			BASIC_CFLAGS += -I/opt/local/include
+			BASIC_LDFLAGS += -L/opt/local/lib
+		endif
+	endif
+	PTHREAD_LIBS =
+endif
+
+ifndef CC_LD_DYNPATH
+	ifdef NO_R_TO_GCC_LINKER
+		# Some gcc does not accept and pass -R to the linker to specify
+		# the runtime dynamic library path.
+		CC_LD_DYNPATH = -Wl,-rpath,
+	else
+		CC_LD_DYNPATH = -R
+	endif
+endif
+
+ifdef ZLIB_PATH
+	BASIC_CFLAGS += -I$(ZLIB_PATH)/include
+	EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
+endif
+EXTLIBS += -lz
+
+ifdef NEEDS_SOCKET
+	EXTLIBS += -lsocket
+endif
+ifdef NEEDS_NSL
+	EXTLIBS += -lnsl
+endif
+ifdef NO_D_TYPE_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
+endif
+ifdef NO_D_INO_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
+endif
+ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+	BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
+endif
+ifdef USE_NSEC
+	BASIC_CFLAGS += -DUSE_NSEC
+endif
+ifdef USE_ST_TIMESPEC
+	BASIC_CFLAGS += -DUSE_ST_TIMESPEC
+endif
+ifdef NO_NSEC
+	BASIC_CFLAGS += -DNO_NSEC
+endif
+ifdef NO_C99_FORMAT
+	BASIC_CFLAGS += -DNO_C99_FORMAT
+endif
+ifdef SNPRINTF_RETURNS_BOGUS
+	COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
+	COMPAT_OBJS += compat/snprintf.o
+endif
+ifdef FREAD_READS_DIRECTORIES
+	COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
+	COMPAT_OBJS += compat/fopen.o
+endif
+ifdef NO_SYMLINK_HEAD
+	BASIC_CFLAGS += -DNO_SYMLINK_HEAD
+endif
+ifdef NO_STRCASESTR
+	COMPAT_CFLAGS += -DNO_STRCASESTR
+	COMPAT_OBJS += compat/strcasestr.o
+endif
+ifdef NO_STRTOUMAX
+	COMPAT_CFLAGS += -DNO_STRTOUMAX
+	COMPAT_OBJS += compat/strtoumax.o
+endif
+ifdef NO_STRTOULL
+	COMPAT_CFLAGS += -DNO_STRTOULL
+endif
+ifdef NO_SETENV
+	COMPAT_CFLAGS += -DNO_SETENV
+	COMPAT_OBJS += compat/setenv.o
+endif
+ifdef NO_MKDTEMP
+	COMPAT_CFLAGS += -DNO_MKDTEMP
+	COMPAT_OBJS += compat/mkdtemp.o
+endif
+ifdef NO_UNSETENV
+	COMPAT_CFLAGS += -DNO_UNSETENV
+	COMPAT_OBJS += compat/unsetenv.o
+endif
+ifdef NO_SYS_SELECT_H
+	BASIC_CFLAGS += -DNO_SYS_SELECT_H
+endif
+ifdef NO_MMAP
+	COMPAT_CFLAGS += -DNO_MMAP
+	COMPAT_OBJS += compat/mmap.o
+else
+	ifdef USE_WIN32_MMAP
+		COMPAT_CFLAGS += -DUSE_WIN32_MMAP
+		COMPAT_OBJS += compat/win32mmap.o
+	endif
+endif
+ifdef NO_PREAD
+	COMPAT_CFLAGS += -DNO_PREAD
+	COMPAT_OBJS += compat/pread.o
+endif
+ifdef NO_FAST_WORKING_DIRECTORY
+	BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
+endif
+ifdef NO_TRUSTABLE_FILEMODE
+	BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
+endif
+ifdef NO_IPV6
+	BASIC_CFLAGS += -DNO_IPV6
+endif
+ifdef NO_UINTMAX_T
+	BASIC_CFLAGS += -Duintmax_t=uint32_t
+endif
+ifdef NO_SOCKADDR_STORAGE
+ifdef NO_IPV6
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
+else
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
+endif
+endif
+ifdef NO_INET_NTOP
+	LIB_OBJS += compat/inet_ntop.o
+endif
+ifdef NO_INET_PTON
+	LIB_OBJS += compat/inet_pton.o
+endif
+
+ifdef NO_ICONV
+	BASIC_CFLAGS += -DNO_ICONV
+endif
+
+ifdef OLD_ICONV
+	BASIC_CFLAGS += -DOLD_ICONV
+endif
+
+ifdef NO_DEFLATE_BOUND
+	BASIC_CFLAGS += -DNO_DEFLATE_BOUND
+endif
+
+ifdef PPC_SHA1
+	SHA1_HEADER = "ppc/sha1.h"
+	LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
+ifdef ARM_SHA1
+	SHA1_HEADER = "arm/sha1.h"
+	LIB_OBJS += arm/sha1.o arm/sha1_arm.o
+else
+ifdef MOZILLA_SHA1
+	SHA1_HEADER = "mozilla-sha1/sha1.h"
+	LIB_OBJS += mozilla-sha1/sha1.o
+else
+	SHA1_HEADER = <openssl/sha.h>
+	EXTLIBS += $(LIB_4_CRYPTO)
+endif
+endif
+endif
+ifdef NO_PERL_MAKEMAKER
+	export NO_PERL_MAKEMAKER
+endif
+ifdef NO_HSTRERROR
+	COMPAT_CFLAGS += -DNO_HSTRERROR
+	COMPAT_OBJS += compat/hstrerror.o
+endif
+ifdef NO_MEMMEM
+	COMPAT_CFLAGS += -DNO_MEMMEM
+	COMPAT_OBJS += compat/memmem.o
+endif
+ifdef INTERNAL_QSORT
+	COMPAT_CFLAGS += -DINTERNAL_QSORT
+	COMPAT_OBJS += compat/qsort.o
+endif
+ifdef RUNTIME_PREFIX
+	COMPAT_CFLAGS += -DRUNTIME_PREFIX
+endif
+
+ifdef DIR_HAS_BSD_GROUP_SEMANTICS
+	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
+endif
+ifdef NO_EXTERNAL_GREP
+	BASIC_CFLAGS += -DNO_EXTERNAL_GREP
+endif
+
+ifeq ($(PERL_PATH),)
+NO_PERL=NoThanks
+endif
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_AR       = @echo '   ' AR $@;
+	QUIET_LINK     = @echo '   ' LINK $@;
+	QUIET_BUILT_IN = @echo '   ' BUILTIN $@;
+	QUIET_GEN      = @echo '   ' GEN $@;
+	QUIET_SUBDIR0  = +@subdir=
+	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			 $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+	export QUIET_GEN
+	export QUIET_BUILT_IN
+endif
+endif
+
+ifdef ASCIIDOC8
+	export ASCIIDOC8
+endif
+
+# Shell quote (do not use $(call) to accommodate ancient setups);
+
+SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
+ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+mandir_SQ = $(subst ','\'',$(mandir))
+infodir_SQ = $(subst ','\'',$(infodir))
+perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+htmldir_SQ = $(subst ','\'',$(htmldir))
+prefix_SQ = $(subst ','\'',$(prefix))
+
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
+
+LIBS = $(PERFLIBS) $(EXTLIBS)
+
+BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
+	$(COMPAT_CFLAGS)
+LIB_OBJS += $(COMPAT_OBJS)
+
+ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_LDFLAGS += $(BASIC_LDFLAGS)
+
+export TAR INSTALL DESTDIR SHELL_PATH
+
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
+endif
+
+all::
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+	@$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) perf$X
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X
+
+perf.o: perf.c common-cmds.h PERF-CFLAGS
+	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		$(ALL_CFLAGS) -c $(filter %.c,$^)
+
+perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
+		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
+
+builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
+		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(BUILT_INS): perf$X
+	$(QUIET_BUILT_IN)$(RM) $@ && \
+	ln perf$X $@ 2>/dev/null || \
+	ln -s perf$X $@ 2>/dev/null || \
+	cp perf$X $@
+
+common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+common-cmds.h: $(wildcard Documentation/perf-*.txt)
+	$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
+	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
+	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
+	    -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
+	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
+	    $@.sh >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+
+configure: configure.ac
+	$(QUIET_GEN)$(RM) $@ $<+ && \
+	sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
+	    $< > $<+ && \
+	autoconf -o $@ $<+ && \
+	$(RM) $<+
+
+# These can record PERF_VERSION
+perf.o perf.spec \
+	$(patsubst %.sh,%,$(SCRIPT_SH)) \
+	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
+	: PERF-VERSION-FILE
+
+%.o: %.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+%.s: %.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
+%.o: %.S
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+
+util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+		'-DBINDIR="$(bindir_relative_SQ)"' \
+		'-DPREFIX="$(prefix_SQ)"' \
+		$<
+
+builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
+
+util/config.o: util/config.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+perf-%$X: %.o $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+builtin-revert.o wt-status.o: wt-status.h
+
+$(LIB_FILE): $(LIB_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+doc:
+	$(MAKE) -C Documentation all
+
+man:
+	$(MAKE) -C Documentation man
+
+html:
+	$(MAKE) -C Documentation html
+
+info:
+	$(MAKE) -C Documentation info
+
+pdf:
+	$(MAKE) -C Documentation pdf
+
+TAGS:
+	$(RM) TAGS
+	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+
+tags:
+	$(RM) tags
+	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+
+cscope:
+	$(RM) cscope*
+	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
+             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
+
+PERF-CFLAGS: .FORCE-PERF-CFLAGS
+	@FLAGS='$(TRACK_CFLAGS)'; \
+	    if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
+		echo 1>&2 "    * new build flags or prefix"; \
+		echo "$$FLAGS" >PERF-CFLAGS; \
+            fi
+
+# We need to apply sq twice, once to protect from the shell
+# that runs PERF-BUILD-OPTIONS, and then again to protect it
+# and the first level quoting from the shell that runs "echo".
+PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
+	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
+	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
+	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
+	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
+
+### Testing rules
+
+#
+# None right now:
+#
+# TEST_PROGRAMS += test-something$X
+
+all:: $(TEST_PROGRAMS)
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+export NO_SVN_TESTS
+
+check: common-cmds.h
+	if sparse; \
+	then \
+		for i in *.c */*.c; \
+		do \
+			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+		done; \
+	else \
+		echo 2>&1 "Did you mean 'make test'?"; \
+		exit 1; \
+	fi
+
+remove-dashes:
+	./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
+
+### Installation rules
+
+ifneq ($(filter /%,$(firstword $(template_dir))),)
+template_instdir = $(template_dir)
+else
+template_instdir = $(prefix)/$(template_dir)
+endif
+export template_instdir
+
+ifneq ($(filter /%,$(firstword $(perfexecdir))),)
+perfexec_instdir = $(perfexecdir)
+else
+perfexec_instdir = $(prefix)/$(perfexecdir)
+endif
+perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+export perfexec_instdir
+
+install: all
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+ifdef BUILT_INS
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
+endif
+endif
+
+install-doc:
+	$(MAKE) -C Documentation install
+
+install-man:
+	$(MAKE) -C Documentation install-man
+
+install-html:
+	$(MAKE) -C Documentation install-html
+
+install-info:
+	$(MAKE) -C Documentation install-info
+
+install-pdf:
+	$(MAKE) -C Documentation install-pdf
+
+quick-install-doc:
+	$(MAKE) -C Documentation quick-install
+
+quick-install-man:
+	$(MAKE) -C Documentation quick-install-man
+
+quick-install-html:
+	$(MAKE) -C Documentation quick-install-html
+
+
+### Maintainer's dist rules
+#
+# None right now
+#
+#
+# perf.spec: perf.spec.in
+#	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
+#	mv $@+ $@
+#
+# PERF_TARNAME=perf-$(PERF_VERSION)
+# dist: perf.spec perf-archive$(X) configure
+#	./perf-archive --format=tar \
+#		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
+#	@mkdir -p $(PERF_TARNAME)
+#	@cp perf.spec configure $(PERF_TARNAME)
+#	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
+#	$(TAR) rf $(PERF_TARNAME).tar \
+#		$(PERF_TARNAME)/perf.spec \
+#		$(PERF_TARNAME)/configure \
+#		$(PERF_TARNAME)/version
+#	@$(RM) -r $(PERF_TARNAME)
+#	gzip -f -9 $(PERF_TARNAME).tar
+#
+# htmldocs = perf-htmldocs-$(PERF_VERSION)
+# manpages = perf-manpages-$(PERF_VERSION)
+# dist-doc:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir .doc-tmp-dir
+#	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
+#	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
+#	gzip -n -9 -f $(htmldocs).tar
+#	:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
+#	$(MAKE) -C Documentation DESTDIR=./ \
+#		man1dir=../.doc-tmp-dir/man1 \
+#		man5dir=../.doc-tmp-dir/man5 \
+#		man7dir=../.doc-tmp-dir/man7 \
+#		install
+#	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
+#	gzip -n -9 -f $(manpages).tar
+#	$(RM) -r .doc-tmp-dir
+#
+# rpm: dist
+#	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
+
+### Cleaning rules
+
+distclean: clean
+#	$(RM) configure
+
+clean:
+	$(RM) *.o */*.o $(LIB_FILE)
+	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
+	$(RM) $(TEST_PROGRAMS)
+	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
+	$(RM) -r autom4te.cache
+	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
+	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
+	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
+	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
+	$(MAKE) -C Documentation/ clean
+	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
+
+.PHONY: all install clean strip
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: .FORCE-PERF-BUILD-OPTIONS
+
+### Make sure built-ins do not have dups and listed in perf.c
+#
+check-builtins::
+	./check-builtins.sh
+
+### Test suite coverage testing
+#
+# None right now
+#
+# .PHONY: coverage coverage-clean coverage-build coverage-report
+#
+# coverage:
+#	$(MAKE) coverage-build
+#	$(MAKE) coverage-report
+#
+# coverage-clean:
+#	rm -f *.gcda *.gcno
+#
+# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
+# COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
+#
+# coverage-build: coverage-clean
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
+#		-j1 test
+#
+# coverage-report:
+#	gcov -b *.c */*.c
+#	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
+#		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
+#		| tee coverage-untested-functions
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
new file mode 100644
index 0000000..b1ed5f7
--- /dev/null
+++ b/tools/perf/builtin-annotate.c
@@ -0,0 +1,1356 @@
+/*
+ * builtin-annotate.c
+ *
+ * Builtin annotate command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include "util/list.h"
+#include "util/cache.h"
+#include "util/rbtree.h"
+#include "util/symbol.h"
+#include "util/string.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char		const *input_name = "perf.data";
+static char		*vmlinux = "vmlinux";
+
+static char		default_sort_order[] = "comm,symbol";
+static char		*sort_order = default_sort_order;
+
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
+static int		verbose;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	char comm[16];
+};
+
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
+struct period_event {
+	struct perf_event_header header;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
+} event_t;
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+static struct dso *vdso;
+
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+	int nr;
+
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
+
+	nr = dso__load(dso, NULL, verbose);
+	if (nr < 0) {
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
+	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
+static int load_kernel(void)
+{
+	int err;
+
+	kernel_dso = dso__new("[kernel]", 0);
+	if (!kernel_dso)
+		return -1;
+
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
+
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
+	return err;
+}
+
+struct map {
+	struct list_head node;
+	__u64	 start;
+	__u64	 end;
+	__u64	 pgoff;
+	__u64	 (*map_ip)(struct map *, __u64);
+	struct dso	 *dso;
+};
+
+static __u64 map__map_ip(struct map *map, __u64 ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static __u64 vdso__map_ip(struct map *map, __u64 ip)
+{
+	return ip;
+}
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		const char *filename = event->filename;
+
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(filename);
+		if (self->dso == NULL)
+			goto out_delete;
+
+		if (self->dso == vdso)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
+struct thread {
+	struct rb_node	 rb_node;
+	struct list_head maps;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = malloc(32);
+		if (self->comm)
+			snprintf(self->comm, 32, ":%d", self->pid);
+		INIT_LIST_HEAD(&self->maps);
+	}
+
+	return self;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
+static struct rb_root threads;
+static struct thread *last_match;
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
+	}
+
+	return th;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
+	list_add_tail(&map->node, &self->maps);
+}
+
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
+static struct map *thread__find_map(struct thread *self, __u64 ip)
+{
+	struct map *pos;
+
+	if (self == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	__u64	 ip;
+	char		 level;
+
+	uint32_t	 count;
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+}
+
+static struct sort_entry sort_thread = {
+	.header = "         Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s", self->thread->comm);
+}
+
+static struct sort_entry sort_comm = {
+	.header		= "         Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+};
+
+/* --sort dso */
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	if (self->dso)
+		return fprintf(fp, "%-25s", self->dso->name);
+
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
+}
+
+static struct sort_entry sort_dso = {
+	.header = "Shared Object            ",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+/* --sort symbol */
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	__u64 ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static int sort__need_collapse = 0;
+
+struct sort_dimension {
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+/*
+ * collect histogram counts
+ */
+static void hist_hit(struct hist_entry *he, __u64 ip)
+{
+	unsigned int sym_size, offset;
+	struct symbol *sym = he->sym;
+
+	he->count++;
+
+	if (!sym || !sym->hist)
+		return;
+
+	sym_size = sym->end - sym->start;
+	offset = ip - sym->start;
+
+	if (offset >= sym_size)
+		return;
+
+	sym->hist_sum++;
+	sym->hist[offset]++;
+
+	if (verbose >= 3)
+		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
+			(void *)(unsigned long)he->sym->start,
+			he->sym->name,
+			(void *)(unsigned long)ip, ip - he->sym->start,
+			sym->hist[offset]);
+}
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, __u64 ip, char level)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= 1,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			hist_hit(he, ip);
+
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
+}
+
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+static void output__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n);
+	}
+}
+
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
+
+static int
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	__u64 ip = event->ip.ip;
+	struct map *map = NULL;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
+
+		dso = kernel_dso;
+
+		dprintf(" ...... dso: %s\n", dso->name);
+
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+
+		show = SHOW_USER;
+		level = '.';
+
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			ip = map->map_ip(map, ip);
+			dso = map->dso;
+		} else {
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
+		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
+
+	if (show & show_mask) {
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
+
+		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
+			return -1;
+		}
+	}
+	total++;
+
+	return 0;
+}
+
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->mmap.pid,
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return 0;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
+	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
+{
+	char *line = NULL, *tmp, *tmp2;
+	unsigned int offset;
+	size_t line_len;
+	__u64 line_ip;
+	int ret;
+	char *c;
+
+	if (getline(&line, &line_len, file) < 0)
+		return -1;
+	if (!line)
+		return -1;
+
+	c = strchr(line, '\n');
+	if (c)
+		*c = 0;
+
+	line_ip = -1;
+	offset = 0;
+	ret = -2;
+
+	/*
+	 * Strip leading spaces:
+	 */
+	tmp = line;
+	while (*tmp) {
+		if (*tmp != ' ')
+			break;
+		tmp++;
+	}
+
+	if (*tmp) {
+		/*
+		 * Parse hexa addresses followed by ':'
+		 */
+		line_ip = strtoull(tmp, &tmp2, 16);
+		if (*tmp2 != ':')
+			line_ip = -1;
+	}
+
+	if (line_ip != -1) {
+		unsigned int hits = 0;
+		double percent = 0.0;
+		char *color = PERF_COLOR_NORMAL;
+
+		offset = line_ip - start;
+		if (offset < len)
+			hits = sym->hist[offset];
+
+		if (sym->hist_sum)
+			percent = 100.0 * hits / sym->hist_sum;
+
+		/*
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		else {
+			if (percent > 0.5)
+				color = PERF_COLOR_GREEN;
+		}
+
+		color_fprintf(stdout, color, " %7.2f", percent);
+		printf(" :	");
+		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line);
+	} else {
+		if (!*line)
+			printf("         :\n");
+		else
+			printf("         :	%s\n", line);
+	}
+
+	return 0;
+}
+
+static void annotate_sym(struct dso *dso, struct symbol *sym)
+{
+	char *filename = dso->name;
+	__u64 start, end, len;
+	char command[PATH_MAX*2];
+	FILE *file;
+
+	if (!filename)
+		return;
+	if (dso == kernel_dso)
+		filename = vmlinux;
+
+	printf("\n------------------------------------------------\n");
+	printf(" Percent |	Source code & Disassembly of %s\n", filename);
+	printf("------------------------------------------------\n");
+
+	if (verbose >= 2)
+		printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+
+	start = sym->obj_start;
+	if (!start)
+		start = sym->start;
+
+	end = start + sym->end - sym->start + 1;
+	len = sym->end - sym->start;
+
+	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
+
+	if (verbose >= 3)
+		printf("doing: %s\n", command);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	while (!feof(file)) {
+		if (parse_line(file, sym, start, len) < 0)
+			break;
+	}
+
+	pclose(file);
+}
+
+static void find_annotations(void)
+{
+	struct rb_node *nd;
+	struct dso *dso;
+	int count = 0;
+
+	list_for_each_entry(dso, &dsos, node) {
+
+		for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
+			struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+
+			if (sym->hist) {
+				annotate_sym(dso, sym);
+				count++;
+			}
+		}
+	}
+
+	if (!count)
+		printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
+}
+
+static int __cmd_annotate(void)
+{
+	int ret, rc = EXIT_FAILURE;
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+	close(input);
+
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
+	dprintf(" unknown events: %10ld\n", total_unknown);
+
+	if (dump_trace)
+		return 0;
+
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
+	if (verbose >= 2)
+		dsos__fprintf(stdout);
+
+	collapse__resort();
+	output__resort();
+
+	find_annotations();
+
+	return rc;
+}
+
+static const char * const annotate_usage[] = {
+	"perf annotate [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
+		    "symbol to annotate"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_END()
+};
+
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(annotate_usage, options);
+		}
+	}
+
+	free(str);
+}
+
+int cmd_annotate(int argc, const char **argv, const char *prefix)
+{
+	symbol__init();
+
+	page_size = getpagesize();
+
+	argc = parse_options(argc, argv, options, annotate_usage, 0);
+
+	setup_sorting();
+
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume tha
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(annotate_usage, options);
+
+		sym_hist_filter = argv[0];
+	}
+
+	if (!sym_hist_filter)
+		usage_with_options(annotate_usage, options);
+
+	setup_pager();
+
+	return __cmd_annotate();
+}
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
new file mode 100644
index 0000000..0f32dc3
--- /dev/null
+++ b/tools/perf/builtin-help.c
@@ -0,0 +1,461 @@
+/*
+ * builtin-help.c
+ *
+ * Builtin help command
+ */
+#include "util/cache.h"
+#include "builtin.h"
+#include "util/exec_cmd.h"
+#include "common-cmds.h"
+#include "util/parse-options.h"
+#include "util/run-command.h"
+#include "util/help.h"
+
+static struct man_viewer_list {
+	struct man_viewer_list *next;
+	char name[FLEX_ARRAY];
+} *man_viewer_list;
+
+static struct man_viewer_info_list {
+	struct man_viewer_info_list *next;
+	const char *info;
+	char name[FLEX_ARRAY];
+} *man_viewer_info_list;
+
+enum help_format {
+	HELP_FORMAT_MAN,
+	HELP_FORMAT_INFO,
+	HELP_FORMAT_WEB,
+};
+
+static int show_all = 0;
+static enum help_format help_format = HELP_FORMAT_MAN;
+static struct option builtin_help_options[] = {
+	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
+	OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
+	OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
+			HELP_FORMAT_WEB),
+	OPT_SET_INT('i', "info", &help_format, "show info page",
+			HELP_FORMAT_INFO),
+	OPT_END(),
+};
+
+static const char * const builtin_help_usage[] = {
+	"perf help [--all] [--man|--web|--info] [command]",
+	NULL
+};
+
+static enum help_format parse_help_format(const char *format)
+{
+	if (!strcmp(format, "man"))
+		return HELP_FORMAT_MAN;
+	if (!strcmp(format, "info"))
+		return HELP_FORMAT_INFO;
+	if (!strcmp(format, "web") || !strcmp(format, "html"))
+		return HELP_FORMAT_WEB;
+	die("unrecognized help format '%s'", format);
+}
+
+static const char *get_man_viewer_info(const char *name)
+{
+	struct man_viewer_info_list *viewer;
+
+	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
+	{
+		if (!strcasecmp(name, viewer->name))
+			return viewer->info;
+	}
+	return NULL;
+}
+
+static int check_emacsclient_version(void)
+{
+	struct strbuf buffer = STRBUF_INIT;
+	struct child_process ec_process;
+	const char *argv_ec[] = { "emacsclient", "--version", NULL };
+	int version;
+
+	/* emacsclient prints its version number on stderr */
+	memset(&ec_process, 0, sizeof(ec_process));
+	ec_process.argv = argv_ec;
+	ec_process.err = -1;
+	ec_process.stdout_to_stderr = 1;
+	if (start_command(&ec_process)) {
+		fprintf(stderr, "Failed to start emacsclient.\n");
+		return -1;
+	}
+	strbuf_read(&buffer, ec_process.err, 20);
+	close(ec_process.err);
+
+	/*
+	 * Don't bother checking return value, because "emacsclient --version"
+	 * seems to always exits with code 1.
+	 */
+	finish_command(&ec_process);
+
+	if (prefixcmp(buffer.buf, "emacsclient")) {
+		fprintf(stderr, "Failed to parse emacsclient version.\n");
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_remove(&buffer, 0, strlen("emacsclient"));
+	version = atoi(buffer.buf);
+
+	if (version < 22) {
+		fprintf(stderr,
+			"emacsclient version '%d' too old (< 22).\n",
+			version);
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_release(&buffer);
+	return 0;
+}
+
+static void exec_woman_emacs(const char* path, const char *page)
+{
+	if (!check_emacsclient_version()) {
+		/* This works only with emacsclient version >= 22. */
+		struct strbuf man_page = STRBUF_INIT;
+
+		if (!path)
+			path = "emacsclient";
+		strbuf_addf(&man_page, "(woman \"%s\")", page);
+		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_konqueror(const char* path, const char *page)
+{
+	const char *display = getenv("DISPLAY");
+	if (display && *display) {
+		struct strbuf man_page = STRBUF_INIT;
+		const char *filename = "kfmclient";
+
+		/* It's simpler to launch konqueror using kfmclient. */
+		if (path) {
+			const char *file = strrchr(path, '/');
+			if (file && !strcmp(file + 1, "konqueror")) {
+				char *new = strdup(path);
+				char *dest = strrchr(new, '/');
+
+				/* strlen("konqueror") == strlen("kfmclient") */
+				strcpy(dest + 1, "kfmclient");
+				path = new;
+			}
+			if (file)
+				filename = file;
+		} else
+			path = "kfmclient";
+		strbuf_addf(&man_page, "man:%s(1)", page);
+		execlp(path, filename, "newTab", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_man(const char* path, const char *page)
+{
+	if (!path)
+		path = "man";
+	execlp(path, "man", page, NULL);
+	warning("failed to exec '%s': %s", path, strerror(errno));
+}
+
+static void exec_man_cmd(const char *cmd, const char *page)
+{
+	struct strbuf shell_cmd = STRBUF_INIT;
+	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
+	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+	warning("failed to exec '%s': %s", cmd, strerror(errno));
+}
+
+static void add_man_viewer(const char *name)
+{
+	struct man_viewer_list **p = &man_viewer_list;
+	size_t len = strlen(name);
+
+	while (*p)
+		p = &((*p)->next);
+	*p = calloc(1, (sizeof(**p) + len + 1));
+	strncpy((*p)->name, name, len);
+}
+
+static int supported_man_viewer(const char *name, size_t len)
+{
+	return (!strncasecmp("man", name, len) ||
+		!strncasecmp("woman", name, len) ||
+		!strncasecmp("konqueror", name, len));
+}
+
+static void do_add_man_viewer_info(const char *name,
+				   size_t len,
+				   const char *value)
+{
+	struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+
+	strncpy(new->name, name, len);
+	new->info = strdup(value);
+	new->next = man_viewer_info_list;
+	man_viewer_info_list = new;
+}
+
+static int add_man_viewer_path(const char *name,
+			       size_t len,
+			       const char *value)
+{
+	if (supported_man_viewer(name, len))
+		do_add_man_viewer_info(name, len, value);
+	else
+		warning("'%s': path for unsupported man viewer.\n"
+			"Please consider using 'man.<tool>.cmd' instead.",
+			name);
+
+	return 0;
+}
+
+static int add_man_viewer_cmd(const char *name,
+			      size_t len,
+			      const char *value)
+{
+	if (supported_man_viewer(name, len))
+		warning("'%s': cmd for supported man viewer.\n"
+			"Please consider using 'man.<tool>.path' instead.",
+			name);
+	else
+		do_add_man_viewer_info(name, len, value);
+
+	return 0;
+}
+
+static int add_man_viewer_info(const char *var, const char *value)
+{
+	const char *name = var + 4;
+	const char *subkey = strrchr(name, '.');
+
+	if (!subkey)
+		return error("Config with no key for man viewer: %s", name);
+
+	if (!strcmp(subkey, ".path")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_path(name, subkey - name, value);
+	}
+	if (!strcmp(subkey, ".cmd")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_cmd(name, subkey - name, value);
+	}
+
+	warning("'%s': unsupported man viewer sub key.", subkey);
+	return 0;
+}
+
+static int perf_help_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.format")) {
+		if (!value)
+			return config_error_nonbool(var);
+		help_format = parse_help_format(value);
+		return 0;
+	}
+	if (!strcmp(var, "man.viewer")) {
+		if (!value)
+			return config_error_nonbool(var);
+		add_man_viewer(value);
+		return 0;
+	}
+	if (!prefixcmp(var, "man."))
+		return add_man_viewer_info(var, value);
+
+	return perf_default_config(var, value, cb);
+}
+
+static struct cmdnames main_cmds, other_cmds;
+
+void list_common_cmds_help(void)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		if (longest < strlen(common_cmds[i].name))
+			longest = strlen(common_cmds[i].name);
+	}
+
+	puts(" The most commonly used perf commands are:");
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		printf("   %s   ", common_cmds[i].name);
+		mput_char(' ', longest - strlen(common_cmds[i].name));
+		puts(common_cmds[i].help);
+	}
+}
+
+static int is_perf_command(const char *s)
+{
+	return is_in_cmdlist(&main_cmds, s) ||
+		is_in_cmdlist(&other_cmds, s);
+}
+
+static const char *prepend(const char *prefix, const char *cmd)
+{
+	size_t pre_len = strlen(prefix);
+	size_t cmd_len = strlen(cmd);
+	char *p = malloc(pre_len + cmd_len + 1);
+	memcpy(p, prefix, pre_len);
+	strcpy(p + pre_len, cmd);
+	return p;
+}
+
+static const char *cmd_to_page(const char *perf_cmd)
+{
+	if (!perf_cmd)
+		return "perf";
+	else if (!prefixcmp(perf_cmd, "perf"))
+		return perf_cmd;
+	else if (is_perf_command(perf_cmd))
+		return prepend("perf-", perf_cmd);
+	else
+		return prepend("perf-", perf_cmd);
+}
+
+static void setup_man_path(void)
+{
+	struct strbuf new_path = STRBUF_INIT;
+	const char *old_path = getenv("MANPATH");
+
+	/* We should always put ':' after our path. If there is no
+	 * old_path, the ':' at the end will let 'man' to try
+	 * system-wide paths after ours to find the manual page. If
+	 * there is old_path, we need ':' as delimiter. */
+	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
+	strbuf_addch(&new_path, ':');
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+
+	setenv("MANPATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+static void exec_viewer(const char *name, const char *page)
+{
+	const char *info = get_man_viewer_info(name);
+
+	if (!strcasecmp(name, "man"))
+		exec_man_man(info, page);
+	else if (!strcasecmp(name, "woman"))
+		exec_woman_emacs(info, page);
+	else if (!strcasecmp(name, "konqueror"))
+		exec_man_konqueror(info, page);
+	else if (info)
+		exec_man_cmd(info, page);
+	else
+		warning("'%s': unknown man viewer.", name);
+}
+
+static void show_man_page(const char *perf_cmd)
+{
+	struct man_viewer_list *viewer;
+	const char *page = cmd_to_page(perf_cmd);
+	const char *fallback = getenv("PERF_MAN_VIEWER");
+
+	setup_man_path();
+	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
+	{
+		exec_viewer(viewer->name, page); /* will return when unable */
+	}
+	if (fallback)
+		exec_viewer(fallback, page);
+	exec_viewer("man", page);
+	die("no man viewer handled the request");
+}
+
+static void show_info_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
+	execlp("info", "info", "perfman", page, NULL);
+}
+
+static void get_html_page_path(struct strbuf *page_path, const char *page)
+{
+	struct stat st;
+	const char *html_path = system_path(PERF_HTML_PATH);
+
+	/* Check that we have a perf documentation directory. */
+	if (stat(mkpath("%s/perf.html", html_path), &st)
+	    || !S_ISREG(st.st_mode))
+		die("'%s': not a documentation directory.", html_path);
+
+	strbuf_init(page_path, 0);
+	strbuf_addf(page_path, "%s/%s.html", html_path, page);
+}
+
+/*
+ * If open_html is not defined in a platform-specific way (see for
+ * example compat/mingw.h), we use the script web--browse to display
+ * HTML.
+ */
+#ifndef open_html
+static void open_html(const char *path)
+{
+	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
+}
+#endif
+
+static void show_html_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	struct strbuf page_path; /* it leaks but we exec bellow */
+
+	get_html_page_path(&page_path, page);
+
+	open_html(page_path.buf);
+}
+
+int cmd_help(int argc, const char **argv, const char *prefix)
+{
+	const char *alias;
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	perf_config(perf_help_config, NULL);
+
+	argc = parse_options(argc, argv, builtin_help_options,
+			builtin_help_usage, 0);
+
+	if (show_all) {
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_commands("perf commands", &main_cmds, &other_cmds);
+		printf(" %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	if (!argv[0]) {
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	alias = alias_lookup(argv[0]);
+	if (alias && !is_perf_command(argv[0])) {
+		printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
+		return 0;
+	}
+
+	switch (help_format) {
+	case HELP_FORMAT_MAN:
+		show_man_page(argv[0]);
+		break;
+	case HELP_FORMAT_INFO:
+		show_info_page(argv[0]);
+		break;
+	case HELP_FORMAT_WEB:
+		show_html_page(argv[0]);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
new file mode 100644
index 0000000..fe60e37
--- /dev/null
+++ b/tools/perf/builtin-list.c
@@ -0,0 +1,20 @@
+/*
+ * builtin-list.c
+ *
+ * Builtin list command: list all event types
+ *
+ * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+int cmd_list(int argc, const char **argv, const char *prefix)
+{
+	print_events();
+	return 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
new file mode 100644
index 0000000..0f5771f
--- /dev/null
+++ b/tools/perf/builtin-record.c
@@ -0,0 +1,585 @@
+/*
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+#include "util/string.h"
+
+#include <unistd.h>
+#include <sched.h>
+
+#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static long			default_interval		= 100000;
+
+static int			nr_cpus				= 0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 128;
+static int			freq				= 0;
+static int			output;
+static const char		*output_name			= "perf.data";
+static int			group				= 0;
+static unsigned int		realtime_prio			= 0;
+static int			system_wide			= 0;
+static pid_t			target_pid			= -1;
+static int			inherit				= 1;
+static int			force				= 0;
+static int			append_file			= 0;
+static int			verbose				= 0;
+
+static long			samples;
+static struct timeval		last_read;
+static struct timeval		this_read;
+
+static __u64			bytes_written;
+
+static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
+
+static int			nr_poll;
+static int			nr_cpu;
+
+struct mmap_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	__u64				start;
+	__u64				len;
+	__u64				pgoff;
+	char				filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	char				comm[16];
+};
+
+
+struct mmap_data {
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
+};
+
+static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and mess up the samples under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	if (old != head)
+		samples++;
+
+	size = head - old;
+
+	if ((old & md->mask) + size != (head & md->mask)) {
+		buf = &data[old & md->mask];
+		size = md->mask + 1 - (old & md->mask);
+		old += size;
+
+		while (size) {
+			int ret = write(output, buf, size);
+
+			if (ret < 0)
+				die("failed to write");
+
+			size -= ret;
+			buf += ret;
+
+			bytes_written += ret;
+		}
+	}
+
+	buf = &data[old & md->mask];
+	size = head - old;
+	old += size;
+
+	while (size) {
+		int ret = write(output, buf, size);
+
+		if (ret < 0)
+			die("failed to write");
+
+		size -= ret;
+		buf += ret;
+
+		bytes_written += ret;
+	}
+
+	md->prev = old;
+}
+
+static volatile int done = 0;
+static volatile int signr = -1;
+
+static void sig_handler(int sig)
+{
+	done = 1;
+	signr = sig;
+}
+
+static void sig_atexit(void)
+{
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	kill(getpid(), signr);
+}
+
+static void pid_synthesize_comm_event(pid_t pid, int full)
+{
+	struct comm_event comm_ev;
+	char filename[PATH_MAX];
+	char bf[BUFSIZ];
+	int fd, ret;
+	size_t size;
+	char *field, *sep;
+	DIR *tasks;
+	struct dirent dirent, *next;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	if (read(fd, bf, sizeof(bf)) < 0) {
+		fprintf(stderr, "couldn't read %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+
+	/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
+	memset(&comm_ev, 0, sizeof(comm_ev));
+	field = strchr(bf, '(');
+	if (field == NULL)
+		goto out_failure;
+	sep = strchr(++field, ')');
+	if (sep == NULL)
+		goto out_failure;
+	size = sep - field;
+	memcpy(comm_ev.comm, field, size++);
+
+	comm_ev.pid = pid;
+	comm_ev.header.type = PERF_EVENT_COMM;
+	size = ALIGN(size, sizeof(__u64));
+	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
+
+	if (!full) {
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+		return;
+	}
+
+	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
+
+	tasks = opendir(filename);
+	while (!readdir_r(tasks, &dirent, &next) && next) {
+		char *end;
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end)
+			continue;
+
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+	}
+	closedir(tasks);
+	return;
+
+out_failure:
+	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
+		filename);
+	exit(EXIT_FAILURE);
+}
+
+static void pid_synthesize_mmap_samples(pid_t pid)
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	while (1) {
+		char bf[BUFSIZ], *pbf = bf;
+		struct mmap_event mmap_ev = {
+			.header.type = PERF_EVENT_MMAP,
+		};
+		int n;
+		size_t size;
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			break;
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		n = hex2u64(pbf, &mmap_ev.start);
+		if (n < 0)
+			continue;
+		pbf += n + 1;
+		n = hex2u64(pbf, &mmap_ev.len);
+		if (n < 0)
+			continue;
+		pbf += n + 3;
+		if (*pbf == 'x') { /* vm_exec */
+			char *execname = strrchr(bf, ' ');
+
+			if (execname == NULL || execname[1] != '/')
+				continue;
+
+			execname += 1;
+			size = strlen(execname);
+			execname[size - 1] = '\0'; /* Remove \n */
+			memcpy(mmap_ev.filename, execname, size);
+			size = ALIGN(size, sizeof(__u64));
+			mmap_ev.len -= mmap_ev.start;
+			mmap_ev.header.size = (sizeof(mmap_ev) -
+					       (sizeof(mmap_ev.filename) - size));
+			mmap_ev.pid = pid;
+			mmap_ev.tid = pid;
+
+			if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
+				perror("failed to write");
+				exit(-1);
+			}
+		}
+	}
+
+	fclose(fp);
+}
+
+static void synthesize_samples(void)
+{
+	DIR *proc;
+	struct dirent dirent, *next;
+
+	proc = opendir("/proc");
+
+	while (!readdir_r(proc, &dirent, &next) && next) {
+		char *end;
+		pid_t pid;
+
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		pid_synthesize_comm_event(pid, 1);
+		pid_synthesize_mmap_samples(pid);
+	}
+
+	closedir(proc);
+}
+
+static int group_fd;
+
+static void create_counter(int counter, int cpu, pid_t pid)
+{
+	struct perf_counter_attr *attr = attrs + counter;
+	int track = 1;
+
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	if (freq) {
+		attr->sample_type	|= PERF_SAMPLE_PERIOD;
+		attr->freq		= 1;
+		attr->sample_freq	= freq;
+	}
+	attr->mmap		= track;
+	attr->comm		= track;
+	attr->inherit		= (cpu < 0) && inherit;
+	attr->disabled		= 1;
+
+	track = 0; /* only the first counter needs these */
+
+try_again:
+	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
+
+	if (fd[nr_cpu][counter] < 0) {
+		int err = errno;
+
+		if (err == EPERM)
+			die("Permission error - are you root?\n");
+
+		/*
+		 * If it's cycles then fall back to hrtimer
+		 * based cpu-clock-tick sw counter, which
+		 * is always available even if no PMU support:
+		 */
+		if (attr->type == PERF_TYPE_HARDWARE
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+
+			if (verbose)
+				warning(" ... trying to fall back to cpu-clock-ticks\n");
+			attr->type = PERF_TYPE_SOFTWARE;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
+			goto try_again;
+		}
+		printf("\n");
+		error("perfcounter syscall returned with %d (%s)\n",
+			fd[nr_cpu][counter], strerror(err));
+		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
+		exit(-1);
+	}
+
+	assert(fd[nr_cpu][counter] >= 0);
+	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
+
+	/*
+	 * First counter acts as the group leader:
+	 */
+	if (group && group_fd == -1)
+		group_fd = fd[nr_cpu][counter];
+
+	event_array[nr_poll].fd = fd[nr_cpu][counter];
+	event_array[nr_poll].events = POLLIN;
+	nr_poll++;
+
+	mmap_array[nr_cpu][counter].counter = counter;
+	mmap_array[nr_cpu][counter].prev = 0;
+	mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+	mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+			PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+	if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+		error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+		exit(-1);
+	}
+
+	ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
+}
+
+static void open_counters(int cpu, pid_t pid)
+{
+	int counter;
+
+	if (pid > 0) {
+		pid_synthesize_comm_event(pid, 0);
+		pid_synthesize_mmap_samples(pid);
+	}
+
+	group_fd = -1;
+	for (counter = 0; counter < nr_counters; counter++)
+		create_counter(counter, cpu, pid);
+
+	nr_cpu++;
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+	int i, counter;
+	struct stat st;
+	pid_t pid;
+	int flags;
+	int ret;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (!stat(output_name, &st) && !force && !append_file) {
+		fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
+				output_name);
+		exit(-1);
+	}
+
+	flags = O_CREAT|O_RDWR;
+	if (append_file)
+		flags |= O_APPEND;
+	else
+		flags |= O_TRUNC;
+
+	output = open(output_name, flags, S_IRUSR|S_IWUSR);
+	if (output < 0) {
+		perror("failed to create output file");
+		exit(-1);
+	}
+
+	if (!system_wide) {
+		open_counters(-1, target_pid != -1 ? target_pid : getpid());
+	} else for (i = 0; i < nr_cpus; i++)
+		open_counters(i, target_pid);
+
+	atexit(sig_atexit);
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
+
+	if (target_pid == -1 && argc) {
+		pid = fork();
+		if (pid < 0)
+			perror("failed to fork");
+
+		if (!pid) {
+			if (execvp(argv[0], (char **)argv)) {
+				perror(argv[0]);
+				exit(-1);
+			}
+		}
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	if (system_wide)
+		synthesize_samples();
+
+	while (!done) {
+		int hits = samples;
+
+		for (i = 0; i < nr_cpu; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == samples)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	/*
+	 * Approximate RIP event size: 24 bytes.
+	 */
+	fprintf(stderr,
+		"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
+		(double)bytes_written / 1024.0 / 1024.0,
+		output_name,
+		bytes_written / 24);
+
+	return 0;
+}
+
+static const char * const record_usage[] = {
+	"perf record [<options>] [<command>]",
+	"perf record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "record events on existing pid"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('A', "append", &append_file,
+			    "append to the output file to do incremental profiling"),
+	OPT_BOOLEAN('f', "force", &force,
+			"overwrite existing data file"),
+	OPT_LONG('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_STRING('o', "output", &output_name, "file",
+		    "output file name"),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('F', "freq", &freq,
+		    "profile at this frequency"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+};
+
+int cmd_record(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	argc = parse_options(argc, argv, options, record_usage, 0);
+	if (!argc && target_pid == -1 && !system_wide)
+		usage_with_options(record_usage, options);
+
+	if (!nr_counters) {
+		nr_counters	= 1;
+		attrs[0].type	= PERF_TYPE_HARDWARE;
+		attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (attrs[counter].sample_period)
+			continue;
+
+		attrs[counter].sample_period = default_interval;
+	}
+
+	return __cmd_record(argc, argv);
+}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
new file mode 100644
index 0000000..82fa93b
--- /dev/null
+++ b/tools/perf/builtin-report.c
@@ -0,0 +1,1316 @@
+/*
+ * builtin-report.c
+ *
+ * Builtin report command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include "util/list.h"
+#include "util/cache.h"
+#include "util/rbtree.h"
+#include "util/symbol.h"
+#include "util/string.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char		const *input_name = "perf.data";
+static char		*vmlinux = NULL;
+
+static char		default_sort_order[] = "comm,dso";
+static char		*sort_order = default_sort_order;
+
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
+static int		verbose;
+static int		full_paths;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+	__u64 period;
+};
+
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	char comm[16];
+};
+
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
+struct period_event {
+	struct perf_event_header header;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
+} event_t;
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+static struct dso *vdso;
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+	int nr;
+
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
+
+	nr = dso__load(dso, NULL, verbose);
+	if (nr < 0) {
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
+	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
+static int load_kernel(void)
+{
+	int err;
+
+	kernel_dso = dso__new("[kernel]", 0);
+	if (!kernel_dso)
+		return -1;
+
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
+
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
+	return err;
+}
+
+static char __cwd[PATH_MAX];
+static char *cwd = __cwd;
+static int cwdlen;
+
+static int strcommon(const char *pathname)
+{
+	int n = 0;
+
+	while (pathname[n] == cwd[n] && n < cwdlen)
+		++n;
+
+	return n;
+}
+
+struct map {
+	struct list_head node;
+	__u64	 start;
+	__u64	 end;
+	__u64	 pgoff;
+	__u64	 (*map_ip)(struct map *, __u64);
+	struct dso	 *dso;
+};
+
+static __u64 map__map_ip(struct map *map, __u64 ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static __u64 vdso__map_ip(struct map *map, __u64 ip)
+{
+	return ip;
+}
+
+static inline int is_anon_memory(const char *filename)
+{
+     return strcmp(filename, "//anon") == 0;
+}
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		const char *filename = event->filename;
+		char newfilename[PATH_MAX];
+		int anon;
+
+		if (cwd) {
+			int n = strcommon(filename);
+
+			if (n == cwdlen) {
+				snprintf(newfilename, sizeof(newfilename),
+					 ".%s", filename + n);
+				filename = newfilename;
+			}
+		}
+
+		anon = is_anon_memory(filename);
+
+		if (anon) {
+			snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid);
+			filename = newfilename;
+		}
+
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(filename);
+		if (self->dso == NULL)
+			goto out_delete;
+
+		if (self->dso == vdso || anon)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
+struct thread {
+	struct rb_node	 rb_node;
+	struct list_head maps;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = malloc(32);
+		if (self->comm)
+			snprintf(self->comm, 32, ":%d", self->pid);
+		INIT_LIST_HEAD(&self->maps);
+	}
+
+	return self;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
+static struct rb_root threads;
+static struct thread *last_match;
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
+	}
+
+	return th;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
+	list_add_tail(&map->node, &self->maps);
+}
+
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
+static struct map *thread__find_map(struct thread *self, __u64 ip)
+{
+	struct map *pos;
+
+	if (self == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	__u64		 ip;
+	char		 level;
+
+	__u64		 count;
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+}
+
+static struct sort_entry sort_thread = {
+	.header = "         Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s", self->thread->comm);
+}
+
+static struct sort_entry sort_comm = {
+	.header		= "         Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+};
+
+/* --sort dso */
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	if (self->dso)
+		return fprintf(fp, "%-25s", self->dso->name);
+
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
+}
+
+static struct sort_entry sort_dso = {
+	.header = "Shared Object            ",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+/* --sort symbol */
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	__u64 ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static int sort__need_collapse = 0;
+
+struct sort_dimension {
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples)
+{
+	struct sort_entry *se;
+	size_t ret;
+
+	if (total_samples) {
+		double percent = self->count * 100.0 / total_samples;
+		char *color = PERF_COLOR_NORMAL;
+
+		/*
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
+		 */
+		if (percent >= 5.0) {
+			color = PERF_COLOR_RED;
+		} else {
+			if (percent >= 0.5)
+				color = PERF_COLOR_GREEN;
+		}
+
+		ret = color_fprintf(fp, color, "   %6.2f%%",
+				(self->count * 100.0) / total_samples);
+	} else
+		ret = fprintf(fp, "%12Ld ", self->count);
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		fprintf(fp, "  ");
+		ret += se->print(fp, self);
+	}
+
+	ret += fprintf(fp, "\n");
+
+	return ret;
+}
+
+/*
+ * collect histogram counts
+ */
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, __u64 ip, char level, __u64 count)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= count,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			he->count += count;
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
+}
+
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+static void output__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n);
+	}
+}
+
+static size_t output__fprintf(FILE *fp, __u64 total_samples)
+{
+	struct hist_entry *pos;
+	struct sort_entry *se;
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	fprintf(fp, "\n");
+	fprintf(fp, "#\n");
+	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
+	fprintf(fp, "#\n");
+
+	fprintf(fp, "# Overhead");
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		fprintf(fp, "  %s", se->header);
+	fprintf(fp, "\n");
+
+	fprintf(fp, "# ........");
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int i;
+
+		fprintf(fp, "  ");
+		for (i = 0; i < strlen(se->header); i++)
+			fprintf(fp, ".");
+	}
+	fprintf(fp, "\n");
+
+	fprintf(fp, "#\n");
+
+	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
+		ret += hist_entry__fprintf(fp, pos, total_samples);
+	}
+
+	if (!strcmp(sort_order, default_sort_order)) {
+		fprintf(fp, "#\n");
+		fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
+		fprintf(fp, "#\n");
+	}
+	fprintf(fp, "\n");
+
+	return ret;
+}
+
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
+
+static int
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	__u64 ip = event->ip.ip;
+	__u64 period = 1;
+	struct map *map = NULL;
+
+	if (event->header.type & PERF_SAMPLE_PERIOD)
+		period = event->ip.period;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip,
+		(long long)period);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
+
+		dso = kernel_dso;
+
+		dprintf(" ...... dso: %s\n", dso->name);
+
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+
+		show = SHOW_USER;
+		level = '.';
+
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			ip = map->map_ip(map, ip);
+			dso = map->dso;
+		} else {
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
+		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
+
+	if (show & show_mask) {
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
+
+		if (hist_entry__add(thread, map, dso, sym, ip, level, period)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
+			return -1;
+		}
+	}
+	total += period;
+
+	return 0;
+}
+
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->mmap.pid,
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return 0;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
+	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __cmd_report(void)
+{
+	int ret, rc = EXIT_FAILURE;
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		fprintf(stderr, " failed to open file: %s", input_name);
+		if (!strcmp(input_name, "perf.data"))
+			fprintf(stderr, "  (try 'perf record' first)");
+		fprintf(stderr, "\n");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+	if (!full_paths) {
+		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
+			perror("failed to get the current directory");
+			return EXIT_FAILURE;
+		}
+		cwdlen = strlen(cwd);
+	} else {
+		cwd = NULL;
+		cwdlen = 0;
+	}
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+	close(input);
+
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
+	dprintf(" unknown events: %10ld\n", total_unknown);
+
+	if (dump_trace)
+		return 0;
+
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
+	if (verbose >= 2)
+		dsos__fprintf(stdout);
+
+	collapse__resort();
+	output__resort();
+	output__fprintf(stdout, total);
+
+	return rc;
+}
+
+static const char * const report_usage[] = {
+	"perf report [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
+	OPT_BOOLEAN('P', "full-paths", &full_paths,
+		    "Don't shorten the pathnames taking into account the cwd"),
+	OPT_END()
+};
+
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(report_usage, options);
+		}
+	}
+
+	free(str);
+}
+
+int cmd_report(int argc, const char **argv, const char *prefix)
+{
+	symbol__init();
+
+	page_size = getpagesize();
+
+	argc = parse_options(argc, argv, options, report_usage, 0);
+
+	setup_sorting();
+
+	/*
+	 * Any (unrecognized) arguments left?
+	 */
+	if (argc)
+		usage_with_options(report_usage, options);
+
+	setup_pager();
+
+	return __cmd_report();
+}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
new file mode 100644
index 0000000..c43e4a9
--- /dev/null
+++ b/tools/perf/builtin-stat.c
@@ -0,0 +1,367 @@
+/*
+ * builtin-stat.c
+ *
+ * Builtin stat command: Give a precise performance counters summary
+ * overview about any workload, CPU or specific PID.
+ *
+ * Sample output:
+
+   $ perf stat ~/hackbench 10
+   Time: 0.104
+
+    Performance counter stats for '/home/mingo/hackbench':
+
+       1255.538611  task clock ticks     #      10.143 CPU utilization factor
+             54011  context switches     #       0.043 M/sec
+               385  CPU migrations       #       0.000 M/sec
+             17755  pagefaults           #       0.014 M/sec
+        3808323185  CPU cycles           #    3033.219 M/sec
+        1575111190  instructions         #    1254.530 M/sec
+          17367895  cache references     #      13.833 M/sec
+           7674421  cache misses         #       6.112 M/sec
+
+    Wall-clock time elapsed:   123.786620 msecs
+
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#include <sys/prctl.h>
+
+static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
+
+};
+
+static int			system_wide			=  0;
+static int			inherit				=  1;
+static int			verbose				=  0;
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int			target_pid			= -1;
+static int			nr_cpus				=  0;
+static unsigned int		page_size;
+
+static int			scale				=  1;
+
+static const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static __u64			event_res[MAX_COUNTERS][3];
+static __u64			event_scaled[MAX_COUNTERS];
+
+static __u64			runtime_nsecs;
+static __u64			walltime_nsecs;
+static __u64			runtime_cycles;
+
+static void create_perf_stat_counter(int counter)
+{
+	struct perf_counter_attr *attr = attrs + counter;
+
+	if (scale)
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0 && verbose) {
+				printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
+			}
+		}
+	} else {
+		attr->inherit	= inherit;
+		attr->disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
+		if (fd[0][counter] < 0 && verbose) {
+			printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
+		}
+	}
+}
+
+/*
+ * Does the counter have nsecs as a unit?
+ */
+static inline int nsec_counter(int counter)
+{
+	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
+		return 0;
+
+	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
+		return 1;
+
+	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Read out the results of a single counter:
+ */
+static void read_counter(int counter)
+{
+	__u64 *count, single_count[3];
+	ssize_t res;
+	int cpu, nv;
+	int scaled;
+
+	count = event_res[counter];
+
+	count[0] = count[1] = count[2] = 0;
+
+	nv = scale ? 3 : 1;
+	for (cpu = 0; cpu < nr_cpus; cpu ++) {
+		if (fd[cpu][counter] < 0)
+			continue;
+
+		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
+		assert(res == nv * sizeof(__u64));
+
+		count[0] += single_count[0];
+		if (scale) {
+			count[1] += single_count[1];
+			count[2] += single_count[2];
+		}
+	}
+
+	scaled = 0;
+	if (scale) {
+		if (count[2] == 0) {
+			event_scaled[counter] = -1;
+			count[0] = 0;
+			return;
+		}
+
+		if (count[2] < count[1]) {
+			event_scaled[counter] = 1;
+			count[0] = (unsigned long long)
+				((double)count[0] * count[1] / count[2] + 0.5);
+		}
+	}
+	/*
+	 * Save the full runtime - to allow normalization during printout:
+	 */
+	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+		runtime_nsecs = count[0];
+	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
+		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
+		runtime_cycles = count[0];
+}
+
+/*
+ * Print out the results of a single counter:
+ */
+static void print_counter(int counter)
+{
+	__u64 *count;
+	int scaled;
+
+	count = event_res[counter];
+	scaled = event_scaled[counter];
+
+	if (scaled == -1) {
+		fprintf(stderr, " %14s  %-20s\n",
+			"<not counted>", event_name(counter));
+		return;
+	}
+
+	if (nsec_counter(counter)) {
+		double msecs = (double)count[0] / 1000000;
+
+		fprintf(stderr, " %14.6f  %-20s",
+			msecs, event_name(counter));
+		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+			attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+
+			if (walltime_nsecs)
+				fprintf(stderr, " # %11.3f CPU utilization factor",
+					(double)count[0] / (double)walltime_nsecs);
+		}
+	} else {
+		fprintf(stderr, " %14Ld  %-20s",
+			count[0], event_name(counter));
+		if (runtime_nsecs)
+			fprintf(stderr, " # %11.3f M/sec",
+				(double)count[0]/runtime_nsecs*1000.0);
+		if (runtime_cycles &&
+			attrs[counter].type == PERF_TYPE_HARDWARE &&
+				attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
+
+			fprintf(stderr, " # %1.3f per cycle",
+				(double)count[0] / (double)runtime_cycles);
+		}
+	}
+	if (scaled)
+		fprintf(stderr, "  (scaled from %.2f%%)",
+			(double) count[2] / count[1] * 100);
+	fprintf(stderr, "\n");
+}
+
+static int do_perf_stat(int argc, const char **argv)
+{
+	unsigned long long t0, t1;
+	int counter;
+	int status;
+	int pid;
+	int i;
+
+	if (!system_wide)
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_perf_stat_counter(counter);
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+
+	if (!pid) {
+		if (execvp(argv[0], (char **)argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+
+	while (wait(&status) >= 0)
+		;
+
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	walltime_nsecs = t1 - t0;
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
+
+	for (i = 1; i < argc; i++)
+		fprintf(stderr, " %s", argv[i]);
+
+	fprintf(stderr, "\':\n");
+	fprintf(stderr, "\n");
+
+	for (counter = 0; counter < nr_counters; counter++)
+		read_counter(counter);
+
+	for (counter = 0; counter < nr_counters; counter++)
+		print_counter(counter);
+
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
+}
+
+static volatile int signr = -1;
+
+static void skip_signal(int signo)
+{
+	signr = signo;
+}
+
+static void sig_atexit(void)
+{
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	kill(getpid(), signr);
+}
+
+static const char * const stat_usage[] = {
+	"perf stat [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "stat events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('S', "scale", &scale,
+			    "scale/normalize counters"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+};
+
+int cmd_stat(int argc, const char **argv, const char *prefix)
+{
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	memcpy(attrs, default_attrs, sizeof(attrs));
+
+	argc = parse_options(argc, argv, options, stat_usage, 0);
+	if (!argc)
+		usage_with_options(stat_usage, options);
+
+	if (!nr_counters)
+		nr_counters = 8;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	/*
+	 * We dont want to block the signals - that would cause
+	 * child tasks to inherit that and Ctrl-C would not work.
+	 * What we want is for Ctrl-C to work in the exec()-ed
+	 * task, but being ignored by perf stat itself:
+	 */
+	atexit(sig_atexit);
+	signal(SIGINT,  skip_signal);
+	signal(SIGALRM, skip_signal);
+	signal(SIGABRT, skip_signal);
+
+	return do_perf_stat(argc, argv);
+}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
new file mode 100644
index 0000000..fe338d3
--- /dev/null
+++ b/tools/perf/builtin-top.c
@@ -0,0 +1,736 @@
+/*
+ * builtin-top.c
+ *
+ * Builtin top command: Display a continuously updated profile of
+ * any workload, CPU or specific PID.
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/symbol.h"
+#include "util/color.h"
+#include "util/util.h"
+#include "util/rbtree.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#include <assert.h>
+#include <fcntl.h>
+
+#include <stdio.h>
+
+#include <errno.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int			system_wide			=  0;
+
+static int			default_interval		= 100000;
+
+static __u64			count_filter			=  5;
+static int			print_entries			= 15;
+
+static int			target_pid			= -1;
+static int			profile_cpu			= -1;
+static int			nr_cpus				=  0;
+static unsigned int		realtime_prio			=  0;
+static int			group				=  0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 16;
+static int			freq				=  0;
+static int			verbose				=  0;
+
+static char			*sym_filter;
+static unsigned long		filter_start;
+static unsigned long		filter_end;
+
+static int			delay_secs			=  2;
+static int			zero;
+static int			dump_symtab;
+
+/*
+ * Symbols
+ */
+
+static __u64			min_ip;
+static __u64			max_ip = -1ll;
+
+struct sym_entry {
+	struct rb_node		rb_node;
+	struct list_head	node;
+	unsigned long		count[MAX_COUNTERS];
+	unsigned long		snap_count;
+	double			weight;
+	int			skip;
+};
+
+struct sym_entry		*sym_filter_entry;
+
+struct dso			*kernel_dso;
+
+/*
+ * Symbols will be added here in record_ip and will get out
+ * after decayed.
+ */
+static LIST_HEAD(active_symbols);
+static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Ordering weight: count-1 * count-2 * ... / count-n
+ */
+static double sym_weight(const struct sym_entry *sym)
+{
+	double weight = sym->snap_count;
+	int counter;
+
+	for (counter = 1; counter < nr_counters-1; counter++)
+		weight *= sym->count[counter];
+
+	weight /= (sym->count[counter] + 1);
+
+	return weight;
+}
+
+static long			samples;
+static long			userspace_samples;
+static const char		CONSOLE_CLEAR[] = "";
+
+static void __list_insert_active_sym(struct sym_entry *syme)
+{
+	list_add(&syme->node, &active_symbols);
+}
+
+static void list_remove_active_sym(struct sym_entry *syme)
+{
+	pthread_mutex_lock(&active_symbols_lock);
+	list_del_init(&syme->node);
+	pthread_mutex_unlock(&active_symbols_lock);
+}
+
+static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+{
+	struct rb_node **p = &tree->rb_node;
+	struct rb_node *parent = NULL;
+	struct sym_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct sym_entry, rb_node);
+
+		if (se->weight > iter->weight)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&se->rb_node, parent, p);
+	rb_insert_color(&se->rb_node, tree);
+}
+
+static void print_sym_table(void)
+{
+	int printed = 0, j;
+	int counter;
+	float samples_per_sec = samples/delay_secs;
+	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
+	float sum_ksamples = 0.0;
+	struct sym_entry *syme, *n;
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *nd;
+
+	samples = userspace_samples = 0;
+
+	/* Sort the active symbols */
+	pthread_mutex_lock(&active_symbols_lock);
+	syme = list_entry(active_symbols.next, struct sym_entry, node);
+	pthread_mutex_unlock(&active_symbols_lock);
+
+	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+		syme->snap_count = syme->count[0];
+		if (syme->snap_count != 0) {
+			syme->weight = sym_weight(syme);
+			rb_insert_active_sym(&tmp, syme);
+			sum_ksamples += syme->snap_count;
+
+			for (j = 0; j < nr_counters; j++)
+				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
+		} else
+			list_remove_active_sym(syme);
+	}
+
+	puts(CONSOLE_CLEAR);
+
+	printf(
+"------------------------------------------------------------------------------\n");
+	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
+		samples_per_sec,
+		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
+
+	if (nr_counters == 1) {
+		printf("%Ld", attrs[0].sample_period);
+		if (freq)
+			printf("Hz ");
+		else
+			printf(" ");
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (counter)
+			printf("/");
+
+		printf("%s", event_name(counter));
+	}
+
+	printf( "], ");
+
+	if (target_pid != -1)
+		printf(" (target_pid: %d", target_pid);
+	else
+		printf(" (all");
+
+	if (profile_cpu != -1)
+		printf(", cpu: %d)\n", profile_cpu);
+	else {
+		if (target_pid != -1)
+			printf(")\n");
+		else
+			printf(", %d CPUs)\n", nr_cpus);
+	}
+
+	printf("------------------------------------------------------------------------------\n\n");
+
+	if (nr_counters == 1)
+		printf("             samples    pcnt");
+	else
+		printf("  weight     samples    pcnt");
+
+	printf("         RIP          kernel function\n"
+	       	       "  ______     _______   _____   ________________   _______________\n\n"
+	);
+
+	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
+		struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
+		struct symbol *sym = (struct symbol *)(syme + 1);
+		char *color = PERF_COLOR_NORMAL;
+		double pcnt;
+
+		if (++printed > print_entries || syme->snap_count < count_filter)
+			continue;
+
+		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
+					 sum_ksamples));
+
+		/*
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
+		 */
+		if (pcnt >= 5.0) {
+			color = PERF_COLOR_RED;
+		} else {
+			if (pcnt >= 0.5)
+				color = PERF_COLOR_GREEN;
+		}
+
+		if (nr_counters == 1)
+			printf("%20.2f - ", syme->weight);
+		else
+			printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
+
+		color_fprintf(stdout, color, "%4.1f%%", pcnt);
+		printf(" - %016llx : %s\n", sym->start, sym->name);
+	}
+}
+
+static void *display_thread(void *arg)
+{
+	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+	int delay_msecs = delay_secs * 1000;
+
+	printf("PerfTop refresh period: %d seconds\n", delay_secs);
+
+	do {
+		print_sym_table();
+	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
+
+	printf("key pressed - exiting.\n");
+	exit(0);
+
+	return NULL;
+}
+
+static int symbol_filter(struct dso *self, struct symbol *sym)
+{
+	static int filter_match;
+	struct sym_entry *syme;
+	const char *name = sym->name;
+
+	if (!strcmp(name, "_text") ||
+	    !strcmp(name, "_etext") ||
+	    !strcmp(name, "_sinittext") ||
+	    !strncmp("init_module", name, 11) ||
+	    !strncmp("cleanup_module", name, 14) ||
+	    strstr(name, "_text_start") ||
+	    strstr(name, "_text_end"))
+		return 1;
+
+	syme = dso__sym_priv(self, sym);
+	/* Tag samples to be skipped. */
+	if (!strcmp("default_idle", name) ||
+	    !strcmp("cpu_idle", name) ||
+	    !strcmp("enter_idle", name) ||
+	    !strcmp("exit_idle", name) ||
+	    !strcmp("mwait_idle", name))
+		syme->skip = 1;
+
+	if (filter_match == 1) {
+		filter_end = sym->start;
+		filter_match = -1;
+		if (filter_end - filter_start > 10000) {
+			fprintf(stderr,
+				"hm, too large filter symbol <%s> - skipping.\n",
+				sym_filter);
+			fprintf(stderr, "symbol filter start: %016lx\n",
+				filter_start);
+			fprintf(stderr, "                end: %016lx\n",
+				filter_end);
+			filter_end = filter_start = 0;
+			sym_filter = NULL;
+			sleep(1);
+		}
+	}
+
+	if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
+		filter_match = 1;
+		filter_start = sym->start;
+	}
+
+
+	return 0;
+}
+
+static int parse_symbols(void)
+{
+	struct rb_node *node;
+	struct symbol  *sym;
+
+	kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
+	if (kernel_dso == NULL)
+		return -1;
+
+	if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0)
+		goto out_delete_dso;
+
+	node = rb_first(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	min_ip = sym->start;
+
+	node = rb_last(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	max_ip = sym->end;
+
+	if (dump_symtab)
+		dso__fprintf(kernel_dso, stderr);
+
+	return 0;
+
+out_delete_dso:
+	dso__delete(kernel_dso);
+	kernel_dso = NULL;
+	return -1;
+}
+
+#define TRACE_COUNT     3
+
+/*
+ * Binary search in the histogram table and record the hit:
+ */
+static void record_ip(__u64 ip, int counter)
+{
+	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
+
+	if (sym != NULL) {
+		struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
+
+		if (!syme->skip) {
+			syme->count[counter]++;
+			pthread_mutex_lock(&active_symbols_lock);
+			if (list_empty(&syme->node) || !syme->node.next)
+				__list_insert_active_sym(syme);
+			pthread_mutex_unlock(&active_symbols_lock);
+			return;
+		}
+	}
+
+	samples--;
+}
+
+static void process_event(__u64 ip, int counter)
+{
+	samples++;
+
+	if (ip < min_ip || ip > max_ip) {
+		userspace_samples++;
+		return;
+	}
+
+	record_ip(ip, counter);
+}
+
+struct mmap_data {
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+struct timeval last_read, this_read;
+
+static void mmap_read_counter(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and mess up the samples under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	for (; old != head;) {
+		struct ip_event {
+			struct perf_event_header header;
+			__u64 ip;
+			__u32 pid, target_pid;
+		};
+		struct mmap_event {
+			struct perf_event_header header;
+			__u32 pid, target_pid;
+			__u64 start;
+			__u64 len;
+			__u64 pgoff;
+			char filename[PATH_MAX];
+		};
+
+		typedef union event_union {
+			struct perf_event_header header;
+			struct ip_event ip;
+			struct mmap_event mmap;
+		} event_t;
+
+		event_t *event = (event_t *)&data[old & md->mask];
+
+		event_t event_copy;
+
+		size_t size = event->header.size;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((old & md->mask) + size != ((old + size) & md->mask)) {
+			unsigned int offset = old;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = &event_copy;
+
+			do {
+				cpy = min(md->mask + 1 - (offset & md->mask), len);
+				memcpy(dst, &data[offset & md->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = &event_copy;
+		}
+
+		old += size;
+
+		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+			if (event->header.type & PERF_SAMPLE_IP)
+				process_event(event->ip.ip, md->counter);
+		}
+	}
+
+	md->prev = old;
+}
+
+static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
+static void mmap_read(void)
+{
+	int i, counter;
+
+	for (i = 0; i < nr_cpus; i++) {
+		for (counter = 0; counter < nr_counters; counter++)
+			mmap_read_counter(&mmap_array[i][counter]);
+	}
+}
+
+int nr_poll;
+int group_fd;
+
+static void start_counter(int i, int counter)
+{
+	struct perf_counter_attr *attr;
+	unsigned int cpu;
+
+	cpu = profile_cpu;
+	if (target_pid == -1 && profile_cpu == -1)
+		cpu = i;
+
+	attr = attrs + counter;
+
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	attr->freq		= freq;
+
+try_again:
+	fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
+
+	if (fd[i][counter] < 0) {
+		int err = errno;
+
+		if (err == EPERM)
+			die("No permission - are you root?\n");
+		/*
+		 * If it's cycles then fall back to hrtimer
+		 * based cpu-clock-tick sw counter, which
+		 * is always available even if no PMU support:
+		 */
+		if (attr->type == PERF_TYPE_HARDWARE
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+
+			if (verbose)
+				warning(" ... trying to fall back to cpu-clock-ticks\n");
+
+			attr->type = PERF_TYPE_SOFTWARE;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
+			goto try_again;
+		}
+		printf("\n");
+		error("perfcounter syscall returned with %d (%s)\n",
+			fd[i][counter], strerror(err));
+		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
+		exit(-1);
+	}
+	assert(fd[i][counter] >= 0);
+	fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+	/*
+	 * First counter acts as the group leader:
+	 */
+	if (group && group_fd == -1)
+		group_fd = fd[i][counter];
+
+	event_array[nr_poll].fd = fd[i][counter];
+	event_array[nr_poll].events = POLLIN;
+	nr_poll++;
+
+	mmap_array[i][counter].counter = counter;
+	mmap_array[i][counter].prev = 0;
+	mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+	mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+			PROT_READ, MAP_SHARED, fd[i][counter], 0);
+	if (mmap_array[i][counter].base == MAP_FAILED)
+		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+}
+
+static int __cmd_top(void)
+{
+	pthread_t thread;
+	int i, counter;
+	int ret;
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++)
+			start_counter(i, counter);
+	}
+
+	/* Wait for a minimal set of events before starting the snapshot */
+	poll(event_array, nr_poll, 100);
+
+	mmap_read();
+
+	if (pthread_create(&thread, NULL, display_thread, NULL)) {
+		printf("Could not create display thread.\n");
+		exit(-1);
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	while (1) {
+		int hits = samples;
+
+		mmap_read();
+
+		if (hits == samples)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	return 0;
+}
+
+static const char * const top_usage[] = {
+	"perf top [<options>]",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "profile events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_INTEGER('C', "CPU", &profile_cpu,
+		    "CPU to profile on"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_INTEGER('d', "delay", &delay_secs,
+		    "number of seconds to delay between refreshes"),
+	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
+			    "dump the symbol table used for profiling"),
+	OPT_INTEGER('f', "count-filter", &count_filter,
+		    "only display functions with more events than this"),
+	OPT_BOOLEAN('g', "group", &group,
+			    "put the counters into a counter group"),
+	OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
+		    "only display symbols matchig this pattern"),
+	OPT_BOOLEAN('z', "zero", &group,
+		    "zero history across updates"),
+	OPT_INTEGER('F', "freq", &freq,
+		    "profile at this frequency"),
+	OPT_INTEGER('E', "entries", &print_entries,
+		    "display this many functions"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+};
+
+int cmd_top(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	argc = parse_options(argc, argv, options, top_usage, 0);
+	if (argc)
+		usage_with_options(top_usage, options);
+
+	if (freq) {
+		default_interval = freq;
+		freq = 1;
+	}
+
+	/* CPU and PID are mutually exclusive */
+	if (target_pid != -1 && profile_cpu != -1) {
+		printf("WARNING: PID switch overriding CPU\n");
+		sleep(1);
+		profile_cpu = -1;
+	}
+
+	if (!nr_counters)
+		nr_counters = 1;
+
+	if (delay_secs < 1)
+		delay_secs = 1;
+
+	parse_symbols();
+
+	/*
+	 * Fill in the ones not specifically initialized via -c:
+	 */
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (attrs[counter].sample_period)
+			continue;
+
+		attrs[counter].sample_period = default_interval;
+	}
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (target_pid != -1 || profile_cpu != -1)
+		nr_cpus = 1;
+
+	return __cmd_top();
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
new file mode 100644
index 0000000..51d1682
--- /dev/null
+++ b/tools/perf/builtin.h
@@ -0,0 +1,26 @@
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+#include "util/util.h"
+#include "util/strbuf.h"
+
+extern const char perf_version_string[];
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+extern void list_common_cmds_help(void);
+extern const char *help_unknown_cmd(const char *cmd);
+extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
+extern int check_pager_config(const char *cmd);
+
+extern int cmd_annotate(int argc, const char **argv, const char *prefix);
+extern int cmd_help(int argc, const char **argv, const char *prefix);
+extern int cmd_record(int argc, const char **argv, const char *prefix);
+extern int cmd_report(int argc, const char **argv, const char *prefix);
+extern int cmd_stat(int argc, const char **argv, const char *prefix);
+extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_version(int argc, const char **argv, const char *prefix);
+extern int cmd_list(int argc, const char **argv, const char *prefix);
+
+#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
new file mode 100644
index 0000000..eebce30
--- /dev/null
+++ b/tools/perf/command-list.txt
@@ -0,0 +1,10 @@
+#
+# List of known perf commands.
+# command name			category [deprecated] [common]
+#
+perf-annotate			mainporcelain common
+perf-list			mainporcelain common
+perf-record			mainporcelain common
+perf-report			mainporcelain common
+perf-stat			mainporcelain common
+perf-top			mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
new file mode 100644
index 0000000..f71e0d2
--- /dev/null
+++ b/tools/perf/design.txt
@@ -0,0 +1,457 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cachemisses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.  It
+provides "virtual" 64-bit counters, regardless of the width of the
+underlying hardware counters.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the perf_counter_open()
+system call:
+
+   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+			     pid_t pid, int cpu, int group_fd,
+			     unsigned long flags);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'perf_counter_hw_event' is:
+
+struct perf_counter_hw_event {
+        /*
+         * The MSB of the config word signifies if the rest contains cpu
+         * specific (raw) counter configuration data, if unset, the next
+         * 7 bits are an event type and the rest of the bits are the event
+         * identifier.
+         */
+        __u64                   config;
+
+        __u64                   irq_period;
+        __u32                   record_type;
+        __u32                   read_format;
+
+        __u64                   disabled       :  1, /* off by default        */
+                                inherit        :  1, /* children inherit it   */
+                                pinned         :  1, /* must always be on PMU */
+                                exclusive      :  1, /* only group on PMU     */
+                                exclude_user   :  1, /* don't count user      */
+                                exclude_kernel :  1, /* ditto kernel          */
+                                exclude_hv     :  1, /* ditto hypervisor      */
+                                exclude_idle   :  1, /* don't count when idle */
+                                mmap           :  1, /* include mmap data     */
+                                munmap         :  1, /* include munmap data   */
+                                comm           :  1, /* include comm data     */
+
+                                __reserved_1   : 52;
+
+        __u32                   extra_config_len;
+        __u32                   wakeup_events;  /* wakeup every n events */
+
+        __u64                   __reserved_2;
+        __u64                   __reserved_3;
+};
+
+The 'config' field specifies what the counter should count.  It
+is divided into 3 bit-fields:
+
+raw_type: 1 bit   (most significant bit)	0x8000_0000_0000_0000
+type:	  7 bits  (next most significant)	0x7f00_0000_0000_0000
+event_id: 56 bits (least significant)		0x00ff_ffff_ffff_ffff
+
+If 'raw_type' is 1, then the counter will count a hardware event
+specified by the remaining 63 bits of event_config.  The encoding is
+machine-specific.
+
+If 'raw_type' is 0, then the 'type' field says what kind of counter
+this is, with the following encoding:
+
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+};
+
+A counter of PERF_TYPE_HARDWARE will count the hardware event
+specified by 'event_id':
+
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_ids {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES	= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES	= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+};
+
+These are standardized types of events that work relatively uniformly
+on all CPUs that implement Performance Counters support under Linux,
+although there may be variations (e.g., different CPUs might count
+cache references and misses at different levels of the cache hierarchy).
+If a CPU is not able to count the selected event, then the system call
+will return -EINVAL.
+
+More hw_event_types are supported as well, but they are CPU-specific
+and accessed as raw events.  For example, to count "External bus
+cycles while bus lock signal asserted" events on Intel Core CPUs, pass
+in a 0x4064 event_id value and set hw_event.raw_type to 1.
+
+A counter of type PERF_TYPE_SOFTWARE will count one of the available
+software events, selected by 'event_id':
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+};
+
+Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
+tracer is available, and event_id values can be obtained from
+/debug/tracing/events/*/*/id
+
+
+Counters come in two flavours: counting counters and sampling
+counters.  A "counting" counter is one that is used for counting the
+number of events that occur, and is characterised by having
+irq_period = 0.
+
+
+A read() on a counter returns the current value of the counter and possible
+additional values as specified by 'read_format', each value is a u64 (8 bytes)
+in size.
+
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+        PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
+        PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
+};
+
+Using these additional values one can establish the overcommit ratio for a
+particular counter allowing one to take the round-robin scheduling effect
+into account.
+
+
+A "sampling" counter is one that is set up to generate an interrupt
+every N events, where N is given by 'irq_period'.  A sampling counter
+has irq_period > 0. The record_type controls what data is recorded on each
+interrupt:
+
+/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_record_format {
+        PERF_RECORD_IP          = 1U << 0,
+        PERF_RECORD_TID         = 1U << 1,
+        PERF_RECORD_TIME        = 1U << 2,
+        PERF_RECORD_ADDR        = 1U << 3,
+        PERF_RECORD_GROUP       = 1U << 4,
+        PERF_RECORD_CALLCHAIN   = 1U << 5,
+};
+
+Such (and other) events will be recorded in a ring-buffer, which is
+available to user-space using mmap() (see below).
+
+The 'disabled' bit specifies whether the counter starts out disabled
+or enabled.  If it is initially disabled, it can be enabled by ioctl
+or prctl (see below).
+
+The 'inherit' bit, if set, specifies that this counter should count
+events on descendant tasks as well as the task specified.  This only
+applies to new descendents, not to any existing descendents at the
+time the counter is created (nor to any new descendents of existing
+descendents).
+
+The 'pinned' bit, if set, specifies that the counter should always be
+on the CPU if at all possible.  It only applies to hardware counters
+and only to group leaders.  If a pinned counter cannot be put onto the
+CPU (e.g. because there are not enough hardware counters or because of
+a conflict with some other event), then the counter goes into an
+'error' state, where reads return end-of-file (i.e. read() returns 0)
+until the counter is subsequently enabled or disabled.
+
+The 'exclusive' bit, if set, specifies that when this counter's group
+is on the CPU, it should be the only group using the CPU's counters.
+In future, this will allow sophisticated monitoring programs to supply
+extra configuration information via 'extra_config_len' to exploit
+advanced features of the CPU's Performance Monitor Unit (PMU) that are
+not otherwise accessible and that might disrupt other hardware
+counters.
+
+The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
+way to request that counting of events be restricted to times when the
+CPU is in user, kernel and/or hypervisor mode.
+
+The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
+operations, these can be used to relate userspace IP addresses to actual
+code, even after the mapping (or even the whole process) is gone,
+these events are recorded in the ring-buffer (see below).
+
+The 'comm' bit allows tracking of process comm data on process creation.
+This too is recorded in the ring-buffer (see below).
+
+The 'pid' parameter to the perf_counter_open() system call allows the
+counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets schedule to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+The 'flags' parameter is currently unused and must be zero.
+
+The 'group_fd' parameter allows counter "groups" to be set up.  A
+counter group has one counter which is the group "leader".  The leader
+is created first, with group_fd = -1 in the perf_counter_open call
+that creates it.  The rest of the group members are created
+subsequently, with group_fd giving the fd of the group leader.
+(A single counter on its own is created with group_fd = -1 and is
+considered to be a group with only 1 member.)
+
+A counter group is scheduled onto the CPU as a unit, that is, it will
+only be put onto the CPU if all of the counters in the group can be
+put onto the CPU.  This means that the values of the member counters
+can be meaningfully compared, added, divided (to get ratios), etc.,
+with each other, since they have counted events for the same set of
+executed instructions.
+
+
+Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap
+tracking are logged into a ring-buffer. This ring-buffer is created and
+accessed through mmap().
+
+The mmap size should be 1+2^n pages, where the first page is a meta-data page
+(struct perf_counter_mmap_page) that contains various bits of information such
+as where the ring-buffer head is.
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+        __u32   version;                /* version number of this structure */
+        __u32   compat_version;         /* lowest version this is compat with */
+
+        /*
+         * Bits needed to read the hw counters in user-space.
+         *
+         *   u32 seq;
+         *   s64 count;
+         *
+         *   do {
+         *     seq = pc->lock;
+         *
+         *     barrier()
+         *     if (pc->index) {
+         *       count = pmc_read(pc->index - 1);
+         *       count += pc->offset;
+         *     } else
+         *       goto regular_read;
+         *
+         *     barrier();
+         *   } while (pc->lock != seq);
+         *
+         * NOTE: for obvious reason this only works on self-monitoring
+         *       processes.
+         */
+        __u32   lock;                   /* seqlock for synchronization */
+        __u32   index;                  /* hardware counter identifier */
+        __s64   offset;                 /* add to hardware counter value */
+
+        /*
+         * Control data for the mmap() data buffer.
+         *
+         * User-space reading this value should issue an rmb(), on SMP capable
+         * platforms, after reading this value -- see perf_counter_wakeup().
+         */
+        __u32   data_head;              /* head in the data section */
+};
+
+NOTE: the hw-counter userspace bits are arch specific and are currently only
+      implemented on powerpc.
+
+The following 2^n pages are the ring-buffer which contains events of the form:
+
+#define PERF_EVENT_MISC_KERNEL          (1 << 0)
+#define PERF_EVENT_MISC_USER            (1 << 1)
+#define PERF_EVENT_MISC_OVERFLOW        (1 << 2)
+
+struct perf_event_header {
+        __u32   type;
+        __u16   misc;
+        __u16   size;
+};
+
+enum perf_event_type {
+
+        /*
+         * The MMAP events record the PROT_EXEC mappings so that we can
+         * correlate userspace IPs to code. They have the following structure:
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      u64                             addr;
+         *      u64                             len;
+         *      u64                             pgoff;
+         *      char                            filename[];
+         * };
+         */
+        PERF_EVENT_MMAP                 = 1,
+        PERF_EVENT_MUNMAP               = 2,
+
+        /*
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      char                            comm[];
+         * };
+         */
+        PERF_EVENT_COMM                 = 3,
+
+        /*
+         * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+         * will be PERF_RECORD_*
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      { u64                   ip;       } && PERF_RECORD_IP
+         *      { u32                   pid, tid; } && PERF_RECORD_TID
+         *      { u64                   time;     } && PERF_RECORD_TIME
+         *      { u64                   addr;     } && PERF_RECORD_ADDR
+         *
+         *      { u64                   nr;
+         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
+         *
+         *      { u16                   nr,
+         *                              hv,
+         *                              kernel,
+         *                              user;
+         *        u64                   ips[nr];  } && PERF_RECORD_CALLCHAIN
+         * };
+         */
+};
+
+NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
+      on x86.
+
+Notification of new events is possible through poll()/select()/epoll() and
+fcntl() managing signals.
+
+Normally a notification is generated for every page filled, however one can
+additionally set perf_counter_hw_event.wakeup_events to generate one every
+so many counter overflow events.
+
+Future work will include a splice() interface to the ring-buffer.
+
+
+Counters can be enabled and disabled in two ways: via ioctl and via
+prctl.  When a counter is disabled, it doesn't count or generate
+events but does continue to exist and maintain its count value.
+
+An individual counter or counter group can be enabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_ENABLE);
+
+or disabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_DISABLE);
+
+Enabling or disabling the leader of a group enables or disables the
+whole group; that is, while the group leader is disabled, none of the
+counters in the group will count.  Enabling or disabling a member of a
+group other than the leader only affects that counter - disabling an
+non-leader stops that counter from counting but doesn't affect any
+other counter.
+
+Additionally, non-inherited overflow counters can use
+
+	ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
+
+to enable a counter for 'nr' events, after which it gets disabled again.
+
+A process can enable or disable all the counter groups that are
+attached to it, using prctl:
+
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+
+This applies to all counters on the current process, whether created
+by this process or by another, and doesn't affect any counters that
+this process has created on other processes.  It only enables or
+disables the group leaders, not any other members in the groups.
+
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
+will need at least this:
+	- asm/perf_counter.h - a basic stub will suffice at first
+	- support for atomic64 types (and associated helper functions)
+	- set_perf_counter_pending() implemented
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_counter_init() to register hardware counters.
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
new file mode 100644
index 0000000..4eb7259
--- /dev/null
+++ b/tools/perf/perf.c
@@ -0,0 +1,428 @@
+/*
+ * perf.c
+ *
+ * Performance analysis utility.
+ *
+ * This is the main hub from which the sub-commands (perf stat,
+ * perf top, perf record, perf report, etc.) are started.
+ */
+#include "builtin.h"
+
+#include "util/exec_cmd.h"
+#include "util/cache.h"
+#include "util/quote.h"
+#include "util/run-command.h"
+
+const char perf_usage_string[] =
+	"perf [--version] [--help] COMMAND [ARGS]";
+
+const char perf_more_info_string[] =
+	"See 'perf help COMMAND' for more information on a specific command.";
+
+static int use_pager = -1;
+struct pager_config {
+	const char *cmd;
+	int val;
+};
+
+static int pager_command_config(const char *var, const char *value, void *data)
+{
+	struct pager_config *c = data;
+	if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
+		c->val = perf_config_bool(var, value);
+	return 0;
+}
+
+/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
+int check_pager_config(const char *cmd)
+{
+	struct pager_config c;
+	c.cmd = cmd;
+	c.val = -1;
+	perf_config(pager_command_config, &c);
+	return c.val;
+}
+
+static void commit_pager_choice(void) {
+	switch (use_pager) {
+	case 0:
+		setenv("PERF_PAGER", "cat", 1);
+		break;
+	case 1:
+		/* setup_pager(); */
+		break;
+	default:
+		break;
+	}
+}
+
+static int handle_options(const char*** argv, int* argc, int* envchanged)
+{
+	int handled = 0;
+
+	while (*argc > 0) {
+		const char *cmd = (*argv)[0];
+		if (cmd[0] != '-')
+			break;
+
+		/*
+		 * For legacy reasons, the "version" and "help"
+		 * commands can be written with "--" prepended
+		 * to make them look like flags.
+		 */
+		if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
+			break;
+
+		/*
+		 * Check remaining flags.
+		 */
+		if (!prefixcmp(cmd, "--exec-path")) {
+			cmd += 11;
+			if (*cmd == '=')
+				perf_set_argv_exec_path(cmd + 1);
+			else {
+				puts(perf_exec_path());
+				exit(0);
+			}
+		} else if (!strcmp(cmd, "--html-path")) {
+			puts(system_path(PERF_HTML_PATH));
+			exit(0);
+		} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
+			use_pager = 1;
+		} else if (!strcmp(cmd, "--no-pager")) {
+			use_pager = 0;
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--perf-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --perf-dir.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+			handled++;
+		} else if (!prefixcmp(cmd, "--perf-dir=")) {
+			setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--work-tree")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --work-tree.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+		} else if (!prefixcmp(cmd, "--work-tree=")) {
+			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else {
+			fprintf(stderr, "Unknown option: %s\n", cmd);
+			usage(perf_usage_string);
+		}
+
+		(*argv)++;
+		(*argc)--;
+		handled++;
+	}
+	return handled;
+}
+
+static int handle_alias(int *argcp, const char ***argv)
+{
+	int envchanged = 0, ret = 0, saved_errno = errno;
+	int count, option_count;
+	const char** new_argv;
+	const char *alias_command;
+	char *alias_string;
+
+	alias_command = (*argv)[0];
+	alias_string = alias_lookup(alias_command);
+	if (alias_string) {
+		if (alias_string[0] == '!') {
+			if (*argcp > 1) {
+				struct strbuf buf;
+
+				strbuf_init(&buf, PATH_MAX);
+				strbuf_addstr(&buf, alias_string);
+				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+				free(alias_string);
+				alias_string = buf.buf;
+			}
+			ret = system(alias_string + 1);
+			if (ret >= 0 && WIFEXITED(ret) &&
+			    WEXITSTATUS(ret) != 127)
+				exit(WEXITSTATUS(ret));
+			die("Failed to run '%s' when expanding alias '%s'",
+			    alias_string + 1, alias_command);
+		}
+		count = split_cmdline(alias_string, &new_argv);
+		if (count < 0)
+			die("Bad alias.%s string", alias_command);
+		option_count = handle_options(&new_argv, &count, &envchanged);
+		if (envchanged)
+			die("alias '%s' changes environment variables\n"
+				 "You can use '!perf' in the alias to do this.",
+				 alias_command);
+		memmove(new_argv - option_count, new_argv,
+				count * sizeof(char *));
+		new_argv -= option_count;
+
+		if (count < 1)
+			die("empty alias for %s", alias_command);
+
+		if (!strcmp(alias_command, new_argv[0]))
+			die("recursive alias: %s", alias_command);
+
+		new_argv = realloc(new_argv, sizeof(char*) *
+				    (count + *argcp + 1));
+		/* insert after command name */
+		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
+		new_argv[count+*argcp] = NULL;
+
+		*argv = new_argv;
+		*argcp += count - 1;
+
+		ret = 1;
+	}
+
+	errno = saved_errno;
+
+	return ret;
+}
+
+const char perf_version_string[] = PERF_VERSION;
+
+#define RUN_SETUP	(1<<0)
+#define USE_PAGER	(1<<1)
+/*
+ * require working tree to be present -- anything uses this needs
+ * RUN_SETUP for reading from the configuration file.
+ */
+#define NEED_WORK_TREE	(1<<2)
+
+struct cmd_struct {
+	const char *cmd;
+	int (*fn)(int, const char **, const char *);
+	int option;
+};
+
+static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
+{
+	int status;
+	struct stat st;
+	const char *prefix;
+
+	prefix = NULL;
+	if (p->option & RUN_SETUP)
+		prefix = NULL; /* setup_perf_directory(); */
+
+	if (use_pager == -1 && p->option & RUN_SETUP)
+		use_pager = check_pager_config(p->cmd);
+	if (use_pager == -1 && p->option & USE_PAGER)
+		use_pager = 1;
+	commit_pager_choice();
+
+	if (p->option & NEED_WORK_TREE)
+		/* setup_work_tree() */;
+
+	status = p->fn(argc, argv, prefix);
+	if (status)
+		return status & 0xff;
+
+	/* Somebody closed stdout? */
+	if (fstat(fileno(stdout), &st))
+		return 0;
+	/* Ignore write errors for pipes and sockets.. */
+	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
+		return 0;
+
+	/* Check for ENOSPC and EIO errors.. */
+	if (fflush(stdout))
+		die("write failure on standard output: %s", strerror(errno));
+	if (ferror(stdout))
+		die("unknown write failure on standard output");
+	if (fclose(stdout))
+		die("close failed on standard output: %s", strerror(errno));
+	return 0;
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+	const char *cmd = argv[0];
+	static struct cmd_struct commands[] = {
+		{ "help", cmd_help, 0 },
+		{ "list", cmd_list, 0 },
+		{ "record", cmd_record, 0 },
+		{ "report", cmd_report, 0 },
+		{ "stat", cmd_stat, 0 },
+		{ "top", cmd_top, 0 },
+		{ "annotate", cmd_annotate, 0 },
+		{ "version", cmd_version, 0 },
+	};
+	int i;
+	static const char ext[] = STRIP_EXTENSION;
+
+	if (sizeof(ext) > 1) {
+		i = strlen(argv[0]) - strlen(ext);
+		if (i > 0 && !strcmp(argv[0] + i, ext)) {
+			char *argv0 = strdup(argv[0]);
+			argv[0] = cmd = argv0;
+			argv0[i] = '\0';
+		}
+	}
+
+	/* Turn "perf cmd --help" into "perf help cmd" */
+	if (argc > 1 && !strcmp(argv[1], "--help")) {
+		argv[1] = argv[0];
+		argv[0] = cmd = "help";
+	}
+
+	for (i = 0; i < ARRAY_SIZE(commands); i++) {
+		struct cmd_struct *p = commands+i;
+		if (strcmp(p->cmd, cmd))
+			continue;
+		exit(run_builtin(p, argc, argv));
+	}
+}
+
+static void execv_dashed_external(const char **argv)
+{
+	struct strbuf cmd = STRBUF_INIT;
+	const char *tmp;
+	int status;
+
+	strbuf_addf(&cmd, "perf-%s", argv[0]);
+
+	/*
+	 * argv[0] must be the perf command, but the argv array
+	 * belongs to the caller, and may be reused in
+	 * subsequent loop iterations. Save argv[0] and
+	 * restore it on error.
+	 */
+	tmp = argv[0];
+	argv[0] = cmd.buf;
+
+	/*
+	 * if we fail because the command is not found, it is
+	 * OK to return. Otherwise, we just pass along the status code.
+	 */
+	status = run_command_v_opt(argv, 0);
+	if (status != -ERR_RUN_COMMAND_EXEC) {
+		if (IS_RUN_COMMAND_ERR(status))
+			die("unable to run '%s'", argv[0]);
+		exit(-status);
+	}
+	errno = ENOENT; /* as if we called execvp */
+
+	argv[0] = tmp;
+
+	strbuf_release(&cmd);
+}
+
+static int run_argv(int *argcp, const char ***argv)
+{
+	int done_alias = 0;
+
+	while (1) {
+		/* See if it's an internal command */
+		handle_internal_command(*argcp, *argv);
+
+		/* .. then try the external ones */
+		execv_dashed_external(*argv);
+
+		/* It could be an alias -- this works around the insanity
+		 * of overriding "perf log" with "perf show" by having
+		 * alias.log = show
+		 */
+		if (done_alias || !handle_alias(argcp, argv))
+			break;
+		done_alias = 1;
+	}
+
+	return done_alias;
+}
+
+
+int main(int argc, const char **argv)
+{
+	const char *cmd;
+
+	cmd = perf_extract_argv0_path(argv[0]);
+	if (!cmd)
+		cmd = "perf-help";
+
+	/*
+	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
+	 *
+	 *  - cannot take flags in between the "perf" and the "xxxx".
+	 *  - cannot execute it externally (since it would just do
+	 *    the same thing over again)
+	 *
+	 * So we just directly call the internal command handler, and
+	 * die if that one cannot handle it.
+	 */
+	if (!prefixcmp(cmd, "perf-")) {
+		cmd += 5;
+		argv[0] = cmd;
+		handle_internal_command(argc, argv);
+		die("cannot handle %s internally", cmd);
+	}
+
+	/* Look for flags.. */
+	argv++;
+	argc--;
+	handle_options(&argv, &argc, NULL);
+	commit_pager_choice();
+	if (argc > 0) {
+		if (!prefixcmp(argv[0], "--"))
+			argv[0] += 2;
+	} else {
+		/* The user didn't specify a command; give them help */
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		exit(1);
+	}
+	cmd = argv[0];
+
+	/*
+	 * We use PATH to find perf commands, but we prepend some higher
+	 * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
+	 * environment, and the $(perfexecdir) from the Makefile at build
+	 * time.
+	 */
+	setup_path();
+
+	while (1) {
+		static int done_help = 0;
+		static int was_alias = 0;
+
+		was_alias = run_argv(&argc, &argv);
+		if (errno != ENOENT)
+			break;
+
+		if (was_alias) {
+			fprintf(stderr, "Expansion of alias '%s' failed; "
+				"'%s' is not a perf-command\n",
+				cmd, argv[0]);
+			exit(1);
+		}
+		if (!done_help) {
+			cmd = argv[0] = help_unknown_cmd(cmd);
+			done_help = 1;
+		} else
+			break;
+	}
+
+	fprintf(stderr, "Failed to run command '%s': %s\n",
+		cmd, strerror(errno));
+
+	return 1;
+}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
new file mode 100644
index 0000000..87a1aca
--- /dev/null
+++ b/tools/perf/perf.h
@@ -0,0 +1,68 @@
+#ifndef _PERF_PERF_H
+#define _PERF_PERF_H
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#include "../../arch/powerpc/include/asm/unistd.h"
+#define rmb()		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+#include "../../include/linux/perf_counter.h"
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC			1000000000ULL
+#endif
+
+static inline unsigned long long rdclock(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+static inline int
+sys_perf_counter_open(struct perf_counter_attr *attr,
+		      pid_t pid, int cpu, int group_fd,
+		      unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_counter_open, attr, pid, cpu,
+		       group_fd, flags);
+}
+
+#define MAX_COUNTERS			256
+#define MAX_NR_CPUS			256
+
+#endif
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
new file mode 100755
index 0000000..c561d15
--- /dev/null
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+GVF=PERF-VERSION-FILE
+DEF_VER=v0.0.1.PERF
+
+LF='
+'
+
+# First see if there is a version file (included in release tarballs),
+# then try git-describe, then default.
+if test -f version
+then
+	VN=$(cat version) || VN="$DEF_VER"
+elif test -d .git -o -f .git &&
+	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+	case "$VN" in
+	*$LF*) (exit 1) ;;
+	v[0-9]*)
+		git update-index -q --refresh
+		test -z "$(git diff-index --name-only HEAD --)" ||
+		VN="$VN-dirty" ;;
+	esac
+then
+	VN=$(echo "$VN" | sed -e 's/-/./g');
+else
+	VN="$DEF_VER"
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "PERF_VERSION = $VN"
+	echo "PERF_VERSION = $VN" >$GVF
+}
+
+
diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c
new file mode 100644
index 0000000..61d33b8
--- /dev/null
+++ b/tools/perf/util/abspath.c
@@ -0,0 +1,117 @@
+#include "cache.h"
+
+/*
+ * Do not use this for inspecting *tracked* content.  When path is a
+ * symlink to a directory, we do not want to say it is a directory when
+ * dealing with tracked content in the working tree.
+ */
+static int is_directory(const char *path)
+{
+	struct stat st;
+	return (!stat(path, &st) && S_ISDIR(st.st_mode));
+}
+
+/* We allow "recursive" symbolic links. Only within reason, though. */
+#define MAXDEPTH 5
+
+const char *make_absolute_path(const char *path)
+{
+	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
+	char cwd[1024] = "";
+	int buf_index = 1, len;
+
+	int depth = MAXDEPTH;
+	char *last_elem = NULL;
+	struct stat st;
+
+	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+		die ("Too long path: %.*s", 60, path);
+
+	while (depth--) {
+		if (!is_directory(buf)) {
+			char *last_slash = strrchr(buf, '/');
+			if (last_slash) {
+				*last_slash = '\0';
+				last_elem = xstrdup(last_slash + 1);
+			} else {
+				last_elem = xstrdup(buf);
+				*buf = '\0';
+			}
+		}
+
+		if (*buf) {
+			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
+				die ("Could not get current working directory");
+
+			if (chdir(buf))
+				die ("Could not switch to '%s'", buf);
+		}
+		if (!getcwd(buf, PATH_MAX))
+			die ("Could not get current working directory");
+
+		if (last_elem) {
+			int len = strlen(buf);
+			if (len + strlen(last_elem) + 2 > PATH_MAX)
+				die ("Too long path name: '%s/%s'",
+						buf, last_elem);
+			buf[len] = '/';
+			strcpy(buf + len + 1, last_elem);
+			free(last_elem);
+			last_elem = NULL;
+		}
+
+		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
+			len = readlink(buf, next_buf, PATH_MAX);
+			if (len < 0)
+				die ("Invalid symlink: %s", buf);
+			if (PATH_MAX <= len)
+				die("symbolic link too long: %s", buf);
+			next_buf[len] = '\0';
+			buf = next_buf;
+			buf_index = 1 - buf_index;
+			next_buf = bufs[buf_index];
+		} else
+			break;
+	}
+
+	if (*cwd && chdir(cwd))
+		die ("Could not change back to '%s'", cwd);
+
+	return buf;
+}
+
+static const char *get_pwd_cwd(void)
+{
+	static char cwd[PATH_MAX + 1];
+	char *pwd;
+	struct stat cwd_stat, pwd_stat;
+	if (getcwd(cwd, PATH_MAX) == NULL)
+		return NULL;
+	pwd = getenv("PWD");
+	if (pwd && strcmp(pwd, cwd)) {
+		stat(cwd, &cwd_stat);
+		if (!stat(pwd, &pwd_stat) &&
+		    pwd_stat.st_dev == cwd_stat.st_dev &&
+		    pwd_stat.st_ino == cwd_stat.st_ino) {
+			strlcpy(cwd, pwd, PATH_MAX);
+		}
+	}
+	return cwd;
+}
+
+const char *make_nonrelative_path(const char *path)
+{
+	static char buf[PATH_MAX + 1];
+
+	if (is_absolute_path(path)) {
+		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	} else {
+		const char *cwd = get_pwd_cwd();
+		if (!cwd)
+			die("Cannot determine the current working directory");
+		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	}
+	return buf;
+}
diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c
new file mode 100644
index 0000000..9b3dd2b
--- /dev/null
+++ b/tools/perf/util/alias.c
@@ -0,0 +1,77 @@
+#include "cache.h"
+
+static const char *alias_key;
+static char *alias_val;
+
+static int alias_lookup_cb(const char *k, const char *v, void *cb)
+{
+	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
+		if (!v)
+			return config_error_nonbool(k);
+		alias_val = strdup(v);
+		return 0;
+	}
+	return 0;
+}
+
+char *alias_lookup(const char *alias)
+{
+	alias_key = alias;
+	alias_val = NULL;
+	perf_config(alias_lookup_cb, NULL);
+	return alias_val;
+}
+
+int split_cmdline(char *cmdline, const char ***argv)
+{
+	int src, dst, count = 0, size = 16;
+	char quoted = 0;
+
+	*argv = malloc(sizeof(char*) * size);
+
+	/* split alias_string */
+	(*argv)[count++] = cmdline;
+	for (src = dst = 0; cmdline[src];) {
+		char c = cmdline[src];
+		if (!quoted && isspace(c)) {
+			cmdline[dst++] = 0;
+			while (cmdline[++src]
+					&& isspace(cmdline[src]))
+				; /* skip */
+			if (count >= size) {
+				size += 16;
+				*argv = realloc(*argv, sizeof(char*) * size);
+			}
+			(*argv)[count++] = cmdline + dst;
+		} else if (!quoted && (c == '\'' || c == '"')) {
+			quoted = c;
+			src++;
+		} else if (c == quoted) {
+			quoted = 0;
+			src++;
+		} else {
+			if (c == '\\' && quoted != '\'') {
+				src++;
+				c = cmdline[src];
+				if (!c) {
+					free(*argv);
+					*argv = NULL;
+					return error("cmdline ends with \\");
+				}
+			}
+			cmdline[dst++] = c;
+			src++;
+		}
+	}
+
+	cmdline[dst] = 0;
+
+	if (quoted) {
+		free(*argv);
+		*argv = NULL;
+		return error("unclosed quote");
+	}
+
+	return count;
+}
+
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
new file mode 100644
index 0000000..393d614
--- /dev/null
+++ b/tools/perf/util/cache.h
@@ -0,0 +1,119 @@
+#ifndef CACHE_H
+#define CACHE_H
+
+#include "util.h"
+#include "strbuf.h"
+
+#define PERF_DIR_ENVIRONMENT "PERF_DIR"
+#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
+#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
+#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
+#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
+#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
+#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
+#define CONFIG_ENVIRONMENT "PERF_CONFIG"
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
+#define PERFATTRIBUTES_FILE ".perfattributes"
+#define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+extern int perf_default_config(const char *, const char *, void *);
+extern int perf_config_from_file(config_fn_t fn, const char *, void *);
+extern int perf_config(config_fn_t fn, void *);
+extern int perf_parse_ulong(const char *, unsigned long *);
+extern int perf_config_int(const char *, const char *);
+extern unsigned long perf_config_ulong(const char *, const char *);
+extern int perf_config_bool_or_int(const char *, const char *, int *);
+extern int perf_config_bool(const char *, const char *);
+extern int perf_config_string(const char **, const char *, const char *);
+extern int perf_config_set(const char *, const char *);
+extern int perf_config_set_multivar(const char *, const char *, const char *, int);
+extern int perf_config_rename_section(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
+extern int check_repository_format_version(const char *var, const char *value, void *cb);
+extern int perf_config_system(void);
+extern int perf_config_global(void);
+extern int config_error_nonbool(const char *);
+extern const char *config_exclusive_filename;
+
+#define MAX_PERFNAME (1000)
+extern char perf_default_email[MAX_PERFNAME];
+extern char perf_default_name[MAX_PERFNAME];
+extern int user_ident_explicitly_given;
+
+extern const char *perf_log_output_encoding;
+extern const char *perf_mailmap_file;
+
+/* IO helper functions */
+extern void maybe_flush_or_die(FILE *, const char *);
+extern int copy_fd(int ifd, int ofd);
+extern int copy_file(const char *dst, const char *src, int mode);
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern void write_or_die(int fd, const void *buf, size_t count);
+extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
+extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
+extern void fsync_or_die(int fd, const char *);
+
+/* pager.c */
+extern void setup_pager(void);
+extern const char *pager_program;
+extern int pager_in_use(void);
+extern int pager_use_color;
+
+extern const char *editor_program;
+extern const char *excludes_file;
+
+char *alias_lookup(const char *alias);
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while(0)
+
+
+static inline int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+
+const char *make_absolute_path(const char *path);
+const char *make_nonrelative_path(const char *path);
+const char *make_relative_path(const char *abs, const char *base);
+int normalize_path_copy(char *dst, const char *src);
+int longest_ancestor_length(const char *path, const char *prefix_list);
+char *strip_path_suffix(const char *path, const char *suffix);
+
+extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+extern int perf_mkstemp(char *path, size_t len, const char *template);
+
+extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_pathdup(const char *fmt, ...)
+	__attribute__((format (printf, 1, 2)));
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+#endif /* CACHE_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
new file mode 100644
index 0000000..9a8c20c
--- /dev/null
+++ b/tools/perf/util/color.c
@@ -0,0 +1,241 @@
+#include "cache.h"
+#include "color.h"
+
+int perf_use_color_default = -1;
+
+static int parse_color(const char *name, int len)
+{
+	static const char * const color_names[] = {
+		"normal", "black", "red", "green", "yellow",
+		"blue", "magenta", "cyan", "white"
+	};
+	char *end;
+	int i;
+	for (i = 0; i < ARRAY_SIZE(color_names); i++) {
+		const char *str = color_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return i - 1;
+	}
+	i = strtol(name, &end, 10);
+	if (end - name == len && i >= -1 && i <= 255)
+		return i;
+	return -2;
+}
+
+static int parse_attr(const char *name, int len)
+{
+	static const int attr_values[] = { 1, 2, 4, 5, 7 };
+	static const char * const attr_names[] = {
+		"bold", "dim", "ul", "blink", "reverse"
+	};
+	int i;
+	for (i = 0; i < ARRAY_SIZE(attr_names); i++) {
+		const char *str = attr_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return attr_values[i];
+	}
+	return -1;
+}
+
+void color_parse(const char *value, const char *var, char *dst)
+{
+	color_parse_mem(value, strlen(value), var, dst);
+}
+
+void color_parse_mem(const char *value, int value_len, const char *var,
+		char *dst)
+{
+	const char *ptr = value;
+	int len = value_len;
+	int attr = -1;
+	int fg = -2;
+	int bg = -2;
+
+	if (!strncasecmp(value, "reset", len)) {
+		strcpy(dst, PERF_COLOR_RESET);
+		return;
+	}
+
+	/* [fg [bg]] [attr] */
+	while (len > 0) {
+		const char *word = ptr;
+		int val, wordlen = 0;
+
+		while (len > 0 && !isspace(word[wordlen])) {
+			wordlen++;
+			len--;
+		}
+
+		ptr = word + wordlen;
+		while (len > 0 && isspace(*ptr)) {
+			ptr++;
+			len--;
+		}
+
+		val = parse_color(word, wordlen);
+		if (val >= -1) {
+			if (fg == -2) {
+				fg = val;
+				continue;
+			}
+			if (bg == -2) {
+				bg = val;
+				continue;
+			}
+			goto bad;
+		}
+		val = parse_attr(word, wordlen);
+		if (val < 0 || attr != -1)
+			goto bad;
+		attr = val;
+	}
+
+	if (attr >= 0 || fg >= 0 || bg >= 0) {
+		int sep = 0;
+
+		*dst++ = '\033';
+		*dst++ = '[';
+		if (attr >= 0) {
+			*dst++ = '0' + attr;
+			sep++;
+		}
+		if (fg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (fg < 8) {
+				*dst++ = '3';
+				*dst++ = '0' + fg;
+			} else {
+				dst += sprintf(dst, "38;5;%d", fg);
+			}
+		}
+		if (bg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (bg < 8) {
+				*dst++ = '4';
+				*dst++ = '0' + bg;
+			} else {
+				dst += sprintf(dst, "48;5;%d", bg);
+			}
+		}
+		*dst++ = 'm';
+	}
+	*dst = 0;
+	return;
+bad:
+	die("bad color value '%.*s' for variable '%s'", value_len, value, var);
+}
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	if (value) {
+		if (!strcasecmp(value, "never"))
+			return 0;
+		if (!strcasecmp(value, "always"))
+			return 1;
+		if (!strcasecmp(value, "auto"))
+			goto auto_color;
+	}
+
+	/* Missing or explicit false to turn off colorization */
+	if (!perf_config_bool(var, value))
+		return 0;
+
+	/* any normal truth value defaults to 'auto' */
+ auto_color:
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
+		char *term = getenv("TERM");
+		if (term && strcmp(term, "dumb"))
+			return 1;
+	}
+	return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "color.ui")) {
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+		return 0;
+	}
+
+	return perf_default_config(var, value, cb);
+}
+
+static int color_vfprintf(FILE *fp, const char *color, const char *fmt,
+		va_list args, const char *trail)
+{
+	int r = 0;
+
+	/*
+	 * Auto-detect:
+	 */
+	if (perf_use_color_default < 0) {
+		if (isatty(1) || pager_in_use())
+			perf_use_color_default = 1;
+		else
+			perf_use_color_default = 0;
+	}
+
+	if (perf_use_color_default && *color)
+		r += fprintf(fp, "%s", color);
+	r += vfprintf(fp, fmt, args);
+	if (perf_use_color_default && *color)
+		r += fprintf(fp, "%s", PERF_COLOR_RESET);
+	if (trail)
+		r += fprintf(fp, "%s", trail);
+	return r;
+}
+
+
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, NULL);
+	va_end(args);
+	return r;
+}
+
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, "\n");
+	va_end(args);
+	return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+		size_t count, const char *buf)
+{
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
+				fputs(PERF_COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
new file mode 100644
index 0000000..5abfd37
--- /dev/null
+++ b/tools/perf/util/color.h
@@ -0,0 +1,36 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL	""
+#define PERF_COLOR_RESET	"\033[m"
+#define PERF_COLOR_BOLD		"\033[1m"
+#define PERF_COLOR_RED		"\033[31m"
+#define PERF_COLOR_GREEN	"\033[32m"
+#define PERF_COLOR_YELLOW	"\033[33m"
+#define PERF_COLOR_BLUE		"\033[34m"
+#define PERF_COLOR_MAGENTA	"\033[35m"
+#define PERF_COLOR_CYAN		"\033[36m"
+#define PERF_COLOR_BG_RED	"\033[41m"
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+void color_parse(const char *value, const char *var, char *dst);
+void color_parse_mem(const char *value, int len, const char *var, char *dst);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+
+#endif /* COLOR_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
new file mode 100644
index 0000000..3dd13fa
--- /dev/null
+++ b/tools/perf/util/config.c
@@ -0,0 +1,873 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include "util.h"
+#include "cache.h"
+#include "exec_cmd.h"
+
+#define MAXNAME (256)
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+
+const char *config_exclusive_filename = NULL;
+
+static int get_next_char(void)
+{
+	int c;
+	FILE *f;
+
+	c = '\n';
+	if ((f = config_file) != NULL) {
+		c = fgetc(f);
+		if (c == '\r') {
+			/* DOS like systems */
+			c = fgetc(f);
+			if (c != '\n') {
+				ungetc(c, f);
+				c = '\r';
+			}
+		}
+		if (c == '\n')
+			config_linenr++;
+		if (c == EOF) {
+			config_file_eof = 1;
+			c = '\n';
+		}
+	}
+	return c;
+}
+
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, len = 0, space = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+static inline int iskeychar(int c)
+{
+	return isalnum(c) || c == '-';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = tolower(c);
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int c = get_next_char();
+		if (c == '\n')
+			return -1;
+		if (c == '"')
+			break;
+		if (c == '\\') {
+			c = get_next_char();
+			if (c == '\n')
+				return -1;
+		}
+		name[baselen++] = c;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int c = get_next_char();
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+		if (get_value(fn, data, var, baselen+1) < 0)
+			break;
+	}
+	die("bad config file line %d in %s", config_linenr, config_file_name);
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	if (!*end)
+		return 1;
+	else if (!strcasecmp(end, "k")) {
+		*val *= 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "m")) {
+		*val *= 1024 * 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "g")) {
+		*val *= 1024 * 1024 * 1024;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long val = strtol(value, &end, 0);
+		unsigned long factor = 1;
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+int perf_parse_ulong(const char *value, unsigned long *ret)
+{
+	if (value && *value) {
+		char *end;
+		unsigned long val = strtoul(value, &end, 0);
+		if (!parse_unit_factor(end, &val))
+			return 0;
+		*ret = val;
+		return 1;
+	}
+	return 0;
+}
+
+static void die_bad_config(const char *name)
+{
+	if (config_file_name)
+		die("bad config value for '%s' in %s", name, config_file_name);
+	die("bad config value for '%s'", name);
+}
+
+int perf_config_int(const char *name, const char *value)
+{
+	long ret = 0;
+	if (!perf_parse_long(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+unsigned long perf_config_ulong(const char *name, const char *value)
+{
+	unsigned long ret;
+	if (!perf_parse_ulong(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	*is_bool = 1;
+	if (!value)
+		return 1;
+	if (!*value)
+		return 0;
+	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+		return 1;
+	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+		return 0;
+	*is_bool = 0;
+	return perf_config_int(name, value);
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+	int discard;
+	return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+int perf_config_string(const char **dest, const char *var, const char *value)
+{
+	if (!value)
+		return config_error_nonbool(var);
+	*dest = strdup(value);
+	return 0;
+}
+
+static int perf_default_core_config(const char *var, const char *value)
+{
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_default_config(const char *var, const char *value, void *dummy)
+{
+	if (!prefixcmp(var, "core."))
+		return perf_default_core_config(var, value);
+
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	int ret;
+	FILE *f = fopen(filename, "r");
+
+	ret = -1;
+	if (f) {
+		config_file = f;
+		config_file_name = filename;
+		config_linenr = 1;
+		config_file_eof = 0;
+		ret = perf_parse_file(fn, data);
+		fclose(f);
+		config_file_name = NULL;
+	}
+	return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+	static const char *system_wide;
+	if (!system_wide)
+		system_wide = system_path(ETC_PERFCONFIG);
+	return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+	return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+	int ret = 0, found = 0;
+	char *repo_config = NULL;
+	const char *home = NULL;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(fn, config_exclusive_filename, data);
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
+					    data);
+		found += 1;
+	}
+
+	home = getenv("HOME");
+	if (perf_config_global() && home) {
+		char *user_config = strdup(mkpath("%s/.perfconfig", home));
+		if (!access(user_config, R_OK)) {
+			ret += perf_config_from_file(fn, user_config, data);
+			found += 1;
+		}
+		free(user_config);
+	}
+
+	repo_config = perf_pathdup("config");
+	if (!access(repo_config, R_OK)) {
+		ret += perf_config_from_file(fn, repo_config, data);
+		found += 1;
+	}
+	free(repo_config);
+	if (found == 0)
+		return -1;
+	return ret;
+}
+
+/*
+ * Find all the stuff for perf_config_set() below.
+ */
+
+#define MAX_MATCHES 512
+
+static struct {
+	int baselen;
+	char* key;
+	int do_not_match;
+	regex_t* value_regex;
+	int multi_replace;
+	size_t offset[MAX_MATCHES];
+	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
+	int seen;
+} store;
+
+static int matches(const char* key, const char* value)
+{
+	return !strcmp(key, store.key) &&
+		(store.value_regex == NULL ||
+		 (store.do_not_match ^
+		  !regexec(store.value_regex, value, 0, NULL, 0)));
+}
+
+static int store_aux(const char* key, const char* value, void *cb)
+{
+	const char *ep;
+	size_t section_len;
+
+	switch (store.state) {
+	case KEY_SEEN:
+		if (matches(key, value)) {
+			if (store.seen == 1 && store.multi_replace == 0) {
+				warning("%s has multiple values", key);
+			} else if (store.seen >= MAX_MATCHES) {
+				error("too many matches for %s", key);
+				return 1;
+			}
+
+			store.offset[store.seen] = ftell(config_file);
+			store.seen++;
+		}
+		break;
+	case SECTION_SEEN:
+		/*
+		 * What we are looking for is in store.key (both
+		 * section and var), and its section part is baselen
+		 * long.  We found key (again, both section and var).
+		 * We would want to know if this key is in the same
+		 * section as what we are looking for.  We already
+		 * know we are in the same section as what should
+		 * hold store.key.
+		 */
+		ep = strrchr(key, '.');
+		section_len = ep - key;
+
+		if ((section_len != store.baselen) ||
+		    memcmp(key, store.key, section_len+1)) {
+			store.state = SECTION_END_SEEN;
+			break;
+		}
+
+		/*
+		 * Do not increment matches: this is no match, but we
+		 * just made sure we are in the desired section.
+		 */
+		store.offset[store.seen] = ftell(config_file);
+		/* fallthru */
+	case SECTION_END_SEEN:
+	case START:
+		if (matches(key, value)) {
+			store.offset[store.seen] = ftell(config_file);
+			store.state = KEY_SEEN;
+			store.seen++;
+		} else {
+			if (strrchr(key, '.') - key == store.baselen &&
+			      !strncmp(key, store.key, store.baselen)) {
+					store.state = SECTION_SEEN;
+					store.offset[store.seen] = ftell(config_file);
+			}
+		}
+	}
+	return 0;
+}
+
+static int store_write_section(int fd, const char* key)
+{
+	const char *dot;
+	int i, success;
+	struct strbuf sb = STRBUF_INIT;
+
+	dot = memchr(key, '.', store.baselen);
+	if (dot) {
+		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
+		for (i = dot - key + 1; i < store.baselen; i++) {
+			if (key[i] == '"' || key[i] == '\\')
+				strbuf_addch(&sb, '\\');
+			strbuf_addch(&sb, key[i]);
+		}
+		strbuf_addstr(&sb, "\"]\n");
+	} else {
+		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
+	}
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static int store_write_pair(int fd, const char* key, const char* value)
+{
+	int i, success;
+	int length = strlen(key + store.baselen + 1);
+	const char *quote = "";
+	struct strbuf sb = STRBUF_INIT;
+
+	/*
+	 * Check to see if the value needs to be surrounded with a dq pair.
+	 * Note that problematic characters are always backslash-quoted; this
+	 * check is about not losing leading or trailing SP and strings that
+	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
+	 * configuration parser.
+	 */
+	if (value[0] == ' ')
+		quote = "\"";
+	for (i = 0; value[i]; i++)
+		if (value[i] == ';' || value[i] == '#')
+			quote = "\"";
+	if (i && value[i - 1] == ' ')
+		quote = "\"";
+
+	strbuf_addf(&sb, "\t%.*s = %s",
+		    length, key + store.baselen + 1, quote);
+
+	for (i = 0; value[i]; i++)
+		switch (value[i]) {
+		case '\n':
+			strbuf_addstr(&sb, "\\n");
+			break;
+		case '\t':
+			strbuf_addstr(&sb, "\\t");
+			break;
+		case '"':
+		case '\\':
+			strbuf_addch(&sb, '\\');
+		default:
+			strbuf_addch(&sb, value[i]);
+			break;
+		}
+	strbuf_addf(&sb, "%s\n", quote);
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static ssize_t find_beginning_of_line(const char* contents, size_t size,
+	size_t offset_, int* found_bracket)
+{
+	size_t equal_offset = size, bracket_offset = size;
+	ssize_t offset;
+
+contline:
+	for (offset = offset_-2; offset > 0
+			&& contents[offset] != '\n'; offset--)
+		switch (contents[offset]) {
+			case '=': equal_offset = offset; break;
+			case ']': bracket_offset = offset; break;
+		}
+	if (offset > 0 && contents[offset-1] == '\\') {
+		offset_ = offset;
+		goto contline;
+	}
+	if (bracket_offset < equal_offset) {
+		*found_bracket = 1;
+		offset = bracket_offset+1;
+	} else
+		offset++;
+
+	return offset;
+}
+
+int perf_config_set(const char* key, const char* value)
+{
+	return perf_config_set_multivar(key, value, NULL, 0);
+}
+
+/*
+ * If value==NULL, unset in (remove from) config,
+ * if value_regex!=NULL, disregard key/value pairs where value does not match.
+ * if multi_replace==0, nothing, or only one matching key/value is replaced,
+ *     else all matching key/values (regardless how many) are removed,
+ *     before the new pair is written.
+ *
+ * Returns 0 on success.
+ *
+ * This function does this:
+ *
+ * - it locks the config file by creating ".perf/config.lock"
+ *
+ * - it then parses the config using store_aux() as validator to find
+ *   the position on the key/value pair to replace. If it is to be unset,
+ *   it must be found exactly once.
+ *
+ * - the config file is mmap()ed and the part before the match (if any) is
+ *   written to the lock file, then the changed part and the rest.
+ *
+ * - the config file is removed and the lock file rename()d to it.
+ *
+ */
+int perf_config_set_multivar(const char* key, const char* value,
+	const char* value_regex, int multi_replace)
+{
+	int i, dot;
+	int fd = -1, in_fd;
+	int ret = 0;
+	char* config_filename;
+	const char* last_dot = strrchr(key, '.');
+
+	if (config_exclusive_filename)
+		config_filename = strdup(config_exclusive_filename);
+	else
+		config_filename = perf_pathdup("config");
+
+	/*
+	 * Since "key" actually contains the section name and the real
+	 * key name separated by a dot, we have to know where the dot is.
+	 */
+
+	if (last_dot == NULL) {
+		error("key does not contain a section: %s", key);
+		ret = 2;
+		goto out_free;
+	}
+	store.baselen = last_dot - key;
+
+	store.multi_replace = multi_replace;
+
+	/*
+	 * Validate the key and while at it, lower case it for matching.
+	 */
+	store.key = malloc(strlen(key) + 1);
+	dot = 0;
+	for (i = 0; key[i]; i++) {
+		unsigned char c = key[i];
+		if (c == '.')
+			dot = 1;
+		/* Leave the extended basename untouched.. */
+		if (!dot || i > store.baselen) {
+			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
+				error("invalid key: %s", key);
+				free(store.key);
+				ret = 1;
+				goto out_free;
+			}
+			c = tolower(c);
+		} else if (c == '\n') {
+			error("invalid key (newline): %s", key);
+			free(store.key);
+			ret = 1;
+			goto out_free;
+		}
+		store.key[i] = c;
+	}
+	store.key[i] = 0;
+
+	/*
+	 * If .perf/config does not exist yet, write a minimal version.
+	 */
+	in_fd = open(config_filename, O_RDONLY);
+	if ( in_fd < 0 ) {
+		free(store.key);
+
+		if ( ENOENT != errno ) {
+			error("opening %s: %s", config_filename,
+			      strerror(errno));
+			ret = 3; /* same as "invalid config file" */
+			goto out_free;
+		}
+		/* if nothing to unset, error out */
+		if (value == NULL) {
+			ret = 5;
+			goto out_free;
+		}
+
+		store.key = (char*)key;
+		if (!store_write_section(fd, key) ||
+		    !store_write_pair(fd, key, value))
+			goto write_err_out;
+	} else {
+		struct stat st;
+		char* contents;
+		size_t contents_sz, copy_begin, copy_end;
+		int i, new_line = 0;
+
+		if (value_regex == NULL)
+			store.value_regex = NULL;
+		else {
+			if (value_regex[0] == '!') {
+				store.do_not_match = 1;
+				value_regex++;
+			} else
+				store.do_not_match = 0;
+
+			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
+			if (regcomp(store.value_regex, value_regex,
+					REG_EXTENDED)) {
+				error("invalid pattern: %s", value_regex);
+				free(store.value_regex);
+				ret = 6;
+				goto out_free;
+			}
+		}
+
+		store.offset[0] = 0;
+		store.state = START;
+		store.seen = 0;
+
+		/*
+		 * After this, store.offset will contain the *end* offset
+		 * of the last match, or remain at 0 if no match was found.
+		 * As a side effect, we make sure to transform only a valid
+		 * existing config file.
+		 */
+		if (perf_config_from_file(store_aux, config_filename, NULL)) {
+			error("invalid config file %s", config_filename);
+			free(store.key);
+			if (store.value_regex != NULL) {
+				regfree(store.value_regex);
+				free(store.value_regex);
+			}
+			ret = 3;
+			goto out_free;
+		}
+
+		free(store.key);
+		if (store.value_regex != NULL) {
+			regfree(store.value_regex);
+			free(store.value_regex);
+		}
+
+		/* if nothing to unset, or too many matches, error out */
+		if ((store.seen == 0 && value == NULL) ||
+				(store.seen > 1 && multi_replace == 0)) {
+			ret = 5;
+			goto out_free;
+		}
+
+		fstat(in_fd, &st);
+		contents_sz = xsize_t(st.st_size);
+		contents = mmap(NULL, contents_sz, PROT_READ,
+			MAP_PRIVATE, in_fd, 0);
+		close(in_fd);
+
+		if (store.seen == 0)
+			store.seen = 1;
+
+		for (i = 0, copy_begin = 0; i < store.seen; i++) {
+			if (store.offset[i] == 0) {
+				store.offset[i] = copy_end = contents_sz;
+			} else if (store.state != KEY_SEEN) {
+				copy_end = store.offset[i];
+			} else
+				copy_end = find_beginning_of_line(
+					contents, contents_sz,
+					store.offset[i]-2, &new_line);
+
+			if (copy_end > 0 && contents[copy_end-1] != '\n')
+				new_line = 1;
+
+			/* write the first part of the config */
+			if (copy_end > copy_begin) {
+				if (write_in_full(fd, contents + copy_begin,
+						  copy_end - copy_begin) <
+				    copy_end - copy_begin)
+					goto write_err_out;
+				if (new_line &&
+				    write_in_full(fd, "\n", 1) != 1)
+					goto write_err_out;
+			}
+			copy_begin = store.offset[i];
+		}
+
+		/* write the pair (value == NULL means unset) */
+		if (value != NULL) {
+			if (store.state == START) {
+				if (!store_write_section(fd, key))
+					goto write_err_out;
+			}
+			if (!store_write_pair(fd, key, value))
+				goto write_err_out;
+		}
+
+		/* write the rest of the config */
+		if (copy_begin < contents_sz)
+			if (write_in_full(fd, contents + copy_begin,
+					  contents_sz - copy_begin) <
+			    contents_sz - copy_begin)
+				goto write_err_out;
+
+		munmap(contents, contents_sz);
+	}
+
+	ret = 0;
+
+out_free:
+	free(config_filename);
+	return ret;
+
+write_err_out:
+	goto out_free;
+
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+	return error("Missing value for '%s'", var);
+}
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
new file mode 100644
index 0000000..b90ec00
--- /dev/null
+++ b/tools/perf/util/ctype.c
@@ -0,0 +1,26 @@
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "cache.h"
+
+enum {
+	S = GIT_SPACE,
+	A = GIT_ALPHA,
+	D = GIT_DIGIT,
+	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
+};
+
+unsigned char sane_ctype[256] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
+	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
+	/* Nothing in the 128.. range */
+};
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
new file mode 100644
index 0000000..275b0ee
--- /dev/null
+++ b/tools/perf/util/environment.c
@@ -0,0 +1,9 @@
+/*
+ * We put all the perf config variables in this same object
+ * file, so that programs can link against the config parser
+ * without having to link against all the rest of perf.
+ */
+#include "cache.h"
+
+const char *pager_program;
+int pager_use_color = 1;
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
new file mode 100644
index 0000000..d392922
--- /dev/null
+++ b/tools/perf/util/exec_cmd.c
@@ -0,0 +1,165 @@
+#include "cache.h"
+#include "exec_cmd.h"
+#include "quote.h"
+#define MAX_ARGS	32
+
+extern char **environ;
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+const char *system_path(const char *path)
+{
+#ifdef RUNTIME_PREFIX
+	static const char *prefix;
+#else
+	static const char *prefix = PREFIX;
+#endif
+	struct strbuf d = STRBUF_INIT;
+
+	if (is_absolute_path(path))
+		return path;
+
+#ifdef RUNTIME_PREFIX
+	assert(argv0_path);
+	assert(is_absolute_path(argv0_path));
+
+	if (!prefix &&
+	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
+	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
+	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
+		prefix = PREFIX;
+		fprintf(stderr, "RUNTIME_PREFIX requested, "
+				"but prefix computation failed.  "
+				"Using static fallback '%s'.\n", prefix);
+	}
+#endif
+
+	strbuf_addf(&d, "%s/%s", prefix, path);
+	path = strbuf_detach(&d, NULL);
+	return path;
+}
+
+const char *perf_extract_argv0_path(const char *argv0)
+{
+	const char *slash;
+
+	if (!argv0 || !*argv0)
+		return NULL;
+	slash = argv0 + strlen(argv0);
+
+	while (argv0 <= slash && !is_dir_sep(*slash))
+		slash--;
+
+	if (slash >= argv0) {
+		argv0_path = strndup(argv0, slash - argv0);
+		return slash + 1;
+	}
+
+	return argv0;
+}
+
+void perf_set_argv_exec_path(const char *exec_path)
+{
+	argv_exec_path = exec_path;
+	/*
+	 * Propagate this setting to external programs.
+	 */
+	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
+}
+
+
+/* Returns the highest-priority, location to look for perf programs. */
+const char *perf_exec_path(void)
+{
+	const char *env;
+
+	if (argv_exec_path)
+		return argv_exec_path;
+
+	env = getenv(EXEC_PATH_ENVIRONMENT);
+	if (env && *env) {
+		return env;
+	}
+
+	return system_path(PERF_EXEC_PATH);
+}
+
+static void add_path(struct strbuf *out, const char *path)
+{
+	if (path && *path) {
+		if (is_absolute_path(path))
+			strbuf_addstr(out, path);
+		else
+			strbuf_addstr(out, make_nonrelative_path(path));
+
+		strbuf_addch(out, PATH_SEP);
+	}
+}
+
+void setup_path(void)
+{
+	const char *old_path = getenv("PATH");
+	struct strbuf new_path = STRBUF_INIT;
+
+	add_path(&new_path, perf_exec_path());
+	add_path(&new_path, argv0_path);
+
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+	else
+		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+	setenv("PATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+const char **prepare_perf_cmd(const char **argv)
+{
+	int argc;
+	const char **nargv;
+
+	for (argc = 0; argv[argc]; argc++)
+		; /* just counting */
+	nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+	nargv[0] = "perf";
+	for (argc = 0; argv[argc]; argc++)
+		nargv[argc + 1] = argv[argc];
+	nargv[argc + 1] = NULL;
+	return nargv;
+}
+
+int execv_perf_cmd(const char **argv) {
+	const char **nargv = prepare_perf_cmd(argv);
+
+	/* execvp() can only ever return if it fails */
+	execvp("perf", (char **)nargv);
+
+	free(nargv);
+	return -1;
+}
+
+
+int execl_perf_cmd(const char *cmd,...)
+{
+	int argc;
+	const char *argv[MAX_ARGS + 1];
+	const char *arg;
+	va_list param;
+
+	va_start(param, cmd);
+	argv[0] = cmd;
+	argc = 1;
+	while (argc < MAX_ARGS) {
+		arg = argv[argc++] = va_arg(param, char *);
+		if (!arg)
+			break;
+	}
+	va_end(param);
+	if (MAX_ARGS <= argc)
+		return error("too many args to run %s", cmd);
+
+	argv[argc] = NULL;
+	return execv_perf_cmd(argv);
+}
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
new file mode 100644
index 0000000..effe25e
--- /dev/null
+++ b/tools/perf/util/exec_cmd.h
@@ -0,0 +1,13 @@
+#ifndef PERF_EXEC_CMD_H
+#define PERF_EXEC_CMD_H
+
+extern void perf_set_argv_exec_path(const char *exec_path);
+extern const char *perf_extract_argv0_path(const char *path);
+extern const char *perf_exec_path(void);
+extern void setup_path(void);
+extern const char **prepare_perf_cmd(const char **argv);
+extern int execv_perf_cmd(const char **argv); /* NULL terminated */
+extern int execl_perf_cmd(const char *cmd, ...);
+extern const char *system_path(const char *path);
+
+#endif /* PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
new file mode 100755
index 0000000..f06f6fd
--- /dev/null
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "};"
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
new file mode 100644
index 0000000..6653f7d
--- /dev/null
+++ b/tools/perf/util/help.c
@@ -0,0 +1,367 @@
+#include "cache.h"
+#include "../builtin.h"
+#include "exec_cmd.h"
+#include "levenshtein.h"
+#include "help.h"
+
+/* most GUI terminals set COLUMNS (although some don't export it) */
+static int term_columns(void)
+{
+	char *col_string = getenv("COLUMNS");
+	int n_cols;
+
+	if (col_string && (n_cols = atoi(col_string)) > 0)
+		return n_cols;
+
+#ifdef TIOCGWINSZ
+	{
+		struct winsize ws;
+		if (!ioctl(1, TIOCGWINSZ, &ws)) {
+			if (ws.ws_col)
+				return ws.ws_col;
+		}
+	}
+#endif
+
+	return 80;
+}
+
+void add_cmdname(struct cmdnames *cmds, const char *name, int len)
+{
+	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+	ent->len = len;
+	memcpy(ent->name, name, len);
+	ent->name[len] = 0;
+
+	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+	cmds->names[cmds->cnt++] = ent;
+}
+
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+	int i;
+	for (i = 0; i < cmds->cnt; ++i)
+		free(cmds->names[i]);
+	free(cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
+static int cmdname_compare(const void *a_, const void *b_)
+{
+	struct cmdname *a = *(struct cmdname **)a_;
+	struct cmdname *b = *(struct cmdname **)b_;
+	return strcmp(a->name, b->name);
+}
+
+static void uniq(struct cmdnames *cmds)
+{
+	int i, j;
+
+	if (!cmds->cnt)
+		return;
+
+	for (i = j = 1; i < cmds->cnt; i++)
+		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			cmds->names[j++] = cmds->names[i];
+
+	cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+	int ci, cj, ei;
+	int cmp;
+
+	ci = cj = ei = 0;
+	while (ci < cmds->cnt && ei < excludes->cnt) {
+		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+		if (cmp < 0)
+			cmds->names[cj++] = cmds->names[ci++];
+		else if (cmp == 0)
+			ci++, ei++;
+		else if (cmp > 0)
+			ei++;
+	}
+
+	while (ci < cmds->cnt)
+		cmds->names[cj++] = cmds->names[ci++];
+
+	cmds->cnt = cj;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+	int cols = 1, rows;
+	int space = longest + 1; /* min 1 SP between words */
+	int max_cols = term_columns() - 1; /* don't print *on* the edge */
+	int i, j;
+
+	if (space < max_cols)
+		cols = max_cols / space;
+	rows = (cmds->cnt + cols - 1) / cols;
+
+	for (i = 0; i < rows; i++) {
+		printf("  ");
+
+		for (j = 0; j < cols; j++) {
+			int n = j * rows + i;
+			int size = space;
+			if (n >= cmds->cnt)
+				break;
+			if (j == cols-1 || n + rows >= cmds->cnt)
+				size = 1;
+			printf("%-*s", size, cmds->names[n]->name);
+		}
+		putchar('\n');
+	}
+}
+
+static int is_executable(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) || /* stat, not lstat */
+	    !S_ISREG(st.st_mode))
+		return 0;
+
+#ifdef __MINGW32__
+	/* cannot trust the executable bit, peek into the file instead */
+	char buf[3] = { 0 };
+	int n;
+	int fd = open(name, O_RDONLY);
+	st.st_mode &= ~S_IXUSR;
+	if (fd >= 0) {
+		n = read(fd, buf, 2);
+		if (n == 2)
+			/* DOS executables start with "MZ" */
+			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
+				st.st_mode |= S_IXUSR;
+		close(fd);
+	}
+#endif
+	return st.st_mode & S_IXUSR;
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+					 const char *path,
+					 const char *prefix)
+{
+	int prefix_len;
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	struct strbuf buf = STRBUF_INIT;
+	int len;
+
+	if (!dir)
+		return;
+	if (!prefix)
+		prefix = "perf-";
+	prefix_len = strlen(prefix);
+
+	strbuf_addf(&buf, "%s/", path);
+	len = buf.len;
+
+	while ((de = readdir(dir)) != NULL) {
+		int entlen;
+
+		if (prefixcmp(de->d_name, prefix))
+			continue;
+
+		strbuf_setlen(&buf, len);
+		strbuf_addstr(&buf, de->d_name);
+		if (!is_executable(buf.buf))
+			continue;
+
+		entlen = strlen(de->d_name) - prefix_len;
+		if (has_extension(de->d_name, ".exe"))
+			entlen -= 4;
+
+		add_cmdname(cmds, de->d_name + prefix_len, entlen);
+	}
+	closedir(dir);
+	strbuf_release(&buf);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds)
+{
+	const char *env_path = getenv("PATH");
+	const char *exec_path = perf_exec_path();
+
+	if (exec_path) {
+		list_commands_in_dir(main_cmds, exec_path, prefix);
+		qsort(main_cmds->names, main_cmds->cnt,
+		      sizeof(*main_cmds->names), cmdname_compare);
+		uniq(main_cmds);
+	}
+
+	if (env_path) {
+		char *paths, *path, *colon;
+		path = paths = strdup(env_path);
+		while (1) {
+			if ((colon = strchr(path, PATH_SEP)))
+				*colon = 0;
+			if (!exec_path || strcmp(path, exec_path))
+				list_commands_in_dir(other_cmds, path, prefix);
+
+			if (!colon)
+				break;
+			path = colon + 1;
+		}
+		free(paths);
+
+		qsort(other_cmds->names, other_cmds->cnt,
+		      sizeof(*other_cmds->names), cmdname_compare);
+		uniq(other_cmds);
+	}
+	exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
+
+	if (main_cmds->cnt) {
+		const char *exec_path = perf_exec_path();
+		printf("available %s in '%s'\n", title, exec_path);
+		printf("----------------");
+		mput_char('-', strlen(title) + strlen(exec_path));
+		putchar('\n');
+		pretty_print_string_list(main_cmds, longest);
+		putchar('\n');
+	}
+
+	if (other_cmds->cnt) {
+		printf("%s available from elsewhere on your $PATH\n", title);
+		printf("---------------------------------------");
+		mput_char('-', strlen(title));
+		putchar('\n');
+		pretty_print_string_list(other_cmds, longest);
+		putchar('\n');
+	}
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+	int i;
+	for (i = 0; i < c->cnt; i++)
+		if (!strcmp(s, c->names[i]->name))
+			return 1;
+	return 0;
+}
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.autocorrect"))
+		autocorrect = perf_config_int(var,value);
+	/* Also use aliases for command lookup */
+	if (!prefixcmp(var, "alias."))
+		add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+	return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	int i;
+	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	free(old->names);
+	old->cnt = 0;
+	old->names = NULL;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+	int i, n = 0, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+	memset(&aliases, 0, sizeof(aliases));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	add_cmd_list(&main_cmds, &aliases);
+	add_cmd_list(&main_cmds, &other_cmds);
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	if (main_cmds.cnt) {
+		/* This reuses cmdname->len for similarity index */
+		for (i = 0; i < main_cmds.cnt; ++i)
+			main_cmds.names[i]->len =
+				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+		qsort(main_cmds.names, main_cmds.cnt,
+		      sizeof(*main_cmds.names), levenshtein_compare);
+
+		best_similarity = main_cmds.names[0]->len;
+		n = 1;
+		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+			++n;
+	}
+
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (main_cmds.cnt && best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
+	exit(1);
+}
+
+int cmd_version(int argc, const char **argv, const char *prefix)
+{
+	printf("perf version %s\n", perf_version_string);
+	return 0;
+}
diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h
new file mode 100644
index 0000000..56bc154
--- /dev/null
+++ b/tools/perf/util/help.h
@@ -0,0 +1,29 @@
+#ifndef HELP_H
+#define HELP_H
+
+struct cmdnames {
+	int alloc;
+	int cnt;
+	struct cmdname {
+		size_t len; /* also used for similarity index in help.c */
+		char name[FLEX_ARRAY];
+	} **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+	while(num--)
+		putchar(c);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, int len);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
+
+#endif /* HELP_H */
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c
new file mode 100644
index 0000000..e521d15
--- /dev/null
+++ b/tools/perf/util/levenshtein.c
@@ -0,0 +1,84 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substition, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/tools/perf/util/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
diff --git a/tools/perf/util/list.h b/tools/perf/util/list.h
new file mode 100644
index 0000000..e2548e8
--- /dev/null
+++ b/tools/perf/util/list.h
@@ -0,0 +1,603 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/*
+  Copyright (C) Cast of dozens, comes from the Linux kernel
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+*/
+
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *)0x00100100)
+#define LIST_POISON2 ((void *)0x00200200)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+        (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_range - deletes range of entries from list.
+ * @beging: first element in the range to delete from the list.
+ * @beging: first element in the range to delete from the list.
+ * Note: list_empty on the range of entries does not return true after this,
+ * the entries is in an undefined state.
+ */
+static inline void list_del_range(struct list_head *begin,
+				  struct list_head *end)
+{
+	begin->prev->next = end->next;
+	end->next->prev = begin->prev;
+}
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ * Note: if 'old' was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+static inline void __list_splice(struct list_head *list,
+				 struct list_head *head)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+	struct list_head *at = head->next;
+
+	first->prev = head;
+	head->next = first;
+
+	last->next = at;
+	at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:       the list head to take the element from.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos; \
+	     pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
new file mode 100644
index 0000000..a28bcca
--- /dev/null
+++ b/tools/perf/util/pager.c
@@ -0,0 +1,99 @@
+#include "cache.h"
+#include "run-command.h"
+#include "sigchain.h"
+
+/*
+ * This is split up from the rest of git so that we can do
+ * something different on Windows.
+ */
+
+static int spawned_pager;
+
+#ifndef __MINGW32__
+static void pager_preexec(void)
+{
+	/*
+	 * Work around bug in "less" by not starting it until we
+	 * have real input
+	 */
+	fd_set in;
+
+	FD_ZERO(&in);
+	FD_SET(0, &in);
+	select(1, &in, NULL, &in, NULL);
+
+	setenv("LESS", "FRSX", 0);
+}
+#endif
+
+static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
+static struct child_process pager_process;
+
+static void wait_for_pager(void)
+{
+	fflush(stdout);
+	fflush(stderr);
+	/* signal EOF to pager */
+	close(1);
+	close(2);
+	finish_command(&pager_process);
+}
+
+static void wait_for_pager_signal(int signo)
+{
+	wait_for_pager();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+void setup_pager(void)
+{
+	const char *pager = getenv("PERF_PAGER");
+
+	if (!isatty(1))
+		return;
+	if (!pager) {
+		if (!pager_program)
+			perf_config(perf_default_config, NULL);
+		pager = pager_program;
+	}
+	if (!pager)
+		pager = getenv("PAGER");
+	if (!pager)
+		pager = "less";
+	else if (!*pager || !strcmp(pager, "cat"))
+		return;
+
+	spawned_pager = 1; /* means we are emitting to terminal */
+
+	/* spawn the pager */
+	pager_argv[2] = pager;
+	pager_process.argv = pager_argv;
+	pager_process.in = -1;
+#ifndef __MINGW32__
+	pager_process.preexec_cb = pager_preexec;
+#endif
+	if (start_command(&pager_process))
+		return;
+
+	/* original process continues, but writes to the pipe */
+	dup2(pager_process.in, 1);
+	if (isatty(2))
+		dup2(pager_process.in, 2);
+	close(pager_process.in);
+
+	/* this makes sure that the parent terminates after the pager */
+	sigchain_push_common(wait_for_pager_signal);
+	atexit(wait_for_pager);
+}
+
+int pager_in_use(void)
+{
+	const char *env;
+
+	if (spawned_pager)
+		return 1;
+
+	env = getenv("PERF_PAGER_IN_USE");
+	return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
+}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
new file mode 100644
index 0000000..5a72586
--- /dev/null
+++ b/tools/perf/util/parse-events.c
@@ -0,0 +1,316 @@
+
+#include "../perf.h"
+#include "util.h"
+#include "parse-options.h"
+#include "parse-events.h"
+#include "exec_cmd.h"
+#include "string.h"
+
+extern char *strcasestr(const char *haystack, const char *needle);
+
+int					nr_counters;
+
+struct perf_counter_attr		attrs[MAX_COUNTERS];
+
+struct event_symbol {
+	__u8	type;
+	__u64	config;
+	char	*symbol;
+};
+
+#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+#define CR(x, y) .type = PERF_TYPE_##x, .config = y
+
+static struct event_symbol event_symbols[] = {
+  { C(HARDWARE, HW_CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, HW_CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, HW_INSTRUCTIONS),	"instructions",		},
+  { C(HARDWARE, HW_CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, HW_CACHE_MISSES),	"cache-misses",		},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions",	},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches",		},
+  { C(HARDWARE, HW_BRANCH_MISSES),	"branch-misses",	},
+  { C(HARDWARE, HW_BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, SW_CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, SW_TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"page-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"migrations",		},
+};
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static char *hw_event_names[] = {
+	"cycles",
+	"instructions",
+	"cache-references",
+	"cache-misses",
+	"branches",
+	"branch-misses",
+	"bus-cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu-clock-ticks",
+	"task-clock-ticks",
+	"page-faults",
+	"context-switches",
+	"CPU-migrations",
+	"minor-faults",
+	"major-faults",
+};
+
+#define MAX_ALIASES 8
+
+static char *hw_cache [][MAX_ALIASES] = {
+	{ "L1-data"		, "l1-d", "l1d"					},
+	{ "L1-instruction"	, "l1-i", "l1i"					},
+	{ "L2"			, "l2"						},
+	{ "Data-TLB"		, "dtlb", "d-tlb"				},
+	{ "Instruction-TLB"	, "itlb", "i-tlb"				},
+	{ "Branch"		, "bpu" , "btb", "bpc"				},
+};
+
+static char *hw_cache_op [][MAX_ALIASES] = {
+	{ "Load"		, "read"					},
+	{ "Store"		, "write"					},
+	{ "Prefetch"		, "speculative-read", "speculative-load"	},
+};
+
+static char *hw_cache_result [][MAX_ALIASES] = {
+	{ "Reference"		, "ops", "access"				},
+	{ "Miss"								},
+};
+
+char *event_name(int counter)
+{
+	__u64 config = attrs[counter].config;
+	int type = attrs[counter].type;
+	static char buf[32];
+
+	if (attrs[counter].type == PERF_TYPE_RAW) {
+		sprintf(buf, "raw 0x%llx", config);
+		return buf;
+	}
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (config < PERF_COUNT_HW_MAX)
+			return hw_event_names[config];
+		return "unknown-hardware";
+
+	case PERF_TYPE_HW_CACHE: {
+		__u8 cache_type, cache_op, cache_result;
+		static char name[100];
+
+		cache_type   = (config >>  0) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
+			return "unknown-ext-hardware-cache-type";
+
+		cache_op     = (config >>  8) & 0xff;
+		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
+			return "unknown-ext-hardware-cache-op";
+
+		cache_result = (config >> 16) & 0xff;
+		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+			return "unknown-ext-hardware-cache-result";
+
+		sprintf(name, "%s-Cache-%s-%ses",
+			hw_cache[cache_type][0],
+			hw_cache_op[cache_op][0],
+			hw_cache_result[cache_result][0]);
+
+		return name;
+	}
+
+	case PERF_TYPE_SOFTWARE:
+		if (config < PERF_COUNT_SW_MAX)
+			return sw_event_names[config];
+		return "unknown-software";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
+{
+	int i, j;
+
+	for (i = 0; i < size; i++) {
+		for (j = 0; j < MAX_ALIASES; j++) {
+			if (!names[i][j])
+				break;
+			if (strcasestr(str, names[i][j]))
+				return i;
+		}
+	}
+
+	return -1;
+}
+
+static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	int cache_type = -1, cache_op = 0, cache_result = 0;
+
+	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	if (cache_type == -1)
+		return -EINVAL;
+
+	cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_op == -1)
+		cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+
+	cache_result = parse_aliases(str, hw_cache_result,
+					PERF_COUNT_HW_CACHE_RESULT_MAX);
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr->type = PERF_TYPE_HW_CACHE;
+
+	return 0;
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+	const char *sep, *pstr;
+
+	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
+		attr->type = PERF_TYPE_RAW;
+		attr->config = config;
+
+		return 0;
+	}
+
+	pstr = str;
+	sep = strchr(pstr, ':');
+	if (sep) {
+		type = atoi(pstr);
+		pstr = sep + 1;
+		id = atoi(pstr);
+		sep = strchr(pstr, ':');
+		if (sep) {
+			pstr = sep + 1;
+			if (strchr(pstr, 'k'))
+				attr->exclude_user = 1;
+			if (strchr(pstr, 'u'))
+				attr->exclude_kernel = 1;
+		}
+		attr->type = type;
+		attr->config = id;
+
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol))) {
+
+			attr->type = event_symbols[i].type;
+			attr->config = event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
+	return parse_generic_hw_symbols(str, attr);
+}
+
+int parse_events(const struct option *opt, const char *str, int unset)
+{
+	struct perf_counter_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	ret = parse_event_symbols(str, &attr);
+	if (ret < 0)
+		return ret;
+
+	attrs[nr_counters] = attr;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+static const char * const event_type_descriptors[] = {
+	"",
+	"Hardware event",
+	"Software event",
+	"Tracepoint event",
+	"Hardware cache event",
+};
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(void)
+{
+	struct event_symbol *syms = event_symbols;
+	unsigned int i, type, prev_type = -1;
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
+		type = syms->type + 1;
+		if (type > ARRAY_SIZE(event_type_descriptors))
+			type = 0;
+
+		if (type != prev_type)
+			fprintf(stderr, "\n");
+
+		fprintf(stderr, "  %-30s [%s]\n", syms->symbol,
+			event_type_descriptors[type]);
+
+		prev_type = type;
+	}
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "  %-30s [raw hardware event descriptor]\n",
+		"rNNN");
+	fprintf(stderr, "\n");
+
+	exit(129);
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
new file mode 100644
index 0000000..e3d5529
--- /dev/null
+++ b/tools/perf/util/parse-events.h
@@ -0,0 +1,17 @@
+
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+extern int			nr_counters;
+
+extern struct perf_counter_attr attrs[MAX_COUNTERS];
+
+extern char *event_name(int ctr);
+
+extern int parse_events(const struct option *opt, const char *str, int unset);
+
+#define EVENTS_HELP_MAX (128*1024)
+
+extern void print_events(void);
+
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
new file mode 100644
index 0000000..b3affb1
--- /dev/null
+++ b/tools/perf/util/parse-options.c
@@ -0,0 +1,508 @@
+#include "util.h"
+#include "parse-options.h"
+#include "cache.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		return error("switch `%c' %s", opt->short_name, reason);
+	if (flags & OPT_UNSET)
+		return error("option `no-%s' %s", opt->long_name, reason);
+	return error("option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+		   int flags, const char **arg)
+{
+	if (p->opt) {
+		*arg = p->opt;
+		p->opt = NULL;
+	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
+		*arg = (const char *)opt->defval;
+	} else if (p->argc > 1) {
+		p->argc--;
+		*arg = *++p->argv;
+	} else
+		return opterror(opt, "requires a value", flags);
+	return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+		     const struct option *opt, int flags)
+{
+	const char *s, *arg = NULL;
+	const int unset = flags & OPT_UNSET;
+
+	if (unset && p->opt)
+		return opterror(opt, "takes no value", flags);
+	if (unset && (opt->flags & PARSE_OPT_NONEG))
+		return opterror(opt, "isn't available", flags);
+
+	if (!(flags & OPT_SHORT) && p->opt) {
+		switch (opt->type) {
+		case OPTION_CALLBACK:
+			if (!(opt->flags & PARSE_OPT_NOARG))
+				break;
+			/* FALLTHROUGH */
+		case OPTION_BOOLEAN:
+		case OPTION_BIT:
+		case OPTION_SET_INT:
+		case OPTION_SET_PTR:
+			return opterror(opt, "takes no value", flags);
+		default:
+			break;
+		}
+	}
+
+	switch (opt->type) {
+	case OPTION_BIT:
+		if (unset)
+			*(int *)opt->value &= ~opt->defval;
+		else
+			*(int *)opt->value |= opt->defval;
+		return 0;
+
+	case OPTION_BOOLEAN:
+		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+		return 0;
+
+	case OPTION_SET_INT:
+		*(int *)opt->value = unset ? 0 : opt->defval;
+		return 0;
+
+	case OPTION_SET_PTR:
+		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
+		return 0;
+
+	case OPTION_STRING:
+		if (unset)
+			*(const char **)opt->value = NULL;
+		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			*(const char **)opt->value = (const char *)opt->defval;
+		else
+			return get_arg(p, opt, flags, (const char **)opt->value);
+		return 0;
+
+	case OPTION_CALLBACK:
+		if (unset)
+			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_NOARG)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+	case OPTION_INTEGER:
+		if (unset) {
+			*(int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_LONG:
+		if (unset) {
+			*(long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(long *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	default:
+		die("should not happen, someone must be hit on the forehead");
+	}
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+	for (; options->type != OPTION_END; options++) {
+		if (options->short_name == *p->opt) {
+			p->opt = p->opt[1] ? p->opt + 1 : NULL;
+			return get_value(p, options, OPT_SHORT);
+		}
+	}
+	return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+                          const struct option *options)
+{
+	const char *arg_end = strchr(arg, '=');
+	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+	int abbrev_flags = 0, ambiguous_flags = 0;
+
+	if (!arg_end)
+		arg_end = arg + strlen(arg);
+
+	for (; options->type != OPTION_END; options++) {
+		const char *rest;
+		int flags = 0;
+
+		if (!options->long_name)
+			continue;
+
+		rest = skip_prefix(arg, options->long_name);
+		if (options->type == OPTION_ARGUMENT) {
+			if (!rest)
+				continue;
+			if (*rest == '=')
+				return opterror(options, "takes no value", flags);
+			if (*rest)
+				continue;
+			p->out[p->cpidx++] = arg - 2;
+			return 0;
+		}
+		if (!rest) {
+			/* abbreviated? */
+			if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+				if (abbrev_option) {
+					/*
+					 * If this is abbreviated, it is
+					 * ambiguous. So when there is no
+					 * exact match later, we need to
+					 * error out.
+					 */
+					ambiguous_option = abbrev_option;
+					ambiguous_flags = abbrev_flags;
+				}
+				if (!(flags & OPT_UNSET) && *arg_end)
+					p->opt = arg_end + 1;
+				abbrev_option = options;
+				abbrev_flags = flags;
+				continue;
+			}
+			/* negated and abbreviated very much? */
+			if (!prefixcmp("no-", arg)) {
+				flags |= OPT_UNSET;
+				goto is_abbreviated;
+			}
+			/* negated? */
+			if (strncmp(arg, "no-", 3))
+				continue;
+			flags |= OPT_UNSET;
+			rest = skip_prefix(arg + 3, options->long_name);
+			/* abbreviated and negated? */
+			if (!rest && !prefixcmp(options->long_name, arg + 3))
+				goto is_abbreviated;
+			if (!rest)
+				continue;
+		}
+		if (*rest) {
+			if (*rest != '=')
+				continue;
+			p->opt = rest + 1;
+		}
+		return get_value(p, options, flags);
+	}
+
+	if (ambiguous_option)
+		return error("Ambiguous option: %s "
+			"(could be --%s%s or --%s%s)",
+			arg,
+			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
+			ambiguous_option->long_name,
+			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
+			abbrev_option->long_name);
+	if (abbrev_option)
+		return get_value(p, abbrev_option, abbrev_flags);
+	return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+	if (strlen(arg) < 3)
+		return;
+
+	if (!prefixcmp(arg, "no-")) {
+		error ("did you mean `--%s` (with two dashes ?)", arg);
+		exit(129);
+	}
+
+	for (; options->type != OPTION_END; options++) {
+		if (!options->long_name)
+			continue;
+		if (!prefixcmp(options->long_name, arg)) {
+			error ("did you mean `--%s` (with two dashes ?)", arg);
+			exit(129);
+		}
+	}
+}
+
+void parse_options_start(struct parse_opt_ctx_t *ctx,
+			 int argc, const char **argv, int flags)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->argc = argc - 1;
+	ctx->argv = argv + 1;
+	ctx->out  = argv;
+	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+	ctx->flags = flags;
+	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+				       const struct option *, int);
+
+int parse_options_step(struct parse_opt_ctx_t *ctx,
+		       const struct option *options,
+		       const char * const usagestr[])
+{
+	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+
+	/* we must reset ->opt, unknown short option leave it dangling */
+	ctx->opt = NULL;
+
+	for (; ctx->argc; ctx->argc--, ctx->argv++) {
+		const char *arg = ctx->argv[0];
+
+		if (*arg != '-' || !arg[1]) {
+			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+				break;
+			ctx->out[ctx->cpidx++] = ctx->argv[0];
+			continue;
+		}
+
+		if (arg[1] != '-') {
+			ctx->opt = arg + 1;
+			if (internal_help && *ctx->opt == 'h')
+				return parse_options_usage(usagestr, options);
+			switch (parse_short_opt(ctx, options)) {
+			case -1:
+				return parse_options_usage(usagestr, options);
+			case -2:
+				goto unknown;
+			}
+			if (ctx->opt)
+				check_typos(arg + 1, options);
+			while (ctx->opt) {
+				if (internal_help && *ctx->opt == 'h')
+					return parse_options_usage(usagestr, options);
+				switch (parse_short_opt(ctx, options)) {
+				case -1:
+					return parse_options_usage(usagestr, options);
+				case -2:
+					/* fake a short option thing to hide the fact that we may have
+					 * started to parse aggregated stuff
+					 *
+					 * This is leaky, too bad.
+					 */
+					ctx->argv[0] = strdup(ctx->opt - 1);
+					*(char *)ctx->argv[0] = '-';
+					goto unknown;
+				}
+			}
+			continue;
+		}
+
+		if (!arg[2]) { /* "--" */
+			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+				ctx->argc--;
+				ctx->argv++;
+			}
+			break;
+		}
+
+		if (internal_help && !strcmp(arg + 2, "help-all"))
+			return usage_with_options_internal(usagestr, options, 1);
+		if (internal_help && !strcmp(arg + 2, "help"))
+			return parse_options_usage(usagestr, options);
+		switch (parse_long_opt(ctx, arg + 2, options)) {
+		case -1:
+			return parse_options_usage(usagestr, options);
+		case -2:
+			goto unknown;
+		}
+		continue;
+unknown:
+		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+			return PARSE_OPT_UNKNOWN;
+		ctx->out[ctx->cpidx++] = ctx->argv[0];
+		ctx->opt = NULL;
+	}
+	return PARSE_OPT_DONE;
+}
+
+int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+	ctx->out[ctx->cpidx + ctx->argc] = NULL;
+	return ctx->cpidx + ctx->argc;
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+		  const char * const usagestr[], int flags)
+{
+	struct parse_opt_ctx_t ctx;
+
+	parse_options_start(&ctx, argc, argv, flags);
+	switch (parse_options_step(&ctx, options, usagestr)) {
+	case PARSE_OPT_HELP:
+		exit(129);
+	case PARSE_OPT_DONE:
+		break;
+	default: /* PARSE_OPT_UNKNOWN */
+		if (ctx.argv[0][1] == '-') {
+			error("unknown option `%s'", ctx.argv[0] + 2);
+		} else {
+			error("unknown switch `%c'", *ctx.opt);
+		}
+		usage_with_options(usagestr, options);
+	}
+
+	return parse_options_end(&ctx);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP         2
+
+int usage_with_options_internal(const char * const *usagestr,
+				const struct option *opts, int full)
+{
+	if (!usagestr)
+		return PARSE_OPT_HELP;
+
+	fprintf(stderr, "\n usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "    or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+
+	if (opts->type != OPTION_GROUP)
+		fputc('\n', stderr);
+
+	for (; opts->type != OPTION_END; opts++) {
+		size_t pos;
+		int pad;
+
+		if (opts->type == OPTION_GROUP) {
+			fputc('\n', stderr);
+			if (*opts->help)
+				fprintf(stderr, "%s\n", opts->help);
+			continue;
+		}
+		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+			continue;
+
+		pos = fprintf(stderr, "    ");
+		if (opts->short_name)
+			pos += fprintf(stderr, "-%c", opts->short_name);
+		if (opts->long_name && opts->short_name)
+			pos += fprintf(stderr, ", ");
+		if (opts->long_name)
+			pos += fprintf(stderr, "--%s", opts->long_name);
+
+		switch (opts->type) {
+		case OPTION_ARGUMENT:
+			break;
+		case OPTION_INTEGER:
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<n>]");
+				else
+					pos += fprintf(stderr, "[<n>]");
+			else
+				pos += fprintf(stderr, " <n>");
+			break;
+		case OPTION_CALLBACK:
+			if (opts->flags & PARSE_OPT_NOARG)
+				break;
+			/* FALLTHROUGH */
+		case OPTION_STRING:
+			if (opts->argh) {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=<%s>]", opts->argh);
+					else
+						pos += fprintf(stderr, "[<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, " <%s>", opts->argh);
+			} else {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=...]");
+					else
+						pos += fprintf(stderr, "[...]");
+				else
+					pos += fprintf(stderr, " ...");
+			}
+			break;
+		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
+			break;
+		}
+
+		if (pos <= USAGE_OPTS_WIDTH)
+			pad = USAGE_OPTS_WIDTH - pos;
+		else {
+			fputc('\n', stderr);
+			pad = USAGE_OPTS_WIDTH;
+		}
+		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+	}
+	fputc('\n', stderr);
+
+	return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+			const struct option *opts)
+{
+	usage_with_options_internal(usagestr, opts, 0);
+	exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+			const struct option *opts)
+{
+	return usage_with_options_internal(usagestr, opts, 0);
+}
+
+
+int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
+			   int unset)
+{
+	int *target = opt->value;
+
+	if (unset)
+		/* --no-quiet, --no-verbose */
+		*target = 0;
+	else if (opt->short_name == 'v') {
+		if (*target >= 0)
+			(*target)++;
+		else
+			*target = 1;
+	} else {
+		if (*target <= 0)
+			(*target)--;
+		else
+			*target = -1;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
new file mode 100644
index 0000000..a1039a6
--- /dev/null
+++ b/tools/perf/util/parse-options.h
@@ -0,0 +1,174 @@
+#ifndef PARSE_OPTIONS_H
+#define PARSE_OPTIONS_H
+
+enum parse_opt_type {
+	/* special types */
+	OPTION_END,
+	OPTION_ARGUMENT,
+	OPTION_GROUP,
+	/* options with no arguments */
+	OPTION_BIT,
+	OPTION_BOOLEAN, /* _INCR would have been a better name */
+	OPTION_SET_INT,
+	OPTION_SET_PTR,
+	/* options with arguments (usually) */
+	OPTION_STRING,
+	OPTION_INTEGER,
+	OPTION_LONG,
+	OPTION_CALLBACK,
+};
+
+enum parse_opt_flags {
+	PARSE_OPT_KEEP_DASHDASH = 1,
+	PARSE_OPT_STOP_AT_NON_OPTION = 2,
+	PARSE_OPT_KEEP_ARGV0 = 4,
+	PARSE_OPT_KEEP_UNKNOWN = 8,
+	PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+	PARSE_OPT_OPTARG  = 1,
+	PARSE_OPT_NOARG   = 2,
+	PARSE_OPT_NONEG   = 4,
+	PARSE_OPT_HIDDEN  = 8,
+	PARSE_OPT_LASTARG_DEFAULT = 16,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ *   holds the type of the option, you must have an OPTION_END last in your
+ *   array.
+ *
+ * `short_name`::
+ *   the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ *   the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ *   stores pointers to the values to be filled.
+ *
+ * `argh`::
+ *   token to explain the kind of argument this option wants. Keep it
+ *   homogenous across the repository.
+ *
+ * `help`::
+ *   the short help associated to what the option does.
+ *   Must never be NULL (except for OPTION_END).
+ *   OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ *   mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
+ *                    the long one.
+ *
+ * `callback`::
+ *   pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
+ *   the value when met.
+ *   CALLBACKS can use it like they want.
+ */
+struct option {
+	enum parse_opt_type type;
+	int short_name;
+	const char *long_name;
+	void *value;
+	const char *argh;
+	const char *help;
+
+	int flags;
+	parse_opt_cb *callback;
+	intptr_t defval;
+};
+
+#define OPT_END()                   { OPTION_END }
+#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
+#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
+#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
+#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
+#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
+#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
+#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
+#define OPT_LONG(s, l, v, h)        { OPTION_LONG, (s), (l), (v), NULL, (h) }
+#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
+#define OPT_DATE(s, l, v, h) \
+	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
+	  parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
+
+/* parse_options() will filter out the processed options and leave the
+ * non-option argments in argv[].
+ * Returns the number of arguments left in argv[].
+ */
+extern int parse_options(int argc, const char **argv,
+                         const struct option *options,
+                         const char * const usagestr[], int flags);
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+                                        const struct option *options);
+
+/*----- incremantal advanced APIs -----*/
+
+enum {
+	PARSE_OPT_HELP = -1,
+	PARSE_OPT_DONE,
+	PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+	const char **argv;
+	const char **out;
+	int argc, cpidx;
+	const char *opt;
+	int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+			       const struct option *opts);
+
+extern void parse_options_start(struct parse_opt_ctx_t *ctx,
+				int argc, const char **argv, int flags);
+
+extern int parse_options_step(struct parse_opt_ctx_t *ctx,
+			      const struct option *options,
+			      const char * const usagestr[]);
+
+extern int parse_options_end(struct parse_opt_ctx_t *ctx);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var)  \
+	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+	  "use <n> digits to display SHA-1s", \
+	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+#endif
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
new file mode 100644
index 0000000..a501a40
--- /dev/null
+++ b/tools/perf/util/path.c
@@ -0,0 +1,353 @@
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+
+static char bad_path[] = "/bad-path/";
+/*
+ * Two hacks:
+ */
+
+static char *get_perf_dir(void)
+{
+	return ".";
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static int index;
+	return pathname_array[3 & ++index];
+}
+
+static char *cleanup_path(char *path)
+{
+	/* Clean it up */
+	if (!memcmp(path, "./", 2)) {
+		path += 2;
+		while (*path == '/')
+			path++;
+	}
+	return path;
+}
+
+char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, n, fmt, args);
+	va_end(args);
+	if (len >= n) {
+		strlcpy(buf, bad_path, n);
+		return buf;
+	}
+	return cleanup_path(buf);
+}
+
+static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
+{
+	const char *perf_dir = get_perf_dir();
+	size_t len;
+
+	len = strlen(perf_dir);
+	if (n < len + 1)
+		goto bad;
+	memcpy(buf, perf_dir, len);
+	if (len && !is_dir_sep(perf_dir[len-1]))
+		buf[len++] = '/';
+	len += vsnprintf(buf + len, n - len, fmt, args);
+	if (len >= n)
+		goto bad;
+	return cleanup_path(buf);
+bad:
+	strlcpy(buf, bad_path, n);
+	return buf;
+}
+
+char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(buf, n, fmt, args);
+	va_end(args);
+	return buf;
+}
+
+char *perf_pathdup(const char *fmt, ...)
+{
+	char path[PATH_MAX];
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(path, sizeof(path), fmt, args);
+	va_end(args);
+	return xstrdup(path);
+}
+
+char *mkpath(const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+	char *pathname = get_pathname();
+
+	va_start(args, fmt);
+	len = vsnprintf(pathname, PATH_MAX, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+char *perf_path(const char *fmt, ...)
+{
+	const char *perf_dir = get_perf_dir();
+	char *pathname = get_pathname();
+	va_list args;
+	unsigned len;
+
+	len = strlen(perf_dir);
+	if (len > PATH_MAX-100)
+		return bad_path;
+	memcpy(pathname, perf_dir, len);
+	if (len && perf_dir[len-1] != '/')
+		pathname[len++] = '/';
+	va_start(args, fmt);
+	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+int perf_mkstemp(char *path, size_t len, const char *template)
+{
+	const char *tmp;
+	size_t n;
+
+	tmp = getenv("TMPDIR");
+	if (!tmp)
+		tmp = "/tmp";
+	n = snprintf(path, len, "%s/%s", tmp, template);
+	if (len <= n) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	return mkstemp(path);
+}
+
+
+const char *make_relative_path(const char *abs, const char *base)
+{
+	static char buf[PATH_MAX + 1];
+	int baselen;
+	if (!base)
+		return abs;
+	baselen = strlen(base);
+	if (prefixcmp(abs, base))
+		return abs;
+	if (abs[baselen] == '/')
+		baselen++;
+	else if (base[baselen - 1] != '/')
+		return abs;
+	strcpy(buf, abs + baselen);
+	return buf;
+}
+
+/*
+ * It is okay if dst == src, but they should not overlap otherwise.
+ *
+ * Performs the following normalizations on src, storing the result in dst:
+ * - Ensures that components are separated by '/' (Windows only)
+ * - Squashes sequences of '/'.
+ * - Removes "." components.
+ * - Removes ".." components, and the components the precede them.
+ * Returns failure (non-zero) if a ".." component appears as first path
+ * component anytime during the normalization. Otherwise, returns success (0).
+ *
+ * Note that this function is purely textual.  It does not follow symlinks,
+ * verify the existence of the path, or make any system calls.
+ */
+int normalize_path_copy(char *dst, const char *src)
+{
+	char *dst0;
+
+	if (has_dos_drive_prefix(src)) {
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+	dst0 = dst;
+
+	if (is_dir_sep(*src)) {
+		*dst++ = '/';
+		while (is_dir_sep(*src))
+			src++;
+	}
+
+	for (;;) {
+		char c = *src;
+
+		/*
+		 * A path component that begins with . could be
+		 * special:
+		 * (1) "." and ends   -- ignore and terminate.
+		 * (2) "./"           -- ignore them, eat slash and continue.
+		 * (3) ".." and ends  -- strip one and terminate.
+		 * (4) "../"          -- strip one, eat slash and continue.
+		 */
+		if (c == '.') {
+			if (!src[1]) {
+				/* (1) */
+				src++;
+			} else if (is_dir_sep(src[1])) {
+				/* (2) */
+				src += 2;
+				while (is_dir_sep(*src))
+					src++;
+				continue;
+			} else if (src[1] == '.') {
+				if (!src[2]) {
+					/* (3) */
+					src += 2;
+					goto up_one;
+				} else if (is_dir_sep(src[2])) {
+					/* (4) */
+					src += 3;
+					while (is_dir_sep(*src))
+						src++;
+					goto up_one;
+				}
+			}
+		}
+
+		/* copy up to the next '/', and eat all '/' */
+		while ((c = *src++) != '\0' && !is_dir_sep(c))
+			*dst++ = c;
+		if (is_dir_sep(c)) {
+			*dst++ = '/';
+			while (is_dir_sep(c))
+				c = *src++;
+			src--;
+		} else if (!c)
+			break;
+		continue;
+
+	up_one:
+		/*
+		 * dst0..dst is prefix portion, and dst[-1] is '/';
+		 * go up one level.
+		 */
+		dst--;	/* go to trailing '/' */
+		if (dst <= dst0)
+			return -1;
+		/* Windows: dst[-1] cannot be backslash anymore */
+		while (dst0 < dst && dst[-1] != '/')
+			dst--;
+	}
+	*dst = '\0';
+	return 0;
+}
+
+/*
+ * path = Canonical absolute path
+ * prefix_list = Colon-separated list of absolute paths
+ *
+ * Determines, for each path in prefix_list, whether the "prefix" really
+ * is an ancestor directory of path.  Returns the length of the longest
+ * ancestor directory, excluding any trailing slashes, or -1 if no prefix
+ * is an ancestor.  (Note that this means 0 is returned if prefix_list is
+ * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
+ * are not considered to be their own ancestors.  path must be in a
+ * canonical form: empty components, or "." or ".." components are not
+ * allowed.  prefix_list may be null, which is like "".
+ */
+int longest_ancestor_length(const char *path, const char *prefix_list)
+{
+	char buf[PATH_MAX+1];
+	const char *ceil, *colon;
+	int len, max_len = -1;
+
+	if (prefix_list == NULL || !strcmp(path, "/"))
+		return -1;
+
+	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
+		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
+		len = colon - ceil;
+		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
+			continue;
+		strlcpy(buf, ceil, len+1);
+		if (normalize_path_copy(buf, buf) < 0)
+			continue;
+		len = strlen(buf);
+		if (len > 0 && buf[len-1] == '/')
+			buf[--len] = '\0';
+
+		if (!strncmp(path, buf, len) &&
+		    path[len] == '/' &&
+		    len > max_len) {
+			max_len = len;
+		}
+	}
+
+	return max_len;
+}
+
+/* strip arbitrary amount of directory separators at end of path */
+static inline int chomp_trailing_dir_sep(const char *path, int len)
+{
+	while (len && is_dir_sep(path[len - 1]))
+		len--;
+	return len;
+}
+
+/*
+ * If path ends with suffix (complete path components), returns the
+ * part before suffix (sans trailing directory separators).
+ * Otherwise returns NULL.
+ */
+char *strip_path_suffix(const char *path, const char *suffix)
+{
+	int path_len = strlen(path), suffix_len = strlen(suffix);
+
+	while (suffix_len) {
+		if (!path_len)
+			return NULL;
+
+		if (is_dir_sep(path[path_len - 1])) {
+			if (!is_dir_sep(suffix[suffix_len - 1]))
+				return NULL;
+			path_len = chomp_trailing_dir_sep(path, path_len);
+			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
+		}
+		else if (path[--path_len] != suffix[--suffix_len])
+			return NULL;
+	}
+
+	if (path_len && !is_dir_sep(path[path_len - 1]))
+		return NULL;
+	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
+}
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
new file mode 100644
index 0000000..f18c521
--- /dev/null
+++ b/tools/perf/util/quote.c
@@ -0,0 +1,481 @@
+#include "cache.h"
+#include "quote.h"
+
+int quote_path_fully = 1;
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ *
+ * E.g.
+ *  original     sq_quote     result
+ *  name     ==> name      ==> 'name'
+ *  a b      ==> a b       ==> 'a b'
+ *  a'b      ==> a'\''b    ==> 'a'\''b'
+ *  a!b      ==> a'\!'b    ==> 'a'\!'b'
+ */
+static inline int need_bs_quote(char c)
+{
+	return (c == '\'' || c == '!');
+}
+
+void sq_quote_buf(struct strbuf *dst, const char *src)
+{
+	char *to_free = NULL;
+
+	if (dst->buf == src)
+		to_free = strbuf_detach(dst, NULL);
+
+	strbuf_addch(dst, '\'');
+	while (*src) {
+		size_t len = strcspn(src, "'!");
+		strbuf_add(dst, src, len);
+		src += len;
+		while (need_bs_quote(*src)) {
+			strbuf_addstr(dst, "'\\");
+			strbuf_addch(dst, *src++);
+			strbuf_addch(dst, '\'');
+		}
+	}
+	strbuf_addch(dst, '\'');
+	free(to_free);
+}
+
+void sq_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('\'', stream);
+	while ((c = *src++)) {
+		if (need_bs_quote(c)) {
+			fputs("'\\", stream);
+			fputc(c, stream);
+			fputc('\'', stream);
+		} else {
+			fputc(c, stream);
+		}
+	}
+	fputc('\'', stream);
+}
+
+void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+{
+	int i;
+
+	/* Copy into destination buffer. */
+	strbuf_grow(dst, 255);
+	for (i = 0; argv[i]; ++i) {
+		strbuf_addch(dst, ' ');
+		sq_quote_buf(dst, argv[i]);
+		if (maxlen && dst->len > maxlen)
+			die("Too many or long arguments");
+	}
+}
+
+char *sq_dequote_step(char *arg, char **next)
+{
+	char *dst = arg;
+	char *src = arg;
+	char c;
+
+	if (*src != '\'')
+		return NULL;
+	for (;;) {
+		c = *++src;
+		if (!c)
+			return NULL;
+		if (c != '\'') {
+			*dst++ = c;
+			continue;
+		}
+		/* We stepped out of sq */
+		switch (*++src) {
+		case '\0':
+			*dst = 0;
+			if (next)
+				*next = NULL;
+			return arg;
+		case '\\':
+			c = *++src;
+			if (need_bs_quote(c) && *++src == '\'') {
+				*dst++ = c;
+				continue;
+			}
+		/* Fallthrough */
+		default:
+			if (!next || !isspace(*src))
+				return NULL;
+			do {
+				c = *++src;
+			} while (isspace(c));
+			*dst = 0;
+			*next = src;
+			return arg;
+		}
+	}
+}
+
+char *sq_dequote(char *arg)
+{
+	return sq_dequote_step(arg, NULL);
+}
+
+int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
+{
+	char *next = arg;
+
+	if (!*arg)
+		return 0;
+	do {
+		char *dequoted = sq_dequote_step(next, &next);
+		if (!dequoted)
+			return -1;
+		ALLOC_GROW(*argv, *nr + 1, *alloc);
+		(*argv)[(*nr)++] = dequoted;
+	} while (next);
+
+	return 0;
+}
+
+/* 1 means: quote as octal
+ * 0 means: quote as octal if (quote_path_fully)
+ * -1 means: never quote
+ * c: quote as "\\c"
+ */
+#define X8(x)   x, x, x, x, x, x, x, x
+#define X16(x)  X8(x), X8(x)
+static signed char const sq_lookup[256] = {
+	/*           0    1    2    3    4    5    6    7 */
+	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
+	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
+	/* 0x10 */ X16(1),
+	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
+	/* 0x28 */ X16(-1), X16(-1), X16(-1),
+	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
+	/* 0x60 */ X16(-1), X8(-1),
+	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
+	/* 0x80 */ /* set to 0 */
+};
+
+static inline int sq_must_quote(char c)
+{
+	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
+}
+
+/* returns the longest prefix not needing a quote up to maxlen if positive.
+   This stops at the first \0 because it's marked as a character needing an
+   escape */
+static size_t next_quote_pos(const char *s, ssize_t maxlen)
+{
+	size_t len;
+	if (maxlen < 0) {
+		for (len = 0; !sq_must_quote(s[len]); len++);
+	} else {
+		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
+	}
+	return len;
+}
+
+/*
+ * C-style name quoting.
+ *
+ * (1) if sb and fp are both NULL, inspect the input name and counts the
+ *     number of bytes that are needed to hold c_style quoted version of name,
+ *     counting the double quotes around it but not terminating NUL, and
+ *     returns it.
+ *     However, if name does not need c_style quoting, it returns 0.
+ *
+ * (2) if sb or fp are not NULL, it emits the c_style quoted version
+ *     of name, enclosed with double quotes if asked and needed only.
+ *     Return value is the same as in (1).
+ */
+static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
+                                    struct strbuf *sb, FILE *fp, int no_dq)
+{
+#undef EMIT
+#define EMIT(c)                                 \
+	do {                                        \
+		if (sb) strbuf_addch(sb, (c));          \
+		if (fp) fputc((c), fp);                 \
+		count++;                                \
+	} while (0)
+#define EMITBUF(s, l)                           \
+	do {                                        \
+		int __ret;				\
+		if (sb) strbuf_add(sb, (s), (l));       \
+		if (fp) __ret = fwrite((s), (l), 1, fp);        \
+		count += (l);                           \
+	} while (0)
+
+	size_t len, count = 0;
+	const char *p = name;
+
+	for (;;) {
+		int ch;
+
+		len = next_quote_pos(p, maxlen);
+		if (len == maxlen || !p[len])
+			break;
+
+		if (!no_dq && p == name)
+			EMIT('"');
+
+		EMITBUF(p, len);
+		EMIT('\\');
+		p += len;
+		ch = (unsigned char)*p++;
+		if (sq_lookup[ch] >= ' ') {
+			EMIT(sq_lookup[ch]);
+		} else {
+			EMIT(((ch >> 6) & 03) + '0');
+			EMIT(((ch >> 3) & 07) + '0');
+			EMIT(((ch >> 0) & 07) + '0');
+		}
+	}
+
+	EMITBUF(p, len);
+	if (p == name)   /* no ending quote needed */
+		return 0;
+
+	if (!no_dq)
+		EMIT('"');
+	return count;
+}
+
+size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
+{
+	return quote_c_style_counted(name, -1, sb, fp, nodq);
+}
+
+void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
+{
+	if (quote_c_style(prefix, NULL, NULL, 0) ||
+	    quote_c_style(path, NULL, NULL, 0)) {
+		if (!nodq)
+			strbuf_addch(sb, '"');
+		quote_c_style(prefix, sb, NULL, 1);
+		quote_c_style(path, sb, NULL, 1);
+		if (!nodq)
+			strbuf_addch(sb, '"');
+	} else {
+		strbuf_addstr(sb, prefix);
+		strbuf_addstr(sb, path);
+	}
+}
+
+void write_name_quoted(const char *name, FILE *fp, int terminator)
+{
+	if (terminator) {
+		quote_c_style(name, NULL, fp, 0);
+	} else {
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *fp, int terminator)
+{
+	int needquote = 0;
+
+	if (terminator) {
+		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
+			|| name[next_quote_pos(name, -1)];
+	}
+	if (needquote) {
+		fputc('"', fp);
+		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
+		quote_c_style(name, NULL, fp, 1);
+		fputc('"', fp);
+	} else {
+		int ret;
+
+		ret = fwrite(pfx, pfxlen, 1, fp);
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix)
+{
+	int needquote;
+
+	if (len < 0)
+		len = strlen(in);
+
+	/* "../" prefix itself does not need quoting, but "in" might. */
+	needquote = next_quote_pos(in, len) < len;
+	strbuf_setlen(out, 0);
+	strbuf_grow(out, len);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (prefix) {
+		int off = 0;
+		while (prefix[off] && off < len && prefix[off] == in[off])
+			if (prefix[off] == '/') {
+				prefix += off + 1;
+				in += off + 1;
+				len -= off + 1;
+				off = 0;
+			} else
+				off++;
+
+		for (; *prefix; prefix++)
+			if (*prefix == '/')
+				strbuf_addstr(out, "../");
+	}
+
+	quote_c_style_counted (in, len, out, NULL, 1);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (!out->len)
+		strbuf_addstr(out, "./");
+
+	return out->buf;
+}
+
+/*
+ * C-style name unquoting.
+ *
+ * Quoted should point at the opening double quote.
+ * + Returns 0 if it was able to unquote the string properly, and appends the
+ *   result in the strbuf `sb'.
+ * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
+ *   that this function will allocate memory in the strbuf, so calling
+ *   strbuf_release is mandatory whichever result unquote_c_style returns.
+ *
+ * Updates endp pointer to point at one past the ending double quote if given.
+ */
+int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
+{
+	size_t oldlen = sb->len, len;
+	int ch, ac;
+
+	if (*quoted++ != '"')
+		return -1;
+
+	for (;;) {
+		len = strcspn(quoted, "\"\\");
+		strbuf_add(sb, quoted, len);
+		quoted += len;
+
+		switch (*quoted++) {
+		  case '"':
+			if (endp)
+				*endp = quoted;
+			return 0;
+		  case '\\':
+			break;
+		  default:
+			goto error;
+		}
+
+		switch ((ch = *quoted++)) {
+		case 'a': ch = '\a'; break;
+		case 'b': ch = '\b'; break;
+		case 'f': ch = '\f'; break;
+		case 'n': ch = '\n'; break;
+		case 'r': ch = '\r'; break;
+		case 't': ch = '\t'; break;
+		case 'v': ch = '\v'; break;
+
+		case '\\': case '"':
+			break; /* verbatim */
+
+		/* octal values with first digit over 4 overflow */
+		case '0': case '1': case '2': case '3':
+					ac = ((ch - '0') << 6);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= ((ch - '0') << 3);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= (ch - '0');
+					ch = ac;
+					break;
+				default:
+			goto error;
+			}
+		strbuf_addch(sb, ch);
+		}
+
+  error:
+	strbuf_setlen(sb, oldlen);
+	return -1;
+}
+
+/* quoting as a string literal for other languages */
+
+void perl_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void python_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char nl = '\n';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == nl) {
+			fputc(bq, stream);
+			fputc('n', stream);
+			continue;
+		}
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void tcl_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('"', stream);
+	while ((c = *src++)) {
+		switch (c) {
+		case '[': case ']':
+		case '{': case '}':
+		case '$': case '\\': case '"':
+			fputc('\\', stream);
+		default:
+			fputc(c, stream);
+			break;
+		case '\f':
+			fputs("\\f", stream);
+			break;
+		case '\r':
+			fputs("\\r", stream);
+			break;
+		case '\n':
+			fputs("\\n", stream);
+			break;
+		case '\t':
+			fputs("\\t", stream);
+			break;
+		case '\v':
+			fputs("\\v", stream);
+			break;
+		}
+	}
+	fputc('"', stream);
+}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
new file mode 100644
index 0000000..5dfad89
--- /dev/null
+++ b/tools/perf/util/quote.h
@@ -0,0 +1,68 @@
+#ifndef QUOTE_H
+#define QUOTE_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * For example, if you are passing the result to system() as an
+ * argument:
+ *
+ * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
+ *
+ * would be appropriate.  If the system() is going to call ssh to
+ * run the command on the other side:
+ *
+ * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
+ * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
+ *
+ * Note that the above examples leak memory!  Remember to free result from
+ * sq_quote() in a real application.
+ *
+ * sq_quote_buf() writes to an existing buffer of specified size; it
+ * will return the number of characters that would have been written
+ * excluding the final null regardless of the buffer size.
+ */
+
+extern void sq_quote_print(FILE *stream, const char *src);
+
+extern void sq_quote_buf(struct strbuf *, const char *src);
+extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+
+/* This unwraps what sq_quote() produces in place, but returns
+ * NULL if the input does not look like what sq_quote would have
+ * produced.
+ */
+extern char *sq_dequote(char *);
+
+/*
+ * Same as the above, but can be used to unwrap many arguments in the
+ * same string separated by space. "next" is changed to point to the
+ * next argument that should be passed as first parameter. When there
+ * is no more argument to be dequoted, "next" is updated to point to NULL.
+ */
+extern char *sq_dequote_step(char *arg, char **next);
+extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
+
+extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
+extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
+extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
+
+extern void write_name_quoted(const char *name, FILE *, int terminator);
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *, int terminator);
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix);
+
+/* quoting as a string literal for other languages */
+extern void perl_quote_print(FILE *stream, const char *src);
+extern void python_quote_print(FILE *stream, const char *src);
+extern void tcl_quote_print(FILE *stream, const char *src);
+
+#endif
diff --git a/tools/perf/util/rbtree.c b/tools/perf/util/rbtree.c
new file mode 100644
index 0000000..b15ba9c
--- /dev/null
+++ b/tools/perf/util/rbtree.c
@@ -0,0 +1,383 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_right = right->rb_left))
+		rb_set_parent(right->rb_left, node);
+	right->rb_left = node;
+
+	rb_set_parent(right, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_left)
+			parent->rb_left = right;
+		else
+			parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_left = left->rb_right))
+		rb_set_parent(left->rb_right, node);
+	left->rb_right = node;
+
+	rb_set_parent(left, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_right)
+			parent->rb_right = left;
+		else
+			parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = rb_parent(node)) && rb_is_red(parent))
+	{
+		gparent = rb_parent(parent);
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				register struct rb_node *uncle = gparent->rb_left;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	rb_set_black(root->rb_node);
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || rb_is_black(node)) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_right || rb_is_black(other->rb_right))
+				{
+					rb_set_black(other->rb_left);
+					rb_set_red(other);
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_right);
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_left || rb_is_black(other->rb_left))
+				{
+					rb_set_black(other->rb_right);
+					rb_set_red(other);
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_left);
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = rb_parent(node);
+		color = rb_color(node);
+
+		if (child)
+			rb_set_parent(child, parent);
+		if (parent == old) {
+			parent->rb_right = child;
+			parent = node;
+		} else
+			parent->rb_left = child;
+
+		node->rb_parent_color = old->rb_parent_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (rb_parent(old))
+		{
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		rb_set_parent(old->rb_left, node);
+		if (old->rb_right)
+			rb_set_parent(old->rb_right, node);
+		goto color;
+	}
+
+	parent = rb_parent(node);
+	color = rb_color(node);
+
+	if (child)
+		rb_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right; 
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left; 
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->rb_left)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else {
+		root->rb_node = new;
+	}
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
diff --git a/tools/perf/util/rbtree.h b/tools/perf/util/rbtree.h
new file mode 100644
index 0000000..6bdc488
--- /dev/null
+++ b/tools/perf/util/rbtree.h
@@ -0,0 +1,171 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stddef.h>
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct rb_node
+{
+	unsigned long  rb_parent_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
+#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent_color = (unsigned long )parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c
new file mode 100644
index 0000000..b2f5e85
--- /dev/null
+++ b/tools/perf/util/run-command.c
@@ -0,0 +1,395 @@
+#include "cache.h"
+#include "run-command.h"
+#include "exec_cmd.h"
+
+static inline void close_pair(int fd[2])
+{
+	close(fd[0]);
+	close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+	int fd = open("/dev/null", O_RDWR);
+	dup2(fd, to);
+	close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+	int need_in, need_out, need_err;
+	int fdin[2], fdout[2], fderr[2];
+
+	/*
+	 * In case of errors we must keep the promise to close FDs
+	 * that have been passed in via ->in and ->out.
+	 */
+
+	need_in = !cmd->no_stdin && cmd->in < 0;
+	if (need_in) {
+		if (pipe(fdin) < 0) {
+			if (cmd->out > 0)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->in = fdin[1];
+	}
+
+	need_out = !cmd->no_stdout
+		&& !cmd->stdout_to_stderr
+		&& cmd->out < 0;
+	if (need_out) {
+		if (pipe(fdout) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->out = fdout[0];
+	}
+
+	need_err = !cmd->no_stderr && cmd->err < 0;
+	if (need_err) {
+		if (pipe(fderr) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			if (need_out)
+				close_pair(fdout);
+			else if (cmd->out)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->err = fderr[0];
+	}
+
+#ifndef __MINGW32__
+	fflush(NULL);
+	cmd->pid = fork();
+	if (!cmd->pid) {
+		if (cmd->no_stdin)
+			dup_devnull(0);
+		else if (need_in) {
+			dup2(fdin[0], 0);
+			close_pair(fdin);
+		} else if (cmd->in) {
+			dup2(cmd->in, 0);
+			close(cmd->in);
+		}
+
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
+			dup2(fderr[1], 2);
+			close_pair(fderr);
+		}
+
+		if (cmd->no_stdout)
+			dup_devnull(1);
+		else if (cmd->stdout_to_stderr)
+			dup2(2, 1);
+		else if (need_out) {
+			dup2(fdout[1], 1);
+			close_pair(fdout);
+		} else if (cmd->out > 1) {
+			dup2(cmd->out, 1);
+			close(cmd->out);
+		}
+
+		if (cmd->dir && chdir(cmd->dir))
+			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+			    cmd->dir, strerror(errno));
+		if (cmd->env) {
+			for (; *cmd->env; cmd->env++) {
+				if (strchr(*cmd->env, '='))
+					putenv((char*)*cmd->env);
+				else
+					unsetenv(*cmd->env);
+			}
+		}
+		if (cmd->preexec_cb)
+			cmd->preexec_cb();
+		if (cmd->perf_cmd) {
+			execv_perf_cmd(cmd->argv);
+		} else {
+			execvp(cmd->argv[0], (char *const*) cmd->argv);
+		}
+		exit(127);
+	}
+#else
+	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
+	const char **sargv = cmd->argv;
+	char **env = environ;
+
+	if (cmd->no_stdin) {
+		s0 = dup(0);
+		dup_devnull(0);
+	} else if (need_in) {
+		s0 = dup(0);
+		dup2(fdin[0], 0);
+	} else if (cmd->in) {
+		s0 = dup(0);
+		dup2(cmd->in, 0);
+	}
+
+	if (cmd->no_stderr) {
+		s2 = dup(2);
+		dup_devnull(2);
+	} else if (need_err) {
+		s2 = dup(2);
+		dup2(fderr[1], 2);
+	}
+
+	if (cmd->no_stdout) {
+		s1 = dup(1);
+		dup_devnull(1);
+	} else if (cmd->stdout_to_stderr) {
+		s1 = dup(1);
+		dup2(2, 1);
+	} else if (need_out) {
+		s1 = dup(1);
+		dup2(fdout[1], 1);
+	} else if (cmd->out > 1) {
+		s1 = dup(1);
+		dup2(cmd->out, 1);
+	}
+
+	if (cmd->dir)
+		die("chdir in start_command() not implemented");
+	if (cmd->env) {
+		env = copy_environ();
+		for (; *cmd->env; cmd->env++)
+			env = env_setenv(env, *cmd->env);
+	}
+
+	if (cmd->perf_cmd) {
+		cmd->argv = prepare_perf_cmd(cmd->argv);
+	}
+
+	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
+
+	if (cmd->env)
+		free_environ(env);
+	if (cmd->perf_cmd)
+		free(cmd->argv);
+
+	cmd->argv = sargv;
+	if (s0 >= 0)
+		dup2(s0, 0), close(s0);
+	if (s1 >= 0)
+		dup2(s1, 1), close(s1);
+	if (s2 >= 0)
+		dup2(s2, 2), close(s2);
+#endif
+
+	if (cmd->pid < 0) {
+		int err = errno;
+		if (need_in)
+			close_pair(fdin);
+		else if (cmd->in)
+			close(cmd->in);
+		if (need_out)
+			close_pair(fdout);
+		else if (cmd->out)
+			close(cmd->out);
+		if (need_err)
+			close_pair(fderr);
+		return err == ENOENT ?
+			-ERR_RUN_COMMAND_EXEC :
+			-ERR_RUN_COMMAND_FORK;
+	}
+
+	if (need_in)
+		close(fdin[0]);
+	else if (cmd->in)
+		close(cmd->in);
+
+	if (need_out)
+		close(fdout[1]);
+	else if (cmd->out)
+		close(cmd->out);
+
+	if (need_err)
+		close(fderr[1]);
+
+	return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+	for (;;) {
+		int status, code;
+		pid_t waiting = waitpid(pid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			error("waitpid failed (%s)", strerror(errno));
+			return -ERR_RUN_COMMAND_WAITPID;
+		}
+		if (waiting != pid)
+			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+		if (WIFSIGNALED(status))
+			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+		if (!WIFEXITED(status))
+			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+		code = WEXITSTATUS(status);
+		switch (code) {
+		case 127:
+			return -ERR_RUN_COMMAND_EXEC;
+		case 0:
+			return 0;
+		default:
+			return -code;
+		}
+	}
+}
+
+int finish_command(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+	int code = start_command(cmd);
+	if (code)
+		return code;
+	return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+				      const char **argv,
+				      int opt)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->argv = argv;
+	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	return run_command(&cmd);
+}
+
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	cmd.dir = dir;
+	cmd.env = env;
+	return run_command(&cmd);
+}
+
+#ifdef __MINGW32__
+static __stdcall unsigned run_thread(void *data)
+{
+	struct async *async = data;
+	return async->proc(async->fd_for_proc, async->data);
+}
+#endif
+
+int start_async(struct async *async)
+{
+	int pipe_out[2];
+
+	if (pipe(pipe_out) < 0)
+		return error("cannot create pipe: %s", strerror(errno));
+	async->out = pipe_out[0];
+
+#ifndef __MINGW32__
+	/* Flush stdio before fork() to avoid cloning buffers */
+	fflush(NULL);
+
+	async->pid = fork();
+	if (async->pid < 0) {
+		error("fork (async) failed: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+	if (!async->pid) {
+		close(pipe_out[0]);
+		exit(!!async->proc(pipe_out[1], async->data));
+	}
+	close(pipe_out[1]);
+#else
+	async->fd_for_proc = pipe_out[1];
+	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
+	if (!async->tid) {
+		error("cannot create thread: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+int finish_async(struct async *async)
+{
+#ifndef __MINGW32__
+	int ret = 0;
+
+	if (wait_or_whine(async->pid))
+		ret = error("waitpid (async) failed");
+#else
+	DWORD ret = 0;
+	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
+		ret = error("waiting for thread failed: %lu", GetLastError());
+	else if (!GetExitCodeThread(async->tid, &ret))
+		ret = error("cannot get thread exit code: %lu", GetLastError());
+	CloseHandle(async->tid);
+#endif
+	return ret;
+}
+
+int run_hook(const char *index_file, const char *name, ...)
+{
+	struct child_process hook;
+	const char **argv = NULL, *env[2];
+	char index[PATH_MAX];
+	va_list args;
+	int ret;
+	size_t i = 0, alloc = 0;
+
+	if (access(perf_path("hooks/%s", name), X_OK) < 0)
+		return 0;
+
+	va_start(args, name);
+	ALLOC_GROW(argv, i + 1, alloc);
+	argv[i++] = perf_path("hooks/%s", name);
+	while (argv[i-1]) {
+		ALLOC_GROW(argv, i + 1, alloc);
+		argv[i++] = va_arg(args, const char *);
+	}
+	va_end(args);
+
+	memset(&hook, 0, sizeof(hook));
+	hook.argv = argv;
+	hook.no_stdin = 1;
+	hook.stdout_to_stderr = 1;
+	if (index_file) {
+		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
+		env[0] = index;
+		env[1] = NULL;
+		hook.env = env;
+	}
+
+	ret = start_command(&hook);
+	free(argv);
+	if (ret) {
+		warning("Could not spawn %s", argv[0]);
+		return ret;
+	}
+	ret = finish_command(&hook);
+	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
+		warning("%s exited due to uncaught signal", argv[0]);
+
+	return ret;
+}
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
new file mode 100644
index 0000000..328289f
--- /dev/null
+++ b/tools/perf/util/run-command.h
@@ -0,0 +1,93 @@
+#ifndef RUN_COMMAND_H
+#define RUN_COMMAND_H
+
+enum {
+	ERR_RUN_COMMAND_FORK = 10000,
+	ERR_RUN_COMMAND_EXEC,
+	ERR_RUN_COMMAND_PIPE,
+	ERR_RUN_COMMAND_WAITPID,
+	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+	ERR_RUN_COMMAND_WAITPID_SIGNAL,
+	ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+	const char **argv;
+	pid_t pid;
+	/*
+	 * Using .in, .out, .err:
+	 * - Specify 0 for no redirections (child inherits stdin, stdout,
+	 *   stderr from parent).
+	 * - Specify -1 to have a pipe allocated as follows:
+	 *     .in: returns the writable pipe end; parent writes to it,
+	 *          the readable pipe end becomes child's stdin
+	 *     .out, .err: returns the readable pipe end; parent reads from
+	 *          it, the writable pipe end becomes child's stdout/stderr
+	 *   The caller of start_command() must close the returned FDs
+	 *   after it has completed reading from/writing to it!
+	 * - Specify > 0 to set a channel to a particular FD as follows:
+	 *     .in: a readable FD, becomes child's stdin
+	 *     .out: a writable FD, becomes child's stdout/stderr
+	 *     .err > 0 not supported
+	 *   The specified FD is closed by start_command(), even in case
+	 *   of errors!
+	 */
+	int in;
+	int out;
+	int err;
+	const char *dir;
+	const char *const *env;
+	unsigned no_stdin:1;
+	unsigned no_stdout:1;
+	unsigned no_stderr:1;
+	unsigned perf_cmd:1; /* if this is to be perf sub-command */
+	unsigned stdout_to_stderr:1;
+	void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
+
+extern int run_hook(const char *index_file, const char *name, ...);
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+/*
+ * env (the environment) is to be formatted like environ: "VAR=VALUE".
+ * To unset an environment variable use just "VAR".
+ */
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
+
+/*
+ * The purpose of the following functions is to feed a pipe by running
+ * a function asynchronously and providing output that the caller reads.
+ *
+ * It is expected that no synchronization and mutual exclusion between
+ * the caller and the feed function is necessary so that the function
+ * can run in a thread without interfering with the caller.
+ */
+struct async {
+	/*
+	 * proc writes to fd and closes it;
+	 * returns 0 on success, non-zero on failure
+	 */
+	int (*proc)(int fd, void *data);
+	void *data;
+	int out;	/* caller reads from here and closes it */
+#ifndef __MINGW32__
+	pid_t pid;
+#else
+	HANDLE tid;
+	int fd_for_proc;
+#endif
+};
+
+int start_async(struct async *async);
+int finish_async(struct async *async);
+
+#endif
diff --git a/tools/perf/util/sigchain.c b/tools/perf/util/sigchain.c
new file mode 100644
index 0000000..1118b99
--- /dev/null
+++ b/tools/perf/util/sigchain.c
@@ -0,0 +1,52 @@
+#include "sigchain.h"
+#include "cache.h"
+
+#define SIGCHAIN_MAX_SIGNALS 32
+
+struct sigchain_signal {
+	sigchain_fun *old;
+	int n;
+	int alloc;
+};
+static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
+
+static void check_signum(int sig)
+{
+	if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
+		die("BUG: signal out of range: %d", sig);
+}
+
+int sigchain_push(int sig, sigchain_fun f)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+
+	ALLOC_GROW(s->old, s->n + 1, s->alloc);
+	s->old[s->n] = signal(sig, f);
+	if (s->old[s->n] == SIG_ERR)
+		return -1;
+	s->n++;
+	return 0;
+}
+
+int sigchain_pop(int sig)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+	if (s->n < 1)
+		return 0;
+
+	if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
+		return -1;
+	s->n--;
+	return 0;
+}
+
+void sigchain_push_common(sigchain_fun f)
+{
+	sigchain_push(SIGINT, f);
+	sigchain_push(SIGHUP, f);
+	sigchain_push(SIGTERM, f);
+	sigchain_push(SIGQUIT, f);
+	sigchain_push(SIGPIPE, f);
+}
diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h
new file mode 100644
index 0000000..618083bc
--- /dev/null
+++ b/tools/perf/util/sigchain.h
@@ -0,0 +1,11 @@
+#ifndef SIGCHAIN_H
+#define SIGCHAIN_H
+
+typedef void (*sigchain_fun)(int);
+
+int sigchain_push(int sig, sigchain_fun f);
+int sigchain_pop(int sig);
+
+void sigchain_push_common(sigchain_fun f);
+
+#endif /* SIGCHAIN_H */
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
new file mode 100644
index 0000000..eaba093
--- /dev/null
+++ b/tools/perf/util/strbuf.c
@@ -0,0 +1,359 @@
+#include "cache.h"
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+void strbuf_init(struct strbuf *sb, size_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		strbuf_grow(sb, hint);
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+	if (sb->alloc) {
+		free(sb->buf);
+		strbuf_init(sb, 0);
+	}
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+
+void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
+{
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	if (sb->len + extra + 1 <= sb->len)
+		die("you want to use way too much memory");
+	if (!sb->alloc)
+		sb->buf = NULL;
+	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+}
+
+void strbuf_trim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+void strbuf_rtrim(struct strbuf *sb)
+{
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_ltrim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_tolower(struct strbuf *sb)
+{
+	int i;
+	for (i = 0; i < sb->len; i++)
+		sb->buf[i] = tolower(sb->buf[i]);
+}
+
+struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
+{
+	int alloc = 2, pos = 0;
+	char *n, *p;
+	struct strbuf **ret;
+	struct strbuf *t;
+
+	ret = calloc(alloc, sizeof(struct strbuf *));
+	p = n = sb->buf;
+	while (n < sb->buf + sb->len) {
+		int len;
+		n = memchr(n, delim, sb->len - (n - sb->buf));
+		if (pos + 1 >= alloc) {
+			alloc = alloc * 2;
+			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
+		}
+		if (!n)
+			n = sb->buf + sb->len - 1;
+		len = n - p + 1;
+		t = malloc(sizeof(struct strbuf));
+		strbuf_init(t, len);
+		strbuf_add(t, p, len);
+		ret[pos] = t;
+		ret[++pos] = NULL;
+		p = ++n;
+	}
+	return ret;
+}
+
+void strbuf_list_free(struct strbuf **sbs)
+{
+	struct strbuf **s = sbs;
+
+	while (*s) {
+		strbuf_release(*s);
+		free(*s++);
+	}
+	free(sbs);
+}
+
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
+{
+	int len = a->len < b->len ? a->len: b->len;
+	int cmp = memcmp(a->buf, b->buf, len);
+	if (cmp)
+		return cmp;
+	return a->len < b->len ? -1: a->len != b->len;
+}
+
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to use way too much memory");
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	if (pos + len > sb->len)
+		die("`pos + len' is too far after the end of the buffer");
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
+void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
+{
+	strbuf_splice(sb, pos, 0, data, len);
+}
+
+void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_splice(sb, pos, len, NULL, 0);
+}
+
+void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, data, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, sb->buf + pos, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	int len;
+	va_list ap;
+
+	if (!strbuf_avail(sb))
+		strbuf_grow(sb, 64);
+	va_start(ap, fmt);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	va_end(ap);
+	if (len < 0)
+		die("your vsnprintf is broken");
+	if (len > strbuf_avail(sb)) {
+		strbuf_grow(sb, len);
+		va_start(ap, fmt);
+		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+		va_end(ap);
+		if (len > strbuf_avail(sb)) {
+			die("this should not happen, your snprintf is broken");
+		}
+	}
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+		   void *context)
+{
+	for (;;) {
+		const char *percent;
+		size_t consumed;
+
+		percent = strchrnul(format, '%');
+		strbuf_add(sb, format, percent - format);
+		if (!*percent)
+			break;
+		format = percent + 1;
+
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
+			strbuf_addch(sb, '%');
+	}
+}
+
+size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
+		void *context)
+{
+	struct strbuf_expand_dict_entry *e = context;
+	size_t len;
+
+	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
+		if (!strncmp(placeholder, e->placeholder, len)) {
+			if (e->value)
+				strbuf_addstr(sb, e->value);
+			return len;
+		}
+	}
+	return 0;
+}
+
+size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
+{
+	size_t res;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, size);
+	res = fread(sb->buf + sb->len, 1, size, f);
+	if (res > 0)
+		strbuf_setlen(sb, sb->len + res);
+	else if (res < 0 && oldalloc == 0)
+		strbuf_release(sb);
+	return res;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, hint ? hint : 8192);
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return -1;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		strbuf_grow(sb, 8192);
+	}
+
+	sb->buf[sb->len] = '\0';
+	return sb->len - oldlen;
+}
+
+#define STRBUF_MAXLINK (2*PATH_MAX)
+
+int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
+{
+	size_t oldalloc = sb->alloc;
+
+	if (hint < 32)
+		hint = 32;
+
+	while (hint < STRBUF_MAXLINK) {
+		int len;
+
+		strbuf_grow(sb, hint);
+		len = readlink(path, sb->buf, hint);
+		if (len < 0) {
+			if (errno != ERANGE)
+				break;
+		} else if (len < hint) {
+			strbuf_setlen(sb, len);
+			return 0;
+		}
+
+		/* .. the buffer was too small - try again */
+		hint *= 2;
+	}
+	if (oldalloc == 0)
+		strbuf_release(sb);
+	return -1;
+}
+
+int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
+{
+	int ch;
+
+	strbuf_grow(sb, 0);
+	if (feof(fp))
+		return EOF;
+
+	strbuf_reset(sb);
+	while ((ch = fgetc(fp)) != EOF) {
+		if (ch == term)
+			break;
+		strbuf_grow(sb, 1);
+		sb->buf[sb->len++] = ch;
+	}
+	if (ch == EOF && sb->len == 0)
+		return EOF;
+
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+
+int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
+{
+	int fd, len;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+	len = strbuf_read(sb, fd, hint);
+	close(fd);
+	if (len < 0)
+		return -1;
+
+	return len;
+}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
new file mode 100644
index 0000000..9ee908a
--- /dev/null
+++ b/tools/perf/util/strbuf.h
@@ -0,0 +1,137 @@
+#ifndef STRBUF_H
+#define STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detachs a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensure this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;
+	size_t len;
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+extern void strbuf_init(struct strbuf *, size_t);
+extern void strbuf_release(struct strbuf *);
+extern char *strbuf_detach(struct strbuf *, size_t *);
+extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
+static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
+	struct strbuf tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+/*----- strbuf size related -----*/
+static inline size_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+extern void strbuf_grow(struct strbuf *, size_t);
+
+static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc)
+		strbuf_grow(sb, 0);
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+}
+#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
+
+/*----- content related -----*/
+extern void strbuf_trim(struct strbuf *);
+extern void strbuf_rtrim(struct strbuf *);
+extern void strbuf_ltrim(struct strbuf *);
+extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
+extern void strbuf_tolower(struct strbuf *);
+
+extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
+extern void strbuf_list_free(struct strbuf **);
+
+/*----- add data in your buffer -----*/
+static inline void strbuf_addch(struct strbuf *sb, int c) {
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
+extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
+extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
+
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+                          const void *, size_t);
+
+extern void strbuf_add(struct strbuf *, const void *, size_t);
+static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
+	strbuf_add(sb, s, strlen(s));
+}
+static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
+	strbuf_add(sb, sb2->buf, sb2->len);
+}
+extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
+
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
+struct strbuf_expand_dict_entry {
+	const char *placeholder;
+	const char *value;
+};
+extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
+
+__attribute__((format(printf,2,3)))
+extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+
+extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
+/* XXX: if read fails, any partial read is undone */
+extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
+extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
+extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
+
+extern int strbuf_getline(struct strbuf *, FILE *, int);
+
+extern void stripspace(struct strbuf *buf, int skip_comments);
+extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
+
+extern int strbuf_branchname(struct strbuf *sb, const char *name);
+extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
+
+#endif /* STRBUF_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
new file mode 100644
index 0000000..ec33c0c
--- /dev/null
+++ b/tools/perf/util/string.c
@@ -0,0 +1,34 @@
+#include "string.h"
+
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2u64(const char *ptr, __u64 *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
new file mode 100644
index 0000000..72812c1
--- /dev/null
+++ b/tools/perf/util/string.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_STRING_H_
+#define _PERF_STRING_H_
+
+#include <linux/types.h>
+
+int hex2u64(const char *ptr, __u64 *val);
+
+#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
new file mode 100644
index 0000000..49a55f8
--- /dev/null
+++ b/tools/perf/util/symbol.c
@@ -0,0 +1,641 @@
+#include "util.h"
+#include "../perf.h"
+#include "string.h"
+#include "symbol.h"
+
+#include <libelf.h>
+#include <gelf.h>
+#include <elf.h>
+
+const char *sym_hist_filter;
+
+static struct symbol *symbol__new(__u64 start, __u64 len,
+				  const char *name, unsigned int priv_size,
+				  __u64 obj_start, int verbose)
+{
+	size_t namelen = strlen(name) + 1;
+	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
+
+	if (!self)
+		return NULL;
+
+	if (verbose >= 2)
+		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
+			(__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
+
+	self->obj_start= obj_start;
+	self->hist = NULL;
+	self->hist_sum = 0;
+
+	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
+		self->hist = calloc(sizeof(__u64), len);
+
+	if (priv_size) {
+		memset(self, 0, priv_size);
+		self = ((void *)self) + priv_size;
+	}
+	self->start = start;
+	self->end   = start + len - 1;
+	memcpy(self->name, name, namelen);
+
+	return self;
+}
+
+static void symbol__delete(struct symbol *self, unsigned int priv_size)
+{
+	free(((void *)self) - priv_size);
+}
+
+static size_t symbol__fprintf(struct symbol *self, FILE *fp)
+{
+	return fprintf(fp, " %llx-%llx %s\n",
+		       self->start, self->end, self->name);
+}
+
+struct dso *dso__new(const char *name, unsigned int sym_priv_size)
+{
+	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		strcpy(self->name, name);
+		self->syms = RB_ROOT;
+		self->sym_priv_size = sym_priv_size;
+		self->find_symbol = dso__find_symbol;
+	}
+
+	return self;
+}
+
+static void dso__delete_symbols(struct dso *self)
+{
+	struct symbol *pos;
+	struct rb_node *next = rb_first(&self->syms);
+
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, &self->syms);
+		symbol__delete(pos, self->sym_priv_size);
+	}
+}
+
+void dso__delete(struct dso *self)
+{
+	dso__delete_symbols(self);
+	free(self);
+}
+
+static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+{
+	struct rb_node **p = &self->syms.rb_node;
+	struct rb_node *parent = NULL;
+	const __u64 ip = sym->start;
+	struct symbol *s;
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color(&sym->rb_node, &self->syms);
+}
+
+struct symbol *dso__find_symbol(struct dso *self, __u64 ip)
+{
+	struct rb_node *n;
+
+	if (self == NULL)
+		return NULL;
+
+	n = self->syms.rb_node;
+
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		else if (ip > s->end)
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+size_t dso__fprintf(struct dso *self, FILE *fp)
+{
+	size_t ret = fprintf(fp, "dso: %s\n", self->name);
+
+	struct rb_node *nd;
+	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		ret += symbol__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	struct rb_node *nd, *prevnd;
+	char *line = NULL;
+	size_t n;
+	FILE *file = fopen("/proc/kallsyms", "r");
+
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		__u64 start;
+		struct symbol *sym;
+		int line_len, len;
+		char symbol_type;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2u64(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		symbol_type = toupper(line[len]);
+		/*
+		 * We're interested only in code ('T'ext)
+		 */
+		if (symbol_type != 'T' && symbol_type != 'W')
+			continue;
+		/*
+		 * Well fix up the end later, when we have all sorted.
+		 */
+		sym = symbol__new(start, 0xdead, line + len + 2,
+				  self->sym_priv_size, 0, verbose);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		if (filter && filter(self, sym))
+			symbol__delete(sym, self->sym_priv_size);
+		else
+			dso__insert_symbol(self, sym);
+	}
+
+	/*
+	 * Now that we have all sorted out, just set the ->end of all
+	 * symbols
+	 */
+	prevnd = rb_first(&self->syms);
+
+	if (prevnd == NULL)
+		goto out_delete_line;
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
+			      *curr = rb_entry(nd, struct symbol, rb_node);
+
+		prev->end = curr->start - 1;
+		prevnd = nd;
+	}
+
+	free(line);
+	fclose(file);
+
+	return 0;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	int nr_syms = 0;
+
+	file = fopen(self->name, "r");
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		__u64 start, size;
+		struct symbol *sym;
+		int line_len, len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2u64(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		len += hex2u64(line + len, &size);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		sym = symbol__new(start, size, line + len,
+				  self->sym_priv_size, start, verbose);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		if (filter && filter(self, sym))
+			symbol__delete(sym, self->sym_priv_size);
+		else {
+			dso__insert_symbol(self, sym);
+			nr_syms++;
+		}
+	}
+
+	free(line);
+	fclose(file);
+
+	return nr_syms;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @self: struct elf_symtab instance to iterate
+ * @index: uint32_t index
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
+	for (index = 0, gelf_getsym(syms, index, &sym);\
+	     index < nr_syms; \
+	     index++, gelf_getsym(syms, index, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_FUNC &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF &&
+	       sym->st_size != 0;
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+				    GElf_Shdr *shp, const char *name,
+				    size_t *index)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (!strcmp(name, str)) {
+			if (index)
+				*index = cnt;
+			break;
+		}
+		++cnt;
+	}
+
+	return sec;
+}
+
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
+				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
+				       GElf_Shdr *shdr_dynsym,
+				       size_t dynsym_idx, int verbose)
+{
+	uint32_t nr_rel_entries, idx;
+	GElf_Sym sym;
+	__u64 plt_offset;
+	GElf_Shdr shdr_plt;
+	struct symbol *f;
+	GElf_Shdr shdr_rel_plt;
+	Elf_Data *reldata, *syms, *symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs;
+	char sympltname[1024];
+	int nr = 0, symidx;
+
+	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+					  ".rela.plt", NULL);
+	if (scn_plt_rel == NULL) {
+		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+						  ".rel.plt", NULL);
+		if (scn_plt_rel == NULL)
+			return 0;
+	}
+
+	if (shdr_rel_plt.sh_link != dynsym_idx)
+		return 0;
+
+	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		return 0;
+
+	/*
+	 * Fetch the relocation section to find the indexes to the GOT
+	 * and the symbols in the .dynsym they refer to.
+	 */
+	reldata = elf_getdata(scn_plt_rel, NULL);
+	if (reldata == NULL)
+		return -1;
+
+	syms = elf_getdata(scn_dynsym, NULL);
+	if (syms == NULL)
+		return -1;
+
+	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
+	if (scn_symstrs == NULL)
+		return -1;
+
+	symstrs = elf_getdata(scn_symstrs, NULL);
+	if (symstrs == NULL)
+		return -1;
+
+	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+	plt_offset = shdr_plt.sh_offset;
+
+	if (shdr_rel_plt.sh_type == SHT_RELA) {
+		GElf_Rela pos_mem, *pos;
+
+		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+					   nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size, 0, verbose);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else if (shdr_rel_plt.sh_type == SHT_REL) {
+		GElf_Rel pos_mem, *pos;
+		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+					  nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size, 0, verbose);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else {
+		/*
+		 * TODO: There are still one more shdr_rel_plt.sh_type
+		 * I have to investigate, but probably should be ignored.
+		 */
+	}
+
+	return nr;
+}
+
+static int dso__load_sym(struct dso *self, int fd, const char *name,
+			 symbol_filter_t filter, int verbose)
+{
+	Elf_Data *symstrs;
+	uint32_t nr_syms;
+	int err = -1;
+	uint32_t index;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *syms;
+	GElf_Sym sym;
+	Elf_Scn *sec, *sec_dynsym;
+	Elf *elf;
+	size_t dynsym_idx;
+	int nr = 0;
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		if (verbose)
+			fprintf(stderr, "%s: cannot read %s ELF file.\n",
+				__func__, name);
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		if (verbose)
+			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	/*
+	 * We need to check if we have a .dynsym, so that we can handle the
+	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+	 * .dynsym or .symtab)
+	 */
+	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
+					 ".dynsym", &dynsym_idx);
+	if (sec_dynsym != NULL) {
+		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
+						 sec_dynsym, &shdr,
+						 dynsym_idx, verbose);
+		if (nr < 0)
+			goto out_elf_end;
+	}
+
+	/*
+	 * But if we have a full .symtab (that is a superset of .dynsym) we
+	 * should add the symbols not in the .dynsyn
+	 */
+	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
+	if (sec == NULL) {
+		if (sec_dynsym == NULL)
+			goto out_elf_end;
+
+		sec = sec_dynsym;
+		gelf_getshdr(sec, &shdr);
+	}
+
+	syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	memset(&sym, 0, sizeof(sym));
+
+	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
+		struct symbol *f;
+		__u64 obj_start;
+
+		if (!elf_sym__is_function(&sym))
+			continue;
+
+		sec = elf_getscn(elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		gelf_getshdr(sec, &shdr);
+		obj_start = sym.st_value;
+
+		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				elf_sym__name(&sym, symstrs),
+				self->sym_priv_size, obj_start, verbose);
+		if (!f)
+			goto out_elf_end;
+
+		if (filter && filter(self, f))
+			symbol__delete(f, self->sym_priv_size);
+		else {
+			dso__insert_symbol(self, f);
+			nr++;
+		}
+	}
+
+	err = nr;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	return err;
+}
+
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
+	char *name = malloc(size);
+	int variant = 0;
+	int ret = -1;
+	int fd;
+
+	if (!name)
+		return -1;
+
+	if (strncmp(self->name, "/tmp/perf-", 10) == 0)
+		return dso__load_perf_map(self, filter, verbose);
+
+more:
+	do {
+		switch (variant) {
+		case 0: /* Fedora */
+			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
+			break;
+		case 1: /* Ubuntu */
+			snprintf(name, size, "/usr/lib/debug%s", self->name);
+			break;
+		case 2: /* Sane people */
+			snprintf(name, size, "%s", self->name);
+			break;
+
+		default:
+			goto out;
+		}
+		variant++;
+
+		fd = open(name, O_RDONLY);
+	} while (fd < 0);
+
+	ret = dso__load_sym(self, fd, name, filter, verbose);
+	close(fd);
+
+	/*
+	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
+	 */
+	if (!ret)
+		goto more;
+
+out:
+	free(name);
+	return ret;
+}
+
+static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
+			     symbol_filter_t filter, int verbose)
+{
+	int err, fd = open(vmlinux, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	err = dso__load_sym(self, fd, vmlinux, filter, verbose);
+	close(fd);
+
+	return err;
+}
+
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter, int verbose)
+{
+	int err = -1;
+
+	if (vmlinux)
+		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
+
+	if (err)
+		err = dso__load_kallsyms(self, filter, verbose);
+
+	return err;
+}
+
+void symbol__init(void)
+{
+	elf_version(EV_CURRENT);
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
new file mode 100644
index 0000000..0d1292b
--- /dev/null
+++ b/tools/perf/util/symbol.h
@@ -0,0 +1,47 @@
+#ifndef _PERF_SYMBOL_
+#define _PERF_SYMBOL_ 1
+
+#include <linux/types.h>
+#include "list.h"
+#include "rbtree.h"
+
+struct symbol {
+	struct rb_node	rb_node;
+	__u64		start;
+	__u64		end;
+	__u64		obj_start;
+	__u64		hist_sum;
+	__u64		*hist;
+	char		name[0];
+};
+
+struct dso {
+	struct list_head node;
+	struct rb_root	 syms;
+	unsigned int	 sym_priv_size;
+	struct symbol    *(*find_symbol)(struct dso *, __u64 ip);
+	char		 name[0];
+};
+
+const char *sym_hist_filter;
+
+typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
+
+struct dso *dso__new(const char *name, unsigned int sym_priv_size);
+void dso__delete(struct dso *self);
+
+static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
+{
+	return ((void *)sym) - self->sym_priv_size;
+}
+
+struct symbol *dso__find_symbol(struct dso *self, __u64 ip);
+
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter, int verbose);
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
+
+size_t dso__fprintf(struct dso *self, FILE *fp);
+
+void symbol__init(void);
+#endif /* _PERF_SYMBOL_ */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
new file mode 100644
index 0000000..e16bf9a7
--- /dev/null
+++ b/tools/perf/util/usage.c
@@ -0,0 +1,80 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+
+static void report(const char *prefix, const char *err, va_list params)
+{
+	char msg[1024];
+	vsnprintf(msg, sizeof(msg), err, params);
+	fprintf(stderr, " %s%s\n", prefix, msg);
+}
+
+static NORETURN void usage_builtin(const char *err)
+{
+	fprintf(stderr, "\n Usage: %s\n", err);
+	exit(129);
+}
+
+static NORETURN void die_builtin(const char *err, va_list params)
+{
+	report(" Fatal: ", err, params);
+	exit(128);
+}
+
+static void error_builtin(const char *err, va_list params)
+{
+	report(" Error: ", err, params);
+}
+
+static void warn_builtin(const char *warn, va_list params)
+{
+	report(" Warning: ", warn, params);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
+static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
+static void (*error_routine)(const char *err, va_list params) = error_builtin;
+static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
+
+void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
+{
+	die_routine = routine;
+}
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
+
+void die(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	die_routine(err, params);
+	va_end(params);
+}
+
+int error(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	error_routine(err, params);
+	va_end(params);
+	return -1;
+}
+
+void warning(const char *warn, ...)
+{
+	va_list params;
+
+	va_start(params, warn);
+	warn_routine(warn, params);
+	va_end(params);
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
new file mode 100644
index 0000000..76590a1
--- /dev/null
+++ b/tools/perf/util/util.h
@@ -0,0 +1,410 @@
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _FILE_OFFSET_BITS 64
+
+#ifndef FLEX_ARRAY
+/*
+ * See if our compiler is known to support flexible array members.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+# define FLEX_ARRAY /* empty */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+#  define FLEX_ARRAY /* empty */
+# else
+#  define FLEX_ARRAY 0 /* older GNU extension */
+# endif
+#endif
+
+/*
+ * Otherwise, default to safer but a bit wasteful traditional style
+ */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+#ifdef __GNUC__
+#define TYPEOF(x) (__typeof__(x))
+#else
+#define TYPEOF(x)
+#endif
+
+#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
+#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
+
+/* Approximation of the length of the decimal representation of this type. */
+#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
+#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
+#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
+#endif
+#define _ALL_SOURCE 1
+#define _GNU_SOURCE 1
+#define _BSD_SOURCE 1
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <time.h>
+#include <signal.h>
+#include <fnmatch.h>
+#include <assert.h>
+#include <regex.h>
+#include <utime.h>
+#ifndef __MINGW32__
+#include <sys/wait.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#ifndef NO_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pwd.h>
+#include <inttypes.h>
+#if defined(__CYGWIN__)
+#undef _XOPEN_SOURCE
+#include <grp.h>
+#define _XOPEN_SOURCE 600
+#include "compat/cygwin.h"
+#else
+#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
+#include <grp.h>
+#define _ALL_SOURCE 1
+#endif
+#else 	/* __MINGW32__ */
+/* pull in Windows compatibility stuff */
+#include "compat/mingw.h"
+#endif	/* __MINGW32__ */
+
+#ifndef NO_ICONV
+#include <iconv.h>
+#endif
+
+#ifndef NO_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#endif
+
+/* On most systems <limits.h> would have given us this, but
+ * not on some systems (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef PRIuMAX
+#define PRIuMAX "llu"
+#endif
+
+#ifndef PRIu32
+#define PRIu32 "u"
+#endif
+
+#ifndef PRIx32
+#define PRIx32 "x"
+#endif
+
+#ifndef PATH_SEP
+#define PATH_SEP ':'
+#endif
+
+#ifndef STRIP_EXTENSION
+#define STRIP_EXTENSION ""
+#endif
+
+#ifndef has_dos_drive_prefix
+#define has_dos_drive_prefix(path) 0
+#endif
+
+#ifndef is_dir_sep
+#define is_dir_sep(c) ((c) == '/')
+#endif
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#ifndef __attribute__
+#define __attribute__(x)
+#endif
+#endif
+
+/* General helper functions */
+extern void usage(const char *err) NORETURN;
+extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+
+extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
+
+extern int prefixcmp(const char *str, const char *prefix);
+extern time_t tm_to_time_t(const struct tm *tm);
+
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t len = strlen(prefix);
+	return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
+
+#ifndef PROT_READ
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define MAP_PRIVATE 1
+#define MAP_FAILED ((void*)-1)
+#endif
+
+#define mmap git_mmap
+#define munmap git_munmap
+extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern int git_munmap(void *start, size_t length);
+
+#else /* NO_MMAP || USE_WIN32_MMAP */
+
+#include <sys/mman.h>
+
+#endif /* NO_MMAP || USE_WIN32_MMAP */
+
+#ifdef NO_MMAP
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
+
+#else /* NO_MMAP */
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
+	(sizeof(void*) >= 8 \
+		?  1 * 1024 * 1024 * 1024 \
+		: 32 * 1024 * 1024)
+
+#endif /* NO_MMAP */
+
+#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+#define on_disk_bytes(st) ((st).st_size)
+#else
+#define on_disk_bytes(st) ((st).st_blocks * 512)
+#endif
+
+#define DEFAULT_PACKED_GIT_LIMIT \
+	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
+
+#ifdef NO_PREAD
+#define pread git_pread
+extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
+#endif
+/*
+ * Forward decl that will remind us if its twin in cache.h changes.
+ * This function is used in compat/pread.c.  But we can't include
+ * cache.h there.
+ */
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+
+#ifdef NO_SETENV
+#define setenv gitsetenv
+extern int gitsetenv(const char *, const char *, int);
+#endif
+
+#ifdef NO_MKDTEMP
+#define mkdtemp gitmkdtemp
+extern char *gitmkdtemp(char *);
+#endif
+
+#ifdef NO_UNSETENV
+#define unsetenv gitunsetenv
+extern void gitunsetenv(const char *);
+#endif
+
+#ifdef NO_STRCASESTR
+#define strcasestr gitstrcasestr
+extern char *gitstrcasestr(const char *haystack, const char *needle);
+#endif
+
+#ifdef NO_STRLCPY
+#define strlcpy gitstrlcpy
+extern size_t gitstrlcpy(char *, const char *, size_t);
+#endif
+
+#ifdef NO_STRTOUMAX
+#define strtoumax gitstrtoumax
+extern uintmax_t gitstrtoumax(const char *, char **, int);
+#endif
+
+#ifdef NO_HSTRERROR
+#define hstrerror githstrerror
+extern const char *githstrerror(int herror);
+#endif
+
+#ifdef NO_MEMMEM
+#define memmem gitmemmem
+void *gitmemmem(const void *haystack, size_t haystacklen,
+                const void *needle, size_t needlelen);
+#endif
+
+#ifdef FREAD_READS_DIRECTORIES
+#ifdef fopen
+#undef fopen
+#endif
+#define fopen(a,b) git_fopen(a,b)
+extern FILE *git_fopen(const char*, const char*);
+#endif
+
+#ifdef SNPRINTF_RETURNS_BOGUS
+#define snprintf git_snprintf
+extern int git_snprintf(char *str, size_t maxsize,
+			const char *format, ...);
+#define vsnprintf git_vsnprintf
+extern int git_vsnprintf(char *str, size_t maxsize,
+			 const char *format, va_list ap);
+#endif
+
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 1)
+#define HAVE_STRCHRNUL
+#endif
+#endif
+
+#ifndef HAVE_STRCHRNUL
+#define strchrnul gitstrchrnul
+static inline char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
+#endif
+
+/*
+ * Wrappers:
+ */
+extern char *xstrdup(const char *str);
+extern void *xmalloc(size_t size);
+extern void *xmemdupz(const void *data, size_t len);
+extern char *xstrndup(const char *str, size_t len);
+extern void *xrealloc(void *ptr, size_t size);
+extern void *xcalloc(size_t nmemb, size_t size);
+extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern ssize_t xread(int fd, void *buf, size_t len);
+extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern int xdup(int fd);
+extern FILE *xfdopen(int fd, const char *mode);
+extern int xmkstemp(char *template);
+
+static inline size_t xsize_t(off_t len)
+{
+	return (size_t)len;
+}
+
+static inline int has_extension(const char *filename, const char *ext)
+{
+	size_t len = strlen(filename);
+	size_t extlen = strlen(ext);
+	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isalpha
+#undef isalnum
+#undef tolower
+#undef toupper
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+	if (sane_istest(x, GIT_ALPHA))
+		x = (x & ~0x20) | high;
+	return x;
+}
+
+static inline int strtoul_ui(char const *s, int base, unsigned int *result)
+{
+	unsigned long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtoul(s, &p, base);
+	if (errno || *p || p == s || (unsigned int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+static inline int strtol_i(char const *s, int base, int *result)
+{
+	long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtol(s, &p, base);
+	if (errno || *p || p == s || (int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+#ifdef INTERNAL_QSORT
+void git_qsort(void *base, size_t nmemb, size_t size,
+	       int(*compar)(const void *, const void *));
+#define qsort git_qsort
+#endif
+
+#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
+# define FORCE_DIR_SET_GID S_ISGID
+#else
+# define FORCE_DIR_SET_GID 0
+#endif
+
+#ifdef NO_NSEC
+#undef USE_NSEC
+#define ST_CTIME_NSEC(st) 0
+#define ST_MTIME_NSEC(st) 0
+#else
+#ifdef USE_ST_TIMESPEC
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
+#else
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
+#endif
+#endif
+
+#endif
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
new file mode 100644
index 0000000..6350d65
--- /dev/null
+++ b/tools/perf/util/wrapper.c
@@ -0,0 +1,206 @@
+/*
+ * Various trivial helper wrappers around standard functions
+ */
+#include "cache.h"
+
+/*
+ * There's no pack memory to release - but stay close to the Git
+ * version so wrap this away:
+ */
+static inline void release_pack_memory(size_t size, int flag)
+{
+}
+
+char *xstrdup(const char *str)
+{
+	char *ret = strdup(str);
+	if (!ret) {
+		release_pack_memory(strlen(str) + 1, -1);
+		ret = strdup(str);
+		if (!ret)
+			die("Out of memory, strdup failed");
+	}
+	return ret;
+}
+
+void *xmalloc(size_t size)
+{
+	void *ret = malloc(size);
+	if (!ret && !size)
+		ret = malloc(1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = malloc(size);
+		if (!ret && !size)
+			ret = malloc(1);
+		if (!ret)
+			die("Out of memory, malloc failed");
+	}
+#ifdef XMALLOC_POISON
+	memset(ret, 0xA5, size);
+#endif
+	return ret;
+}
+
+/*
+ * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
+ * "data" to the allocated memory, zero terminates the allocated memory,
+ * and returns a pointer to the allocated memory. If the allocation fails,
+ * the program dies.
+ */
+void *xmemdupz(const void *data, size_t len)
+{
+	char *p = xmalloc(len + 1);
+	memcpy(p, data, len);
+	p[len] = '\0';
+	return p;
+}
+
+char *xstrndup(const char *str, size_t len)
+{
+	char *p = memchr(str, '\0', len);
+	return xmemdupz(str, p ? p - str : len);
+}
+
+void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size);
+	if (!ret && !size)
+		ret = realloc(ptr, 1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = realloc(ptr, size);
+		if (!ret && !size)
+			ret = realloc(ptr, 1);
+		if (!ret)
+			die("Out of memory, realloc failed");
+	}
+	return ret;
+}
+
+void *xcalloc(size_t nmemb, size_t size)
+{
+	void *ret = calloc(nmemb, size);
+	if (!ret && (!nmemb || !size))
+		ret = calloc(1, 1);
+	if (!ret) {
+		release_pack_memory(nmemb * size, -1);
+		ret = calloc(nmemb, size);
+		if (!ret && (!nmemb || !size))
+			ret = calloc(1, 1);
+		if (!ret)
+			die("Out of memory, calloc failed");
+	}
+	return ret;
+}
+
+void *xmmap(void *start, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	void *ret = mmap(start, length, prot, flags, fd, offset);
+	if (ret == MAP_FAILED) {
+		if (!length)
+			return NULL;
+		release_pack_memory(length, fd);
+		ret = mmap(start, length, prot, flags, fd, offset);
+		if (ret == MAP_FAILED)
+			die("Out of memory? mmap failed: %s", strerror(errno));
+	}
+	return ret;
+}
+
+/*
+ * xread() is the same a read(), but it automatically restarts read()
+ * operations with a recoverable error (EAGAIN and EINTR). xread()
+ * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xread(int fd, void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = read(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+/*
+ * xwrite() is the same a write(), but it automatically restarts write()
+ * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
+ * GUARANTEE that "len" bytes is written even if the operation is successful.
+ */
+ssize_t xwrite(int fd, const void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = write(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+ssize_t read_in_full(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t loaded = xread(fd, p, count);
+		if (loaded <= 0)
+			return total ? total : loaded;
+		count -= loaded;
+		p += loaded;
+		total += loaded;
+	}
+
+	return total;
+}
+
+ssize_t write_in_full(int fd, const void *buf, size_t count)
+{
+	const char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t written = xwrite(fd, p, count);
+		if (written < 0)
+			return -1;
+		if (!written) {
+			errno = ENOSPC;
+			return -1;
+		}
+		count -= written;
+		p += written;
+		total += written;
+	}
+
+	return total;
+}
+
+int xdup(int fd)
+{
+	int ret = dup(fd);
+	if (ret < 0)
+		die("dup failed: %s", strerror(errno));
+	return ret;
+}
+
+FILE *xfdopen(int fd, const char *mode)
+{
+	FILE *stream = fdopen(fd, mode);
+	if (stream == NULL)
+		die("Out of memory? fdopen failed: %s", strerror(errno));
+	return stream;
+}
+
+int xmkstemp(char *template)
+{
+	int fd;
+
+	fd = mkstemp(template);
+	if (fd < 0)
+		die("Unable to create temporary file: %s", strerror(errno));
+	return fd;
+}
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c3b99de..1eddae9 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@
 
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
-	union ioapic_redir_entry *pent;
+	union kvm_ioapic_redirect_entry *pent;
 	int injected = -1;
 
 	pent = &ioapic->redirtbl[idx];
@@ -142,149 +142,40 @@
 	}
 }
 
-static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
-			   struct kvm_vcpu *vcpu,
-			   u8 vector, u8 trig_mode, u8 delivery_mode)
-{
-	ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
-		     delivery_mode);
-
-	ASSERT((delivery_mode == IOAPIC_FIXED) ||
-	       (delivery_mode == IOAPIC_LOWEST_PRIORITY));
-
-	return kvm_apic_set_irq(vcpu, vector, trig_mode);
-}
-
-static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
-{
-	kvm_inject_nmi(vcpu);
-	kvm_vcpu_kick(vcpu);
-}
-
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				    u8 dest_mode)
-{
-	u32 mask = 0;
-	int i;
-	struct kvm *kvm = ioapic->kvm;
-	struct kvm_vcpu *vcpu;
-
-	ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
-
-	if (dest_mode == 0) {	/* Physical mode. */
-		if (dest == 0xFF) {	/* Broadcast. */
-			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
-					mask |= 1 << i;
-			return mask;
-		}
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
-				if (vcpu->arch.apic)
-					mask = 1 << i;
-				break;
-			}
-		}
-	} else if (dest != 0)	/* Logical mode, MDA non-zero. */
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (vcpu->arch.apic &&
-			    kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
-				mask |= 1 << vcpu->vcpu_id;
-		}
-	ioapic_debug("mask %x\n", mask);
-	return mask;
-}
-
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-	u8 dest = ioapic->redirtbl[irq].fields.dest_id;
-	u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
-	u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
-	u8 vector = ioapic->redirtbl[irq].fields.vector;
-	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
-	u32 deliver_bitmask;
-	struct kvm_vcpu *vcpu;
-	int vcpu_id, r = -1;
+	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+	struct kvm_lapic_irq irqe;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     dest, dest_mode, delivery_mode, vector, trig_mode);
+		     entry->fields.dest, entry->fields.dest_mode,
+		     entry->fields.delivery_mode, entry->fields.vector,
+		     entry->fields.trig_mode);
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
-							  dest_mode);
-	if (!deliver_bitmask) {
-		ioapic_debug("no target on destination\n");
-		return 0;
-	}
+	irqe.dest_id = entry->fields.dest_id;
+	irqe.vector = entry->fields.vector;
+	irqe.dest_mode = entry->fields.dest_mode;
+	irqe.trig_mode = entry->fields.trig_mode;
+	irqe.delivery_mode = entry->fields.delivery_mode << 8;
+	irqe.level = 1;
+	irqe.shorthand = 0;
 
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
 #ifdef CONFIG_X86
-		if (irq == 0)
-			vcpu = ioapic->kvm->vcpus[0];
-#endif
-		if (vcpu != NULL)
-			r = ioapic_inj_irq(ioapic, vcpu, vector,
-				       trig_mode, delivery_mode);
-		else
-			ioapic_debug("null lowest prio vcpu: "
-				     "mask=%x vector=%x delivery_mode=%x\n",
-				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
-		break;
-	case IOAPIC_FIXED:
-#ifdef CONFIG_X86
-		if (irq == 0)
-			deliver_bitmask = 1;
-#endif
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				if (r < 0)
-					r = 0;
-				r += ioapic_inj_irq(ioapic, vcpu, vector,
-					       trig_mode, delivery_mode);
-			}
-		}
-		break;
-	case IOAPIC_NMI:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				ioapic_inj_nmi(vcpu);
-				r = 1;
-			}
-			else
-				ioapic_debug("NMI to vcpu %d failed\n",
-						vcpu->vcpu_id);
-		}
-		break;
-	default:
-		printk(KERN_WARNING "Unsupported delivery mode %d\n",
-		       delivery_mode);
-		break;
+	/* Always delivery PIT interrupt to vcpu 0 */
+	if (irq == 0) {
+		irqe.dest_mode = 0; /* Physical mode. */
+		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
 	}
-	return r;
+#endif
+	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
 	u32 mask = 1 << irq;
-	union ioapic_redir_entry entry;
+	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@@ -305,7 +196,7 @@
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
 				    int trigger_mode)
 {
-	union ioapic_redir_entry *ent;
+	union kvm_ioapic_redirect_entry *ent;
 
 	ent = &ioapic->redirtbl[pin];
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a34bd5e..7080b71 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@
 	u32 id;
 	u32 irr;
 	u32 pad;
-	union ioapic_redir_entry {
-		u64 bits;
-		struct {
-			u8 vector;
-			u8 delivery_mode:3;
-			u8 dest_mode:1;
-			u8 delivery_status:1;
-			u8 polarity:1;
-			u8 remote_irr:1;
-			u8 trig_mode:1;
-			u8 mask:1;
-			u8 reserve:7;
-			u8 reserved[4];
-			u8 dest_id;
-		} fields;
-	} redirtbl[IOAPIC_NUM_PINS];
+	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
@@ -79,13 +64,13 @@
 	return kvm->arch.vioapic;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				u8 dest_mode);
-
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq);
 #endif
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
+	flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+		flags |= IOMMU_CACHE;
+
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
-				    PAGE_SIZE,
-				    IOMMU_READ | IOMMU_WRITE);
+				    PAGE_SIZE, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r;
+	int r, last_flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -107,12 +111,29 @@
 		return r;
 	}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+			KVM_IOMMU_CACHE_COHERENCY) {
+		kvm_iommu_unmap_memslots(kvm);
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
 	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
 
 	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 864ac54..a8bd466 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -22,6 +22,9 @@
 #include <linux/kvm_host.h>
 
 #include <asm/msidef.h>
+#ifdef CONFIG_IA64
+#include <asm/iosapic.h>
+#endif
 
 #include "irq.h"
 
@@ -43,55 +46,71 @@
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
+inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
+{
+#ifdef CONFIG_IA64
+	return irq->delivery_mode ==
+		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+#else
+	return irq->delivery_mode == APIC_DM_LOWEST;
+#endif
+}
+
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq)
+{
+	int i, r = -1;
+	struct kvm_vcpu *vcpu, *lowest = NULL;
+
+	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
+			kvm_is_dm_lowest_prio(irq))
+		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		vcpu = kvm->vcpus[i];
+
+		if (!vcpu || !kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		if (!kvm_is_dm_lowest_prio(irq)) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, irq);
+		} else {
+			if (!lowest)
+				lowest = vcpu;
+			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+				lowest = vcpu;
+		}
+	}
+
+	if (lowest)
+		r = kvm_apic_set_irq(lowest, irq);
+
+	return r;
+}
+
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
-	int vcpu_id, r = -1;
-	struct kvm_vcpu *vcpu;
-	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-	int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
-			>> MSI_ADDR_DEST_ID_SHIFT;
-	int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
-			>> MSI_DATA_VECTOR_SHIFT;
-	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-				(unsigned long *)&e->msi.address_lo);
-	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-				(unsigned long *)&e->msi.data);
-	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-				(unsigned long *)&e->msi.data);
-	u32 deliver_bitmask;
+	struct kvm_lapic_irq irq;
 
-	BUG_ON(!ioapic);
+	irq.dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	irq.vector = (e->msi.data &
+			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq.delivery_mode = e->msi.data & 0x700;
+	irq.level = 1;
+	irq.shorthand = 0;
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-				dest_id, dest_mode);
-	/* IOAPIC delivery mode value is the same as MSI here */
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
-		if (vcpu != NULL)
-			r = kvm_apic_set_irq(vcpu, vector, trig_mode);
-		else
-			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
-		break;
-	case IOAPIC_FIXED:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				if (r < 0)
-					r = 0;
-				r += kvm_apic_set_irq(vcpu, vector, trig_mode);
-			}
-		}
-		break;
-	default:
-		break;
-	}
-	return r;
+	/* TODO Deal with RH bit of MSI message address */
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
 /* This should be called with the kvm->lock mutex held
@@ -252,7 +271,7 @@
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
-				e->set = kvm_set_ioapic_irq;
+			e->set = kvm_set_ioapic_irq;
 			break;
 		default:
 			goto out;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ecbe23..7645543 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,8 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -60,9 +62,6 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -95,38 +94,96 @@
 	return NULL;
 }
 
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int irq, i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
-
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
-		enable_irq(assigned_dev->host_irq);
-		assigned_dev->host_irq_disabled = false;
+	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+			irq = assigned_dev->host_msix_entries[i].vector;
+			if (irq != 0)
+				enable_irq(irq);
+			assigned_dev->host_irq_disabled = false;
+		}
+	} else {
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+		if (assigned_dev->irq_requested_type &
+				KVM_DEV_IRQ_GUEST_MSI) {
+			enable_irq(assigned_dev->host_irq);
+			assigned_dev->host_irq_disabled = false;
+		}
 	}
+
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			goto out;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
 	schedule_work(&assigned_dev->interrupt_work);
 
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -134,6 +191,7 @@
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -146,28 +204,30 @@
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (!irqchip_in_kernel(kvm))
-		return;
-
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
 
-	if (!assigned_dev->irq_requested_type)
-		return;
-
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
 	/*
 	 * In kvm_free_device_irq, cancel_work_sync return true if:
 	 * 1. work is scheduled, and then cancelled.
@@ -184,17 +244,64 @@
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	disable_irq_nosync(assigned_dev->host_irq);
-	cancel_work_sync(&assigned_dev->interrupt_work);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
 
-	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+		cancel_work_sync(&assigned_dev->interrupt_work);
 
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		pci_disable_msi(assigned_dev->dev);
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
 
-	assigned_dev->irq_requested_type = 0;
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
+
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
 }
 
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
 
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
@@ -226,191 +333,245 @@
 	}
 }
 
-static int assigned_device_update_intx(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	adev->guest_irq = airq->guest_irq;
-	adev->ack_notifier.gsi = airq->guest_irq;
-
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-		return 0;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx &&
-		    (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
-			free_irq(adev->host_irq, (void *)adev);
-			pci_disable_msi(adev->dev);
-		}
-
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-
-		if (airq->host_irq)
-			adev->host_irq = airq->host_irq;
-		else
-			adev->host_irq = adev->dev->irq;
-
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
-				0, "kvm_assigned_intx_device", (void *)adev))
-			return -EIO;
-	}
-
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
-				   KVM_ASSIGNED_DEV_HOST_INTX;
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
 	return 0;
 }
 
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
 {
 	int r;
 
-	adev->guest_irq = airq->guest_irq;
-	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
-		/* x86 don't care upper address of guest msi message addr */
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->ack_notifier.gsi = -1;
-	} else if (msi2intx) {
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->ack_notifier.gsi = airq->guest_irq;
-	} else {
-		/*
-		 * Guest require to disable device MSI, we disable MSI and
-		 * re-enable INTx by default again. Notice it's only for
-		 * non-msi2intx.
-		 */
-		assigned_device_update_intx(kvm, adev, airq);
-		return 0;
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
 	}
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		return 0;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx) {
-			if (adev->irq_requested_type &
-					KVM_ASSIGNED_DEV_HOST_INTX)
-				free_irq(adev->host_irq, (void *)adev);
-
-			r = pci_enable_msi(adev->dev);
-			if (r)
-				return r;
-		}
-
-		adev->host_irq = adev->dev->irq;
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msi_device", (void *)adev))
-			return -EIO;
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
 	}
 
-	if (!msi2intx)
-		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
-
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
 	return 0;
 }
 #endif
 
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	int r = 0;
+	int i, r = -EINVAL;
+
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
+
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
+
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
+
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
+
+	return r;
+}
+
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
+
+	return r;
+}
+
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq *assigned_irq)
+{
+	int r = -EINVAL;
 	struct kvm_assigned_dev_kernel *match;
-	u32 current_flags = 0, changed_flags;
+	unsigned long host_irq_type, guest_irq_type;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
+
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
+
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
+
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
 
 	mutex_lock(&kvm->lock);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
+	if (!match)
+		goto out;
 
-	if (!match->irq_requested_type) {
-		INIT_WORK(&match->interrupt_work,
-				kvm_assigned_dev_interrupt_work_handler);
-		if (irqchip_in_kernel(kvm)) {
-			/* Register ack nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
-	}
-
-	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
-		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
-
-	changed_flags = assigned_irq->flags ^ current_flags;
-
-	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
-			goto out_release;
-		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
-			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
-			goto out_release;
-		}
-	}
-
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
 	mutex_unlock(&kvm->lock);
 	return r;
-out_release:
-	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
-	return r;
 }
 
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
@@ -427,7 +588,7 @@
 				      assigned_dev->assigned_dev_id);
 	if (match) {
 		/* device already assigned */
-		r = -EINVAL;
+		r = -EEXIST;
 		goto out;
 	}
 
@@ -464,8 +625,12 @@
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
@@ -878,6 +1043,8 @@
 #endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#else
+	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
@@ -919,9 +1086,8 @@
 {
 	int r;
 	gfn_t base_gfn;
-	unsigned long npages;
-	int largepages;
-	unsigned long i;
+	unsigned long npages, ugfn;
+	unsigned long largepages, i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 
@@ -1010,6 +1176,14 @@
 			new.lpage_info[0].write_count = 1;
 		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
 			new.lpage_info[largepages-1].write_count = 1;
+		ugfn = new.userspace_addr >> PAGE_SHIFT;
+		/*
+		 * If the gfn and userspace address are not aligned wrt each
+		 * other, disable large page support for this slot
+		 */
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
+			for (i = 0; i < largepages; ++i)
+				new.lpage_info[i].write_count = 1;
 	}
 
 	/* Allocate page dirty bitmap if needed */
@@ -1043,8 +1217,10 @@
 
 	kvm_free_physmem_slot(&old, npages ? &new : NULL);
 	/* Slot deletion case: we have to update the current slot */
+	spin_lock(&kvm->mmu_lock);
 	if (!npages)
 		*memslot = old;
+	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1454,12 +1630,14 @@
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu) ||
-		    kvm_cpu_has_pending_timer(vcpu) ||
-		    kvm_arch_vcpu_runnable(vcpu)) {
+		if ((kvm_arch_interrupt_allowed(vcpu) &&
+					kvm_cpu_has_interrupt(vcpu)) ||
+				kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 		}
+		if (kvm_cpu_has_pending_timer(vcpu))
+			break;
 		if (signal_pending(current))
 			break;
 
@@ -1593,6 +1771,88 @@
 	return 0;
 }
 
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -1864,6 +2124,11 @@
 		break;
 	}
 	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
 		r = -EFAULT;
@@ -1874,6 +2139,18 @@
 			goto out;
 		break;
 	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 #endif
 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
@@ -1917,7 +2194,29 @@
 		vfree(entries);
 		break;
 	}
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
+#endif /* KVM_CAP_IRQ_ROUTING */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -2112,15 +2411,15 @@
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
-	if (val == SYS_RESTART) {
-		/*
-		 * Some (well, at least mine) BIOSes hang on reboot if
-		 * in vmx root mode.
-		 */
-		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		kvm_rebooting = true;
-		on_each_cpu(hardware_disable, NULL, 1);
-	}
+	/*
+	 * Some (well, at least mine) BIOSes hang on reboot if
+	 * in vmx root mode.
+	 *
+	 * And Intel TXT required VMX off for all cpu when system shutdown.
+	 */
+	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+	kvm_rebooting = true;
+	on_each_cpu(hardware_disable, NULL, 1);
 	return NOTIFY_OK;
 }
 
@@ -2301,7 +2600,7 @@
 
 	bad_pfn = page_to_pfn(bad_page);
 
-	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
 	}
@@ -2353,9 +2652,6 @@
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;